{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 5000.0, "global_step": 18533, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.395780499649274e-05, "grad_norm": 9.559964179992676, "learning_rate": 3.597122302158274e-08, "loss": 1.6606271266937256, "step": 1, "token_acc": 0.6314893617021277 }, { "epoch": 0.00010791560999298548, "grad_norm": 9.833085060119629, "learning_rate": 7.194244604316547e-08, "loss": 1.5830100774765015, "step": 2, "token_acc": 0.6389663035216621 }, { "epoch": 0.00016187341498947823, "grad_norm": 12.979348182678223, "learning_rate": 1.0791366906474822e-07, "loss": 1.6477077007293701, "step": 3, "token_acc": 0.6326453083449411 }, { "epoch": 0.00021583121998597097, "grad_norm": 11.000593185424805, "learning_rate": 1.4388489208633095e-07, "loss": 1.5837249755859375, "step": 4, "token_acc": 0.6431232091690544 }, { "epoch": 0.00026978902498246373, "grad_norm": 9.770639419555664, "learning_rate": 1.7985611510791368e-07, "loss": 1.635642409324646, "step": 5, "token_acc": 0.6387638398831975 }, { "epoch": 0.00032374682997895647, "grad_norm": 12.53105640411377, "learning_rate": 2.1582733812949643e-07, "loss": 1.6230473518371582, "step": 6, "token_acc": 0.6296423265912467 }, { "epoch": 0.0003777046349754492, "grad_norm": 9.332720756530762, "learning_rate": 2.5179856115107916e-07, "loss": 1.552910327911377, "step": 7, "token_acc": 0.6528904665314401 }, { "epoch": 0.00043166243997194194, "grad_norm": 11.748541831970215, "learning_rate": 2.877697841726619e-07, "loss": 1.7179698944091797, "step": 8, "token_acc": 0.6311447232726023 }, { "epoch": 0.00048562024496843467, "grad_norm": 10.741602897644043, "learning_rate": 3.237410071942446e-07, "loss": 1.659743070602417, "step": 9, "token_acc": 0.6279226014512228 }, { "epoch": 0.0005395780499649275, "grad_norm": 11.231916427612305, "learning_rate": 3.5971223021582736e-07, "loss": 1.5830087661743164, "step": 10, "token_acc": 0.637848347375243 }, { "epoch": 0.0005935358549614202, "grad_norm": 7.530948162078857, "learning_rate": 3.956834532374101e-07, "loss": 1.6317778825759888, "step": 11, "token_acc": 0.6400153511577331 }, { "epoch": 0.0006474936599579129, "grad_norm": 13.691436767578125, "learning_rate": 4.3165467625899287e-07, "loss": 1.6447772979736328, "step": 12, "token_acc": 0.6400998655655847 }, { "epoch": 0.0007014514649544057, "grad_norm": 11.219429969787598, "learning_rate": 4.676258992805756e-07, "loss": 1.6183569431304932, "step": 13, "token_acc": 0.6400298730395818 }, { "epoch": 0.0007554092699508984, "grad_norm": 9.30926513671875, "learning_rate": 5.035971223021583e-07, "loss": 1.518846869468689, "step": 14, "token_acc": 0.6576095252633186 }, { "epoch": 0.0008093670749473911, "grad_norm": 10.969385147094727, "learning_rate": 5.39568345323741e-07, "loss": 1.6597533226013184, "step": 15, "token_acc": 0.6296007485963818 }, { "epoch": 0.0008633248799438839, "grad_norm": 11.910487174987793, "learning_rate": 5.755395683453238e-07, "loss": 1.5913808345794678, "step": 16, "token_acc": 0.6394092711609463 }, { "epoch": 0.0009172826849403766, "grad_norm": 7.931929588317871, "learning_rate": 6.115107913669066e-07, "loss": 1.4640973806381226, "step": 17, "token_acc": 0.6567429460119973 }, { "epoch": 0.0009712404899368693, "grad_norm": 8.400137901306152, "learning_rate": 6.474820143884893e-07, "loss": 1.4612483978271484, "step": 18, "token_acc": 0.6515679442508711 }, { "epoch": 0.0010251982949333622, "grad_norm": 10.347299575805664, "learning_rate": 6.83453237410072e-07, "loss": 1.4342039823532104, "step": 19, "token_acc": 0.6606847045831032 }, { "epoch": 0.001079156099929855, "grad_norm": 12.240276336669922, "learning_rate": 7.194244604316547e-07, "loss": 1.5061328411102295, "step": 20, "token_acc": 0.6509869802603948 }, { "epoch": 0.0011331139049263477, "grad_norm": 8.147343635559082, "learning_rate": 7.553956834532375e-07, "loss": 1.4267677068710327, "step": 21, "token_acc": 0.6599061810154525 }, { "epoch": 0.0011870717099228404, "grad_norm": 10.749136924743652, "learning_rate": 7.913669064748202e-07, "loss": 1.56380295753479, "step": 22, "token_acc": 0.6291546242774566 }, { "epoch": 0.0012410295149193331, "grad_norm": 6.964962959289551, "learning_rate": 8.27338129496403e-07, "loss": 1.3717201948165894, "step": 23, "token_acc": 0.6536082474226804 }, { "epoch": 0.0012949873199158259, "grad_norm": 5.816498279571533, "learning_rate": 8.633093525179857e-07, "loss": 1.3272801637649536, "step": 24, "token_acc": 0.6638560281902844 }, { "epoch": 0.0013489451249123186, "grad_norm": 6.000529766082764, "learning_rate": 8.992805755395684e-07, "loss": 1.3492252826690674, "step": 25, "token_acc": 0.6659195616984186 }, { "epoch": 0.0014029029299088113, "grad_norm": 6.511415481567383, "learning_rate": 9.352517985611512e-07, "loss": 1.2642052173614502, "step": 26, "token_acc": 0.6660200452634982 }, { "epoch": 0.001456860734905304, "grad_norm": 5.94466495513916, "learning_rate": 9.71223021582734e-07, "loss": 1.2454359531402588, "step": 27, "token_acc": 0.6644793152639087 }, { "epoch": 0.0015108185399017968, "grad_norm": 5.281854152679443, "learning_rate": 1.0071942446043167e-06, "loss": 1.2180542945861816, "step": 28, "token_acc": 0.6697976878612717 }, { "epoch": 0.0015647763448982895, "grad_norm": 4.711416244506836, "learning_rate": 1.0431654676258993e-06, "loss": 1.253058910369873, "step": 29, "token_acc": 0.6725868725868726 }, { "epoch": 0.0016187341498947823, "grad_norm": 3.4368808269500732, "learning_rate": 1.079136690647482e-06, "loss": 1.22200345993042, "step": 30, "token_acc": 0.6807077625570777 }, { "epoch": 0.001672691954891275, "grad_norm": 3.6363885402679443, "learning_rate": 1.115107913669065e-06, "loss": 1.2251341342926025, "step": 31, "token_acc": 0.6679405520169851 }, { "epoch": 0.0017266497598877677, "grad_norm": 2.4751670360565186, "learning_rate": 1.1510791366906476e-06, "loss": 1.1145806312561035, "step": 32, "token_acc": 0.6979662483773258 }, { "epoch": 0.0017806075648842605, "grad_norm": 2.05216646194458, "learning_rate": 1.1870503597122303e-06, "loss": 1.0664018392562866, "step": 33, "token_acc": 0.7033621293485874 }, { "epoch": 0.0018345653698807532, "grad_norm": 3.1620328426361084, "learning_rate": 1.2230215827338131e-06, "loss": 1.0257046222686768, "step": 34, "token_acc": 0.7080542675596772 }, { "epoch": 0.001888523174877246, "grad_norm": 3.0867440700531006, "learning_rate": 1.2589928057553958e-06, "loss": 1.0271971225738525, "step": 35, "token_acc": 0.7086139666784828 }, { "epoch": 0.0019424809798737387, "grad_norm": 2.591360569000244, "learning_rate": 1.2949640287769785e-06, "loss": 1.0282635688781738, "step": 36, "token_acc": 0.7124798611971743 }, { "epoch": 0.0019964387848702314, "grad_norm": 2.285693407058716, "learning_rate": 1.3309352517985614e-06, "loss": 1.051131248474121, "step": 37, "token_acc": 0.7109737248840804 }, { "epoch": 0.0020503965898667244, "grad_norm": 3.1148202419281006, "learning_rate": 1.366906474820144e-06, "loss": 1.0489304065704346, "step": 38, "token_acc": 0.7057036787040162 }, { "epoch": 0.002104354394863217, "grad_norm": 2.1376945972442627, "learning_rate": 1.4028776978417265e-06, "loss": 0.9818482995033264, "step": 39, "token_acc": 0.7211670795485824 }, { "epoch": 0.00215831219985971, "grad_norm": 2.2956383228302, "learning_rate": 1.4388489208633094e-06, "loss": 1.0224790573120117, "step": 40, "token_acc": 0.7173946836623659 }, { "epoch": 0.0022122700048562024, "grad_norm": 2.075606346130371, "learning_rate": 1.474820143884892e-06, "loss": 0.9573520421981812, "step": 41, "token_acc": 0.7202634106187406 }, { "epoch": 0.0022662278098526953, "grad_norm": 2.302959680557251, "learning_rate": 1.510791366906475e-06, "loss": 0.969549298286438, "step": 42, "token_acc": 0.7354080022945647 }, { "epoch": 0.002320185614849188, "grad_norm": 2.0832998752593994, "learning_rate": 1.5467625899280579e-06, "loss": 1.0160224437713623, "step": 43, "token_acc": 0.7183908045977011 }, { "epoch": 0.002374143419845681, "grad_norm": 2.243340253829956, "learning_rate": 1.5827338129496403e-06, "loss": 1.0407905578613281, "step": 44, "token_acc": 0.7159953673915841 }, { "epoch": 0.0024281012248421733, "grad_norm": 1.7671184539794922, "learning_rate": 1.618705035971223e-06, "loss": 0.9822075963020325, "step": 45, "token_acc": 0.7294245680859747 }, { "epoch": 0.0024820590298386663, "grad_norm": 1.8711864948272705, "learning_rate": 1.654676258992806e-06, "loss": 0.9621149301528931, "step": 46, "token_acc": 0.722836955072657 }, { "epoch": 0.0025360168348351588, "grad_norm": 1.783299207687378, "learning_rate": 1.6906474820143886e-06, "loss": 0.947315514087677, "step": 47, "token_acc": 0.7356678904264332 }, { "epoch": 0.0025899746398316517, "grad_norm": 1.2939505577087402, "learning_rate": 1.7266187050359715e-06, "loss": 0.9732876420021057, "step": 48, "token_acc": 0.7276663628076573 }, { "epoch": 0.0026439324448281443, "grad_norm": 2.186213970184326, "learning_rate": 1.762589928057554e-06, "loss": 0.8544223308563232, "step": 49, "token_acc": 0.7468092445670921 }, { "epoch": 0.002697890249824637, "grad_norm": 2.7226076126098633, "learning_rate": 1.7985611510791368e-06, "loss": 0.9075930118560791, "step": 50, "token_acc": 0.7429292398275681 }, { "epoch": 0.0027518480548211297, "grad_norm": 1.8799450397491455, "learning_rate": 1.8345323741007195e-06, "loss": 0.8865325450897217, "step": 51, "token_acc": 0.7393392519283947 }, { "epoch": 0.0028058058598176227, "grad_norm": 1.8995534181594849, "learning_rate": 1.8705035971223024e-06, "loss": 0.8949690461158752, "step": 52, "token_acc": 0.7399842890809112 }, { "epoch": 0.002859763664814115, "grad_norm": 2.1245667934417725, "learning_rate": 1.906474820143885e-06, "loss": 0.8531386852264404, "step": 53, "token_acc": 0.7434296977660972 }, { "epoch": 0.002913721469810608, "grad_norm": 1.80119788646698, "learning_rate": 1.942446043165468e-06, "loss": 0.7698464393615723, "step": 54, "token_acc": 0.7698798627002288 }, { "epoch": 0.0029676792748071007, "grad_norm": 1.7751632928848267, "learning_rate": 1.9784172661870504e-06, "loss": 0.8753108978271484, "step": 55, "token_acc": 0.7491297563317729 }, { "epoch": 0.0030216370798035936, "grad_norm": 1.5797182321548462, "learning_rate": 2.0143884892086333e-06, "loss": 0.8460366129875183, "step": 56, "token_acc": 0.7483273862622658 }, { "epoch": 0.003075594884800086, "grad_norm": 1.365781545639038, "learning_rate": 2.050359712230216e-06, "loss": 0.8282046318054199, "step": 57, "token_acc": 0.7518686794665104 }, { "epoch": 0.003129552689796579, "grad_norm": 0.9890125393867493, "learning_rate": 2.0863309352517987e-06, "loss": 0.8308353424072266, "step": 58, "token_acc": 0.7597122302158273 }, { "epoch": 0.0031835104947930716, "grad_norm": 1.5035436153411865, "learning_rate": 2.1223021582733816e-06, "loss": 0.8532420992851257, "step": 59, "token_acc": 0.750597497539716 }, { "epoch": 0.0032374682997895646, "grad_norm": 1.5096255540847778, "learning_rate": 2.158273381294964e-06, "loss": 0.8085594773292542, "step": 60, "token_acc": 0.7644385810612724 }, { "epoch": 0.0032914261047860575, "grad_norm": 1.7023484706878662, "learning_rate": 2.194244604316547e-06, "loss": 0.7942830324172974, "step": 61, "token_acc": 0.7631367507632975 }, { "epoch": 0.00334538390978255, "grad_norm": 1.6301716566085815, "learning_rate": 2.23021582733813e-06, "loss": 0.8242467045783997, "step": 62, "token_acc": 0.7596153846153846 }, { "epoch": 0.003399341714779043, "grad_norm": 1.6252071857452393, "learning_rate": 2.2661870503597123e-06, "loss": 0.764145016670227, "step": 63, "token_acc": 0.7768717568569311 }, { "epoch": 0.0034532995197755355, "grad_norm": 1.3506051301956177, "learning_rate": 2.302158273381295e-06, "loss": 0.7739633917808533, "step": 64, "token_acc": 0.7670149675232985 }, { "epoch": 0.0035072573247720285, "grad_norm": 1.7686296701431274, "learning_rate": 2.3381294964028776e-06, "loss": 0.8137422800064087, "step": 65, "token_acc": 0.7572296476306197 }, { "epoch": 0.003561215129768521, "grad_norm": 1.249837040901184, "learning_rate": 2.3741007194244605e-06, "loss": 0.7399649620056152, "step": 66, "token_acc": 0.7776877676194884 }, { "epoch": 0.003615172934765014, "grad_norm": 1.7550538778305054, "learning_rate": 2.4100719424460434e-06, "loss": 0.7645026445388794, "step": 67, "token_acc": 0.7666124535315985 }, { "epoch": 0.0036691307397615064, "grad_norm": 1.1143012046813965, "learning_rate": 2.4460431654676263e-06, "loss": 0.7832490801811218, "step": 68, "token_acc": 0.7746639977123249 }, { "epoch": 0.0037230885447579994, "grad_norm": 1.7415302991867065, "learning_rate": 2.4820143884892088e-06, "loss": 0.6933656930923462, "step": 69, "token_acc": 0.7832135042200687 }, { "epoch": 0.003777046349754492, "grad_norm": 1.899679183959961, "learning_rate": 2.5179856115107916e-06, "loss": 0.7338966727256775, "step": 70, "token_acc": 0.7782723042686628 }, { "epoch": 0.003831004154750985, "grad_norm": 1.2925275564193726, "learning_rate": 2.5539568345323745e-06, "loss": 0.7158875465393066, "step": 71, "token_acc": 0.782925488039593 }, { "epoch": 0.0038849619597474774, "grad_norm": 1.6477397680282593, "learning_rate": 2.589928057553957e-06, "loss": 0.7522510290145874, "step": 72, "token_acc": 0.7709095661265029 }, { "epoch": 0.00393891976474397, "grad_norm": 1.5120773315429688, "learning_rate": 2.6258992805755395e-06, "loss": 0.6967668533325195, "step": 73, "token_acc": 0.787575714889421 }, { "epoch": 0.003992877569740463, "grad_norm": 1.6029629707336426, "learning_rate": 2.6618705035971228e-06, "loss": 0.7450041770935059, "step": 74, "token_acc": 0.77083714966111 }, { "epoch": 0.004046835374736955, "grad_norm": 1.5011351108551025, "learning_rate": 2.6978417266187052e-06, "loss": 0.7118775248527527, "step": 75, "token_acc": 0.7808906288230977 }, { "epoch": 0.004100793179733449, "grad_norm": 1.868929147720337, "learning_rate": 2.733812949640288e-06, "loss": 0.6753062605857849, "step": 76, "token_acc": 0.7898801056695793 }, { "epoch": 0.004154750984729941, "grad_norm": 1.7119008302688599, "learning_rate": 2.7697841726618706e-06, "loss": 0.7221365571022034, "step": 77, "token_acc": 0.7759902868417059 }, { "epoch": 0.004208708789726434, "grad_norm": 1.701603889465332, "learning_rate": 2.805755395683453e-06, "loss": 0.6990644931793213, "step": 78, "token_acc": 0.7845684953480911 }, { "epoch": 0.004262666594722926, "grad_norm": 1.3727412223815918, "learning_rate": 2.8417266187050364e-06, "loss": 0.6488600969314575, "step": 79, "token_acc": 0.7905233380480905 }, { "epoch": 0.00431662439971942, "grad_norm": 1.3174437284469604, "learning_rate": 2.877697841726619e-06, "loss": 0.7019875645637512, "step": 80, "token_acc": 0.7793653180929463 }, { "epoch": 0.004370582204715912, "grad_norm": 1.4812413454055786, "learning_rate": 2.9136690647482017e-06, "loss": 0.6753907799720764, "step": 81, "token_acc": 0.7941047716328615 }, { "epoch": 0.004424540009712405, "grad_norm": 1.2656677961349487, "learning_rate": 2.949640287769784e-06, "loss": 0.6833641529083252, "step": 82, "token_acc": 0.7923497267759563 }, { "epoch": 0.004478497814708897, "grad_norm": 1.4131600856781006, "learning_rate": 2.985611510791367e-06, "loss": 0.6741900444030762, "step": 83, "token_acc": 0.7937928142038305 }, { "epoch": 0.004532455619705391, "grad_norm": 1.6124980449676514, "learning_rate": 3.02158273381295e-06, "loss": 0.6974748373031616, "step": 84, "token_acc": 0.7809623981067578 }, { "epoch": 0.004586413424701883, "grad_norm": 1.5160417556762695, "learning_rate": 3.0575539568345324e-06, "loss": 0.6362682580947876, "step": 85, "token_acc": 0.801845166466105 }, { "epoch": 0.004640371229698376, "grad_norm": 1.423172116279602, "learning_rate": 3.0935251798561158e-06, "loss": 0.6460926532745361, "step": 86, "token_acc": 0.7926772023157566 }, { "epoch": 0.004694329034694868, "grad_norm": 1.5266562700271606, "learning_rate": 3.1294964028776982e-06, "loss": 0.6735775470733643, "step": 87, "token_acc": 0.7924372067347502 }, { "epoch": 0.004748286839691362, "grad_norm": 1.6580616235733032, "learning_rate": 3.1654676258992807e-06, "loss": 0.6689672470092773, "step": 88, "token_acc": 0.7943491706494237 }, { "epoch": 0.004802244644687854, "grad_norm": 1.7750211954116821, "learning_rate": 3.2014388489208636e-06, "loss": 0.6139557361602783, "step": 89, "token_acc": 0.8082782408738143 }, { "epoch": 0.004856202449684347, "grad_norm": 1.5144877433776855, "learning_rate": 3.237410071942446e-06, "loss": 0.6207829713821411, "step": 90, "token_acc": 0.8060836501901141 }, { "epoch": 0.00491016025468084, "grad_norm": 1.5389634370803833, "learning_rate": 3.2733812949640294e-06, "loss": 0.630039632320404, "step": 91, "token_acc": 0.8025 }, { "epoch": 0.0049641180596773325, "grad_norm": 1.1948453187942505, "learning_rate": 3.309352517985612e-06, "loss": 0.624320924282074, "step": 92, "token_acc": 0.8079298017549561 }, { "epoch": 0.005018075864673825, "grad_norm": 1.3068158626556396, "learning_rate": 3.3453237410071943e-06, "loss": 0.6413968801498413, "step": 93, "token_acc": 0.8062167783683846 }, { "epoch": 0.0050720336696703176, "grad_norm": 1.6224919557571411, "learning_rate": 3.381294964028777e-06, "loss": 0.639655590057373, "step": 94, "token_acc": 0.8056724326009038 }, { "epoch": 0.005125991474666811, "grad_norm": 1.4654523134231567, "learning_rate": 3.4172661870503596e-06, "loss": 0.6499083042144775, "step": 95, "token_acc": 0.8010513546300041 }, { "epoch": 0.0051799492796633035, "grad_norm": 1.5522289276123047, "learning_rate": 3.453237410071943e-06, "loss": 0.5612356662750244, "step": 96, "token_acc": 0.8272548053228191 }, { "epoch": 0.005233907084659796, "grad_norm": 1.536201000213623, "learning_rate": 3.4892086330935254e-06, "loss": 0.6770619750022888, "step": 97, "token_acc": 0.795983508445272 }, { "epoch": 0.0052878648896562885, "grad_norm": 1.6188323497772217, "learning_rate": 3.525179856115108e-06, "loss": 0.6280099749565125, "step": 98, "token_acc": 0.8009225092250922 }, { "epoch": 0.005341822694652782, "grad_norm": 1.699330449104309, "learning_rate": 3.561151079136691e-06, "loss": 0.5469552278518677, "step": 99, "token_acc": 0.8251385041551247 }, { "epoch": 0.005395780499649274, "grad_norm": 1.5550514459609985, "learning_rate": 3.5971223021582737e-06, "loss": 0.6216689348220825, "step": 100, "token_acc": 0.8086637073320068 }, { "epoch": 0.005449738304645767, "grad_norm": 1.547104835510254, "learning_rate": 3.6330935251798566e-06, "loss": 0.5903574228286743, "step": 101, "token_acc": 0.8173157747268143 }, { "epoch": 0.0055036961096422594, "grad_norm": 1.3937968015670776, "learning_rate": 3.669064748201439e-06, "loss": 0.6215251684188843, "step": 102, "token_acc": 0.8075534931683424 }, { "epoch": 0.005557653914638753, "grad_norm": 1.6914901733398438, "learning_rate": 3.7050359712230215e-06, "loss": 0.5759515762329102, "step": 103, "token_acc": 0.8244125326370757 }, { "epoch": 0.005611611719635245, "grad_norm": 1.3770447969436646, "learning_rate": 3.741007194244605e-06, "loss": 0.6735258102416992, "step": 104, "token_acc": 0.7906112799729821 }, { "epoch": 0.005665569524631738, "grad_norm": 1.7276766300201416, "learning_rate": 3.7769784172661873e-06, "loss": 0.6783890724182129, "step": 105, "token_acc": 0.7902124919510625 }, { "epoch": 0.00571952732962823, "grad_norm": 1.6506996154785156, "learning_rate": 3.81294964028777e-06, "loss": 0.6207314729690552, "step": 106, "token_acc": 0.8052136133236785 }, { "epoch": 0.005773485134624724, "grad_norm": 1.2023359537124634, "learning_rate": 3.848920863309353e-06, "loss": 0.5684773921966553, "step": 107, "token_acc": 0.8213775023832222 }, { "epoch": 0.005827442939621216, "grad_norm": 1.2566616535186768, "learning_rate": 3.884892086330936e-06, "loss": 0.6399157047271729, "step": 108, "token_acc": 0.803731559155337 }, { "epoch": 0.005881400744617709, "grad_norm": 1.5447099208831787, "learning_rate": 3.920863309352518e-06, "loss": 0.5946757197380066, "step": 109, "token_acc": 0.8050914876690533 }, { "epoch": 0.005935358549614201, "grad_norm": 1.4139448404312134, "learning_rate": 3.956834532374101e-06, "loss": 0.626840353012085, "step": 110, "token_acc": 0.8032727272727272 }, { "epoch": 0.005989316354610695, "grad_norm": 1.7479809522628784, "learning_rate": 3.992805755395684e-06, "loss": 0.6080000400543213, "step": 111, "token_acc": 0.8110480123902942 }, { "epoch": 0.006043274159607187, "grad_norm": 1.5500332117080688, "learning_rate": 4.028776978417267e-06, "loss": 0.544524073600769, "step": 112, "token_acc": 0.82572948573271 }, { "epoch": 0.00609723196460368, "grad_norm": 1.427059292793274, "learning_rate": 4.0647482014388495e-06, "loss": 0.5740067958831787, "step": 113, "token_acc": 0.8168877833056938 }, { "epoch": 0.006151189769600172, "grad_norm": 1.5246766805648804, "learning_rate": 4.100719424460432e-06, "loss": 0.563508152961731, "step": 114, "token_acc": 0.8176517893043828 }, { "epoch": 0.006205147574596666, "grad_norm": 1.922025203704834, "learning_rate": 4.1366906474820145e-06, "loss": 0.5616350769996643, "step": 115, "token_acc": 0.8142372881355933 }, { "epoch": 0.006259105379593158, "grad_norm": 1.5401092767715454, "learning_rate": 4.172661870503597e-06, "loss": 0.5000636577606201, "step": 116, "token_acc": 0.8374856266768878 }, { "epoch": 0.006313063184589651, "grad_norm": 1.6082127094268799, "learning_rate": 4.20863309352518e-06, "loss": 0.5913026332855225, "step": 117, "token_acc": 0.8026627218934911 }, { "epoch": 0.006367020989586143, "grad_norm": 1.7741881608963013, "learning_rate": 4.244604316546763e-06, "loss": 0.5652567148208618, "step": 118, "token_acc": 0.8223473697683348 }, { "epoch": 0.006420978794582637, "grad_norm": 1.4993165731430054, "learning_rate": 4.280575539568346e-06, "loss": 0.6496812105178833, "step": 119, "token_acc": 0.796775130737943 }, { "epoch": 0.006474936599579129, "grad_norm": 1.313348650932312, "learning_rate": 4.316546762589928e-06, "loss": 0.584027886390686, "step": 120, "token_acc": 0.8133418043202033 }, { "epoch": 0.006528894404575622, "grad_norm": 1.3294726610183716, "learning_rate": 4.352517985611511e-06, "loss": 0.5630209445953369, "step": 121, "token_acc": 0.8177115987460815 }, { "epoch": 0.006582852209572115, "grad_norm": 1.171988844871521, "learning_rate": 4.388489208633094e-06, "loss": 0.653069019317627, "step": 122, "token_acc": 0.7925604987847406 }, { "epoch": 0.0066368100145686075, "grad_norm": 1.3602224588394165, "learning_rate": 4.424460431654677e-06, "loss": 0.5457449555397034, "step": 123, "token_acc": 0.8247629991381786 }, { "epoch": 0.0066907678195651, "grad_norm": 1.6984714269638062, "learning_rate": 4.46043165467626e-06, "loss": 0.5871272087097168, "step": 124, "token_acc": 0.8108676599474145 }, { "epoch": 0.006744725624561593, "grad_norm": 1.8575698137283325, "learning_rate": 4.496402877697842e-06, "loss": 0.6221872568130493, "step": 125, "token_acc": 0.8106769400110072 }, { "epoch": 0.006798683429558086, "grad_norm": 1.5966886281967163, "learning_rate": 4.5323741007194245e-06, "loss": 0.5685538053512573, "step": 126, "token_acc": 0.8167202572347267 }, { "epoch": 0.0068526412345545785, "grad_norm": 1.8217082023620605, "learning_rate": 4.5683453237410074e-06, "loss": 0.6296782493591309, "step": 127, "token_acc": 0.8006503605259437 }, { "epoch": 0.006906599039551071, "grad_norm": 1.3425428867340088, "learning_rate": 4.60431654676259e-06, "loss": 0.5883862376213074, "step": 128, "token_acc": 0.8099859808044861 }, { "epoch": 0.0069605568445475635, "grad_norm": 2.220085620880127, "learning_rate": 4.640287769784173e-06, "loss": 0.651432991027832, "step": 129, "token_acc": 0.7942673958136903 }, { "epoch": 0.007014514649544057, "grad_norm": 1.1154571771621704, "learning_rate": 4.676258992805755e-06, "loss": 0.5815449953079224, "step": 130, "token_acc": 0.8096450100468855 }, { "epoch": 0.007068472454540549, "grad_norm": 1.3511706590652466, "learning_rate": 4.712230215827339e-06, "loss": 0.5364128947257996, "step": 131, "token_acc": 0.8334594185669513 }, { "epoch": 0.007122430259537042, "grad_norm": 1.3207226991653442, "learning_rate": 4.748201438848921e-06, "loss": 0.5723222494125366, "step": 132, "token_acc": 0.8089299461123941 }, { "epoch": 0.0071763880645335345, "grad_norm": 1.3393226861953735, "learning_rate": 4.784172661870504e-06, "loss": 0.5607361793518066, "step": 133, "token_acc": 0.813882790069245 }, { "epoch": 0.007230345869530028, "grad_norm": 1.5481456518173218, "learning_rate": 4.820143884892087e-06, "loss": 0.5593780875205994, "step": 134, "token_acc": 0.8218477279566657 }, { "epoch": 0.00728430367452652, "grad_norm": 1.4600348472595215, "learning_rate": 4.856115107913669e-06, "loss": 0.5480194091796875, "step": 135, "token_acc": 0.8237632508833922 }, { "epoch": 0.007338261479523013, "grad_norm": 1.131143569946289, "learning_rate": 4.892086330935253e-06, "loss": 0.5914497375488281, "step": 136, "token_acc": 0.8115195236459407 }, { "epoch": 0.007392219284519505, "grad_norm": 1.2420519590377808, "learning_rate": 4.928057553956835e-06, "loss": 0.5916146039962769, "step": 137, "token_acc": 0.8158595522240852 }, { "epoch": 0.007446177089515999, "grad_norm": 1.4112777709960938, "learning_rate": 4.9640287769784175e-06, "loss": 0.5778532028198242, "step": 138, "token_acc": 0.8098159509202454 }, { "epoch": 0.007500134894512491, "grad_norm": 1.456908941268921, "learning_rate": 5e-06, "loss": 0.6188313364982605, "step": 139, "token_acc": 0.8016241299303944 }, { "epoch": 0.007554092699508984, "grad_norm": 1.2272510528564453, "learning_rate": 5.035971223021583e-06, "loss": 0.5762611627578735, "step": 140, "token_acc": 0.8118158644474434 }, { "epoch": 0.007608050504505476, "grad_norm": 1.2193763256072998, "learning_rate": 5.071942446043165e-06, "loss": 0.5794034004211426, "step": 141, "token_acc": 0.8128188358404186 }, { "epoch": 0.00766200830950197, "grad_norm": 1.2935526371002197, "learning_rate": 5.107913669064749e-06, "loss": 0.6022123098373413, "step": 142, "token_acc": 0.8079357727342905 }, { "epoch": 0.007715966114498462, "grad_norm": 1.4786722660064697, "learning_rate": 5.143884892086332e-06, "loss": 0.5886569023132324, "step": 143, "token_acc": 0.8132609652339738 }, { "epoch": 0.007769923919494955, "grad_norm": 1.359052062034607, "learning_rate": 5.179856115107914e-06, "loss": 0.5567423701286316, "step": 144, "token_acc": 0.8231153206194803 }, { "epoch": 0.007823881724491447, "grad_norm": 1.3936436176300049, "learning_rate": 5.215827338129497e-06, "loss": 0.6248652935028076, "step": 145, "token_acc": 0.8042666666666667 }, { "epoch": 0.00787783952948794, "grad_norm": 2.015869140625, "learning_rate": 5.251798561151079e-06, "loss": 0.5790866613388062, "step": 146, "token_acc": 0.8187637969094923 }, { "epoch": 0.007931797334484432, "grad_norm": 1.7472883462905884, "learning_rate": 5.287769784172663e-06, "loss": 0.6093693971633911, "step": 147, "token_acc": 0.8150028200789622 }, { "epoch": 0.007985755139480926, "grad_norm": 1.458225965499878, "learning_rate": 5.3237410071942456e-06, "loss": 0.5817255973815918, "step": 148, "token_acc": 0.8076854652197117 }, { "epoch": 0.008039712944477419, "grad_norm": 1.2005058526992798, "learning_rate": 5.359712230215828e-06, "loss": 0.5292285680770874, "step": 149, "token_acc": 0.8276953029934291 }, { "epoch": 0.00809367074947391, "grad_norm": 1.7864611148834229, "learning_rate": 5.3956834532374105e-06, "loss": 0.6040433645248413, "step": 150, "token_acc": 0.804424778761062 }, { "epoch": 0.008147628554470404, "grad_norm": 1.459464192390442, "learning_rate": 5.4316546762589925e-06, "loss": 0.5614293217658997, "step": 151, "token_acc": 0.816836262719704 }, { "epoch": 0.008201586359466898, "grad_norm": 1.2390775680541992, "learning_rate": 5.467625899280576e-06, "loss": 0.5176115036010742, "step": 152, "token_acc": 0.825901247223646 }, { "epoch": 0.00825554416446339, "grad_norm": 1.4791905879974365, "learning_rate": 5.503597122302159e-06, "loss": 0.5815548300743103, "step": 153, "token_acc": 0.8143245479257771 }, { "epoch": 0.008309501969459883, "grad_norm": 1.773857593536377, "learning_rate": 5.539568345323741e-06, "loss": 0.5370432734489441, "step": 154, "token_acc": 0.8307420048034383 }, { "epoch": 0.008363459774456376, "grad_norm": 1.5791468620300293, "learning_rate": 5.575539568345324e-06, "loss": 0.5880804657936096, "step": 155, "token_acc": 0.8084303755945547 }, { "epoch": 0.008417417579452868, "grad_norm": 1.3936080932617188, "learning_rate": 5.611510791366906e-06, "loss": 0.5522786378860474, "step": 156, "token_acc": 0.8237782640408461 }, { "epoch": 0.008471375384449361, "grad_norm": 1.5635730028152466, "learning_rate": 5.64748201438849e-06, "loss": 0.6219768524169922, "step": 157, "token_acc": 0.7964661451308336 }, { "epoch": 0.008525333189445853, "grad_norm": 1.3928520679473877, "learning_rate": 5.683453237410073e-06, "loss": 0.5637731552124023, "step": 158, "token_acc": 0.8203550295857989 }, { "epoch": 0.008579290994442346, "grad_norm": 3.3942434787750244, "learning_rate": 5.719424460431655e-06, "loss": 0.6444180011749268, "step": 159, "token_acc": 0.7976927747419551 }, { "epoch": 0.00863324879943884, "grad_norm": 1.6234391927719116, "learning_rate": 5.755395683453238e-06, "loss": 0.6153305172920227, "step": 160, "token_acc": 0.8025864042114901 }, { "epoch": 0.008687206604435331, "grad_norm": 1.4380582571029663, "learning_rate": 5.79136690647482e-06, "loss": 0.5922869443893433, "step": 161, "token_acc": 0.8048302872062664 }, { "epoch": 0.008741164409431824, "grad_norm": 1.427549123764038, "learning_rate": 5.8273381294964035e-06, "loss": 0.5487173795700073, "step": 162, "token_acc": 0.821797054993489 }, { "epoch": 0.008795122214428318, "grad_norm": 1.1386022567749023, "learning_rate": 5.863309352517986e-06, "loss": 0.5269443392753601, "step": 163, "token_acc": 0.8339544513457557 }, { "epoch": 0.00884908001942481, "grad_norm": 1.1351927518844604, "learning_rate": 5.899280575539568e-06, "loss": 0.5337866544723511, "step": 164, "token_acc": 0.824900239425379 }, { "epoch": 0.008903037824421303, "grad_norm": 1.3702255487442017, "learning_rate": 5.935251798561151e-06, "loss": 0.6185094118118286, "step": 165, "token_acc": 0.805921052631579 }, { "epoch": 0.008956995629417795, "grad_norm": 1.6039118766784668, "learning_rate": 5.971223021582734e-06, "loss": 0.6094237565994263, "step": 166, "token_acc": 0.8037147804186998 }, { "epoch": 0.009010953434414288, "grad_norm": 1.565748929977417, "learning_rate": 6.007194244604317e-06, "loss": 0.5559983253479004, "step": 167, "token_acc": 0.8161263960773631 }, { "epoch": 0.009064911239410781, "grad_norm": 1.6362353563308716, "learning_rate": 6.0431654676259e-06, "loss": 0.4812791049480438, "step": 168, "token_acc": 0.8328879387137004 }, { "epoch": 0.009118869044407273, "grad_norm": 1.5873842239379883, "learning_rate": 6.079136690647483e-06, "loss": 0.551188051700592, "step": 169, "token_acc": 0.8193998604326588 }, { "epoch": 0.009172826849403766, "grad_norm": 1.6299686431884766, "learning_rate": 6.115107913669065e-06, "loss": 0.5628373622894287, "step": 170, "token_acc": 0.810686482661005 }, { "epoch": 0.00922678465440026, "grad_norm": 1.494086742401123, "learning_rate": 6.151079136690648e-06, "loss": 0.6132087707519531, "step": 171, "token_acc": 0.8085240130751823 }, { "epoch": 0.009280742459396751, "grad_norm": 1.4155868291854858, "learning_rate": 6.1870503597122315e-06, "loss": 0.5614097714424133, "step": 172, "token_acc": 0.8166361060176696 }, { "epoch": 0.009334700264393245, "grad_norm": 1.364546537399292, "learning_rate": 6.2230215827338136e-06, "loss": 0.5376574993133545, "step": 173, "token_acc": 0.8181197747824603 }, { "epoch": 0.009388658069389736, "grad_norm": 1.3126920461654663, "learning_rate": 6.2589928057553964e-06, "loss": 0.5877772569656372, "step": 174, "token_acc": 0.8159555667760666 }, { "epoch": 0.00944261587438623, "grad_norm": 1.5331337451934814, "learning_rate": 6.2949640287769785e-06, "loss": 0.5135956406593323, "step": 175, "token_acc": 0.8353692393114936 }, { "epoch": 0.009496573679382723, "grad_norm": 1.2885714769363403, "learning_rate": 6.330935251798561e-06, "loss": 0.5850123763084412, "step": 176, "token_acc": 0.8163054695562435 }, { "epoch": 0.009550531484379215, "grad_norm": 1.4561015367507935, "learning_rate": 6.366906474820145e-06, "loss": 0.5833641290664673, "step": 177, "token_acc": 0.8122660207002863 }, { "epoch": 0.009604489289375708, "grad_norm": 1.1907709836959839, "learning_rate": 6.402877697841727e-06, "loss": 0.5468986630439758, "step": 178, "token_acc": 0.8183443344334433 }, { "epoch": 0.009658447094372202, "grad_norm": 1.3680394887924194, "learning_rate": 6.43884892086331e-06, "loss": 0.5774180889129639, "step": 179, "token_acc": 0.8089056211865507 }, { "epoch": 0.009712404899368693, "grad_norm": 1.7034814357757568, "learning_rate": 6.474820143884892e-06, "loss": 0.5196416974067688, "step": 180, "token_acc": 0.8308823529411765 }, { "epoch": 0.009766362704365187, "grad_norm": 1.7952170372009277, "learning_rate": 6.510791366906475e-06, "loss": 0.5242214202880859, "step": 181, "token_acc": 0.8263921385119326 }, { "epoch": 0.00982032050936168, "grad_norm": 1.1497944593429565, "learning_rate": 6.546762589928059e-06, "loss": 0.5770757794380188, "step": 182, "token_acc": 0.8119877049180327 }, { "epoch": 0.009874278314358172, "grad_norm": 1.2546610832214355, "learning_rate": 6.582733812949641e-06, "loss": 0.556844174861908, "step": 183, "token_acc": 0.8158415841584158 }, { "epoch": 0.009928236119354665, "grad_norm": 1.4605984687805176, "learning_rate": 6.618705035971224e-06, "loss": 0.5850280523300171, "step": 184, "token_acc": 0.8154753131908622 }, { "epoch": 0.009982193924351157, "grad_norm": 1.2053560018539429, "learning_rate": 6.654676258992806e-06, "loss": 0.6265124678611755, "step": 185, "token_acc": 0.803834808259587 }, { "epoch": 0.01003615172934765, "grad_norm": 1.3696027994155884, "learning_rate": 6.6906474820143886e-06, "loss": 0.5244578123092651, "step": 186, "token_acc": 0.8302437188293968 }, { "epoch": 0.010090109534344143, "grad_norm": 1.4566774368286133, "learning_rate": 6.726618705035972e-06, "loss": 0.6005923748016357, "step": 187, "token_acc": 0.8023466964659869 }, { "epoch": 0.010144067339340635, "grad_norm": 1.4874038696289062, "learning_rate": 6.762589928057554e-06, "loss": 0.5634602308273315, "step": 188, "token_acc": 0.8186619718309859 }, { "epoch": 0.010198025144337129, "grad_norm": 1.4211543798446655, "learning_rate": 6.798561151079137e-06, "loss": 0.5291117429733276, "step": 189, "token_acc": 0.8313140726933831 }, { "epoch": 0.010251982949333622, "grad_norm": 1.5033941268920898, "learning_rate": 6.834532374100719e-06, "loss": 0.5923637747764587, "step": 190, "token_acc": 0.8085629921259843 }, { "epoch": 0.010305940754330114, "grad_norm": 1.584064245223999, "learning_rate": 6.870503597122302e-06, "loss": 0.50920170545578, "step": 191, "token_acc": 0.8319441752277573 }, { "epoch": 0.010359898559326607, "grad_norm": 1.433880090713501, "learning_rate": 6.906474820143886e-06, "loss": 0.538205623626709, "step": 192, "token_acc": 0.8240260702118205 }, { "epoch": 0.010413856364323099, "grad_norm": 1.4481308460235596, "learning_rate": 6.942446043165468e-06, "loss": 0.5727987289428711, "step": 193, "token_acc": 0.8131477642878294 }, { "epoch": 0.010467814169319592, "grad_norm": 1.3321053981781006, "learning_rate": 6.978417266187051e-06, "loss": 0.5017211437225342, "step": 194, "token_acc": 0.8386891909056269 }, { "epoch": 0.010521771974316085, "grad_norm": 1.1111910343170166, "learning_rate": 7.014388489208634e-06, "loss": 0.5506541728973389, "step": 195, "token_acc": 0.8208099536472311 }, { "epoch": 0.010575729779312577, "grad_norm": 1.618987798690796, "learning_rate": 7.050359712230216e-06, "loss": 0.5503823757171631, "step": 196, "token_acc": 0.819327731092437 }, { "epoch": 0.01062968758430907, "grad_norm": 1.1254278421401978, "learning_rate": 7.0863309352517995e-06, "loss": 0.48847052454948425, "step": 197, "token_acc": 0.839422822345777 }, { "epoch": 0.010683645389305564, "grad_norm": 1.3427214622497559, "learning_rate": 7.122302158273382e-06, "loss": 0.5199034214019775, "step": 198, "token_acc": 0.8301773968507076 }, { "epoch": 0.010737603194302055, "grad_norm": 1.250243067741394, "learning_rate": 7.1582733812949644e-06, "loss": 0.5820412635803223, "step": 199, "token_acc": 0.8115319536639334 }, { "epoch": 0.010791560999298549, "grad_norm": 1.5327783823013306, "learning_rate": 7.194244604316547e-06, "loss": 0.5487343072891235, "step": 200, "token_acc": 0.8204589245051673 }, { "epoch": 0.01084551880429504, "grad_norm": 1.5023794174194336, "learning_rate": 7.230215827338129e-06, "loss": 0.5744642019271851, "step": 201, "token_acc": 0.816370324954016 }, { "epoch": 0.010899476609291534, "grad_norm": 2.183767318725586, "learning_rate": 7.266187050359713e-06, "loss": 0.5940910577774048, "step": 202, "token_acc": 0.8144097015812626 }, { "epoch": 0.010953434414288027, "grad_norm": 1.1843457221984863, "learning_rate": 7.302158273381296e-06, "loss": 0.5625585317611694, "step": 203, "token_acc": 0.8200654307524536 }, { "epoch": 0.011007392219284519, "grad_norm": 1.4884339570999146, "learning_rate": 7.338129496402878e-06, "loss": 0.6144727468490601, "step": 204, "token_acc": 0.799217164395477 }, { "epoch": 0.011061350024281012, "grad_norm": 1.2859355211257935, "learning_rate": 7.374100719424461e-06, "loss": 0.4797850251197815, "step": 205, "token_acc": 0.8397154533070985 }, { "epoch": 0.011115307829277506, "grad_norm": 1.1762237548828125, "learning_rate": 7.410071942446043e-06, "loss": 0.6144033670425415, "step": 206, "token_acc": 0.8068070519098922 }, { "epoch": 0.011169265634273997, "grad_norm": 1.387130618095398, "learning_rate": 7.446043165467627e-06, "loss": 0.5452921390533447, "step": 207, "token_acc": 0.8240495137046862 }, { "epoch": 0.01122322343927049, "grad_norm": 1.1603225469589233, "learning_rate": 7.48201438848921e-06, "loss": 0.5066721439361572, "step": 208, "token_acc": 0.8330643301821535 }, { "epoch": 0.011277181244266984, "grad_norm": 1.9399724006652832, "learning_rate": 7.517985611510792e-06, "loss": 0.5975135564804077, "step": 209, "token_acc": 0.8064140695719643 }, { "epoch": 0.011331139049263476, "grad_norm": 1.5770552158355713, "learning_rate": 7.5539568345323745e-06, "loss": 0.5655868053436279, "step": 210, "token_acc": 0.8148436486311146 }, { "epoch": 0.011385096854259969, "grad_norm": 1.3734956979751587, "learning_rate": 7.589928057553958e-06, "loss": 0.5270896553993225, "step": 211, "token_acc": 0.8254764292878636 }, { "epoch": 0.01143905465925646, "grad_norm": 1.167925477027893, "learning_rate": 7.62589928057554e-06, "loss": 0.5139492154121399, "step": 212, "token_acc": 0.8314638514468847 }, { "epoch": 0.011493012464252954, "grad_norm": 1.169402003288269, "learning_rate": 7.661870503597123e-06, "loss": 0.49712133407592773, "step": 213, "token_acc": 0.8397164742197325 }, { "epoch": 0.011546970269249448, "grad_norm": 1.1664925813674927, "learning_rate": 7.697841726618706e-06, "loss": 0.5399752855300903, "step": 214, "token_acc": 0.8269456853985422 }, { "epoch": 0.01160092807424594, "grad_norm": 1.412387728691101, "learning_rate": 7.733812949640287e-06, "loss": 0.5237864255905151, "step": 215, "token_acc": 0.8271545498314877 }, { "epoch": 0.011654885879242433, "grad_norm": 1.6507902145385742, "learning_rate": 7.769784172661872e-06, "loss": 0.4982161819934845, "step": 216, "token_acc": 0.828222828704447 }, { "epoch": 0.011708843684238926, "grad_norm": 1.1067653894424438, "learning_rate": 7.805755395683455e-06, "loss": 0.6081855893135071, "step": 217, "token_acc": 0.8051862577333158 }, { "epoch": 0.011762801489235418, "grad_norm": 1.496937870979309, "learning_rate": 7.841726618705036e-06, "loss": 0.5641226768493652, "step": 218, "token_acc": 0.8113050706566916 }, { "epoch": 0.011816759294231911, "grad_norm": 1.558250069618225, "learning_rate": 7.877697841726619e-06, "loss": 0.5393586158752441, "step": 219, "token_acc": 0.8147421024451187 }, { "epoch": 0.011870717099228403, "grad_norm": 0.9192081689834595, "learning_rate": 7.913669064748202e-06, "loss": 0.591787576675415, "step": 220, "token_acc": 0.8086067522586781 }, { "epoch": 0.011924674904224896, "grad_norm": 1.1075080633163452, "learning_rate": 7.949640287769785e-06, "loss": 0.5435152649879456, "step": 221, "token_acc": 0.8244597905992426 }, { "epoch": 0.01197863270922139, "grad_norm": 1.4713610410690308, "learning_rate": 7.985611510791367e-06, "loss": 0.5606558322906494, "step": 222, "token_acc": 0.8149527737129174 }, { "epoch": 0.012032590514217881, "grad_norm": 1.4312161207199097, "learning_rate": 8.02158273381295e-06, "loss": 0.4793572723865509, "step": 223, "token_acc": 0.838752114264236 }, { "epoch": 0.012086548319214374, "grad_norm": 1.4824622869491577, "learning_rate": 8.057553956834533e-06, "loss": 0.5728539824485779, "step": 224, "token_acc": 0.818702915165411 }, { "epoch": 0.012140506124210868, "grad_norm": 1.4426733255386353, "learning_rate": 8.093525179856116e-06, "loss": 0.5586802363395691, "step": 225, "token_acc": 0.8199889563776919 }, { "epoch": 0.01219446392920736, "grad_norm": 1.4605071544647217, "learning_rate": 8.129496402877699e-06, "loss": 0.5074547529220581, "step": 226, "token_acc": 0.8271531100478469 }, { "epoch": 0.012248421734203853, "grad_norm": 1.7425353527069092, "learning_rate": 8.165467625899282e-06, "loss": 0.5183210372924805, "step": 227, "token_acc": 0.8298750294741806 }, { "epoch": 0.012302379539200345, "grad_norm": 0.968619704246521, "learning_rate": 8.201438848920865e-06, "loss": 0.5853953957557678, "step": 228, "token_acc": 0.8161625708884688 }, { "epoch": 0.012356337344196838, "grad_norm": 1.213275671005249, "learning_rate": 8.237410071942446e-06, "loss": 0.5708547830581665, "step": 229, "token_acc": 0.8156716417910448 }, { "epoch": 0.012410295149193331, "grad_norm": 1.3969135284423828, "learning_rate": 8.273381294964029e-06, "loss": 0.5408069491386414, "step": 230, "token_acc": 0.8248416050686378 }, { "epoch": 0.012464252954189823, "grad_norm": 1.3658289909362793, "learning_rate": 8.309352517985614e-06, "loss": 0.5517289638519287, "step": 231, "token_acc": 0.821817615935263 }, { "epoch": 0.012518210759186316, "grad_norm": 1.2031316757202148, "learning_rate": 8.345323741007195e-06, "loss": 0.5361014604568481, "step": 232, "token_acc": 0.8307713423003019 }, { "epoch": 0.01257216856418281, "grad_norm": 1.1817688941955566, "learning_rate": 8.381294964028778e-06, "loss": 0.5743882656097412, "step": 233, "token_acc": 0.8119775421085464 }, { "epoch": 0.012626126369179301, "grad_norm": 1.4020358324050903, "learning_rate": 8.41726618705036e-06, "loss": 0.5997282266616821, "step": 234, "token_acc": 0.8083632019115891 }, { "epoch": 0.012680084174175795, "grad_norm": 1.1467061042785645, "learning_rate": 8.453237410071943e-06, "loss": 0.5251526236534119, "step": 235, "token_acc": 0.8344356731384681 }, { "epoch": 0.012734041979172286, "grad_norm": 1.3630609512329102, "learning_rate": 8.489208633093526e-06, "loss": 0.46078455448150635, "step": 236, "token_acc": 0.847849712777519 }, { "epoch": 0.01278799978416878, "grad_norm": 1.2029227018356323, "learning_rate": 8.525179856115109e-06, "loss": 0.5617398023605347, "step": 237, "token_acc": 0.816350710900474 }, { "epoch": 0.012841957589165273, "grad_norm": 1.9656102657318115, "learning_rate": 8.561151079136692e-06, "loss": 0.5448175072669983, "step": 238, "token_acc": 0.8255125284738041 }, { "epoch": 0.012895915394161765, "grad_norm": 1.3362354040145874, "learning_rate": 8.597122302158273e-06, "loss": 0.5486705303192139, "step": 239, "token_acc": 0.8204646796526637 }, { "epoch": 0.012949873199158258, "grad_norm": 1.4289524555206299, "learning_rate": 8.633093525179856e-06, "loss": 0.4962988495826721, "step": 240, "token_acc": 0.8317315523700929 }, { "epoch": 0.013003831004154752, "grad_norm": 1.21310293674469, "learning_rate": 8.66906474820144e-06, "loss": 0.5219867825508118, "step": 241, "token_acc": 0.8269289417505337 }, { "epoch": 0.013057788809151243, "grad_norm": 1.0578254461288452, "learning_rate": 8.705035971223022e-06, "loss": 0.5605170726776123, "step": 242, "token_acc": 0.8173173894251549 }, { "epoch": 0.013111746614147737, "grad_norm": 1.4679162502288818, "learning_rate": 8.741007194244605e-06, "loss": 0.5598548650741577, "step": 243, "token_acc": 0.8124033006704486 }, { "epoch": 0.01316570441914423, "grad_norm": 1.1925508975982666, "learning_rate": 8.776978417266188e-06, "loss": 0.5617824792861938, "step": 244, "token_acc": 0.8188132799623263 }, { "epoch": 0.013219662224140722, "grad_norm": 1.3413671255111694, "learning_rate": 8.81294964028777e-06, "loss": 0.5327091217041016, "step": 245, "token_acc": 0.829817158931083 }, { "epoch": 0.013273620029137215, "grad_norm": 1.6636402606964111, "learning_rate": 8.848920863309353e-06, "loss": 0.4660763144493103, "step": 246, "token_acc": 0.8441913439635536 }, { "epoch": 0.013327577834133707, "grad_norm": 1.2907190322875977, "learning_rate": 8.884892086330936e-06, "loss": 0.5064112544059753, "step": 247, "token_acc": 0.8281992337164751 }, { "epoch": 0.0133815356391302, "grad_norm": 1.4607516527175903, "learning_rate": 8.92086330935252e-06, "loss": 0.5282444953918457, "step": 248, "token_acc": 0.8232275232845466 }, { "epoch": 0.013435493444126693, "grad_norm": 1.2288398742675781, "learning_rate": 8.956834532374102e-06, "loss": 0.5967646837234497, "step": 249, "token_acc": 0.8134425377015081 }, { "epoch": 0.013489451249123185, "grad_norm": 1.4080352783203125, "learning_rate": 8.992805755395683e-06, "loss": 0.5897728204727173, "step": 250, "token_acc": 0.8121212121212121 }, { "epoch": 0.013543409054119679, "grad_norm": 1.4970988035202026, "learning_rate": 9.028776978417268e-06, "loss": 0.6147792339324951, "step": 251, "token_acc": 0.8044328552803129 }, { "epoch": 0.013597366859116172, "grad_norm": 1.417802095413208, "learning_rate": 9.064748201438849e-06, "loss": 0.5408434867858887, "step": 252, "token_acc": 0.8198459790430501 }, { "epoch": 0.013651324664112664, "grad_norm": 1.257004737854004, "learning_rate": 9.100719424460432e-06, "loss": 0.5246085524559021, "step": 253, "token_acc": 0.8274244723410126 }, { "epoch": 0.013705282469109157, "grad_norm": 1.2039144039154053, "learning_rate": 9.136690647482015e-06, "loss": 0.5827130079269409, "step": 254, "token_acc": 0.8183173853610041 }, { "epoch": 0.013759240274105649, "grad_norm": 1.2457842826843262, "learning_rate": 9.172661870503598e-06, "loss": 0.5179134607315063, "step": 255, "token_acc": 0.826745298055467 }, { "epoch": 0.013813198079102142, "grad_norm": 1.275498390197754, "learning_rate": 9.20863309352518e-06, "loss": 0.5206499099731445, "step": 256, "token_acc": 0.8275733927907841 }, { "epoch": 0.013867155884098635, "grad_norm": 1.0736984014511108, "learning_rate": 9.244604316546764e-06, "loss": 0.46201813220977783, "step": 257, "token_acc": 0.8450380756678352 }, { "epoch": 0.013921113689095127, "grad_norm": 0.9860892295837402, "learning_rate": 9.280575539568346e-06, "loss": 0.574927568435669, "step": 258, "token_acc": 0.8163969538315088 }, { "epoch": 0.01397507149409162, "grad_norm": 1.4102715253829956, "learning_rate": 9.31654676258993e-06, "loss": 0.5427153706550598, "step": 259, "token_acc": 0.8182720953326713 }, { "epoch": 0.014029029299088114, "grad_norm": 1.4117820262908936, "learning_rate": 9.35251798561151e-06, "loss": 0.552310585975647, "step": 260, "token_acc": 0.8224057821373257 }, { "epoch": 0.014082987104084605, "grad_norm": 1.6471307277679443, "learning_rate": 9.388489208633095e-06, "loss": 0.5717693567276001, "step": 261, "token_acc": 0.8112863070539419 }, { "epoch": 0.014136944909081099, "grad_norm": 1.3561811447143555, "learning_rate": 9.424460431654678e-06, "loss": 0.5174682140350342, "step": 262, "token_acc": 0.8270796742775028 }, { "epoch": 0.01419090271407759, "grad_norm": 1.1884409189224243, "learning_rate": 9.46043165467626e-06, "loss": 0.5148372054100037, "step": 263, "token_acc": 0.8324911958352473 }, { "epoch": 0.014244860519074084, "grad_norm": 1.489378571510315, "learning_rate": 9.496402877697842e-06, "loss": 0.5327551960945129, "step": 264, "token_acc": 0.8204463929852531 }, { "epoch": 0.014298818324070577, "grad_norm": 1.7015371322631836, "learning_rate": 9.532374100719425e-06, "loss": 0.5241466164588928, "step": 265, "token_acc": 0.8238538435022406 }, { "epoch": 0.014352776129067069, "grad_norm": 1.276236653327942, "learning_rate": 9.568345323741008e-06, "loss": 0.5004644393920898, "step": 266, "token_acc": 0.831284046692607 }, { "epoch": 0.014406733934063562, "grad_norm": 1.6502151489257812, "learning_rate": 9.60431654676259e-06, "loss": 0.528168261051178, "step": 267, "token_acc": 0.8245519713261649 }, { "epoch": 0.014460691739060056, "grad_norm": 1.2154854536056519, "learning_rate": 9.640287769784174e-06, "loss": 0.5194615721702576, "step": 268, "token_acc": 0.8269257460097155 }, { "epoch": 0.014514649544056547, "grad_norm": 1.2648961544036865, "learning_rate": 9.676258992805757e-06, "loss": 0.5412278771400452, "step": 269, "token_acc": 0.8188423953896267 }, { "epoch": 0.01456860734905304, "grad_norm": 1.5334676504135132, "learning_rate": 9.712230215827338e-06, "loss": 0.5228724479675293, "step": 270, "token_acc": 0.8295264623955432 }, { "epoch": 0.014622565154049534, "grad_norm": 1.2653883695602417, "learning_rate": 9.748201438848922e-06, "loss": 0.519445538520813, "step": 271, "token_acc": 0.8349468713105077 }, { "epoch": 0.014676522959046026, "grad_norm": 1.3473005294799805, "learning_rate": 9.784172661870505e-06, "loss": 0.5187214612960815, "step": 272, "token_acc": 0.826166236134326 }, { "epoch": 0.01473048076404252, "grad_norm": 1.2597273588180542, "learning_rate": 9.820143884892086e-06, "loss": 0.5356582403182983, "step": 273, "token_acc": 0.8272655840754322 }, { "epoch": 0.01478443856903901, "grad_norm": 1.2643295526504517, "learning_rate": 9.85611510791367e-06, "loss": 0.49263471364974976, "step": 274, "token_acc": 0.8396264176117412 }, { "epoch": 0.014838396374035504, "grad_norm": 1.1977806091308594, "learning_rate": 9.892086330935252e-06, "loss": 0.505371630191803, "step": 275, "token_acc": 0.8339426321709786 }, { "epoch": 0.014892354179031998, "grad_norm": 1.4940130710601807, "learning_rate": 9.928057553956835e-06, "loss": 0.49428877234458923, "step": 276, "token_acc": 0.8379921851517884 }, { "epoch": 0.01494631198402849, "grad_norm": 1.3883461952209473, "learning_rate": 9.964028776978418e-06, "loss": 0.5257331728935242, "step": 277, "token_acc": 0.8206884123265453 }, { "epoch": 0.015000269789024983, "grad_norm": 1.3477522134780884, "learning_rate": 1e-05, "loss": 0.5926815271377563, "step": 278, "token_acc": 0.8090286917609897 }, { "epoch": 0.015054227594021476, "grad_norm": 1.4804612398147583, "learning_rate": 1.0035971223021584e-05, "loss": 0.5661147832870483, "step": 279, "token_acc": 0.8155855992708492 }, { "epoch": 0.015108185399017968, "grad_norm": 1.3477219343185425, "learning_rate": 1.0071942446043167e-05, "loss": 0.6001946926116943, "step": 280, "token_acc": 0.8047512991833704 }, { "epoch": 0.015162143204014461, "grad_norm": 1.1558517217636108, "learning_rate": 1.0107913669064748e-05, "loss": 0.49870720505714417, "step": 281, "token_acc": 0.8352014010507881 }, { "epoch": 0.015216101009010953, "grad_norm": 1.310664176940918, "learning_rate": 1.014388489208633e-05, "loss": 0.5786013603210449, "step": 282, "token_acc": 0.8168379304128358 }, { "epoch": 0.015270058814007446, "grad_norm": 1.1184654235839844, "learning_rate": 1.0179856115107915e-05, "loss": 0.5648998022079468, "step": 283, "token_acc": 0.8149365367180417 }, { "epoch": 0.01532401661900394, "grad_norm": 1.0046764612197876, "learning_rate": 1.0215827338129498e-05, "loss": 0.5153173804283142, "step": 284, "token_acc": 0.8307441929279479 }, { "epoch": 0.015377974424000431, "grad_norm": 1.2191156148910522, "learning_rate": 1.0251798561151081e-05, "loss": 0.5469737648963928, "step": 285, "token_acc": 0.821551132463967 }, { "epoch": 0.015431932228996924, "grad_norm": 1.2196309566497803, "learning_rate": 1.0287769784172664e-05, "loss": 0.4838353991508484, "step": 286, "token_acc": 0.8413699096950077 }, { "epoch": 0.015485890033993418, "grad_norm": 1.0146191120147705, "learning_rate": 1.0323741007194245e-05, "loss": 0.5417523384094238, "step": 287, "token_acc": 0.8240950981701616 }, { "epoch": 0.01553984783898991, "grad_norm": 1.4577950239181519, "learning_rate": 1.0359712230215828e-05, "loss": 0.5224257707595825, "step": 288, "token_acc": 0.8254104211277659 }, { "epoch": 0.015593805643986403, "grad_norm": 1.3644293546676636, "learning_rate": 1.0395683453237411e-05, "loss": 0.5678976774215698, "step": 289, "token_acc": 0.8178240446870636 }, { "epoch": 0.015647763448982895, "grad_norm": 1.2609081268310547, "learning_rate": 1.0431654676258994e-05, "loss": 0.499003529548645, "step": 290, "token_acc": 0.8401200975792832 }, { "epoch": 0.015701721253979388, "grad_norm": 1.2894251346588135, "learning_rate": 1.0467625899280575e-05, "loss": 0.5421448349952698, "step": 291, "token_acc": 0.8211586901763224 }, { "epoch": 0.01575567905897588, "grad_norm": 1.127923846244812, "learning_rate": 1.0503597122302158e-05, "loss": 0.5771473050117493, "step": 292, "token_acc": 0.8188334504567815 }, { "epoch": 0.015809636863972375, "grad_norm": 1.0453364849090576, "learning_rate": 1.0539568345323742e-05, "loss": 0.5432586073875427, "step": 293, "token_acc": 0.8238751978295275 }, { "epoch": 0.015863594668968865, "grad_norm": 1.460123062133789, "learning_rate": 1.0575539568345325e-05, "loss": 0.6022973656654358, "step": 294, "token_acc": 0.8020778537611783 }, { "epoch": 0.015917552473965358, "grad_norm": 1.3758962154388428, "learning_rate": 1.0611510791366908e-05, "loss": 0.5155462622642517, "step": 295, "token_acc": 0.8329034885299308 }, { "epoch": 0.01597151027896185, "grad_norm": 1.585156798362732, "learning_rate": 1.0647482014388491e-05, "loss": 0.5576644539833069, "step": 296, "token_acc": 0.8155677655677656 }, { "epoch": 0.016025468083958345, "grad_norm": 1.2460201978683472, "learning_rate": 1.0683453237410072e-05, "loss": 0.5207813382148743, "step": 297, "token_acc": 0.8324968632371392 }, { "epoch": 0.016079425888954838, "grad_norm": 1.2175770998001099, "learning_rate": 1.0719424460431655e-05, "loss": 0.5617212057113647, "step": 298, "token_acc": 0.8191860465116279 }, { "epoch": 0.01613338369395133, "grad_norm": 1.3517428636550903, "learning_rate": 1.0755395683453238e-05, "loss": 0.5496206283569336, "step": 299, "token_acc": 0.8204001212488633 }, { "epoch": 0.01618734149894782, "grad_norm": 1.4725315570831299, "learning_rate": 1.0791366906474821e-05, "loss": 0.5492569208145142, "step": 300, "token_acc": 0.8246840148698885 }, { "epoch": 0.016241299303944315, "grad_norm": 1.0894795656204224, "learning_rate": 1.0827338129496404e-05, "loss": 0.5560125112533569, "step": 301, "token_acc": 0.8238284029683847 }, { "epoch": 0.016295257108940808, "grad_norm": 1.1109191179275513, "learning_rate": 1.0863309352517985e-05, "loss": 0.5254504680633545, "step": 302, "token_acc": 0.8272417153996101 }, { "epoch": 0.0163492149139373, "grad_norm": 1.3432645797729492, "learning_rate": 1.089928057553957e-05, "loss": 0.5778969526290894, "step": 303, "token_acc": 0.8148032965324211 }, { "epoch": 0.016403172718933795, "grad_norm": 1.6871803998947144, "learning_rate": 1.0935251798561153e-05, "loss": 0.6555008888244629, "step": 304, "token_acc": 0.7945783132530121 }, { "epoch": 0.016457130523930285, "grad_norm": 1.3154829740524292, "learning_rate": 1.0971223021582735e-05, "loss": 0.520861029624939, "step": 305, "token_acc": 0.8215506386040655 }, { "epoch": 0.01651108832892678, "grad_norm": 1.4023215770721436, "learning_rate": 1.1007194244604318e-05, "loss": 0.5023452043533325, "step": 306, "token_acc": 0.8352991944764097 }, { "epoch": 0.01656504613392327, "grad_norm": 1.6083221435546875, "learning_rate": 1.10431654676259e-05, "loss": 0.5097149610519409, "step": 307, "token_acc": 0.8326555204097003 }, { "epoch": 0.016619003938919765, "grad_norm": 1.4152100086212158, "learning_rate": 1.1079136690647482e-05, "loss": 0.516756534576416, "step": 308, "token_acc": 0.8290891283055828 }, { "epoch": 0.01667296174391626, "grad_norm": 1.556890606880188, "learning_rate": 1.1115107913669065e-05, "loss": 0.4910639226436615, "step": 309, "token_acc": 0.8350780169859767 }, { "epoch": 0.016726919548912752, "grad_norm": 1.3186527490615845, "learning_rate": 1.1151079136690648e-05, "loss": 0.5856456160545349, "step": 310, "token_acc": 0.8066315463278587 }, { "epoch": 0.016780877353909242, "grad_norm": 1.0377920866012573, "learning_rate": 1.1187050359712231e-05, "loss": 0.588910698890686, "step": 311, "token_acc": 0.8127238859435449 }, { "epoch": 0.016834835158905735, "grad_norm": 1.1053881645202637, "learning_rate": 1.1223021582733812e-05, "loss": 0.5118277668952942, "step": 312, "token_acc": 0.8272647095880056 }, { "epoch": 0.01688879296390223, "grad_norm": 1.5392589569091797, "learning_rate": 1.1258992805755397e-05, "loss": 0.5128140449523926, "step": 313, "token_acc": 0.8303443897794001 }, { "epoch": 0.016942750768898722, "grad_norm": 1.0312684774398804, "learning_rate": 1.129496402877698e-05, "loss": 0.5948011875152588, "step": 314, "token_acc": 0.8088235294117647 }, { "epoch": 0.016996708573895215, "grad_norm": 1.29421067237854, "learning_rate": 1.1330935251798563e-05, "loss": 0.568129301071167, "step": 315, "token_acc": 0.8184759166067578 }, { "epoch": 0.017050666378891705, "grad_norm": 1.247013807296753, "learning_rate": 1.1366906474820146e-05, "loss": 0.5317374467849731, "step": 316, "token_acc": 0.8245799909187226 }, { "epoch": 0.0171046241838882, "grad_norm": 1.0546456575393677, "learning_rate": 1.1402877697841728e-05, "loss": 0.5182291269302368, "step": 317, "token_acc": 0.8278099972383319 }, { "epoch": 0.017158581988884692, "grad_norm": 1.058121919631958, "learning_rate": 1.143884892086331e-05, "loss": 0.5304502248764038, "step": 318, "token_acc": 0.8249143835616438 }, { "epoch": 0.017212539793881185, "grad_norm": 1.2912797927856445, "learning_rate": 1.1474820143884892e-05, "loss": 0.5256776809692383, "step": 319, "token_acc": 0.8222701626286094 }, { "epoch": 0.01726649759887768, "grad_norm": 1.1466268301010132, "learning_rate": 1.1510791366906475e-05, "loss": 0.5378888845443726, "step": 320, "token_acc": 0.8179936305732484 }, { "epoch": 0.01732045540387417, "grad_norm": 1.3217097520828247, "learning_rate": 1.1546762589928058e-05, "loss": 0.5468847751617432, "step": 321, "token_acc": 0.8223007712082262 }, { "epoch": 0.017374413208870662, "grad_norm": 1.2679111957550049, "learning_rate": 1.158273381294964e-05, "loss": 0.49364379048347473, "step": 322, "token_acc": 0.831918505942275 }, { "epoch": 0.017428371013867155, "grad_norm": 1.5034546852111816, "learning_rate": 1.1618705035971226e-05, "loss": 0.5626762509346008, "step": 323, "token_acc": 0.8141116463587419 }, { "epoch": 0.01748232881886365, "grad_norm": 1.4652575254440308, "learning_rate": 1.1654676258992807e-05, "loss": 0.5902973413467407, "step": 324, "token_acc": 0.8040195299193823 }, { "epoch": 0.017536286623860142, "grad_norm": 1.3117687702178955, "learning_rate": 1.169064748201439e-05, "loss": 0.5653620958328247, "step": 325, "token_acc": 0.8161730136342266 }, { "epoch": 0.017590244428856636, "grad_norm": 1.7523736953735352, "learning_rate": 1.1726618705035973e-05, "loss": 0.5384581089019775, "step": 326, "token_acc": 0.8273153575615475 }, { "epoch": 0.017644202233853126, "grad_norm": 1.5196330547332764, "learning_rate": 1.1762589928057556e-05, "loss": 0.6124386787414551, "step": 327, "token_acc": 0.8008923591745678 }, { "epoch": 0.01769816003884962, "grad_norm": 1.4030938148498535, "learning_rate": 1.1798561151079137e-05, "loss": 0.5663633346557617, "step": 328, "token_acc": 0.8118727674774622 }, { "epoch": 0.017752117843846112, "grad_norm": 1.1722601652145386, "learning_rate": 1.183453237410072e-05, "loss": 0.5476710796356201, "step": 329, "token_acc": 0.8224274749577208 }, { "epoch": 0.017806075648842606, "grad_norm": 1.2381778955459595, "learning_rate": 1.1870503597122303e-05, "loss": 0.5714975595474243, "step": 330, "token_acc": 0.8106441872169099 }, { "epoch": 0.0178600334538391, "grad_norm": 1.2923901081085205, "learning_rate": 1.1906474820143885e-05, "loss": 0.535051167011261, "step": 331, "token_acc": 0.8251593239124411 }, { "epoch": 0.01791399125883559, "grad_norm": 1.291616439819336, "learning_rate": 1.1942446043165468e-05, "loss": 0.5122610330581665, "step": 332, "token_acc": 0.8355974842767295 }, { "epoch": 0.017967949063832082, "grad_norm": 1.0838139057159424, "learning_rate": 1.1978417266187053e-05, "loss": 0.44179898500442505, "step": 333, "token_acc": 0.8486851990984222 }, { "epoch": 0.018021906868828576, "grad_norm": 1.34857976436615, "learning_rate": 1.2014388489208634e-05, "loss": 0.5674254894256592, "step": 334, "token_acc": 0.8222258432458857 }, { "epoch": 0.01807586467382507, "grad_norm": 1.1316167116165161, "learning_rate": 1.2050359712230217e-05, "loss": 0.5013183355331421, "step": 335, "token_acc": 0.8334910122989593 }, { "epoch": 0.018129822478821563, "grad_norm": 1.387528657913208, "learning_rate": 1.20863309352518e-05, "loss": 0.540648877620697, "step": 336, "token_acc": 0.8224160875351175 }, { "epoch": 0.018183780283818056, "grad_norm": 0.9600984454154968, "learning_rate": 1.2122302158273383e-05, "loss": 0.5474786162376404, "step": 337, "token_acc": 0.8243243243243243 }, { "epoch": 0.018237738088814546, "grad_norm": 1.227288007736206, "learning_rate": 1.2158273381294966e-05, "loss": 0.5724318027496338, "step": 338, "token_acc": 0.8148902466621407 }, { "epoch": 0.01829169589381104, "grad_norm": 1.1091607809066772, "learning_rate": 1.2194244604316547e-05, "loss": 0.5529879331588745, "step": 339, "token_acc": 0.8249716553287982 }, { "epoch": 0.018345653698807533, "grad_norm": 1.3561499118804932, "learning_rate": 1.223021582733813e-05, "loss": 0.5583359003067017, "step": 340, "token_acc": 0.8237569060773481 }, { "epoch": 0.018399611503804026, "grad_norm": 1.390852928161621, "learning_rate": 1.2266187050359713e-05, "loss": 0.49096202850341797, "step": 341, "token_acc": 0.8373196659073652 }, { "epoch": 0.01845356930880052, "grad_norm": 1.0091553926467896, "learning_rate": 1.2302158273381296e-05, "loss": 0.5415232181549072, "step": 342, "token_acc": 0.8211284513805522 }, { "epoch": 0.01850752711379701, "grad_norm": 1.1627169847488403, "learning_rate": 1.233812949640288e-05, "loss": 0.5270614624023438, "step": 343, "token_acc": 0.8291386587462873 }, { "epoch": 0.018561484918793503, "grad_norm": 1.3527566194534302, "learning_rate": 1.2374100719424463e-05, "loss": 0.5661444664001465, "step": 344, "token_acc": 0.8182393119150012 }, { "epoch": 0.018615442723789996, "grad_norm": 1.3343676328659058, "learning_rate": 1.2410071942446044e-05, "loss": 0.521546483039856, "step": 345, "token_acc": 0.8321852557526015 }, { "epoch": 0.01866940052878649, "grad_norm": 1.2381596565246582, "learning_rate": 1.2446043165467627e-05, "loss": 0.5247669219970703, "step": 346, "token_acc": 0.8264859522801073 }, { "epoch": 0.018723358333782983, "grad_norm": 0.9437515735626221, "learning_rate": 1.248201438848921e-05, "loss": 0.513481080532074, "step": 347, "token_acc": 0.831741810709537 }, { "epoch": 0.018777316138779473, "grad_norm": 1.2151490449905396, "learning_rate": 1.2517985611510793e-05, "loss": 0.5316791534423828, "step": 348, "token_acc": 0.8278852568378919 }, { "epoch": 0.018831273943775966, "grad_norm": 1.0789177417755127, "learning_rate": 1.2553956834532374e-05, "loss": 0.5461699962615967, "step": 349, "token_acc": 0.8292996609376827 }, { "epoch": 0.01888523174877246, "grad_norm": 1.073470950126648, "learning_rate": 1.2589928057553957e-05, "loss": 0.5279503464698792, "step": 350, "token_acc": 0.8299023644277144 }, { "epoch": 0.018939189553768953, "grad_norm": 1.5721272230148315, "learning_rate": 1.262589928057554e-05, "loss": 0.4995824098587036, "step": 351, "token_acc": 0.8271349862258953 }, { "epoch": 0.018993147358765446, "grad_norm": 1.4637259244918823, "learning_rate": 1.2661870503597123e-05, "loss": 0.5529473423957825, "step": 352, "token_acc": 0.8192177827799663 }, { "epoch": 0.01904710516376194, "grad_norm": 1.3244760036468506, "learning_rate": 1.2697841726618707e-05, "loss": 0.5344829559326172, "step": 353, "token_acc": 0.8279662848497947 }, { "epoch": 0.01910106296875843, "grad_norm": 1.5662708282470703, "learning_rate": 1.273381294964029e-05, "loss": 0.47803181409835815, "step": 354, "token_acc": 0.8339507517977773 }, { "epoch": 0.019155020773754923, "grad_norm": 1.1791419982910156, "learning_rate": 1.2769784172661871e-05, "loss": 0.5436446070671082, "step": 355, "token_acc": 0.825046904315197 }, { "epoch": 0.019208978578751416, "grad_norm": 1.0841751098632812, "learning_rate": 1.2805755395683454e-05, "loss": 0.5340561866760254, "step": 356, "token_acc": 0.82640930589288 }, { "epoch": 0.01926293638374791, "grad_norm": 1.2116931676864624, "learning_rate": 1.2841726618705037e-05, "loss": 0.5964912176132202, "step": 357, "token_acc": 0.807230869001297 }, { "epoch": 0.019316894188744403, "grad_norm": 1.3481354713439941, "learning_rate": 1.287769784172662e-05, "loss": 0.589316725730896, "step": 358, "token_acc": 0.8055087127599775 }, { "epoch": 0.019370851993740893, "grad_norm": 1.1065045595169067, "learning_rate": 1.2913669064748203e-05, "loss": 0.532364010810852, "step": 359, "token_acc": 0.8202979515828678 }, { "epoch": 0.019424809798737386, "grad_norm": 0.9596236944198608, "learning_rate": 1.2949640287769784e-05, "loss": 0.4745810031890869, "step": 360, "token_acc": 0.8366369342583416 }, { "epoch": 0.01947876760373388, "grad_norm": 1.0617213249206543, "learning_rate": 1.2985611510791367e-05, "loss": 0.4935448169708252, "step": 361, "token_acc": 0.8387862137862138 }, { "epoch": 0.019532725408730373, "grad_norm": 1.1742497682571411, "learning_rate": 1.302158273381295e-05, "loss": 0.6182594895362854, "step": 362, "token_acc": 0.8038420236248658 }, { "epoch": 0.019586683213726867, "grad_norm": 1.3385400772094727, "learning_rate": 1.3057553956834535e-05, "loss": 0.5356704592704773, "step": 363, "token_acc": 0.8266622922134733 }, { "epoch": 0.01964064101872336, "grad_norm": 1.1634570360183716, "learning_rate": 1.3093525179856117e-05, "loss": 0.52483069896698, "step": 364, "token_acc": 0.8245835083298334 }, { "epoch": 0.01969459882371985, "grad_norm": 1.0087361335754395, "learning_rate": 1.3129496402877699e-05, "loss": 0.6113921999931335, "step": 365, "token_acc": 0.8088473520249221 }, { "epoch": 0.019748556628716343, "grad_norm": 1.110791802406311, "learning_rate": 1.3165467625899282e-05, "loss": 0.5387722253799438, "step": 366, "token_acc": 0.8186990619713979 }, { "epoch": 0.019802514433712837, "grad_norm": 1.1817865371704102, "learning_rate": 1.3201438848920864e-05, "loss": 0.5166906118392944, "step": 367, "token_acc": 0.8283756197142024 }, { "epoch": 0.01985647223870933, "grad_norm": 1.3033726215362549, "learning_rate": 1.3237410071942447e-05, "loss": 0.5662504434585571, "step": 368, "token_acc": 0.8098553829673273 }, { "epoch": 0.019910430043705823, "grad_norm": 1.257799506187439, "learning_rate": 1.327338129496403e-05, "loss": 0.5472406148910522, "step": 369, "token_acc": 0.8184026723352567 }, { "epoch": 0.019964387848702313, "grad_norm": 1.2446181774139404, "learning_rate": 1.3309352517985611e-05, "loss": 0.5988069772720337, "step": 370, "token_acc": 0.8050455291834602 }, { "epoch": 0.020018345653698807, "grad_norm": 1.2253957986831665, "learning_rate": 1.3345323741007194e-05, "loss": 0.5573258399963379, "step": 371, "token_acc": 0.8213641284734753 }, { "epoch": 0.0200723034586953, "grad_norm": 1.1944983005523682, "learning_rate": 1.3381294964028777e-05, "loss": 0.597618818283081, "step": 372, "token_acc": 0.8034976590470945 }, { "epoch": 0.020126261263691794, "grad_norm": 0.7596373558044434, "learning_rate": 1.3417266187050362e-05, "loss": 0.45897018909454346, "step": 373, "token_acc": 0.8462014863748968 }, { "epoch": 0.020180219068688287, "grad_norm": 0.9728931784629822, "learning_rate": 1.3453237410071945e-05, "loss": 0.5556442737579346, "step": 374, "token_acc": 0.8176958032611601 }, { "epoch": 0.020234176873684777, "grad_norm": 0.8929346203804016, "learning_rate": 1.3489208633093528e-05, "loss": 0.5703210830688477, "step": 375, "token_acc": 0.813980492336275 }, { "epoch": 0.02028813467868127, "grad_norm": 1.1838762760162354, "learning_rate": 1.3525179856115109e-05, "loss": 0.5207263231277466, "step": 376, "token_acc": 0.8326189691174855 }, { "epoch": 0.020342092483677764, "grad_norm": 1.3889048099517822, "learning_rate": 1.3561151079136692e-05, "loss": 0.5876622796058655, "step": 377, "token_acc": 0.8108472746896924 }, { "epoch": 0.020396050288674257, "grad_norm": 0.944464385509491, "learning_rate": 1.3597122302158274e-05, "loss": 0.4837217330932617, "step": 378, "token_acc": 0.8366037735849057 }, { "epoch": 0.02045000809367075, "grad_norm": 1.1571372747421265, "learning_rate": 1.3633093525179857e-05, "loss": 0.5271028280258179, "step": 379, "token_acc": 0.8248233782915864 }, { "epoch": 0.020503965898667244, "grad_norm": 1.0738325119018555, "learning_rate": 1.3669064748201439e-05, "loss": 0.531367838382721, "step": 380, "token_acc": 0.8261738261738262 }, { "epoch": 0.020557923703663734, "grad_norm": 0.9706140160560608, "learning_rate": 1.3705035971223021e-05, "loss": 0.5510954260826111, "step": 381, "token_acc": 0.8234851336093338 }, { "epoch": 0.020611881508660227, "grad_norm": 1.0574803352355957, "learning_rate": 1.3741007194244604e-05, "loss": 0.5830454230308533, "step": 382, "token_acc": 0.8147404357867256 }, { "epoch": 0.02066583931365672, "grad_norm": 1.0596041679382324, "learning_rate": 1.3776978417266189e-05, "loss": 0.5864371061325073, "step": 383, "token_acc": 0.8055937643282898 }, { "epoch": 0.020719797118653214, "grad_norm": 1.0957857370376587, "learning_rate": 1.3812949640287772e-05, "loss": 0.5086735486984253, "step": 384, "token_acc": 0.8334701696770662 }, { "epoch": 0.020773754923649707, "grad_norm": 0.9738159775733948, "learning_rate": 1.3848920863309355e-05, "loss": 0.4789119362831116, "step": 385, "token_acc": 0.84109714986209 }, { "epoch": 0.020827712728646197, "grad_norm": 1.2852336168289185, "learning_rate": 1.3884892086330936e-05, "loss": 0.5644443035125732, "step": 386, "token_acc": 0.8212900096993211 }, { "epoch": 0.02088167053364269, "grad_norm": 1.0116539001464844, "learning_rate": 1.3920863309352519e-05, "loss": 0.5307828783988953, "step": 387, "token_acc": 0.8249235474006116 }, { "epoch": 0.020935628338639184, "grad_norm": 1.2473320960998535, "learning_rate": 1.3956834532374102e-05, "loss": 0.5386290550231934, "step": 388, "token_acc": 0.8226105784101454 }, { "epoch": 0.020989586143635677, "grad_norm": 1.2634432315826416, "learning_rate": 1.3992805755395685e-05, "loss": 0.534481406211853, "step": 389, "token_acc": 0.8235665771600064 }, { "epoch": 0.02104354394863217, "grad_norm": 1.1550794839859009, "learning_rate": 1.4028776978417267e-05, "loss": 0.5967423319816589, "step": 390, "token_acc": 0.7987263033175356 }, { "epoch": 0.021097501753628664, "grad_norm": 1.151774287223816, "learning_rate": 1.4064748201438849e-05, "loss": 0.5155565738677979, "step": 391, "token_acc": 0.8270967741935484 }, { "epoch": 0.021151459558625154, "grad_norm": 0.9588260650634766, "learning_rate": 1.4100719424460432e-05, "loss": 0.5323038101196289, "step": 392, "token_acc": 0.824447137231142 }, { "epoch": 0.021205417363621647, "grad_norm": 1.1415965557098389, "learning_rate": 1.4136690647482016e-05, "loss": 0.5103614330291748, "step": 393, "token_acc": 0.8287367146508812 }, { "epoch": 0.02125937516861814, "grad_norm": 1.2802249193191528, "learning_rate": 1.4172661870503599e-05, "loss": 0.5155404806137085, "step": 394, "token_acc": 0.8286110255620572 }, { "epoch": 0.021313332973614634, "grad_norm": 1.1749275922775269, "learning_rate": 1.4208633093525182e-05, "loss": 0.5332057476043701, "step": 395, "token_acc": 0.8204670775218943 }, { "epoch": 0.021367290778611128, "grad_norm": 0.8265388607978821, "learning_rate": 1.4244604316546765e-05, "loss": 0.6035418510437012, "step": 396, "token_acc": 0.8063772048846676 }, { "epoch": 0.021421248583607617, "grad_norm": 1.057482361793518, "learning_rate": 1.4280575539568346e-05, "loss": 0.5252602100372314, "step": 397, "token_acc": 0.8271642704190884 }, { "epoch": 0.02147520638860411, "grad_norm": 0.9712656736373901, "learning_rate": 1.4316546762589929e-05, "loss": 0.5459039211273193, "step": 398, "token_acc": 0.8273414776032577 }, { "epoch": 0.021529164193600604, "grad_norm": 1.105743646621704, "learning_rate": 1.4352517985611512e-05, "loss": 0.5170553922653198, "step": 399, "token_acc": 0.8303924150645247 }, { "epoch": 0.021583121998597098, "grad_norm": 1.0731794834136963, "learning_rate": 1.4388489208633095e-05, "loss": 0.5370499491691589, "step": 400, "token_acc": 0.8258085174511688 }, { "epoch": 0.02163707980359359, "grad_norm": 1.2060279846191406, "learning_rate": 1.4424460431654676e-05, "loss": 0.49507594108581543, "step": 401, "token_acc": 0.8385655638734605 }, { "epoch": 0.02169103760859008, "grad_norm": 1.149664044380188, "learning_rate": 1.4460431654676259e-05, "loss": 0.5518022775650024, "step": 402, "token_acc": 0.8226886195042519 }, { "epoch": 0.021744995413586574, "grad_norm": 1.4172345399856567, "learning_rate": 1.4496402877697843e-05, "loss": 0.45899420976638794, "step": 403, "token_acc": 0.8465405793631793 }, { "epoch": 0.021798953218583068, "grad_norm": 1.172518014907837, "learning_rate": 1.4532374100719426e-05, "loss": 0.5642247200012207, "step": 404, "token_acc": 0.813317064667945 }, { "epoch": 0.02185291102357956, "grad_norm": 1.1362918615341187, "learning_rate": 1.4568345323741009e-05, "loss": 0.5431301593780518, "step": 405, "token_acc": 0.8220487279580024 }, { "epoch": 0.021906868828576054, "grad_norm": 1.1181508302688599, "learning_rate": 1.4604316546762592e-05, "loss": 0.6029300093650818, "step": 406, "token_acc": 0.8005295675198588 }, { "epoch": 0.021960826633572548, "grad_norm": 1.335803747177124, "learning_rate": 1.4640287769784173e-05, "loss": 0.5257953405380249, "step": 407, "token_acc": 0.829985057280425 }, { "epoch": 0.022014784438569038, "grad_norm": 1.0097477436065674, "learning_rate": 1.4676258992805756e-05, "loss": 0.5567148923873901, "step": 408, "token_acc": 0.8179535467671061 }, { "epoch": 0.02206874224356553, "grad_norm": 0.9829238057136536, "learning_rate": 1.4712230215827339e-05, "loss": 0.5738274455070496, "step": 409, "token_acc": 0.814233714070541 }, { "epoch": 0.022122700048562025, "grad_norm": 1.1677223443984985, "learning_rate": 1.4748201438848922e-05, "loss": 0.5454132556915283, "step": 410, "token_acc": 0.8216392424712822 }, { "epoch": 0.022176657853558518, "grad_norm": 1.0173710584640503, "learning_rate": 1.4784172661870505e-05, "loss": 0.5490942001342773, "step": 411, "token_acc": 0.8253818296331287 }, { "epoch": 0.02223061565855501, "grad_norm": 1.3339848518371582, "learning_rate": 1.4820143884892086e-05, "loss": 0.5787262320518494, "step": 412, "token_acc": 0.8129763896635063 }, { "epoch": 0.0222845734635515, "grad_norm": 1.3250160217285156, "learning_rate": 1.485611510791367e-05, "loss": 0.5333911180496216, "step": 413, "token_acc": 0.8247586459733594 }, { "epoch": 0.022338531268547995, "grad_norm": 1.0416465997695923, "learning_rate": 1.4892086330935253e-05, "loss": 0.5628114342689514, "step": 414, "token_acc": 0.8215657311669129 }, { "epoch": 0.022392489073544488, "grad_norm": 0.9169410467147827, "learning_rate": 1.4928057553956836e-05, "loss": 0.5347352027893066, "step": 415, "token_acc": 0.8244766505636071 }, { "epoch": 0.02244644687854098, "grad_norm": 1.151006817817688, "learning_rate": 1.496402877697842e-05, "loss": 0.4866713583469391, "step": 416, "token_acc": 0.8375135005400216 }, { "epoch": 0.022500404683537475, "grad_norm": 1.1468870639801025, "learning_rate": 1.5000000000000002e-05, "loss": 0.5644465684890747, "step": 417, "token_acc": 0.8183568677792041 }, { "epoch": 0.022554362488533968, "grad_norm": 1.0312232971191406, "learning_rate": 1.5035971223021583e-05, "loss": 0.5269845724105835, "step": 418, "token_acc": 0.8249329380206127 }, { "epoch": 0.022608320293530458, "grad_norm": 0.9958366751670837, "learning_rate": 1.5071942446043166e-05, "loss": 0.5787090063095093, "step": 419, "token_acc": 0.8092964024196115 }, { "epoch": 0.02266227809852695, "grad_norm": 1.0867117643356323, "learning_rate": 1.5107913669064749e-05, "loss": 0.5493800044059753, "step": 420, "token_acc": 0.8219159008632693 }, { "epoch": 0.022716235903523445, "grad_norm": 1.1639856100082397, "learning_rate": 1.5143884892086332e-05, "loss": 0.5216587781906128, "step": 421, "token_acc": 0.825336838646073 }, { "epoch": 0.022770193708519938, "grad_norm": 1.0899628400802612, "learning_rate": 1.5179856115107917e-05, "loss": 0.58899986743927, "step": 422, "token_acc": 0.8058676654182272 }, { "epoch": 0.02282415151351643, "grad_norm": 0.9755807518959045, "learning_rate": 1.5215827338129498e-05, "loss": 0.524013876914978, "step": 423, "token_acc": 0.821842250413679 }, { "epoch": 0.02287810931851292, "grad_norm": 0.935080349445343, "learning_rate": 1.525179856115108e-05, "loss": 0.5589017868041992, "step": 424, "token_acc": 0.8157364497279871 }, { "epoch": 0.022932067123509415, "grad_norm": 1.2871754169464111, "learning_rate": 1.5287769784172665e-05, "loss": 0.5428625345230103, "step": 425, "token_acc": 0.8219470538001707 }, { "epoch": 0.02298602492850591, "grad_norm": 0.9929945468902588, "learning_rate": 1.5323741007194246e-05, "loss": 0.5146746635437012, "step": 426, "token_acc": 0.827689818560037 }, { "epoch": 0.0230399827335024, "grad_norm": 1.0367319583892822, "learning_rate": 1.5359712230215828e-05, "loss": 0.5520021915435791, "step": 427, "token_acc": 0.8228973202440966 }, { "epoch": 0.023093940538498895, "grad_norm": 1.083417296409607, "learning_rate": 1.5395683453237412e-05, "loss": 0.5077105164527893, "step": 428, "token_acc": 0.8299028486744607 }, { "epoch": 0.023147898343495385, "grad_norm": 1.2747856378555298, "learning_rate": 1.5431654676258993e-05, "loss": 0.5435516834259033, "step": 429, "token_acc": 0.8246300599241776 }, { "epoch": 0.02320185614849188, "grad_norm": 1.0640991926193237, "learning_rate": 1.5467625899280575e-05, "loss": 0.5193601846694946, "step": 430, "token_acc": 0.8330290860411342 }, { "epoch": 0.023255813953488372, "grad_norm": 0.8422157764434814, "learning_rate": 1.550359712230216e-05, "loss": 0.5316506624221802, "step": 431, "token_acc": 0.8239964711071901 }, { "epoch": 0.023309771758484865, "grad_norm": 1.1287683248519897, "learning_rate": 1.5539568345323744e-05, "loss": 0.5034607648849487, "step": 432, "token_acc": 0.8323105656350054 }, { "epoch": 0.02336372956348136, "grad_norm": 1.0308176279067993, "learning_rate": 1.5575539568345325e-05, "loss": 0.5430840849876404, "step": 433, "token_acc": 0.8177003246198531 }, { "epoch": 0.023417687368477852, "grad_norm": 1.1355915069580078, "learning_rate": 1.561151079136691e-05, "loss": 0.5133794546127319, "step": 434, "token_acc": 0.8304067236980339 }, { "epoch": 0.023471645173474342, "grad_norm": 0.8725458979606628, "learning_rate": 1.564748201438849e-05, "loss": 0.5693319439888, "step": 435, "token_acc": 0.8154285714285714 }, { "epoch": 0.023525602978470835, "grad_norm": 1.0032703876495361, "learning_rate": 1.5683453237410072e-05, "loss": 0.5207476615905762, "step": 436, "token_acc": 0.8256074168797954 }, { "epoch": 0.02357956078346733, "grad_norm": 1.1123868227005005, "learning_rate": 1.5719424460431656e-05, "loss": 0.5080671310424805, "step": 437, "token_acc": 0.826850310276979 }, { "epoch": 0.023633518588463822, "grad_norm": 1.0119410753250122, "learning_rate": 1.5755395683453238e-05, "loss": 0.6030460596084595, "step": 438, "token_acc": 0.8037392622536634 }, { "epoch": 0.023687476393460315, "grad_norm": 0.9158233404159546, "learning_rate": 1.5791366906474822e-05, "loss": 0.5196210741996765, "step": 439, "token_acc": 0.8298459750072653 }, { "epoch": 0.023741434198456805, "grad_norm": 1.2603648900985718, "learning_rate": 1.5827338129496403e-05, "loss": 0.6079951524734497, "step": 440, "token_acc": 0.8077607113985449 }, { "epoch": 0.0237953920034533, "grad_norm": 0.9551938772201538, "learning_rate": 1.5863309352517985e-05, "loss": 0.573640763759613, "step": 441, "token_acc": 0.8114498319731157 }, { "epoch": 0.023849349808449792, "grad_norm": 1.1116480827331543, "learning_rate": 1.589928057553957e-05, "loss": 0.5732841491699219, "step": 442, "token_acc": 0.8160751904067413 }, { "epoch": 0.023903307613446285, "grad_norm": 1.1947910785675049, "learning_rate": 1.5935251798561154e-05, "loss": 0.6062458157539368, "step": 443, "token_acc": 0.8005118534482759 }, { "epoch": 0.02395726541844278, "grad_norm": 0.9305034279823303, "learning_rate": 1.5971223021582735e-05, "loss": 0.4770374894142151, "step": 444, "token_acc": 0.8412620765561942 }, { "epoch": 0.02401122322343927, "grad_norm": 1.1692206859588623, "learning_rate": 1.600719424460432e-05, "loss": 0.5780553221702576, "step": 445, "token_acc": 0.813130477922937 }, { "epoch": 0.024065181028435762, "grad_norm": 0.9158545136451721, "learning_rate": 1.60431654676259e-05, "loss": 0.5097954869270325, "step": 446, "token_acc": 0.8309244203553147 }, { "epoch": 0.024119138833432256, "grad_norm": 1.2318555116653442, "learning_rate": 1.6079136690647482e-05, "loss": 0.5003948211669922, "step": 447, "token_acc": 0.8365578635014836 }, { "epoch": 0.02417309663842875, "grad_norm": 1.1066275835037231, "learning_rate": 1.6115107913669067e-05, "loss": 0.5424032211303711, "step": 448, "token_acc": 0.8219443203811225 }, { "epoch": 0.024227054443425242, "grad_norm": 1.0394712686538696, "learning_rate": 1.6151079136690648e-05, "loss": 0.5927717089653015, "step": 449, "token_acc": 0.811421591082608 }, { "epoch": 0.024281012248421736, "grad_norm": 1.009264349937439, "learning_rate": 1.6187050359712232e-05, "loss": 0.49486657977104187, "step": 450, "token_acc": 0.8377891258636228 }, { "epoch": 0.024334970053418226, "grad_norm": 0.8768704533576965, "learning_rate": 1.6223021582733814e-05, "loss": 0.46161097288131714, "step": 451, "token_acc": 0.8493248894730553 }, { "epoch": 0.02438892785841472, "grad_norm": 1.0338271856307983, "learning_rate": 1.6258992805755398e-05, "loss": 0.5448617339134216, "step": 452, "token_acc": 0.8277426673148598 }, { "epoch": 0.024442885663411212, "grad_norm": 0.9124260544776917, "learning_rate": 1.629496402877698e-05, "loss": 0.5436785221099854, "step": 453, "token_acc": 0.8247676325861126 }, { "epoch": 0.024496843468407706, "grad_norm": 1.0028990507125854, "learning_rate": 1.6330935251798564e-05, "loss": 0.5370782613754272, "step": 454, "token_acc": 0.8287524035742563 }, { "epoch": 0.0245508012734042, "grad_norm": 1.0386286973953247, "learning_rate": 1.6366906474820145e-05, "loss": 0.5590056777000427, "step": 455, "token_acc": 0.8200223574711216 }, { "epoch": 0.02460475907840069, "grad_norm": 1.0653069019317627, "learning_rate": 1.640287769784173e-05, "loss": 0.5108538269996643, "step": 456, "token_acc": 0.8379736520440038 }, { "epoch": 0.024658716883397182, "grad_norm": 1.0202785730361938, "learning_rate": 1.643884892086331e-05, "loss": 0.5241342782974243, "step": 457, "token_acc": 0.8279362631714212 }, { "epoch": 0.024712674688393676, "grad_norm": 0.8106256723403931, "learning_rate": 1.6474820143884892e-05, "loss": 0.5194336175918579, "step": 458, "token_acc": 0.8259863770723558 }, { "epoch": 0.02476663249339017, "grad_norm": 0.9299056529998779, "learning_rate": 1.6510791366906477e-05, "loss": 0.48505085706710815, "step": 459, "token_acc": 0.8388363636363636 }, { "epoch": 0.024820590298386663, "grad_norm": 0.9912726879119873, "learning_rate": 1.6546762589928058e-05, "loss": 0.49281808733940125, "step": 460, "token_acc": 0.8331305405875196 }, { "epoch": 0.024874548103383156, "grad_norm": 0.9611283540725708, "learning_rate": 1.658273381294964e-05, "loss": 0.5076809525489807, "step": 461, "token_acc": 0.8330827067669173 }, { "epoch": 0.024928505908379646, "grad_norm": 0.9150076508522034, "learning_rate": 1.6618705035971227e-05, "loss": 0.5788812637329102, "step": 462, "token_acc": 0.8147630815515203 }, { "epoch": 0.02498246371337614, "grad_norm": 1.0253294706344604, "learning_rate": 1.6654676258992808e-05, "loss": 0.5554934740066528, "step": 463, "token_acc": 0.8189076392669207 }, { "epoch": 0.025036421518372633, "grad_norm": 0.7566479444503784, "learning_rate": 1.669064748201439e-05, "loss": 0.5100162625312805, "step": 464, "token_acc": 0.8394347240915209 }, { "epoch": 0.025090379323369126, "grad_norm": 1.0121874809265137, "learning_rate": 1.6726618705035974e-05, "loss": 0.5511595606803894, "step": 465, "token_acc": 0.8237136465324385 }, { "epoch": 0.02514433712836562, "grad_norm": 0.7896140217781067, "learning_rate": 1.6762589928057555e-05, "loss": 0.48851603269577026, "step": 466, "token_acc": 0.8380819129992368 }, { "epoch": 0.02519829493336211, "grad_norm": 0.9557551741600037, "learning_rate": 1.6798561151079136e-05, "loss": 0.5894536972045898, "step": 467, "token_acc": 0.8089517854204377 }, { "epoch": 0.025252252738358603, "grad_norm": 1.026414155960083, "learning_rate": 1.683453237410072e-05, "loss": 0.6021705269813538, "step": 468, "token_acc": 0.8058889198995255 }, { "epoch": 0.025306210543355096, "grad_norm": 0.8699323534965515, "learning_rate": 1.6870503597122302e-05, "loss": 0.6031398773193359, "step": 469, "token_acc": 0.8113929431769874 }, { "epoch": 0.02536016834835159, "grad_norm": 0.866012454032898, "learning_rate": 1.6906474820143887e-05, "loss": 0.4998677372932434, "step": 470, "token_acc": 0.8351097983819186 }, { "epoch": 0.025414126153348083, "grad_norm": 0.9242586493492126, "learning_rate": 1.6942446043165468e-05, "loss": 0.4896290898323059, "step": 471, "token_acc": 0.8390250619664005 }, { "epoch": 0.025468083958344573, "grad_norm": 1.0010592937469482, "learning_rate": 1.6978417266187053e-05, "loss": 0.5032800436019897, "step": 472, "token_acc": 0.8304597701149425 }, { "epoch": 0.025522041763341066, "grad_norm": 1.1059566736221313, "learning_rate": 1.7014388489208634e-05, "loss": 0.5629953742027283, "step": 473, "token_acc": 0.8158335660430047 }, { "epoch": 0.02557599956833756, "grad_norm": 0.9736800193786621, "learning_rate": 1.7050359712230218e-05, "loss": 0.5023436546325684, "step": 474, "token_acc": 0.8369304556354916 }, { "epoch": 0.025629957373334053, "grad_norm": 1.3071646690368652, "learning_rate": 1.70863309352518e-05, "loss": 0.56074059009552, "step": 475, "token_acc": 0.8213032198992828 }, { "epoch": 0.025683915178330546, "grad_norm": 1.0498976707458496, "learning_rate": 1.7122302158273384e-05, "loss": 0.594517171382904, "step": 476, "token_acc": 0.8137267004073571 }, { "epoch": 0.02573787298332704, "grad_norm": 0.9622722864151001, "learning_rate": 1.7158273381294965e-05, "loss": 0.5416862964630127, "step": 477, "token_acc": 0.8281757565395794 }, { "epoch": 0.02579183078832353, "grad_norm": 0.9691368341445923, "learning_rate": 1.7194244604316546e-05, "loss": 0.5132874846458435, "step": 478, "token_acc": 0.8273533136344449 }, { "epoch": 0.025845788593320023, "grad_norm": 0.8192963004112244, "learning_rate": 1.723021582733813e-05, "loss": 0.560675859451294, "step": 479, "token_acc": 0.8186678547560704 }, { "epoch": 0.025899746398316516, "grad_norm": 0.9438736438751221, "learning_rate": 1.7266187050359712e-05, "loss": 0.5090795755386353, "step": 480, "token_acc": 0.8338635049161365 }, { "epoch": 0.02595370420331301, "grad_norm": 0.8726692199707031, "learning_rate": 1.7302158273381297e-05, "loss": 0.5122113227844238, "step": 481, "token_acc": 0.8272747798600135 }, { "epoch": 0.026007662008309503, "grad_norm": 1.0681567192077637, "learning_rate": 1.733812949640288e-05, "loss": 0.5661367177963257, "step": 482, "token_acc": 0.812137797810689 }, { "epoch": 0.026061619813305993, "grad_norm": 0.9490201473236084, "learning_rate": 1.7374100719424463e-05, "loss": 0.5648879408836365, "step": 483, "token_acc": 0.8146908859145953 }, { "epoch": 0.026115577618302487, "grad_norm": 1.0770515203475952, "learning_rate": 1.7410071942446044e-05, "loss": 0.5630017518997192, "step": 484, "token_acc": 0.8174939467312349 }, { "epoch": 0.02616953542329898, "grad_norm": 1.0220959186553955, "learning_rate": 1.744604316546763e-05, "loss": 0.5208859443664551, "step": 485, "token_acc": 0.8265374331550802 }, { "epoch": 0.026223493228295473, "grad_norm": 0.7670276165008545, "learning_rate": 1.748201438848921e-05, "loss": 0.5169594287872314, "step": 486, "token_acc": 0.8277700215763422 }, { "epoch": 0.026277451033291967, "grad_norm": 1.3247113227844238, "learning_rate": 1.7517985611510794e-05, "loss": 0.5032602548599243, "step": 487, "token_acc": 0.8359541011474713 }, { "epoch": 0.02633140883828846, "grad_norm": 0.8299486637115479, "learning_rate": 1.7553956834532375e-05, "loss": 0.518614649772644, "step": 488, "token_acc": 0.8290668868703551 }, { "epoch": 0.02638536664328495, "grad_norm": 0.9653997421264648, "learning_rate": 1.7589928057553957e-05, "loss": 0.5447885394096375, "step": 489, "token_acc": 0.8216997663551402 }, { "epoch": 0.026439324448281443, "grad_norm": 0.8616854548454285, "learning_rate": 1.762589928057554e-05, "loss": 0.5569326281547546, "step": 490, "token_acc": 0.8136704119850188 }, { "epoch": 0.026493282253277937, "grad_norm": 0.8511533141136169, "learning_rate": 1.7661870503597122e-05, "loss": 0.5258849859237671, "step": 491, "token_acc": 0.8253256150506513 }, { "epoch": 0.02654724005827443, "grad_norm": 1.0204291343688965, "learning_rate": 1.7697841726618707e-05, "loss": 0.4521607756614685, "step": 492, "token_acc": 0.8522671867381765 }, { "epoch": 0.026601197863270924, "grad_norm": 1.1322696208953857, "learning_rate": 1.773381294964029e-05, "loss": 0.5573593378067017, "step": 493, "token_acc": 0.8216855087358684 }, { "epoch": 0.026655155668267413, "grad_norm": 0.8693358302116394, "learning_rate": 1.7769784172661873e-05, "loss": 0.5003102421760559, "step": 494, "token_acc": 0.8371008759536592 }, { "epoch": 0.026709113473263907, "grad_norm": 0.9683269262313843, "learning_rate": 1.7805755395683454e-05, "loss": 0.4873804450035095, "step": 495, "token_acc": 0.8399144455412965 }, { "epoch": 0.0267630712782604, "grad_norm": 0.685890257358551, "learning_rate": 1.784172661870504e-05, "loss": 0.5214245915412903, "step": 496, "token_acc": 0.8293951281817352 }, { "epoch": 0.026817029083256894, "grad_norm": 0.9120921492576599, "learning_rate": 1.787769784172662e-05, "loss": 0.581834077835083, "step": 497, "token_acc": 0.8163544373034323 }, { "epoch": 0.026870986888253387, "grad_norm": 0.9662275314331055, "learning_rate": 1.7913669064748204e-05, "loss": 0.5624752044677734, "step": 498, "token_acc": 0.8200122025625382 }, { "epoch": 0.026924944693249877, "grad_norm": 0.8612667918205261, "learning_rate": 1.7949640287769785e-05, "loss": 0.5236034393310547, "step": 499, "token_acc": 0.8327745180217938 }, { "epoch": 0.02697890249824637, "grad_norm": 1.0126112699508667, "learning_rate": 1.7985611510791367e-05, "loss": 0.5707177519798279, "step": 500, "token_acc": 0.8156710628394104 }, { "epoch": 0.027032860303242864, "grad_norm": 1.094970941543579, "learning_rate": 1.802158273381295e-05, "loss": 0.5398538112640381, "step": 501, "token_acc": 0.8218451749734889 }, { "epoch": 0.027086818108239357, "grad_norm": 0.8116239309310913, "learning_rate": 1.8057553956834536e-05, "loss": 0.5137415528297424, "step": 502, "token_acc": 0.8341268042016172 }, { "epoch": 0.02714077591323585, "grad_norm": 1.132163166999817, "learning_rate": 1.8093525179856117e-05, "loss": 0.5456819534301758, "step": 503, "token_acc": 0.8209568011424492 }, { "epoch": 0.027194733718232344, "grad_norm": 0.908989429473877, "learning_rate": 1.8129496402877698e-05, "loss": 0.565190851688385, "step": 504, "token_acc": 0.8151688182720953 }, { "epoch": 0.027248691523228834, "grad_norm": 0.8260560631752014, "learning_rate": 1.8165467625899283e-05, "loss": 0.5115716457366943, "step": 505, "token_acc": 0.8283059520603285 }, { "epoch": 0.027302649328225327, "grad_norm": 0.9077727198600769, "learning_rate": 1.8201438848920864e-05, "loss": 0.5399124026298523, "step": 506, "token_acc": 0.8180264844083724 }, { "epoch": 0.02735660713322182, "grad_norm": 1.1503757238388062, "learning_rate": 1.823741007194245e-05, "loss": 0.5259125828742981, "step": 507, "token_acc": 0.8259193833306568 }, { "epoch": 0.027410564938218314, "grad_norm": 0.91961270570755, "learning_rate": 1.827338129496403e-05, "loss": 0.5029640197753906, "step": 508, "token_acc": 0.8365543898398675 }, { "epoch": 0.027464522743214807, "grad_norm": 0.7931209206581116, "learning_rate": 1.830935251798561e-05, "loss": 0.5456376075744629, "step": 509, "token_acc": 0.8259142423180278 }, { "epoch": 0.027518480548211297, "grad_norm": 1.0326100587844849, "learning_rate": 1.8345323741007196e-05, "loss": 0.5517668724060059, "step": 510, "token_acc": 0.8206956774850822 }, { "epoch": 0.02757243835320779, "grad_norm": 1.0711385011672974, "learning_rate": 1.8381294964028777e-05, "loss": 0.6041605472564697, "step": 511, "token_acc": 0.80801654207094 }, { "epoch": 0.027626396158204284, "grad_norm": 0.9799340963363647, "learning_rate": 1.841726618705036e-05, "loss": 0.5924972891807556, "step": 512, "token_acc": 0.8078238271098345 }, { "epoch": 0.027680353963200777, "grad_norm": 1.1573963165283203, "learning_rate": 1.8453237410071946e-05, "loss": 0.5408563613891602, "step": 513, "token_acc": 0.8164291701592624 }, { "epoch": 0.02773431176819727, "grad_norm": 0.8673797249794006, "learning_rate": 1.8489208633093527e-05, "loss": 0.5445853471755981, "step": 514, "token_acc": 0.8228687415426251 }, { "epoch": 0.027788269573193764, "grad_norm": 1.0117613077163696, "learning_rate": 1.8525179856115108e-05, "loss": 0.5908182859420776, "step": 515, "token_acc": 0.8088346337780015 }, { "epoch": 0.027842227378190254, "grad_norm": 0.8000991940498352, "learning_rate": 1.8561151079136693e-05, "loss": 0.5541455745697021, "step": 516, "token_acc": 0.8205456641475599 }, { "epoch": 0.027896185183186747, "grad_norm": 1.173998236656189, "learning_rate": 1.8597122302158274e-05, "loss": 0.5537809133529663, "step": 517, "token_acc": 0.8110288065843622 }, { "epoch": 0.02795014298818324, "grad_norm": 0.8459213972091675, "learning_rate": 1.863309352517986e-05, "loss": 0.5822895765304565, "step": 518, "token_acc": 0.8141308233051352 }, { "epoch": 0.028004100793179734, "grad_norm": 0.7506870627403259, "learning_rate": 1.866906474820144e-05, "loss": 0.5124966502189636, "step": 519, "token_acc": 0.8312230603448276 }, { "epoch": 0.028058058598176228, "grad_norm": 0.9153597354888916, "learning_rate": 1.870503597122302e-05, "loss": 0.5689592361450195, "step": 520, "token_acc": 0.8202804135391588 }, { "epoch": 0.028112016403172718, "grad_norm": 0.7530882358551025, "learning_rate": 1.8741007194244606e-05, "loss": 0.5498135685920715, "step": 521, "token_acc": 0.8164848166162745 }, { "epoch": 0.02816597420816921, "grad_norm": 1.2495821714401245, "learning_rate": 1.877697841726619e-05, "loss": 0.5826138257980347, "step": 522, "token_acc": 0.804930332261522 }, { "epoch": 0.028219932013165704, "grad_norm": 0.8961580395698547, "learning_rate": 1.881294964028777e-05, "loss": 0.564133882522583, "step": 523, "token_acc": 0.815380456818489 }, { "epoch": 0.028273889818162198, "grad_norm": 1.0762847661972046, "learning_rate": 1.8848920863309356e-05, "loss": 0.5277817845344543, "step": 524, "token_acc": 0.8285990338164251 }, { "epoch": 0.02832784762315869, "grad_norm": 1.0182243585586548, "learning_rate": 1.8884892086330937e-05, "loss": 0.529015302658081, "step": 525, "token_acc": 0.8264065592309867 }, { "epoch": 0.02838180542815518, "grad_norm": 0.895703136920929, "learning_rate": 1.892086330935252e-05, "loss": 0.5761286020278931, "step": 526, "token_acc": 0.8109946160385378 }, { "epoch": 0.028435763233151674, "grad_norm": 0.8803246021270752, "learning_rate": 1.8956834532374103e-05, "loss": 0.5561714172363281, "step": 527, "token_acc": 0.8233890214797136 }, { "epoch": 0.028489721038148168, "grad_norm": 0.768547773361206, "learning_rate": 1.8992805755395684e-05, "loss": 0.5427009463310242, "step": 528, "token_acc": 0.8200500592807272 }, { "epoch": 0.02854367884314466, "grad_norm": 1.0069619417190552, "learning_rate": 1.902877697841727e-05, "loss": 0.5713762640953064, "step": 529, "token_acc": 0.8146559428060769 }, { "epoch": 0.028597636648141155, "grad_norm": 0.9637838006019592, "learning_rate": 1.906474820143885e-05, "loss": 0.5672692060470581, "step": 530, "token_acc": 0.8154891304347827 }, { "epoch": 0.028651594453137648, "grad_norm": 0.8838149309158325, "learning_rate": 1.910071942446043e-05, "loss": 0.520588219165802, "step": 531, "token_acc": 0.8247422680412371 }, { "epoch": 0.028705552258134138, "grad_norm": 1.0735280513763428, "learning_rate": 1.9136690647482016e-05, "loss": 0.49569541215896606, "step": 532, "token_acc": 0.8382439024390244 }, { "epoch": 0.02875951006313063, "grad_norm": 0.8264849781990051, "learning_rate": 1.91726618705036e-05, "loss": 0.4889860451221466, "step": 533, "token_acc": 0.8352765321375187 }, { "epoch": 0.028813467868127125, "grad_norm": 0.8393588662147522, "learning_rate": 1.920863309352518e-05, "loss": 0.5472247004508972, "step": 534, "token_acc": 0.8181416580768665 }, { "epoch": 0.028867425673123618, "grad_norm": 0.9237326383590698, "learning_rate": 1.9244604316546766e-05, "loss": 0.5282249450683594, "step": 535, "token_acc": 0.8248051507963402 }, { "epoch": 0.02892138347812011, "grad_norm": 0.7988091111183167, "learning_rate": 1.9280575539568347e-05, "loss": 0.5440127849578857, "step": 536, "token_acc": 0.8253182461103253 }, { "epoch": 0.0289753412831166, "grad_norm": 1.1486634016036987, "learning_rate": 1.931654676258993e-05, "loss": 0.609199047088623, "step": 537, "token_acc": 0.8001909438079651 }, { "epoch": 0.029029299088113095, "grad_norm": 1.0683753490447998, "learning_rate": 1.9352517985611513e-05, "loss": 0.539993166923523, "step": 538, "token_acc": 0.8252516010978957 }, { "epoch": 0.029083256893109588, "grad_norm": 0.9826582670211792, "learning_rate": 1.9388489208633094e-05, "loss": 0.543923020362854, "step": 539, "token_acc": 0.8218940052128584 }, { "epoch": 0.02913721469810608, "grad_norm": 0.9183666110038757, "learning_rate": 1.9424460431654675e-05, "loss": 0.583259642124176, "step": 540, "token_acc": 0.8081360048573163 }, { "epoch": 0.029191172503102575, "grad_norm": 0.9907522797584534, "learning_rate": 1.946043165467626e-05, "loss": 0.5700074434280396, "step": 541, "token_acc": 0.8124895642010352 }, { "epoch": 0.029245130308099068, "grad_norm": 0.8118934631347656, "learning_rate": 1.9496402877697845e-05, "loss": 0.5443419814109802, "step": 542, "token_acc": 0.826993456803815 }, { "epoch": 0.029299088113095558, "grad_norm": 0.7955740094184875, "learning_rate": 1.9532374100719426e-05, "loss": 0.5856417417526245, "step": 543, "token_acc": 0.8157065935182363 }, { "epoch": 0.02935304591809205, "grad_norm": 1.0373057126998901, "learning_rate": 1.956834532374101e-05, "loss": 0.5441112518310547, "step": 544, "token_acc": 0.8205936920222634 }, { "epoch": 0.029407003723088545, "grad_norm": 1.1088558435440063, "learning_rate": 1.960431654676259e-05, "loss": 0.590958833694458, "step": 545, "token_acc": 0.8103952022143626 }, { "epoch": 0.02946096152808504, "grad_norm": 0.9900914430618286, "learning_rate": 1.9640287769784173e-05, "loss": 0.5414022207260132, "step": 546, "token_acc": 0.8257997218358831 }, { "epoch": 0.02951491933308153, "grad_norm": 0.8688497543334961, "learning_rate": 1.9676258992805757e-05, "loss": 0.5331407189369202, "step": 547, "token_acc": 0.8268210323203087 }, { "epoch": 0.02956887713807802, "grad_norm": 0.7716765999794006, "learning_rate": 1.971223021582734e-05, "loss": 0.5427395701408386, "step": 548, "token_acc": 0.825506867233486 }, { "epoch": 0.029622834943074515, "grad_norm": 1.0307387113571167, "learning_rate": 1.9748201438848923e-05, "loss": 0.515133798122406, "step": 549, "token_acc": 0.8219490781387182 }, { "epoch": 0.02967679274807101, "grad_norm": 0.8740566968917847, "learning_rate": 1.9784172661870504e-05, "loss": 0.48254671692848206, "step": 550, "token_acc": 0.8426594832863965 }, { "epoch": 0.029730750553067502, "grad_norm": 0.9598432779312134, "learning_rate": 1.9820143884892085e-05, "loss": 0.5808261632919312, "step": 551, "token_acc": 0.8116106283843345 }, { "epoch": 0.029784708358063995, "grad_norm": 0.9150490760803223, "learning_rate": 1.985611510791367e-05, "loss": 0.580181360244751, "step": 552, "token_acc": 0.8137858537317916 }, { "epoch": 0.029838666163060485, "grad_norm": 0.9727497696876526, "learning_rate": 1.9892086330935255e-05, "loss": 0.5575789213180542, "step": 553, "token_acc": 0.8230873000578146 }, { "epoch": 0.02989262396805698, "grad_norm": 0.8944370746612549, "learning_rate": 1.9928057553956836e-05, "loss": 0.535334587097168, "step": 554, "token_acc": 0.8250465682762573 }, { "epoch": 0.029946581773053472, "grad_norm": 0.8133926391601562, "learning_rate": 1.996402877697842e-05, "loss": 0.5418198704719543, "step": 555, "token_acc": 0.8235893949694085 }, { "epoch": 0.030000539578049965, "grad_norm": 0.7992306351661682, "learning_rate": 2e-05, "loss": 0.508002758026123, "step": 556, "token_acc": 0.8301756326925737 }, { "epoch": 0.03005449738304646, "grad_norm": 0.9066998958587646, "learning_rate": 1.9999999847301313e-05, "loss": 0.5523704290390015, "step": 557, "token_acc": 0.821003963011889 }, { "epoch": 0.030108455188042952, "grad_norm": 0.7771724462509155, "learning_rate": 1.9999999389205245e-05, "loss": 0.4621874690055847, "step": 558, "token_acc": 0.8495438379464976 }, { "epoch": 0.030162412993039442, "grad_norm": 0.7989349365234375, "learning_rate": 1.999999862571182e-05, "loss": 0.45630452036857605, "step": 559, "token_acc": 0.8483635843933114 }, { "epoch": 0.030216370798035935, "grad_norm": 0.8080393671989441, "learning_rate": 1.9999997556821055e-05, "loss": 0.5847355127334595, "step": 560, "token_acc": 0.8038760973993705 }, { "epoch": 0.03027032860303243, "grad_norm": 0.8546733260154724, "learning_rate": 1.999999618253298e-05, "loss": 0.5040184259414673, "step": 561, "token_acc": 0.827756199581715 }, { "epoch": 0.030324286408028922, "grad_norm": 0.8538128733634949, "learning_rate": 1.9999994502847647e-05, "loss": 0.5919207334518433, "step": 562, "token_acc": 0.8109567015045366 }, { "epoch": 0.030378244213025415, "grad_norm": 0.8711193799972534, "learning_rate": 1.9999992517765098e-05, "loss": 0.556483268737793, "step": 563, "token_acc": 0.8202333773667988 }, { "epoch": 0.030432202018021905, "grad_norm": 0.7805373668670654, "learning_rate": 1.99999902272854e-05, "loss": 0.487624853849411, "step": 564, "token_acc": 0.8371838687628161 }, { "epoch": 0.0304861598230184, "grad_norm": 1.017922043800354, "learning_rate": 1.9999987631408614e-05, "loss": 0.5785384774208069, "step": 565, "token_acc": 0.8181818181818182 }, { "epoch": 0.030540117628014892, "grad_norm": 0.9240229725837708, "learning_rate": 1.9999984730134833e-05, "loss": 0.5549162030220032, "step": 566, "token_acc": 0.8186314921681781 }, { "epoch": 0.030594075433011386, "grad_norm": 0.8939217925071716, "learning_rate": 1.999998152346413e-05, "loss": 0.5170983672142029, "step": 567, "token_acc": 0.8316955511881365 }, { "epoch": 0.03064803323800788, "grad_norm": 0.6985619068145752, "learning_rate": 1.999997801139662e-05, "loss": 0.5049999356269836, "step": 568, "token_acc": 0.8341375150784077 }, { "epoch": 0.030701991043004372, "grad_norm": 0.9485167264938354, "learning_rate": 1.9999974193932395e-05, "loss": 0.532137930393219, "step": 569, "token_acc": 0.8279258400926999 }, { "epoch": 0.030755948848000862, "grad_norm": 0.8433043956756592, "learning_rate": 1.999997007107158e-05, "loss": 0.500679612159729, "step": 570, "token_acc": 0.8329577091342967 }, { "epoch": 0.030809906652997356, "grad_norm": 0.8650495409965515, "learning_rate": 1.9999965642814296e-05, "loss": 0.5573973059654236, "step": 571, "token_acc": 0.8217465753424658 }, { "epoch": 0.03086386445799385, "grad_norm": 1.1284195184707642, "learning_rate": 1.9999960909160685e-05, "loss": 0.5361233353614807, "step": 572, "token_acc": 0.8260287081339713 }, { "epoch": 0.030917822262990342, "grad_norm": 0.9422510862350464, "learning_rate": 1.9999955870110887e-05, "loss": 0.4979308843612671, "step": 573, "token_acc": 0.8344042592252616 }, { "epoch": 0.030971780067986836, "grad_norm": 0.9514453411102295, "learning_rate": 1.9999950525665054e-05, "loss": 0.5470126271247864, "step": 574, "token_acc": 0.82233927930085 }, { "epoch": 0.031025737872983326, "grad_norm": 0.9937689900398254, "learning_rate": 1.999994487582335e-05, "loss": 0.5746195316314697, "step": 575, "token_acc": 0.8128289005831318 }, { "epoch": 0.03107969567797982, "grad_norm": 0.9682989120483398, "learning_rate": 1.9999938920585953e-05, "loss": 0.6021358370780945, "step": 576, "token_acc": 0.8049391553328561 }, { "epoch": 0.031133653482976312, "grad_norm": 0.8621955513954163, "learning_rate": 1.999993265995304e-05, "loss": 0.5273804664611816, "step": 577, "token_acc": 0.8238597452403781 }, { "epoch": 0.031187611287972806, "grad_norm": 0.7836014032363892, "learning_rate": 1.9999926093924803e-05, "loss": 0.5097523927688599, "step": 578, "token_acc": 0.8312557008209183 }, { "epoch": 0.0312415690929693, "grad_norm": 1.1662296056747437, "learning_rate": 1.999991922250144e-05, "loss": 0.5814071893692017, "step": 579, "token_acc": 0.8069692801467216 }, { "epoch": 0.03129552689796579, "grad_norm": 0.9388656616210938, "learning_rate": 1.9999912045683168e-05, "loss": 0.5282617807388306, "step": 580, "token_acc": 0.826461620045075 }, { "epoch": 0.03134948470296228, "grad_norm": 0.8446305394172668, "learning_rate": 1.9999904563470202e-05, "loss": 0.5614391565322876, "step": 581, "token_acc": 0.8143810229799852 }, { "epoch": 0.031403442507958776, "grad_norm": 0.8175392150878906, "learning_rate": 1.9999896775862766e-05, "loss": 0.506419837474823, "step": 582, "token_acc": 0.8328339575530587 }, { "epoch": 0.03145740031295527, "grad_norm": 0.7601373791694641, "learning_rate": 1.9999888682861105e-05, "loss": 0.5448474884033203, "step": 583, "token_acc": 0.8208759124087591 }, { "epoch": 0.03151135811795176, "grad_norm": 0.7040234208106995, "learning_rate": 1.999988028446546e-05, "loss": 0.5545668601989746, "step": 584, "token_acc": 0.8229673384294649 }, { "epoch": 0.031565315922948256, "grad_norm": 0.8584920167922974, "learning_rate": 1.9999871580676094e-05, "loss": 0.47397086024284363, "step": 585, "token_acc": 0.8380199860562398 }, { "epoch": 0.03161927372794475, "grad_norm": 0.8631424903869629, "learning_rate": 1.999986257149327e-05, "loss": 0.4936078190803528, "step": 586, "token_acc": 0.8362408553742262 }, { "epoch": 0.03167323153294124, "grad_norm": 0.927499532699585, "learning_rate": 1.9999853256917263e-05, "loss": 0.4969460964202881, "step": 587, "token_acc": 0.8311284046692607 }, { "epoch": 0.03172718933793773, "grad_norm": 0.9699845314025879, "learning_rate": 1.999984363694835e-05, "loss": 0.48963189125061035, "step": 588, "token_acc": 0.8310051107325384 }, { "epoch": 0.03178114714293422, "grad_norm": 0.791649580001831, "learning_rate": 1.999983371158684e-05, "loss": 0.5442118644714355, "step": 589, "token_acc": 0.8236218223364753 }, { "epoch": 0.031835104947930716, "grad_norm": 0.9858888387680054, "learning_rate": 1.9999823480833026e-05, "loss": 0.5720106363296509, "step": 590, "token_acc": 0.8163699176189211 }, { "epoch": 0.03188906275292721, "grad_norm": 0.8840697407722473, "learning_rate": 1.999981294468722e-05, "loss": 0.4824545085430145, "step": 591, "token_acc": 0.8384217104088556 }, { "epoch": 0.0319430205579237, "grad_norm": 0.8168489336967468, "learning_rate": 1.999980210314975e-05, "loss": 0.5399695038795471, "step": 592, "token_acc": 0.8198895027624309 }, { "epoch": 0.031996978362920196, "grad_norm": 0.6666600108146667, "learning_rate": 1.9999790956220943e-05, "loss": 0.4430135190486908, "step": 593, "token_acc": 0.8459657701711492 }, { "epoch": 0.03205093616791669, "grad_norm": 0.7570568919181824, "learning_rate": 1.9999779503901137e-05, "loss": 0.5569599866867065, "step": 594, "token_acc": 0.8216083645282045 }, { "epoch": 0.03210489397291318, "grad_norm": 0.9313080906867981, "learning_rate": 1.9999767746190687e-05, "loss": 0.5370352864265442, "step": 595, "token_acc": 0.8224228743416102 }, { "epoch": 0.032158851777909676, "grad_norm": 0.8811206221580505, "learning_rate": 1.999975568308995e-05, "loss": 0.5957126617431641, "step": 596, "token_acc": 0.8063281824871229 }, { "epoch": 0.03221280958290617, "grad_norm": 0.7697939872741699, "learning_rate": 1.999974331459929e-05, "loss": 0.5592180490493774, "step": 597, "token_acc": 0.8194827220691118 }, { "epoch": 0.03226676738790266, "grad_norm": 0.930656373500824, "learning_rate": 1.999973064071909e-05, "loss": 0.554093599319458, "step": 598, "token_acc": 0.8189760615062031 }, { "epoch": 0.03232072519289915, "grad_norm": 0.8828598856925964, "learning_rate": 1.9999717661449737e-05, "loss": 0.5568744540214539, "step": 599, "token_acc": 0.8215341308937368 }, { "epoch": 0.03237468299789564, "grad_norm": 0.783657431602478, "learning_rate": 1.9999704376791628e-05, "loss": 0.4956509470939636, "step": 600, "token_acc": 0.8325533781807546 }, { "epoch": 0.032428640802892136, "grad_norm": 0.937605619430542, "learning_rate": 1.9999690786745164e-05, "loss": 0.501335859298706, "step": 601, "token_acc": 0.8362253069941271 }, { "epoch": 0.03248259860788863, "grad_norm": 0.8247230052947998, "learning_rate": 1.9999676891310763e-05, "loss": 0.5066397786140442, "step": 602, "token_acc": 0.8376443418013857 }, { "epoch": 0.03253655641288512, "grad_norm": 0.9632172584533691, "learning_rate": 1.999966269048885e-05, "loss": 0.5960294008255005, "step": 603, "token_acc": 0.8027627456565081 }, { "epoch": 0.032590514217881617, "grad_norm": 1.048697829246521, "learning_rate": 1.999964818427986e-05, "loss": 0.5367423295974731, "step": 604, "token_acc": 0.8252758274824473 }, { "epoch": 0.03264447202287811, "grad_norm": 0.8479297757148743, "learning_rate": 1.9999633372684232e-05, "loss": 0.5366093516349792, "step": 605, "token_acc": 0.8287065098799854 }, { "epoch": 0.0326984298278746, "grad_norm": 0.8717381358146667, "learning_rate": 1.999961825570242e-05, "loss": 0.5337352156639099, "step": 606, "token_acc": 0.8254013354169626 }, { "epoch": 0.0327523876328711, "grad_norm": 0.9830771684646606, "learning_rate": 1.999960283333489e-05, "loss": 0.5116273164749146, "step": 607, "token_acc": 0.8324022346368715 }, { "epoch": 0.03280634543786759, "grad_norm": 1.0258338451385498, "learning_rate": 1.9999587105582103e-05, "loss": 0.4936754107475281, "step": 608, "token_acc": 0.8313731986085804 }, { "epoch": 0.03286030324286408, "grad_norm": 0.8957160115242004, "learning_rate": 1.999957107244455e-05, "loss": 0.5228071212768555, "step": 609, "token_acc": 0.8261666666666667 }, { "epoch": 0.03291426104786057, "grad_norm": 0.8339514136314392, "learning_rate": 1.9999554733922714e-05, "loss": 0.5627737045288086, "step": 610, "token_acc": 0.8082209377007065 }, { "epoch": 0.03296821885285706, "grad_norm": 0.6850723028182983, "learning_rate": 1.9999538090017093e-05, "loss": 0.47666221857070923, "step": 611, "token_acc": 0.8452082587420934 }, { "epoch": 0.03302217665785356, "grad_norm": 1.072378158569336, "learning_rate": 1.9999521140728203e-05, "loss": 0.5491141080856323, "step": 612, "token_acc": 0.8240407038371846 }, { "epoch": 0.03307613446285005, "grad_norm": 0.9554394483566284, "learning_rate": 1.9999503886056553e-05, "loss": 0.5154697895050049, "step": 613, "token_acc": 0.8289290681502086 }, { "epoch": 0.03313009226784654, "grad_norm": 0.9063875079154968, "learning_rate": 1.999948632600268e-05, "loss": 0.4805396795272827, "step": 614, "token_acc": 0.8379948036069081 }, { "epoch": 0.03318405007284304, "grad_norm": 0.940636396408081, "learning_rate": 1.9999468460567107e-05, "loss": 0.5338650345802307, "step": 615, "token_acc": 0.8266331658291457 }, { "epoch": 0.03323800787783953, "grad_norm": 0.8178160190582275, "learning_rate": 1.999945028975039e-05, "loss": 0.49920395016670227, "step": 616, "token_acc": 0.8327939590075513 }, { "epoch": 0.033291965682836024, "grad_norm": 0.94621342420578, "learning_rate": 1.9999431813553084e-05, "loss": 0.5820983648300171, "step": 617, "token_acc": 0.8145056091834072 }, { "epoch": 0.03334592348783252, "grad_norm": 0.8332968354225159, "learning_rate": 1.9999413031975744e-05, "loss": 0.5713028311729431, "step": 618, "token_acc": 0.8166810221073787 }, { "epoch": 0.03339988129282901, "grad_norm": 0.8866222500801086, "learning_rate": 1.9999393945018953e-05, "loss": 0.5320980548858643, "step": 619, "token_acc": 0.8241931251639989 }, { "epoch": 0.033453839097825504, "grad_norm": 0.6398501396179199, "learning_rate": 1.9999374552683288e-05, "loss": 0.48577165603637695, "step": 620, "token_acc": 0.8371331401939791 }, { "epoch": 0.03350779690282199, "grad_norm": 0.6766622066497803, "learning_rate": 1.9999354854969346e-05, "loss": 0.4710675776004791, "step": 621, "token_acc": 0.8459761731096523 }, { "epoch": 0.033561754707818484, "grad_norm": 0.771361768245697, "learning_rate": 1.999933485187772e-05, "loss": 0.5012290477752686, "step": 622, "token_acc": 0.8369226199316798 }, { "epoch": 0.03361571251281498, "grad_norm": 0.7360581755638123, "learning_rate": 1.9999314543409036e-05, "loss": 0.5206296443939209, "step": 623, "token_acc": 0.8269938650306748 }, { "epoch": 0.03366967031781147, "grad_norm": 0.6264323592185974, "learning_rate": 1.99992939295639e-05, "loss": 0.490925669670105, "step": 624, "token_acc": 0.8381078601284535 }, { "epoch": 0.033723628122807964, "grad_norm": 0.8488950133323669, "learning_rate": 1.999927301034295e-05, "loss": 0.46874284744262695, "step": 625, "token_acc": 0.8412815319462346 }, { "epoch": 0.03377758592780446, "grad_norm": 0.9371679425239563, "learning_rate": 1.999925178574682e-05, "loss": 0.5592284202575684, "step": 626, "token_acc": 0.8183837822826993 }, { "epoch": 0.03383154373280095, "grad_norm": 0.7572945952415466, "learning_rate": 1.999923025577616e-05, "loss": 0.5496758222579956, "step": 627, "token_acc": 0.8231676706827309 }, { "epoch": 0.033885501537797444, "grad_norm": 0.689953625202179, "learning_rate": 1.999920842043163e-05, "loss": 0.5818324685096741, "step": 628, "token_acc": 0.8145088421324383 }, { "epoch": 0.03393945934279394, "grad_norm": 0.599328875541687, "learning_rate": 1.999918627971389e-05, "loss": 0.49176985025405884, "step": 629, "token_acc": 0.8380493033226152 }, { "epoch": 0.03399341714779043, "grad_norm": 0.8178456425666809, "learning_rate": 1.999916383362362e-05, "loss": 0.5048612952232361, "step": 630, "token_acc": 0.8332881906825569 }, { "epoch": 0.034047374952786924, "grad_norm": 0.7903381586074829, "learning_rate": 1.999914108216151e-05, "loss": 0.5535815954208374, "step": 631, "token_acc": 0.8179362506753106 }, { "epoch": 0.03410133275778341, "grad_norm": 0.7601591348648071, "learning_rate": 1.999911802532825e-05, "loss": 0.4726216793060303, "step": 632, "token_acc": 0.8443825910931174 }, { "epoch": 0.034155290562779904, "grad_norm": 0.6819126605987549, "learning_rate": 1.9999094663124544e-05, "loss": 0.5375242233276367, "step": 633, "token_acc": 0.8222469764481222 }, { "epoch": 0.0342092483677764, "grad_norm": 0.8468921780586243, "learning_rate": 1.9999070995551105e-05, "loss": 0.5833778381347656, "step": 634, "token_acc": 0.8107658157602664 }, { "epoch": 0.03426320617277289, "grad_norm": 0.7224376201629639, "learning_rate": 1.9999047022608656e-05, "loss": 0.5311039686203003, "step": 635, "token_acc": 0.8273381294964028 }, { "epoch": 0.034317163977769384, "grad_norm": 0.8187092542648315, "learning_rate": 1.9999022744297933e-05, "loss": 0.5809391736984253, "step": 636, "token_acc": 0.8105109489051094 }, { "epoch": 0.03437112178276588, "grad_norm": 1.0031899213790894, "learning_rate": 1.9998998160619677e-05, "loss": 0.5945926904678345, "step": 637, "token_acc": 0.8037882467217096 }, { "epoch": 0.03442507958776237, "grad_norm": 0.8418897390365601, "learning_rate": 1.9998973271574632e-05, "loss": 0.4800335764884949, "step": 638, "token_acc": 0.8438256658595642 }, { "epoch": 0.034479037392758864, "grad_norm": 1.0012614727020264, "learning_rate": 1.9998948077163564e-05, "loss": 0.4967188537120819, "step": 639, "token_acc": 0.8365805168986084 }, { "epoch": 0.03453299519775536, "grad_norm": 0.9153357744216919, "learning_rate": 1.999892257738724e-05, "loss": 0.5619934797286987, "step": 640, "token_acc": 0.8218844113007314 }, { "epoch": 0.03458695300275185, "grad_norm": 0.7652435302734375, "learning_rate": 1.9998896772246445e-05, "loss": 0.5490752458572388, "step": 641, "token_acc": 0.8192488262910798 }, { "epoch": 0.03464091080774834, "grad_norm": 0.6788078546524048, "learning_rate": 1.999887066174196e-05, "loss": 0.5347854495048523, "step": 642, "token_acc": 0.8340925789860397 }, { "epoch": 0.03469486861274483, "grad_norm": 0.6717011332511902, "learning_rate": 1.999884424587458e-05, "loss": 0.4849257469177246, "step": 643, "token_acc": 0.8446158690176322 }, { "epoch": 0.034748826417741324, "grad_norm": 0.8842484354972839, "learning_rate": 1.999881752464512e-05, "loss": 0.5520913600921631, "step": 644, "token_acc": 0.815624064651302 }, { "epoch": 0.03480278422273782, "grad_norm": 0.8340261578559875, "learning_rate": 1.999879049805439e-05, "loss": 0.5063179135322571, "step": 645, "token_acc": 0.8344580636275996 }, { "epoch": 0.03485674202773431, "grad_norm": 0.7246527075767517, "learning_rate": 1.999876316610322e-05, "loss": 0.4796162247657776, "step": 646, "token_acc": 0.8468918386121698 }, { "epoch": 0.034910699832730804, "grad_norm": 0.939963161945343, "learning_rate": 1.999873552879244e-05, "loss": 0.5722787380218506, "step": 647, "token_acc": 0.8119010819165379 }, { "epoch": 0.0349646576377273, "grad_norm": 0.7004358172416687, "learning_rate": 1.9998707586122898e-05, "loss": 0.5554372072219849, "step": 648, "token_acc": 0.8196135521439916 }, { "epoch": 0.03501861544272379, "grad_norm": 1.0610829591751099, "learning_rate": 1.9998679338095445e-05, "loss": 0.541815996170044, "step": 649, "token_acc": 0.8247855767547698 }, { "epoch": 0.035072573247720285, "grad_norm": 0.8439702987670898, "learning_rate": 1.9998650784710945e-05, "loss": 0.4913932681083679, "step": 650, "token_acc": 0.8412287793047696 }, { "epoch": 0.03512653105271678, "grad_norm": 0.7257934808731079, "learning_rate": 1.9998621925970268e-05, "loss": 0.5043317675590515, "step": 651, "token_acc": 0.8357645083429531 }, { "epoch": 0.03518048885771327, "grad_norm": 0.675272524356842, "learning_rate": 1.9998592761874294e-05, "loss": 0.5127677917480469, "step": 652, "token_acc": 0.8237398879900436 }, { "epoch": 0.03523444666270976, "grad_norm": 0.8529042601585388, "learning_rate": 1.999856329242392e-05, "loss": 0.5261150598526001, "step": 653, "token_acc": 0.8233045212765957 }, { "epoch": 0.03528840446770625, "grad_norm": 0.8617729544639587, "learning_rate": 1.9998533517620044e-05, "loss": 0.4845700263977051, "step": 654, "token_acc": 0.8403642445642074 }, { "epoch": 0.035342362272702744, "grad_norm": 0.8329753279685974, "learning_rate": 1.999850343746357e-05, "loss": 0.5258035659790039, "step": 655, "token_acc": 0.8309651227163684 }, { "epoch": 0.03539632007769924, "grad_norm": 0.6431788206100464, "learning_rate": 1.9998473051955423e-05, "loss": 0.5103585720062256, "step": 656, "token_acc": 0.8320450530035336 }, { "epoch": 0.03545027788269573, "grad_norm": 0.6633704900741577, "learning_rate": 1.9998442361096525e-05, "loss": 0.5560773015022278, "step": 657, "token_acc": 0.8205730389854392 }, { "epoch": 0.035504235687692225, "grad_norm": 0.9051008224487305, "learning_rate": 1.9998411364887816e-05, "loss": 0.5901294946670532, "step": 658, "token_acc": 0.8064516129032258 }, { "epoch": 0.03555819349268872, "grad_norm": 0.8765365481376648, "learning_rate": 1.9998380063330248e-05, "loss": 0.5346117615699768, "step": 659, "token_acc": 0.8310846145800649 }, { "epoch": 0.03561215129768521, "grad_norm": 0.7996556162834167, "learning_rate": 1.999834845642477e-05, "loss": 0.5191922187805176, "step": 660, "token_acc": 0.8267982342248766 }, { "epoch": 0.035666109102681705, "grad_norm": 0.6919018626213074, "learning_rate": 1.9998316544172348e-05, "loss": 0.5633431673049927, "step": 661, "token_acc": 0.8157529930686831 }, { "epoch": 0.0357200669076782, "grad_norm": 0.697823166847229, "learning_rate": 1.999828432657396e-05, "loss": 0.564637303352356, "step": 662, "token_acc": 0.816382252559727 }, { "epoch": 0.03577402471267469, "grad_norm": 0.8701587915420532, "learning_rate": 1.9998251803630587e-05, "loss": 0.518159031867981, "step": 663, "token_acc": 0.8248333333333333 }, { "epoch": 0.03582798251767118, "grad_norm": 0.7165060639381409, "learning_rate": 1.9998218975343223e-05, "loss": 0.528765082359314, "step": 664, "token_acc": 0.8253215264600464 }, { "epoch": 0.03588194032266767, "grad_norm": 0.7766932845115662, "learning_rate": 1.999818584171287e-05, "loss": 0.5403681993484497, "step": 665, "token_acc": 0.824322446143155 }, { "epoch": 0.035935898127664165, "grad_norm": 0.7240355610847473, "learning_rate": 1.999815240274054e-05, "loss": 0.46083390712738037, "step": 666, "token_acc": 0.8443285996764336 }, { "epoch": 0.03598985593266066, "grad_norm": 0.7319231629371643, "learning_rate": 1.999811865842726e-05, "loss": 0.559910774230957, "step": 667, "token_acc": 0.8143236074270557 }, { "epoch": 0.03604381373765715, "grad_norm": 0.6063544154167175, "learning_rate": 1.9998084608774045e-05, "loss": 0.5400769114494324, "step": 668, "token_acc": 0.8280173513651442 }, { "epoch": 0.036097771542653645, "grad_norm": 0.6104033589363098, "learning_rate": 1.9998050253781954e-05, "loss": 0.5161378979682922, "step": 669, "token_acc": 0.8329936132626715 }, { "epoch": 0.03615172934765014, "grad_norm": 0.7062622904777527, "learning_rate": 1.9998015593452023e-05, "loss": 0.4672466814517975, "step": 670, "token_acc": 0.8494735843687755 }, { "epoch": 0.03620568715264663, "grad_norm": 0.8348633050918579, "learning_rate": 1.999798062778532e-05, "loss": 0.5301738381385803, "step": 671, "token_acc": 0.8233604595500239 }, { "epoch": 0.036259644957643125, "grad_norm": 0.5692915916442871, "learning_rate": 1.9997945356782905e-05, "loss": 0.4394076466560364, "step": 672, "token_acc": 0.8517869656622284 }, { "epoch": 0.03631360276263962, "grad_norm": 0.6038739681243896, "learning_rate": 1.9997909780445856e-05, "loss": 0.4083053469657898, "step": 673, "token_acc": 0.8620869565217392 }, { "epoch": 0.03636756056763611, "grad_norm": 0.8140208721160889, "learning_rate": 1.9997873898775263e-05, "loss": 0.450700044631958, "step": 674, "token_acc": 0.8544213367201143 }, { "epoch": 0.0364215183726326, "grad_norm": 0.8146864771842957, "learning_rate": 1.999783771177222e-05, "loss": 0.6056511402130127, "step": 675, "token_acc": 0.8018976791896397 }, { "epoch": 0.03647547617762909, "grad_norm": 0.6840219497680664, "learning_rate": 1.999780121943783e-05, "loss": 0.5296991467475891, "step": 676, "token_acc": 0.8272499670575834 }, { "epoch": 0.036529433982625585, "grad_norm": 0.7526079416275024, "learning_rate": 1.9997764421773214e-05, "loss": 0.512611985206604, "step": 677, "token_acc": 0.8381677197013211 }, { "epoch": 0.03658339178762208, "grad_norm": 1.007535696029663, "learning_rate": 1.9997727318779488e-05, "loss": 0.5326634645462036, "step": 678, "token_acc": 0.8251433251433251 }, { "epoch": 0.03663734959261857, "grad_norm": 0.7053824067115784, "learning_rate": 1.999768991045779e-05, "loss": 0.5275025367736816, "step": 679, "token_acc": 0.8238671063881764 }, { "epoch": 0.036691307397615065, "grad_norm": 0.7813829183578491, "learning_rate": 1.9997652196809263e-05, "loss": 0.54341721534729, "step": 680, "token_acc": 0.8259843497702148 }, { "epoch": 0.03674526520261156, "grad_norm": 0.8384990692138672, "learning_rate": 1.9997614177835057e-05, "loss": 0.5387454628944397, "step": 681, "token_acc": 0.8242397137745975 }, { "epoch": 0.03679922300760805, "grad_norm": 0.8376085758209229, "learning_rate": 1.9997575853536332e-05, "loss": 0.43925732374191284, "step": 682, "token_acc": 0.8553514882837239 }, { "epoch": 0.036853180812604545, "grad_norm": 0.7973614931106567, "learning_rate": 1.9997537223914255e-05, "loss": 0.6453852653503418, "step": 683, "token_acc": 0.7978949725674617 }, { "epoch": 0.03690713861760104, "grad_norm": 0.8008206486701965, "learning_rate": 1.9997498288970017e-05, "loss": 0.5706299543380737, "step": 684, "token_acc": 0.8129339686861811 }, { "epoch": 0.03696109642259753, "grad_norm": 0.7605113387107849, "learning_rate": 1.99974590487048e-05, "loss": 0.48914071917533875, "step": 685, "token_acc": 0.839421499925451 }, { "epoch": 0.03701505422759402, "grad_norm": 0.7839977145195007, "learning_rate": 1.9997419503119798e-05, "loss": 0.49294278025627136, "step": 686, "token_acc": 0.8336913712853562 }, { "epoch": 0.03706901203259051, "grad_norm": 0.5436283946037292, "learning_rate": 1.9997379652216225e-05, "loss": 0.4384850561618805, "step": 687, "token_acc": 0.8524551740302355 }, { "epoch": 0.037122969837587005, "grad_norm": 0.6184090971946716, "learning_rate": 1.9997339495995296e-05, "loss": 0.45541834831237793, "step": 688, "token_acc": 0.8420557818865559 }, { "epoch": 0.0371769276425835, "grad_norm": 0.8474280834197998, "learning_rate": 1.999729903445824e-05, "loss": 0.6118043661117554, "step": 689, "token_acc": 0.8011346670647954 }, { "epoch": 0.03723088544757999, "grad_norm": 0.8187999725341797, "learning_rate": 1.9997258267606288e-05, "loss": 0.5166527032852173, "step": 690, "token_acc": 0.8276255707762558 }, { "epoch": 0.037284843252576486, "grad_norm": 0.6930412650108337, "learning_rate": 1.9997217195440688e-05, "loss": 0.5661232471466064, "step": 691, "token_acc": 0.8185315270112059 }, { "epoch": 0.03733880105757298, "grad_norm": 0.9094674587249756, "learning_rate": 1.9997175817962696e-05, "loss": 0.5406121015548706, "step": 692, "token_acc": 0.8179473005343653 }, { "epoch": 0.03739275886256947, "grad_norm": 0.6966663599014282, "learning_rate": 1.999713413517357e-05, "loss": 0.5119760036468506, "step": 693, "token_acc": 0.8317152103559871 }, { "epoch": 0.037446716667565966, "grad_norm": 0.6228304505348206, "learning_rate": 1.999709214707459e-05, "loss": 0.5222700834274292, "step": 694, "token_acc": 0.8241095254669916 }, { "epoch": 0.03750067447256246, "grad_norm": 0.8705143928527832, "learning_rate": 1.999704985366703e-05, "loss": 0.5886107683181763, "step": 695, "token_acc": 0.8102083882880506 }, { "epoch": 0.037554632277558946, "grad_norm": 0.8137732744216919, "learning_rate": 1.9997007254952188e-05, "loss": 0.47786247730255127, "step": 696, "token_acc": 0.8373445323857213 }, { "epoch": 0.03760859008255544, "grad_norm": 0.7403764128684998, "learning_rate": 1.9996964350931364e-05, "loss": 0.5288150310516357, "step": 697, "token_acc": 0.827828120432622 }, { "epoch": 0.03766254788755193, "grad_norm": 0.6637607216835022, "learning_rate": 1.999692114160587e-05, "loss": 0.5690793395042419, "step": 698, "token_acc": 0.816498111368344 }, { "epoch": 0.037716505692548426, "grad_norm": 0.9171152114868164, "learning_rate": 1.9996877626977022e-05, "loss": 0.579903244972229, "step": 699, "token_acc": 0.8158315565031983 }, { "epoch": 0.03777046349754492, "grad_norm": 0.7275369167327881, "learning_rate": 1.9996833807046146e-05, "loss": 0.5067291855812073, "step": 700, "token_acc": 0.8333757961783439 }, { "epoch": 0.03782442130254141, "grad_norm": 0.7891184091567993, "learning_rate": 1.999678968181459e-05, "loss": 0.4860020875930786, "step": 701, "token_acc": 0.8369455134697185 }, { "epoch": 0.037878379107537906, "grad_norm": 0.7390373349189758, "learning_rate": 1.9996745251283692e-05, "loss": 0.5140175223350525, "step": 702, "token_acc": 0.830745138979512 }, { "epoch": 0.0379323369125344, "grad_norm": 0.9746444821357727, "learning_rate": 1.9996700515454813e-05, "loss": 0.5370857119560242, "step": 703, "token_acc": 0.8311663479923518 }, { "epoch": 0.03798629471753089, "grad_norm": 0.6375455260276794, "learning_rate": 1.9996655474329324e-05, "loss": 0.5284375548362732, "step": 704, "token_acc": 0.8274160383824537 }, { "epoch": 0.038040252522527386, "grad_norm": 0.6925625205039978, "learning_rate": 1.999661012790859e-05, "loss": 0.5374724864959717, "step": 705, "token_acc": 0.8209312139541426 }, { "epoch": 0.03809421032752388, "grad_norm": 0.7248083353042603, "learning_rate": 1.9996564476194004e-05, "loss": 0.515839695930481, "step": 706, "token_acc": 0.8261638987203921 }, { "epoch": 0.038148168132520366, "grad_norm": 0.794209897518158, "learning_rate": 1.9996518519186958e-05, "loss": 0.48846131563186646, "step": 707, "token_acc": 0.8408743503515744 }, { "epoch": 0.03820212593751686, "grad_norm": 0.6901549696922302, "learning_rate": 1.999647225688885e-05, "loss": 0.5013828873634338, "step": 708, "token_acc": 0.8392023759015698 }, { "epoch": 0.03825608374251335, "grad_norm": 0.6700540781021118, "learning_rate": 1.9996425689301103e-05, "loss": 0.4717881381511688, "step": 709, "token_acc": 0.8447383553766532 }, { "epoch": 0.038310041547509846, "grad_norm": 0.5604382157325745, "learning_rate": 1.9996378816425134e-05, "loss": 0.49571019411087036, "step": 710, "token_acc": 0.8368391918073623 }, { "epoch": 0.03836399935250634, "grad_norm": 0.8566590547561646, "learning_rate": 1.9996331638262374e-05, "loss": 0.5386883020401001, "step": 711, "token_acc": 0.8230852211434736 }, { "epoch": 0.03841795715750283, "grad_norm": 0.5838965773582458, "learning_rate": 1.9996284154814265e-05, "loss": 0.44113844633102417, "step": 712, "token_acc": 0.8494314289628716 }, { "epoch": 0.038471914962499326, "grad_norm": 0.8257099986076355, "learning_rate": 1.9996236366082255e-05, "loss": 0.5098543167114258, "step": 713, "token_acc": 0.8267008985879333 }, { "epoch": 0.03852587276749582, "grad_norm": 0.9011340737342834, "learning_rate": 1.9996188272067807e-05, "loss": 0.5278942584991455, "step": 714, "token_acc": 0.8192293162070268 }, { "epoch": 0.03857983057249231, "grad_norm": 0.7130506634712219, "learning_rate": 1.9996139872772388e-05, "loss": 0.5282433032989502, "step": 715, "token_acc": 0.8228163992869876 }, { "epoch": 0.038633788377488806, "grad_norm": 0.7536584734916687, "learning_rate": 1.9996091168197472e-05, "loss": 0.5085448026657104, "step": 716, "token_acc": 0.8273579013116802 }, { "epoch": 0.0386877461824853, "grad_norm": 0.7500463128089905, "learning_rate": 1.9996042158344554e-05, "loss": 0.5024908185005188, "step": 717, "token_acc": 0.8295250896057348 }, { "epoch": 0.038741703987481786, "grad_norm": 0.8798364400863647, "learning_rate": 1.9995992843215127e-05, "loss": 0.5544589757919312, "step": 718, "token_acc": 0.8189655172413793 }, { "epoch": 0.03879566179247828, "grad_norm": 0.6674599647521973, "learning_rate": 1.9995943222810696e-05, "loss": 0.5129265785217285, "step": 719, "token_acc": 0.8296937039137833 }, { "epoch": 0.03884961959747477, "grad_norm": 0.6754684448242188, "learning_rate": 1.999589329713278e-05, "loss": 0.44582682847976685, "step": 720, "token_acc": 0.8518854036915412 }, { "epoch": 0.038903577402471266, "grad_norm": 0.5703819394111633, "learning_rate": 1.9995843066182896e-05, "loss": 0.5300000905990601, "step": 721, "token_acc": 0.8250078872646966 }, { "epoch": 0.03895753520746776, "grad_norm": 0.5433131456375122, "learning_rate": 1.9995792529962586e-05, "loss": 0.45464053750038147, "step": 722, "token_acc": 0.8513054450160148 }, { "epoch": 0.03901149301246425, "grad_norm": 0.7945834398269653, "learning_rate": 1.9995741688473395e-05, "loss": 0.5527700185775757, "step": 723, "token_acc": 0.8239168618266979 }, { "epoch": 0.039065450817460746, "grad_norm": 0.7169800996780396, "learning_rate": 1.9995690541716867e-05, "loss": 0.5052556991577148, "step": 724, "token_acc": 0.8318009734991888 }, { "epoch": 0.03911940862245724, "grad_norm": 0.6874444484710693, "learning_rate": 1.9995639089694567e-05, "loss": 0.5295736193656921, "step": 725, "token_acc": 0.8262862211709048 }, { "epoch": 0.03917336642745373, "grad_norm": 0.8068800568580627, "learning_rate": 1.9995587332408068e-05, "loss": 0.5262911915779114, "step": 726, "token_acc": 0.8256808106396454 }, { "epoch": 0.03922732423245023, "grad_norm": 0.66557377576828, "learning_rate": 1.9995535269858952e-05, "loss": 0.5150209069252014, "step": 727, "token_acc": 0.8274740906319854 }, { "epoch": 0.03928128203744672, "grad_norm": 0.741615355014801, "learning_rate": 1.999548290204881e-05, "loss": 0.5275938510894775, "step": 728, "token_acc": 0.8281503316138541 }, { "epoch": 0.039335239842443206, "grad_norm": 0.484131395816803, "learning_rate": 1.9995430228979234e-05, "loss": 0.5339190363883972, "step": 729, "token_acc": 0.8279364440868866 }, { "epoch": 0.0393891976474397, "grad_norm": 0.7165807485580444, "learning_rate": 1.999537725065184e-05, "loss": 0.4788692891597748, "step": 730, "token_acc": 0.8383348231388073 }, { "epoch": 0.03944315545243619, "grad_norm": 0.8301909565925598, "learning_rate": 1.9995323967068244e-05, "loss": 0.5223414897918701, "step": 731, "token_acc": 0.8284923928077456 }, { "epoch": 0.03949711325743269, "grad_norm": 0.9114118218421936, "learning_rate": 1.999527037823007e-05, "loss": 0.5551576614379883, "step": 732, "token_acc": 0.8219272144655527 }, { "epoch": 0.03955107106242918, "grad_norm": 0.7574243545532227, "learning_rate": 1.999521648413896e-05, "loss": 0.5214656591415405, "step": 733, "token_acc": 0.8293492695883135 }, { "epoch": 0.03960502886742567, "grad_norm": 0.8392720818519592, "learning_rate": 1.9995162284796554e-05, "loss": 0.5341144800186157, "step": 734, "token_acc": 0.8251719394773039 }, { "epoch": 0.03965898667242217, "grad_norm": 0.6373475790023804, "learning_rate": 1.9995107780204508e-05, "loss": 0.495095431804657, "step": 735, "token_acc": 0.8340812613705276 }, { "epoch": 0.03971294447741866, "grad_norm": 0.7025432586669922, "learning_rate": 1.999505297036449e-05, "loss": 0.5461391806602478, "step": 736, "token_acc": 0.8191255111670337 }, { "epoch": 0.039766902282415154, "grad_norm": 0.7703643441200256, "learning_rate": 1.999499785527818e-05, "loss": 0.5931162238121033, "step": 737, "token_acc": 0.8115244689590424 }, { "epoch": 0.03982086008741165, "grad_norm": 0.6289198398590088, "learning_rate": 1.9994942434947243e-05, "loss": 0.4691010117530823, "step": 738, "token_acc": 0.84798827604583 }, { "epoch": 0.03987481789240813, "grad_norm": 0.6398928165435791, "learning_rate": 1.9994886709373392e-05, "loss": 0.4900836646556854, "step": 739, "token_acc": 0.8327790341891712 }, { "epoch": 0.03992877569740463, "grad_norm": 0.594480574131012, "learning_rate": 1.9994830678558313e-05, "loss": 0.515031099319458, "step": 740, "token_acc": 0.8293556085918854 }, { "epoch": 0.03998273350240112, "grad_norm": 0.8559938669204712, "learning_rate": 1.9994774342503724e-05, "loss": 0.563873291015625, "step": 741, "token_acc": 0.8220128255463944 }, { "epoch": 0.040036691307397614, "grad_norm": 0.738865077495575, "learning_rate": 1.9994717701211346e-05, "loss": 0.5563625693321228, "step": 742, "token_acc": 0.8204403451353763 }, { "epoch": 0.04009064911239411, "grad_norm": 0.7126692533493042, "learning_rate": 1.9994660754682908e-05, "loss": 0.5234935879707336, "step": 743, "token_acc": 0.8274102423911063 }, { "epoch": 0.0401446069173906, "grad_norm": 0.9074109196662903, "learning_rate": 1.999460350292015e-05, "loss": 0.5604146718978882, "step": 744, "token_acc": 0.8209800106138334 }, { "epoch": 0.040198564722387094, "grad_norm": 0.8344096541404724, "learning_rate": 1.9994545945924816e-05, "loss": 0.5602492690086365, "step": 745, "token_acc": 0.818769761425697 }, { "epoch": 0.04025252252738359, "grad_norm": 0.6931573748588562, "learning_rate": 1.999448808369867e-05, "loss": 0.48295530676841736, "step": 746, "token_acc": 0.8417891575786313 }, { "epoch": 0.04030648033238008, "grad_norm": 0.6317618489265442, "learning_rate": 1.9994429916243473e-05, "loss": 0.5763752460479736, "step": 747, "token_acc": 0.8099640472521829 }, { "epoch": 0.040360438137376574, "grad_norm": 0.6956380605697632, "learning_rate": 1.999437144356101e-05, "loss": 0.5184857249259949, "step": 748, "token_acc": 0.8340505648197956 }, { "epoch": 0.04041439594237307, "grad_norm": 0.6922042965888977, "learning_rate": 1.9994312665653055e-05, "loss": 0.48970696330070496, "step": 749, "token_acc": 0.8378378378378378 }, { "epoch": 0.040468353747369554, "grad_norm": 0.8615827560424805, "learning_rate": 1.999425358252141e-05, "loss": 0.5062326192855835, "step": 750, "token_acc": 0.8289741847826086 }, { "epoch": 0.04052231155236605, "grad_norm": 0.5488879680633545, "learning_rate": 1.9994194194167883e-05, "loss": 0.545245885848999, "step": 751, "token_acc": 0.822429906542056 }, { "epoch": 0.04057626935736254, "grad_norm": 0.6310308575630188, "learning_rate": 1.999413450059428e-05, "loss": 0.4934542775154114, "step": 752, "token_acc": 0.8399165507649513 }, { "epoch": 0.040630227162359034, "grad_norm": 0.7025688290596008, "learning_rate": 1.999407450180243e-05, "loss": 0.5048815011978149, "step": 753, "token_acc": 0.83134066634784 }, { "epoch": 0.04068418496735553, "grad_norm": 0.6867328882217407, "learning_rate": 1.999401419779416e-05, "loss": 0.5175586938858032, "step": 754, "token_acc": 0.8265022577283779 }, { "epoch": 0.04073814277235202, "grad_norm": 0.7566519975662231, "learning_rate": 1.9993953588571316e-05, "loss": 0.4991498291492462, "step": 755, "token_acc": 0.8394947055545235 }, { "epoch": 0.040792100577348514, "grad_norm": 0.7721086144447327, "learning_rate": 1.999389267413575e-05, "loss": 0.5111854076385498, "step": 756, "token_acc": 0.835820895522388 }, { "epoch": 0.04084605838234501, "grad_norm": 0.9128900766372681, "learning_rate": 1.9993831454489317e-05, "loss": 0.5278908014297485, "step": 757, "token_acc": 0.8368336025848142 }, { "epoch": 0.0409000161873415, "grad_norm": 0.7605251669883728, "learning_rate": 1.999376992963389e-05, "loss": 0.5769973993301392, "step": 758, "token_acc": 0.8066470203012442 }, { "epoch": 0.040953973992337994, "grad_norm": 0.6838811039924622, "learning_rate": 1.999370809957135e-05, "loss": 0.5658079981803894, "step": 759, "token_acc": 0.8155425219941349 }, { "epoch": 0.04100793179733449, "grad_norm": 0.6947515606880188, "learning_rate": 1.9993645964303576e-05, "loss": 0.4981139898300171, "step": 760, "token_acc": 0.8379559589847033 }, { "epoch": 0.041061889602330974, "grad_norm": 0.598534107208252, "learning_rate": 1.9993583523832477e-05, "loss": 0.4872084856033325, "step": 761, "token_acc": 0.8378277618988443 }, { "epoch": 0.04111584740732747, "grad_norm": 0.759380578994751, "learning_rate": 1.9993520778159952e-05, "loss": 0.519115149974823, "step": 762, "token_acc": 0.8303977272727273 }, { "epoch": 0.04116980521232396, "grad_norm": 0.786825954914093, "learning_rate": 1.9993457727287923e-05, "loss": 0.5069975256919861, "step": 763, "token_acc": 0.832145816072908 }, { "epoch": 0.041223763017320454, "grad_norm": 0.6054997444152832, "learning_rate": 1.9993394371218312e-05, "loss": 0.4804415702819824, "step": 764, "token_acc": 0.8381934083819341 }, { "epoch": 0.04127772082231695, "grad_norm": 0.7583768367767334, "learning_rate": 1.9993330709953054e-05, "loss": 0.5060884356498718, "step": 765, "token_acc": 0.8289907157831686 }, { "epoch": 0.04133167862731344, "grad_norm": 0.5890958309173584, "learning_rate": 1.9993266743494097e-05, "loss": 0.5307419300079346, "step": 766, "token_acc": 0.8190575370997136 }, { "epoch": 0.041385636432309934, "grad_norm": 0.8642886877059937, "learning_rate": 1.999320247184339e-05, "loss": 0.488491415977478, "step": 767, "token_acc": 0.8331764262850687 }, { "epoch": 0.04143959423730643, "grad_norm": 0.5825028419494629, "learning_rate": 1.9993137895002898e-05, "loss": 0.4759356379508972, "step": 768, "token_acc": 0.8401400233372228 }, { "epoch": 0.04149355204230292, "grad_norm": 0.5979899168014526, "learning_rate": 1.999307301297459e-05, "loss": 0.42569321393966675, "step": 769, "token_acc": 0.8586572438162544 }, { "epoch": 0.041547509847299414, "grad_norm": 0.740196943283081, "learning_rate": 1.9993007825760452e-05, "loss": 0.48882627487182617, "step": 770, "token_acc": 0.8302354399008675 }, { "epoch": 0.04160146765229591, "grad_norm": 0.9441538453102112, "learning_rate": 1.9992942333362472e-05, "loss": 0.5238233804702759, "step": 771, "token_acc": 0.8302801724137931 }, { "epoch": 0.041655425457292394, "grad_norm": 0.473201721906662, "learning_rate": 1.999287653578265e-05, "loss": 0.49854421615600586, "step": 772, "token_acc": 0.8356573705179283 }, { "epoch": 0.04170938326228889, "grad_norm": 0.690707266330719, "learning_rate": 1.9992810433023e-05, "loss": 0.5313490033149719, "step": 773, "token_acc": 0.8277518528685149 }, { "epoch": 0.04176334106728538, "grad_norm": 0.6351072788238525, "learning_rate": 1.9992744025085534e-05, "loss": 0.5377757549285889, "step": 774, "token_acc": 0.820784991360158 }, { "epoch": 0.041817298872281874, "grad_norm": 0.7179339528083801, "learning_rate": 1.9992677311972283e-05, "loss": 0.5803213119506836, "step": 775, "token_acc": 0.8128910228430088 }, { "epoch": 0.04187125667727837, "grad_norm": 0.749590277671814, "learning_rate": 1.9992610293685286e-05, "loss": 0.5234121680259705, "step": 776, "token_acc": 0.8339814537840263 }, { "epoch": 0.04192521448227486, "grad_norm": 0.69253009557724, "learning_rate": 1.999254297022659e-05, "loss": 0.49358588457107544, "step": 777, "token_acc": 0.8387918949917166 }, { "epoch": 0.041979172287271355, "grad_norm": 0.678584098815918, "learning_rate": 1.9992475341598244e-05, "loss": 0.5565459728240967, "step": 778, "token_acc": 0.8154244306418219 }, { "epoch": 0.04203313009226785, "grad_norm": 0.6740459203720093, "learning_rate": 1.9992407407802323e-05, "loss": 0.44952356815338135, "step": 779, "token_acc": 0.8487084870848709 }, { "epoch": 0.04208708789726434, "grad_norm": 0.7038453817367554, "learning_rate": 1.99923391688409e-05, "loss": 0.4784597158432007, "step": 780, "token_acc": 0.8419790483402751 }, { "epoch": 0.042141045702260835, "grad_norm": 0.6021759510040283, "learning_rate": 1.999227062471605e-05, "loss": 0.4490464925765991, "step": 781, "token_acc": 0.8497968659315148 }, { "epoch": 0.04219500350725733, "grad_norm": 0.7100193500518799, "learning_rate": 1.9992201775429875e-05, "loss": 0.38838088512420654, "step": 782, "token_acc": 0.8599130725509863 }, { "epoch": 0.042248961312253815, "grad_norm": 0.7083598375320435, "learning_rate": 1.9992132620984477e-05, "loss": 0.4793590307235718, "step": 783, "token_acc": 0.8455246913580247 }, { "epoch": 0.04230291911725031, "grad_norm": 0.6798230409622192, "learning_rate": 1.9992063161381965e-05, "loss": 0.45298469066619873, "step": 784, "token_acc": 0.8459682977257064 }, { "epoch": 0.0423568769222468, "grad_norm": 0.5466729402542114, "learning_rate": 1.9991993396624464e-05, "loss": 0.44877398014068604, "step": 785, "token_acc": 0.849112426035503 }, { "epoch": 0.042410834727243295, "grad_norm": 0.6926348209381104, "learning_rate": 1.99919233267141e-05, "loss": 0.4518178701400757, "step": 786, "token_acc": 0.8555916125811283 }, { "epoch": 0.04246479253223979, "grad_norm": 0.7740875482559204, "learning_rate": 1.9991852951653015e-05, "loss": 0.4970339834690094, "step": 787, "token_acc": 0.8369607211848036 }, { "epoch": 0.04251875033723628, "grad_norm": 0.6373652815818787, "learning_rate": 1.9991782271443358e-05, "loss": 0.42437660694122314, "step": 788, "token_acc": 0.8561433656560697 }, { "epoch": 0.042572708142232775, "grad_norm": 0.7331618666648865, "learning_rate": 1.9991711286087286e-05, "loss": 0.5035352110862732, "step": 789, "token_acc": 0.8323442136498517 }, { "epoch": 0.04262666594722927, "grad_norm": 0.6167041063308716, "learning_rate": 1.9991639995586975e-05, "loss": 0.5225633382797241, "step": 790, "token_acc": 0.8316440936406853 }, { "epoch": 0.04268062375222576, "grad_norm": 0.6030855774879456, "learning_rate": 1.9991568399944593e-05, "loss": 0.44729727506637573, "step": 791, "token_acc": 0.8445032333921223 }, { "epoch": 0.042734581557222255, "grad_norm": 0.5410134196281433, "learning_rate": 1.9991496499162325e-05, "loss": 0.5369018316268921, "step": 792, "token_acc": 0.8249290891900409 }, { "epoch": 0.04278853936221874, "grad_norm": 0.7519530653953552, "learning_rate": 1.9991424293242375e-05, "loss": 0.48268625140190125, "step": 793, "token_acc": 0.8451076191733791 }, { "epoch": 0.042842497167215235, "grad_norm": 0.8826636075973511, "learning_rate": 1.9991351782186946e-05, "loss": 0.5510557889938354, "step": 794, "token_acc": 0.8186568311049639 }, { "epoch": 0.04289645497221173, "grad_norm": 0.5195849537849426, "learning_rate": 1.999127896599825e-05, "loss": 0.5233798027038574, "step": 795, "token_acc": 0.82758234371499 }, { "epoch": 0.04295041277720822, "grad_norm": 0.7416726350784302, "learning_rate": 1.999120584467851e-05, "loss": 0.5482925176620483, "step": 796, "token_acc": 0.8192736631398342 }, { "epoch": 0.043004370582204715, "grad_norm": 0.5177653431892395, "learning_rate": 1.9991132418229965e-05, "loss": 0.4759179353713989, "step": 797, "token_acc": 0.8422569559915752 }, { "epoch": 0.04305832838720121, "grad_norm": 0.8213993310928345, "learning_rate": 1.9991058686654852e-05, "loss": 0.45248275995254517, "step": 798, "token_acc": 0.8460087994971716 }, { "epoch": 0.0431122861921977, "grad_norm": 0.7552281618118286, "learning_rate": 1.999098464995542e-05, "loss": 0.46048521995544434, "step": 799, "token_acc": 0.8400515369041045 }, { "epoch": 0.043166243997194195, "grad_norm": 0.6086881756782532, "learning_rate": 1.9990910308133936e-05, "loss": 0.4925665557384491, "step": 800, "token_acc": 0.8299051787016776 }, { "epoch": 0.04322020180219069, "grad_norm": 0.6926766633987427, "learning_rate": 1.999083566119267e-05, "loss": 0.4577886462211609, "step": 801, "token_acc": 0.8451945818103633 }, { "epoch": 0.04327415960718718, "grad_norm": 0.4960496723651886, "learning_rate": 1.9990760709133902e-05, "loss": 0.49955320358276367, "step": 802, "token_acc": 0.8406498015873016 }, { "epoch": 0.043328117412183675, "grad_norm": 0.677167534828186, "learning_rate": 1.9990685451959915e-05, "loss": 0.5352984666824341, "step": 803, "token_acc": 0.8244142319930575 }, { "epoch": 0.04338207521718016, "grad_norm": 0.7229345440864563, "learning_rate": 1.9990609889673012e-05, "loss": 0.5671062469482422, "step": 804, "token_acc": 0.8132582864290181 }, { "epoch": 0.043436033022176655, "grad_norm": 0.7543873190879822, "learning_rate": 1.99905340222755e-05, "loss": 0.5593281984329224, "step": 805, "token_acc": 0.8188352647792284 }, { "epoch": 0.04348999082717315, "grad_norm": 0.8504976630210876, "learning_rate": 1.99904578497697e-05, "loss": 0.483770489692688, "step": 806, "token_acc": 0.8395485770363101 }, { "epoch": 0.04354394863216964, "grad_norm": 0.7005698084831238, "learning_rate": 1.999038137215793e-05, "loss": 0.5207713842391968, "step": 807, "token_acc": 0.8312871287128712 }, { "epoch": 0.043597906437166135, "grad_norm": 0.6525324583053589, "learning_rate": 1.9990304589442532e-05, "loss": 0.5232975482940674, "step": 808, "token_acc": 0.8230711233978386 }, { "epoch": 0.04365186424216263, "grad_norm": 0.7299243211746216, "learning_rate": 1.9990227501625848e-05, "loss": 0.5375162363052368, "step": 809, "token_acc": 0.825946940653274 }, { "epoch": 0.04370582204715912, "grad_norm": 0.5912003517150879, "learning_rate": 1.9990150108710235e-05, "loss": 0.4952138662338257, "step": 810, "token_acc": 0.8422730270389096 }, { "epoch": 0.043759779852155616, "grad_norm": 0.7179500460624695, "learning_rate": 1.9990072410698053e-05, "loss": 0.5014303922653198, "step": 811, "token_acc": 0.8311298076923077 }, { "epoch": 0.04381373765715211, "grad_norm": 0.6128879189491272, "learning_rate": 1.9989994407591675e-05, "loss": 0.5563195943832397, "step": 812, "token_acc": 0.8247713824636902 }, { "epoch": 0.0438676954621486, "grad_norm": 0.5393539667129517, "learning_rate": 1.9989916099393486e-05, "loss": 0.5182982683181763, "step": 813, "token_acc": 0.8275862068965517 }, { "epoch": 0.043921653267145096, "grad_norm": 0.7337230443954468, "learning_rate": 1.998983748610588e-05, "loss": 0.53408282995224, "step": 814, "token_acc": 0.8269988738738738 }, { "epoch": 0.04397561107214158, "grad_norm": 0.6759723424911499, "learning_rate": 1.998975856773125e-05, "loss": 0.5430636405944824, "step": 815, "token_acc": 0.8253358925143954 }, { "epoch": 0.044029568877138076, "grad_norm": 0.6419126987457275, "learning_rate": 1.9989679344272013e-05, "loss": 0.546489417552948, "step": 816, "token_acc": 0.8197885196374622 }, { "epoch": 0.04408352668213457, "grad_norm": 0.6005762219429016, "learning_rate": 1.9989599815730584e-05, "loss": 0.5070692896842957, "step": 817, "token_acc": 0.8333695652173913 }, { "epoch": 0.04413748448713106, "grad_norm": 0.4604601263999939, "learning_rate": 1.9989519982109394e-05, "loss": 0.4267187714576721, "step": 818, "token_acc": 0.8584095707248417 }, { "epoch": 0.044191442292127556, "grad_norm": 0.6388283371925354, "learning_rate": 1.998943984341088e-05, "loss": 0.4786813259124756, "step": 819, "token_acc": 0.8420502092050209 }, { "epoch": 0.04424540009712405, "grad_norm": 0.6476960778236389, "learning_rate": 1.998935939963749e-05, "loss": 0.5186057090759277, "step": 820, "token_acc": 0.8268041237113402 }, { "epoch": 0.04429935790212054, "grad_norm": 0.7204508185386658, "learning_rate": 1.9989278650791685e-05, "loss": 0.5286223888397217, "step": 821, "token_acc": 0.8328335832083958 }, { "epoch": 0.044353315707117036, "grad_norm": 0.724105715751648, "learning_rate": 1.998919759687592e-05, "loss": 0.556559145450592, "step": 822, "token_acc": 0.8204015865146257 }, { "epoch": 0.04440727351211353, "grad_norm": 0.6538923978805542, "learning_rate": 1.998911623789268e-05, "loss": 0.4844139814376831, "step": 823, "token_acc": 0.8391284882853306 }, { "epoch": 0.04446123131711002, "grad_norm": 0.5554497838020325, "learning_rate": 1.998903457384445e-05, "loss": 0.4952428340911865, "step": 824, "token_acc": 0.8400494874513963 }, { "epoch": 0.044515189122106516, "grad_norm": 0.703130841255188, "learning_rate": 1.9988952604733718e-05, "loss": 0.480129212141037, "step": 825, "token_acc": 0.8405452528134412 }, { "epoch": 0.044569146927103, "grad_norm": 0.6150124073028564, "learning_rate": 1.9988870330562992e-05, "loss": 0.5102594494819641, "step": 826, "token_acc": 0.8351125673818836 }, { "epoch": 0.044623104732099496, "grad_norm": 0.7921349406242371, "learning_rate": 1.998878775133478e-05, "loss": 0.5720489621162415, "step": 827, "token_acc": 0.8197067944742036 }, { "epoch": 0.04467706253709599, "grad_norm": 0.644656240940094, "learning_rate": 1.998870486705161e-05, "loss": 0.5176788568496704, "step": 828, "token_acc": 0.8326790971540726 }, { "epoch": 0.04473102034209248, "grad_norm": 0.6830840110778809, "learning_rate": 1.9988621677716006e-05, "loss": 0.5072394609451294, "step": 829, "token_acc": 0.8317052749092939 }, { "epoch": 0.044784978147088976, "grad_norm": 0.668459415435791, "learning_rate": 1.9988538183330514e-05, "loss": 0.5234628319740295, "step": 830, "token_acc": 0.8318395868145223 }, { "epoch": 0.04483893595208547, "grad_norm": 0.6528074145317078, "learning_rate": 1.9988454383897684e-05, "loss": 0.5242171287536621, "step": 831, "token_acc": 0.8256975989617131 }, { "epoch": 0.04489289375708196, "grad_norm": 0.6775572299957275, "learning_rate": 1.9988370279420073e-05, "loss": 0.47969797253608704, "step": 832, "token_acc": 0.8416666666666667 }, { "epoch": 0.044946851562078456, "grad_norm": 0.47029006481170654, "learning_rate": 1.9988285869900254e-05, "loss": 0.5270393490791321, "step": 833, "token_acc": 0.83182156133829 }, { "epoch": 0.04500080936707495, "grad_norm": 0.6436229944229126, "learning_rate": 1.9988201155340793e-05, "loss": 0.44994106888771057, "step": 834, "token_acc": 0.8458794818847382 }, { "epoch": 0.04505476717207144, "grad_norm": 0.6338887810707092, "learning_rate": 1.998811613574429e-05, "loss": 0.5449169874191284, "step": 835, "token_acc": 0.8188930744711678 }, { "epoch": 0.045108724977067936, "grad_norm": 0.6493287682533264, "learning_rate": 1.9988030811113337e-05, "loss": 0.5516918897628784, "step": 836, "token_acc": 0.8217086256706562 }, { "epoch": 0.04516268278206442, "grad_norm": 0.8800298571586609, "learning_rate": 1.998794518145054e-05, "loss": 0.5472310781478882, "step": 837, "token_acc": 0.8164668547249647 }, { "epoch": 0.045216640587060916, "grad_norm": 0.7068689465522766, "learning_rate": 1.9987859246758512e-05, "loss": 0.5084781646728516, "step": 838, "token_acc": 0.8313471111781566 }, { "epoch": 0.04527059839205741, "grad_norm": 0.6982377171516418, "learning_rate": 1.9987773007039882e-05, "loss": 0.5172489285469055, "step": 839, "token_acc": 0.8261822737237707 }, { "epoch": 0.0453245561970539, "grad_norm": 0.5540503859519958, "learning_rate": 1.9987686462297278e-05, "loss": 0.4275854229927063, "step": 840, "token_acc": 0.8585570721463927 }, { "epoch": 0.045378514002050396, "grad_norm": 0.8330289125442505, "learning_rate": 1.9987599612533345e-05, "loss": 0.47829169034957886, "step": 841, "token_acc": 0.8407262698230292 }, { "epoch": 0.04543247180704689, "grad_norm": 0.7821216583251953, "learning_rate": 1.9987512457750737e-05, "loss": 0.5185927152633667, "step": 842, "token_acc": 0.817644383810823 }, { "epoch": 0.04548642961204338, "grad_norm": 0.7641642689704895, "learning_rate": 1.9987424997952118e-05, "loss": 0.4635619521141052, "step": 843, "token_acc": 0.8401716738197424 }, { "epoch": 0.045540387417039876, "grad_norm": 0.6345670223236084, "learning_rate": 1.9987337233140153e-05, "loss": 0.5077486038208008, "step": 844, "token_acc": 0.8345302652792027 }, { "epoch": 0.04559434522203637, "grad_norm": 0.7218356728553772, "learning_rate": 1.9987249163317525e-05, "loss": 0.5124481916427612, "step": 845, "token_acc": 0.8327734774353731 }, { "epoch": 0.04564830302703286, "grad_norm": 0.7376925349235535, "learning_rate": 1.9987160788486922e-05, "loss": 0.44089800119400024, "step": 846, "token_acc": 0.8469196093163035 }, { "epoch": 0.04570226083202935, "grad_norm": 0.7686077952384949, "learning_rate": 1.9987072108651048e-05, "loss": 0.5258949995040894, "step": 847, "token_acc": 0.8282236248872858 }, { "epoch": 0.04575621863702584, "grad_norm": 0.6259286999702454, "learning_rate": 1.9986983123812605e-05, "loss": 0.528572678565979, "step": 848, "token_acc": 0.8313825275657337 }, { "epoch": 0.045810176442022336, "grad_norm": 0.5864934921264648, "learning_rate": 1.998689383397432e-05, "loss": 0.5197941064834595, "step": 849, "token_acc": 0.8288132665399897 }, { "epoch": 0.04586413424701883, "grad_norm": 0.8848313689231873, "learning_rate": 1.998680423913891e-05, "loss": 0.540861964225769, "step": 850, "token_acc": 0.8297635605006954 }, { "epoch": 0.04591809205201532, "grad_norm": 0.7667557001113892, "learning_rate": 1.998671433930911e-05, "loss": 0.5359204411506653, "step": 851, "token_acc": 0.8227097130242825 }, { "epoch": 0.04597204985701182, "grad_norm": 0.7434933185577393, "learning_rate": 1.9986624134487678e-05, "loss": 0.58797687292099, "step": 852, "token_acc": 0.8104083512434755 }, { "epoch": 0.04602600766200831, "grad_norm": 0.5375339984893799, "learning_rate": 1.9986533624677355e-05, "loss": 0.5195046663284302, "step": 853, "token_acc": 0.8337195038844214 }, { "epoch": 0.0460799654670048, "grad_norm": 0.5794634222984314, "learning_rate": 1.9986442809880917e-05, "loss": 0.5322602391242981, "step": 854, "token_acc": 0.8241061130334487 }, { "epoch": 0.0461339232720013, "grad_norm": 0.5563573837280273, "learning_rate": 1.998635169010113e-05, "loss": 0.5076645612716675, "step": 855, "token_acc": 0.8294182563455778 }, { "epoch": 0.04618788107699779, "grad_norm": 0.6253763437271118, "learning_rate": 1.9986260265340775e-05, "loss": 0.46320533752441406, "step": 856, "token_acc": 0.8447745358090186 }, { "epoch": 0.046241838881994284, "grad_norm": 0.7193980813026428, "learning_rate": 1.998616853560265e-05, "loss": 0.5082605481147766, "step": 857, "token_acc": 0.833996287456908 }, { "epoch": 0.04629579668699077, "grad_norm": 0.5393938422203064, "learning_rate": 1.998607650088955e-05, "loss": 0.5265454053878784, "step": 858, "token_acc": 0.8271459768184108 }, { "epoch": 0.04634975449198726, "grad_norm": 0.5997630953788757, "learning_rate": 1.9985984161204296e-05, "loss": 0.5078324675559998, "step": 859, "token_acc": 0.8371268105232043 }, { "epoch": 0.04640371229698376, "grad_norm": 0.5533523559570312, "learning_rate": 1.9985891516549697e-05, "loss": 0.44501644372940063, "step": 860, "token_acc": 0.8547594412829799 }, { "epoch": 0.04645767010198025, "grad_norm": 0.8294369578361511, "learning_rate": 1.998579856692859e-05, "loss": 0.5564672946929932, "step": 861, "token_acc": 0.819320864354458 }, { "epoch": 0.046511627906976744, "grad_norm": 0.7449827194213867, "learning_rate": 1.998570531234381e-05, "loss": 0.5483136177062988, "step": 862, "token_acc": 0.8220198675496688 }, { "epoch": 0.04656558571197324, "grad_norm": 0.7661042809486389, "learning_rate": 1.9985611752798205e-05, "loss": 0.5532932877540588, "step": 863, "token_acc": 0.8225762527233116 }, { "epoch": 0.04661954351696973, "grad_norm": 0.671358048915863, "learning_rate": 1.9985517888294633e-05, "loss": 0.4634592533111572, "step": 864, "token_acc": 0.8463146161348752 }, { "epoch": 0.046673501321966224, "grad_norm": 0.7402158379554749, "learning_rate": 1.998542371883596e-05, "loss": 0.4979991912841797, "step": 865, "token_acc": 0.8338303512259775 }, { "epoch": 0.04672745912696272, "grad_norm": 0.5829094648361206, "learning_rate": 1.998532924442506e-05, "loss": 0.4854390025138855, "step": 866, "token_acc": 0.8404192674596632 }, { "epoch": 0.04678141693195921, "grad_norm": 0.5865939855575562, "learning_rate": 1.9985234465064828e-05, "loss": 0.48545458912849426, "step": 867, "token_acc": 0.8380687093779016 }, { "epoch": 0.046835374736955704, "grad_norm": 0.5787063837051392, "learning_rate": 1.9985139380758145e-05, "loss": 0.467620313167572, "step": 868, "token_acc": 0.8386901152213463 }, { "epoch": 0.04688933254195219, "grad_norm": 0.6483830809593201, "learning_rate": 1.9985043991507925e-05, "loss": 0.47480309009552, "step": 869, "token_acc": 0.8433268858800773 }, { "epoch": 0.046943290346948684, "grad_norm": 0.6564833521842957, "learning_rate": 1.9984948297317076e-05, "loss": 0.5583963394165039, "step": 870, "token_acc": 0.8190500445689546 }, { "epoch": 0.04699724815194518, "grad_norm": 0.5096136927604675, "learning_rate": 1.9984852298188518e-05, "loss": 0.4791744649410248, "step": 871, "token_acc": 0.844269691891195 }, { "epoch": 0.04705120595694167, "grad_norm": 0.6943426132202148, "learning_rate": 1.9984755994125188e-05, "loss": 0.5103180408477783, "step": 872, "token_acc": 0.8359125207827088 }, { "epoch": 0.047105163761938164, "grad_norm": 0.6386728286743164, "learning_rate": 1.998465938513003e-05, "loss": 0.5445941686630249, "step": 873, "token_acc": 0.8200438917337235 }, { "epoch": 0.04715912156693466, "grad_norm": 0.5760061740875244, "learning_rate": 1.9984562471205985e-05, "loss": 0.47623124718666077, "step": 874, "token_acc": 0.8375101050929669 }, { "epoch": 0.04721307937193115, "grad_norm": 0.638481855392456, "learning_rate": 1.9984465252356024e-05, "loss": 0.4738253951072693, "step": 875, "token_acc": 0.838130714527599 }, { "epoch": 0.047267037176927644, "grad_norm": 0.6190427541732788, "learning_rate": 1.9984367728583102e-05, "loss": 0.41047197580337524, "step": 876, "token_acc": 0.8569151056197688 }, { "epoch": 0.04732099498192414, "grad_norm": 0.6350666880607605, "learning_rate": 1.998426989989021e-05, "loss": 0.5557447671890259, "step": 877, "token_acc": 0.8202634880803011 }, { "epoch": 0.04737495278692063, "grad_norm": 0.6024144291877747, "learning_rate": 1.9984171766280328e-05, "loss": 0.5423898100852966, "step": 878, "token_acc": 0.8224859962656709 }, { "epoch": 0.047428910591917124, "grad_norm": 0.6227681636810303, "learning_rate": 1.9984073327756458e-05, "loss": 0.5115461945533752, "step": 879, "token_acc": 0.8266893534273213 }, { "epoch": 0.04748286839691361, "grad_norm": 0.7624478340148926, "learning_rate": 1.9983974584321602e-05, "loss": 0.5843696594238281, "step": 880, "token_acc": 0.8129339686861811 }, { "epoch": 0.047536826201910104, "grad_norm": 0.7287613749504089, "learning_rate": 1.998387553597878e-05, "loss": 0.49446386098861694, "step": 881, "token_acc": 0.8372051782230892 }, { "epoch": 0.0475907840069066, "grad_norm": 0.6328548192977905, "learning_rate": 1.9983776182731015e-05, "loss": 0.5417993068695068, "step": 882, "token_acc": 0.822390984811367 }, { "epoch": 0.04764474181190309, "grad_norm": 0.6329013109207153, "learning_rate": 1.998367652458134e-05, "loss": 0.4559983015060425, "step": 883, "token_acc": 0.8540120990754481 }, { "epoch": 0.047698699616899584, "grad_norm": 0.5879509449005127, "learning_rate": 1.9983576561532795e-05, "loss": 0.4638931155204773, "step": 884, "token_acc": 0.8451833510199269 }, { "epoch": 0.04775265742189608, "grad_norm": 0.7167874574661255, "learning_rate": 1.998347629358844e-05, "loss": 0.5112236738204956, "step": 885, "token_acc": 0.8270830293302204 }, { "epoch": 0.04780661522689257, "grad_norm": 0.6593940258026123, "learning_rate": 1.9983375720751337e-05, "loss": 0.47355493903160095, "step": 886, "token_acc": 0.8455411747973262 }, { "epoch": 0.047860573031889064, "grad_norm": 0.7215020060539246, "learning_rate": 1.9983274843024555e-05, "loss": 0.548103928565979, "step": 887, "token_acc": 0.8179342893447087 }, { "epoch": 0.04791453083688556, "grad_norm": 0.5409709811210632, "learning_rate": 1.9983173660411167e-05, "loss": 0.4622207283973694, "step": 888, "token_acc": 0.8438119257791389 }, { "epoch": 0.04796848864188205, "grad_norm": 0.5456744432449341, "learning_rate": 1.9983072172914277e-05, "loss": 0.49093562364578247, "step": 889, "token_acc": 0.8351996105160662 }, { "epoch": 0.04802244644687854, "grad_norm": 0.8486905097961426, "learning_rate": 1.9982970380536978e-05, "loss": 0.47998544573783875, "step": 890, "token_acc": 0.8365518306812915 }, { "epoch": 0.04807640425187503, "grad_norm": 0.5385947823524475, "learning_rate": 1.998286828328238e-05, "loss": 0.5200304388999939, "step": 891, "token_acc": 0.8349007314524556 }, { "epoch": 0.048130362056871524, "grad_norm": 0.5884329080581665, "learning_rate": 1.9982765881153595e-05, "loss": 0.4414772689342499, "step": 892, "token_acc": 0.8478402078596947 }, { "epoch": 0.04818431986186802, "grad_norm": 0.5516534447669983, "learning_rate": 1.9982663174153758e-05, "loss": 0.4956434369087219, "step": 893, "token_acc": 0.8323483295139308 }, { "epoch": 0.04823827766686451, "grad_norm": 0.6398876309394836, "learning_rate": 1.9982560162286005e-05, "loss": 0.5096710920333862, "step": 894, "token_acc": 0.8288663226236058 }, { "epoch": 0.048292235471861004, "grad_norm": 0.5191407799720764, "learning_rate": 1.9982456845553476e-05, "loss": 0.4760330319404602, "step": 895, "token_acc": 0.8419099697295186 }, { "epoch": 0.0483461932768575, "grad_norm": 0.5690721869468689, "learning_rate": 1.998235322395933e-05, "loss": 0.5131523609161377, "step": 896, "token_acc": 0.8334855055923305 }, { "epoch": 0.04840015108185399, "grad_norm": 0.7990107536315918, "learning_rate": 1.9982249297506734e-05, "loss": 0.49228906631469727, "step": 897, "token_acc": 0.8330838323353293 }, { "epoch": 0.048454108886850485, "grad_norm": 0.5675432085990906, "learning_rate": 1.9982145066198857e-05, "loss": 0.5677111148834229, "step": 898, "token_acc": 0.813984760197221 }, { "epoch": 0.04850806669184698, "grad_norm": 0.6138342618942261, "learning_rate": 1.9982040530038885e-05, "loss": 0.536772608757019, "step": 899, "token_acc": 0.8252160251374705 }, { "epoch": 0.04856202449684347, "grad_norm": 0.6460595726966858, "learning_rate": 1.998193568903001e-05, "loss": 0.5085065960884094, "step": 900, "token_acc": 0.8301127214170693 }, { "epoch": 0.04861598230183996, "grad_norm": 0.63184654712677, "learning_rate": 1.9981830543175437e-05, "loss": 0.5058605074882507, "step": 901, "token_acc": 0.8351889338731444 }, { "epoch": 0.04866994010683645, "grad_norm": 0.693291187286377, "learning_rate": 1.9981725092478373e-05, "loss": 0.47016698122024536, "step": 902, "token_acc": 0.8366959064327485 }, { "epoch": 0.048723897911832945, "grad_norm": 0.6239912509918213, "learning_rate": 1.9981619336942036e-05, "loss": 0.5211408734321594, "step": 903, "token_acc": 0.8272150464536596 }, { "epoch": 0.04877785571682944, "grad_norm": 0.6955773830413818, "learning_rate": 1.998151327656966e-05, "loss": 0.5108197927474976, "step": 904, "token_acc": 0.8345064067857787 }, { "epoch": 0.04883181352182593, "grad_norm": 0.5909547805786133, "learning_rate": 1.9981406911364488e-05, "loss": 0.5279507637023926, "step": 905, "token_acc": 0.8214407067618077 }, { "epoch": 0.048885771326822425, "grad_norm": 0.6647478342056274, "learning_rate": 1.998130024132976e-05, "loss": 0.5332555770874023, "step": 906, "token_acc": 0.8283250587927881 }, { "epoch": 0.04893972913181892, "grad_norm": 0.7290631532669067, "learning_rate": 1.998119326646874e-05, "loss": 0.5231790542602539, "step": 907, "token_acc": 0.8255797610681659 }, { "epoch": 0.04899368693681541, "grad_norm": 0.6072730422019958, "learning_rate": 1.998108598678469e-05, "loss": 0.4697984457015991, "step": 908, "token_acc": 0.8400386224654007 }, { "epoch": 0.049047644741811905, "grad_norm": 0.5255186557769775, "learning_rate": 1.9980978402280887e-05, "loss": 0.5022908449172974, "step": 909, "token_acc": 0.8327264938959091 }, { "epoch": 0.0491016025468084, "grad_norm": 0.8232094645500183, "learning_rate": 1.998087051296062e-05, "loss": 0.49985021352767944, "step": 910, "token_acc": 0.8304957904583723 }, { "epoch": 0.04915556035180489, "grad_norm": 0.5284619927406311, "learning_rate": 1.9980762318827185e-05, "loss": 0.4937552213668823, "step": 911, "token_acc": 0.8359407592824364 }, { "epoch": 0.04920951815680138, "grad_norm": 0.6314496397972107, "learning_rate": 1.9980653819883883e-05, "loss": 0.46482130885124207, "step": 912, "token_acc": 0.8416271186440678 }, { "epoch": 0.04926347596179787, "grad_norm": 0.46523699164390564, "learning_rate": 1.9980545016134022e-05, "loss": 0.5008845329284668, "step": 913, "token_acc": 0.8362131401965857 }, { "epoch": 0.049317433766794365, "grad_norm": 0.5470839738845825, "learning_rate": 1.9980435907580938e-05, "loss": 0.4583261013031006, "step": 914, "token_acc": 0.8455123113582208 }, { "epoch": 0.04937139157179086, "grad_norm": 0.758826494216919, "learning_rate": 1.998032649422795e-05, "loss": 0.49644532799720764, "step": 915, "token_acc": 0.8322039367537916 }, { "epoch": 0.04942534937678735, "grad_norm": 0.7438721656799316, "learning_rate": 1.998021677607841e-05, "loss": 0.5136918425559998, "step": 916, "token_acc": 0.8278810408921933 }, { "epoch": 0.049479307181783845, "grad_norm": 0.7865121364593506, "learning_rate": 1.998010675313566e-05, "loss": 0.55660480260849, "step": 917, "token_acc": 0.8204444444444444 }, { "epoch": 0.04953326498678034, "grad_norm": 0.7577094435691833, "learning_rate": 1.9979996425403068e-05, "loss": 0.5213172435760498, "step": 918, "token_acc": 0.8273381294964028 }, { "epoch": 0.04958722279177683, "grad_norm": 0.6721250414848328, "learning_rate": 1.9979885792884e-05, "loss": 0.539883017539978, "step": 919, "token_acc": 0.8239848317269711 }, { "epoch": 0.049641180596773325, "grad_norm": 0.7210883498191833, "learning_rate": 1.9979774855581827e-05, "loss": 0.5421302318572998, "step": 920, "token_acc": 0.8177680113012086 }, { "epoch": 0.04969513840176982, "grad_norm": 0.7172164916992188, "learning_rate": 1.997966361349995e-05, "loss": 0.531944215297699, "step": 921, "token_acc": 0.8242181710389803 }, { "epoch": 0.04974909620676631, "grad_norm": 0.6776549816131592, "learning_rate": 1.9979552066641763e-05, "loss": 0.47853195667266846, "step": 922, "token_acc": 0.8417000307976594 }, { "epoch": 0.0498030540117628, "grad_norm": 0.6068503856658936, "learning_rate": 1.9979440215010667e-05, "loss": 0.45151710510253906, "step": 923, "token_acc": 0.843901798608376 }, { "epoch": 0.04985701181675929, "grad_norm": 0.7017574906349182, "learning_rate": 1.997932805861008e-05, "loss": 0.5551767349243164, "step": 924, "token_acc": 0.8202504375925677 }, { "epoch": 0.049910969621755785, "grad_norm": 0.7355385422706604, "learning_rate": 1.997921559744343e-05, "loss": 0.46181800961494446, "step": 925, "token_acc": 0.8444694896151989 }, { "epoch": 0.04996492742675228, "grad_norm": 0.6656391620635986, "learning_rate": 1.997910283151415e-05, "loss": 0.48749464750289917, "step": 926, "token_acc": 0.8362824804718635 }, { "epoch": 0.05001888523174877, "grad_norm": 0.6015986204147339, "learning_rate": 1.997898976082568e-05, "loss": 0.4163622260093689, "step": 927, "token_acc": 0.8574898785425101 }, { "epoch": 0.050072843036745265, "grad_norm": 0.7935568690299988, "learning_rate": 1.9978876385381482e-05, "loss": 0.5399105548858643, "step": 928, "token_acc": 0.8213769860375542 }, { "epoch": 0.05012680084174176, "grad_norm": 0.6645094156265259, "learning_rate": 1.997876270518501e-05, "loss": 0.47949618101119995, "step": 929, "token_acc": 0.8430303856111186 }, { "epoch": 0.05018075864673825, "grad_norm": 0.6798114776611328, "learning_rate": 1.997864872023974e-05, "loss": 0.5020859241485596, "step": 930, "token_acc": 0.8377391558346721 }, { "epoch": 0.050234716451734746, "grad_norm": 0.6931281089782715, "learning_rate": 1.9978534430549153e-05, "loss": 0.5318747162818909, "step": 931, "token_acc": 0.8241776590415762 }, { "epoch": 0.05028867425673124, "grad_norm": 0.5778091549873352, "learning_rate": 1.997841983611674e-05, "loss": 0.4951178729534149, "step": 932, "token_acc": 0.8378010613688938 }, { "epoch": 0.05034263206172773, "grad_norm": 0.6794232130050659, "learning_rate": 1.9978304936945995e-05, "loss": 0.4921340346336365, "step": 933, "token_acc": 0.8351420363868497 }, { "epoch": 0.05039658986672422, "grad_norm": 0.7700832486152649, "learning_rate": 1.9978189733040433e-05, "loss": 0.5498608350753784, "step": 934, "token_acc": 0.8144871025794841 }, { "epoch": 0.05045054767172071, "grad_norm": 0.4430093467235565, "learning_rate": 1.997807422440357e-05, "loss": 0.48585110902786255, "step": 935, "token_acc": 0.8413363533408833 }, { "epoch": 0.050504505476717206, "grad_norm": 0.669123113155365, "learning_rate": 1.9977958411038936e-05, "loss": 0.49796628952026367, "step": 936, "token_acc": 0.8360040913740198 }, { "epoch": 0.0505584632817137, "grad_norm": 0.5967108011245728, "learning_rate": 1.9977842292950064e-05, "loss": 0.5141575932502747, "step": 937, "token_acc": 0.8287136706135629 }, { "epoch": 0.05061242108671019, "grad_norm": 0.5336405038833618, "learning_rate": 1.9977725870140503e-05, "loss": 0.4951859712600708, "step": 938, "token_acc": 0.8393796004206099 }, { "epoch": 0.050666378891706686, "grad_norm": 0.690139889717102, "learning_rate": 1.997760914261381e-05, "loss": 0.48206013441085815, "step": 939, "token_acc": 0.8352704168603751 }, { "epoch": 0.05072033669670318, "grad_norm": 0.6660971641540527, "learning_rate": 1.9977492110373544e-05, "loss": 0.4857831597328186, "step": 940, "token_acc": 0.8373419673142153 }, { "epoch": 0.05077429450169967, "grad_norm": 0.5502610206604004, "learning_rate": 1.9977374773423282e-05, "loss": 0.4557226896286011, "step": 941, "token_acc": 0.8457750419697817 }, { "epoch": 0.050828252306696166, "grad_norm": 0.7836302518844604, "learning_rate": 1.9977257131766612e-05, "loss": 0.5864314436912537, "step": 942, "token_acc": 0.807353388027955 }, { "epoch": 0.05088221011169266, "grad_norm": 0.4773961901664734, "learning_rate": 1.9977139185407122e-05, "loss": 0.49383383989334106, "step": 943, "token_acc": 0.8399219131283553 }, { "epoch": 0.050936167916689146, "grad_norm": 0.5511058568954468, "learning_rate": 1.9977020934348414e-05, "loss": 0.46425729990005493, "step": 944, "token_acc": 0.8387893761581223 }, { "epoch": 0.05099012572168564, "grad_norm": 0.5903640389442444, "learning_rate": 1.9976902378594102e-05, "loss": 0.533552348613739, "step": 945, "token_acc": 0.8214285714285714 }, { "epoch": 0.05104408352668213, "grad_norm": 0.47851863503456116, "learning_rate": 1.9976783518147803e-05, "loss": 0.5015815496444702, "step": 946, "token_acc": 0.8309245693921996 }, { "epoch": 0.051098041331678626, "grad_norm": 0.6540731191635132, "learning_rate": 1.9976664353013147e-05, "loss": 0.5606891512870789, "step": 947, "token_acc": 0.8169477581242287 }, { "epoch": 0.05115199913667512, "grad_norm": 0.6896024942398071, "learning_rate": 1.9976544883193778e-05, "loss": 0.5392852425575256, "step": 948, "token_acc": 0.8258994744643579 }, { "epoch": 0.05120595694167161, "grad_norm": 0.7835479378700256, "learning_rate": 1.997642510869334e-05, "loss": 0.5219389200210571, "step": 949, "token_acc": 0.8277586745858081 }, { "epoch": 0.051259914746668106, "grad_norm": 0.5327906608581543, "learning_rate": 1.9976305029515494e-05, "loss": 0.4384297728538513, "step": 950, "token_acc": 0.8520348459238071 }, { "epoch": 0.0513138725516646, "grad_norm": 0.6596798896789551, "learning_rate": 1.9976184645663905e-05, "loss": 0.5005845427513123, "step": 951, "token_acc": 0.8319194061505832 }, { "epoch": 0.05136783035666109, "grad_norm": 0.6733710169792175, "learning_rate": 1.9976063957142252e-05, "loss": 0.4875231981277466, "step": 952, "token_acc": 0.8391328515842135 }, { "epoch": 0.051421788161657586, "grad_norm": 0.6439480185508728, "learning_rate": 1.997594296395422e-05, "loss": 0.4837793707847595, "step": 953, "token_acc": 0.8453803555814632 }, { "epoch": 0.05147574596665408, "grad_norm": 0.6533311009407043, "learning_rate": 1.9975821666103498e-05, "loss": 0.460256963968277, "step": 954, "token_acc": 0.8412830314110021 }, { "epoch": 0.051529703771650566, "grad_norm": 0.7547575831413269, "learning_rate": 1.99757000635938e-05, "loss": 0.505699634552002, "step": 955, "token_acc": 0.831291564578229 }, { "epoch": 0.05158366157664706, "grad_norm": 0.6702069640159607, "learning_rate": 1.9975578156428834e-05, "loss": 0.49362245202064514, "step": 956, "token_acc": 0.8360256019506248 }, { "epoch": 0.05163761938164355, "grad_norm": 0.5683302879333496, "learning_rate": 1.9975455944612324e-05, "loss": 0.48457062244415283, "step": 957, "token_acc": 0.8380966631752207 }, { "epoch": 0.051691577186640046, "grad_norm": 0.6770913004875183, "learning_rate": 1.9975333428148004e-05, "loss": 0.515362560749054, "step": 958, "token_acc": 0.8268989547038328 }, { "epoch": 0.05174553499163654, "grad_norm": 0.7264049649238586, "learning_rate": 1.9975210607039615e-05, "loss": 0.47717344760894775, "step": 959, "token_acc": 0.8389746142359383 }, { "epoch": 0.05179949279663303, "grad_norm": 0.5772354602813721, "learning_rate": 1.9975087481290906e-05, "loss": 0.4602001905441284, "step": 960, "token_acc": 0.8442900711623179 }, { "epoch": 0.051853450601629526, "grad_norm": 0.6226930022239685, "learning_rate": 1.9974964050905636e-05, "loss": 0.4358507990837097, "step": 961, "token_acc": 0.8500085807448087 }, { "epoch": 0.05190740840662602, "grad_norm": 0.514728844165802, "learning_rate": 1.997484031588758e-05, "loss": 0.49926725029945374, "step": 962, "token_acc": 0.8321983110868973 }, { "epoch": 0.05196136621162251, "grad_norm": 0.8390287756919861, "learning_rate": 1.997471627624051e-05, "loss": 0.4495214819908142, "step": 963, "token_acc": 0.8495019488956258 }, { "epoch": 0.052015324016619006, "grad_norm": 0.5386201739311218, "learning_rate": 1.9974591931968223e-05, "loss": 0.45717692375183105, "step": 964, "token_acc": 0.8501756282085923 }, { "epoch": 0.0520692818216155, "grad_norm": 0.5923749804496765, "learning_rate": 1.997446728307451e-05, "loss": 0.4898894131183624, "step": 965, "token_acc": 0.8352662098049551 }, { "epoch": 0.052123239626611986, "grad_norm": 0.7808384895324707, "learning_rate": 1.9974342329563177e-05, "loss": 0.4803370237350464, "step": 966, "token_acc": 0.8418813806514341 }, { "epoch": 0.05217719743160848, "grad_norm": 0.5531908273696899, "learning_rate": 1.9974217071438043e-05, "loss": 0.5364248752593994, "step": 967, "token_acc": 0.8267572776143625 }, { "epoch": 0.05223115523660497, "grad_norm": 0.6236319541931152, "learning_rate": 1.9974091508702934e-05, "loss": 0.43478691577911377, "step": 968, "token_acc": 0.8522957198443579 }, { "epoch": 0.052285113041601466, "grad_norm": 0.5025848150253296, "learning_rate": 1.9973965641361682e-05, "loss": 0.41641199588775635, "step": 969, "token_acc": 0.8556843267108167 }, { "epoch": 0.05233907084659796, "grad_norm": 0.758043646812439, "learning_rate": 1.9973839469418133e-05, "loss": 0.3964099884033203, "step": 970, "token_acc": 0.8613861386138614 }, { "epoch": 0.05239302865159445, "grad_norm": 0.6101278066635132, "learning_rate": 1.9973712992876138e-05, "loss": 0.471318781375885, "step": 971, "token_acc": 0.8404182143348466 }, { "epoch": 0.05244698645659095, "grad_norm": 0.6657624840736389, "learning_rate": 1.997358621173956e-05, "loss": 0.4837026000022888, "step": 972, "token_acc": 0.8359450570710002 }, { "epoch": 0.05250094426158744, "grad_norm": 0.4477825164794922, "learning_rate": 1.9973459126012277e-05, "loss": 0.4703546166419983, "step": 973, "token_acc": 0.8455488803932277 }, { "epoch": 0.05255490206658393, "grad_norm": 0.6764251589775085, "learning_rate": 1.9973331735698163e-05, "loss": 0.488768607378006, "step": 974, "token_acc": 0.837189945064092 }, { "epoch": 0.05260885987158043, "grad_norm": 0.5900571346282959, "learning_rate": 1.9973204040801108e-05, "loss": 0.5048508644104004, "step": 975, "token_acc": 0.8326837014580627 }, { "epoch": 0.05266281767657692, "grad_norm": 0.6187917590141296, "learning_rate": 1.9973076041325014e-05, "loss": 0.47084665298461914, "step": 976, "token_acc": 0.8377503852080124 }, { "epoch": 0.05271677548157341, "grad_norm": 0.6024318933486938, "learning_rate": 1.9972947737273793e-05, "loss": 0.5020440816879272, "step": 977, "token_acc": 0.8356859831599802 }, { "epoch": 0.0527707332865699, "grad_norm": 0.6255470514297485, "learning_rate": 1.9972819128651357e-05, "loss": 0.432977557182312, "step": 978, "token_acc": 0.8556383668178872 }, { "epoch": 0.05282469109156639, "grad_norm": 0.5923405289649963, "learning_rate": 1.997269021546164e-05, "loss": 0.5215796828269958, "step": 979, "token_acc": 0.8283643551309504 }, { "epoch": 0.05287864889656289, "grad_norm": 0.6133004426956177, "learning_rate": 1.9972560997708575e-05, "loss": 0.49688100814819336, "step": 980, "token_acc": 0.8401208807022593 }, { "epoch": 0.05293260670155938, "grad_norm": 0.623805046081543, "learning_rate": 1.9972431475396114e-05, "loss": 0.5461175441741943, "step": 981, "token_acc": 0.824793388429752 }, { "epoch": 0.052986564506555874, "grad_norm": 0.5122054815292358, "learning_rate": 1.9972301648528202e-05, "loss": 0.52785325050354, "step": 982, "token_acc": 0.8301995616564771 }, { "epoch": 0.05304052231155237, "grad_norm": 0.664198637008667, "learning_rate": 1.9972171517108812e-05, "loss": 0.48820123076438904, "step": 983, "token_acc": 0.8342291371994343 }, { "epoch": 0.05309448011654886, "grad_norm": 0.6792959570884705, "learning_rate": 1.997204108114192e-05, "loss": 0.479144811630249, "step": 984, "token_acc": 0.8384004792571514 }, { "epoch": 0.053148437921545354, "grad_norm": 0.6443355083465576, "learning_rate": 1.9971910340631503e-05, "loss": 0.42636263370513916, "step": 985, "token_acc": 0.8567945592198126 }, { "epoch": 0.05320239572654185, "grad_norm": 0.6605151295661926, "learning_rate": 1.9971779295581552e-05, "loss": 0.49553942680358887, "step": 986, "token_acc": 0.8373814781140407 }, { "epoch": 0.05325635353153834, "grad_norm": 0.725533127784729, "learning_rate": 1.997164794599608e-05, "loss": 0.4866502583026886, "step": 987, "token_acc": 0.8348977754918349 }, { "epoch": 0.05331031133653483, "grad_norm": 0.548983633518219, "learning_rate": 1.997151629187909e-05, "loss": 0.46630656719207764, "step": 988, "token_acc": 0.8448487326246934 }, { "epoch": 0.05336426914153132, "grad_norm": 0.5928108096122742, "learning_rate": 1.9971384333234606e-05, "loss": 0.4793194532394409, "step": 989, "token_acc": 0.84274094158245 }, { "epoch": 0.053418226946527814, "grad_norm": 0.528605043888092, "learning_rate": 1.9971252070066652e-05, "loss": 0.526321530342102, "step": 990, "token_acc": 0.8268253737842938 }, { "epoch": 0.05347218475152431, "grad_norm": 0.7616602778434753, "learning_rate": 1.9971119502379274e-05, "loss": 0.49843811988830566, "step": 991, "token_acc": 0.8352312666947842 }, { "epoch": 0.0535261425565208, "grad_norm": 0.6164488196372986, "learning_rate": 1.9970986630176518e-05, "loss": 0.496040403842926, "step": 992, "token_acc": 0.8343255655723762 }, { "epoch": 0.053580100361517294, "grad_norm": 0.6162088513374329, "learning_rate": 1.9970853453462443e-05, "loss": 0.5182802081108093, "step": 993, "token_acc": 0.8245541269182912 }, { "epoch": 0.05363405816651379, "grad_norm": 0.6911048889160156, "learning_rate": 1.997071997224112e-05, "loss": 0.5649464130401611, "step": 994, "token_acc": 0.8147510403214234 }, { "epoch": 0.05368801597151028, "grad_norm": 0.6370834112167358, "learning_rate": 1.9970586186516618e-05, "loss": 0.5294845104217529, "step": 995, "token_acc": 0.8244571151279874 }, { "epoch": 0.053741973776506774, "grad_norm": 0.6849631071090698, "learning_rate": 1.9970452096293023e-05, "loss": 0.5081079006195068, "step": 996, "token_acc": 0.8292926411865373 }, { "epoch": 0.05379593158150327, "grad_norm": 0.4957621991634369, "learning_rate": 1.9970317701574436e-05, "loss": 0.47111767530441284, "step": 997, "token_acc": 0.8412848370335381 }, { "epoch": 0.053849889386499754, "grad_norm": 0.5110964179039001, "learning_rate": 1.9970183002364954e-05, "loss": 0.43626081943511963, "step": 998, "token_acc": 0.8459889773423147 }, { "epoch": 0.05390384719149625, "grad_norm": 0.5933754444122314, "learning_rate": 1.99700479986687e-05, "loss": 0.5277829170227051, "step": 999, "token_acc": 0.82220367278798 }, { "epoch": 0.05395780499649274, "grad_norm": 0.6459143757820129, "learning_rate": 1.996991269048979e-05, "loss": 0.44947484135627747, "step": 1000, "token_acc": 0.845951948314153 }, { "epoch": 0.054011762801489234, "grad_norm": 0.5012298226356506, "learning_rate": 1.9969777077832357e-05, "loss": 0.4869093894958496, "step": 1001, "token_acc": 0.8382640586797065 }, { "epoch": 0.05406572060648573, "grad_norm": 0.5974643230438232, "learning_rate": 1.9969641160700542e-05, "loss": 0.48130494356155396, "step": 1002, "token_acc": 0.8387920506272899 }, { "epoch": 0.05411967841148222, "grad_norm": 0.6587076187133789, "learning_rate": 1.9969504939098503e-05, "loss": 0.5255228281021118, "step": 1003, "token_acc": 0.824868282402529 }, { "epoch": 0.054173636216478714, "grad_norm": 0.45836442708969116, "learning_rate": 1.9969368413030394e-05, "loss": 0.47451552748680115, "step": 1004, "token_acc": 0.8445258926460156 }, { "epoch": 0.05422759402147521, "grad_norm": 0.623732328414917, "learning_rate": 1.9969231582500384e-05, "loss": 0.43019193410873413, "step": 1005, "token_acc": 0.8521897810218978 }, { "epoch": 0.0542815518264717, "grad_norm": 0.5085777044296265, "learning_rate": 1.996909444751265e-05, "loss": 0.5067863464355469, "step": 1006, "token_acc": 0.8332793784396245 }, { "epoch": 0.054335509631468194, "grad_norm": 0.6304706335067749, "learning_rate": 1.9968957008071384e-05, "loss": 0.506719708442688, "step": 1007, "token_acc": 0.8289981972701519 }, { "epoch": 0.05438946743646469, "grad_norm": 0.7806472182273865, "learning_rate": 1.9968819264180787e-05, "loss": 0.4700498580932617, "step": 1008, "token_acc": 0.8453167508744656 }, { "epoch": 0.054443425241461174, "grad_norm": 0.6309357285499573, "learning_rate": 1.9968681215845058e-05, "loss": 0.5094919204711914, "step": 1009, "token_acc": 0.8271973916587421 }, { "epoch": 0.05449738304645767, "grad_norm": 0.6345802545547485, "learning_rate": 1.9968542863068418e-05, "loss": 0.48715659976005554, "step": 1010, "token_acc": 0.8407348825891228 }, { "epoch": 0.05455134085145416, "grad_norm": 0.5831595659255981, "learning_rate": 1.996840420585509e-05, "loss": 0.45607203245162964, "step": 1011, "token_acc": 0.8456720531128649 }, { "epoch": 0.054605298656450654, "grad_norm": 0.5990407466888428, "learning_rate": 1.9968265244209303e-05, "loss": 0.4063073992729187, "step": 1012, "token_acc": 0.8593506088042461 }, { "epoch": 0.05465925646144715, "grad_norm": 0.5564766526222229, "learning_rate": 1.9968125978135316e-05, "loss": 0.4526275098323822, "step": 1013, "token_acc": 0.8434360348292029 }, { "epoch": 0.05471321426644364, "grad_norm": 0.6422818303108215, "learning_rate": 1.9967986407637365e-05, "loss": 0.4806315302848816, "step": 1014, "token_acc": 0.840905363293423 }, { "epoch": 0.054767172071440134, "grad_norm": 0.5373026132583618, "learning_rate": 1.9967846532719727e-05, "loss": 0.5537198185920715, "step": 1015, "token_acc": 0.8217090069284064 }, { "epoch": 0.05482112987643663, "grad_norm": 0.7405779957771301, "learning_rate": 1.9967706353386664e-05, "loss": 0.42958080768585205, "step": 1016, "token_acc": 0.8527997251803504 }, { "epoch": 0.05487508768143312, "grad_norm": 0.5558722615242004, "learning_rate": 1.9967565869642458e-05, "loss": 0.5163581371307373, "step": 1017, "token_acc": 0.8314292659212446 }, { "epoch": 0.054929045486429615, "grad_norm": 0.6122289896011353, "learning_rate": 1.9967425081491406e-05, "loss": 0.5224381685256958, "step": 1018, "token_acc": 0.8218705937646874 }, { "epoch": 0.05498300329142611, "grad_norm": 0.6317908763885498, "learning_rate": 1.99672839889378e-05, "loss": 0.48054128885269165, "step": 1019, "token_acc": 0.8428571428571429 }, { "epoch": 0.055036961096422594, "grad_norm": 0.5293658971786499, "learning_rate": 1.9967142591985952e-05, "loss": 0.444130539894104, "step": 1020, "token_acc": 0.8496013197690404 }, { "epoch": 0.05509091890141909, "grad_norm": 0.7803549766540527, "learning_rate": 1.996700089064018e-05, "loss": 0.5409347414970398, "step": 1021, "token_acc": 0.8258445391931781 }, { "epoch": 0.05514487670641558, "grad_norm": 0.6083102822303772, "learning_rate": 1.9966858884904816e-05, "loss": 0.4839198589324951, "step": 1022, "token_acc": 0.8418227215980025 }, { "epoch": 0.055198834511412075, "grad_norm": 0.7061855792999268, "learning_rate": 1.9966716574784185e-05, "loss": 0.5371530055999756, "step": 1023, "token_acc": 0.8211000901713256 }, { "epoch": 0.05525279231640857, "grad_norm": 0.5543636083602905, "learning_rate": 1.9966573960282647e-05, "loss": 0.46512070298194885, "step": 1024, "token_acc": 0.846573583694431 }, { "epoch": 0.05530675012140506, "grad_norm": 0.5716032981872559, "learning_rate": 1.9966431041404547e-05, "loss": 0.49071037769317627, "step": 1025, "token_acc": 0.8337133086349388 }, { "epoch": 0.055360707926401555, "grad_norm": 0.5697593688964844, "learning_rate": 1.9966287818154257e-05, "loss": 0.4611620008945465, "step": 1026, "token_acc": 0.8448637316561844 }, { "epoch": 0.05541466573139805, "grad_norm": 0.6923823356628418, "learning_rate": 1.996614429053615e-05, "loss": 0.45328688621520996, "step": 1027, "token_acc": 0.8425234387414587 }, { "epoch": 0.05546862353639454, "grad_norm": 0.5114612579345703, "learning_rate": 1.9966000458554603e-05, "loss": 0.49831312894821167, "step": 1028, "token_acc": 0.8315916398713826 }, { "epoch": 0.055522581341391035, "grad_norm": 0.7219922542572021, "learning_rate": 1.996585632221401e-05, "loss": 0.48667943477630615, "step": 1029, "token_acc": 0.8380222841225626 }, { "epoch": 0.05557653914638753, "grad_norm": 0.7901466488838196, "learning_rate": 1.9965711881518783e-05, "loss": 0.5014921426773071, "step": 1030, "token_acc": 0.835521463342676 }, { "epoch": 0.055630496951384015, "grad_norm": 0.615497350692749, "learning_rate": 1.9965567136473322e-05, "loss": 0.48477035760879517, "step": 1031, "token_acc": 0.8447898955550158 }, { "epoch": 0.05568445475638051, "grad_norm": 0.5756500959396362, "learning_rate": 1.9965422087082055e-05, "loss": 0.4777209758758545, "step": 1032, "token_acc": 0.8429797085659813 }, { "epoch": 0.055738412561377, "grad_norm": 0.6330601572990417, "learning_rate": 1.99652767333494e-05, "loss": 0.5076591372489929, "step": 1033, "token_acc": 0.8265740886854979 }, { "epoch": 0.055792370366373495, "grad_norm": 0.45985618233680725, "learning_rate": 1.996513107527981e-05, "loss": 0.46807289123535156, "step": 1034, "token_acc": 0.8499795053969121 }, { "epoch": 0.05584632817136999, "grad_norm": 0.5029605031013489, "learning_rate": 1.9964985112877727e-05, "loss": 0.47378605604171753, "step": 1035, "token_acc": 0.8353369763205829 }, { "epoch": 0.05590028597636648, "grad_norm": 0.7286427617073059, "learning_rate": 1.9964838846147608e-05, "loss": 0.4631579518318176, "step": 1036, "token_acc": 0.8433781190019194 }, { "epoch": 0.055954243781362975, "grad_norm": 0.568207323551178, "learning_rate": 1.9964692275093923e-05, "loss": 0.482799768447876, "step": 1037, "token_acc": 0.8410111032364753 }, { "epoch": 0.05600820158635947, "grad_norm": 0.7908043265342712, "learning_rate": 1.996454539972114e-05, "loss": 0.5744463801383972, "step": 1038, "token_acc": 0.819614711033275 }, { "epoch": 0.05606215939135596, "grad_norm": 0.7191596627235413, "learning_rate": 1.9964398220033758e-05, "loss": 0.5241144895553589, "step": 1039, "token_acc": 0.8281698221457258 }, { "epoch": 0.056116117196352455, "grad_norm": 0.5949957370758057, "learning_rate": 1.9964250736036264e-05, "loss": 0.48965543508529663, "step": 1040, "token_acc": 0.8415687215159511 }, { "epoch": 0.05617007500134894, "grad_norm": 0.5983198881149292, "learning_rate": 1.996410294773316e-05, "loss": 0.5407519340515137, "step": 1041, "token_acc": 0.8221715588486749 }, { "epoch": 0.056224032806345435, "grad_norm": 0.7287748456001282, "learning_rate": 1.9963954855128964e-05, "loss": 0.5833425521850586, "step": 1042, "token_acc": 0.8203405017921147 }, { "epoch": 0.05627799061134193, "grad_norm": 0.5875898003578186, "learning_rate": 1.9963806458228195e-05, "loss": 0.5430148839950562, "step": 1043, "token_acc": 0.8242907569616943 }, { "epoch": 0.05633194841633842, "grad_norm": 0.6313178539276123, "learning_rate": 1.9963657757035387e-05, "loss": 0.444673091173172, "step": 1044, "token_acc": 0.8522099447513812 }, { "epoch": 0.056385906221334915, "grad_norm": 0.5402422547340393, "learning_rate": 1.9963508751555083e-05, "loss": 0.5325757265090942, "step": 1045, "token_acc": 0.8241031754633142 }, { "epoch": 0.05643986402633141, "grad_norm": 0.5053545832633972, "learning_rate": 1.9963359441791833e-05, "loss": 0.5466367602348328, "step": 1046, "token_acc": 0.8229566243729713 }, { "epoch": 0.0564938218313279, "grad_norm": 0.5533000230789185, "learning_rate": 1.996320982775019e-05, "loss": 0.4954518973827362, "step": 1047, "token_acc": 0.8321669258683255 }, { "epoch": 0.056547779636324395, "grad_norm": 0.5957016944885254, "learning_rate": 1.9963059909434735e-05, "loss": 0.45799529552459717, "step": 1048, "token_acc": 0.8468892484188851 }, { "epoch": 0.05660173744132089, "grad_norm": 0.615820586681366, "learning_rate": 1.996290968685004e-05, "loss": 0.4980253577232361, "step": 1049, "token_acc": 0.830371107635433 }, { "epoch": 0.05665569524631738, "grad_norm": 0.594008207321167, "learning_rate": 1.996275916000069e-05, "loss": 0.4849105179309845, "step": 1050, "token_acc": 0.8400145772594753 }, { "epoch": 0.056709653051313876, "grad_norm": 0.663533627986908, "learning_rate": 1.9962608328891285e-05, "loss": 0.5443304181098938, "step": 1051, "token_acc": 0.8192905896833389 }, { "epoch": 0.05676361085631036, "grad_norm": 0.6083036661148071, "learning_rate": 1.9962457193526435e-05, "loss": 0.4681539833545685, "step": 1052, "token_acc": 0.8454479242534596 }, { "epoch": 0.056817568661306855, "grad_norm": 0.6682699918746948, "learning_rate": 1.996230575391075e-05, "loss": 0.4927041232585907, "step": 1053, "token_acc": 0.8357712765957447 }, { "epoch": 0.05687152646630335, "grad_norm": 0.8044388294219971, "learning_rate": 1.9962154010048858e-05, "loss": 0.46579256653785706, "step": 1054, "token_acc": 0.8398550724637681 }, { "epoch": 0.05692548427129984, "grad_norm": 0.608708918094635, "learning_rate": 1.996200196194539e-05, "loss": 0.4767979383468628, "step": 1055, "token_acc": 0.8445538266116227 }, { "epoch": 0.056979442076296336, "grad_norm": 0.4877060055732727, "learning_rate": 1.9961849609604994e-05, "loss": 0.4924583435058594, "step": 1056, "token_acc": 0.8371861519743391 }, { "epoch": 0.05703339988129283, "grad_norm": 0.5064923763275146, "learning_rate": 1.9961696953032315e-05, "loss": 0.4935539662837982, "step": 1057, "token_acc": 0.8362825693186569 }, { "epoch": 0.05708735768628932, "grad_norm": 0.5836595892906189, "learning_rate": 1.996154399223203e-05, "loss": 0.4612942337989807, "step": 1058, "token_acc": 0.842678904345332 }, { "epoch": 0.057141315491285816, "grad_norm": 0.6167305707931519, "learning_rate": 1.9961390727208794e-05, "loss": 0.5839295387268066, "step": 1059, "token_acc": 0.8079030189998835 }, { "epoch": 0.05719527329628231, "grad_norm": 0.5921739935874939, "learning_rate": 1.9961237157967297e-05, "loss": 0.46826285123825073, "step": 1060, "token_acc": 0.8347301867372445 }, { "epoch": 0.0572492311012788, "grad_norm": 0.5800741314888, "learning_rate": 1.996108328451223e-05, "loss": 0.45303431153297424, "step": 1061, "token_acc": 0.8474014848657909 }, { "epoch": 0.057303188906275296, "grad_norm": 0.40855634212493896, "learning_rate": 1.996092910684828e-05, "loss": 0.38144874572753906, "step": 1062, "token_acc": 0.8704178415790823 }, { "epoch": 0.05735714671127178, "grad_norm": 0.4234394133090973, "learning_rate": 1.9960774624980173e-05, "loss": 0.45434367656707764, "step": 1063, "token_acc": 0.8487998973174176 }, { "epoch": 0.057411104516268276, "grad_norm": 0.5584456324577332, "learning_rate": 1.9960619838912617e-05, "loss": 0.48727330565452576, "step": 1064, "token_acc": 0.836529242569511 }, { "epoch": 0.05746506232126477, "grad_norm": 0.668910026550293, "learning_rate": 1.9960464748650336e-05, "loss": 0.5155137777328491, "step": 1065, "token_acc": 0.8351479620323842 }, { "epoch": 0.05751902012626126, "grad_norm": 0.6300912499427795, "learning_rate": 1.9960309354198073e-05, "loss": 0.4731888175010681, "step": 1066, "token_acc": 0.8415107659724673 }, { "epoch": 0.057572977931257756, "grad_norm": 0.6026988625526428, "learning_rate": 1.9960153655560574e-05, "loss": 0.5125389695167542, "step": 1067, "token_acc": 0.831353591160221 }, { "epoch": 0.05762693573625425, "grad_norm": 0.5388308167457581, "learning_rate": 1.995999765274259e-05, "loss": 0.4648725688457489, "step": 1068, "token_acc": 0.8406822488945042 }, { "epoch": 0.05768089354125074, "grad_norm": 0.6576910614967346, "learning_rate": 1.995984134574889e-05, "loss": 0.5242973566055298, "step": 1069, "token_acc": 0.8308020584912765 }, { "epoch": 0.057734851346247236, "grad_norm": 0.5205404758453369, "learning_rate": 1.9959684734584235e-05, "loss": 0.5294867753982544, "step": 1070, "token_acc": 0.8277153558052435 }, { "epoch": 0.05778880915124373, "grad_norm": 0.5870619416236877, "learning_rate": 1.9959527819253426e-05, "loss": 0.40655386447906494, "step": 1071, "token_acc": 0.8611517100263081 }, { "epoch": 0.05784276695624022, "grad_norm": 0.5899544954299927, "learning_rate": 1.9959370599761243e-05, "loss": 0.5209072232246399, "step": 1072, "token_acc": 0.8256085820382341 }, { "epoch": 0.057896724761236716, "grad_norm": 0.5826218128204346, "learning_rate": 1.9959213076112493e-05, "loss": 0.49518847465515137, "step": 1073, "token_acc": 0.8388570090174493 }, { "epoch": 0.0579506825662332, "grad_norm": 0.5706366300582886, "learning_rate": 1.995905524831198e-05, "loss": 0.5173414945602417, "step": 1074, "token_acc": 0.82606728013275 }, { "epoch": 0.058004640371229696, "grad_norm": 0.5682640671730042, "learning_rate": 1.9958897116364534e-05, "loss": 0.5349826812744141, "step": 1075, "token_acc": 0.8193509795124869 }, { "epoch": 0.05805859817622619, "grad_norm": 0.5098016858100891, "learning_rate": 1.9958738680274975e-05, "loss": 0.45124882459640503, "step": 1076, "token_acc": 0.8515296940611877 }, { "epoch": 0.05811255598122268, "grad_norm": 0.6112192273139954, "learning_rate": 1.995857994004815e-05, "loss": 0.4818417429924011, "step": 1077, "token_acc": 0.8406836563406074 }, { "epoch": 0.058166513786219176, "grad_norm": 0.6681251525878906, "learning_rate": 1.9958420895688898e-05, "loss": 0.46218299865722656, "step": 1078, "token_acc": 0.8444176430048242 }, { "epoch": 0.05822047159121567, "grad_norm": 0.6289830207824707, "learning_rate": 1.9958261547202086e-05, "loss": 0.4796309173107147, "step": 1079, "token_acc": 0.8394587675991955 }, { "epoch": 0.05827442939621216, "grad_norm": 0.6140072345733643, "learning_rate": 1.995810189459257e-05, "loss": 0.5029873847961426, "step": 1080, "token_acc": 0.8372222998463044 }, { "epoch": 0.058328387201208656, "grad_norm": 0.6456661224365234, "learning_rate": 1.9957941937865233e-05, "loss": 0.5065627098083496, "step": 1081, "token_acc": 0.8311064988973926 }, { "epoch": 0.05838234500620515, "grad_norm": 0.7466562986373901, "learning_rate": 1.9957781677024956e-05, "loss": 0.49186989665031433, "step": 1082, "token_acc": 0.8407183291040407 }, { "epoch": 0.05843630281120164, "grad_norm": 0.6924615502357483, "learning_rate": 1.9957621112076636e-05, "loss": 0.5129991769790649, "step": 1083, "token_acc": 0.8258426966292135 }, { "epoch": 0.058490260616198136, "grad_norm": 0.5305834412574768, "learning_rate": 1.9957460243025178e-05, "loss": 0.4231031537055969, "step": 1084, "token_acc": 0.8599805020716549 }, { "epoch": 0.05854421842119462, "grad_norm": 0.5670274496078491, "learning_rate": 1.9957299069875492e-05, "loss": 0.4218899607658386, "step": 1085, "token_acc": 0.852583135391924 }, { "epoch": 0.058598176226191116, "grad_norm": 0.6543908715248108, "learning_rate": 1.9957137592632497e-05, "loss": 0.5096046328544617, "step": 1086, "token_acc": 0.833379424778761 }, { "epoch": 0.05865213403118761, "grad_norm": 0.6397584676742554, "learning_rate": 1.995697581130113e-05, "loss": 0.49122926592826843, "step": 1087, "token_acc": 0.8366022710514673 }, { "epoch": 0.0587060918361841, "grad_norm": 0.5962620377540588, "learning_rate": 1.995681372588633e-05, "loss": 0.4883817434310913, "step": 1088, "token_acc": 0.8379032258064516 }, { "epoch": 0.058760049641180596, "grad_norm": 0.5713218450546265, "learning_rate": 1.995665133639305e-05, "loss": 0.48347023129463196, "step": 1089, "token_acc": 0.8400958227279532 }, { "epoch": 0.05881400744617709, "grad_norm": 0.5050039291381836, "learning_rate": 1.9956488642826245e-05, "loss": 0.4749169647693634, "step": 1090, "token_acc": 0.8386123680241327 }, { "epoch": 0.05886796525117358, "grad_norm": 0.46529585123062134, "learning_rate": 1.9956325645190887e-05, "loss": 0.44209086894989014, "step": 1091, "token_acc": 0.8525812350541567 }, { "epoch": 0.05892192305617008, "grad_norm": 0.6507221460342407, "learning_rate": 1.9956162343491948e-05, "loss": 0.4723697900772095, "step": 1092, "token_acc": 0.838099073701168 }, { "epoch": 0.05897588086116657, "grad_norm": 0.7243779301643372, "learning_rate": 1.9955998737734422e-05, "loss": 0.4994543194770813, "step": 1093, "token_acc": 0.8288863109048724 }, { "epoch": 0.05902983866616306, "grad_norm": 0.603290855884552, "learning_rate": 1.99558348279233e-05, "loss": 0.4584633708000183, "step": 1094, "token_acc": 0.848338214695021 }, { "epoch": 0.05908379647115955, "grad_norm": 0.783329427242279, "learning_rate": 1.995567061406359e-05, "loss": 0.5327292680740356, "step": 1095, "token_acc": 0.8285109386026818 }, { "epoch": 0.05913775427615604, "grad_norm": 0.5519493222236633, "learning_rate": 1.9955506096160312e-05, "loss": 0.46279221773147583, "step": 1096, "token_acc": 0.8396977465929024 }, { "epoch": 0.05919171208115254, "grad_norm": 0.5525190830230713, "learning_rate": 1.9955341274218484e-05, "loss": 0.4509758949279785, "step": 1097, "token_acc": 0.8504391769943448 }, { "epoch": 0.05924566988614903, "grad_norm": 0.7081436514854431, "learning_rate": 1.9955176148243142e-05, "loss": 0.47638219594955444, "step": 1098, "token_acc": 0.8393748643368787 }, { "epoch": 0.05929962769114552, "grad_norm": 0.5509647727012634, "learning_rate": 1.9955010718239326e-05, "loss": 0.491586297750473, "step": 1099, "token_acc": 0.8354906682721253 }, { "epoch": 0.05935358549614202, "grad_norm": 0.632260262966156, "learning_rate": 1.9954844984212092e-05, "loss": 0.5257197618484497, "step": 1100, "token_acc": 0.8344159418694264 }, { "epoch": 0.05940754330113851, "grad_norm": 0.589224100112915, "learning_rate": 1.99546789461665e-05, "loss": 0.5709782242774963, "step": 1101, "token_acc": 0.8133432171979244 }, { "epoch": 0.059461501106135004, "grad_norm": 0.5814707279205322, "learning_rate": 1.995451260410762e-05, "loss": 0.47362393140792847, "step": 1102, "token_acc": 0.8413384930571364 }, { "epoch": 0.0595154589111315, "grad_norm": 0.7109665870666504, "learning_rate": 1.9954345958040535e-05, "loss": 0.5323310494422913, "step": 1103, "token_acc": 0.8282290279627164 }, { "epoch": 0.05956941671612799, "grad_norm": 0.5867555141448975, "learning_rate": 1.9954179007970328e-05, "loss": 0.4349838197231293, "step": 1104, "token_acc": 0.8489897199574619 }, { "epoch": 0.059623374521124484, "grad_norm": 0.59521484375, "learning_rate": 1.9954011753902104e-05, "loss": 0.5314978361129761, "step": 1105, "token_acc": 0.83014424492199 }, { "epoch": 0.05967733232612097, "grad_norm": 0.6859552264213562, "learning_rate": 1.995384419584097e-05, "loss": 0.5168694257736206, "step": 1106, "token_acc": 0.8321100917431192 }, { "epoch": 0.059731290131117463, "grad_norm": 0.5813125967979431, "learning_rate": 1.995367633379204e-05, "loss": 0.5393570065498352, "step": 1107, "token_acc": 0.823268059897976 }, { "epoch": 0.05978524793611396, "grad_norm": 0.6190792918205261, "learning_rate": 1.9953508167760442e-05, "loss": 0.4903565049171448, "step": 1108, "token_acc": 0.8345302214242968 }, { "epoch": 0.05983920574111045, "grad_norm": 0.5871873497962952, "learning_rate": 1.9953339697751316e-05, "loss": 0.4870937168598175, "step": 1109, "token_acc": 0.844851904090268 }, { "epoch": 0.059893163546106944, "grad_norm": 0.6007868647575378, "learning_rate": 1.9953170923769794e-05, "loss": 0.4346463084220886, "step": 1110, "token_acc": 0.8523936170212766 }, { "epoch": 0.05994712135110344, "grad_norm": 0.5824307799339294, "learning_rate": 1.9953001845821046e-05, "loss": 0.42878520488739014, "step": 1111, "token_acc": 0.861455525606469 }, { "epoch": 0.06000107915609993, "grad_norm": 0.5652986168861389, "learning_rate": 1.995283246391023e-05, "loss": 0.49310117959976196, "step": 1112, "token_acc": 0.8455252405430341 }, { "epoch": 0.060055036961096424, "grad_norm": 0.6649380922317505, "learning_rate": 1.9952662778042516e-05, "loss": 0.5197439789772034, "step": 1113, "token_acc": 0.8300037693177534 }, { "epoch": 0.06010899476609292, "grad_norm": 0.4908186197280884, "learning_rate": 1.9952492788223088e-05, "loss": 0.45965641736984253, "step": 1114, "token_acc": 0.8442762737156256 }, { "epoch": 0.06016295257108941, "grad_norm": 0.6093283295631409, "learning_rate": 1.995232249445714e-05, "loss": 0.47901809215545654, "step": 1115, "token_acc": 0.8414292041931197 }, { "epoch": 0.060216910376085904, "grad_norm": 0.5719469785690308, "learning_rate": 1.9952151896749866e-05, "loss": 0.4485974907875061, "step": 1116, "token_acc": 0.8525475463723878 }, { "epoch": 0.06027086818108239, "grad_norm": 0.49022534489631653, "learning_rate": 1.995198099510648e-05, "loss": 0.4567856192588806, "step": 1117, "token_acc": 0.8464780252859723 }, { "epoch": 0.060324825986078884, "grad_norm": 0.6247151494026184, "learning_rate": 1.9951809789532203e-05, "loss": 0.5181455016136169, "step": 1118, "token_acc": 0.8301953193735703 }, { "epoch": 0.06037878379107538, "grad_norm": 0.5441950559616089, "learning_rate": 1.9951638280032265e-05, "loss": 0.447767972946167, "step": 1119, "token_acc": 0.8510036754311563 }, { "epoch": 0.06043274159607187, "grad_norm": 0.48593375086784363, "learning_rate": 1.9951466466611898e-05, "loss": 0.4890456795692444, "step": 1120, "token_acc": 0.8294561458660799 }, { "epoch": 0.060486699401068364, "grad_norm": 0.5711641907691956, "learning_rate": 1.9951294349276353e-05, "loss": 0.43380677700042725, "step": 1121, "token_acc": 0.8549962434259955 }, { "epoch": 0.06054065720606486, "grad_norm": 0.7183653712272644, "learning_rate": 1.9951121928030886e-05, "loss": 0.5022600293159485, "step": 1122, "token_acc": 0.8345354263894782 }, { "epoch": 0.06059461501106135, "grad_norm": 0.6787493228912354, "learning_rate": 1.9950949202880762e-05, "loss": 0.5321146249771118, "step": 1123, "token_acc": 0.8230358504958047 }, { "epoch": 0.060648572816057844, "grad_norm": 0.7166277766227722, "learning_rate": 1.9950776173831252e-05, "loss": 0.4939539432525635, "step": 1124, "token_acc": 0.8398996606167921 }, { "epoch": 0.06070253062105434, "grad_norm": 0.6248577833175659, "learning_rate": 1.9950602840887646e-05, "loss": 0.5109135508537292, "step": 1125, "token_acc": 0.8334413480233311 }, { "epoch": 0.06075648842605083, "grad_norm": 0.47504448890686035, "learning_rate": 1.995042920405524e-05, "loss": 0.445282906293869, "step": 1126, "token_acc": 0.8544389978213508 }, { "epoch": 0.060810446231047324, "grad_norm": 0.7165715098381042, "learning_rate": 1.995025526333933e-05, "loss": 0.5146650075912476, "step": 1127, "token_acc": 0.8297358490566038 }, { "epoch": 0.06086440403604381, "grad_norm": 0.6422531604766846, "learning_rate": 1.995008101874523e-05, "loss": 0.4827852249145508, "step": 1128, "token_acc": 0.8431750741839763 }, { "epoch": 0.060918361841040304, "grad_norm": 0.550469160079956, "learning_rate": 1.9949906470278262e-05, "loss": 0.4184616208076477, "step": 1129, "token_acc": 0.8573287077189939 }, { "epoch": 0.0609723196460368, "grad_norm": 0.5821092128753662, "learning_rate": 1.994973161794376e-05, "loss": 0.47022974491119385, "step": 1130, "token_acc": 0.8463634458184867 }, { "epoch": 0.06102627745103329, "grad_norm": 0.4800841510295868, "learning_rate": 1.994955646174706e-05, "loss": 0.5293253660202026, "step": 1131, "token_acc": 0.828777577355349 }, { "epoch": 0.061080235256029784, "grad_norm": 0.5881680250167847, "learning_rate": 1.994938100169351e-05, "loss": 0.4592326283454895, "step": 1132, "token_acc": 0.8472768107804605 }, { "epoch": 0.06113419306102628, "grad_norm": 0.6037256121635437, "learning_rate": 1.9949205237788473e-05, "loss": 0.48783326148986816, "step": 1133, "token_acc": 0.8316672616922528 }, { "epoch": 0.06118815086602277, "grad_norm": 0.6432193517684937, "learning_rate": 1.994902917003731e-05, "loss": 0.48477277159690857, "step": 1134, "token_acc": 0.8324895771292437 }, { "epoch": 0.061242108671019264, "grad_norm": 0.5841206908226013, "learning_rate": 1.9948852798445406e-05, "loss": 0.49649497866630554, "step": 1135, "token_acc": 0.8346198830409357 }, { "epoch": 0.06129606647601576, "grad_norm": 0.5571247339248657, "learning_rate": 1.9948676123018144e-05, "loss": 0.5475364327430725, "step": 1136, "token_acc": 0.8187411870273041 }, { "epoch": 0.06135002428101225, "grad_norm": 0.5692602396011353, "learning_rate": 1.994849914376092e-05, "loss": 0.5412395000457764, "step": 1137, "token_acc": 0.82 }, { "epoch": 0.061403982086008745, "grad_norm": 0.549220621585846, "learning_rate": 1.9948321860679137e-05, "loss": 0.48840993642807007, "step": 1138, "token_acc": 0.8368271954674221 }, { "epoch": 0.06145793989100523, "grad_norm": 0.5568659901618958, "learning_rate": 1.9948144273778213e-05, "loss": 0.4229722023010254, "step": 1139, "token_acc": 0.8532961931290622 }, { "epoch": 0.061511897696001724, "grad_norm": 0.6322149038314819, "learning_rate": 1.9947966383063566e-05, "loss": 0.4966370165348053, "step": 1140, "token_acc": 0.8389765589091466 }, { "epoch": 0.06156585550099822, "grad_norm": 0.4779130518436432, "learning_rate": 1.994778818854063e-05, "loss": 0.49741849303245544, "step": 1141, "token_acc": 0.8381472115000665 }, { "epoch": 0.06161981330599471, "grad_norm": 0.5957105755805969, "learning_rate": 1.994760969021485e-05, "loss": 0.5084390640258789, "step": 1142, "token_acc": 0.8350372552417259 }, { "epoch": 0.061673771110991205, "grad_norm": 0.6688146591186523, "learning_rate": 1.994743088809168e-05, "loss": 0.5211917161941528, "step": 1143, "token_acc": 0.8237129485179407 }, { "epoch": 0.0617277289159877, "grad_norm": 0.5259957909584045, "learning_rate": 1.994725178217657e-05, "loss": 0.46395206451416016, "step": 1144, "token_acc": 0.8449154269411046 }, { "epoch": 0.06178168672098419, "grad_norm": 0.4644702970981598, "learning_rate": 1.9947072372475e-05, "loss": 0.49293768405914307, "step": 1145, "token_acc": 0.8371227780074411 }, { "epoch": 0.061835644525980685, "grad_norm": 0.5510838627815247, "learning_rate": 1.9946892658992448e-05, "loss": 0.4791584610939026, "step": 1146, "token_acc": 0.8419018167761886 }, { "epoch": 0.06188960233097718, "grad_norm": 0.6404427289962769, "learning_rate": 1.9946712641734397e-05, "loss": 0.4915860593318939, "step": 1147, "token_acc": 0.8362975712373303 }, { "epoch": 0.06194356013597367, "grad_norm": 0.6280640363693237, "learning_rate": 1.9946532320706346e-05, "loss": 0.5345544219017029, "step": 1148, "token_acc": 0.8228397742075554 }, { "epoch": 0.06199751794097016, "grad_norm": 0.4980062246322632, "learning_rate": 1.9946351695913804e-05, "loss": 0.44372108578681946, "step": 1149, "token_acc": 0.8490379173740804 }, { "epoch": 0.06205147574596665, "grad_norm": 0.6306162476539612, "learning_rate": 1.994617076736229e-05, "loss": 0.5303329229354858, "step": 1150, "token_acc": 0.8176286188456574 }, { "epoch": 0.062105433550963145, "grad_norm": 0.4863075613975525, "learning_rate": 1.9945989535057325e-05, "loss": 0.4328776001930237, "step": 1151, "token_acc": 0.8522352235223523 }, { "epoch": 0.06215939135595964, "grad_norm": 0.7151788473129272, "learning_rate": 1.994580799900444e-05, "loss": 0.5136938095092773, "step": 1152, "token_acc": 0.8305841924398626 }, { "epoch": 0.06221334916095613, "grad_norm": 0.5527995228767395, "learning_rate": 1.9945626159209192e-05, "loss": 0.5065222978591919, "step": 1153, "token_acc": 0.8346990675332014 }, { "epoch": 0.062267306965952625, "grad_norm": 0.5607800483703613, "learning_rate": 1.9945444015677124e-05, "loss": 0.46959808468818665, "step": 1154, "token_acc": 0.8464363594628715 }, { "epoch": 0.06232126477094912, "grad_norm": 0.4543270766735077, "learning_rate": 1.99452615684138e-05, "loss": 0.5221773982048035, "step": 1155, "token_acc": 0.8276445698166431 }, { "epoch": 0.06237522257594561, "grad_norm": 0.33070772886276245, "learning_rate": 1.994507881742479e-05, "loss": 0.43948474526405334, "step": 1156, "token_acc": 0.8520359395909004 }, { "epoch": 0.062429180380942105, "grad_norm": 0.6192924380302429, "learning_rate": 1.9944895762715685e-05, "loss": 0.47471484541893005, "step": 1157, "token_acc": 0.8339167639050953 }, { "epoch": 0.0624831381859386, "grad_norm": 0.500623345375061, "learning_rate": 1.9944712404292063e-05, "loss": 0.4563506245613098, "step": 1158, "token_acc": 0.8441247002398081 }, { "epoch": 0.06253709599093508, "grad_norm": 0.616524338722229, "learning_rate": 1.9944528742159533e-05, "loss": 0.4985692501068115, "step": 1159, "token_acc": 0.8362511052166225 }, { "epoch": 0.06259105379593158, "grad_norm": 0.5945762395858765, "learning_rate": 1.99443447763237e-05, "loss": 0.4970732629299164, "step": 1160, "token_acc": 0.8324445691832725 }, { "epoch": 0.06264501160092807, "grad_norm": 0.5356379151344299, "learning_rate": 1.9944160506790182e-05, "loss": 0.47416749596595764, "step": 1161, "token_acc": 0.8391015680180817 }, { "epoch": 0.06269896940592457, "grad_norm": 0.4652452766895294, "learning_rate": 1.9943975933564606e-05, "loss": 0.5041439533233643, "step": 1162, "token_acc": 0.8321861233480177 }, { "epoch": 0.06275292721092106, "grad_norm": 0.5990099906921387, "learning_rate": 1.9943791056652612e-05, "loss": 0.46991339325904846, "step": 1163, "token_acc": 0.8416632805092781 }, { "epoch": 0.06280688501591755, "grad_norm": 0.544572651386261, "learning_rate": 1.9943605876059838e-05, "loss": 0.5818935036659241, "step": 1164, "token_acc": 0.8047150781643974 }, { "epoch": 0.06286084282091405, "grad_norm": 0.6109757423400879, "learning_rate": 1.9943420391791953e-05, "loss": 0.4790326952934265, "step": 1165, "token_acc": 0.838393421884883 }, { "epoch": 0.06291480062591054, "grad_norm": 0.4286647439002991, "learning_rate": 1.994323460385461e-05, "loss": 0.44370216131210327, "step": 1166, "token_acc": 0.8539367637941724 }, { "epoch": 0.06296875843090703, "grad_norm": 0.6435291171073914, "learning_rate": 1.9943048512253488e-05, "loss": 0.4867534637451172, "step": 1167, "token_acc": 0.838318875261741 }, { "epoch": 0.06302271623590353, "grad_norm": 0.7417318820953369, "learning_rate": 1.994286211699427e-05, "loss": 0.48583003878593445, "step": 1168, "token_acc": 0.8366979812112733 }, { "epoch": 0.06307667404090002, "grad_norm": 0.606735110282898, "learning_rate": 1.9942675418082645e-05, "loss": 0.5149112343788147, "step": 1169, "token_acc": 0.8270868824531517 }, { "epoch": 0.06313063184589651, "grad_norm": 0.7570937275886536, "learning_rate": 1.9942488415524323e-05, "loss": 0.4884395897388458, "step": 1170, "token_acc": 0.8368911917098446 }, { "epoch": 0.063184589650893, "grad_norm": 0.5066763758659363, "learning_rate": 1.9942301109325e-05, "loss": 0.445533812046051, "step": 1171, "token_acc": 0.8519878652403001 }, { "epoch": 0.0632385474558895, "grad_norm": 0.5153513550758362, "learning_rate": 1.9942113499490415e-05, "loss": 0.4984830319881439, "step": 1172, "token_acc": 0.8357090643274854 }, { "epoch": 0.06329250526088599, "grad_norm": 0.6307263374328613, "learning_rate": 1.994192558602628e-05, "loss": 0.484153151512146, "step": 1173, "token_acc": 0.838388411045722 }, { "epoch": 0.06334646306588249, "grad_norm": 0.5717456340789795, "learning_rate": 1.994173736893835e-05, "loss": 0.5029699802398682, "step": 1174, "token_acc": 0.8351376430559851 }, { "epoch": 0.06340042087087898, "grad_norm": 0.5017522573471069, "learning_rate": 1.9941548848232363e-05, "loss": 0.4688798785209656, "step": 1175, "token_acc": 0.8466039175782244 }, { "epoch": 0.06345437867587546, "grad_norm": 0.6349281072616577, "learning_rate": 1.9941360023914074e-05, "loss": 0.5720954537391663, "step": 1176, "token_acc": 0.8085632966217052 }, { "epoch": 0.06350833648087195, "grad_norm": 0.6133666038513184, "learning_rate": 1.994117089598926e-05, "loss": 0.3995014727115631, "step": 1177, "token_acc": 0.867269335629215 }, { "epoch": 0.06356229428586845, "grad_norm": 0.4736815094947815, "learning_rate": 1.9940981464463685e-05, "loss": 0.4202980101108551, "step": 1178, "token_acc": 0.8531846401471602 }, { "epoch": 0.06361625209086494, "grad_norm": 0.6363794803619385, "learning_rate": 1.9940791729343146e-05, "loss": 0.4649418592453003, "step": 1179, "token_acc": 0.8388333333333333 }, { "epoch": 0.06367020989586143, "grad_norm": 0.5362752079963684, "learning_rate": 1.994060169063343e-05, "loss": 0.4771791696548462, "step": 1180, "token_acc": 0.8388911760669686 }, { "epoch": 0.06372416770085793, "grad_norm": 0.5100346803665161, "learning_rate": 1.994041134834034e-05, "loss": 0.5015999674797058, "step": 1181, "token_acc": 0.8353961827646038 }, { "epoch": 0.06377812550585442, "grad_norm": 0.6638677716255188, "learning_rate": 1.9940220702469696e-05, "loss": 0.4693527817726135, "step": 1182, "token_acc": 0.8422556677462374 }, { "epoch": 0.06383208331085091, "grad_norm": 0.5440425872802734, "learning_rate": 1.994002975302731e-05, "loss": 0.5549981594085693, "step": 1183, "token_acc": 0.8197860962566845 }, { "epoch": 0.0638860411158474, "grad_norm": 0.6723366379737854, "learning_rate": 1.993983850001902e-05, "loss": 0.4822196364402771, "step": 1184, "token_acc": 0.838280725319006 }, { "epoch": 0.0639399989208439, "grad_norm": 0.5893634557723999, "learning_rate": 1.993964694345067e-05, "loss": 0.5211420059204102, "step": 1185, "token_acc": 0.8290858725761773 }, { "epoch": 0.06399395672584039, "grad_norm": 0.5724483728408813, "learning_rate": 1.9939455083328104e-05, "loss": 0.48018860816955566, "step": 1186, "token_acc": 0.8363138392137909 }, { "epoch": 0.06404791453083689, "grad_norm": 0.4541456699371338, "learning_rate": 1.9939262919657186e-05, "loss": 0.5356809496879578, "step": 1187, "token_acc": 0.8256918068366793 }, { "epoch": 0.06410187233583338, "grad_norm": 0.6018912196159363, "learning_rate": 1.993907045244378e-05, "loss": 0.4713497757911682, "step": 1188, "token_acc": 0.8400231013572047 }, { "epoch": 0.06415583014082987, "grad_norm": 0.6113040447235107, "learning_rate": 1.9938877681693766e-05, "loss": 0.5056936144828796, "step": 1189, "token_acc": 0.83089384680972 }, { "epoch": 0.06420978794582637, "grad_norm": 0.5089733004570007, "learning_rate": 1.993868460741303e-05, "loss": 0.466755747795105, "step": 1190, "token_acc": 0.8484110415216887 }, { "epoch": 0.06426374575082286, "grad_norm": 0.5561826229095459, "learning_rate": 1.9938491229607472e-05, "loss": 0.47055643796920776, "step": 1191, "token_acc": 0.839633304031144 }, { "epoch": 0.06431770355581935, "grad_norm": 0.5648742318153381, "learning_rate": 1.9938297548282994e-05, "loss": 0.5364999771118164, "step": 1192, "token_acc": 0.826174400229852 }, { "epoch": 0.06437166136081585, "grad_norm": 0.46385762095451355, "learning_rate": 1.9938103563445513e-05, "loss": 0.4774753451347351, "step": 1193, "token_acc": 0.8361281369261016 }, { "epoch": 0.06442561916581234, "grad_norm": 0.5157471895217896, "learning_rate": 1.9937909275100952e-05, "loss": 0.42190635204315186, "step": 1194, "token_acc": 0.858195211786372 }, { "epoch": 0.06447957697080883, "grad_norm": 0.5542042851448059, "learning_rate": 1.9937714683255246e-05, "loss": 0.45775920152664185, "step": 1195, "token_acc": 0.8453650533223954 }, { "epoch": 0.06453353477580533, "grad_norm": 0.5675288438796997, "learning_rate": 1.9937519787914336e-05, "loss": 0.45736172795295715, "step": 1196, "token_acc": 0.8443025540275049 }, { "epoch": 0.06458749258080182, "grad_norm": 0.6080393195152283, "learning_rate": 1.9937324589084176e-05, "loss": 0.5234025120735168, "step": 1197, "token_acc": 0.8246048961884103 }, { "epoch": 0.0646414503857983, "grad_norm": 0.6294237971305847, "learning_rate": 1.9937129086770725e-05, "loss": 0.5260817408561707, "step": 1198, "token_acc": 0.8281298782391507 }, { "epoch": 0.06469540819079479, "grad_norm": 0.5969944596290588, "learning_rate": 1.9936933280979957e-05, "loss": 0.46800923347473145, "step": 1199, "token_acc": 0.8439623316247467 }, { "epoch": 0.06474936599579129, "grad_norm": 0.5283077955245972, "learning_rate": 1.9936737171717847e-05, "loss": 0.5186326503753662, "step": 1200, "token_acc": 0.8297657171751222 }, { "epoch": 0.06480332380078778, "grad_norm": 0.6201987266540527, "learning_rate": 1.993654075899039e-05, "loss": 0.4678921103477478, "step": 1201, "token_acc": 0.8444275966641395 }, { "epoch": 0.06485728160578427, "grad_norm": 0.640687108039856, "learning_rate": 1.9936344042803578e-05, "loss": 0.4880414307117462, "step": 1202, "token_acc": 0.831335436382755 }, { "epoch": 0.06491123941078077, "grad_norm": 0.6087803840637207, "learning_rate": 1.9936147023163423e-05, "loss": 0.4976232051849365, "step": 1203, "token_acc": 0.833421052631579 }, { "epoch": 0.06496519721577726, "grad_norm": 0.47614234685897827, "learning_rate": 1.993594970007594e-05, "loss": 0.4317740201950073, "step": 1204, "token_acc": 0.8544339096486335 }, { "epoch": 0.06501915502077375, "grad_norm": 0.6259710192680359, "learning_rate": 1.9935752073547158e-05, "loss": 0.5162205696105957, "step": 1205, "token_acc": 0.8378176382660688 }, { "epoch": 0.06507311282577025, "grad_norm": 0.4465901553630829, "learning_rate": 1.993555414358311e-05, "loss": 0.4784350097179413, "step": 1206, "token_acc": 0.84105759003191 }, { "epoch": 0.06512707063076674, "grad_norm": 0.6325904130935669, "learning_rate": 1.993535591018984e-05, "loss": 0.4474705457687378, "step": 1207, "token_acc": 0.8505843071786311 }, { "epoch": 0.06518102843576323, "grad_norm": 0.5566404461860657, "learning_rate": 1.9935157373373405e-05, "loss": 0.44536668062210083, "step": 1208, "token_acc": 0.8501284999323685 }, { "epoch": 0.06523498624075973, "grad_norm": 0.5723443627357483, "learning_rate": 1.9934958533139864e-05, "loss": 0.5180416107177734, "step": 1209, "token_acc": 0.825696316262354 }, { "epoch": 0.06528894404575622, "grad_norm": 0.5882156491279602, "learning_rate": 1.9934759389495296e-05, "loss": 0.46991994976997375, "step": 1210, "token_acc": 0.8412989393740998 }, { "epoch": 0.06534290185075271, "grad_norm": 0.5812454223632812, "learning_rate": 1.9934559942445776e-05, "loss": 0.4752526879310608, "step": 1211, "token_acc": 0.838450409901945 }, { "epoch": 0.0653968596557492, "grad_norm": 0.6064922213554382, "learning_rate": 1.9934360191997396e-05, "loss": 0.4842025637626648, "step": 1212, "token_acc": 0.8410150181253236 }, { "epoch": 0.0654508174607457, "grad_norm": 0.6365607976913452, "learning_rate": 1.993416013815626e-05, "loss": 0.5102586150169373, "step": 1213, "token_acc": 0.8330932949271883 }, { "epoch": 0.0655047752657422, "grad_norm": 0.6323693990707397, "learning_rate": 1.9933959780928477e-05, "loss": 0.5167931318283081, "step": 1214, "token_acc": 0.8341271144687141 }, { "epoch": 0.06555873307073869, "grad_norm": 0.5189071297645569, "learning_rate": 1.9933759120320163e-05, "loss": 0.43850839138031006, "step": 1215, "token_acc": 0.8492009288348586 }, { "epoch": 0.06561269087573518, "grad_norm": 0.5859302878379822, "learning_rate": 1.993355815633745e-05, "loss": 0.495996356010437, "step": 1216, "token_acc": 0.8326870601752119 }, { "epoch": 0.06566664868073167, "grad_norm": 0.5641906261444092, "learning_rate": 1.9933356888986473e-05, "loss": 0.4900732636451721, "step": 1217, "token_acc": 0.8366893692626591 }, { "epoch": 0.06572060648572817, "grad_norm": 0.5961353778839111, "learning_rate": 1.9933155318273373e-05, "loss": 0.47435683012008667, "step": 1218, "token_acc": 0.8420376024425518 }, { "epoch": 0.06577456429072465, "grad_norm": 0.656708836555481, "learning_rate": 1.9932953444204317e-05, "loss": 0.5088629126548767, "step": 1219, "token_acc": 0.8359594822734947 }, { "epoch": 0.06582852209572114, "grad_norm": 0.524762749671936, "learning_rate": 1.9932751266785467e-05, "loss": 0.4257371425628662, "step": 1220, "token_acc": 0.8529948099312666 }, { "epoch": 0.06588247990071763, "grad_norm": 0.4847758412361145, "learning_rate": 1.993254878602299e-05, "loss": 0.4742787778377533, "step": 1221, "token_acc": 0.8427428317403868 }, { "epoch": 0.06593643770571413, "grad_norm": 0.6317684650421143, "learning_rate": 1.9932346001923078e-05, "loss": 0.5285001993179321, "step": 1222, "token_acc": 0.827877351503481 }, { "epoch": 0.06599039551071062, "grad_norm": 0.5505062341690063, "learning_rate": 1.9932142914491922e-05, "loss": 0.4371892809867859, "step": 1223, "token_acc": 0.8499376225271787 }, { "epoch": 0.06604435331570711, "grad_norm": 0.5779229998588562, "learning_rate": 1.993193952373572e-05, "loss": 0.43891456723213196, "step": 1224, "token_acc": 0.8548142008564719 }, { "epoch": 0.0660983111207036, "grad_norm": 0.6213836669921875, "learning_rate": 1.993173582966069e-05, "loss": 0.5070400834083557, "step": 1225, "token_acc": 0.8323300283286119 }, { "epoch": 0.0661522689257001, "grad_norm": 0.5182752013206482, "learning_rate": 1.9931531832273044e-05, "loss": 0.47261470556259155, "step": 1226, "token_acc": 0.8381706244503079 }, { "epoch": 0.0662062267306966, "grad_norm": 0.5002434253692627, "learning_rate": 1.9931327531579027e-05, "loss": 0.4358627498149872, "step": 1227, "token_acc": 0.8523629735711495 }, { "epoch": 0.06626018453569309, "grad_norm": 0.48660698533058167, "learning_rate": 1.9931122927584862e-05, "loss": 0.4731704592704773, "step": 1228, "token_acc": 0.838010776829816 }, { "epoch": 0.06631414234068958, "grad_norm": 0.7714017629623413, "learning_rate": 1.9930918020296802e-05, "loss": 0.44995933771133423, "step": 1229, "token_acc": 0.8526560682435053 }, { "epoch": 0.06636810014568607, "grad_norm": 0.6607722640037537, "learning_rate": 1.9930712809721114e-05, "loss": 0.5093321204185486, "step": 1230, "token_acc": 0.8391201599709144 }, { "epoch": 0.06642205795068257, "grad_norm": 0.5660085678100586, "learning_rate": 1.9930507295864056e-05, "loss": 0.46352025866508484, "step": 1231, "token_acc": 0.844368520263902 }, { "epoch": 0.06647601575567906, "grad_norm": 0.4890058934688568, "learning_rate": 1.9930301478731903e-05, "loss": 0.4218636751174927, "step": 1232, "token_acc": 0.8592299497793334 }, { "epoch": 0.06652997356067555, "grad_norm": 0.5196077227592468, "learning_rate": 1.9930095358330946e-05, "loss": 0.45813173055648804, "step": 1233, "token_acc": 0.8495075508864084 }, { "epoch": 0.06658393136567205, "grad_norm": 0.5641770362854004, "learning_rate": 1.9929888934667482e-05, "loss": 0.4654170870780945, "step": 1234, "token_acc": 0.8449056603773585 }, { "epoch": 0.06663788917066854, "grad_norm": 0.47955137491226196, "learning_rate": 1.9929682207747808e-05, "loss": 0.49401015043258667, "step": 1235, "token_acc": 0.835987864998104 }, { "epoch": 0.06669184697566503, "grad_norm": 0.693811297416687, "learning_rate": 1.9929475177578243e-05, "loss": 0.5079951286315918, "step": 1236, "token_acc": 0.8316653322658126 }, { "epoch": 0.06674580478066153, "grad_norm": 0.5892663598060608, "learning_rate": 1.9929267844165103e-05, "loss": 0.39118692278862, "step": 1237, "token_acc": 0.8665793413173652 }, { "epoch": 0.06679976258565802, "grad_norm": 0.5276252031326294, "learning_rate": 1.9929060207514726e-05, "loss": 0.47978082299232483, "step": 1238, "token_acc": 0.8333333333333334 }, { "epoch": 0.06685372039065451, "grad_norm": 0.5007070899009705, "learning_rate": 1.9928852267633453e-05, "loss": 0.45726144313812256, "step": 1239, "token_acc": 0.853866004014145 }, { "epoch": 0.06690767819565101, "grad_norm": 0.5897418260574341, "learning_rate": 1.9928644024527633e-05, "loss": 0.4341357946395874, "step": 1240, "token_acc": 0.8499079189686924 }, { "epoch": 0.06696163600064749, "grad_norm": 0.6538006663322449, "learning_rate": 1.9928435478203624e-05, "loss": 0.4450291693210602, "step": 1241, "token_acc": 0.8498659517426274 }, { "epoch": 0.06701559380564398, "grad_norm": 0.5940072536468506, "learning_rate": 1.9928226628667794e-05, "loss": 0.5558756589889526, "step": 1242, "token_acc": 0.8192184497117233 }, { "epoch": 0.06706955161064047, "grad_norm": 0.5964893698692322, "learning_rate": 1.9928017475926528e-05, "loss": 0.5203239321708679, "step": 1243, "token_acc": 0.8244828837546873 }, { "epoch": 0.06712350941563697, "grad_norm": 0.5768956542015076, "learning_rate": 1.9927808019986204e-05, "loss": 0.4884034991264343, "step": 1244, "token_acc": 0.8373164902697167 }, { "epoch": 0.06717746722063346, "grad_norm": 0.5355230569839478, "learning_rate": 1.992759826085323e-05, "loss": 0.41110503673553467, "step": 1245, "token_acc": 0.8591331269349846 }, { "epoch": 0.06723142502562995, "grad_norm": 0.6656169891357422, "learning_rate": 1.9927388198534e-05, "loss": 0.4800512194633484, "step": 1246, "token_acc": 0.836864406779661 }, { "epoch": 0.06728538283062645, "grad_norm": 0.49055036902427673, "learning_rate": 1.992717783303494e-05, "loss": 0.4248749613761902, "step": 1247, "token_acc": 0.8520439292251373 }, { "epoch": 0.06733934063562294, "grad_norm": 0.422832727432251, "learning_rate": 1.9926967164362465e-05, "loss": 0.4290503263473511, "step": 1248, "token_acc": 0.8522537562604341 }, { "epoch": 0.06739329844061943, "grad_norm": 0.5286173820495605, "learning_rate": 1.9926756192523016e-05, "loss": 0.48406046628952026, "step": 1249, "token_acc": 0.8368046736093472 }, { "epoch": 0.06744725624561593, "grad_norm": 0.7146163582801819, "learning_rate": 1.9926544917523033e-05, "loss": 0.534309983253479, "step": 1250, "token_acc": 0.8294910179640719 }, { "epoch": 0.06750121405061242, "grad_norm": 0.40396714210510254, "learning_rate": 1.9926333339368968e-05, "loss": 0.4901241064071655, "step": 1251, "token_acc": 0.8336233435952349 }, { "epoch": 0.06755517185560891, "grad_norm": 0.5261311531066895, "learning_rate": 1.9926121458067286e-05, "loss": 0.4791928231716156, "step": 1252, "token_acc": 0.8440540540540541 }, { "epoch": 0.06760912966060541, "grad_norm": 0.5358403325080872, "learning_rate": 1.9925909273624452e-05, "loss": 0.48400211334228516, "step": 1253, "token_acc": 0.841747984726347 }, { "epoch": 0.0676630874656019, "grad_norm": 0.4992331266403198, "learning_rate": 1.9925696786046948e-05, "loss": 0.5025059580802917, "step": 1254, "token_acc": 0.8308907862877533 }, { "epoch": 0.0677170452705984, "grad_norm": 0.5903224349021912, "learning_rate": 1.9925483995341265e-05, "loss": 0.5121138691902161, "step": 1255, "token_acc": 0.8322757427170464 }, { "epoch": 0.06777100307559489, "grad_norm": 0.5827533602714539, "learning_rate": 1.9925270901513905e-05, "loss": 0.4619632959365845, "step": 1256, "token_acc": 0.8444858321138752 }, { "epoch": 0.06782496088059138, "grad_norm": 0.5611162185668945, "learning_rate": 1.9925057504571366e-05, "loss": 0.5057682991027832, "step": 1257, "token_acc": 0.839632685653447 }, { "epoch": 0.06787891868558787, "grad_norm": 0.43444833159446716, "learning_rate": 1.9924843804520174e-05, "loss": 0.4109307527542114, "step": 1258, "token_acc": 0.858041958041958 }, { "epoch": 0.06793287649058437, "grad_norm": 0.5277681946754456, "learning_rate": 1.992462980136685e-05, "loss": 0.48540177941322327, "step": 1259, "token_acc": 0.8340507476212052 }, { "epoch": 0.06798683429558086, "grad_norm": 0.5321789979934692, "learning_rate": 1.9924415495117933e-05, "loss": 0.44523924589157104, "step": 1260, "token_acc": 0.8491523326321 }, { "epoch": 0.06804079210057735, "grad_norm": 0.4184950292110443, "learning_rate": 1.9924200885779966e-05, "loss": 0.4581280052661896, "step": 1261, "token_acc": 0.8501809408926417 }, { "epoch": 0.06809474990557385, "grad_norm": 0.5314778089523315, "learning_rate": 1.992398597335951e-05, "loss": 0.47220855951309204, "step": 1262, "token_acc": 0.8451573103022826 }, { "epoch": 0.06814870771057033, "grad_norm": 0.5507803559303284, "learning_rate": 1.9923770757863114e-05, "loss": 0.5108659267425537, "step": 1263, "token_acc": 0.8284935403579471 }, { "epoch": 0.06820266551556682, "grad_norm": 0.6350851655006409, "learning_rate": 1.9923555239297362e-05, "loss": 0.4758480489253998, "step": 1264, "token_acc": 0.8411873218178769 }, { "epoch": 0.06825662332056331, "grad_norm": 0.6174571514129639, "learning_rate": 1.9923339417668832e-05, "loss": 0.44971078634262085, "step": 1265, "token_acc": 0.8484587566729809 }, { "epoch": 0.06831058112555981, "grad_norm": 0.5151360630989075, "learning_rate": 1.9923123292984114e-05, "loss": 0.4472445845603943, "step": 1266, "token_acc": 0.8485517599688271 }, { "epoch": 0.0683645389305563, "grad_norm": 0.5078762173652649, "learning_rate": 1.9922906865249812e-05, "loss": 0.4540913701057434, "step": 1267, "token_acc": 0.8529311713346185 }, { "epoch": 0.0684184967355528, "grad_norm": 0.6543232202529907, "learning_rate": 1.9922690134472536e-05, "loss": 0.5029000043869019, "step": 1268, "token_acc": 0.8261900996362486 }, { "epoch": 0.06847245454054929, "grad_norm": 0.6596025824546814, "learning_rate": 1.99224731006589e-05, "loss": 0.4753815829753876, "step": 1269, "token_acc": 0.839138884736134 }, { "epoch": 0.06852641234554578, "grad_norm": 0.5389429330825806, "learning_rate": 1.9922255763815536e-05, "loss": 0.45986032485961914, "step": 1270, "token_acc": 0.8498306850742381 }, { "epoch": 0.06858037015054227, "grad_norm": 0.5422073006629944, "learning_rate": 1.992203812394908e-05, "loss": 0.5054836273193359, "step": 1271, "token_acc": 0.834272962179939 }, { "epoch": 0.06863432795553877, "grad_norm": 0.4727802872657776, "learning_rate": 1.992182018106618e-05, "loss": 0.4880853593349457, "step": 1272, "token_acc": 0.8386686991869918 }, { "epoch": 0.06868828576053526, "grad_norm": 0.5847395658493042, "learning_rate": 1.9921601935173487e-05, "loss": 0.4530509114265442, "step": 1273, "token_acc": 0.8478626427117444 }, { "epoch": 0.06874224356553175, "grad_norm": 0.5926699638366699, "learning_rate": 1.9921383386277672e-05, "loss": 0.4880760908126831, "step": 1274, "token_acc": 0.8338408003479774 }, { "epoch": 0.06879620137052825, "grad_norm": 0.7439537644386292, "learning_rate": 1.992116453438541e-05, "loss": 0.4542170763015747, "step": 1275, "token_acc": 0.8421724075086569 }, { "epoch": 0.06885015917552474, "grad_norm": 0.4641847014427185, "learning_rate": 1.9920945379503375e-05, "loss": 0.47318950295448303, "step": 1276, "token_acc": 0.8404403244495945 }, { "epoch": 0.06890411698052124, "grad_norm": 0.4903091490268707, "learning_rate": 1.9920725921638273e-05, "loss": 0.42996305227279663, "step": 1277, "token_acc": 0.8572942423172024 }, { "epoch": 0.06895807478551773, "grad_norm": 0.5794287919998169, "learning_rate": 1.99205061607968e-05, "loss": 0.5107789039611816, "step": 1278, "token_acc": 0.8292265571526352 }, { "epoch": 0.06901203259051422, "grad_norm": 0.44489768147468567, "learning_rate": 1.9920286096985667e-05, "loss": 0.4767281115055084, "step": 1279, "token_acc": 0.8417148790980566 }, { "epoch": 0.06906599039551072, "grad_norm": 0.5065280199050903, "learning_rate": 1.9920065730211597e-05, "loss": 0.44125962257385254, "step": 1280, "token_acc": 0.8549490256807329 }, { "epoch": 0.06911994820050721, "grad_norm": 0.4956282079219818, "learning_rate": 1.9919845060481315e-05, "loss": 0.40787047147750854, "step": 1281, "token_acc": 0.858195211786372 }, { "epoch": 0.0691739060055037, "grad_norm": 0.5355471968650818, "learning_rate": 1.9919624087801566e-05, "loss": 0.44317179918289185, "step": 1282, "token_acc": 0.8521006189911761 }, { "epoch": 0.0692278638105002, "grad_norm": 0.5264106392860413, "learning_rate": 1.9919402812179093e-05, "loss": 0.5003001689910889, "step": 1283, "token_acc": 0.8321519325932861 }, { "epoch": 0.06928182161549667, "grad_norm": 0.5253018140792847, "learning_rate": 1.991918123362066e-05, "loss": 0.4802282154560089, "step": 1284, "token_acc": 0.8363058205447256 }, { "epoch": 0.06933577942049317, "grad_norm": 0.5998337864875793, "learning_rate": 1.991895935213303e-05, "loss": 0.5260027647018433, "step": 1285, "token_acc": 0.8242468772961058 }, { "epoch": 0.06938973722548966, "grad_norm": 0.4718863368034363, "learning_rate": 1.9918737167722977e-05, "loss": 0.45054179430007935, "step": 1286, "token_acc": 0.8488238197071887 }, { "epoch": 0.06944369503048615, "grad_norm": 0.3926260769367218, "learning_rate": 1.991851468039729e-05, "loss": 0.42613980174064636, "step": 1287, "token_acc": 0.8571257914227691 }, { "epoch": 0.06949765283548265, "grad_norm": 0.6091626882553101, "learning_rate": 1.9918291890162768e-05, "loss": 0.49717843532562256, "step": 1288, "token_acc": 0.8355752492482987 }, { "epoch": 0.06955161064047914, "grad_norm": 0.4372808039188385, "learning_rate": 1.9918068797026203e-05, "loss": 0.49552878737449646, "step": 1289, "token_acc": 0.8364261884904087 }, { "epoch": 0.06960556844547564, "grad_norm": 0.5229136943817139, "learning_rate": 1.9917845400994417e-05, "loss": 0.4580409526824951, "step": 1290, "token_acc": 0.8466225852456891 }, { "epoch": 0.06965952625047213, "grad_norm": 0.4571933448314667, "learning_rate": 1.9917621702074234e-05, "loss": 0.4045097529888153, "step": 1291, "token_acc": 0.8567964627014691 }, { "epoch": 0.06971348405546862, "grad_norm": 0.5632969737052917, "learning_rate": 1.9917397700272477e-05, "loss": 0.4359225630760193, "step": 1292, "token_acc": 0.8503155996393147 }, { "epoch": 0.06976744186046512, "grad_norm": 0.574206531047821, "learning_rate": 1.9917173395595997e-05, "loss": 0.5065350532531738, "step": 1293, "token_acc": 0.8276520316334879 }, { "epoch": 0.06982139966546161, "grad_norm": 0.6912460327148438, "learning_rate": 1.9916948788051638e-05, "loss": 0.4326481223106384, "step": 1294, "token_acc": 0.848968105065666 }, { "epoch": 0.0698753574704581, "grad_norm": 0.557405412197113, "learning_rate": 1.991672387764626e-05, "loss": 0.44142580032348633, "step": 1295, "token_acc": 0.8571428571428571 }, { "epoch": 0.0699293152754546, "grad_norm": 0.4669584333896637, "learning_rate": 1.9916498664386734e-05, "loss": 0.4843037724494934, "step": 1296, "token_acc": 0.8415332476335164 }, { "epoch": 0.06998327308045109, "grad_norm": 0.5011492967605591, "learning_rate": 1.9916273148279935e-05, "loss": 0.45260196924209595, "step": 1297, "token_acc": 0.8494194484760522 }, { "epoch": 0.07003723088544758, "grad_norm": 0.5561807155609131, "learning_rate": 1.991604732933275e-05, "loss": 0.5278923511505127, "step": 1298, "token_acc": 0.8240171551107934 }, { "epoch": 0.07009118869044408, "grad_norm": 0.6416005492210388, "learning_rate": 1.9915821207552085e-05, "loss": 0.49797725677490234, "step": 1299, "token_acc": 0.8337628865979382 }, { "epoch": 0.07014514649544057, "grad_norm": 0.578346312046051, "learning_rate": 1.991559478294483e-05, "loss": 0.531478762626648, "step": 1300, "token_acc": 0.8205714285714286 }, { "epoch": 0.07019910430043706, "grad_norm": 0.5808972120285034, "learning_rate": 1.9915368055517912e-05, "loss": 0.5339994430541992, "step": 1301, "token_acc": 0.8267729083665338 }, { "epoch": 0.07025306210543356, "grad_norm": 0.6556023955345154, "learning_rate": 1.9915141025278252e-05, "loss": 0.5060186386108398, "step": 1302, "token_acc": 0.8318116290245074 }, { "epoch": 0.07030701991043005, "grad_norm": 0.46110013127326965, "learning_rate": 1.9914913692232777e-05, "loss": 0.44916796684265137, "step": 1303, "token_acc": 0.846090022816843 }, { "epoch": 0.07036097771542654, "grad_norm": 0.4170759320259094, "learning_rate": 1.991468605638844e-05, "loss": 0.4653155207633972, "step": 1304, "token_acc": 0.8440555841482243 }, { "epoch": 0.07041493552042304, "grad_norm": 0.5468929409980774, "learning_rate": 1.9914458117752185e-05, "loss": 0.48797351121902466, "step": 1305, "token_acc": 0.8410511692521417 }, { "epoch": 0.07046889332541952, "grad_norm": 0.4284650683403015, "learning_rate": 1.9914229876330976e-05, "loss": 0.4870281219482422, "step": 1306, "token_acc": 0.8424835791300037 }, { "epoch": 0.07052285113041601, "grad_norm": 0.5992273092269897, "learning_rate": 1.991400133213179e-05, "loss": 0.4671896696090698, "step": 1307, "token_acc": 0.8449133642134157 }, { "epoch": 0.0705768089354125, "grad_norm": 0.5273756384849548, "learning_rate": 1.9913772485161593e-05, "loss": 0.5506014823913574, "step": 1308, "token_acc": 0.8186794389326035 }, { "epoch": 0.070630766740409, "grad_norm": 0.6220459342002869, "learning_rate": 1.9913543335427384e-05, "loss": 0.42003631591796875, "step": 1309, "token_acc": 0.8531456068436999 }, { "epoch": 0.07068472454540549, "grad_norm": 0.4570407271385193, "learning_rate": 1.9913313882936157e-05, "loss": 0.45114293694496155, "step": 1310, "token_acc": 0.8502596304794107 }, { "epoch": 0.07073868235040198, "grad_norm": 0.5399843454360962, "learning_rate": 1.9913084127694927e-05, "loss": 0.43217313289642334, "step": 1311, "token_acc": 0.8477319781667608 }, { "epoch": 0.07079264015539848, "grad_norm": 0.4596691131591797, "learning_rate": 1.9912854069710698e-05, "loss": 0.563991904258728, "step": 1312, "token_acc": 0.8196846777896831 }, { "epoch": 0.07084659796039497, "grad_norm": 0.6793504357337952, "learning_rate": 1.9912623708990507e-05, "loss": 0.40263307094573975, "step": 1313, "token_acc": 0.8648984666390386 }, { "epoch": 0.07090055576539146, "grad_norm": 0.6494786739349365, "learning_rate": 1.9912393045541384e-05, "loss": 0.5391263365745544, "step": 1314, "token_acc": 0.8227266585596541 }, { "epoch": 0.07095451357038796, "grad_norm": 0.5140119194984436, "learning_rate": 1.991216207937037e-05, "loss": 0.4514752924442291, "step": 1315, "token_acc": 0.8463582369490465 }, { "epoch": 0.07100847137538445, "grad_norm": 0.5629968643188477, "learning_rate": 1.9911930810484526e-05, "loss": 0.4816470742225647, "step": 1316, "token_acc": 0.8393655049151028 }, { "epoch": 0.07106242918038094, "grad_norm": 0.5034362077713013, "learning_rate": 1.991169923889091e-05, "loss": 0.4933367073535919, "step": 1317, "token_acc": 0.8392076900669968 }, { "epoch": 0.07111638698537744, "grad_norm": 0.5439817905426025, "learning_rate": 1.9911467364596598e-05, "loss": 0.47754132747650146, "step": 1318, "token_acc": 0.839952025586354 }, { "epoch": 0.07117034479037393, "grad_norm": 0.6206900477409363, "learning_rate": 1.991123518760867e-05, "loss": 0.5040611028671265, "step": 1319, "token_acc": 0.8302273892973158 }, { "epoch": 0.07122430259537042, "grad_norm": 0.5475913882255554, "learning_rate": 1.991100270793421e-05, "loss": 0.4261705279350281, "step": 1320, "token_acc": 0.8533814488104785 }, { "epoch": 0.07127826040036692, "grad_norm": 0.5199249386787415, "learning_rate": 1.991076992558033e-05, "loss": 0.4605812132358551, "step": 1321, "token_acc": 0.846272637933455 }, { "epoch": 0.07133221820536341, "grad_norm": 0.48018601536750793, "learning_rate": 1.9910536840554128e-05, "loss": 0.4988225996494293, "step": 1322, "token_acc": 0.8351303911735206 }, { "epoch": 0.0713861760103599, "grad_norm": 0.5170028805732727, "learning_rate": 1.991030345286273e-05, "loss": 0.4585496783256531, "step": 1323, "token_acc": 0.8425762392558578 }, { "epoch": 0.0714401338153564, "grad_norm": 0.5526995658874512, "learning_rate": 1.991006976251326e-05, "loss": 0.4589731991291046, "step": 1324, "token_acc": 0.8434546160818335 }, { "epoch": 0.07149409162035289, "grad_norm": 0.6096038818359375, "learning_rate": 1.9909835769512854e-05, "loss": 0.5255440473556519, "step": 1325, "token_acc": 0.8272104607721046 }, { "epoch": 0.07154804942534938, "grad_norm": 0.6325255036354065, "learning_rate": 1.990960147386866e-05, "loss": 0.4322851300239563, "step": 1326, "token_acc": 0.8548744460856721 }, { "epoch": 0.07160200723034586, "grad_norm": 0.6017237305641174, "learning_rate": 1.9909366875587834e-05, "loss": 0.49454236030578613, "step": 1327, "token_acc": 0.8382420971472629 }, { "epoch": 0.07165596503534236, "grad_norm": 0.5459901690483093, "learning_rate": 1.9909131974677543e-05, "loss": 0.4496685862541199, "step": 1328, "token_acc": 0.8516069303002248 }, { "epoch": 0.07170992284033885, "grad_norm": 0.3995489180088043, "learning_rate": 1.9908896771144952e-05, "loss": 0.4518706202507019, "step": 1329, "token_acc": 0.8491025641025641 }, { "epoch": 0.07176388064533534, "grad_norm": 0.5332537889480591, "learning_rate": 1.990866126499725e-05, "loss": 0.5066623091697693, "step": 1330, "token_acc": 0.8322054752383882 }, { "epoch": 0.07181783845033184, "grad_norm": 0.3934301435947418, "learning_rate": 1.990842545624163e-05, "loss": 0.44983211159706116, "step": 1331, "token_acc": 0.8493474463056212 }, { "epoch": 0.07187179625532833, "grad_norm": 0.4393532872200012, "learning_rate": 1.9908189344885294e-05, "loss": 0.4676547646522522, "step": 1332, "token_acc": 0.8429498122226016 }, { "epoch": 0.07192575406032482, "grad_norm": 0.6039730906486511, "learning_rate": 1.990795293093545e-05, "loss": 0.4817817807197571, "step": 1333, "token_acc": 0.8363447559709242 }, { "epoch": 0.07197971186532132, "grad_norm": 0.5092785358428955, "learning_rate": 1.990771621439932e-05, "loss": 0.4832187294960022, "step": 1334, "token_acc": 0.8374318459997775 }, { "epoch": 0.07203366967031781, "grad_norm": 0.5506956577301025, "learning_rate": 1.9907479195284125e-05, "loss": 0.42728978395462036, "step": 1335, "token_acc": 0.853460972017673 }, { "epoch": 0.0720876274753143, "grad_norm": 0.6345677375793457, "learning_rate": 1.9907241873597117e-05, "loss": 0.47903144359588623, "step": 1336, "token_acc": 0.8393996894945661 }, { "epoch": 0.0721415852803108, "grad_norm": 0.6559739708900452, "learning_rate": 1.990700424934554e-05, "loss": 0.44971752166748047, "step": 1337, "token_acc": 0.8483725427006124 }, { "epoch": 0.07219554308530729, "grad_norm": 0.5063704252243042, "learning_rate": 1.9906766322536642e-05, "loss": 0.40085628628730774, "step": 1338, "token_acc": 0.8636296840558413 }, { "epoch": 0.07224950089030378, "grad_norm": 0.524111807346344, "learning_rate": 1.99065280931777e-05, "loss": 0.4299239218235016, "step": 1339, "token_acc": 0.8554514702055686 }, { "epoch": 0.07230345869530028, "grad_norm": 0.6681370139122009, "learning_rate": 1.9906289561275984e-05, "loss": 0.451296865940094, "step": 1340, "token_acc": 0.854251012145749 }, { "epoch": 0.07235741650029677, "grad_norm": 0.5609100461006165, "learning_rate": 1.9906050726838778e-05, "loss": 0.508368730545044, "step": 1341, "token_acc": 0.8341333333333333 }, { "epoch": 0.07241137430529326, "grad_norm": 0.4978311061859131, "learning_rate": 1.9905811589873384e-05, "loss": 0.5174219012260437, "step": 1342, "token_acc": 0.8294058661318626 }, { "epoch": 0.07246533211028976, "grad_norm": 0.5852572917938232, "learning_rate": 1.9905572150387093e-05, "loss": 0.5272102952003479, "step": 1343, "token_acc": 0.8255306775225356 }, { "epoch": 0.07251928991528625, "grad_norm": 0.5345118045806885, "learning_rate": 1.9905332408387227e-05, "loss": 0.41704538464546204, "step": 1344, "token_acc": 0.8526753693874624 }, { "epoch": 0.07257324772028274, "grad_norm": 0.540023148059845, "learning_rate": 1.99050923638811e-05, "loss": 0.5072422027587891, "step": 1345, "token_acc": 0.8354952115407929 }, { "epoch": 0.07262720552527924, "grad_norm": 0.6438723802566528, "learning_rate": 1.990485201687605e-05, "loss": 0.5122607350349426, "step": 1346, "token_acc": 0.8310828356271098 }, { "epoch": 0.07268116333027573, "grad_norm": 0.5193858742713928, "learning_rate": 1.9904611367379415e-05, "loss": 0.456391304731369, "step": 1347, "token_acc": 0.8462732919254659 }, { "epoch": 0.07273512113527222, "grad_norm": 0.5612554550170898, "learning_rate": 1.990437041539854e-05, "loss": 0.47572922706604004, "step": 1348, "token_acc": 0.843705708027316 }, { "epoch": 0.0727890789402687, "grad_norm": 0.6064562201499939, "learning_rate": 1.990412916094079e-05, "loss": 0.5215303897857666, "step": 1349, "token_acc": 0.8296439981743496 }, { "epoch": 0.0728430367452652, "grad_norm": 0.7054007053375244, "learning_rate": 1.990388760401353e-05, "loss": 0.5161149501800537, "step": 1350, "token_acc": 0.8279916250603963 }, { "epoch": 0.07289699455026169, "grad_norm": 0.6569940447807312, "learning_rate": 1.9903645744624133e-05, "loss": 0.4913308024406433, "step": 1351, "token_acc": 0.8343247025892232 }, { "epoch": 0.07295095235525818, "grad_norm": 0.6440526247024536, "learning_rate": 1.990340358277999e-05, "loss": 0.49559444189071655, "step": 1352, "token_acc": 0.8395101514663229 }, { "epoch": 0.07300491016025468, "grad_norm": 0.4809311628341675, "learning_rate": 1.9903161118488504e-05, "loss": 0.4628472328186035, "step": 1353, "token_acc": 0.8471418653089563 }, { "epoch": 0.07305886796525117, "grad_norm": 0.5249091982841492, "learning_rate": 1.9902918351757067e-05, "loss": 0.46293848752975464, "step": 1354, "token_acc": 0.8426211453744493 }, { "epoch": 0.07311282577024766, "grad_norm": 0.5588371753692627, "learning_rate": 1.9902675282593095e-05, "loss": 0.4204043745994568, "step": 1355, "token_acc": 0.853752131893121 }, { "epoch": 0.07316678357524416, "grad_norm": 0.504923939704895, "learning_rate": 1.9902431911004015e-05, "loss": 0.5001716017723083, "step": 1356, "token_acc": 0.8368669022379269 }, { "epoch": 0.07322074138024065, "grad_norm": 0.5472544431686401, "learning_rate": 1.990218823699726e-05, "loss": 0.45262473821640015, "step": 1357, "token_acc": 0.8490365448504983 }, { "epoch": 0.07327469918523714, "grad_norm": 0.5714627504348755, "learning_rate": 1.990194426058027e-05, "loss": 0.4699208438396454, "step": 1358, "token_acc": 0.8394912174439734 }, { "epoch": 0.07332865699023364, "grad_norm": 0.5586180090904236, "learning_rate": 1.9901699981760494e-05, "loss": 0.4610518217086792, "step": 1359, "token_acc": 0.846585042041599 }, { "epoch": 0.07338261479523013, "grad_norm": 0.4089679419994354, "learning_rate": 1.9901455400545397e-05, "loss": 0.4914696216583252, "step": 1360, "token_acc": 0.8376936753873507 }, { "epoch": 0.07343657260022662, "grad_norm": 0.5480287075042725, "learning_rate": 1.9901210516942445e-05, "loss": 0.49636808037757874, "step": 1361, "token_acc": 0.8363247159558702 }, { "epoch": 0.07349053040522312, "grad_norm": 0.5369789004325867, "learning_rate": 1.990096533095912e-05, "loss": 0.422186017036438, "step": 1362, "token_acc": 0.8568154034229829 }, { "epoch": 0.07354448821021961, "grad_norm": 0.6504929661750793, "learning_rate": 1.9900719842602903e-05, "loss": 0.41769251227378845, "step": 1363, "token_acc": 0.8585078267940638 }, { "epoch": 0.0735984460152161, "grad_norm": 0.5415071249008179, "learning_rate": 1.99004740518813e-05, "loss": 0.408119261264801, "step": 1364, "token_acc": 0.8601058503790588 }, { "epoch": 0.0736524038202126, "grad_norm": 0.49561959505081177, "learning_rate": 1.990022795880181e-05, "loss": 0.5330727100372314, "step": 1365, "token_acc": 0.8309194645600175 }, { "epoch": 0.07370636162520909, "grad_norm": 0.5309925675392151, "learning_rate": 1.9899981563371954e-05, "loss": 0.5601843595504761, "step": 1366, "token_acc": 0.8175845680738049 }, { "epoch": 0.07376031943020558, "grad_norm": 0.60980224609375, "learning_rate": 1.989973486559925e-05, "loss": 0.5045973062515259, "step": 1367, "token_acc": 0.8384829404012301 }, { "epoch": 0.07381427723520208, "grad_norm": 0.4836364686489105, "learning_rate": 1.989948786549124e-05, "loss": 0.44527941942214966, "step": 1368, "token_acc": 0.8492893955958145 }, { "epoch": 0.07386823504019857, "grad_norm": 0.6324443817138672, "learning_rate": 1.9899240563055465e-05, "loss": 0.5016385316848755, "step": 1369, "token_acc": 0.8301365882011044 }, { "epoch": 0.07392219284519506, "grad_norm": 0.5499480962753296, "learning_rate": 1.9898992958299475e-05, "loss": 0.4673144519329071, "step": 1370, "token_acc": 0.8493204908299248 }, { "epoch": 0.07397615065019154, "grad_norm": 0.5363385081291199, "learning_rate": 1.9898745051230836e-05, "loss": 0.43899157643318176, "step": 1371, "token_acc": 0.8511096014492754 }, { "epoch": 0.07403010845518804, "grad_norm": 0.5256810188293457, "learning_rate": 1.989849684185711e-05, "loss": 0.4691164493560791, "step": 1372, "token_acc": 0.8440699126092385 }, { "epoch": 0.07408406626018453, "grad_norm": 0.5390320420265198, "learning_rate": 1.9898248330185887e-05, "loss": 0.49620532989501953, "step": 1373, "token_acc": 0.8360078277886497 }, { "epoch": 0.07413802406518102, "grad_norm": 0.591562807559967, "learning_rate": 1.9897999516224754e-05, "loss": 0.5191945433616638, "step": 1374, "token_acc": 0.8295439074200136 }, { "epoch": 0.07419198187017752, "grad_norm": 0.6392876505851746, "learning_rate": 1.9897750399981305e-05, "loss": 0.4866012930870056, "step": 1375, "token_acc": 0.8389261744966443 }, { "epoch": 0.07424593967517401, "grad_norm": 0.5292027592658997, "learning_rate": 1.9897500981463155e-05, "loss": 0.46680140495300293, "step": 1376, "token_acc": 0.8455917394757744 }, { "epoch": 0.0742998974801705, "grad_norm": 0.4962409734725952, "learning_rate": 1.989725126067792e-05, "loss": 0.4701765775680542, "step": 1377, "token_acc": 0.8423247328642168 }, { "epoch": 0.074353855285167, "grad_norm": 0.3045295774936676, "learning_rate": 1.989700123763322e-05, "loss": 0.44666528701782227, "step": 1378, "token_acc": 0.8493545557087354 }, { "epoch": 0.07440781309016349, "grad_norm": 0.49467119574546814, "learning_rate": 1.9896750912336696e-05, "loss": 0.49745407700538635, "step": 1379, "token_acc": 0.8373249441851025 }, { "epoch": 0.07446177089515998, "grad_norm": 0.526201605796814, "learning_rate": 1.9896500284795996e-05, "loss": 0.49482452869415283, "step": 1380, "token_acc": 0.8401486988847584 }, { "epoch": 0.07451572870015648, "grad_norm": 0.6151384115219116, "learning_rate": 1.989624935501876e-05, "loss": 0.5170193910598755, "step": 1381, "token_acc": 0.8298090040927695 }, { "epoch": 0.07456968650515297, "grad_norm": 0.482061505317688, "learning_rate": 1.989599812301267e-05, "loss": 0.3958921432495117, "step": 1382, "token_acc": 0.8589298123697012 }, { "epoch": 0.07462364431014946, "grad_norm": 0.3760363757610321, "learning_rate": 1.9895746588785388e-05, "loss": 0.4964190423488617, "step": 1383, "token_acc": 0.8361186682212722 }, { "epoch": 0.07467760211514596, "grad_norm": 0.7389294505119324, "learning_rate": 1.9895494752344594e-05, "loss": 0.5173277258872986, "step": 1384, "token_acc": 0.8307450363672105 }, { "epoch": 0.07473155992014245, "grad_norm": 0.5549023747444153, "learning_rate": 1.9895242613697986e-05, "loss": 0.5023367404937744, "step": 1385, "token_acc": 0.8298989898989899 }, { "epoch": 0.07478551772513894, "grad_norm": 0.4907512962818146, "learning_rate": 1.989499017285326e-05, "loss": 0.4053237736225128, "step": 1386, "token_acc": 0.8611361587015329 }, { "epoch": 0.07483947553013544, "grad_norm": 0.6317833065986633, "learning_rate": 1.9894737429818127e-05, "loss": 0.5383623838424683, "step": 1387, "token_acc": 0.8242592322288717 }, { "epoch": 0.07489343333513193, "grad_norm": 0.516086995601654, "learning_rate": 1.98944843846003e-05, "loss": 0.4453273415565491, "step": 1388, "token_acc": 0.8466840536512668 }, { "epoch": 0.07494739114012842, "grad_norm": 0.5446065068244934, "learning_rate": 1.9894231037207516e-05, "loss": 0.4644443392753601, "step": 1389, "token_acc": 0.8402377856214007 }, { "epoch": 0.07500134894512492, "grad_norm": 0.6913059949874878, "learning_rate": 1.9893977387647508e-05, "loss": 0.5125647783279419, "step": 1390, "token_acc": 0.8254442522409183 }, { "epoch": 0.07505530675012141, "grad_norm": 0.6530198454856873, "learning_rate": 1.9893723435928022e-05, "loss": 0.5011671781539917, "step": 1391, "token_acc": 0.8362054817003118 }, { "epoch": 0.07510926455511789, "grad_norm": 0.6123100519180298, "learning_rate": 1.9893469182056813e-05, "loss": 0.4884929656982422, "step": 1392, "token_acc": 0.8350819672131148 }, { "epoch": 0.07516322236011438, "grad_norm": 0.5743460655212402, "learning_rate": 1.989321462604165e-05, "loss": 0.4997372627258301, "step": 1393, "token_acc": 0.839744377534718 }, { "epoch": 0.07521718016511088, "grad_norm": 0.6534023284912109, "learning_rate": 1.9892959767890304e-05, "loss": 0.4729054272174835, "step": 1394, "token_acc": 0.8408016946390745 }, { "epoch": 0.07527113797010737, "grad_norm": 0.5974064469337463, "learning_rate": 1.9892704607610554e-05, "loss": 0.4769318997859955, "step": 1395, "token_acc": 0.8413660148719361 }, { "epoch": 0.07532509577510386, "grad_norm": 0.5143241286277771, "learning_rate": 1.9892449145210202e-05, "loss": 0.47833162546157837, "step": 1396, "token_acc": 0.8418506574051104 }, { "epoch": 0.07537905358010036, "grad_norm": 0.6083957552909851, "learning_rate": 1.989219338069704e-05, "loss": 0.5226059556007385, "step": 1397, "token_acc": 0.8287502058290795 }, { "epoch": 0.07543301138509685, "grad_norm": 0.5433923602104187, "learning_rate": 1.9891937314078885e-05, "loss": 0.5083198547363281, "step": 1398, "token_acc": 0.8333901192504259 }, { "epoch": 0.07548696919009334, "grad_norm": 0.5229586362838745, "learning_rate": 1.9891680945363555e-05, "loss": 0.4796973764896393, "step": 1399, "token_acc": 0.836412951347686 }, { "epoch": 0.07554092699508984, "grad_norm": 0.47763505578041077, "learning_rate": 1.9891424274558882e-05, "loss": 0.5012059211730957, "step": 1400, "token_acc": 0.8334461746784022 }, { "epoch": 0.07559488480008633, "grad_norm": 0.5421387553215027, "learning_rate": 1.9891167301672705e-05, "loss": 0.42924684286117554, "step": 1401, "token_acc": 0.8489065606361829 }, { "epoch": 0.07564884260508282, "grad_norm": 0.6041052341461182, "learning_rate": 1.9890910026712865e-05, "loss": 0.3947882652282715, "step": 1402, "token_acc": 0.8586024448093413 }, { "epoch": 0.07570280041007932, "grad_norm": 0.47451677918434143, "learning_rate": 1.9890652449687228e-05, "loss": 0.4464549720287323, "step": 1403, "token_acc": 0.852195423623995 }, { "epoch": 0.07575675821507581, "grad_norm": 0.5325531959533691, "learning_rate": 1.9890394570603652e-05, "loss": 0.4289112091064453, "step": 1404, "token_acc": 0.8507314654103645 }, { "epoch": 0.0758107160200723, "grad_norm": 0.6971294283866882, "learning_rate": 1.9890136389470018e-05, "loss": 0.480583131313324, "step": 1405, "token_acc": 0.8403869407496977 }, { "epoch": 0.0758646738250688, "grad_norm": 0.5496097803115845, "learning_rate": 1.988987790629421e-05, "loss": 0.45788419246673584, "step": 1406, "token_acc": 0.8462356067316209 }, { "epoch": 0.07591863163006529, "grad_norm": 0.5385613441467285, "learning_rate": 1.9889619121084124e-05, "loss": 0.4181528687477112, "step": 1407, "token_acc": 0.8504672897196262 }, { "epoch": 0.07597258943506179, "grad_norm": 0.6062830090522766, "learning_rate": 1.9889360033847657e-05, "loss": 0.491921067237854, "step": 1408, "token_acc": 0.8429063236093495 }, { "epoch": 0.07602654724005828, "grad_norm": 0.6327335238456726, "learning_rate": 1.9889100644592726e-05, "loss": 0.4482782781124115, "step": 1409, "token_acc": 0.8501081350856763 }, { "epoch": 0.07608050504505477, "grad_norm": 0.5143454670906067, "learning_rate": 1.9888840953327255e-05, "loss": 0.45242342352867126, "step": 1410, "token_acc": 0.8488516800543048 }, { "epoch": 0.07613446285005127, "grad_norm": 0.527012050151825, "learning_rate": 1.9888580960059167e-05, "loss": 0.5066280961036682, "step": 1411, "token_acc": 0.8308798741431621 }, { "epoch": 0.07618842065504776, "grad_norm": 0.5888808369636536, "learning_rate": 1.988832066479641e-05, "loss": 0.42696189880371094, "step": 1412, "token_acc": 0.8530798479087452 }, { "epoch": 0.07624237846004425, "grad_norm": 0.6182589530944824, "learning_rate": 1.988806006754693e-05, "loss": 0.5147936344146729, "step": 1413, "token_acc": 0.8300900377577694 }, { "epoch": 0.07629633626504073, "grad_norm": 0.6842318177223206, "learning_rate": 1.9887799168318688e-05, "loss": 0.5045071840286255, "step": 1414, "token_acc": 0.831889400921659 }, { "epoch": 0.07635029407003723, "grad_norm": 0.5139836668968201, "learning_rate": 1.9887537967119644e-05, "loss": 0.4081174433231354, "step": 1415, "token_acc": 0.8573005796301407 }, { "epoch": 0.07640425187503372, "grad_norm": 0.5510874390602112, "learning_rate": 1.9887276463957787e-05, "loss": 0.5042459964752197, "step": 1416, "token_acc": 0.8270577933450087 }, { "epoch": 0.07645820968003021, "grad_norm": 0.43643325567245483, "learning_rate": 1.988701465884109e-05, "loss": 0.4392554759979248, "step": 1417, "token_acc": 0.8506219055669605 }, { "epoch": 0.0765121674850267, "grad_norm": 0.6247559189796448, "learning_rate": 1.9886752551777563e-05, "loss": 0.4832582175731659, "step": 1418, "token_acc": 0.8384201077199281 }, { "epoch": 0.0765661252900232, "grad_norm": 0.5847442150115967, "learning_rate": 1.98864901427752e-05, "loss": 0.48250025510787964, "step": 1419, "token_acc": 0.8392282958199357 }, { "epoch": 0.07662008309501969, "grad_norm": 0.4906121790409088, "learning_rate": 1.9886227431842015e-05, "loss": 0.5005519390106201, "step": 1420, "token_acc": 0.8342342342342343 }, { "epoch": 0.07667404090001619, "grad_norm": 0.5465085506439209, "learning_rate": 1.9885964418986034e-05, "loss": 0.3998347222805023, "step": 1421, "token_acc": 0.8589560526733302 }, { "epoch": 0.07672799870501268, "grad_norm": 0.4114794433116913, "learning_rate": 1.9885701104215293e-05, "loss": 0.5349060893058777, "step": 1422, "token_acc": 0.8239152952008286 }, { "epoch": 0.07678195651000917, "grad_norm": 0.44974401593208313, "learning_rate": 1.988543748753783e-05, "loss": 0.3935275375843048, "step": 1423, "token_acc": 0.8641708922213831 }, { "epoch": 0.07683591431500567, "grad_norm": 0.5401766300201416, "learning_rate": 1.9885173568961694e-05, "loss": 0.47116878628730774, "step": 1424, "token_acc": 0.8430676137858458 }, { "epoch": 0.07688987212000216, "grad_norm": 0.5683671832084656, "learning_rate": 1.9884909348494946e-05, "loss": 0.5221138000488281, "step": 1425, "token_acc": 0.8233597692862292 }, { "epoch": 0.07694382992499865, "grad_norm": 0.5321865677833557, "learning_rate": 1.9884644826145657e-05, "loss": 0.46867072582244873, "step": 1426, "token_acc": 0.8429015988840004 }, { "epoch": 0.07699778772999515, "grad_norm": 0.5428816676139832, "learning_rate": 1.9884380001921905e-05, "loss": 0.49132198095321655, "step": 1427, "token_acc": 0.8392117900314622 }, { "epoch": 0.07705174553499164, "grad_norm": 0.5914968848228455, "learning_rate": 1.9884114875831776e-05, "loss": 0.5040096044540405, "step": 1428, "token_acc": 0.8300113895216401 }, { "epoch": 0.07710570333998813, "grad_norm": 0.4954597055912018, "learning_rate": 1.988384944788337e-05, "loss": 0.48723000288009644, "step": 1429, "token_acc": 0.8383678633560019 }, { "epoch": 0.07715966114498463, "grad_norm": 0.6450979709625244, "learning_rate": 1.9883583718084792e-05, "loss": 0.479068785905838, "step": 1430, "token_acc": 0.8341030851653427 }, { "epoch": 0.07721361894998112, "grad_norm": 0.48830223083496094, "learning_rate": 1.9883317686444153e-05, "loss": 0.44478967785835266, "step": 1431, "token_acc": 0.8500124161907127 }, { "epoch": 0.07726757675497761, "grad_norm": 0.5397602915763855, "learning_rate": 1.988305135296958e-05, "loss": 0.4559566378593445, "step": 1432, "token_acc": 0.8444564775957764 }, { "epoch": 0.0773215345599741, "grad_norm": 0.4744882881641388, "learning_rate": 1.988278471766921e-05, "loss": 0.4513449966907501, "step": 1433, "token_acc": 0.8466893313906427 }, { "epoch": 0.0773754923649706, "grad_norm": 0.6068272590637207, "learning_rate": 1.9882517780551182e-05, "loss": 0.4513712227344513, "step": 1434, "token_acc": 0.849719887955182 }, { "epoch": 0.07742945016996708, "grad_norm": 0.5451595187187195, "learning_rate": 1.9882250541623652e-05, "loss": 0.44781503081321716, "step": 1435, "token_acc": 0.8519455868396077 }, { "epoch": 0.07748340797496357, "grad_norm": 0.5062804222106934, "learning_rate": 1.988198300089478e-05, "loss": 0.4937938451766968, "step": 1436, "token_acc": 0.8383613100129124 }, { "epoch": 0.07753736577996007, "grad_norm": 0.5254486203193665, "learning_rate": 1.9881715158372732e-05, "loss": 0.4762546420097351, "step": 1437, "token_acc": 0.8416833667334669 }, { "epoch": 0.07759132358495656, "grad_norm": 0.6269832849502563, "learning_rate": 1.9881447014065695e-05, "loss": 0.4990687966346741, "step": 1438, "token_acc": 0.8338528255861227 }, { "epoch": 0.07764528138995305, "grad_norm": 0.49943840503692627, "learning_rate": 1.9881178567981855e-05, "loss": 0.4479037821292877, "step": 1439, "token_acc": 0.8430576070901034 }, { "epoch": 0.07769923919494955, "grad_norm": 0.4644757807254791, "learning_rate": 1.988090982012941e-05, "loss": 0.4791947603225708, "step": 1440, "token_acc": 0.8385059049711617 }, { "epoch": 0.07775319699994604, "grad_norm": 0.6764463782310486, "learning_rate": 1.9880640770516565e-05, "loss": 0.4557201862335205, "step": 1441, "token_acc": 0.8536050156739812 }, { "epoch": 0.07780715480494253, "grad_norm": 0.5916129350662231, "learning_rate": 1.9880371419151544e-05, "loss": 0.4450905919075012, "step": 1442, "token_acc": 0.846140350877193 }, { "epoch": 0.07786111260993903, "grad_norm": 0.5413519144058228, "learning_rate": 1.9880101766042563e-05, "loss": 0.4620435833930969, "step": 1443, "token_acc": 0.8495537611559711 }, { "epoch": 0.07791507041493552, "grad_norm": 0.551292359828949, "learning_rate": 1.9879831811197864e-05, "loss": 0.43152594566345215, "step": 1444, "token_acc": 0.8568470866607513 }, { "epoch": 0.07796902821993201, "grad_norm": 0.5185554027557373, "learning_rate": 1.9879561554625687e-05, "loss": 0.4573555290699005, "step": 1445, "token_acc": 0.8420737189091999 }, { "epoch": 0.0780229860249285, "grad_norm": 0.641886830329895, "learning_rate": 1.9879290996334295e-05, "loss": 0.4736826419830322, "step": 1446, "token_acc": 0.8485361722719574 }, { "epoch": 0.078076943829925, "grad_norm": 0.4303627908229828, "learning_rate": 1.987902013633194e-05, "loss": 0.43099865317344666, "step": 1447, "token_acc": 0.8559763480219625 }, { "epoch": 0.07813090163492149, "grad_norm": 0.44529345631599426, "learning_rate": 1.98787489746269e-05, "loss": 0.4651772081851959, "step": 1448, "token_acc": 0.8444834743733035 }, { "epoch": 0.07818485943991799, "grad_norm": 0.572239875793457, "learning_rate": 1.987847751122745e-05, "loss": 0.47241246700286865, "step": 1449, "token_acc": 0.8388273779567187 }, { "epoch": 0.07823881724491448, "grad_norm": 0.5540178418159485, "learning_rate": 1.9878205746141886e-05, "loss": 0.4650888442993164, "step": 1450, "token_acc": 0.8458126793634229 }, { "epoch": 0.07829277504991097, "grad_norm": 0.5792499780654907, "learning_rate": 1.9877933679378508e-05, "loss": 0.4928336441516876, "step": 1451, "token_acc": 0.8367899008115419 }, { "epoch": 0.07834673285490747, "grad_norm": 0.5048767924308777, "learning_rate": 1.9877661310945623e-05, "loss": 0.46561282873153687, "step": 1452, "token_acc": 0.8461796621546034 }, { "epoch": 0.07840069065990396, "grad_norm": 0.6973097920417786, "learning_rate": 1.9877388640851546e-05, "loss": 0.5393507480621338, "step": 1453, "token_acc": 0.8227474150664698 }, { "epoch": 0.07845464846490045, "grad_norm": 0.4853374660015106, "learning_rate": 1.987711566910461e-05, "loss": 0.45905405282974243, "step": 1454, "token_acc": 0.8442060621179992 }, { "epoch": 0.07850860626989695, "grad_norm": 0.5175573229789734, "learning_rate": 1.987684239571315e-05, "loss": 0.4414476454257965, "step": 1455, "token_acc": 0.8470852577705654 }, { "epoch": 0.07856256407489344, "grad_norm": 0.5454365015029907, "learning_rate": 1.9876568820685508e-05, "loss": 0.4915192425251007, "step": 1456, "token_acc": 0.8391545495427972 }, { "epoch": 0.07861652187988992, "grad_norm": 0.722077488899231, "learning_rate": 1.9876294944030043e-05, "loss": 0.4810113310813904, "step": 1457, "token_acc": 0.8410473907431311 }, { "epoch": 0.07867047968488641, "grad_norm": 0.5540366172790527, "learning_rate": 1.9876020765755117e-05, "loss": 0.5169957280158997, "step": 1458, "token_acc": 0.8296824368114063 }, { "epoch": 0.0787244374898829, "grad_norm": 0.5425640940666199, "learning_rate": 1.9875746285869106e-05, "loss": 0.4878886044025421, "step": 1459, "token_acc": 0.833204334365325 }, { "epoch": 0.0787783952948794, "grad_norm": 0.5026578307151794, "learning_rate": 1.987547150438039e-05, "loss": 0.5160890817642212, "step": 1460, "token_acc": 0.8315251084934904 }, { "epoch": 0.0788323530998759, "grad_norm": 0.4095364809036255, "learning_rate": 1.987519642129736e-05, "loss": 0.45355117321014404, "step": 1461, "token_acc": 0.8500927956807829 }, { "epoch": 0.07888631090487239, "grad_norm": 0.4727369546890259, "learning_rate": 1.9874921036628418e-05, "loss": 0.5321314334869385, "step": 1462, "token_acc": 0.8249908211969159 }, { "epoch": 0.07894026870986888, "grad_norm": 0.5361953377723694, "learning_rate": 1.9874645350381976e-05, "loss": 0.49458956718444824, "step": 1463, "token_acc": 0.8383951406649617 }, { "epoch": 0.07899422651486537, "grad_norm": 0.5572580695152283, "learning_rate": 1.9874369362566452e-05, "loss": 0.4551733732223511, "step": 1464, "token_acc": 0.8465509150633506 }, { "epoch": 0.07904818431986187, "grad_norm": 0.598314642906189, "learning_rate": 1.987409307319027e-05, "loss": 0.5099101662635803, "step": 1465, "token_acc": 0.831474296799224 }, { "epoch": 0.07910214212485836, "grad_norm": 0.5022148489952087, "learning_rate": 1.9873816482261878e-05, "loss": 0.5343949198722839, "step": 1466, "token_acc": 0.8213587487781037 }, { "epoch": 0.07915609992985485, "grad_norm": 0.3333439528942108, "learning_rate": 1.987353958978971e-05, "loss": 0.4086126685142517, "step": 1467, "token_acc": 0.8618660389471432 }, { "epoch": 0.07921005773485135, "grad_norm": 0.6366841197013855, "learning_rate": 1.9873262395782234e-05, "loss": 0.487423300743103, "step": 1468, "token_acc": 0.8413552421113858 }, { "epoch": 0.07926401553984784, "grad_norm": 0.5569325685501099, "learning_rate": 1.987298490024791e-05, "loss": 0.4722546339035034, "step": 1469, "token_acc": 0.8485294117647059 }, { "epoch": 0.07931797334484433, "grad_norm": 0.6385082006454468, "learning_rate": 1.9872707103195216e-05, "loss": 0.4773370623588562, "step": 1470, "token_acc": 0.8377786624203821 }, { "epoch": 0.07937193114984083, "grad_norm": 0.5020633339881897, "learning_rate": 1.9872429004632636e-05, "loss": 0.4729565382003784, "step": 1471, "token_acc": 0.8410556305293148 }, { "epoch": 0.07942588895483732, "grad_norm": 0.7133137583732605, "learning_rate": 1.9872150604568654e-05, "loss": 0.40432029962539673, "step": 1472, "token_acc": 0.8592006492189085 }, { "epoch": 0.07947984675983381, "grad_norm": 0.4017447531223297, "learning_rate": 1.9871871903011782e-05, "loss": 0.40312597155570984, "step": 1473, "token_acc": 0.8641481110833125 }, { "epoch": 0.07953380456483031, "grad_norm": 0.5575136542320251, "learning_rate": 1.9871592899970524e-05, "loss": 0.4030664563179016, "step": 1474, "token_acc": 0.8624290163033522 }, { "epoch": 0.0795877623698268, "grad_norm": 0.5412327647209167, "learning_rate": 1.987131359545341e-05, "loss": 0.48620858788490295, "step": 1475, "token_acc": 0.8326354827132432 }, { "epoch": 0.0796417201748233, "grad_norm": 0.6211405396461487, "learning_rate": 1.987103398946896e-05, "loss": 0.5022130012512207, "step": 1476, "token_acc": 0.8384339674662256 }, { "epoch": 0.07969567797981979, "grad_norm": 0.4565776586532593, "learning_rate": 1.9870754082025722e-05, "loss": 0.38866955041885376, "step": 1477, "token_acc": 0.8692289935364728 }, { "epoch": 0.07974963578481627, "grad_norm": 0.6867509484291077, "learning_rate": 1.987047387313224e-05, "loss": 0.4627426564693451, "step": 1478, "token_acc": 0.8429081467056151 }, { "epoch": 0.07980359358981276, "grad_norm": 0.5857715606689453, "learning_rate": 1.987019336279707e-05, "loss": 0.5504225492477417, "step": 1479, "token_acc": 0.8186177248677249 }, { "epoch": 0.07985755139480925, "grad_norm": 0.6874340772628784, "learning_rate": 1.9869912551028777e-05, "loss": 0.4944385588169098, "step": 1480, "token_acc": 0.8370996585920989 }, { "epoch": 0.07991150919980575, "grad_norm": 0.4802103042602539, "learning_rate": 1.9869631437835947e-05, "loss": 0.4707895517349243, "step": 1481, "token_acc": 0.8495616598480421 }, { "epoch": 0.07996546700480224, "grad_norm": 0.5892912745475769, "learning_rate": 1.9869350023227154e-05, "loss": 0.4631848931312561, "step": 1482, "token_acc": 0.8430306905370843 }, { "epoch": 0.08001942480979873, "grad_norm": 0.5526943206787109, "learning_rate": 1.9869068307210998e-05, "loss": 0.39391398429870605, "step": 1483, "token_acc": 0.8660287081339713 }, { "epoch": 0.08007338261479523, "grad_norm": 0.42884892225265503, "learning_rate": 1.986878628979608e-05, "loss": 0.4612683653831482, "step": 1484, "token_acc": 0.8455060057987022 }, { "epoch": 0.08012734041979172, "grad_norm": 0.4317639470100403, "learning_rate": 1.9868503970991018e-05, "loss": 0.4494172930717468, "step": 1485, "token_acc": 0.8461371762921227 }, { "epoch": 0.08018129822478821, "grad_norm": 0.6739170551300049, "learning_rate": 1.9868221350804426e-05, "loss": 0.44817692041397095, "step": 1486, "token_acc": 0.8511118832522585 }, { "epoch": 0.08023525602978471, "grad_norm": 0.5745227932929993, "learning_rate": 1.986793842924494e-05, "loss": 0.5306225419044495, "step": 1487, "token_acc": 0.8295691452397498 }, { "epoch": 0.0802892138347812, "grad_norm": 0.6338131427764893, "learning_rate": 1.98676552063212e-05, "loss": 0.4742180109024048, "step": 1488, "token_acc": 0.8397552505374566 }, { "epoch": 0.0803431716397777, "grad_norm": 0.5424720644950867, "learning_rate": 1.9867371682041855e-05, "loss": 0.5050579309463501, "step": 1489, "token_acc": 0.8350243224461431 }, { "epoch": 0.08039712944477419, "grad_norm": 0.4881014823913574, "learning_rate": 1.986708785641556e-05, "loss": 0.4652128517627716, "step": 1490, "token_acc": 0.8468357271095153 }, { "epoch": 0.08045108724977068, "grad_norm": 0.6941698789596558, "learning_rate": 1.986680372945099e-05, "loss": 0.47080424427986145, "step": 1491, "token_acc": 0.8437451316404424 }, { "epoch": 0.08050504505476717, "grad_norm": 0.44117826223373413, "learning_rate": 1.986651930115682e-05, "loss": 0.45363542437553406, "step": 1492, "token_acc": 0.8503896103896104 }, { "epoch": 0.08055900285976367, "grad_norm": 0.43604445457458496, "learning_rate": 1.9866234571541735e-05, "loss": 0.40676403045654297, "step": 1493, "token_acc": 0.8654993021190204 }, { "epoch": 0.08061296066476016, "grad_norm": 0.48013219237327576, "learning_rate": 1.986594954061443e-05, "loss": 0.44833704829216003, "step": 1494, "token_acc": 0.8500863099190015 }, { "epoch": 0.08066691846975665, "grad_norm": 0.5185183882713318, "learning_rate": 1.986566420838361e-05, "loss": 0.4483377933502197, "step": 1495, "token_acc": 0.8496413934426229 }, { "epoch": 0.08072087627475315, "grad_norm": 0.5146554708480835, "learning_rate": 1.986537857485799e-05, "loss": 0.47798046469688416, "step": 1496, "token_acc": 0.8363067292644757 }, { "epoch": 0.08077483407974964, "grad_norm": 0.6123846173286438, "learning_rate": 1.9865092640046293e-05, "loss": 0.4033438563346863, "step": 1497, "token_acc": 0.8572668112798265 }, { "epoch": 0.08082879188474613, "grad_norm": 0.7428940534591675, "learning_rate": 1.986480640395725e-05, "loss": 0.4951572120189667, "step": 1498, "token_acc": 0.833824804856895 }, { "epoch": 0.08088274968974263, "grad_norm": 0.42900270223617554, "learning_rate": 1.9864519866599603e-05, "loss": 0.4213206171989441, "step": 1499, "token_acc": 0.8548219840995507 }, { "epoch": 0.08093670749473911, "grad_norm": 0.48340940475463867, "learning_rate": 1.9864233027982105e-05, "loss": 0.504544734954834, "step": 1500, "token_acc": 0.8372531757011696 }, { "epoch": 0.0809906652997356, "grad_norm": 0.5763938426971436, "learning_rate": 1.9863945888113518e-05, "loss": 0.5433156490325928, "step": 1501, "token_acc": 0.8244510765295247 }, { "epoch": 0.0810446231047321, "grad_norm": 0.5462517142295837, "learning_rate": 1.98636584470026e-05, "loss": 0.4195777177810669, "step": 1502, "token_acc": 0.8558661033474163 }, { "epoch": 0.08109858090972859, "grad_norm": 0.41852232813835144, "learning_rate": 1.9863370704658142e-05, "loss": 0.48859328031539917, "step": 1503, "token_acc": 0.8387898433279308 }, { "epoch": 0.08115253871472508, "grad_norm": 0.6097555756568909, "learning_rate": 1.9863082661088928e-05, "loss": 0.5033602118492126, "step": 1504, "token_acc": 0.8307065217391304 }, { "epoch": 0.08120649651972157, "grad_norm": 0.5715391039848328, "learning_rate": 1.986279431630375e-05, "loss": 0.4521799087524414, "step": 1505, "token_acc": 0.8505071292077024 }, { "epoch": 0.08126045432471807, "grad_norm": 0.5413199663162231, "learning_rate": 1.9862505670311414e-05, "loss": 0.46927061676979065, "step": 1506, "token_acc": 0.8421560740144811 }, { "epoch": 0.08131441212971456, "grad_norm": 0.6480444073677063, "learning_rate": 1.9862216723120743e-05, "loss": 0.5253616571426392, "step": 1507, "token_acc": 0.8273347074678928 }, { "epoch": 0.08136836993471105, "grad_norm": 0.4645650386810303, "learning_rate": 1.9861927474740558e-05, "loss": 0.4331238865852356, "step": 1508, "token_acc": 0.8523007615574386 }, { "epoch": 0.08142232773970755, "grad_norm": 0.5784320831298828, "learning_rate": 1.9861637925179686e-05, "loss": 0.4620405435562134, "step": 1509, "token_acc": 0.8426233869894905 }, { "epoch": 0.08147628554470404, "grad_norm": 0.5382482409477234, "learning_rate": 1.986134807444698e-05, "loss": 0.4359286427497864, "step": 1510, "token_acc": 0.8546591619762351 }, { "epoch": 0.08153024334970053, "grad_norm": 0.4403004050254822, "learning_rate": 1.9861057922551285e-05, "loss": 0.5037556886672974, "step": 1511, "token_acc": 0.8372535842293907 }, { "epoch": 0.08158420115469703, "grad_norm": 0.537451446056366, "learning_rate": 1.9860767469501466e-05, "loss": 0.4503221809864044, "step": 1512, "token_acc": 0.8495913524914316 }, { "epoch": 0.08163815895969352, "grad_norm": 0.45713362097740173, "learning_rate": 1.986047671530639e-05, "loss": 0.43638962507247925, "step": 1513, "token_acc": 0.8503954802259887 }, { "epoch": 0.08169211676469001, "grad_norm": 0.6579298973083496, "learning_rate": 1.986018565997494e-05, "loss": 0.545390248298645, "step": 1514, "token_acc": 0.8211750490862407 }, { "epoch": 0.08174607456968651, "grad_norm": 0.5749840140342712, "learning_rate": 1.9859894303516e-05, "loss": 0.5555945038795471, "step": 1515, "token_acc": 0.8149837133550488 }, { "epoch": 0.081800032374683, "grad_norm": 0.670536994934082, "learning_rate": 1.9859602645938473e-05, "loss": 0.473885715007782, "step": 1516, "token_acc": 0.8482541783882178 }, { "epoch": 0.0818539901796795, "grad_norm": 0.513029932975769, "learning_rate": 1.9859310687251266e-05, "loss": 0.4705362319946289, "step": 1517, "token_acc": 0.8412698412698413 }, { "epoch": 0.08190794798467599, "grad_norm": 0.5058736801147461, "learning_rate": 1.985901842746329e-05, "loss": 0.5217441320419312, "step": 1518, "token_acc": 0.8263899643797429 }, { "epoch": 0.08196190578967248, "grad_norm": 0.41915199160575867, "learning_rate": 1.9858725866583476e-05, "loss": 0.4309890866279602, "step": 1519, "token_acc": 0.8530825496342738 }, { "epoch": 0.08201586359466898, "grad_norm": 0.5943317413330078, "learning_rate": 1.9858433004620754e-05, "loss": 0.4753282070159912, "step": 1520, "token_acc": 0.8355068683707181 }, { "epoch": 0.08206982139966547, "grad_norm": 0.5348940491676331, "learning_rate": 1.985813984158407e-05, "loss": 0.4574558436870575, "step": 1521, "token_acc": 0.8444528683345969 }, { "epoch": 0.08212377920466195, "grad_norm": 0.5027585029602051, "learning_rate": 1.9857846377482385e-05, "loss": 0.48213836550712585, "step": 1522, "token_acc": 0.8331480795356305 }, { "epoch": 0.08217773700965844, "grad_norm": 0.62265545129776, "learning_rate": 1.9857552612324647e-05, "loss": 0.48396581411361694, "step": 1523, "token_acc": 0.8351439903431627 }, { "epoch": 0.08223169481465493, "grad_norm": 0.661476731300354, "learning_rate": 1.9857258546119836e-05, "loss": 0.48066604137420654, "step": 1524, "token_acc": 0.8397861356932154 }, { "epoch": 0.08228565261965143, "grad_norm": 0.5627854466438293, "learning_rate": 1.9856964178876934e-05, "loss": 0.5025644898414612, "step": 1525, "token_acc": 0.836217183770883 }, { "epoch": 0.08233961042464792, "grad_norm": 0.5559806823730469, "learning_rate": 1.9856669510604926e-05, "loss": 0.3924093246459961, "step": 1526, "token_acc": 0.8660652324774463 }, { "epoch": 0.08239356822964441, "grad_norm": 0.4930817782878876, "learning_rate": 1.9856374541312814e-05, "loss": 0.5004873871803284, "step": 1527, "token_acc": 0.8333333333333334 }, { "epoch": 0.08244752603464091, "grad_norm": 0.5114337801933289, "learning_rate": 1.9856079271009605e-05, "loss": 0.42905688285827637, "step": 1528, "token_acc": 0.853868635594524 }, { "epoch": 0.0825014838396374, "grad_norm": 0.4532170295715332, "learning_rate": 1.9855783699704316e-05, "loss": 0.5839736461639404, "step": 1529, "token_acc": 0.8112056737588652 }, { "epoch": 0.0825554416446339, "grad_norm": 0.608737051486969, "learning_rate": 1.9855487827405975e-05, "loss": 0.49152565002441406, "step": 1530, "token_acc": 0.8315803667745415 }, { "epoch": 0.08260939944963039, "grad_norm": 0.4102543592453003, "learning_rate": 1.9855191654123617e-05, "loss": 0.45777687430381775, "step": 1531, "token_acc": 0.8474037874160049 }, { "epoch": 0.08266335725462688, "grad_norm": 0.49339723587036133, "learning_rate": 1.9854895179866293e-05, "loss": 0.47646746039390564, "step": 1532, "token_acc": 0.8395709455520578 }, { "epoch": 0.08271731505962338, "grad_norm": 0.6965413689613342, "learning_rate": 1.9854598404643044e-05, "loss": 0.457430899143219, "step": 1533, "token_acc": 0.8469387755102041 }, { "epoch": 0.08277127286461987, "grad_norm": 0.5391589999198914, "learning_rate": 1.9854301328462945e-05, "loss": 0.5122662782669067, "step": 1534, "token_acc": 0.831021226809293 }, { "epoch": 0.08282523066961636, "grad_norm": 0.5522968769073486, "learning_rate": 1.9854003951335065e-05, "loss": 0.44900333881378174, "step": 1535, "token_acc": 0.8467762326169406 }, { "epoch": 0.08287918847461286, "grad_norm": 0.541236162185669, "learning_rate": 1.9853706273268484e-05, "loss": 0.4456254541873932, "step": 1536, "token_acc": 0.8493878865979382 }, { "epoch": 0.08293314627960935, "grad_norm": 0.5218865275382996, "learning_rate": 1.9853408294272298e-05, "loss": 0.4785705804824829, "step": 1537, "token_acc": 0.8427233328673284 }, { "epoch": 0.08298710408460584, "grad_norm": 0.5487092733383179, "learning_rate": 1.98531100143556e-05, "loss": 0.41090500354766846, "step": 1538, "token_acc": 0.859879242140329 }, { "epoch": 0.08304106188960234, "grad_norm": 0.609553337097168, "learning_rate": 1.985281143352751e-05, "loss": 0.4912834167480469, "step": 1539, "token_acc": 0.8338403592640881 }, { "epoch": 0.08309501969459883, "grad_norm": 0.5433110594749451, "learning_rate": 1.9852512551797133e-05, "loss": 0.4198407828807831, "step": 1540, "token_acc": 0.8492935635792779 }, { "epoch": 0.08314897749959532, "grad_norm": 0.5270692110061646, "learning_rate": 1.9852213369173606e-05, "loss": 0.4739520251750946, "step": 1541, "token_acc": 0.843613404565323 }, { "epoch": 0.08320293530459182, "grad_norm": 0.5143600702285767, "learning_rate": 1.9851913885666063e-05, "loss": 0.41866910457611084, "step": 1542, "token_acc": 0.8578169235608682 }, { "epoch": 0.0832568931095883, "grad_norm": 0.5736703276634216, "learning_rate": 1.9851614101283653e-05, "loss": 0.5071444511413574, "step": 1543, "token_acc": 0.8337974242951619 }, { "epoch": 0.08331085091458479, "grad_norm": 0.5672385692596436, "learning_rate": 1.985131401603553e-05, "loss": 0.4656599164009094, "step": 1544, "token_acc": 0.8481475592304023 }, { "epoch": 0.08336480871958128, "grad_norm": 0.545076847076416, "learning_rate": 1.985101362993085e-05, "loss": 0.4166492223739624, "step": 1545, "token_acc": 0.8561204711495453 }, { "epoch": 0.08341876652457778, "grad_norm": 0.5500892996788025, "learning_rate": 1.98507129429788e-05, "loss": 0.46525731682777405, "step": 1546, "token_acc": 0.8402656413841314 }, { "epoch": 0.08347272432957427, "grad_norm": 0.4831758439540863, "learning_rate": 1.9850411955188557e-05, "loss": 0.4945463538169861, "step": 1547, "token_acc": 0.8390158172231986 }, { "epoch": 0.08352668213457076, "grad_norm": 0.3970035910606384, "learning_rate": 1.9850110666569314e-05, "loss": 0.506582498550415, "step": 1548, "token_acc": 0.8348254252461952 }, { "epoch": 0.08358063993956726, "grad_norm": 0.4715394675731659, "learning_rate": 1.984980907713027e-05, "loss": 0.45008277893066406, "step": 1549, "token_acc": 0.8474110699079797 }, { "epoch": 0.08363459774456375, "grad_norm": 0.5816324353218079, "learning_rate": 1.9849507186880638e-05, "loss": 0.5087791681289673, "step": 1550, "token_acc": 0.8341947488112467 }, { "epoch": 0.08368855554956024, "grad_norm": 0.4769565463066101, "learning_rate": 1.984920499582964e-05, "loss": 0.4797222912311554, "step": 1551, "token_acc": 0.8397419015011851 }, { "epoch": 0.08374251335455674, "grad_norm": 0.5390366315841675, "learning_rate": 1.9848902503986495e-05, "loss": 0.43757379055023193, "step": 1552, "token_acc": 0.8562749522652318 }, { "epoch": 0.08379647115955323, "grad_norm": 0.5348995327949524, "learning_rate": 1.9848599711360454e-05, "loss": 0.45013782382011414, "step": 1553, "token_acc": 0.8472830850131463 }, { "epoch": 0.08385042896454972, "grad_norm": 0.5735769271850586, "learning_rate": 1.984829661796075e-05, "loss": 0.48119592666625977, "step": 1554, "token_acc": 0.8396696790277345 }, { "epoch": 0.08390438676954622, "grad_norm": 0.47215279936790466, "learning_rate": 1.9847993223796653e-05, "loss": 0.42488837242126465, "step": 1555, "token_acc": 0.8559157026686519 }, { "epoch": 0.08395834457454271, "grad_norm": 0.5379323959350586, "learning_rate": 1.984768952887742e-05, "loss": 0.41451287269592285, "step": 1556, "token_acc": 0.8596445712803217 }, { "epoch": 0.0840123023795392, "grad_norm": 0.5221766233444214, "learning_rate": 1.984738553321233e-05, "loss": 0.4963826537132263, "step": 1557, "token_acc": 0.8380715470267709 }, { "epoch": 0.0840662601845357, "grad_norm": 0.5172806978225708, "learning_rate": 1.984708123681067e-05, "loss": 0.4768429696559906, "step": 1558, "token_acc": 0.843879270721618 }, { "epoch": 0.08412021798953219, "grad_norm": 0.5240123867988586, "learning_rate": 1.9846776639681723e-05, "loss": 0.439966082572937, "step": 1559, "token_acc": 0.8457221081451061 }, { "epoch": 0.08417417579452868, "grad_norm": 0.43410423398017883, "learning_rate": 1.9846471741834797e-05, "loss": 0.49271658062934875, "step": 1560, "token_acc": 0.8357596883329916 }, { "epoch": 0.08422813359952518, "grad_norm": 0.5334721803665161, "learning_rate": 1.9846166543279204e-05, "loss": 0.4398910403251648, "step": 1561, "token_acc": 0.8482780018616196 }, { "epoch": 0.08428209140452167, "grad_norm": 0.3942553699016571, "learning_rate": 1.9845861044024266e-05, "loss": 0.42011770606040955, "step": 1562, "token_acc": 0.8555519862940357 }, { "epoch": 0.08433604920951816, "grad_norm": 0.39832380414009094, "learning_rate": 1.984555524407931e-05, "loss": 0.43289583921432495, "step": 1563, "token_acc": 0.8617173353664348 }, { "epoch": 0.08439000701451466, "grad_norm": 0.5346757769584656, "learning_rate": 1.9845249143453674e-05, "loss": 0.5031218528747559, "step": 1564, "token_acc": 0.8348436129786612 }, { "epoch": 0.08444396481951114, "grad_norm": 0.4341256320476532, "learning_rate": 1.984494274215671e-05, "loss": 0.4241013526916504, "step": 1565, "token_acc": 0.8617419515847268 }, { "epoch": 0.08449792262450763, "grad_norm": 0.3977510333061218, "learning_rate": 1.9844636040197773e-05, "loss": 0.49776872992515564, "step": 1566, "token_acc": 0.8348920863309353 }, { "epoch": 0.08455188042950412, "grad_norm": 0.6400542855262756, "learning_rate": 1.984432903758623e-05, "loss": 0.46808359026908875, "step": 1567, "token_acc": 0.8483412322274881 }, { "epoch": 0.08460583823450062, "grad_norm": 0.5336000323295593, "learning_rate": 1.984402173433146e-05, "loss": 0.45083892345428467, "step": 1568, "token_acc": 0.8464487858316597 }, { "epoch": 0.08465979603949711, "grad_norm": 0.49685630202293396, "learning_rate": 1.9843714130442843e-05, "loss": 0.47093406319618225, "step": 1569, "token_acc": 0.8456383212480774 }, { "epoch": 0.0847137538444936, "grad_norm": 0.49635881185531616, "learning_rate": 1.9843406225929772e-05, "loss": 0.523037314414978, "step": 1570, "token_acc": 0.8271513353115727 }, { "epoch": 0.0847677116494901, "grad_norm": 0.4323063790798187, "learning_rate": 1.984309802080166e-05, "loss": 0.44968390464782715, "step": 1571, "token_acc": 0.8479014707640798 }, { "epoch": 0.08482166945448659, "grad_norm": 0.4822332262992859, "learning_rate": 1.9842789515067907e-05, "loss": 0.4766043424606323, "step": 1572, "token_acc": 0.8333870967741935 }, { "epoch": 0.08487562725948308, "grad_norm": 0.6280155181884766, "learning_rate": 1.9842480708737946e-05, "loss": 0.5187901258468628, "step": 1573, "token_acc": 0.8258473868759392 }, { "epoch": 0.08492958506447958, "grad_norm": 0.5773141384124756, "learning_rate": 1.9842171601821197e-05, "loss": 0.4274929463863373, "step": 1574, "token_acc": 0.8547317661241712 }, { "epoch": 0.08498354286947607, "grad_norm": 0.5210647583007812, "learning_rate": 1.9841862194327112e-05, "loss": 0.39703369140625, "step": 1575, "token_acc": 0.8595619363151114 }, { "epoch": 0.08503750067447256, "grad_norm": 0.3902677893638611, "learning_rate": 1.9841552486265127e-05, "loss": 0.49746108055114746, "step": 1576, "token_acc": 0.8298251125772332 }, { "epoch": 0.08509145847946906, "grad_norm": 0.5853365063667297, "learning_rate": 1.9841242477644712e-05, "loss": 0.526150107383728, "step": 1577, "token_acc": 0.8253302061473892 }, { "epoch": 0.08514541628446555, "grad_norm": 0.5579925179481506, "learning_rate": 1.9840932168475327e-05, "loss": 0.5244516134262085, "step": 1578, "token_acc": 0.8256619144602851 }, { "epoch": 0.08519937408946204, "grad_norm": 0.40361878275871277, "learning_rate": 1.9840621558766455e-05, "loss": 0.42801716923713684, "step": 1579, "token_acc": 0.8544727744165946 }, { "epoch": 0.08525333189445854, "grad_norm": 0.5255351662635803, "learning_rate": 1.9840310648527575e-05, "loss": 0.4777814745903015, "step": 1580, "token_acc": 0.838958180484226 }, { "epoch": 0.08530728969945503, "grad_norm": 0.5808383226394653, "learning_rate": 1.9839999437768186e-05, "loss": 0.4520339071750641, "step": 1581, "token_acc": 0.8449877750611247 }, { "epoch": 0.08536124750445152, "grad_norm": 0.5719937086105347, "learning_rate": 1.9839687926497794e-05, "loss": 0.42210274934768677, "step": 1582, "token_acc": 0.8534303534303534 }, { "epoch": 0.08541520530944802, "grad_norm": 0.6372723579406738, "learning_rate": 1.9839376114725906e-05, "loss": 0.5291851162910461, "step": 1583, "token_acc": 0.8193362682018287 }, { "epoch": 0.08546916311444451, "grad_norm": 0.5575287342071533, "learning_rate": 1.9839064002462053e-05, "loss": 0.5072711110115051, "step": 1584, "token_acc": 0.8341113704568189 }, { "epoch": 0.085523120919441, "grad_norm": 0.5457994937896729, "learning_rate": 1.9838751589715762e-05, "loss": 0.5212131142616272, "step": 1585, "token_acc": 0.83217196173379 }, { "epoch": 0.08557707872443748, "grad_norm": 0.5146862268447876, "learning_rate": 1.9838438876496575e-05, "loss": 0.4182601869106293, "step": 1586, "token_acc": 0.8536036036036037 }, { "epoch": 0.08563103652943398, "grad_norm": 0.5643481016159058, "learning_rate": 1.9838125862814043e-05, "loss": 0.485678493976593, "step": 1587, "token_acc": 0.8395769061260467 }, { "epoch": 0.08568499433443047, "grad_norm": 0.602131724357605, "learning_rate": 1.9837812548677722e-05, "loss": 0.4987150728702545, "step": 1588, "token_acc": 0.8287276040085029 }, { "epoch": 0.08573895213942696, "grad_norm": 0.5635526776313782, "learning_rate": 1.9837498934097183e-05, "loss": 0.4880376160144806, "step": 1589, "token_acc": 0.8349708576186511 }, { "epoch": 0.08579290994442346, "grad_norm": 0.5142425298690796, "learning_rate": 1.9837185019082005e-05, "loss": 0.438714861869812, "step": 1590, "token_acc": 0.8514795412714145 }, { "epoch": 0.08584686774941995, "grad_norm": 0.44745898246765137, "learning_rate": 1.9836870803641775e-05, "loss": 0.3723528981208801, "step": 1591, "token_acc": 0.8732193732193733 }, { "epoch": 0.08590082555441644, "grad_norm": 0.5986337661743164, "learning_rate": 1.9836556287786083e-05, "loss": 0.5163638591766357, "step": 1592, "token_acc": 0.8329348896076502 }, { "epoch": 0.08595478335941294, "grad_norm": 0.5102345943450928, "learning_rate": 1.9836241471524543e-05, "loss": 0.4551064968109131, "step": 1593, "token_acc": 0.8457696827262045 }, { "epoch": 0.08600874116440943, "grad_norm": 0.4692746698856354, "learning_rate": 1.9835926354866763e-05, "loss": 0.4407172203063965, "step": 1594, "token_acc": 0.8533152909336942 }, { "epoch": 0.08606269896940592, "grad_norm": 0.5070312023162842, "learning_rate": 1.983561093782237e-05, "loss": 0.4493437707424164, "step": 1595, "token_acc": 0.8475679193102327 }, { "epoch": 0.08611665677440242, "grad_norm": 0.5207300186157227, "learning_rate": 1.9835295220400998e-05, "loss": 0.40696436166763306, "step": 1596, "token_acc": 0.8619992417540756 }, { "epoch": 0.08617061457939891, "grad_norm": 0.5830812454223633, "learning_rate": 1.983497920261228e-05, "loss": 0.4685361981391907, "step": 1597, "token_acc": 0.8447035957240039 }, { "epoch": 0.0862245723843954, "grad_norm": 0.5658448338508606, "learning_rate": 1.983466288446588e-05, "loss": 0.39836418628692627, "step": 1598, "token_acc": 0.8658116121914219 }, { "epoch": 0.0862785301893919, "grad_norm": 0.42351657152175903, "learning_rate": 1.9834346265971446e-05, "loss": 0.42902475595474243, "step": 1599, "token_acc": 0.851023261726198 }, { "epoch": 0.08633248799438839, "grad_norm": 0.5092726349830627, "learning_rate": 1.983402934713866e-05, "loss": 0.4705023169517517, "step": 1600, "token_acc": 0.8420849916548979 }, { "epoch": 0.08638644579938488, "grad_norm": 0.47406914830207825, "learning_rate": 1.9833712127977187e-05, "loss": 0.4202837646007538, "step": 1601, "token_acc": 0.8559002609452016 }, { "epoch": 0.08644040360438138, "grad_norm": 0.6278513073921204, "learning_rate": 1.9833394608496725e-05, "loss": 0.4938923716545105, "step": 1602, "token_acc": 0.8291338582677166 }, { "epoch": 0.08649436140937787, "grad_norm": 0.6134189963340759, "learning_rate": 1.9833076788706966e-05, "loss": 0.5422148108482361, "step": 1603, "token_acc": 0.8240722937896559 }, { "epoch": 0.08654831921437436, "grad_norm": 0.5292870402336121, "learning_rate": 1.983275866861762e-05, "loss": 0.46099013090133667, "step": 1604, "token_acc": 0.8491036447821442 }, { "epoch": 0.08660227701937086, "grad_norm": 0.45864853262901306, "learning_rate": 1.98324402482384e-05, "loss": 0.5115782618522644, "step": 1605, "token_acc": 0.8322109348247917 }, { "epoch": 0.08665623482436735, "grad_norm": 0.5118425488471985, "learning_rate": 1.983212152757903e-05, "loss": 0.3869907259941101, "step": 1606, "token_acc": 0.864006791171477 }, { "epoch": 0.08671019262936384, "grad_norm": 0.6228498220443726, "learning_rate": 1.9831802506649245e-05, "loss": 0.4119702875614166, "step": 1607, "token_acc": 0.8594930334060769 }, { "epoch": 0.08676415043436032, "grad_norm": 0.46469420194625854, "learning_rate": 1.9831483185458787e-05, "loss": 0.44516342878341675, "step": 1608, "token_acc": 0.8495490078171979 }, { "epoch": 0.08681810823935682, "grad_norm": 0.47378718852996826, "learning_rate": 1.9831163564017406e-05, "loss": 0.4950803518295288, "step": 1609, "token_acc": 0.8385506249283339 }, { "epoch": 0.08687206604435331, "grad_norm": 0.5415022969245911, "learning_rate": 1.9830843642334866e-05, "loss": 0.5121690034866333, "step": 1610, "token_acc": 0.8240379725377183 }, { "epoch": 0.0869260238493498, "grad_norm": 0.47776326537132263, "learning_rate": 1.9830523420420936e-05, "loss": 0.4176393747329712, "step": 1611, "token_acc": 0.8625954198473282 }, { "epoch": 0.0869799816543463, "grad_norm": 0.5978909730911255, "learning_rate": 1.9830202898285397e-05, "loss": 0.47220468521118164, "step": 1612, "token_acc": 0.8407622203811101 }, { "epoch": 0.08703393945934279, "grad_norm": 0.5365549921989441, "learning_rate": 1.9829882075938037e-05, "loss": 0.4326772093772888, "step": 1613, "token_acc": 0.8508102955195425 }, { "epoch": 0.08708789726433928, "grad_norm": 0.5496182441711426, "learning_rate": 1.9829560953388655e-05, "loss": 0.5032153129577637, "step": 1614, "token_acc": 0.8368918445922297 }, { "epoch": 0.08714185506933578, "grad_norm": 0.41272518038749695, "learning_rate": 1.9829239530647053e-05, "loss": 0.46621495485305786, "step": 1615, "token_acc": 0.8411071849234394 }, { "epoch": 0.08719581287433227, "grad_norm": 0.45318838953971863, "learning_rate": 1.9828917807723052e-05, "loss": 0.42074349522590637, "step": 1616, "token_acc": 0.8561762391817467 }, { "epoch": 0.08724977067932876, "grad_norm": 0.5843434929847717, "learning_rate": 1.9828595784626475e-05, "loss": 0.42547672986984253, "step": 1617, "token_acc": 0.8543147208121827 }, { "epoch": 0.08730372848432526, "grad_norm": 0.4594793915748596, "learning_rate": 1.982827346136716e-05, "loss": 0.3973259925842285, "step": 1618, "token_acc": 0.8650983519404573 }, { "epoch": 0.08735768628932175, "grad_norm": 0.4379616975784302, "learning_rate": 1.9827950837954947e-05, "loss": 0.4604052007198334, "step": 1619, "token_acc": 0.8447482581832523 }, { "epoch": 0.08741164409431824, "grad_norm": 0.5992709398269653, "learning_rate": 1.982762791439969e-05, "loss": 0.4525770843029022, "step": 1620, "token_acc": 0.8386771865032735 }, { "epoch": 0.08746560189931474, "grad_norm": 0.44318461418151855, "learning_rate": 1.982730469071125e-05, "loss": 0.4686479866504669, "step": 1621, "token_acc": 0.8428371114746436 }, { "epoch": 0.08751955970431123, "grad_norm": 0.4947344958782196, "learning_rate": 1.98269811668995e-05, "loss": 0.4213520586490631, "step": 1622, "token_acc": 0.8629930743751882 }, { "epoch": 0.08757351750930772, "grad_norm": 0.5505368113517761, "learning_rate": 1.9826657342974316e-05, "loss": 0.4776010513305664, "step": 1623, "token_acc": 0.8389668920090136 }, { "epoch": 0.08762747531430422, "grad_norm": 0.4788734018802643, "learning_rate": 1.98263332189456e-05, "loss": 0.47404980659484863, "step": 1624, "token_acc": 0.8446359435493943 }, { "epoch": 0.08768143311930071, "grad_norm": 0.5789283514022827, "learning_rate": 1.9826008794823234e-05, "loss": 0.49251702427864075, "step": 1625, "token_acc": 0.8340775138417574 }, { "epoch": 0.0877353909242972, "grad_norm": 0.46704888343811035, "learning_rate": 1.9825684070617135e-05, "loss": 0.4169692397117615, "step": 1626, "token_acc": 0.8590808092620746 }, { "epoch": 0.0877893487292937, "grad_norm": 0.5727528929710388, "learning_rate": 1.9825359046337218e-05, "loss": 0.41175442934036255, "step": 1627, "token_acc": 0.8638140747176368 }, { "epoch": 0.08784330653429019, "grad_norm": 0.5975837707519531, "learning_rate": 1.9825033721993413e-05, "loss": 0.48000264167785645, "step": 1628, "token_acc": 0.8371508792670311 }, { "epoch": 0.08789726433928667, "grad_norm": 0.45759719610214233, "learning_rate": 1.982470809759565e-05, "loss": 0.44346117973327637, "step": 1629, "token_acc": 0.8492843109738984 }, { "epoch": 0.08795122214428316, "grad_norm": 0.3683100938796997, "learning_rate": 1.9824382173153874e-05, "loss": 0.369584858417511, "step": 1630, "token_acc": 0.8714793546622915 }, { "epoch": 0.08800517994927966, "grad_norm": 0.5943790078163147, "learning_rate": 1.9824055948678042e-05, "loss": 0.5235075950622559, "step": 1631, "token_acc": 0.8285804816223067 }, { "epoch": 0.08805913775427615, "grad_norm": 0.7094327211380005, "learning_rate": 1.9823729424178112e-05, "loss": 0.48043033480644226, "step": 1632, "token_acc": 0.8367385907962575 }, { "epoch": 0.08811309555927264, "grad_norm": 0.5824402570724487, "learning_rate": 1.9823402599664062e-05, "loss": 0.4519665539264679, "step": 1633, "token_acc": 0.8474218089602705 }, { "epoch": 0.08816705336426914, "grad_norm": 0.5154606103897095, "learning_rate": 1.9823075475145868e-05, "loss": 0.45721668004989624, "step": 1634, "token_acc": 0.8435879709643969 }, { "epoch": 0.08822101116926563, "grad_norm": 0.655167281627655, "learning_rate": 1.9822748050633523e-05, "loss": 0.5088308453559875, "step": 1635, "token_acc": 0.8320865828425911 }, { "epoch": 0.08827496897426212, "grad_norm": 0.5512737035751343, "learning_rate": 1.9822420326137022e-05, "loss": 0.498921275138855, "step": 1636, "token_acc": 0.8339316734221193 }, { "epoch": 0.08832892677925862, "grad_norm": 0.5017382502555847, "learning_rate": 1.9822092301666386e-05, "loss": 0.4057513475418091, "step": 1637, "token_acc": 0.8605417801347284 }, { "epoch": 0.08838288458425511, "grad_norm": 0.492877334356308, "learning_rate": 1.9821763977231618e-05, "loss": 0.4224700331687927, "step": 1638, "token_acc": 0.8552029520295203 }, { "epoch": 0.0884368423892516, "grad_norm": 0.5329962968826294, "learning_rate": 1.982143535284275e-05, "loss": 0.5134291648864746, "step": 1639, "token_acc": 0.8359060402684564 }, { "epoch": 0.0884908001942481, "grad_norm": 0.5601829886436462, "learning_rate": 1.9821106428509823e-05, "loss": 0.5364923477172852, "step": 1640, "token_acc": 0.8209820067093626 }, { "epoch": 0.08854475799924459, "grad_norm": 0.435713529586792, "learning_rate": 1.9820777204242878e-05, "loss": 0.4377875030040741, "step": 1641, "token_acc": 0.8465212545772967 }, { "epoch": 0.08859871580424108, "grad_norm": 0.5340326428413391, "learning_rate": 1.9820447680051965e-05, "loss": 0.44383642077445984, "step": 1642, "token_acc": 0.8464232789977099 }, { "epoch": 0.08865267360923758, "grad_norm": 0.43655920028686523, "learning_rate": 1.982011785594716e-05, "loss": 0.42917245626449585, "step": 1643, "token_acc": 0.8514754098360656 }, { "epoch": 0.08870663141423407, "grad_norm": 0.45578432083129883, "learning_rate": 1.9819787731938524e-05, "loss": 0.4194202423095703, "step": 1644, "token_acc": 0.8522364217252396 }, { "epoch": 0.08876058921923057, "grad_norm": 0.4290468990802765, "learning_rate": 1.9819457308036143e-05, "loss": 0.48434579372406006, "step": 1645, "token_acc": 0.842937174869948 }, { "epoch": 0.08881454702422706, "grad_norm": 0.4639045298099518, "learning_rate": 1.981912658425011e-05, "loss": 0.47964486479759216, "step": 1646, "token_acc": 0.841453779299447 }, { "epoch": 0.08886850482922355, "grad_norm": 0.5131460428237915, "learning_rate": 1.981879556059052e-05, "loss": 0.4804157614707947, "step": 1647, "token_acc": 0.8453723034098817 }, { "epoch": 0.08892246263422005, "grad_norm": 0.49175506830215454, "learning_rate": 1.9818464237067488e-05, "loss": 0.517142653465271, "step": 1648, "token_acc": 0.8321816386969397 }, { "epoch": 0.08897642043921654, "grad_norm": 0.5951141715049744, "learning_rate": 1.981813261369113e-05, "loss": 0.4272799491882324, "step": 1649, "token_acc": 0.8519598315516683 }, { "epoch": 0.08903037824421303, "grad_norm": 0.6094833612442017, "learning_rate": 1.9817800690471577e-05, "loss": 0.45591288805007935, "step": 1650, "token_acc": 0.8490685591179825 }, { "epoch": 0.08908433604920951, "grad_norm": 0.6644779443740845, "learning_rate": 1.9817468467418957e-05, "loss": 0.48423123359680176, "step": 1651, "token_acc": 0.8341737781225756 }, { "epoch": 0.089138293854206, "grad_norm": 0.5172547698020935, "learning_rate": 1.9817135944543424e-05, "loss": 0.39216625690460205, "step": 1652, "token_acc": 0.8672099712368169 }, { "epoch": 0.0891922516592025, "grad_norm": 0.4557560384273529, "learning_rate": 1.9816803121855133e-05, "loss": 0.42550933361053467, "step": 1653, "token_acc": 0.853062519834973 }, { "epoch": 0.08924620946419899, "grad_norm": 0.495259165763855, "learning_rate": 1.9816469999364245e-05, "loss": 0.45848900079727173, "step": 1654, "token_acc": 0.8394648829431438 }, { "epoch": 0.08930016726919549, "grad_norm": 0.563671886920929, "learning_rate": 1.9816136577080935e-05, "loss": 0.48958855867385864, "step": 1655, "token_acc": 0.8388278388278388 }, { "epoch": 0.08935412507419198, "grad_norm": 0.5872361063957214, "learning_rate": 1.9815802855015385e-05, "loss": 0.4833264946937561, "step": 1656, "token_acc": 0.8396076734458388 }, { "epoch": 0.08940808287918847, "grad_norm": 0.5517084002494812, "learning_rate": 1.9815468833177788e-05, "loss": 0.4386949837207794, "step": 1657, "token_acc": 0.8564303526897044 }, { "epoch": 0.08946204068418497, "grad_norm": 0.469259113073349, "learning_rate": 1.9815134511578345e-05, "loss": 0.43539804220199585, "step": 1658, "token_acc": 0.8506340312592156 }, { "epoch": 0.08951599848918146, "grad_norm": 0.5746724605560303, "learning_rate": 1.9814799890227263e-05, "loss": 0.4618903398513794, "step": 1659, "token_acc": 0.843859649122807 }, { "epoch": 0.08956995629417795, "grad_norm": 0.5595636963844299, "learning_rate": 1.9814464969134762e-05, "loss": 0.47533315420150757, "step": 1660, "token_acc": 0.8450332963374029 }, { "epoch": 0.08962391409917445, "grad_norm": 0.6254221796989441, "learning_rate": 1.9814129748311078e-05, "loss": 0.45144253969192505, "step": 1661, "token_acc": 0.848093083387201 }, { "epoch": 0.08967787190417094, "grad_norm": 0.5330312252044678, "learning_rate": 1.9813794227766438e-05, "loss": 0.4266325831413269, "step": 1662, "token_acc": 0.8516255194329015 }, { "epoch": 0.08973182970916743, "grad_norm": 0.48252272605895996, "learning_rate": 1.9813458407511093e-05, "loss": 0.5089281797409058, "step": 1663, "token_acc": 0.8300938337801609 }, { "epoch": 0.08978578751416393, "grad_norm": 0.5003010034561157, "learning_rate": 1.98131222875553e-05, "loss": 0.43366944789886475, "step": 1664, "token_acc": 0.8529275451877028 }, { "epoch": 0.08983974531916042, "grad_norm": 0.46392571926116943, "learning_rate": 1.981278586790932e-05, "loss": 0.5078524947166443, "step": 1665, "token_acc": 0.8277620396600567 }, { "epoch": 0.08989370312415691, "grad_norm": 0.4371070861816406, "learning_rate": 1.9812449148583437e-05, "loss": 0.4712001383304596, "step": 1666, "token_acc": 0.8474107893788297 }, { "epoch": 0.0899476609291534, "grad_norm": 0.6087231040000916, "learning_rate": 1.9812112129587922e-05, "loss": 0.484928160905838, "step": 1667, "token_acc": 0.8342618384401114 }, { "epoch": 0.0900016187341499, "grad_norm": 0.5349112749099731, "learning_rate": 1.981177481093307e-05, "loss": 0.4556116461753845, "step": 1668, "token_acc": 0.8410161507402423 }, { "epoch": 0.09005557653914639, "grad_norm": 0.6453852653503418, "learning_rate": 1.9811437192629196e-05, "loss": 0.47652915120124817, "step": 1669, "token_acc": 0.837890625 }, { "epoch": 0.09010953434414289, "grad_norm": 0.46865755319595337, "learning_rate": 1.9811099274686593e-05, "loss": 0.4105500280857086, "step": 1670, "token_acc": 0.8536945812807881 }, { "epoch": 0.09016349214913938, "grad_norm": 0.43981748819351196, "learning_rate": 1.981076105711559e-05, "loss": 0.4688672125339508, "step": 1671, "token_acc": 0.8414842195052602 }, { "epoch": 0.09021744995413587, "grad_norm": 0.555474579334259, "learning_rate": 1.9810422539926515e-05, "loss": 0.3935132622718811, "step": 1672, "token_acc": 0.8575534399137776 }, { "epoch": 0.09027140775913235, "grad_norm": 0.5110653042793274, "learning_rate": 1.9810083723129705e-05, "loss": 0.48102867603302, "step": 1673, "token_acc": 0.8392568659127625 }, { "epoch": 0.09032536556412885, "grad_norm": 0.5314428210258484, "learning_rate": 1.980974460673551e-05, "loss": 0.46717074513435364, "step": 1674, "token_acc": 0.8508634222919937 }, { "epoch": 0.09037932336912534, "grad_norm": 0.5107666850090027, "learning_rate": 1.980940519075428e-05, "loss": 0.4352775812149048, "step": 1675, "token_acc": 0.8518316019682887 }, { "epoch": 0.09043328117412183, "grad_norm": 0.42904821038246155, "learning_rate": 1.9809065475196387e-05, "loss": 0.3810332417488098, "step": 1676, "token_acc": 0.8665519399249061 }, { "epoch": 0.09048723897911833, "grad_norm": 0.5625536441802979, "learning_rate": 1.9808725460072204e-05, "loss": 0.5058470964431763, "step": 1677, "token_acc": 0.8340673575129534 }, { "epoch": 0.09054119678411482, "grad_norm": 0.504063069820404, "learning_rate": 1.980838514539212e-05, "loss": 0.46919023990631104, "step": 1678, "token_acc": 0.8447087444249223 }, { "epoch": 0.09059515458911131, "grad_norm": 0.4667339026927948, "learning_rate": 1.9808044531166515e-05, "loss": 0.48608875274658203, "step": 1679, "token_acc": 0.8432735193444664 }, { "epoch": 0.0906491123941078, "grad_norm": 0.48984408378601074, "learning_rate": 1.9807703617405803e-05, "loss": 0.4834122359752655, "step": 1680, "token_acc": 0.8360270755629231 }, { "epoch": 0.0907030701991043, "grad_norm": 0.5182748436927795, "learning_rate": 1.980736240412039e-05, "loss": 0.45444706082344055, "step": 1681, "token_acc": 0.8531949115367744 }, { "epoch": 0.09075702800410079, "grad_norm": 0.47770875692367554, "learning_rate": 1.9807020891320702e-05, "loss": 0.4458087682723999, "step": 1682, "token_acc": 0.8499553969669937 }, { "epoch": 0.09081098580909729, "grad_norm": 0.5361773371696472, "learning_rate": 1.9806679079017163e-05, "loss": 0.5240194797515869, "step": 1683, "token_acc": 0.8332677423586514 }, { "epoch": 0.09086494361409378, "grad_norm": 0.5108954310417175, "learning_rate": 1.9806336967220213e-05, "loss": 0.4751802980899811, "step": 1684, "token_acc": 0.841870160810006 }, { "epoch": 0.09091890141909027, "grad_norm": 0.47517088055610657, "learning_rate": 1.9805994555940304e-05, "loss": 0.49917343258857727, "step": 1685, "token_acc": 0.8329599551946234 }, { "epoch": 0.09097285922408677, "grad_norm": 0.5877160429954529, "learning_rate": 1.9805651845187888e-05, "loss": 0.4936041235923767, "step": 1686, "token_acc": 0.8299124900556881 }, { "epoch": 0.09102681702908326, "grad_norm": 0.4975355565547943, "learning_rate": 1.9805308834973434e-05, "loss": 0.408748984336853, "step": 1687, "token_acc": 0.8596740817382307 }, { "epoch": 0.09108077483407975, "grad_norm": 0.49850037693977356, "learning_rate": 1.9804965525307413e-05, "loss": 0.4803822338581085, "step": 1688, "token_acc": 0.8402435488547405 }, { "epoch": 0.09113473263907625, "grad_norm": 0.5348791480064392, "learning_rate": 1.9804621916200317e-05, "loss": 0.37576058506965637, "step": 1689, "token_acc": 0.8672490578965399 }, { "epoch": 0.09118869044407274, "grad_norm": 0.4835241734981537, "learning_rate": 1.980427800766264e-05, "loss": 0.5357222557067871, "step": 1690, "token_acc": 0.8232624113475178 }, { "epoch": 0.09124264824906923, "grad_norm": 0.44036009907722473, "learning_rate": 1.9803933799704875e-05, "loss": 0.4696725010871887, "step": 1691, "token_acc": 0.8385382059800665 }, { "epoch": 0.09129660605406573, "grad_norm": 0.3415845036506653, "learning_rate": 1.9803589292337543e-05, "loss": 0.419619619846344, "step": 1692, "token_acc": 0.8551894003737047 }, { "epoch": 0.09135056385906222, "grad_norm": 0.44520118832588196, "learning_rate": 1.9803244485571162e-05, "loss": 0.5521779656410217, "step": 1693, "token_acc": 0.8236714975845411 }, { "epoch": 0.0914045216640587, "grad_norm": 0.45955780148506165, "learning_rate": 1.980289937941626e-05, "loss": 0.44771042466163635, "step": 1694, "token_acc": 0.8533365338454153 }, { "epoch": 0.09145847946905519, "grad_norm": 0.5266250371932983, "learning_rate": 1.980255397388338e-05, "loss": 0.42290404438972473, "step": 1695, "token_acc": 0.8537142857142858 }, { "epoch": 0.09151243727405169, "grad_norm": 0.49953514337539673, "learning_rate": 1.980220826898307e-05, "loss": 0.36275458335876465, "step": 1696, "token_acc": 0.8711111111111111 }, { "epoch": 0.09156639507904818, "grad_norm": 0.4854017198085785, "learning_rate": 1.9801862264725887e-05, "loss": 0.4898940920829773, "step": 1697, "token_acc": 0.8323624595469256 }, { "epoch": 0.09162035288404467, "grad_norm": 0.6583020687103271, "learning_rate": 1.98015159611224e-05, "loss": 0.4614517092704773, "step": 1698, "token_acc": 0.8426900584795322 }, { "epoch": 0.09167431068904117, "grad_norm": 0.3827855885028839, "learning_rate": 1.980116935818318e-05, "loss": 0.4563986659049988, "step": 1699, "token_acc": 0.8431734317343174 }, { "epoch": 0.09172826849403766, "grad_norm": 0.520699679851532, "learning_rate": 1.980082245591882e-05, "loss": 0.4790731370449066, "step": 1700, "token_acc": 0.8380254026450177 }, { "epoch": 0.09178222629903415, "grad_norm": 0.4925290048122406, "learning_rate": 1.9800475254339907e-05, "loss": 0.46643829345703125, "step": 1701, "token_acc": 0.8487972508591065 }, { "epoch": 0.09183618410403065, "grad_norm": 0.7039558291435242, "learning_rate": 1.9800127753457044e-05, "loss": 0.5225690603256226, "step": 1702, "token_acc": 0.8266409266409266 }, { "epoch": 0.09189014190902714, "grad_norm": 0.48557114601135254, "learning_rate": 1.9799779953280853e-05, "loss": 0.48930448293685913, "step": 1703, "token_acc": 0.8383838383838383 }, { "epoch": 0.09194409971402363, "grad_norm": 0.3977152407169342, "learning_rate": 1.9799431853821944e-05, "loss": 0.41593754291534424, "step": 1704, "token_acc": 0.860581188997338 }, { "epoch": 0.09199805751902013, "grad_norm": 0.43740585446357727, "learning_rate": 1.9799083455090957e-05, "loss": 0.4354850649833679, "step": 1705, "token_acc": 0.8558270907527938 }, { "epoch": 0.09205201532401662, "grad_norm": 0.4876565635204315, "learning_rate": 1.9798734757098526e-05, "loss": 0.5025941729545593, "step": 1706, "token_acc": 0.8315682576526912 }, { "epoch": 0.09210597312901311, "grad_norm": 0.3742792308330536, "learning_rate": 1.9798385759855302e-05, "loss": 0.43441343307495117, "step": 1707, "token_acc": 0.8525648690661246 }, { "epoch": 0.0921599309340096, "grad_norm": 0.4179036319255829, "learning_rate": 1.9798036463371946e-05, "loss": 0.4599078893661499, "step": 1708, "token_acc": 0.8454998534154207 }, { "epoch": 0.0922138887390061, "grad_norm": 0.5857380628585815, "learning_rate": 1.979768686765912e-05, "loss": 0.4934634864330292, "step": 1709, "token_acc": 0.8332318001827597 }, { "epoch": 0.0922678465440026, "grad_norm": 0.5532853007316589, "learning_rate": 1.9797336972727506e-05, "loss": 0.5018155574798584, "step": 1710, "token_acc": 0.8361174386231334 }, { "epoch": 0.09232180434899909, "grad_norm": 0.42566871643066406, "learning_rate": 1.9796986778587788e-05, "loss": 0.43253856897354126, "step": 1711, "token_acc": 0.8543089011979905 }, { "epoch": 0.09237576215399558, "grad_norm": 0.4603496789932251, "learning_rate": 1.979663628525066e-05, "loss": 0.4595113694667816, "step": 1712, "token_acc": 0.8445416624275104 }, { "epoch": 0.09242971995899207, "grad_norm": 0.42888253927230835, "learning_rate": 1.9796285492726825e-05, "loss": 0.4130922555923462, "step": 1713, "token_acc": 0.8584514097609847 }, { "epoch": 0.09248367776398857, "grad_norm": 0.5112858414649963, "learning_rate": 1.9795934401026998e-05, "loss": 0.5263184309005737, "step": 1714, "token_acc": 0.826910684366542 }, { "epoch": 0.09253763556898506, "grad_norm": 0.4121711254119873, "learning_rate": 1.97955830101619e-05, "loss": 0.4805046021938324, "step": 1715, "token_acc": 0.8446808510638298 }, { "epoch": 0.09259159337398154, "grad_norm": 0.5470982789993286, "learning_rate": 1.9795231320142263e-05, "loss": 0.4278404116630554, "step": 1716, "token_acc": 0.8552806597499335 }, { "epoch": 0.09264555117897803, "grad_norm": 0.43052706122398376, "learning_rate": 1.979487933097883e-05, "loss": 0.4164161682128906, "step": 1717, "token_acc": 0.8581341557440246 }, { "epoch": 0.09269950898397453, "grad_norm": 0.5954557061195374, "learning_rate": 1.9794527042682345e-05, "loss": 0.4237881898880005, "step": 1718, "token_acc": 0.8546762589928057 }, { "epoch": 0.09275346678897102, "grad_norm": 0.527776837348938, "learning_rate": 1.979417445526357e-05, "loss": 0.45365241169929504, "step": 1719, "token_acc": 0.8447805941673481 }, { "epoch": 0.09280742459396751, "grad_norm": 0.5499609112739563, "learning_rate": 1.9793821568733277e-05, "loss": 0.4800596833229065, "step": 1720, "token_acc": 0.8347981770833334 }, { "epoch": 0.092861382398964, "grad_norm": 0.5364629030227661, "learning_rate": 1.9793468383102233e-05, "loss": 0.38145607709884644, "step": 1721, "token_acc": 0.8677167549484248 }, { "epoch": 0.0929153402039605, "grad_norm": 0.39170515537261963, "learning_rate": 1.9793114898381233e-05, "loss": 0.3931587338447571, "step": 1722, "token_acc": 0.8632954270505686 }, { "epoch": 0.092969298008957, "grad_norm": 0.43780380487442017, "learning_rate": 1.979276111458107e-05, "loss": 0.5127624273300171, "step": 1723, "token_acc": 0.8302858614473345 }, { "epoch": 0.09302325581395349, "grad_norm": 0.5336185097694397, "learning_rate": 1.9792407031712547e-05, "loss": 0.43792304396629333, "step": 1724, "token_acc": 0.8551905387647831 }, { "epoch": 0.09307721361894998, "grad_norm": 0.47035571932792664, "learning_rate": 1.979205264978648e-05, "loss": 0.5174652934074402, "step": 1725, "token_acc": 0.8263493127723768 }, { "epoch": 0.09313117142394647, "grad_norm": 0.4168243110179901, "learning_rate": 1.9791697968813687e-05, "loss": 0.4309561550617218, "step": 1726, "token_acc": 0.8515981735159818 }, { "epoch": 0.09318512922894297, "grad_norm": 0.3966120481491089, "learning_rate": 1.979134298880501e-05, "loss": 0.4335893988609314, "step": 1727, "token_acc": 0.85331182578889 }, { "epoch": 0.09323908703393946, "grad_norm": 0.6028324961662292, "learning_rate": 1.9790987709771274e-05, "loss": 0.4663698971271515, "step": 1728, "token_acc": 0.8430744595676541 }, { "epoch": 0.09329304483893595, "grad_norm": 0.5743938684463501, "learning_rate": 1.979063213172334e-05, "loss": 0.49261558055877686, "step": 1729, "token_acc": 0.8349285270354257 }, { "epoch": 0.09334700264393245, "grad_norm": 0.5764662027359009, "learning_rate": 1.979027625467207e-05, "loss": 0.4818084239959717, "step": 1730, "token_acc": 0.8430781331934977 }, { "epoch": 0.09340096044892894, "grad_norm": 0.46869122982025146, "learning_rate": 1.9789920078628327e-05, "loss": 0.45952484011650085, "step": 1731, "token_acc": 0.8481012658227848 }, { "epoch": 0.09345491825392543, "grad_norm": 0.6258329153060913, "learning_rate": 1.978956360360299e-05, "loss": 0.5266472101211548, "step": 1732, "token_acc": 0.8246816846229187 }, { "epoch": 0.09350887605892193, "grad_norm": 0.44657227396965027, "learning_rate": 1.9789206829606942e-05, "loss": 0.46235376596450806, "step": 1733, "token_acc": 0.8435191403626595 }, { "epoch": 0.09356283386391842, "grad_norm": 0.5822757482528687, "learning_rate": 1.9788849756651083e-05, "loss": 0.4631449282169342, "step": 1734, "token_acc": 0.8455882352941176 }, { "epoch": 0.09361679166891491, "grad_norm": 0.5612549185752869, "learning_rate": 1.9788492384746315e-05, "loss": 0.47128191590309143, "step": 1735, "token_acc": 0.8441047373997276 }, { "epoch": 0.09367074947391141, "grad_norm": 0.635086715221405, "learning_rate": 1.9788134713903557e-05, "loss": 0.40782663226127625, "step": 1736, "token_acc": 0.8629541864139021 }, { "epoch": 0.09372470727890789, "grad_norm": 0.5109940767288208, "learning_rate": 1.978777674413373e-05, "loss": 0.5001083612442017, "step": 1737, "token_acc": 0.8322356157574251 }, { "epoch": 0.09377866508390438, "grad_norm": 0.4343619644641876, "learning_rate": 1.978741847544776e-05, "loss": 0.4841606318950653, "step": 1738, "token_acc": 0.8402203856749312 }, { "epoch": 0.09383262288890087, "grad_norm": 0.5452101826667786, "learning_rate": 1.9787059907856598e-05, "loss": 0.41265445947647095, "step": 1739, "token_acc": 0.8541114058355438 }, { "epoch": 0.09388658069389737, "grad_norm": 0.5320696234703064, "learning_rate": 1.9786701041371187e-05, "loss": 0.4647488594055176, "step": 1740, "token_acc": 0.8426648945007477 }, { "epoch": 0.09394053849889386, "grad_norm": 0.4439716339111328, "learning_rate": 1.9786341876002493e-05, "loss": 0.4230702519416809, "step": 1741, "token_acc": 0.8567160124949069 }, { "epoch": 0.09399449630389035, "grad_norm": 0.5338200926780701, "learning_rate": 1.9785982411761483e-05, "loss": 0.47267478704452515, "step": 1742, "token_acc": 0.8407967432393137 }, { "epoch": 0.09404845410888685, "grad_norm": 0.6021119952201843, "learning_rate": 1.978562264865913e-05, "loss": 0.5290085077285767, "step": 1743, "token_acc": 0.8220157255182273 }, { "epoch": 0.09410241191388334, "grad_norm": 0.4508388936519623, "learning_rate": 1.9785262586706425e-05, "loss": 0.3997945785522461, "step": 1744, "token_acc": 0.862296967083513 }, { "epoch": 0.09415636971887983, "grad_norm": 0.44870659708976746, "learning_rate": 1.9784902225914366e-05, "loss": 0.4991913139820099, "step": 1745, "token_acc": 0.8293910417715148 }, { "epoch": 0.09421032752387633, "grad_norm": 0.44270992279052734, "learning_rate": 1.9784541566293955e-05, "loss": 0.4581950306892395, "step": 1746, "token_acc": 0.8486793015967766 }, { "epoch": 0.09426428532887282, "grad_norm": 0.4881482720375061, "learning_rate": 1.9784180607856208e-05, "loss": 0.46492183208465576, "step": 1747, "token_acc": 0.8467141469731304 }, { "epoch": 0.09431824313386931, "grad_norm": 0.51755291223526, "learning_rate": 1.978381935061215e-05, "loss": 0.5321599841117859, "step": 1748, "token_acc": 0.8266129032258065 }, { "epoch": 0.09437220093886581, "grad_norm": 0.5350072383880615, "learning_rate": 1.9783457794572812e-05, "loss": 0.4743967652320862, "step": 1749, "token_acc": 0.84 }, { "epoch": 0.0944261587438623, "grad_norm": 0.4629591107368469, "learning_rate": 1.9783095939749235e-05, "loss": 0.4683002233505249, "step": 1750, "token_acc": 0.8408063057242537 }, { "epoch": 0.0944801165488588, "grad_norm": 0.520727813243866, "learning_rate": 1.978273378615247e-05, "loss": 0.4071695804595947, "step": 1751, "token_acc": 0.8576807228915663 }, { "epoch": 0.09453407435385529, "grad_norm": 0.4814664125442505, "learning_rate": 1.9782371333793576e-05, "loss": 0.42443224787712097, "step": 1752, "token_acc": 0.8557250777662568 }, { "epoch": 0.09458803215885178, "grad_norm": 0.5298850536346436, "learning_rate": 1.9782008582683627e-05, "loss": 0.40597227215766907, "step": 1753, "token_acc": 0.8586712683347714 }, { "epoch": 0.09464198996384827, "grad_norm": 0.442609578371048, "learning_rate": 1.97816455328337e-05, "loss": 0.521431028842926, "step": 1754, "token_acc": 0.8286458333333333 }, { "epoch": 0.09469594776884477, "grad_norm": 0.42640507221221924, "learning_rate": 1.9781282184254878e-05, "loss": 0.4490990936756134, "step": 1755, "token_acc": 0.844866218531699 }, { "epoch": 0.09474990557384126, "grad_norm": 0.4374917149543762, "learning_rate": 1.978091853695826e-05, "loss": 0.4351016879081726, "step": 1756, "token_acc": 0.8504781222833961 }, { "epoch": 0.09480386337883775, "grad_norm": 0.42775920033454895, "learning_rate": 1.9780554590954954e-05, "loss": 0.39926087856292725, "step": 1757, "token_acc": 0.8620643431635389 }, { "epoch": 0.09485782118383425, "grad_norm": 0.428081750869751, "learning_rate": 1.978019034625607e-05, "loss": 0.4133703112602234, "step": 1758, "token_acc": 0.8594573234984194 }, { "epoch": 0.09491177898883073, "grad_norm": 0.5858696103096008, "learning_rate": 1.9779825802872737e-05, "loss": 0.4756298065185547, "step": 1759, "token_acc": 0.8368759268413247 }, { "epoch": 0.09496573679382722, "grad_norm": 0.49537909030914307, "learning_rate": 1.977946096081609e-05, "loss": 0.5916377305984497, "step": 1760, "token_acc": 0.8096120583148011 }, { "epoch": 0.09501969459882371, "grad_norm": 0.5953269004821777, "learning_rate": 1.9779095820097264e-05, "loss": 0.43143177032470703, "step": 1761, "token_acc": 0.8544789125130707 }, { "epoch": 0.09507365240382021, "grad_norm": 0.6436390280723572, "learning_rate": 1.9778730380727408e-05, "loss": 0.4559011161327362, "step": 1762, "token_acc": 0.850527875532506 }, { "epoch": 0.0951276102088167, "grad_norm": 0.5079723596572876, "learning_rate": 1.9778364642717696e-05, "loss": 0.4782032370567322, "step": 1763, "token_acc": 0.8421124454148472 }, { "epoch": 0.0951815680138132, "grad_norm": 0.44040265679359436, "learning_rate": 1.9777998606079284e-05, "loss": 0.4150766432285309, "step": 1764, "token_acc": 0.8576311487591024 }, { "epoch": 0.09523552581880969, "grad_norm": 0.6386381983757019, "learning_rate": 1.9777632270823358e-05, "loss": 0.49137377738952637, "step": 1765, "token_acc": 0.8403635847180798 }, { "epoch": 0.09528948362380618, "grad_norm": 0.5683263540267944, "learning_rate": 1.9777265636961104e-05, "loss": 0.5078154802322388, "step": 1766, "token_acc": 0.8277578103400608 }, { "epoch": 0.09534344142880267, "grad_norm": 0.5669940114021301, "learning_rate": 1.9776898704503718e-05, "loss": 0.4191756844520569, "step": 1767, "token_acc": 0.8581956797966963 }, { "epoch": 0.09539739923379917, "grad_norm": 0.5340211987495422, "learning_rate": 1.977653147346241e-05, "loss": 0.4347163140773773, "step": 1768, "token_acc": 0.8510042805400065 }, { "epoch": 0.09545135703879566, "grad_norm": 0.5939655900001526, "learning_rate": 1.9776163943848384e-05, "loss": 0.5438655018806458, "step": 1769, "token_acc": 0.8232603724273113 }, { "epoch": 0.09550531484379216, "grad_norm": 0.5441155433654785, "learning_rate": 1.977579611567288e-05, "loss": 0.4233165979385376, "step": 1770, "token_acc": 0.8566978193146417 }, { "epoch": 0.09555927264878865, "grad_norm": 0.5287241339683533, "learning_rate": 1.977542798894712e-05, "loss": 0.4928513765335083, "step": 1771, "token_acc": 0.8289621581116523 }, { "epoch": 0.09561323045378514, "grad_norm": 0.4905485510826111, "learning_rate": 1.977505956368235e-05, "loss": 0.485514760017395, "step": 1772, "token_acc": 0.8348509933774835 }, { "epoch": 0.09566718825878164, "grad_norm": 0.6652539372444153, "learning_rate": 1.9774690839889822e-05, "loss": 0.5088286399841309, "step": 1773, "token_acc": 0.8298823919165148 }, { "epoch": 0.09572114606377813, "grad_norm": 0.5201907753944397, "learning_rate": 1.97743218175808e-05, "loss": 0.48016735911369324, "step": 1774, "token_acc": 0.8455673493795784 }, { "epoch": 0.09577510386877462, "grad_norm": 0.5315130352973938, "learning_rate": 1.9773952496766547e-05, "loss": 0.44659775495529175, "step": 1775, "token_acc": 0.8469135802469135 }, { "epoch": 0.09582906167377112, "grad_norm": 0.5720604062080383, "learning_rate": 1.9773582877458348e-05, "loss": 0.45014506578445435, "step": 1776, "token_acc": 0.8499531103469834 }, { "epoch": 0.09588301947876761, "grad_norm": 0.480144739151001, "learning_rate": 1.9773212959667484e-05, "loss": 0.44158700108528137, "step": 1777, "token_acc": 0.843911612599906 }, { "epoch": 0.0959369772837641, "grad_norm": 0.6217635273933411, "learning_rate": 1.9772842743405256e-05, "loss": 0.47470569610595703, "step": 1778, "token_acc": 0.8380432664198012 }, { "epoch": 0.0959909350887606, "grad_norm": 0.4166150987148285, "learning_rate": 1.9772472228682972e-05, "loss": 0.422610342502594, "step": 1779, "token_acc": 0.8552912690499929 }, { "epoch": 0.09604489289375708, "grad_norm": 0.5525391697883606, "learning_rate": 1.9772101415511947e-05, "loss": 0.5249637365341187, "step": 1780, "token_acc": 0.8301987224982257 }, { "epoch": 0.09609885069875357, "grad_norm": 0.4702194929122925, "learning_rate": 1.9771730303903503e-05, "loss": 0.4654698967933655, "step": 1781, "token_acc": 0.846461706385301 }, { "epoch": 0.09615280850375006, "grad_norm": 0.5912903547286987, "learning_rate": 1.977135889386898e-05, "loss": 0.5408872365951538, "step": 1782, "token_acc": 0.8243534482758621 }, { "epoch": 0.09620676630874656, "grad_norm": 0.5297306180000305, "learning_rate": 1.9770987185419712e-05, "loss": 0.5071722865104675, "step": 1783, "token_acc": 0.8331658291457287 }, { "epoch": 0.09626072411374305, "grad_norm": 0.5182297825813293, "learning_rate": 1.9770615178567055e-05, "loss": 0.47452113032341003, "step": 1784, "token_acc": 0.8451957295373665 }, { "epoch": 0.09631468191873954, "grad_norm": 0.4769063889980316, "learning_rate": 1.9770242873322374e-05, "loss": 0.460341215133667, "step": 1785, "token_acc": 0.8474970896391153 }, { "epoch": 0.09636863972373604, "grad_norm": 0.46054375171661377, "learning_rate": 1.9769870269697032e-05, "loss": 0.44560888409614563, "step": 1786, "token_acc": 0.8487218396171355 }, { "epoch": 0.09642259752873253, "grad_norm": 0.5603585243225098, "learning_rate": 1.9769497367702413e-05, "loss": 0.4868243932723999, "step": 1787, "token_acc": 0.8372434017595308 }, { "epoch": 0.09647655533372902, "grad_norm": 0.6320084929466248, "learning_rate": 1.97691241673499e-05, "loss": 0.5046801567077637, "step": 1788, "token_acc": 0.8326407253494522 }, { "epoch": 0.09653051313872552, "grad_norm": 0.6668115854263306, "learning_rate": 1.9768750668650898e-05, "loss": 0.48947006464004517, "step": 1789, "token_acc": 0.8355077574047954 }, { "epoch": 0.09658447094372201, "grad_norm": 0.5130131840705872, "learning_rate": 1.9768376871616808e-05, "loss": 0.4983886480331421, "step": 1790, "token_acc": 0.8394218551461246 }, { "epoch": 0.0966384287487185, "grad_norm": 0.46895065903663635, "learning_rate": 1.9768002776259045e-05, "loss": 0.4368070363998413, "step": 1791, "token_acc": 0.8502245418133269 }, { "epoch": 0.096692386553715, "grad_norm": 0.5399997234344482, "learning_rate": 1.9767628382589038e-05, "loss": 0.5417875647544861, "step": 1792, "token_acc": 0.8201268998081747 }, { "epoch": 0.09674634435871149, "grad_norm": 0.4289410710334778, "learning_rate": 1.976725369061822e-05, "loss": 0.4240327477455139, "step": 1793, "token_acc": 0.8537571448487383 }, { "epoch": 0.09680030216370798, "grad_norm": 0.6185281276702881, "learning_rate": 1.976687870035803e-05, "loss": 0.4633875787258148, "step": 1794, "token_acc": 0.8421721758082092 }, { "epoch": 0.09685425996870448, "grad_norm": 0.5583697557449341, "learning_rate": 1.9766503411819927e-05, "loss": 0.4912823736667633, "step": 1795, "token_acc": 0.83456 }, { "epoch": 0.09690821777370097, "grad_norm": 0.4514023959636688, "learning_rate": 1.976612782501537e-05, "loss": 0.40384483337402344, "step": 1796, "token_acc": 0.8621704991587212 }, { "epoch": 0.09696217557869746, "grad_norm": 0.48929786682128906, "learning_rate": 1.976575193995582e-05, "loss": 0.4724823832511902, "step": 1797, "token_acc": 0.8402258292166549 }, { "epoch": 0.09701613338369396, "grad_norm": 0.5950814485549927, "learning_rate": 1.976537575665277e-05, "loss": 0.4036579728126526, "step": 1798, "token_acc": 0.8568681905575731 }, { "epoch": 0.09707009118869045, "grad_norm": 0.4799495339393616, "learning_rate": 1.9764999275117703e-05, "loss": 0.47138988971710205, "step": 1799, "token_acc": 0.8421393421393422 }, { "epoch": 0.09712404899368694, "grad_norm": 0.5529593229293823, "learning_rate": 1.976462249536211e-05, "loss": 0.5112348198890686, "step": 1800, "token_acc": 0.8268198276938444 }, { "epoch": 0.09717800679868344, "grad_norm": 0.45038607716560364, "learning_rate": 1.976424541739751e-05, "loss": 0.39736291766166687, "step": 1801, "token_acc": 0.8625512418615867 }, { "epoch": 0.09723196460367992, "grad_norm": 0.4631424844264984, "learning_rate": 1.976386804123541e-05, "loss": 0.4063214659690857, "step": 1802, "token_acc": 0.8587905750535508 }, { "epoch": 0.09728592240867641, "grad_norm": 0.5106984972953796, "learning_rate": 1.9763490366887338e-05, "loss": 0.47121918201446533, "step": 1803, "token_acc": 0.836029248836694 }, { "epoch": 0.0973398802136729, "grad_norm": 0.4480960965156555, "learning_rate": 1.9763112394364832e-05, "loss": 0.4383520185947418, "step": 1804, "token_acc": 0.8534694881889764 }, { "epoch": 0.0973938380186694, "grad_norm": 0.5216410160064697, "learning_rate": 1.9762734123679427e-05, "loss": 0.39533114433288574, "step": 1805, "token_acc": 0.8603994351422232 }, { "epoch": 0.09744779582366589, "grad_norm": 0.5934601426124573, "learning_rate": 1.976235555484268e-05, "loss": 0.4568607807159424, "step": 1806, "token_acc": 0.8476043871464306 }, { "epoch": 0.09750175362866238, "grad_norm": 0.3980865180492401, "learning_rate": 1.9761976687866152e-05, "loss": 0.440949022769928, "step": 1807, "token_acc": 0.8551682692307693 }, { "epoch": 0.09755571143365888, "grad_norm": 0.49639514088630676, "learning_rate": 1.9761597522761412e-05, "loss": 0.47926831245422363, "step": 1808, "token_acc": 0.8414565032587135 }, { "epoch": 0.09760966923865537, "grad_norm": 0.3767779469490051, "learning_rate": 1.9761218059540042e-05, "loss": 0.43491262197494507, "step": 1809, "token_acc": 0.8532034733630603 }, { "epoch": 0.09766362704365186, "grad_norm": 0.5460339784622192, "learning_rate": 1.976083829821363e-05, "loss": 0.5419533252716064, "step": 1810, "token_acc": 0.8206663595885153 }, { "epoch": 0.09771758484864836, "grad_norm": 0.5096850395202637, "learning_rate": 1.976045823879377e-05, "loss": 0.4747834801673889, "step": 1811, "token_acc": 0.8385021824029405 }, { "epoch": 0.09777154265364485, "grad_norm": 0.6145365834236145, "learning_rate": 1.9760077881292077e-05, "loss": 0.5294795632362366, "step": 1812, "token_acc": 0.8324615628467269 }, { "epoch": 0.09782550045864134, "grad_norm": 0.4218086302280426, "learning_rate": 1.975969722572016e-05, "loss": 0.4306858777999878, "step": 1813, "token_acc": 0.8544716625597912 }, { "epoch": 0.09787945826363784, "grad_norm": 0.5055311322212219, "learning_rate": 1.9759316272089645e-05, "loss": 0.5141192674636841, "step": 1814, "token_acc": 0.8315938439766496 }, { "epoch": 0.09793341606863433, "grad_norm": 0.4227118194103241, "learning_rate": 1.975893502041217e-05, "loss": 0.4565662741661072, "step": 1815, "token_acc": 0.8492742835876442 }, { "epoch": 0.09798737387363082, "grad_norm": 0.48664382100105286, "learning_rate": 1.9758553470699376e-05, "loss": 0.4657726287841797, "step": 1816, "token_acc": 0.8473263675476337 }, { "epoch": 0.09804133167862732, "grad_norm": 0.4963475167751312, "learning_rate": 1.9758171622962914e-05, "loss": 0.4476970434188843, "step": 1817, "token_acc": 0.8471106912297276 }, { "epoch": 0.09809528948362381, "grad_norm": 0.4512403607368469, "learning_rate": 1.9757789477214448e-05, "loss": 0.45176786184310913, "step": 1818, "token_acc": 0.8477071939419436 }, { "epoch": 0.0981492472886203, "grad_norm": 0.561143696308136, "learning_rate": 1.975740703346565e-05, "loss": 0.4949970841407776, "step": 1819, "token_acc": 0.8379446640316206 }, { "epoch": 0.0982032050936168, "grad_norm": 0.5193417072296143, "learning_rate": 1.9757024291728193e-05, "loss": 0.45866715908050537, "step": 1820, "token_acc": 0.8470865611316942 }, { "epoch": 0.09825716289861329, "grad_norm": 0.5743717551231384, "learning_rate": 1.975664125201377e-05, "loss": 0.4483504295349121, "step": 1821, "token_acc": 0.8483811529349828 }, { "epoch": 0.09831112070360978, "grad_norm": 0.4555552005767822, "learning_rate": 1.9756257914334084e-05, "loss": 0.3612304925918579, "step": 1822, "token_acc": 0.8745762711864407 }, { "epoch": 0.09836507850860628, "grad_norm": 0.494935005903244, "learning_rate": 1.975587427870083e-05, "loss": 0.4672202467918396, "step": 1823, "token_acc": 0.8395620989052472 }, { "epoch": 0.09841903631360276, "grad_norm": 0.521640419960022, "learning_rate": 1.9755490345125736e-05, "loss": 0.4252496361732483, "step": 1824, "token_acc": 0.8546637744034707 }, { "epoch": 0.09847299411859925, "grad_norm": 0.40990427136421204, "learning_rate": 1.975510611362052e-05, "loss": 0.46322083473205566, "step": 1825, "token_acc": 0.8439047394482433 }, { "epoch": 0.09852695192359574, "grad_norm": 0.5331092476844788, "learning_rate": 1.975472158419692e-05, "loss": 0.4814545512199402, "step": 1826, "token_acc": 0.8430304095623132 }, { "epoch": 0.09858090972859224, "grad_norm": 0.4635607600212097, "learning_rate": 1.975433675686668e-05, "loss": 0.3853934109210968, "step": 1827, "token_acc": 0.8637445735891331 }, { "epoch": 0.09863486753358873, "grad_norm": 0.5529592633247375, "learning_rate": 1.975395163164155e-05, "loss": 0.49850767850875854, "step": 1828, "token_acc": 0.835179742256387 }, { "epoch": 0.09868882533858522, "grad_norm": 0.3183293342590332, "learning_rate": 1.975356620853329e-05, "loss": 0.4556030035018921, "step": 1829, "token_acc": 0.8462694982146213 }, { "epoch": 0.09874278314358172, "grad_norm": 0.5456755757331848, "learning_rate": 1.9753180487553677e-05, "loss": 0.4207479953765869, "step": 1830, "token_acc": 0.8587244659285825 }, { "epoch": 0.09879674094857821, "grad_norm": 0.5032939910888672, "learning_rate": 1.9752794468714486e-05, "loss": 0.43118005990982056, "step": 1831, "token_acc": 0.8511396918375144 }, { "epoch": 0.0988506987535747, "grad_norm": 0.3710468113422394, "learning_rate": 1.9752408152027502e-05, "loss": 0.5208315849304199, "step": 1832, "token_acc": 0.8267559314070942 }, { "epoch": 0.0989046565585712, "grad_norm": 0.4575239419937134, "learning_rate": 1.9752021537504534e-05, "loss": 0.4658896327018738, "step": 1833, "token_acc": 0.8466887417218543 }, { "epoch": 0.09895861436356769, "grad_norm": 0.5585022568702698, "learning_rate": 1.9751634625157375e-05, "loss": 0.46750640869140625, "step": 1834, "token_acc": 0.8515363553392151 }, { "epoch": 0.09901257216856418, "grad_norm": 0.539825975894928, "learning_rate": 1.975124741499786e-05, "loss": 0.4734410047531128, "step": 1835, "token_acc": 0.8423675071719764 }, { "epoch": 0.09906652997356068, "grad_norm": 0.47911638021469116, "learning_rate": 1.9750859907037797e-05, "loss": 0.5094062089920044, "step": 1836, "token_acc": 0.8274249509729974 }, { "epoch": 0.09912048777855717, "grad_norm": 0.5600634217262268, "learning_rate": 1.9750472101289023e-05, "loss": 0.444576621055603, "step": 1837, "token_acc": 0.848114956736712 }, { "epoch": 0.09917444558355366, "grad_norm": 0.5572169423103333, "learning_rate": 1.975008399776339e-05, "loss": 0.44005441665649414, "step": 1838, "token_acc": 0.8495192307692307 }, { "epoch": 0.09922840338855016, "grad_norm": 0.4149746298789978, "learning_rate": 1.9749695596472746e-05, "loss": 0.40787947177886963, "step": 1839, "token_acc": 0.8586101952827796 }, { "epoch": 0.09928236119354665, "grad_norm": 0.602605938911438, "learning_rate": 1.9749306897428955e-05, "loss": 0.4593547284603119, "step": 1840, "token_acc": 0.8410943761227028 }, { "epoch": 0.09933631899854314, "grad_norm": 0.4766497015953064, "learning_rate": 1.974891790064388e-05, "loss": 0.4495401978492737, "step": 1841, "token_acc": 0.8500352030039897 }, { "epoch": 0.09939027680353964, "grad_norm": 0.39305514097213745, "learning_rate": 1.974852860612941e-05, "loss": 0.49517983198165894, "step": 1842, "token_acc": 0.8335492227979274 }, { "epoch": 0.09944423460853613, "grad_norm": 0.47354450821876526, "learning_rate": 1.9748139013897427e-05, "loss": 0.5450897216796875, "step": 1843, "token_acc": 0.8204739115209525 }, { "epoch": 0.09949819241353262, "grad_norm": 0.36845117807388306, "learning_rate": 1.9747749123959834e-05, "loss": 0.4486767053604126, "step": 1844, "token_acc": 0.8528360343183985 }, { "epoch": 0.0995521502185291, "grad_norm": 0.49457085132598877, "learning_rate": 1.9747358936328538e-05, "loss": 0.4366271495819092, "step": 1845, "token_acc": 0.8484502213969433 }, { "epoch": 0.0996061080235256, "grad_norm": 0.4706810712814331, "learning_rate": 1.974696845101545e-05, "loss": 0.4580274820327759, "step": 1846, "token_acc": 0.8395935609087796 }, { "epoch": 0.09966006582852209, "grad_norm": 0.4441705644130707, "learning_rate": 1.9746577668032502e-05, "loss": 0.44608819484710693, "step": 1847, "token_acc": 0.8457697246397489 }, { "epoch": 0.09971402363351858, "grad_norm": 0.5256552696228027, "learning_rate": 1.9746186587391625e-05, "loss": 0.4074172377586365, "step": 1848, "token_acc": 0.8600704048277596 }, { "epoch": 0.09976798143851508, "grad_norm": 0.4488010108470917, "learning_rate": 1.974579520910476e-05, "loss": 0.4359547793865204, "step": 1849, "token_acc": 0.8482451804251112 }, { "epoch": 0.09982193924351157, "grad_norm": 0.5950372219085693, "learning_rate": 1.9745403533183866e-05, "loss": 0.4807736277580261, "step": 1850, "token_acc": 0.8361944489010108 }, { "epoch": 0.09987589704850806, "grad_norm": 0.5476689338684082, "learning_rate": 1.9745011559640897e-05, "loss": 0.4794256091117859, "step": 1851, "token_acc": 0.8421125206839493 }, { "epoch": 0.09992985485350456, "grad_norm": 0.411852091550827, "learning_rate": 1.974461928848783e-05, "loss": 0.42121216654777527, "step": 1852, "token_acc": 0.8556495769039323 }, { "epoch": 0.09998381265850105, "grad_norm": 0.5825279355049133, "learning_rate": 1.9744226719736646e-05, "loss": 0.43762433528900146, "step": 1853, "token_acc": 0.849079754601227 }, { "epoch": 0.10003777046349754, "grad_norm": 0.5172057747840881, "learning_rate": 1.9743833853399324e-05, "loss": 0.44859516620635986, "step": 1854, "token_acc": 0.8550945698596706 }, { "epoch": 0.10009172826849404, "grad_norm": 0.48102784156799316, "learning_rate": 1.9743440689487873e-05, "loss": 0.45130467414855957, "step": 1855, "token_acc": 0.848585690515807 }, { "epoch": 0.10014568607349053, "grad_norm": 0.3881591558456421, "learning_rate": 1.9743047228014296e-05, "loss": 0.45041728019714355, "step": 1856, "token_acc": 0.8437889960294952 }, { "epoch": 0.10019964387848702, "grad_norm": 0.5432842373847961, "learning_rate": 1.9742653468990608e-05, "loss": 0.43112754821777344, "step": 1857, "token_acc": 0.8455076698319941 }, { "epoch": 0.10025360168348352, "grad_norm": 0.4443325102329254, "learning_rate": 1.9742259412428836e-05, "loss": 0.4232625663280487, "step": 1858, "token_acc": 0.8524590163934426 }, { "epoch": 0.10030755948848001, "grad_norm": 0.41483399271965027, "learning_rate": 1.974186505834101e-05, "loss": 0.41350799798965454, "step": 1859, "token_acc": 0.8571637426900585 }, { "epoch": 0.1003615172934765, "grad_norm": 0.5267612934112549, "learning_rate": 1.9741470406739182e-05, "loss": 0.4993845224380493, "step": 1860, "token_acc": 0.8343812709030101 }, { "epoch": 0.100415475098473, "grad_norm": 0.5002760887145996, "learning_rate": 1.9741075457635396e-05, "loss": 0.39842647314071655, "step": 1861, "token_acc": 0.8658138268916712 }, { "epoch": 0.10046943290346949, "grad_norm": 0.4947412610054016, "learning_rate": 1.974068021104172e-05, "loss": 0.44506704807281494, "step": 1862, "token_acc": 0.8496488462089723 }, { "epoch": 0.10052339070846598, "grad_norm": 0.6268656849861145, "learning_rate": 1.974028466697022e-05, "loss": 0.41527706384658813, "step": 1863, "token_acc": 0.8549963794351919 }, { "epoch": 0.10057734851346248, "grad_norm": 0.41071411967277527, "learning_rate": 1.9739888825432978e-05, "loss": 0.41549503803253174, "step": 1864, "token_acc": 0.8566662832163809 }, { "epoch": 0.10063130631845897, "grad_norm": 0.4977095127105713, "learning_rate": 1.9739492686442085e-05, "loss": 0.3853702247142792, "step": 1865, "token_acc": 0.867982908501547 }, { "epoch": 0.10068526412345546, "grad_norm": 0.48481473326683044, "learning_rate": 1.9739096250009636e-05, "loss": 0.41526320576667786, "step": 1866, "token_acc": 0.856067187952505 }, { "epoch": 0.10073922192845194, "grad_norm": 0.49509984254837036, "learning_rate": 1.9738699516147734e-05, "loss": 0.43406298756599426, "step": 1867, "token_acc": 0.8529316488500162 }, { "epoch": 0.10079317973344844, "grad_norm": 0.4284609854221344, "learning_rate": 1.9738302484868506e-05, "loss": 0.3496988117694855, "step": 1868, "token_acc": 0.8782760629004077 }, { "epoch": 0.10084713753844493, "grad_norm": 0.5025672912597656, "learning_rate": 1.973790515618407e-05, "loss": 0.4653211236000061, "step": 1869, "token_acc": 0.8444258406585741 }, { "epoch": 0.10090109534344142, "grad_norm": 0.48262640833854675, "learning_rate": 1.9737507530106562e-05, "loss": 0.4498918354511261, "step": 1870, "token_acc": 0.8484779645615629 }, { "epoch": 0.10095505314843792, "grad_norm": 0.42900851368904114, "learning_rate": 1.9737109606648123e-05, "loss": 0.49015942215919495, "step": 1871, "token_acc": 0.833431865208395 }, { "epoch": 0.10100901095343441, "grad_norm": 0.47435861825942993, "learning_rate": 1.973671138582091e-05, "loss": 0.5428673028945923, "step": 1872, "token_acc": 0.8250638698399893 }, { "epoch": 0.1010629687584309, "grad_norm": 0.5164003372192383, "learning_rate": 1.9736312867637077e-05, "loss": 0.4995538592338562, "step": 1873, "token_acc": 0.8351394003156234 }, { "epoch": 0.1011169265634274, "grad_norm": 0.42400696873664856, "learning_rate": 1.9735914052108803e-05, "loss": 0.4703226387500763, "step": 1874, "token_acc": 0.8399218397715317 }, { "epoch": 0.10117088436842389, "grad_norm": 0.6101990342140198, "learning_rate": 1.9735514939248265e-05, "loss": 0.4659045338630676, "step": 1875, "token_acc": 0.8406327816024608 }, { "epoch": 0.10122484217342038, "grad_norm": 0.4649800956249237, "learning_rate": 1.973511552906765e-05, "loss": 0.42429280281066895, "step": 1876, "token_acc": 0.8534993168361925 }, { "epoch": 0.10127879997841688, "grad_norm": 0.4770791232585907, "learning_rate": 1.9734715821579157e-05, "loss": 0.4495013356208801, "step": 1877, "token_acc": 0.8471106219042378 }, { "epoch": 0.10133275778341337, "grad_norm": 0.4254775941371918, "learning_rate": 1.9734315816794992e-05, "loss": 0.5297587513923645, "step": 1878, "token_acc": 0.8197916666666667 }, { "epoch": 0.10138671558840986, "grad_norm": 0.5506384372711182, "learning_rate": 1.9733915514727372e-05, "loss": 0.4639784097671509, "step": 1879, "token_acc": 0.8456042249145698 }, { "epoch": 0.10144067339340636, "grad_norm": 0.3973822295665741, "learning_rate": 1.9733514915388525e-05, "loss": 0.3946155309677124, "step": 1880, "token_acc": 0.8685359079486841 }, { "epoch": 0.10149463119840285, "grad_norm": 0.40548279881477356, "learning_rate": 1.973311401879068e-05, "loss": 0.4400615692138672, "step": 1881, "token_acc": 0.8505509641873278 }, { "epoch": 0.10154858900339934, "grad_norm": 0.5126818418502808, "learning_rate": 1.9732712824946083e-05, "loss": 0.501245379447937, "step": 1882, "token_acc": 0.8298701298701299 }, { "epoch": 0.10160254680839584, "grad_norm": 0.42762288451194763, "learning_rate": 1.9732311333866985e-05, "loss": 0.46440762281417847, "step": 1883, "token_acc": 0.8479133714563816 }, { "epoch": 0.10165650461339233, "grad_norm": 0.5663173794746399, "learning_rate": 1.973190954556565e-05, "loss": 0.4818880259990692, "step": 1884, "token_acc": 0.8301120675653727 }, { "epoch": 0.10171046241838883, "grad_norm": 0.5115310549736023, "learning_rate": 1.9731507460054346e-05, "loss": 0.41715729236602783, "step": 1885, "token_acc": 0.8576751117734724 }, { "epoch": 0.10176442022338532, "grad_norm": 0.5118408799171448, "learning_rate": 1.9731105077345355e-05, "loss": 0.4492909610271454, "step": 1886, "token_acc": 0.8493381780430833 }, { "epoch": 0.10181837802838181, "grad_norm": 0.594482421875, "learning_rate": 1.9730702397450962e-05, "loss": 0.4825824499130249, "step": 1887, "token_acc": 0.8378195590423373 }, { "epoch": 0.10187233583337829, "grad_norm": 0.5492811799049377, "learning_rate": 1.973029942038347e-05, "loss": 0.4304918050765991, "step": 1888, "token_acc": 0.8555008210180624 }, { "epoch": 0.10192629363837478, "grad_norm": 0.5138626098632812, "learning_rate": 1.9729896146155177e-05, "loss": 0.49231624603271484, "step": 1889, "token_acc": 0.8348917322834646 }, { "epoch": 0.10198025144337128, "grad_norm": 0.5925474166870117, "learning_rate": 1.9729492574778407e-05, "loss": 0.4465227723121643, "step": 1890, "token_acc": 0.852112676056338 }, { "epoch": 0.10203420924836777, "grad_norm": 0.6150429844856262, "learning_rate": 1.9729088706265488e-05, "loss": 0.494942843914032, "step": 1891, "token_acc": 0.8340850075167419 }, { "epoch": 0.10208816705336426, "grad_norm": 0.48077017068862915, "learning_rate": 1.9728684540628742e-05, "loss": 0.49042436480522156, "step": 1892, "token_acc": 0.8325770614927241 }, { "epoch": 0.10214212485836076, "grad_norm": 0.4668690860271454, "learning_rate": 1.972828007788052e-05, "loss": 0.4734666347503662, "step": 1893, "token_acc": 0.8501167921100441 }, { "epoch": 0.10219608266335725, "grad_norm": 0.432522714138031, "learning_rate": 1.9727875318033173e-05, "loss": 0.4394751489162445, "step": 1894, "token_acc": 0.8515035103987283 }, { "epoch": 0.10225004046835375, "grad_norm": 0.45868682861328125, "learning_rate": 1.9727470261099063e-05, "loss": 0.4481887221336365, "step": 1895, "token_acc": 0.8479686735193344 }, { "epoch": 0.10230399827335024, "grad_norm": 0.6716130971908569, "learning_rate": 1.972706490709056e-05, "loss": 0.43495678901672363, "step": 1896, "token_acc": 0.8512043512043512 }, { "epoch": 0.10235795607834673, "grad_norm": 0.5042857527732849, "learning_rate": 1.972665925602004e-05, "loss": 0.4674733281135559, "step": 1897, "token_acc": 0.8436885865457294 }, { "epoch": 0.10241191388334323, "grad_norm": 0.4635791778564453, "learning_rate": 1.97262533078999e-05, "loss": 0.3902297914028168, "step": 1898, "token_acc": 0.8655815911323137 }, { "epoch": 0.10246587168833972, "grad_norm": 0.4367247223854065, "learning_rate": 1.9725847062742528e-05, "loss": 0.46866345405578613, "step": 1899, "token_acc": 0.8461863214185196 }, { "epoch": 0.10251982949333621, "grad_norm": 0.4510767161846161, "learning_rate": 1.9725440520560336e-05, "loss": 0.4631490111351013, "step": 1900, "token_acc": 0.8538318192406242 }, { "epoch": 0.1025737872983327, "grad_norm": 0.5667787790298462, "learning_rate": 1.9725033681365736e-05, "loss": 0.5261602997779846, "step": 1901, "token_acc": 0.8275986525505293 }, { "epoch": 0.1026277451033292, "grad_norm": 0.5850670337677002, "learning_rate": 1.9724626545171156e-05, "loss": 0.4428121745586395, "step": 1902, "token_acc": 0.8496346622227919 }, { "epoch": 0.10268170290832569, "grad_norm": 0.6466947197914124, "learning_rate": 1.972421911198903e-05, "loss": 0.4830344319343567, "step": 1903, "token_acc": 0.8386091520419878 }, { "epoch": 0.10273566071332219, "grad_norm": 0.4624095857143402, "learning_rate": 1.97238113818318e-05, "loss": 0.49145156145095825, "step": 1904, "token_acc": 0.8295958279009127 }, { "epoch": 0.10278961851831868, "grad_norm": 0.4282388687133789, "learning_rate": 1.9723403354711913e-05, "loss": 0.4232492446899414, "step": 1905, "token_acc": 0.8588886030357602 }, { "epoch": 0.10284357632331517, "grad_norm": 0.5251393914222717, "learning_rate": 1.972299503064184e-05, "loss": 0.46170344948768616, "step": 1906, "token_acc": 0.8463379607467688 }, { "epoch": 0.10289753412831167, "grad_norm": 0.517961323261261, "learning_rate": 1.9722586409634042e-05, "loss": 0.5254825353622437, "step": 1907, "token_acc": 0.8214571657325861 }, { "epoch": 0.10295149193330816, "grad_norm": 0.6364073157310486, "learning_rate": 1.9722177491701006e-05, "loss": 0.44407057762145996, "step": 1908, "token_acc": 0.8493711611582334 }, { "epoch": 0.10300544973830465, "grad_norm": 0.4949797987937927, "learning_rate": 1.9721768276855216e-05, "loss": 0.48634618520736694, "step": 1909, "token_acc": 0.8425258539642745 }, { "epoch": 0.10305940754330113, "grad_norm": 0.4828052818775177, "learning_rate": 1.9721358765109166e-05, "loss": 0.4680226743221283, "step": 1910, "token_acc": 0.8419670217423026 }, { "epoch": 0.10311336534829763, "grad_norm": 0.43650689721107483, "learning_rate": 1.9720948956475368e-05, "loss": 0.4884711503982544, "step": 1911, "token_acc": 0.8358952613166543 }, { "epoch": 0.10316732315329412, "grad_norm": 0.4467044174671173, "learning_rate": 1.9720538850966337e-05, "loss": 0.4712159037590027, "step": 1912, "token_acc": 0.8406975374101567 }, { "epoch": 0.10322128095829061, "grad_norm": 0.4689924716949463, "learning_rate": 1.9720128448594593e-05, "loss": 0.48152419924736023, "step": 1913, "token_acc": 0.8377019201462969 }, { "epoch": 0.1032752387632871, "grad_norm": 0.6404796242713928, "learning_rate": 1.9719717749372674e-05, "loss": 0.4298371970653534, "step": 1914, "token_acc": 0.8535008976660682 }, { "epoch": 0.1033291965682836, "grad_norm": 0.5481269359588623, "learning_rate": 1.971930675331312e-05, "loss": 0.4392273426055908, "step": 1915, "token_acc": 0.8469072856567825 }, { "epoch": 0.10338315437328009, "grad_norm": 0.5363544821739197, "learning_rate": 1.9718895460428487e-05, "loss": 0.4316544532775879, "step": 1916, "token_acc": 0.8545046599930963 }, { "epoch": 0.10343711217827659, "grad_norm": 0.35058289766311646, "learning_rate": 1.9718483870731328e-05, "loss": 0.42798641324043274, "step": 1917, "token_acc": 0.8575518969219756 }, { "epoch": 0.10349106998327308, "grad_norm": 0.5521703958511353, "learning_rate": 1.971807198423422e-05, "loss": 0.5457647442817688, "step": 1918, "token_acc": 0.8191414496833216 }, { "epoch": 0.10354502778826957, "grad_norm": 0.4151766300201416, "learning_rate": 1.971765980094974e-05, "loss": 0.4508500099182129, "step": 1919, "token_acc": 0.8512302659650121 }, { "epoch": 0.10359898559326607, "grad_norm": 0.43785956501960754, "learning_rate": 1.9717247320890477e-05, "loss": 0.4078485667705536, "step": 1920, "token_acc": 0.8608764940239044 }, { "epoch": 0.10365294339826256, "grad_norm": 0.42547154426574707, "learning_rate": 1.9716834544069022e-05, "loss": 0.4886784851551056, "step": 1921, "token_acc": 0.8334668803418803 }, { "epoch": 0.10370690120325905, "grad_norm": 0.4183429479598999, "learning_rate": 1.9716421470497987e-05, "loss": 0.38148927688598633, "step": 1922, "token_acc": 0.8663273506563086 }, { "epoch": 0.10376085900825555, "grad_norm": 0.48417916893959045, "learning_rate": 1.971600810018999e-05, "loss": 0.4401624798774719, "step": 1923, "token_acc": 0.854228691145661 }, { "epoch": 0.10381481681325204, "grad_norm": 0.4388926923274994, "learning_rate": 1.9715594433157645e-05, "loss": 0.47943851351737976, "step": 1924, "token_acc": 0.8434331240946402 }, { "epoch": 0.10386877461824853, "grad_norm": 0.44222283363342285, "learning_rate": 1.9715180469413593e-05, "loss": 0.42481786012649536, "step": 1925, "token_acc": 0.8561285500747384 }, { "epoch": 0.10392273242324503, "grad_norm": 0.3182319104671478, "learning_rate": 1.9714766208970472e-05, "loss": 0.4179249703884125, "step": 1926, "token_acc": 0.8538845331432644 }, { "epoch": 0.10397669022824152, "grad_norm": 0.5365522503852844, "learning_rate": 1.971435165184094e-05, "loss": 0.4292113780975342, "step": 1927, "token_acc": 0.8550838163872787 }, { "epoch": 0.10403064803323801, "grad_norm": 0.4815039336681366, "learning_rate": 1.9713936798037654e-05, "loss": 0.524690568447113, "step": 1928, "token_acc": 0.8306094007348284 }, { "epoch": 0.1040846058382345, "grad_norm": 0.4372444748878479, "learning_rate": 1.971352164757328e-05, "loss": 0.447828471660614, "step": 1929, "token_acc": 0.8519383465670247 }, { "epoch": 0.104138563643231, "grad_norm": 0.4529726803302765, "learning_rate": 1.97131062004605e-05, "loss": 0.45663711428642273, "step": 1930, "token_acc": 0.8440932226091615 }, { "epoch": 0.10419252144822748, "grad_norm": 0.5225563049316406, "learning_rate": 1.9712690456712e-05, "loss": 0.4765867590904236, "step": 1931, "token_acc": 0.8386668437126543 }, { "epoch": 0.10424647925322397, "grad_norm": 0.3874545991420746, "learning_rate": 1.971227441634048e-05, "loss": 0.4243347644805908, "step": 1932, "token_acc": 0.8538435412657082 }, { "epoch": 0.10430043705822047, "grad_norm": 0.5910754203796387, "learning_rate": 1.9711858079358644e-05, "loss": 0.39786937832832336, "step": 1933, "token_acc": 0.8628636191915376 }, { "epoch": 0.10435439486321696, "grad_norm": 0.545742392539978, "learning_rate": 1.9711441445779204e-05, "loss": 0.4870492219924927, "step": 1934, "token_acc": 0.8348314606741573 }, { "epoch": 0.10440835266821345, "grad_norm": 0.5457381010055542, "learning_rate": 1.9711024515614888e-05, "loss": 0.4797263443470001, "step": 1935, "token_acc": 0.8350738665308202 }, { "epoch": 0.10446231047320995, "grad_norm": 0.4307646155357361, "learning_rate": 1.9710607288878428e-05, "loss": 0.5015957951545715, "step": 1936, "token_acc": 0.8344038304332594 }, { "epoch": 0.10451626827820644, "grad_norm": 0.48842230439186096, "learning_rate": 1.9710189765582564e-05, "loss": 0.37343621253967285, "step": 1937, "token_acc": 0.8704230696262569 }, { "epoch": 0.10457022608320293, "grad_norm": 0.5304726958274841, "learning_rate": 1.9709771945740048e-05, "loss": 0.44203639030456543, "step": 1938, "token_acc": 0.8488982161594963 }, { "epoch": 0.10462418388819943, "grad_norm": 0.36152321100234985, "learning_rate": 1.970935382936364e-05, "loss": 0.4264396131038666, "step": 1939, "token_acc": 0.8538739462063428 }, { "epoch": 0.10467814169319592, "grad_norm": 0.480132520198822, "learning_rate": 1.970893541646611e-05, "loss": 0.44817352294921875, "step": 1940, "token_acc": 0.8411345049591091 }, { "epoch": 0.10473209949819241, "grad_norm": 0.5359903573989868, "learning_rate": 1.9708516707060238e-05, "loss": 0.422322154045105, "step": 1941, "token_acc": 0.855640243902439 }, { "epoch": 0.1047860573031889, "grad_norm": 0.4125470817089081, "learning_rate": 1.9708097701158807e-05, "loss": 0.5076026916503906, "step": 1942, "token_acc": 0.8307260328920979 }, { "epoch": 0.1048400151081854, "grad_norm": 0.6261059641838074, "learning_rate": 1.9707678398774615e-05, "loss": 0.47877755761146545, "step": 1943, "token_acc": 0.8421931956257594 }, { "epoch": 0.1048939729131819, "grad_norm": 0.4891371428966522, "learning_rate": 1.970725879992047e-05, "loss": 0.4989279806613922, "step": 1944, "token_acc": 0.8331172199170125 }, { "epoch": 0.10494793071817839, "grad_norm": 0.5849359631538391, "learning_rate": 1.970683890460918e-05, "loss": 0.4809255301952362, "step": 1945, "token_acc": 0.8380750605326877 }, { "epoch": 0.10500188852317488, "grad_norm": 0.7250269055366516, "learning_rate": 1.9706418712853575e-05, "loss": 0.42804551124572754, "step": 1946, "token_acc": 0.853285498489426 }, { "epoch": 0.10505584632817137, "grad_norm": 0.5338935852050781, "learning_rate": 1.9705998224666486e-05, "loss": 0.46665099263191223, "step": 1947, "token_acc": 0.8441597139451729 }, { "epoch": 0.10510980413316787, "grad_norm": 0.45833250880241394, "learning_rate": 1.9705577440060752e-05, "loss": 0.4200945496559143, "step": 1948, "token_acc": 0.8565135305223411 }, { "epoch": 0.10516376193816436, "grad_norm": 0.44313231110572815, "learning_rate": 1.9705156359049228e-05, "loss": 0.4096725583076477, "step": 1949, "token_acc": 0.8605577689243028 }, { "epoch": 0.10521771974316085, "grad_norm": 0.5561500787734985, "learning_rate": 1.970473498164477e-05, "loss": 0.5302689671516418, "step": 1950, "token_acc": 0.8234688625836336 }, { "epoch": 0.10527167754815735, "grad_norm": 0.4649673402309418, "learning_rate": 1.9704313307860246e-05, "loss": 0.4423384666442871, "step": 1951, "token_acc": 0.8396874579011181 }, { "epoch": 0.10532563535315384, "grad_norm": 0.4428510367870331, "learning_rate": 1.970389133770854e-05, "loss": 0.43453431129455566, "step": 1952, "token_acc": 0.8537655374116501 }, { "epoch": 0.10537959315815032, "grad_norm": 0.501560628414154, "learning_rate": 1.970346907120253e-05, "loss": 0.39884376525878906, "step": 1953, "token_acc": 0.8631393986723936 }, { "epoch": 0.10543355096314681, "grad_norm": 0.4880341589450836, "learning_rate": 1.970304650835512e-05, "loss": 0.4671708941459656, "step": 1954, "token_acc": 0.8426776673542097 }, { "epoch": 0.1054875087681433, "grad_norm": 0.5082880258560181, "learning_rate": 1.970262364917921e-05, "loss": 0.5107961893081665, "step": 1955, "token_acc": 0.828700108712533 }, { "epoch": 0.1055414665731398, "grad_norm": 0.47398000955581665, "learning_rate": 1.9702200493687718e-05, "loss": 0.4078805446624756, "step": 1956, "token_acc": 0.8610792192881745 }, { "epoch": 0.1055954243781363, "grad_norm": 0.5094225406646729, "learning_rate": 1.970177704189356e-05, "loss": 0.45592600107192993, "step": 1957, "token_acc": 0.8475666822648573 }, { "epoch": 0.10564938218313279, "grad_norm": 0.4211406111717224, "learning_rate": 1.9701353293809674e-05, "loss": 0.38556909561157227, "step": 1958, "token_acc": 0.8666242240967691 }, { "epoch": 0.10570333998812928, "grad_norm": 0.6145687103271484, "learning_rate": 1.9700929249449003e-05, "loss": 0.47931912541389465, "step": 1959, "token_acc": 0.8377902885292048 }, { "epoch": 0.10575729779312577, "grad_norm": 0.5674290657043457, "learning_rate": 1.9700504908824492e-05, "loss": 0.4701877236366272, "step": 1960, "token_acc": 0.8431455897980872 }, { "epoch": 0.10581125559812227, "grad_norm": 0.46945875883102417, "learning_rate": 1.97000802719491e-05, "loss": 0.4413835406303406, "step": 1961, "token_acc": 0.8498180615282832 }, { "epoch": 0.10586521340311876, "grad_norm": 0.6100096702575684, "learning_rate": 1.9699655338835802e-05, "loss": 0.46650201082229614, "step": 1962, "token_acc": 0.8413324913167035 }, { "epoch": 0.10591917120811525, "grad_norm": 0.5032293796539307, "learning_rate": 1.969923010949757e-05, "loss": 0.5290157794952393, "step": 1963, "token_acc": 0.8271641791044776 }, { "epoch": 0.10597312901311175, "grad_norm": 0.48286569118499756, "learning_rate": 1.969880458394739e-05, "loss": 0.4344498813152313, "step": 1964, "token_acc": 0.8559683794466403 }, { "epoch": 0.10602708681810824, "grad_norm": 0.534132182598114, "learning_rate": 1.9698378762198258e-05, "loss": 0.45516473054885864, "step": 1965, "token_acc": 0.843140339836375 }, { "epoch": 0.10608104462310473, "grad_norm": 0.49450594186782837, "learning_rate": 1.969795264426318e-05, "loss": 0.41578757762908936, "step": 1966, "token_acc": 0.8542335225870156 }, { "epoch": 0.10613500242810123, "grad_norm": 0.5018337965011597, "learning_rate": 1.969752623015517e-05, "loss": 0.4980540871620178, "step": 1967, "token_acc": 0.8379506641366223 }, { "epoch": 0.10618896023309772, "grad_norm": 0.5893809795379639, "learning_rate": 1.9697099519887243e-05, "loss": 0.46095484495162964, "step": 1968, "token_acc": 0.847971145175834 }, { "epoch": 0.10624291803809421, "grad_norm": 0.5648483037948608, "learning_rate": 1.9696672513472444e-05, "loss": 0.4583015441894531, "step": 1969, "token_acc": 0.8438626744003762 }, { "epoch": 0.10629687584309071, "grad_norm": 0.4618178904056549, "learning_rate": 1.9696245210923807e-05, "loss": 0.5094363689422607, "step": 1970, "token_acc": 0.8342636324602833 }, { "epoch": 0.1063508336480872, "grad_norm": 0.5214613676071167, "learning_rate": 1.9695817612254376e-05, "loss": 0.3842237889766693, "step": 1971, "token_acc": 0.8723276121650105 }, { "epoch": 0.1064047914530837, "grad_norm": 0.48790961503982544, "learning_rate": 1.969538971747722e-05, "loss": 0.4183116555213928, "step": 1972, "token_acc": 0.8498655276063914 }, { "epoch": 0.10645874925808019, "grad_norm": 0.4943569302558899, "learning_rate": 1.9694961526605398e-05, "loss": 0.4638531804084778, "step": 1973, "token_acc": 0.8431879932720922 }, { "epoch": 0.10651270706307668, "grad_norm": 0.5200337767601013, "learning_rate": 1.9694533039651994e-05, "loss": 0.4929519593715668, "step": 1974, "token_acc": 0.8370735503956415 }, { "epoch": 0.10656666486807316, "grad_norm": 0.5969512462615967, "learning_rate": 1.9694104256630088e-05, "loss": 0.46528029441833496, "step": 1975, "token_acc": 0.8428635080944601 }, { "epoch": 0.10662062267306965, "grad_norm": 0.5890181660652161, "learning_rate": 1.969367517755278e-05, "loss": 0.4663447141647339, "step": 1976, "token_acc": 0.8417254901960785 }, { "epoch": 0.10667458047806615, "grad_norm": 0.47751495242118835, "learning_rate": 1.969324580243317e-05, "loss": 0.4256123900413513, "step": 1977, "token_acc": 0.8549364613880743 }, { "epoch": 0.10672853828306264, "grad_norm": 0.4421929717063904, "learning_rate": 1.969281613128437e-05, "loss": 0.3905467391014099, "step": 1978, "token_acc": 0.8679421371908539 }, { "epoch": 0.10678249608805913, "grad_norm": 0.483566552400589, "learning_rate": 1.9692386164119508e-05, "loss": 0.4795796275138855, "step": 1979, "token_acc": 0.8414512212541065 }, { "epoch": 0.10683645389305563, "grad_norm": 0.42653438448905945, "learning_rate": 1.969195590095171e-05, "loss": 0.4823022484779358, "step": 1980, "token_acc": 0.8397343343921455 }, { "epoch": 0.10689041169805212, "grad_norm": 0.491212397813797, "learning_rate": 1.969152534179412e-05, "loss": 0.48018383979797363, "step": 1981, "token_acc": 0.8376042908224076 }, { "epoch": 0.10694436950304861, "grad_norm": 0.4021313488483429, "learning_rate": 1.9691094486659885e-05, "loss": 0.41869160532951355, "step": 1982, "token_acc": 0.8584615384615385 }, { "epoch": 0.10699832730804511, "grad_norm": 0.48925912380218506, "learning_rate": 1.9690663335562158e-05, "loss": 0.41701072454452515, "step": 1983, "token_acc": 0.8575747931253979 }, { "epoch": 0.1070522851130416, "grad_norm": 0.5283972024917603, "learning_rate": 1.9690231888514117e-05, "loss": 0.4581189453601837, "step": 1984, "token_acc": 0.8401381909547738 }, { "epoch": 0.1071062429180381, "grad_norm": 0.5560757517814636, "learning_rate": 1.968980014552893e-05, "loss": 0.4721561670303345, "step": 1985, "token_acc": 0.8426083987709115 }, { "epoch": 0.10716020072303459, "grad_norm": 0.6447998285293579, "learning_rate": 1.9689368106619785e-05, "loss": 0.46095559000968933, "step": 1986, "token_acc": 0.8474959612277867 }, { "epoch": 0.10721415852803108, "grad_norm": 0.6555273532867432, "learning_rate": 1.9688935771799877e-05, "loss": 0.48151472210884094, "step": 1987, "token_acc": 0.8427073403241182 }, { "epoch": 0.10726811633302757, "grad_norm": 0.3726459741592407, "learning_rate": 1.968850314108241e-05, "loss": 0.4491827189922333, "step": 1988, "token_acc": 0.8535143970828618 }, { "epoch": 0.10732207413802407, "grad_norm": 0.398564875125885, "learning_rate": 1.9688070214480594e-05, "loss": 0.4209326207637787, "step": 1989, "token_acc": 0.8585625810168515 }, { "epoch": 0.10737603194302056, "grad_norm": 0.4999208450317383, "learning_rate": 1.968763699200765e-05, "loss": 0.4333382248878479, "step": 1990, "token_acc": 0.8563918096292197 }, { "epoch": 0.10742998974801705, "grad_norm": 0.47315189242362976, "learning_rate": 1.968720347367681e-05, "loss": 0.4289226531982422, "step": 1991, "token_acc": 0.8566228276186003 }, { "epoch": 0.10748394755301355, "grad_norm": 0.5081961750984192, "learning_rate": 1.9686769659501314e-05, "loss": 0.43779122829437256, "step": 1992, "token_acc": 0.8518108930052529 }, { "epoch": 0.10753790535801004, "grad_norm": 0.48522984981536865, "learning_rate": 1.9686335549494414e-05, "loss": 0.4345611035823822, "step": 1993, "token_acc": 0.8551013625789299 }, { "epoch": 0.10759186316300653, "grad_norm": 0.5915232300758362, "learning_rate": 1.9685901143669358e-05, "loss": 0.48227596282958984, "step": 1994, "token_acc": 0.8443078724026924 }, { "epoch": 0.10764582096800303, "grad_norm": 0.47532224655151367, "learning_rate": 1.9685466442039424e-05, "loss": 0.4419081211090088, "step": 1995, "token_acc": 0.8541869998048018 }, { "epoch": 0.10769977877299951, "grad_norm": 0.4717773497104645, "learning_rate": 1.9685031444617877e-05, "loss": 0.5036472082138062, "step": 1996, "token_acc": 0.8346215048874744 }, { "epoch": 0.107753736577996, "grad_norm": 0.426059365272522, "learning_rate": 1.968459615141801e-05, "loss": 0.46393442153930664, "step": 1997, "token_acc": 0.8440285204991087 }, { "epoch": 0.1078076943829925, "grad_norm": 0.5120782852172852, "learning_rate": 1.968416056245311e-05, "loss": 0.3578999936580658, "step": 1998, "token_acc": 0.8758033178897026 }, { "epoch": 0.10786165218798899, "grad_norm": 0.39497220516204834, "learning_rate": 1.9683724677736487e-05, "loss": 0.4507400393486023, "step": 1999, "token_acc": 0.8428310918623205 }, { "epoch": 0.10791560999298548, "grad_norm": 0.5080616474151611, "learning_rate": 1.968328849728145e-05, "loss": 0.46749457716941833, "step": 2000, "token_acc": 0.8435969209237228 }, { "epoch": 0.10796956779798197, "grad_norm": 0.5090059638023376, "learning_rate": 1.9682852021101318e-05, "loss": 0.45398813486099243, "step": 2001, "token_acc": 0.8441288534811223 }, { "epoch": 0.10802352560297847, "grad_norm": 0.5120164155960083, "learning_rate": 1.968241524920942e-05, "loss": 0.4091631770133972, "step": 2002, "token_acc": 0.8566009104704098 }, { "epoch": 0.10807748340797496, "grad_norm": 0.5282572507858276, "learning_rate": 1.9681978181619096e-05, "loss": 0.4749317765235901, "step": 2003, "token_acc": 0.8426197458455523 }, { "epoch": 0.10813144121297145, "grad_norm": 0.4704775810241699, "learning_rate": 1.9681540818343698e-05, "loss": 0.43162232637405396, "step": 2004, "token_acc": 0.8544827586206897 }, { "epoch": 0.10818539901796795, "grad_norm": 0.48561909794807434, "learning_rate": 1.9681103159396576e-05, "loss": 0.4683367908000946, "step": 2005, "token_acc": 0.8453493236647608 }, { "epoch": 0.10823935682296444, "grad_norm": 0.571835458278656, "learning_rate": 1.9680665204791103e-05, "loss": 0.46474453806877136, "step": 2006, "token_acc": 0.8439539575175032 }, { "epoch": 0.10829331462796093, "grad_norm": 0.41039612889289856, "learning_rate": 1.9680226954540652e-05, "loss": 0.5021194815635681, "step": 2007, "token_acc": 0.831966623507409 }, { "epoch": 0.10834727243295743, "grad_norm": 0.553637683391571, "learning_rate": 1.96797884086586e-05, "loss": 0.37175631523132324, "step": 2008, "token_acc": 0.8669704542113773 }, { "epoch": 0.10840123023795392, "grad_norm": 0.4540615379810333, "learning_rate": 1.9679349567158347e-05, "loss": 0.4194778800010681, "step": 2009, "token_acc": 0.8580315178324578 }, { "epoch": 0.10845518804295042, "grad_norm": 0.5542768239974976, "learning_rate": 1.9678910430053298e-05, "loss": 0.4583895206451416, "step": 2010, "token_acc": 0.8469805527123848 }, { "epoch": 0.10850914584794691, "grad_norm": 0.5239964723587036, "learning_rate": 1.9678470997356856e-05, "loss": 0.4882490336894989, "step": 2011, "token_acc": 0.8360476255645272 }, { "epoch": 0.1085631036529434, "grad_norm": 0.5173597931861877, "learning_rate": 1.967803126908245e-05, "loss": 0.3969342112541199, "step": 2012, "token_acc": 0.8586285915741231 }, { "epoch": 0.1086170614579399, "grad_norm": 0.5822570323944092, "learning_rate": 1.96775912452435e-05, "loss": 0.46085911989212036, "step": 2013, "token_acc": 0.8420092838196287 }, { "epoch": 0.10867101926293639, "grad_norm": 0.4729803204536438, "learning_rate": 1.9677150925853448e-05, "loss": 0.43148094415664673, "step": 2014, "token_acc": 0.85046875 }, { "epoch": 0.10872497706793288, "grad_norm": 0.5314042568206787, "learning_rate": 1.9676710310925746e-05, "loss": 0.4627794623374939, "step": 2015, "token_acc": 0.8423173803526448 }, { "epoch": 0.10877893487292938, "grad_norm": 0.4606318175792694, "learning_rate": 1.9676269400473842e-05, "loss": 0.42917609214782715, "step": 2016, "token_acc": 0.8537576167907921 }, { "epoch": 0.10883289267792587, "grad_norm": 0.6351438164710999, "learning_rate": 1.9675828194511205e-05, "loss": 0.5050843954086304, "step": 2017, "token_acc": 0.8315809596635443 }, { "epoch": 0.10888685048292235, "grad_norm": 0.4318072199821472, "learning_rate": 1.9675386693051312e-05, "loss": 0.44120243191719055, "step": 2018, "token_acc": 0.8449359135461171 }, { "epoch": 0.10894080828791884, "grad_norm": 0.39899754524230957, "learning_rate": 1.9674944896107645e-05, "loss": 0.365386039018631, "step": 2019, "token_acc": 0.8738903394255875 }, { "epoch": 0.10899476609291534, "grad_norm": 0.5126902461051941, "learning_rate": 1.967450280369369e-05, "loss": 0.4318476915359497, "step": 2020, "token_acc": 0.8543101999663922 }, { "epoch": 0.10904872389791183, "grad_norm": 0.5777015686035156, "learning_rate": 1.967406041582296e-05, "loss": 0.39270609617233276, "step": 2021, "token_acc": 0.8653680399931046 }, { "epoch": 0.10910268170290832, "grad_norm": 0.45768028497695923, "learning_rate": 1.9673617732508957e-05, "loss": 0.48302775621414185, "step": 2022, "token_acc": 0.8446639802417782 }, { "epoch": 0.10915663950790482, "grad_norm": 0.5607661604881287, "learning_rate": 1.96731747537652e-05, "loss": 0.4068703353404999, "step": 2023, "token_acc": 0.8647279549718574 }, { "epoch": 0.10921059731290131, "grad_norm": 0.4407835006713867, "learning_rate": 1.9672731479605224e-05, "loss": 0.4028981029987335, "step": 2024, "token_acc": 0.8615260395639887 }, { "epoch": 0.1092645551178978, "grad_norm": 0.474854975938797, "learning_rate": 1.9672287910042562e-05, "loss": 0.44101518392562866, "step": 2025, "token_acc": 0.8551483420593369 }, { "epoch": 0.1093185129228943, "grad_norm": 0.5823841691017151, "learning_rate": 1.9671844045090756e-05, "loss": 0.4922100007534027, "step": 2026, "token_acc": 0.8391839183918391 }, { "epoch": 0.10937247072789079, "grad_norm": 0.444745272397995, "learning_rate": 1.9671399884763373e-05, "loss": 0.4603198766708374, "step": 2027, "token_acc": 0.8396782841823056 }, { "epoch": 0.10942642853288728, "grad_norm": 0.40541061758995056, "learning_rate": 1.967095542907397e-05, "loss": 0.4213058054447174, "step": 2028, "token_acc": 0.8591707976566021 }, { "epoch": 0.10948038633788378, "grad_norm": 0.43000537157058716, "learning_rate": 1.967051067803612e-05, "loss": 0.4555690586566925, "step": 2029, "token_acc": 0.8442390818303771 }, { "epoch": 0.10953434414288027, "grad_norm": 0.5395707488059998, "learning_rate": 1.9670065631663407e-05, "loss": 0.44389137625694275, "step": 2030, "token_acc": 0.8488753799392097 }, { "epoch": 0.10958830194787676, "grad_norm": 0.654346227645874, "learning_rate": 1.966962028996942e-05, "loss": 0.5131943225860596, "step": 2031, "token_acc": 0.8333569304827977 }, { "epoch": 0.10964225975287326, "grad_norm": 0.6591742634773254, "learning_rate": 1.966917465296777e-05, "loss": 0.5320988297462463, "step": 2032, "token_acc": 0.8272160664819944 }, { "epoch": 0.10969621755786975, "grad_norm": 0.4426804482936859, "learning_rate": 1.9668728720672054e-05, "loss": 0.4687192142009735, "step": 2033, "token_acc": 0.8418466460975889 }, { "epoch": 0.10975017536286624, "grad_norm": 0.6008878350257874, "learning_rate": 1.9668282493095897e-05, "loss": 0.42334991693496704, "step": 2034, "token_acc": 0.8528648087242513 }, { "epoch": 0.10980413316786274, "grad_norm": 0.33053478598594666, "learning_rate": 1.9667835970252925e-05, "loss": 0.43046605587005615, "step": 2035, "token_acc": 0.8473490527463541 }, { "epoch": 0.10985809097285923, "grad_norm": 0.45886099338531494, "learning_rate": 1.9667389152156774e-05, "loss": 0.4719082713127136, "step": 2036, "token_acc": 0.8376547231270358 }, { "epoch": 0.10991204877785572, "grad_norm": 0.5032780766487122, "learning_rate": 1.966694203882109e-05, "loss": 0.5156068801879883, "step": 2037, "token_acc": 0.8336140744899869 }, { "epoch": 0.10996600658285222, "grad_norm": 0.5525962710380554, "learning_rate": 1.9666494630259532e-05, "loss": 0.47271305322647095, "step": 2038, "token_acc": 0.842525270438021 }, { "epoch": 0.1100199643878487, "grad_norm": 0.6888159513473511, "learning_rate": 1.966604692648576e-05, "loss": 0.42536109685897827, "step": 2039, "token_acc": 0.8511282750265031 }, { "epoch": 0.11007392219284519, "grad_norm": 0.5210268497467041, "learning_rate": 1.9665598927513442e-05, "loss": 0.417475163936615, "step": 2040, "token_acc": 0.8601009222202889 }, { "epoch": 0.11012787999784168, "grad_norm": 0.5225096344947815, "learning_rate": 1.9665150633356268e-05, "loss": 0.4694839119911194, "step": 2041, "token_acc": 0.8464341488807913 }, { "epoch": 0.11018183780283818, "grad_norm": 0.4340270459651947, "learning_rate": 1.966470204402793e-05, "loss": 0.45684224367141724, "step": 2042, "token_acc": 0.8499608252807521 }, { "epoch": 0.11023579560783467, "grad_norm": 0.590450644493103, "learning_rate": 1.9664253159542117e-05, "loss": 0.44792288541793823, "step": 2043, "token_acc": 0.8479300450881267 }, { "epoch": 0.11028975341283116, "grad_norm": 0.43357324600219727, "learning_rate": 1.9663803979912547e-05, "loss": 0.4461905360221863, "step": 2044, "token_acc": 0.8481957186544342 }, { "epoch": 0.11034371121782766, "grad_norm": 0.4520302414894104, "learning_rate": 1.9663354505152933e-05, "loss": 0.4859467148780823, "step": 2045, "token_acc": 0.8391824141920555 }, { "epoch": 0.11039766902282415, "grad_norm": 0.5543480515480042, "learning_rate": 1.9662904735277008e-05, "loss": 0.49821794033050537, "step": 2046, "token_acc": 0.8338562464346834 }, { "epoch": 0.11045162682782064, "grad_norm": 0.6370378732681274, "learning_rate": 1.9662454670298497e-05, "loss": 0.43286722898483276, "step": 2047, "token_acc": 0.8524466276213868 }, { "epoch": 0.11050558463281714, "grad_norm": 0.5177015662193298, "learning_rate": 1.9662004310231157e-05, "loss": 0.43675827980041504, "step": 2048, "token_acc": 0.8525179856115108 }, { "epoch": 0.11055954243781363, "grad_norm": 0.6148311495780945, "learning_rate": 1.9661553655088735e-05, "loss": 0.5095939636230469, "step": 2049, "token_acc": 0.8288352272727273 }, { "epoch": 0.11061350024281012, "grad_norm": 0.417185515165329, "learning_rate": 1.9661102704885e-05, "loss": 0.46778929233551025, "step": 2050, "token_acc": 0.8441692466460269 }, { "epoch": 0.11066745804780662, "grad_norm": 0.45817774534225464, "learning_rate": 1.9660651459633713e-05, "loss": 0.47533828020095825, "step": 2051, "token_acc": 0.8410186673259983 }, { "epoch": 0.11072141585280311, "grad_norm": 0.5678261518478394, "learning_rate": 1.9660199919348662e-05, "loss": 0.4368744492530823, "step": 2052, "token_acc": 0.8542931483087598 }, { "epoch": 0.1107753736577996, "grad_norm": 0.4523743689060211, "learning_rate": 1.9659748084043638e-05, "loss": 0.4425424635410309, "step": 2053, "token_acc": 0.8495712076479686 }, { "epoch": 0.1108293314627961, "grad_norm": 0.5115914344787598, "learning_rate": 1.9659295953732437e-05, "loss": 0.5112683176994324, "step": 2054, "token_acc": 0.8304940374787053 }, { "epoch": 0.11088328926779259, "grad_norm": 0.6539711952209473, "learning_rate": 1.9658843528428866e-05, "loss": 0.48634102940559387, "step": 2055, "token_acc": 0.8373430962343096 }, { "epoch": 0.11093724707278908, "grad_norm": 0.6220632195472717, "learning_rate": 1.9658390808146746e-05, "loss": 0.39715760946273804, "step": 2056, "token_acc": 0.8608878683505549 }, { "epoch": 0.11099120487778558, "grad_norm": 0.5319969058036804, "learning_rate": 1.9657937792899903e-05, "loss": 0.4935719668865204, "step": 2057, "token_acc": 0.8335742681604627 }, { "epoch": 0.11104516268278207, "grad_norm": 0.5169870853424072, "learning_rate": 1.9657484482702168e-05, "loss": 0.4183390438556671, "step": 2058, "token_acc": 0.8580723423026025 }, { "epoch": 0.11109912048777856, "grad_norm": 0.4978550672531128, "learning_rate": 1.9657030877567384e-05, "loss": 0.4447008967399597, "step": 2059, "token_acc": 0.8479136690647482 }, { "epoch": 0.11115307829277506, "grad_norm": 0.3985816240310669, "learning_rate": 1.965657697750941e-05, "loss": 0.4458271563053131, "step": 2060, "token_acc": 0.8462651236191479 }, { "epoch": 0.11120703609777154, "grad_norm": 0.4328925907611847, "learning_rate": 1.9656122782542105e-05, "loss": 0.4433487057685852, "step": 2061, "token_acc": 0.8593155893536122 }, { "epoch": 0.11126099390276803, "grad_norm": 0.467927485704422, "learning_rate": 1.965566829267934e-05, "loss": 0.42904964089393616, "step": 2062, "token_acc": 0.8586250261999581 }, { "epoch": 0.11131495170776452, "grad_norm": 0.5267949104309082, "learning_rate": 1.9655213507934993e-05, "loss": 0.4563889801502228, "step": 2063, "token_acc": 0.8453517403436628 }, { "epoch": 0.11136890951276102, "grad_norm": 0.4660928547382355, "learning_rate": 1.9654758428322954e-05, "loss": 0.42990684509277344, "step": 2064, "token_acc": 0.8529667149059335 }, { "epoch": 0.11142286731775751, "grad_norm": 0.4499906897544861, "learning_rate": 1.9654303053857124e-05, "loss": 0.38633695244789124, "step": 2065, "token_acc": 0.8664614482556435 }, { "epoch": 0.111476825122754, "grad_norm": 0.47955358028411865, "learning_rate": 1.9653847384551406e-05, "loss": 0.398176372051239, "step": 2066, "token_acc": 0.8655879729901899 }, { "epoch": 0.1115307829277505, "grad_norm": 0.5131021738052368, "learning_rate": 1.9653391420419717e-05, "loss": 0.4052847623825073, "step": 2067, "token_acc": 0.8631333721607455 }, { "epoch": 0.11158474073274699, "grad_norm": 0.5269247889518738, "learning_rate": 1.9652935161475988e-05, "loss": 0.4839543104171753, "step": 2068, "token_acc": 0.8382275825904563 }, { "epoch": 0.11163869853774348, "grad_norm": 0.4672917127609253, "learning_rate": 1.9652478607734144e-05, "loss": 0.36687207221984863, "step": 2069, "token_acc": 0.8707349081364829 }, { "epoch": 0.11169265634273998, "grad_norm": 0.5384339094161987, "learning_rate": 1.9652021759208135e-05, "loss": 0.47806987166404724, "step": 2070, "token_acc": 0.8399014778325123 }, { "epoch": 0.11174661414773647, "grad_norm": 0.43208858370780945, "learning_rate": 1.9651564615911908e-05, "loss": 0.42434367537498474, "step": 2071, "token_acc": 0.8609595270577535 }, { "epoch": 0.11180057195273296, "grad_norm": 0.45499351620674133, "learning_rate": 1.9651107177859424e-05, "loss": 0.48692482709884644, "step": 2072, "token_acc": 0.8374790565794561 }, { "epoch": 0.11185452975772946, "grad_norm": 0.5817833542823792, "learning_rate": 1.9650649445064655e-05, "loss": 0.46819835901260376, "step": 2073, "token_acc": 0.8387932630020359 }, { "epoch": 0.11190848756272595, "grad_norm": 0.42052122950553894, "learning_rate": 1.9650191417541584e-05, "loss": 0.48115354776382446, "step": 2074, "token_acc": 0.8440699935191186 }, { "epoch": 0.11196244536772244, "grad_norm": 0.4304899573326111, "learning_rate": 1.964973309530419e-05, "loss": 0.4785720705986023, "step": 2075, "token_acc": 0.8394708128750682 }, { "epoch": 0.11201640317271894, "grad_norm": 0.5706490874290466, "learning_rate": 1.964927447836648e-05, "loss": 0.5137962102890015, "step": 2076, "token_acc": 0.8281750266808965 }, { "epoch": 0.11207036097771543, "grad_norm": 0.4704280495643616, "learning_rate": 1.9648815566742452e-05, "loss": 0.43014761805534363, "step": 2077, "token_acc": 0.85607691203903 }, { "epoch": 0.11212431878271192, "grad_norm": 0.48686671257019043, "learning_rate": 1.9648356360446125e-05, "loss": 0.49861466884613037, "step": 2078, "token_acc": 0.8300190683737402 }, { "epoch": 0.11217827658770842, "grad_norm": 0.38370493054389954, "learning_rate": 1.9647896859491526e-05, "loss": 0.4263160228729248, "step": 2079, "token_acc": 0.8543110618242307 }, { "epoch": 0.11223223439270491, "grad_norm": 0.3845549523830414, "learning_rate": 1.9647437063892677e-05, "loss": 0.41024184226989746, "step": 2080, "token_acc": 0.8638610723792622 }, { "epoch": 0.1122861921977014, "grad_norm": 0.4253344237804413, "learning_rate": 1.9646976973663632e-05, "loss": 0.38364487886428833, "step": 2081, "token_acc": 0.8685937718195783 }, { "epoch": 0.11234015000269788, "grad_norm": 0.42051345109939575, "learning_rate": 1.964651658881844e-05, "loss": 0.42105844616889954, "step": 2082, "token_acc": 0.8558887088060508 }, { "epoch": 0.11239410780769438, "grad_norm": 0.39405685663223267, "learning_rate": 1.9646055909371152e-05, "loss": 0.4723908007144928, "step": 2083, "token_acc": 0.8420620812252154 }, { "epoch": 0.11244806561269087, "grad_norm": 0.3558662235736847, "learning_rate": 1.9645594935335848e-05, "loss": 0.43328219652175903, "step": 2084, "token_acc": 0.8526881720430107 }, { "epoch": 0.11250202341768736, "grad_norm": 0.40333375334739685, "learning_rate": 1.96451336667266e-05, "loss": 0.4717378616333008, "step": 2085, "token_acc": 0.8378652254686708 }, { "epoch": 0.11255598122268386, "grad_norm": 0.5052248239517212, "learning_rate": 1.9644672103557494e-05, "loss": 0.4331960380077362, "step": 2086, "token_acc": 0.8558558558558559 }, { "epoch": 0.11260993902768035, "grad_norm": 0.5286592245101929, "learning_rate": 1.964421024584263e-05, "loss": 0.4479699730873108, "step": 2087, "token_acc": 0.8465757089352595 }, { "epoch": 0.11266389683267684, "grad_norm": 0.4992813467979431, "learning_rate": 1.9643748093596115e-05, "loss": 0.4876900315284729, "step": 2088, "token_acc": 0.8386113065916022 }, { "epoch": 0.11271785463767334, "grad_norm": 0.6175124645233154, "learning_rate": 1.9643285646832056e-05, "loss": 0.44259652495384216, "step": 2089, "token_acc": 0.8508585761142196 }, { "epoch": 0.11277181244266983, "grad_norm": 0.33492743968963623, "learning_rate": 1.9642822905564582e-05, "loss": 0.40675032138824463, "step": 2090, "token_acc": 0.8595592004100461 }, { "epoch": 0.11282577024766632, "grad_norm": 0.41203391551971436, "learning_rate": 1.964235986980782e-05, "loss": 0.5209875106811523, "step": 2091, "token_acc": 0.8258635114138879 }, { "epoch": 0.11287972805266282, "grad_norm": 0.5487256050109863, "learning_rate": 1.9641896539575917e-05, "loss": 0.417630672454834, "step": 2092, "token_acc": 0.8568782623337262 }, { "epoch": 0.11293368585765931, "grad_norm": 0.5773732662200928, "learning_rate": 1.9641432914883016e-05, "loss": 0.5101420283317566, "step": 2093, "token_acc": 0.8303375712406839 }, { "epoch": 0.1129876436626558, "grad_norm": 0.5451950430870056, "learning_rate": 1.9640968995743282e-05, "loss": 0.40429025888442993, "step": 2094, "token_acc": 0.8705490264865813 }, { "epoch": 0.1130416014676523, "grad_norm": 0.55484938621521, "learning_rate": 1.9640504782170878e-05, "loss": 0.4202103912830353, "step": 2095, "token_acc": 0.8511591962905719 }, { "epoch": 0.11309555927264879, "grad_norm": 0.540007472038269, "learning_rate": 1.9640040274179982e-05, "loss": 0.4584876000881195, "step": 2096, "token_acc": 0.8468456553365957 }, { "epoch": 0.11314951707764528, "grad_norm": 0.43866637349128723, "learning_rate": 1.9639575471784787e-05, "loss": 0.45214059948921204, "step": 2097, "token_acc": 0.8499498997995992 }, { "epoch": 0.11320347488264178, "grad_norm": 0.5122917294502258, "learning_rate": 1.9639110374999477e-05, "loss": 0.4599962830543518, "step": 2098, "token_acc": 0.8461635697130577 }, { "epoch": 0.11325743268763827, "grad_norm": 0.5252630710601807, "learning_rate": 1.9638644983838264e-05, "loss": 0.42750871181488037, "step": 2099, "token_acc": 0.8558046683046683 }, { "epoch": 0.11331139049263476, "grad_norm": 0.45022520422935486, "learning_rate": 1.963817929831536e-05, "loss": 0.3906576633453369, "step": 2100, "token_acc": 0.8663330950679057 }, { "epoch": 0.11336534829763126, "grad_norm": 0.5531981587409973, "learning_rate": 1.9637713318444983e-05, "loss": 0.4564192593097687, "step": 2101, "token_acc": 0.8456245589273113 }, { "epoch": 0.11341930610262775, "grad_norm": 0.44480764865875244, "learning_rate": 1.9637247044241365e-05, "loss": 0.4236893951892853, "step": 2102, "token_acc": 0.8541294642857142 }, { "epoch": 0.11347326390762424, "grad_norm": 0.4923440217971802, "learning_rate": 1.9636780475718748e-05, "loss": 0.48635828495025635, "step": 2103, "token_acc": 0.842443729903537 }, { "epoch": 0.11352722171262072, "grad_norm": 0.5894678831100464, "learning_rate": 1.9636313612891376e-05, "loss": 0.45528125762939453, "step": 2104, "token_acc": 0.8471587197909863 }, { "epoch": 0.11358117951761722, "grad_norm": 0.4761901795864105, "learning_rate": 1.9635846455773516e-05, "loss": 0.5108862519264221, "step": 2105, "token_acc": 0.8297872340425532 }, { "epoch": 0.11363513732261371, "grad_norm": 0.5348156094551086, "learning_rate": 1.963537900437943e-05, "loss": 0.5267331600189209, "step": 2106, "token_acc": 0.8254804270462633 }, { "epoch": 0.1136890951276102, "grad_norm": 0.49119752645492554, "learning_rate": 1.963491125872339e-05, "loss": 0.4376834034919739, "step": 2107, "token_acc": 0.851734671612481 }, { "epoch": 0.1137430529326067, "grad_norm": 0.4166686534881592, "learning_rate": 1.9634443218819683e-05, "loss": 0.42701178789138794, "step": 2108, "token_acc": 0.8553881220716728 }, { "epoch": 0.11379701073760319, "grad_norm": 0.44646018743515015, "learning_rate": 1.9633974884682607e-05, "loss": 0.4227820038795471, "step": 2109, "token_acc": 0.8524725274725274 }, { "epoch": 0.11385096854259968, "grad_norm": 0.4374563992023468, "learning_rate": 1.963350625632646e-05, "loss": 0.45698291063308716, "step": 2110, "token_acc": 0.8403726708074534 }, { "epoch": 0.11390492634759618, "grad_norm": 0.5414657592773438, "learning_rate": 1.9633037333765558e-05, "loss": 0.4191957414150238, "step": 2111, "token_acc": 0.8578836493274996 }, { "epoch": 0.11395888415259267, "grad_norm": 0.379589706659317, "learning_rate": 1.963256811701422e-05, "loss": 0.3996371626853943, "step": 2112, "token_acc": 0.8617411225658649 }, { "epoch": 0.11401284195758916, "grad_norm": 0.5295403599739075, "learning_rate": 1.963209860608677e-05, "loss": 0.45637330412864685, "step": 2113, "token_acc": 0.8459756097560975 }, { "epoch": 0.11406679976258566, "grad_norm": 0.5082882046699524, "learning_rate": 1.9631628800997554e-05, "loss": 0.496116042137146, "step": 2114, "token_acc": 0.8296442039678635 }, { "epoch": 0.11412075756758215, "grad_norm": 0.35744285583496094, "learning_rate": 1.963115870176092e-05, "loss": 0.4370564818382263, "step": 2115, "token_acc": 0.8533318695795368 }, { "epoch": 0.11417471537257864, "grad_norm": 0.4863147437572479, "learning_rate": 1.963068830839122e-05, "loss": 0.42135512828826904, "step": 2116, "token_acc": 0.8554128975780566 }, { "epoch": 0.11422867317757514, "grad_norm": 0.422374427318573, "learning_rate": 1.963021762090282e-05, "loss": 0.4277907609939575, "step": 2117, "token_acc": 0.8598244429439568 }, { "epoch": 0.11428263098257163, "grad_norm": 0.4171602725982666, "learning_rate": 1.9629746639310103e-05, "loss": 0.5151227712631226, "step": 2118, "token_acc": 0.8319987913582112 }, { "epoch": 0.11433658878756812, "grad_norm": 0.5689077973365784, "learning_rate": 1.9629275363627443e-05, "loss": 0.4744492769241333, "step": 2119, "token_acc": 0.8417905347684947 }, { "epoch": 0.11439054659256462, "grad_norm": 0.479905366897583, "learning_rate": 1.962880379386923e-05, "loss": 0.4312538504600525, "step": 2120, "token_acc": 0.8601116625310173 }, { "epoch": 0.11444450439756111, "grad_norm": 0.38551080226898193, "learning_rate": 1.962833193004988e-05, "loss": 0.46388185024261475, "step": 2121, "token_acc": 0.8460794740404138 }, { "epoch": 0.1144984622025576, "grad_norm": 0.5259149074554443, "learning_rate": 1.9627859772183793e-05, "loss": 0.4460301399230957, "step": 2122, "token_acc": 0.8467937608318891 }, { "epoch": 0.1145524200075541, "grad_norm": 0.4182814657688141, "learning_rate": 1.962738732028539e-05, "loss": 0.41220927238464355, "step": 2123, "token_acc": 0.861440407903123 }, { "epoch": 0.11460637781255059, "grad_norm": 0.4859344959259033, "learning_rate": 1.96269145743691e-05, "loss": 0.373729944229126, "step": 2124, "token_acc": 0.8745670832705917 }, { "epoch": 0.11466033561754709, "grad_norm": 0.4274941682815552, "learning_rate": 1.962644153444936e-05, "loss": 0.4520082473754883, "step": 2125, "token_acc": 0.8461719670200235 }, { "epoch": 0.11471429342254356, "grad_norm": 0.3984759747982025, "learning_rate": 1.9625968200540618e-05, "loss": 0.4983770549297333, "step": 2126, "token_acc": 0.8371708584248435 }, { "epoch": 0.11476825122754006, "grad_norm": 0.436732679605484, "learning_rate": 1.962549457265733e-05, "loss": 0.4647440016269684, "step": 2127, "token_acc": 0.843647277404602 }, { "epoch": 0.11482220903253655, "grad_norm": 0.5808711647987366, "learning_rate": 1.9625020650813957e-05, "loss": 0.39801496267318726, "step": 2128, "token_acc": 0.8580257510729614 }, { "epoch": 0.11487616683753304, "grad_norm": 0.5959516763687134, "learning_rate": 1.9624546435024975e-05, "loss": 0.5254917144775391, "step": 2129, "token_acc": 0.8261214138532256 }, { "epoch": 0.11493012464252954, "grad_norm": 0.5427504181861877, "learning_rate": 1.9624071925304868e-05, "loss": 0.46783310174942017, "step": 2130, "token_acc": 0.8379518072289157 }, { "epoch": 0.11498408244752603, "grad_norm": 0.46324071288108826, "learning_rate": 1.9623597121668126e-05, "loss": 0.4926765263080597, "step": 2131, "token_acc": 0.8406909788867563 }, { "epoch": 0.11503804025252252, "grad_norm": 0.664590060710907, "learning_rate": 1.9623122024129247e-05, "loss": 0.45377638936042786, "step": 2132, "token_acc": 0.8442682047584715 }, { "epoch": 0.11509199805751902, "grad_norm": 0.6213963031768799, "learning_rate": 1.9622646632702742e-05, "loss": 0.48340967297554016, "step": 2133, "token_acc": 0.8332780358327804 }, { "epoch": 0.11514595586251551, "grad_norm": 0.4051094651222229, "learning_rate": 1.962217094740313e-05, "loss": 0.4564889669418335, "step": 2134, "token_acc": 0.8445018713241846 }, { "epoch": 0.115199913667512, "grad_norm": 0.4700852036476135, "learning_rate": 1.962169496824494e-05, "loss": 0.4742601811885834, "step": 2135, "token_acc": 0.8408629441624366 }, { "epoch": 0.1152538714725085, "grad_norm": 0.4290514886379242, "learning_rate": 1.9621218695242702e-05, "loss": 0.49134355783462524, "step": 2136, "token_acc": 0.8366822782897718 }, { "epoch": 0.11530782927750499, "grad_norm": 0.4712320566177368, "learning_rate": 1.962074212841097e-05, "loss": 0.43889284133911133, "step": 2137, "token_acc": 0.8493667068757539 }, { "epoch": 0.11536178708250149, "grad_norm": 0.5613520741462708, "learning_rate": 1.9620265267764293e-05, "loss": 0.46041253209114075, "step": 2138, "token_acc": 0.8463462569233517 }, { "epoch": 0.11541574488749798, "grad_norm": 0.4370536208152771, "learning_rate": 1.9619788113317232e-05, "loss": 0.4335062503814697, "step": 2139, "token_acc": 0.8538304872317092 }, { "epoch": 0.11546970269249447, "grad_norm": 0.45089107751846313, "learning_rate": 1.9619310665084366e-05, "loss": 0.47242748737335205, "step": 2140, "token_acc": 0.8370235426008968 }, { "epoch": 0.11552366049749097, "grad_norm": 0.4978545904159546, "learning_rate": 1.961883292308027e-05, "loss": 0.44106242060661316, "step": 2141, "token_acc": 0.854389039350008 }, { "epoch": 0.11557761830248746, "grad_norm": 0.46638476848602295, "learning_rate": 1.9618354887319534e-05, "loss": 0.41081517934799194, "step": 2142, "token_acc": 0.8649666763257027 }, { "epoch": 0.11563157610748395, "grad_norm": 0.4519146680831909, "learning_rate": 1.9617876557816764e-05, "loss": 0.44935253262519836, "step": 2143, "token_acc": 0.8470258922323303 }, { "epoch": 0.11568553391248045, "grad_norm": 0.6279604434967041, "learning_rate": 1.961739793458656e-05, "loss": 0.42828983068466187, "step": 2144, "token_acc": 0.8551735592094336 }, { "epoch": 0.11573949171747694, "grad_norm": 0.48215171694755554, "learning_rate": 1.9616919017643544e-05, "loss": 0.3917207717895508, "step": 2145, "token_acc": 0.8651177296117262 }, { "epoch": 0.11579344952247343, "grad_norm": 0.41192978620529175, "learning_rate": 1.961643980700234e-05, "loss": 0.43217188119888306, "step": 2146, "token_acc": 0.8511770182205081 }, { "epoch": 0.11584740732746991, "grad_norm": 0.4641631543636322, "learning_rate": 1.9615960302677585e-05, "loss": 0.409807026386261, "step": 2147, "token_acc": 0.8587781350482315 }, { "epoch": 0.1159013651324664, "grad_norm": 0.5401755571365356, "learning_rate": 1.9615480504683918e-05, "loss": 0.4334496855735779, "step": 2148, "token_acc": 0.849955343852337 }, { "epoch": 0.1159553229374629, "grad_norm": 0.4246688187122345, "learning_rate": 1.9615000413035997e-05, "loss": 0.5088279843330383, "step": 2149, "token_acc": 0.8321832735302236 }, { "epoch": 0.11600928074245939, "grad_norm": 0.4431494176387787, "learning_rate": 1.9614520027748482e-05, "loss": 0.3773351013660431, "step": 2150, "token_acc": 0.8714823622671423 }, { "epoch": 0.11606323854745589, "grad_norm": 0.2982068955898285, "learning_rate": 1.9614039348836048e-05, "loss": 0.44233015179634094, "step": 2151, "token_acc": 0.850121295222023 }, { "epoch": 0.11611719635245238, "grad_norm": 0.5468190908432007, "learning_rate": 1.9613558376313367e-05, "loss": 0.4488944411277771, "step": 2152, "token_acc": 0.8466031355671688 }, { "epoch": 0.11617115415744887, "grad_norm": 0.45859265327453613, "learning_rate": 1.9613077110195128e-05, "loss": 0.44491586089134216, "step": 2153, "token_acc": 0.8510018639328985 }, { "epoch": 0.11622511196244537, "grad_norm": 0.48754653334617615, "learning_rate": 1.961259555049604e-05, "loss": 0.4436461925506592, "step": 2154, "token_acc": 0.8500258131130615 }, { "epoch": 0.11627906976744186, "grad_norm": 0.4139767289161682, "learning_rate": 1.9612113697230797e-05, "loss": 0.3888721466064453, "step": 2155, "token_acc": 0.8654370697493181 }, { "epoch": 0.11633302757243835, "grad_norm": 0.4037401080131531, "learning_rate": 1.961163155041412e-05, "loss": 0.41803526878356934, "step": 2156, "token_acc": 0.8553639846743295 }, { "epoch": 0.11638698537743485, "grad_norm": 0.501999020576477, "learning_rate": 1.9611149110060737e-05, "loss": 0.4246714115142822, "step": 2157, "token_acc": 0.854979687307645 }, { "epoch": 0.11644094318243134, "grad_norm": 0.3865806758403778, "learning_rate": 1.9610666376185376e-05, "loss": 0.4387168884277344, "step": 2158, "token_acc": 0.8503834466792071 }, { "epoch": 0.11649490098742783, "grad_norm": 0.4002021253108978, "learning_rate": 1.9610183348802778e-05, "loss": 0.4523578882217407, "step": 2159, "token_acc": 0.8507992895204263 }, { "epoch": 0.11654885879242433, "grad_norm": 0.5740182399749756, "learning_rate": 1.96097000279277e-05, "loss": 0.4308037757873535, "step": 2160, "token_acc": 0.854334514985498 }, { "epoch": 0.11660281659742082, "grad_norm": 0.40910249948501587, "learning_rate": 1.9609216413574904e-05, "loss": 0.4144076108932495, "step": 2161, "token_acc": 0.8585353661584604 }, { "epoch": 0.11665677440241731, "grad_norm": 0.4025081694126129, "learning_rate": 1.9608732505759153e-05, "loss": 0.44398701190948486, "step": 2162, "token_acc": 0.8503577724774448 }, { "epoch": 0.1167107322074138, "grad_norm": 0.5271835923194885, "learning_rate": 1.9608248304495227e-05, "loss": 0.43656885623931885, "step": 2163, "token_acc": 0.8537414965986394 }, { "epoch": 0.1167646900124103, "grad_norm": 0.5563445091247559, "learning_rate": 1.960776380979792e-05, "loss": 0.47289544343948364, "step": 2164, "token_acc": 0.84029590948651 }, { "epoch": 0.11681864781740679, "grad_norm": 0.45370742678642273, "learning_rate": 1.9607279021682018e-05, "loss": 0.48944973945617676, "step": 2165, "token_acc": 0.835143380109823 }, { "epoch": 0.11687260562240329, "grad_norm": 0.49242615699768066, "learning_rate": 1.9606793940162334e-05, "loss": 0.4451887011528015, "step": 2166, "token_acc": 0.8515529040182488 }, { "epoch": 0.11692656342739978, "grad_norm": 0.6010060906410217, "learning_rate": 1.960630856525368e-05, "loss": 0.4573668837547302, "step": 2167, "token_acc": 0.8434556023251153 }, { "epoch": 0.11698052123239627, "grad_norm": 0.5577914714813232, "learning_rate": 1.9605822896970874e-05, "loss": 0.41195011138916016, "step": 2168, "token_acc": 0.8558403578186823 }, { "epoch": 0.11703447903739275, "grad_norm": 0.4364508092403412, "learning_rate": 1.960533693532876e-05, "loss": 0.4819367527961731, "step": 2169, "token_acc": 0.8335493160547156 }, { "epoch": 0.11708843684238925, "grad_norm": 0.4723227322101593, "learning_rate": 1.9604850680342168e-05, "loss": 0.3891896903514862, "step": 2170, "token_acc": 0.8645814821396837 }, { "epoch": 0.11714239464738574, "grad_norm": 0.5341553688049316, "learning_rate": 1.9604364132025955e-05, "loss": 0.41415607929229736, "step": 2171, "token_acc": 0.8608390316022916 }, { "epoch": 0.11719635245238223, "grad_norm": 0.5033755302429199, "learning_rate": 1.9603877290394974e-05, "loss": 0.4140673279762268, "step": 2172, "token_acc": 0.8580782701385716 }, { "epoch": 0.11725031025737873, "grad_norm": 0.4169275462627411, "learning_rate": 1.96033901554641e-05, "loss": 0.44125813245773315, "step": 2173, "token_acc": 0.848967347409715 }, { "epoch": 0.11730426806237522, "grad_norm": 0.49822449684143066, "learning_rate": 1.9602902727248205e-05, "loss": 0.4634452760219574, "step": 2174, "token_acc": 0.8473007712082262 }, { "epoch": 0.11735822586737171, "grad_norm": 0.4475163519382477, "learning_rate": 1.9602415005762174e-05, "loss": 0.4440690279006958, "step": 2175, "token_acc": 0.8476721235341925 }, { "epoch": 0.1174121836723682, "grad_norm": 0.5781372785568237, "learning_rate": 1.960192699102091e-05, "loss": 0.4628832936286926, "step": 2176, "token_acc": 0.8499387505104125 }, { "epoch": 0.1174661414773647, "grad_norm": 0.49811071157455444, "learning_rate": 1.9601438683039307e-05, "loss": 0.4327796697616577, "step": 2177, "token_acc": 0.8543283582089553 }, { "epoch": 0.11752009928236119, "grad_norm": 0.46825167536735535, "learning_rate": 1.960095008183228e-05, "loss": 0.4349125027656555, "step": 2178, "token_acc": 0.8556477805846265 }, { "epoch": 0.11757405708735769, "grad_norm": 0.5422859191894531, "learning_rate": 1.9600461187414757e-05, "loss": 0.47263863682746887, "step": 2179, "token_acc": 0.8422084286180986 }, { "epoch": 0.11762801489235418, "grad_norm": 0.43333545327186584, "learning_rate": 1.9599971999801658e-05, "loss": 0.4417310059070587, "step": 2180, "token_acc": 0.8525588476402389 }, { "epoch": 0.11768197269735067, "grad_norm": 0.534264087677002, "learning_rate": 1.9599482519007934e-05, "loss": 0.4065312147140503, "step": 2181, "token_acc": 0.8579906740144129 }, { "epoch": 0.11773593050234717, "grad_norm": 0.5980367064476013, "learning_rate": 1.959899274504853e-05, "loss": 0.4472198784351349, "step": 2182, "token_acc": 0.8532254882619846 }, { "epoch": 0.11778988830734366, "grad_norm": 0.4532635509967804, "learning_rate": 1.95985026779384e-05, "loss": 0.4861260950565338, "step": 2183, "token_acc": 0.8327256817850371 }, { "epoch": 0.11784384611234015, "grad_norm": 0.4944494366645813, "learning_rate": 1.959801231769251e-05, "loss": 0.48352307081222534, "step": 2184, "token_acc": 0.8378510777269759 }, { "epoch": 0.11789780391733665, "grad_norm": 0.48882347345352173, "learning_rate": 1.9597521664325844e-05, "loss": 0.42309749126434326, "step": 2185, "token_acc": 0.8560500695410292 }, { "epoch": 0.11795176172233314, "grad_norm": 0.5070610046386719, "learning_rate": 1.9597030717853376e-05, "loss": 0.4952966868877411, "step": 2186, "token_acc": 0.8342851209635552 }, { "epoch": 0.11800571952732963, "grad_norm": 0.4051452875137329, "learning_rate": 1.9596539478290104e-05, "loss": 0.34987878799438477, "step": 2187, "token_acc": 0.8755418822542301 }, { "epoch": 0.11805967733232613, "grad_norm": 0.5567263960838318, "learning_rate": 1.9596047945651034e-05, "loss": 0.4841846227645874, "step": 2188, "token_acc": 0.8405343945423536 }, { "epoch": 0.11811363513732262, "grad_norm": 0.5326427817344666, "learning_rate": 1.959555611995117e-05, "loss": 0.46671155095100403, "step": 2189, "token_acc": 0.8449451887941535 }, { "epoch": 0.1181675929423191, "grad_norm": 0.4239307641983032, "learning_rate": 1.9595064001205538e-05, "loss": 0.4687662124633789, "step": 2190, "token_acc": 0.8459037711313394 }, { "epoch": 0.11822155074731559, "grad_norm": 0.5927857160568237, "learning_rate": 1.959457158942916e-05, "loss": 0.49494582414627075, "step": 2191, "token_acc": 0.8311198557140514 }, { "epoch": 0.11827550855231209, "grad_norm": 0.504448652267456, "learning_rate": 1.9594078884637086e-05, "loss": 0.4347129166126251, "step": 2192, "token_acc": 0.854125210469922 }, { "epoch": 0.11832946635730858, "grad_norm": 0.4593663215637207, "learning_rate": 1.9593585886844354e-05, "loss": 0.41461947560310364, "step": 2193, "token_acc": 0.860977288368892 }, { "epoch": 0.11838342416230507, "grad_norm": 0.5427389144897461, "learning_rate": 1.959309259606602e-05, "loss": 0.4665846824645996, "step": 2194, "token_acc": 0.8415744502031097 }, { "epoch": 0.11843738196730157, "grad_norm": 0.5308408141136169, "learning_rate": 1.9592599012317153e-05, "loss": 0.4858212471008301, "step": 2195, "token_acc": 0.8426200873362445 }, { "epoch": 0.11849133977229806, "grad_norm": 0.530954122543335, "learning_rate": 1.9592105135612826e-05, "loss": 0.44826382398605347, "step": 2196, "token_acc": 0.8463616142357802 }, { "epoch": 0.11854529757729455, "grad_norm": 0.5005319118499756, "learning_rate": 1.959161096596812e-05, "loss": 0.4334999620914459, "step": 2197, "token_acc": 0.8486486486486486 }, { "epoch": 0.11859925538229105, "grad_norm": 0.49734994769096375, "learning_rate": 1.9591116503398128e-05, "loss": 0.4933784306049347, "step": 2198, "token_acc": 0.8332327013645695 }, { "epoch": 0.11865321318728754, "grad_norm": 0.4736218750476837, "learning_rate": 1.959062174791795e-05, "loss": 0.4370711147785187, "step": 2199, "token_acc": 0.8532354058503229 }, { "epoch": 0.11870717099228403, "grad_norm": 0.4386940896511078, "learning_rate": 1.9590126699542697e-05, "loss": 0.4740679860115051, "step": 2200, "token_acc": 0.8426511954992968 }, { "epoch": 0.11876112879728053, "grad_norm": 0.3789331614971161, "learning_rate": 1.958963135828748e-05, "loss": 0.38892799615859985, "step": 2201, "token_acc": 0.8647140864714087 }, { "epoch": 0.11881508660227702, "grad_norm": 0.46588587760925293, "learning_rate": 1.958913572416744e-05, "loss": 0.4441796839237213, "step": 2202, "token_acc": 0.8480714374302368 }, { "epoch": 0.11886904440727351, "grad_norm": 0.38157936930656433, "learning_rate": 1.9588639797197707e-05, "loss": 0.46456387639045715, "step": 2203, "token_acc": 0.8504123613612665 }, { "epoch": 0.11892300221227001, "grad_norm": 0.5080479383468628, "learning_rate": 1.958814357739343e-05, "loss": 0.5105214715003967, "step": 2204, "token_acc": 0.8317307692307693 }, { "epoch": 0.1189769600172665, "grad_norm": 0.48010125756263733, "learning_rate": 1.9587647064769754e-05, "loss": 0.5128800272941589, "step": 2205, "token_acc": 0.8291094147582697 }, { "epoch": 0.119030917822263, "grad_norm": 0.4476129710674286, "learning_rate": 1.958715025934185e-05, "loss": 0.4132038652896881, "step": 2206, "token_acc": 0.8613021214337966 }, { "epoch": 0.11908487562725949, "grad_norm": 0.46269601583480835, "learning_rate": 1.958665316112489e-05, "loss": 0.3892224133014679, "step": 2207, "token_acc": 0.8689619060086399 }, { "epoch": 0.11913883343225598, "grad_norm": 0.4056612253189087, "learning_rate": 1.958615577013405e-05, "loss": 0.4298304319381714, "step": 2208, "token_acc": 0.8521118912292432 }, { "epoch": 0.11919279123725247, "grad_norm": 0.4535190463066101, "learning_rate": 1.9585658086384526e-05, "loss": 0.45602113008499146, "step": 2209, "token_acc": 0.8495563501522977 }, { "epoch": 0.11924674904224897, "grad_norm": 0.5436863303184509, "learning_rate": 1.9585160109891518e-05, "loss": 0.5174620747566223, "step": 2210, "token_acc": 0.8220706757594545 }, { "epoch": 0.11930070684724546, "grad_norm": 0.39870136976242065, "learning_rate": 1.9584661840670228e-05, "loss": 0.42705637216567993, "step": 2211, "token_acc": 0.8532322594230364 }, { "epoch": 0.11935466465224194, "grad_norm": 0.5372371673583984, "learning_rate": 1.9584163278735876e-05, "loss": 0.4717924892902374, "step": 2212, "token_acc": 0.8347408270238789 }, { "epoch": 0.11940862245723843, "grad_norm": 0.43988102674484253, "learning_rate": 1.958366442410369e-05, "loss": 0.4761601984500885, "step": 2213, "token_acc": 0.8439024390243902 }, { "epoch": 0.11946258026223493, "grad_norm": 0.5965288281440735, "learning_rate": 1.9583165276788903e-05, "loss": 0.459591269493103, "step": 2214, "token_acc": 0.8498883097542814 }, { "epoch": 0.11951653806723142, "grad_norm": 0.41802066564559937, "learning_rate": 1.9582665836806758e-05, "loss": 0.4284296929836273, "step": 2215, "token_acc": 0.857408916307764 }, { "epoch": 0.11957049587222791, "grad_norm": 0.47910788655281067, "learning_rate": 1.958216610417251e-05, "loss": 0.44181108474731445, "step": 2216, "token_acc": 0.8483438269915138 }, { "epoch": 0.11962445367722441, "grad_norm": 0.42576226592063904, "learning_rate": 1.9581666078901417e-05, "loss": 0.5015408992767334, "step": 2217, "token_acc": 0.8369669546140135 }, { "epoch": 0.1196784114822209, "grad_norm": 0.48362523317337036, "learning_rate": 1.958116576100875e-05, "loss": 0.4463277757167816, "step": 2218, "token_acc": 0.8494743351886209 }, { "epoch": 0.1197323692872174, "grad_norm": 0.40774253010749817, "learning_rate": 1.9580665150509797e-05, "loss": 0.43467456102371216, "step": 2219, "token_acc": 0.852409202720301 }, { "epoch": 0.11978632709221389, "grad_norm": 0.5261467695236206, "learning_rate": 1.9580164247419836e-05, "loss": 0.41042372584342957, "step": 2220, "token_acc": 0.8640398691792556 }, { "epoch": 0.11984028489721038, "grad_norm": 0.5700540542602539, "learning_rate": 1.957966305175417e-05, "loss": 0.4262005090713501, "step": 2221, "token_acc": 0.8539106145251396 }, { "epoch": 0.11989424270220687, "grad_norm": 0.4020710289478302, "learning_rate": 1.9579161563528103e-05, "loss": 0.4580090045928955, "step": 2222, "token_acc": 0.8427648067537264 }, { "epoch": 0.11994820050720337, "grad_norm": 0.49214011430740356, "learning_rate": 1.9578659782756954e-05, "loss": 0.4271366596221924, "step": 2223, "token_acc": 0.8532423208191127 }, { "epoch": 0.12000215831219986, "grad_norm": 0.4779313802719116, "learning_rate": 1.957815770945604e-05, "loss": 0.40208226442337036, "step": 2224, "token_acc": 0.8599757183326588 }, { "epoch": 0.12005611611719635, "grad_norm": 0.5024473071098328, "learning_rate": 1.9577655343640702e-05, "loss": 0.49585938453674316, "step": 2225, "token_acc": 0.8379520108144644 }, { "epoch": 0.12011007392219285, "grad_norm": 0.530918300151825, "learning_rate": 1.957715268532628e-05, "loss": 0.4431607127189636, "step": 2226, "token_acc": 0.8465227817745803 }, { "epoch": 0.12016403172718934, "grad_norm": 0.37498918175697327, "learning_rate": 1.9576649734528116e-05, "loss": 0.46031850576400757, "step": 2227, "token_acc": 0.8426335284980284 }, { "epoch": 0.12021798953218583, "grad_norm": 0.49081137776374817, "learning_rate": 1.9576146491261586e-05, "loss": 0.4391053020954132, "step": 2228, "token_acc": 0.851472672861868 }, { "epoch": 0.12027194733718233, "grad_norm": 0.4939853250980377, "learning_rate": 1.9575642955542046e-05, "loss": 0.42512843012809753, "step": 2229, "token_acc": 0.8546120058565154 }, { "epoch": 0.12032590514217882, "grad_norm": 0.4011235535144806, "learning_rate": 1.957513912738488e-05, "loss": 0.38248661160469055, "step": 2230, "token_acc": 0.8674013078227174 }, { "epoch": 0.12037986294717531, "grad_norm": 0.43199098110198975, "learning_rate": 1.9574635006805476e-05, "loss": 0.3813742697238922, "step": 2231, "token_acc": 0.8668440390219182 }, { "epoch": 0.12043382075217181, "grad_norm": 0.4631025195121765, "learning_rate": 1.9574130593819226e-05, "loss": 0.431221067905426, "step": 2232, "token_acc": 0.8548686962584243 }, { "epoch": 0.1204877785571683, "grad_norm": 0.4590606987476349, "learning_rate": 1.9573625888441535e-05, "loss": 0.45082515478134155, "step": 2233, "token_acc": 0.8479214172952114 }, { "epoch": 0.12054173636216478, "grad_norm": 0.3908350467681885, "learning_rate": 1.9573120890687818e-05, "loss": 0.3642517328262329, "step": 2234, "token_acc": 0.8760697305863708 }, { "epoch": 0.12059569416716127, "grad_norm": 0.5625681281089783, "learning_rate": 1.9572615600573496e-05, "loss": 0.529145359992981, "step": 2235, "token_acc": 0.8319681204339163 }, { "epoch": 0.12064965197215777, "grad_norm": 0.37510430812835693, "learning_rate": 1.9572110018114002e-05, "loss": 0.42087018489837646, "step": 2236, "token_acc": 0.857251444207966 }, { "epoch": 0.12070360977715426, "grad_norm": 0.5880196690559387, "learning_rate": 1.9571604143324773e-05, "loss": 0.4529602825641632, "step": 2237, "token_acc": 0.8434823977164605 }, { "epoch": 0.12075756758215075, "grad_norm": 0.4928356409072876, "learning_rate": 1.9571097976221262e-05, "loss": 0.4385177493095398, "step": 2238, "token_acc": 0.8516773733047823 }, { "epoch": 0.12081152538714725, "grad_norm": 0.5176191329956055, "learning_rate": 1.9570591516818925e-05, "loss": 0.5011320114135742, "step": 2239, "token_acc": 0.835981308411215 }, { "epoch": 0.12086548319214374, "grad_norm": 0.45148006081581116, "learning_rate": 1.9570084765133234e-05, "loss": 0.4311765730381012, "step": 2240, "token_acc": 0.8562854284761587 }, { "epoch": 0.12091944099714023, "grad_norm": 0.5254462361335754, "learning_rate": 1.956957772117966e-05, "loss": 0.5073646306991577, "step": 2241, "token_acc": 0.8302219608996031 }, { "epoch": 0.12097339880213673, "grad_norm": 0.48481959104537964, "learning_rate": 1.9569070384973686e-05, "loss": 0.4424521327018738, "step": 2242, "token_acc": 0.8473151750972763 }, { "epoch": 0.12102735660713322, "grad_norm": 0.44373178482055664, "learning_rate": 1.9568562756530815e-05, "loss": 0.4227702021598816, "step": 2243, "token_acc": 0.859916054564533 }, { "epoch": 0.12108131441212971, "grad_norm": 0.5129658579826355, "learning_rate": 1.956805483586654e-05, "loss": 0.41879457235336304, "step": 2244, "token_acc": 0.8549618320610687 }, { "epoch": 0.12113527221712621, "grad_norm": 0.551692008972168, "learning_rate": 1.956754662299638e-05, "loss": 0.45156461000442505, "step": 2245, "token_acc": 0.849540261911396 }, { "epoch": 0.1211892300221227, "grad_norm": 0.47003892064094543, "learning_rate": 1.9567038117935848e-05, "loss": 0.39317256212234497, "step": 2246, "token_acc": 0.859805094888015 }, { "epoch": 0.1212431878271192, "grad_norm": 0.5147438049316406, "learning_rate": 1.956652932070048e-05, "loss": 0.49197784066200256, "step": 2247, "token_acc": 0.8366497593965405 }, { "epoch": 0.12129714563211569, "grad_norm": 0.6271752715110779, "learning_rate": 1.9566020231305813e-05, "loss": 0.4137378931045532, "step": 2248, "token_acc": 0.854210898796886 }, { "epoch": 0.12135110343711218, "grad_norm": 0.44571608304977417, "learning_rate": 1.9565510849767395e-05, "loss": 0.4488014876842499, "step": 2249, "token_acc": 0.8482551487414187 }, { "epoch": 0.12140506124210868, "grad_norm": 0.5111600756645203, "learning_rate": 1.9565001176100778e-05, "loss": 0.4126659035682678, "step": 2250, "token_acc": 0.8613119834710744 }, { "epoch": 0.12145901904710517, "grad_norm": 0.48918598890304565, "learning_rate": 1.9564491210321534e-05, "loss": 0.475172221660614, "step": 2251, "token_acc": 0.8422395676973882 }, { "epoch": 0.12151297685210166, "grad_norm": 0.4685227870941162, "learning_rate": 1.9563980952445235e-05, "loss": 0.472589910030365, "step": 2252, "token_acc": 0.8465227817745803 }, { "epoch": 0.12156693465709816, "grad_norm": 0.3874412775039673, "learning_rate": 1.9563470402487462e-05, "loss": 0.4443509578704834, "step": 2253, "token_acc": 0.8447980662705207 }, { "epoch": 0.12162089246209465, "grad_norm": 0.5485396981239319, "learning_rate": 1.956295956046381e-05, "loss": 0.42277446389198303, "step": 2254, "token_acc": 0.8556332787617205 }, { "epoch": 0.12167485026709113, "grad_norm": 0.43708351254463196, "learning_rate": 1.9562448426389873e-05, "loss": 0.34500035643577576, "step": 2255, "token_acc": 0.8814366868242625 }, { "epoch": 0.12172880807208762, "grad_norm": 0.6666696667671204, "learning_rate": 1.956193700028127e-05, "loss": 0.4801725447177887, "step": 2256, "token_acc": 0.8455944529003889 }, { "epoch": 0.12178276587708411, "grad_norm": 0.44135069847106934, "learning_rate": 1.956142528215361e-05, "loss": 0.5250922441482544, "step": 2257, "token_acc": 0.8241277016527758 }, { "epoch": 0.12183672368208061, "grad_norm": 0.4591163396835327, "learning_rate": 1.9560913272022534e-05, "loss": 0.4602561593055725, "step": 2258, "token_acc": 0.8498236880315287 }, { "epoch": 0.1218906814870771, "grad_norm": 0.47100451588630676, "learning_rate": 1.956040096990367e-05, "loss": 0.43869519233703613, "step": 2259, "token_acc": 0.8538645418326694 }, { "epoch": 0.1219446392920736, "grad_norm": 0.578598141670227, "learning_rate": 1.9559888375812665e-05, "loss": 0.4359930753707886, "step": 2260, "token_acc": 0.8495693779904306 }, { "epoch": 0.12199859709707009, "grad_norm": 0.5101397037506104, "learning_rate": 1.955937548976517e-05, "loss": 0.4324559271335602, "step": 2261, "token_acc": 0.851858438555931 }, { "epoch": 0.12205255490206658, "grad_norm": 0.5753645300865173, "learning_rate": 1.9558862311776852e-05, "loss": 0.4999874234199524, "step": 2262, "token_acc": 0.8322396576319544 }, { "epoch": 0.12210651270706308, "grad_norm": 0.5357202887535095, "learning_rate": 1.9558348841863387e-05, "loss": 0.4649437665939331, "step": 2263, "token_acc": 0.8466725300498388 }, { "epoch": 0.12216047051205957, "grad_norm": 0.4298173487186432, "learning_rate": 1.955783508004045e-05, "loss": 0.48444071412086487, "step": 2264, "token_acc": 0.8424776564051638 }, { "epoch": 0.12221442831705606, "grad_norm": 0.39861589670181274, "learning_rate": 1.955732102632373e-05, "loss": 0.44078004360198975, "step": 2265, "token_acc": 0.8509151879279028 }, { "epoch": 0.12226838612205256, "grad_norm": 0.4291832745075226, "learning_rate": 1.9556806680728936e-05, "loss": 0.4755994379520416, "step": 2266, "token_acc": 0.8380053113012688 }, { "epoch": 0.12232234392704905, "grad_norm": 0.5472632646560669, "learning_rate": 1.9556292043271768e-05, "loss": 0.44324618577957153, "step": 2267, "token_acc": 0.8485200232153222 }, { "epoch": 0.12237630173204554, "grad_norm": 0.3492376506328583, "learning_rate": 1.955577711396794e-05, "loss": 0.43383505940437317, "step": 2268, "token_acc": 0.8558682223747426 }, { "epoch": 0.12243025953704204, "grad_norm": 0.5116091370582581, "learning_rate": 1.955526189283319e-05, "loss": 0.4811728596687317, "step": 2269, "token_acc": 0.8382758061902447 }, { "epoch": 0.12248421734203853, "grad_norm": 0.5539095997810364, "learning_rate": 1.955474637988324e-05, "loss": 0.48901981115341187, "step": 2270, "token_acc": 0.8346210995542348 }, { "epoch": 0.12253817514703502, "grad_norm": 0.5283357501029968, "learning_rate": 1.955423057513384e-05, "loss": 0.4939725697040558, "step": 2271, "token_acc": 0.8319933462711394 }, { "epoch": 0.12259213295203152, "grad_norm": 0.5860022306442261, "learning_rate": 1.9553714478600738e-05, "loss": 0.4111591577529907, "step": 2272, "token_acc": 0.8587400177462289 }, { "epoch": 0.12264609075702801, "grad_norm": 0.46015897393226624, "learning_rate": 1.9553198090299698e-05, "loss": 0.47737953066825867, "step": 2273, "token_acc": 0.8400926998841252 }, { "epoch": 0.1227000485620245, "grad_norm": 0.532599151134491, "learning_rate": 1.955268141024649e-05, "loss": 0.4769189953804016, "step": 2274, "token_acc": 0.8377445339470656 }, { "epoch": 0.122754006367021, "grad_norm": 0.4717537760734558, "learning_rate": 1.9552164438456902e-05, "loss": 0.49636682868003845, "step": 2275, "token_acc": 0.8316770186335404 }, { "epoch": 0.12280796417201749, "grad_norm": 0.34744375944137573, "learning_rate": 1.955164717494671e-05, "loss": 0.4688619375228882, "step": 2276, "token_acc": 0.8398280278082693 }, { "epoch": 0.12286192197701397, "grad_norm": 0.42387285828590393, "learning_rate": 1.9551129619731716e-05, "loss": 0.4612000286579132, "step": 2277, "token_acc": 0.8456634347190862 }, { "epoch": 0.12291587978201046, "grad_norm": 0.4206099510192871, "learning_rate": 1.9550611772827724e-05, "loss": 0.4445059299468994, "step": 2278, "token_acc": 0.8560781020365316 }, { "epoch": 0.12296983758700696, "grad_norm": 0.506676971912384, "learning_rate": 1.955009363425055e-05, "loss": 0.4720310568809509, "step": 2279, "token_acc": 0.8363203584430131 }, { "epoch": 0.12302379539200345, "grad_norm": 0.412205308675766, "learning_rate": 1.9549575204016022e-05, "loss": 0.42099666595458984, "step": 2280, "token_acc": 0.8560072973677352 }, { "epoch": 0.12307775319699994, "grad_norm": 0.5495991110801697, "learning_rate": 1.9549056482139965e-05, "loss": 0.40404725074768066, "step": 2281, "token_acc": 0.8617195161817588 }, { "epoch": 0.12313171100199644, "grad_norm": 0.40373656153678894, "learning_rate": 1.9548537468638226e-05, "loss": 0.4391443133354187, "step": 2282, "token_acc": 0.8524301479220474 }, { "epoch": 0.12318566880699293, "grad_norm": 0.5377811789512634, "learning_rate": 1.9548018163526655e-05, "loss": 0.5078272819519043, "step": 2283, "token_acc": 0.8323828070663514 }, { "epoch": 0.12323962661198942, "grad_norm": 0.3370110094547272, "learning_rate": 1.954749856682111e-05, "loss": 0.36896389722824097, "step": 2284, "token_acc": 0.8713494084169537 }, { "epoch": 0.12329358441698592, "grad_norm": 0.5376613140106201, "learning_rate": 1.9546978678537458e-05, "loss": 0.3983527421951294, "step": 2285, "token_acc": 0.8604510367791736 }, { "epoch": 0.12334754222198241, "grad_norm": 0.3733564019203186, "learning_rate": 1.9546458498691578e-05, "loss": 0.41217362880706787, "step": 2286, "token_acc": 0.8640385261507447 }, { "epoch": 0.1234015000269789, "grad_norm": 0.5489059090614319, "learning_rate": 1.954593802729936e-05, "loss": 0.48979872465133667, "step": 2287, "token_acc": 0.8425613159054193 }, { "epoch": 0.1234554578319754, "grad_norm": 0.4835040271282196, "learning_rate": 1.954541726437669e-05, "loss": 0.4545547068119049, "step": 2288, "token_acc": 0.8460147447489219 }, { "epoch": 0.12350941563697189, "grad_norm": 0.5538074970245361, "learning_rate": 1.954489620993948e-05, "loss": 0.5048866868019104, "step": 2289, "token_acc": 0.8312268803945746 }, { "epoch": 0.12356337344196838, "grad_norm": 0.5210657715797424, "learning_rate": 1.954437486400364e-05, "loss": 0.39167430996894836, "step": 2290, "token_acc": 0.8684098939929329 }, { "epoch": 0.12361733124696488, "grad_norm": 0.5626885294914246, "learning_rate": 1.9543853226585094e-05, "loss": 0.38846904039382935, "step": 2291, "token_acc": 0.8647163120567376 }, { "epoch": 0.12367128905196137, "grad_norm": 0.38177627325057983, "learning_rate": 1.954333129769977e-05, "loss": 0.4438645839691162, "step": 2292, "token_acc": 0.8447142857142858 }, { "epoch": 0.12372524685695786, "grad_norm": 0.4895727336406708, "learning_rate": 1.954280907736361e-05, "loss": 0.44644895195961, "step": 2293, "token_acc": 0.8502381619501261 }, { "epoch": 0.12377920466195436, "grad_norm": 0.6004457473754883, "learning_rate": 1.9542286565592555e-05, "loss": 0.48075345158576965, "step": 2294, "token_acc": 0.838662486938349 }, { "epoch": 0.12383316246695085, "grad_norm": 0.28385645151138306, "learning_rate": 1.954176376240257e-05, "loss": 0.39236974716186523, "step": 2295, "token_acc": 0.8625709087017018 }, { "epoch": 0.12388712027194734, "grad_norm": 0.3624039888381958, "learning_rate": 1.954124066780962e-05, "loss": 0.40516722202301025, "step": 2296, "token_acc": 0.8628485305199699 }, { "epoch": 0.12394107807694384, "grad_norm": 0.41691678762435913, "learning_rate": 1.9540717281829682e-05, "loss": 0.41164302825927734, "step": 2297, "token_acc": 0.8604651162790697 }, { "epoch": 0.12399503588194032, "grad_norm": 0.46472448110580444, "learning_rate": 1.9540193604478734e-05, "loss": 0.4389374554157257, "step": 2298, "token_acc": 0.851935362645622 }, { "epoch": 0.12404899368693681, "grad_norm": 0.4679414629936218, "learning_rate": 1.9539669635772773e-05, "loss": 0.4597786068916321, "step": 2299, "token_acc": 0.8452205882352941 }, { "epoch": 0.1241029514919333, "grad_norm": 0.4034920632839203, "learning_rate": 1.95391453757278e-05, "loss": 0.4412786364555359, "step": 2300, "token_acc": 0.848964545158842 }, { "epoch": 0.1241569092969298, "grad_norm": 0.5231752395629883, "learning_rate": 1.9538620824359828e-05, "loss": 0.480713814496994, "step": 2301, "token_acc": 0.8386128709569857 }, { "epoch": 0.12421086710192629, "grad_norm": 0.39276543259620667, "learning_rate": 1.9538095981684875e-05, "loss": 0.4113045930862427, "step": 2302, "token_acc": 0.85978898007034 }, { "epoch": 0.12426482490692278, "grad_norm": 0.47458145022392273, "learning_rate": 1.9537570847718968e-05, "loss": 0.4289483428001404, "step": 2303, "token_acc": 0.851426499032882 }, { "epoch": 0.12431878271191928, "grad_norm": 0.40385517477989197, "learning_rate": 1.9537045422478146e-05, "loss": 0.3952593207359314, "step": 2304, "token_acc": 0.866387985756309 }, { "epoch": 0.12437274051691577, "grad_norm": 0.454215943813324, "learning_rate": 1.9536519705978455e-05, "loss": 0.39820003509521484, "step": 2305, "token_acc": 0.8622685185185185 }, { "epoch": 0.12442669832191226, "grad_norm": 0.4923878312110901, "learning_rate": 1.953599369823595e-05, "loss": 0.42584311962127686, "step": 2306, "token_acc": 0.8531733981172184 }, { "epoch": 0.12448065612690876, "grad_norm": 0.5119020342826843, "learning_rate": 1.95354673992667e-05, "loss": 0.42338860034942627, "step": 2307, "token_acc": 0.862614637655098 }, { "epoch": 0.12453461393190525, "grad_norm": 0.5145005583763123, "learning_rate": 1.953494080908677e-05, "loss": 0.42284998297691345, "step": 2308, "token_acc": 0.85798729126644 }, { "epoch": 0.12458857173690174, "grad_norm": 0.4122588634490967, "learning_rate": 1.9534413927712242e-05, "loss": 0.4331650137901306, "step": 2309, "token_acc": 0.8508697855092046 }, { "epoch": 0.12464252954189824, "grad_norm": 0.5992818474769592, "learning_rate": 1.9533886755159215e-05, "loss": 0.44458091259002686, "step": 2310, "token_acc": 0.8454480028787333 }, { "epoch": 0.12469648734689473, "grad_norm": 0.6095002889633179, "learning_rate": 1.9533359291443782e-05, "loss": 0.46245238184928894, "step": 2311, "token_acc": 0.8435722411831627 }, { "epoch": 0.12475044515189122, "grad_norm": 0.49173203110694885, "learning_rate": 1.953283153658206e-05, "loss": 0.4805088937282562, "step": 2312, "token_acc": 0.8377350044762757 }, { "epoch": 0.12480440295688772, "grad_norm": 0.4874899089336395, "learning_rate": 1.953230349059015e-05, "loss": 0.4667659401893616, "step": 2313, "token_acc": 0.8390083163345363 }, { "epoch": 0.12485836076188421, "grad_norm": 0.49100685119628906, "learning_rate": 1.9531775153484193e-05, "loss": 0.5140000581741333, "step": 2314, "token_acc": 0.8307820762593933 }, { "epoch": 0.1249123185668807, "grad_norm": 0.36678770184516907, "learning_rate": 1.953124652528032e-05, "loss": 0.4000677466392517, "step": 2315, "token_acc": 0.8632567849686847 }, { "epoch": 0.1249662763718772, "grad_norm": 0.5358384251594543, "learning_rate": 1.9530717605994674e-05, "loss": 0.5075674057006836, "step": 2316, "token_acc": 0.8334328358208956 }, { "epoch": 0.1250202341768737, "grad_norm": 0.37734270095825195, "learning_rate": 1.953018839564341e-05, "loss": 0.44139641523361206, "step": 2317, "token_acc": 0.8471391972672929 }, { "epoch": 0.12507419198187017, "grad_norm": 0.5790755152702332, "learning_rate": 1.952965889424269e-05, "loss": 0.4069178104400635, "step": 2318, "token_acc": 0.8605664488017429 }, { "epoch": 0.12512814978686668, "grad_norm": 0.49029165506362915, "learning_rate": 1.9529129101808683e-05, "loss": 0.4567506015300751, "step": 2319, "token_acc": 0.8487667995864717 }, { "epoch": 0.12518210759186316, "grad_norm": 0.44071516394615173, "learning_rate": 1.952859901835757e-05, "loss": 0.42189204692840576, "step": 2320, "token_acc": 0.8505878979065099 }, { "epoch": 0.12523606539685966, "grad_norm": 0.3995494246482849, "learning_rate": 1.952806864390554e-05, "loss": 0.4910895824432373, "step": 2321, "token_acc": 0.8341774939440652 }, { "epoch": 0.12529002320185614, "grad_norm": 0.39518120884895325, "learning_rate": 1.9527537978468785e-05, "loss": 0.48452532291412354, "step": 2322, "token_acc": 0.8403274002858256 }, { "epoch": 0.12534398100685265, "grad_norm": 0.4762212336063385, "learning_rate": 1.952700702206352e-05, "loss": 0.4442089796066284, "step": 2323, "token_acc": 0.8509177972865124 }, { "epoch": 0.12539793881184913, "grad_norm": 0.4028625786304474, "learning_rate": 1.9526475774705954e-05, "loss": 0.36157649755477905, "step": 2324, "token_acc": 0.8678960603520537 }, { "epoch": 0.12545189661684564, "grad_norm": 0.5203203558921814, "learning_rate": 1.9525944236412316e-05, "loss": 0.5263938307762146, "step": 2325, "token_acc": 0.8304721030042919 }, { "epoch": 0.12550585442184212, "grad_norm": 0.4376099705696106, "learning_rate": 1.9525412407198835e-05, "loss": 0.45991721749305725, "step": 2326, "token_acc": 0.8459147424511545 }, { "epoch": 0.12555981222683862, "grad_norm": 0.6120128035545349, "learning_rate": 1.9524880287081755e-05, "loss": 0.4810846745967865, "step": 2327, "token_acc": 0.8487344615845439 }, { "epoch": 0.1256137700318351, "grad_norm": 0.4339387118816376, "learning_rate": 1.9524347876077325e-05, "loss": 0.4365231990814209, "step": 2328, "token_acc": 0.8545310015898251 }, { "epoch": 0.1256677278368316, "grad_norm": 0.47013139724731445, "learning_rate": 1.9523815174201804e-05, "loss": 0.42649316787719727, "step": 2329, "token_acc": 0.8559914661609577 }, { "epoch": 0.1257216856418281, "grad_norm": 0.4169517457485199, "learning_rate": 1.9523282181471467e-05, "loss": 0.459593266248703, "step": 2330, "token_acc": 0.8446612706508482 }, { "epoch": 0.12577564344682457, "grad_norm": 0.4647020995616913, "learning_rate": 1.9522748897902585e-05, "loss": 0.44675907492637634, "step": 2331, "token_acc": 0.8488941665429717 }, { "epoch": 0.12582960125182108, "grad_norm": 0.49505364894866943, "learning_rate": 1.952221532351145e-05, "loss": 0.4782964587211609, "step": 2332, "token_acc": 0.8430939226519337 }, { "epoch": 0.12588355905681756, "grad_norm": 0.4428936839103699, "learning_rate": 1.9521681458314347e-05, "loss": 0.4012030363082886, "step": 2333, "token_acc": 0.8606986281996889 }, { "epoch": 0.12593751686181406, "grad_norm": 0.5224859118461609, "learning_rate": 1.952114730232759e-05, "loss": 0.49540236592292786, "step": 2334, "token_acc": 0.841897233201581 }, { "epoch": 0.12599147466681054, "grad_norm": 0.3715202510356903, "learning_rate": 1.9520612855567485e-05, "loss": 0.40846121311187744, "step": 2335, "token_acc": 0.8558041112454655 }, { "epoch": 0.12604543247180705, "grad_norm": 0.4518793821334839, "learning_rate": 1.952007811805036e-05, "loss": 0.45167768001556396, "step": 2336, "token_acc": 0.850762396446152 }, { "epoch": 0.12609939027680353, "grad_norm": 0.4651530086994171, "learning_rate": 1.951954308979255e-05, "loss": 0.3984464406967163, "step": 2337, "token_acc": 0.8592087312414733 }, { "epoch": 0.12615334808180004, "grad_norm": 0.5977871417999268, "learning_rate": 1.951900777081038e-05, "loss": 0.41125476360321045, "step": 2338, "token_acc": 0.8552074513124471 }, { "epoch": 0.12620730588679652, "grad_norm": 0.5385484099388123, "learning_rate": 1.9518472161120208e-05, "loss": 0.4300631582736969, "step": 2339, "token_acc": 0.8603126575895108 }, { "epoch": 0.12626126369179302, "grad_norm": 0.38186609745025635, "learning_rate": 1.9517936260738388e-05, "loss": 0.39362773299217224, "step": 2340, "token_acc": 0.8689231432575186 }, { "epoch": 0.1263152214967895, "grad_norm": 0.360442578792572, "learning_rate": 1.9517400069681288e-05, "loss": 0.4082815647125244, "step": 2341, "token_acc": 0.8614589003810561 }, { "epoch": 0.126369179301786, "grad_norm": 0.5285874009132385, "learning_rate": 1.9516863587965286e-05, "loss": 0.4537615478038788, "step": 2342, "token_acc": 0.8407066284161256 }, { "epoch": 0.1264231371067825, "grad_norm": 0.3909578323364258, "learning_rate": 1.951632681560676e-05, "loss": 0.43111443519592285, "step": 2343, "token_acc": 0.8508969814158341 }, { "epoch": 0.126477094911779, "grad_norm": 0.4967140555381775, "learning_rate": 1.951578975262211e-05, "loss": 0.431857168674469, "step": 2344, "token_acc": 0.8602053915275995 }, { "epoch": 0.12653105271677548, "grad_norm": 0.42713093757629395, "learning_rate": 1.951525239902773e-05, "loss": 0.5078609585762024, "step": 2345, "token_acc": 0.8315890939135224 }, { "epoch": 0.12658501052177198, "grad_norm": 0.4154423773288727, "learning_rate": 1.9514714754840038e-05, "loss": 0.4945001006126404, "step": 2346, "token_acc": 0.8393659180977543 }, { "epoch": 0.12663896832676846, "grad_norm": 0.41616880893707275, "learning_rate": 1.9514176820075444e-05, "loss": 0.4664967656135559, "step": 2347, "token_acc": 0.8475801965510847 }, { "epoch": 0.12669292613176497, "grad_norm": 0.5145744681358337, "learning_rate": 1.951363859475039e-05, "loss": 0.4034477472305298, "step": 2348, "token_acc": 0.8576485932550774 }, { "epoch": 0.12674688393676145, "grad_norm": 0.49810877442359924, "learning_rate": 1.9513100078881303e-05, "loss": 0.4579707384109497, "step": 2349, "token_acc": 0.8466212211025489 }, { "epoch": 0.12680084174175796, "grad_norm": 0.4538928270339966, "learning_rate": 1.9512561272484628e-05, "loss": 0.4743812680244446, "step": 2350, "token_acc": 0.842632140950347 }, { "epoch": 0.12685479954675444, "grad_norm": 0.5317602753639221, "learning_rate": 1.9512022175576827e-05, "loss": 0.49467578530311584, "step": 2351, "token_acc": 0.8307607497243661 }, { "epoch": 0.12690875735175092, "grad_norm": 0.40199026465415955, "learning_rate": 1.9511482788174358e-05, "loss": 0.4708728790283203, "step": 2352, "token_acc": 0.8434739330595994 }, { "epoch": 0.12696271515674742, "grad_norm": 0.5437562465667725, "learning_rate": 1.95109431102937e-05, "loss": 0.5183001160621643, "step": 2353, "token_acc": 0.8241488747836122 }, { "epoch": 0.1270166729617439, "grad_norm": 0.46924617886543274, "learning_rate": 1.9510403141951325e-05, "loss": 0.5208459496498108, "step": 2354, "token_acc": 0.8282122905027933 }, { "epoch": 0.1270706307667404, "grad_norm": 0.36419677734375, "learning_rate": 1.9509862883163734e-05, "loss": 0.3697183430194855, "step": 2355, "token_acc": 0.8741164591046786 }, { "epoch": 0.1271245885717369, "grad_norm": 0.4626075327396393, "learning_rate": 1.9509322333947422e-05, "loss": 0.3992752432823181, "step": 2356, "token_acc": 0.866988387875132 }, { "epoch": 0.1271785463767334, "grad_norm": 0.485757052898407, "learning_rate": 1.9508781494318897e-05, "loss": 0.3878867030143738, "step": 2357, "token_acc": 0.8646834477498093 }, { "epoch": 0.12723250418172988, "grad_norm": 0.42418602108955383, "learning_rate": 1.9508240364294674e-05, "loss": 0.4687574505805969, "step": 2358, "token_acc": 0.8458531818737022 }, { "epoch": 0.12728646198672638, "grad_norm": 0.338451623916626, "learning_rate": 1.9507698943891284e-05, "loss": 0.3532504439353943, "step": 2359, "token_acc": 0.8794181620445894 }, { "epoch": 0.12734041979172286, "grad_norm": 0.45469340682029724, "learning_rate": 1.9507157233125257e-05, "loss": 0.42936477065086365, "step": 2360, "token_acc": 0.8518263266712612 }, { "epoch": 0.12739437759671937, "grad_norm": 0.5526532530784607, "learning_rate": 1.9506615232013137e-05, "loss": 0.44668054580688477, "step": 2361, "token_acc": 0.8488799732530926 }, { "epoch": 0.12744833540171585, "grad_norm": 0.47654175758361816, "learning_rate": 1.950607294057148e-05, "loss": 0.410961389541626, "step": 2362, "token_acc": 0.8612146459109844 }, { "epoch": 0.12750229320671236, "grad_norm": 0.47438183426856995, "learning_rate": 1.9505530358816848e-05, "loss": 0.45833098888397217, "step": 2363, "token_acc": 0.8441725692208628 }, { "epoch": 0.12755625101170884, "grad_norm": 0.4400511682033539, "learning_rate": 1.9504987486765804e-05, "loss": 0.5017440319061279, "step": 2364, "token_acc": 0.8299626633478532 }, { "epoch": 0.12761020881670534, "grad_norm": 0.45354586839675903, "learning_rate": 1.9504444324434936e-05, "loss": 0.4411167502403259, "step": 2365, "token_acc": 0.8499496475327291 }, { "epoch": 0.12766416662170182, "grad_norm": 0.4550233483314514, "learning_rate": 1.950390087184083e-05, "loss": 0.42800042033195496, "step": 2366, "token_acc": 0.854848245959795 }, { "epoch": 0.12771812442669833, "grad_norm": 0.49381110072135925, "learning_rate": 1.9503357129000075e-05, "loss": 0.4199662208557129, "step": 2367, "token_acc": 0.8520035618878006 }, { "epoch": 0.1277720822316948, "grad_norm": 0.35409417748451233, "learning_rate": 1.9502813095929288e-05, "loss": 0.48485198616981506, "step": 2368, "token_acc": 0.8440034762456546 }, { "epoch": 0.12782604003669132, "grad_norm": 0.4836750328540802, "learning_rate": 1.9502268772645074e-05, "loss": 0.4055563807487488, "step": 2369, "token_acc": 0.8653870438186639 }, { "epoch": 0.1278799978416878, "grad_norm": 0.6455299258232117, "learning_rate": 1.9501724159164063e-05, "loss": 0.5090023279190063, "step": 2370, "token_acc": 0.8323442136498517 }, { "epoch": 0.1279339556466843, "grad_norm": 0.5221538543701172, "learning_rate": 1.9501179255502886e-05, "loss": 0.41846591234207153, "step": 2371, "token_acc": 0.857243691547556 }, { "epoch": 0.12798791345168078, "grad_norm": 0.4175143241882324, "learning_rate": 1.9500634061678182e-05, "loss": 0.42346787452697754, "step": 2372, "token_acc": 0.8586054721977052 }, { "epoch": 0.1280418712566773, "grad_norm": 0.4289587736129761, "learning_rate": 1.9500088577706604e-05, "loss": 0.45378077030181885, "step": 2373, "token_acc": 0.8443446088794926 }, { "epoch": 0.12809582906167377, "grad_norm": 0.4475773274898529, "learning_rate": 1.949954280360481e-05, "loss": 0.37468045949935913, "step": 2374, "token_acc": 0.8698393813206424 }, { "epoch": 0.12814978686667025, "grad_norm": 0.50013267993927, "learning_rate": 1.9498996739389466e-05, "loss": 0.46226799488067627, "step": 2375, "token_acc": 0.8471188309897987 }, { "epoch": 0.12820374467166676, "grad_norm": 0.47172456979751587, "learning_rate": 1.949845038507725e-05, "loss": 0.5198304653167725, "step": 2376, "token_acc": 0.824575586095392 }, { "epoch": 0.12825770247666324, "grad_norm": 0.4733984172344208, "learning_rate": 1.949790374068485e-05, "loss": 0.4118073582649231, "step": 2377, "token_acc": 0.8571097462046587 }, { "epoch": 0.12831166028165975, "grad_norm": 0.5057148337364197, "learning_rate": 1.9497356806228955e-05, "loss": 0.42809808254241943, "step": 2378, "token_acc": 0.8516118836915297 }, { "epoch": 0.12836561808665622, "grad_norm": 0.47511836886405945, "learning_rate": 1.949680958172627e-05, "loss": 0.437177449464798, "step": 2379, "token_acc": 0.8538071065989847 }, { "epoch": 0.12841957589165273, "grad_norm": 0.42894190549850464, "learning_rate": 1.949626206719351e-05, "loss": 0.4254951477050781, "step": 2380, "token_acc": 0.8562737642585552 }, { "epoch": 0.1284735336966492, "grad_norm": 0.5734050273895264, "learning_rate": 1.9495714262647396e-05, "loss": 0.37663328647613525, "step": 2381, "token_acc": 0.8711535297021556 }, { "epoch": 0.12852749150164572, "grad_norm": 0.5101038217544556, "learning_rate": 1.9495166168104652e-05, "loss": 0.4287472665309906, "step": 2382, "token_acc": 0.8562896603210153 }, { "epoch": 0.1285814493066422, "grad_norm": 0.4969823956489563, "learning_rate": 1.9494617783582025e-05, "loss": 0.49824875593185425, "step": 2383, "token_acc": 0.8355989311025909 }, { "epoch": 0.1286354071116387, "grad_norm": 0.5126357078552246, "learning_rate": 1.9494069109096253e-05, "loss": 0.44336405396461487, "step": 2384, "token_acc": 0.8492019812878371 }, { "epoch": 0.12868936491663518, "grad_norm": 0.4625479578971863, "learning_rate": 1.9493520144664103e-05, "loss": 0.4415585994720459, "step": 2385, "token_acc": 0.8502923976608188 }, { "epoch": 0.1287433227216317, "grad_norm": 0.513480007648468, "learning_rate": 1.949297089030233e-05, "loss": 0.4165572226047516, "step": 2386, "token_acc": 0.8538694992412746 }, { "epoch": 0.12879728052662817, "grad_norm": 0.44938111305236816, "learning_rate": 1.949242134602772e-05, "loss": 0.3920571804046631, "step": 2387, "token_acc": 0.8639488030338943 }, { "epoch": 0.12885123833162468, "grad_norm": 0.4425898790359497, "learning_rate": 1.9491871511857043e-05, "loss": 0.4461216926574707, "step": 2388, "token_acc": 0.8462947005601034 }, { "epoch": 0.12890519613662116, "grad_norm": 0.429879367351532, "learning_rate": 1.9491321387807094e-05, "loss": 0.43476125597953796, "step": 2389, "token_acc": 0.8514644351464435 }, { "epoch": 0.12895915394161767, "grad_norm": 0.384348601102829, "learning_rate": 1.9490770973894682e-05, "loss": 0.3928080201148987, "step": 2390, "token_acc": 0.8665072974333166 }, { "epoch": 0.12901311174661415, "grad_norm": 0.5286203026771545, "learning_rate": 1.9490220270136605e-05, "loss": 0.4661748707294464, "step": 2391, "token_acc": 0.847489150650961 }, { "epoch": 0.12906706955161065, "grad_norm": 0.39397019147872925, "learning_rate": 1.9489669276549694e-05, "loss": 0.46210986375808716, "step": 2392, "token_acc": 0.8500469704086425 }, { "epoch": 0.12912102735660713, "grad_norm": 0.5176302194595337, "learning_rate": 1.9489117993150765e-05, "loss": 0.4810468554496765, "step": 2393, "token_acc": 0.8413621262458472 }, { "epoch": 0.12917498516160364, "grad_norm": 0.5757451057434082, "learning_rate": 1.9488566419956657e-05, "loss": 0.438798189163208, "step": 2394, "token_acc": 0.852788237403622 }, { "epoch": 0.12922894296660012, "grad_norm": 0.4596015214920044, "learning_rate": 1.948801455698422e-05, "loss": 0.48407936096191406, "step": 2395, "token_acc": 0.8390317700453858 }, { "epoch": 0.1292829007715966, "grad_norm": 0.46660733222961426, "learning_rate": 1.94874624042503e-05, "loss": 0.47134771943092346, "step": 2396, "token_acc": 0.8466937569264869 }, { "epoch": 0.1293368585765931, "grad_norm": 0.3681814670562744, "learning_rate": 1.948690996177177e-05, "loss": 0.37317711114883423, "step": 2397, "token_acc": 0.8663529411764705 }, { "epoch": 0.12939081638158959, "grad_norm": 0.38189396262168884, "learning_rate": 1.9486357229565488e-05, "loss": 0.37474969029426575, "step": 2398, "token_acc": 0.8642964446670005 }, { "epoch": 0.1294447741865861, "grad_norm": 0.39651480317115784, "learning_rate": 1.9485804207648345e-05, "loss": 0.4170895218849182, "step": 2399, "token_acc": 0.8541023000343289 }, { "epoch": 0.12949873199158257, "grad_norm": 0.4101705849170685, "learning_rate": 1.9485250896037223e-05, "loss": 0.45129406452178955, "step": 2400, "token_acc": 0.8493605713336655 }, { "epoch": 0.12955268979657908, "grad_norm": 0.4830576181411743, "learning_rate": 1.9484697294749026e-05, "loss": 0.444887638092041, "step": 2401, "token_acc": 0.8454833597464342 }, { "epoch": 0.12960664760157556, "grad_norm": 0.502309262752533, "learning_rate": 1.9484143403800657e-05, "loss": 0.42146041989326477, "step": 2402, "token_acc": 0.8533109807208717 }, { "epoch": 0.12966060540657207, "grad_norm": 0.45088431239128113, "learning_rate": 1.9483589223209036e-05, "loss": 0.34559938311576843, "step": 2403, "token_acc": 0.8757763975155279 }, { "epoch": 0.12971456321156855, "grad_norm": 0.5064720511436462, "learning_rate": 1.9483034752991083e-05, "loss": 0.3780284523963928, "step": 2404, "token_acc": 0.8716486023958927 }, { "epoch": 0.12976852101656505, "grad_norm": 0.510047972202301, "learning_rate": 1.9482479993163733e-05, "loss": 0.40935564041137695, "step": 2405, "token_acc": 0.8559850374064838 }, { "epoch": 0.12982247882156153, "grad_norm": 0.46529093384742737, "learning_rate": 1.9481924943743923e-05, "loss": 0.4143674969673157, "step": 2406, "token_acc": 0.8575819672131147 }, { "epoch": 0.12987643662655804, "grad_norm": 0.4579111933708191, "learning_rate": 1.9481369604748613e-05, "loss": 0.4475744068622589, "step": 2407, "token_acc": 0.8491773308957953 }, { "epoch": 0.12993039443155452, "grad_norm": 0.48567697405815125, "learning_rate": 1.9480813976194762e-05, "loss": 0.48356401920318604, "step": 2408, "token_acc": 0.8402466367713004 }, { "epoch": 0.12998435223655103, "grad_norm": 0.5301995277404785, "learning_rate": 1.9480258058099334e-05, "loss": 0.4446042776107788, "step": 2409, "token_acc": 0.8487179487179487 }, { "epoch": 0.1300383100415475, "grad_norm": 0.4882475733757019, "learning_rate": 1.9479701850479306e-05, "loss": 0.4098571538925171, "step": 2410, "token_acc": 0.8606132424825952 }, { "epoch": 0.130092267846544, "grad_norm": 0.45573189854621887, "learning_rate": 1.947914535335167e-05, "loss": 0.36905723810195923, "step": 2411, "token_acc": 0.8687699278402417 }, { "epoch": 0.1301462256515405, "grad_norm": 0.4612475633621216, "learning_rate": 1.9478588566733418e-05, "loss": 0.4669225811958313, "step": 2412, "token_acc": 0.8423862271453356 }, { "epoch": 0.130200183456537, "grad_norm": 0.5494816899299622, "learning_rate": 1.9478031490641555e-05, "loss": 0.4557223320007324, "step": 2413, "token_acc": 0.8445150439170248 }, { "epoch": 0.13025414126153348, "grad_norm": 0.48473381996154785, "learning_rate": 1.947747412509309e-05, "loss": 0.46180784702301025, "step": 2414, "token_acc": 0.8418618529016858 }, { "epoch": 0.13030809906653, "grad_norm": 0.5709200501441956, "learning_rate": 1.9476916470105048e-05, "loss": 0.5066226720809937, "step": 2415, "token_acc": 0.834736556219446 }, { "epoch": 0.13036205687152647, "grad_norm": 0.5216948986053467, "learning_rate": 1.947635852569446e-05, "loss": 0.3972684442996979, "step": 2416, "token_acc": 0.865010073875084 }, { "epoch": 0.13041601467652295, "grad_norm": 0.47284284234046936, "learning_rate": 1.9475800291878366e-05, "loss": 0.44687390327453613, "step": 2417, "token_acc": 0.849895178197065 }, { "epoch": 0.13046997248151945, "grad_norm": 0.5634979009628296, "learning_rate": 1.9475241768673812e-05, "loss": 0.4131430983543396, "step": 2418, "token_acc": 0.8582242889585812 }, { "epoch": 0.13052393028651593, "grad_norm": 0.43090537190437317, "learning_rate": 1.9474682956097857e-05, "loss": 0.4238680601119995, "step": 2419, "token_acc": 0.8569545944650773 }, { "epoch": 0.13057788809151244, "grad_norm": 0.3491497039794922, "learning_rate": 1.9474123854167567e-05, "loss": 0.41536110639572144, "step": 2420, "token_acc": 0.8599608282036934 }, { "epoch": 0.13063184589650892, "grad_norm": 0.5192768573760986, "learning_rate": 1.947356446290002e-05, "loss": 0.4442591071128845, "step": 2421, "token_acc": 0.8451037017589919 }, { "epoch": 0.13068580370150543, "grad_norm": 0.6056039929389954, "learning_rate": 1.9473004782312294e-05, "loss": 0.47174909710884094, "step": 2422, "token_acc": 0.8361456483126111 }, { "epoch": 0.1307397615065019, "grad_norm": 0.506555438041687, "learning_rate": 1.947244481242148e-05, "loss": 0.47090205550193787, "step": 2423, "token_acc": 0.8386992396052418 }, { "epoch": 0.1307937193114984, "grad_norm": 0.3766075372695923, "learning_rate": 1.9471884553244688e-05, "loss": 0.48536911606788635, "step": 2424, "token_acc": 0.8382141142582813 }, { "epoch": 0.1308476771164949, "grad_norm": 0.4643734395503998, "learning_rate": 1.9471324004799018e-05, "loss": 0.5324627757072449, "step": 2425, "token_acc": 0.8273570712136409 }, { "epoch": 0.1309016349214914, "grad_norm": 0.3993270695209503, "learning_rate": 1.9470763167101595e-05, "loss": 0.3972333073616028, "step": 2426, "token_acc": 0.8590308370044053 }, { "epoch": 0.13095559272648788, "grad_norm": 0.3573533594608307, "learning_rate": 1.947020204016955e-05, "loss": 0.40797871351242065, "step": 2427, "token_acc": 0.8616743916570104 }, { "epoch": 0.1310095505314844, "grad_norm": 0.4517122209072113, "learning_rate": 1.946964062402001e-05, "loss": 0.4639146029949188, "step": 2428, "token_acc": 0.8451888613178936 }, { "epoch": 0.13106350833648087, "grad_norm": 0.5020076036453247, "learning_rate": 1.946907891867013e-05, "loss": 0.4119948148727417, "step": 2429, "token_acc": 0.8567581215283623 }, { "epoch": 0.13111746614147737, "grad_norm": 0.4914487302303314, "learning_rate": 1.9468516924137058e-05, "loss": 0.466819167137146, "step": 2430, "token_acc": 0.8453145057766367 }, { "epoch": 0.13117142394647385, "grad_norm": 0.4241062104701996, "learning_rate": 1.9467954640437964e-05, "loss": 0.4044259488582611, "step": 2431, "token_acc": 0.862760335716506 }, { "epoch": 0.13122538175147036, "grad_norm": 0.4223288595676422, "learning_rate": 1.946739206759001e-05, "loss": 0.45320844650268555, "step": 2432, "token_acc": 0.8503065649768207 }, { "epoch": 0.13127933955646684, "grad_norm": 0.3831244111061096, "learning_rate": 1.9466829205610384e-05, "loss": 0.45858216285705566, "step": 2433, "token_acc": 0.8452929558920342 }, { "epoch": 0.13133329736146335, "grad_norm": 0.519142210483551, "learning_rate": 1.9466266054516274e-05, "loss": 0.4447084665298462, "step": 2434, "token_acc": 0.8468264248704663 }, { "epoch": 0.13138725516645983, "grad_norm": 0.4350910186767578, "learning_rate": 1.946570261432488e-05, "loss": 0.4534563720226288, "step": 2435, "token_acc": 0.8490818925077067 }, { "epoch": 0.13144121297145633, "grad_norm": 0.38339969515800476, "learning_rate": 1.946513888505341e-05, "loss": 0.45169633626937866, "step": 2436, "token_acc": 0.853495186270406 }, { "epoch": 0.1314951707764528, "grad_norm": 0.5890203714370728, "learning_rate": 1.946457486671907e-05, "loss": 0.5440933108329773, "step": 2437, "token_acc": 0.8238272524199554 }, { "epoch": 0.1315491285814493, "grad_norm": 0.4416782557964325, "learning_rate": 1.94640105593391e-05, "loss": 0.4452458620071411, "step": 2438, "token_acc": 0.8472745541321276 }, { "epoch": 0.1316030863864458, "grad_norm": 0.42818760871887207, "learning_rate": 1.9463445962930724e-05, "loss": 0.44830936193466187, "step": 2439, "token_acc": 0.8461337171267826 }, { "epoch": 0.13165704419144228, "grad_norm": 0.5763605833053589, "learning_rate": 1.946288107751119e-05, "loss": 0.4522576332092285, "step": 2440, "token_acc": 0.8466907962771458 }, { "epoch": 0.1317110019964388, "grad_norm": 0.4359458386898041, "learning_rate": 1.9462315903097742e-05, "loss": 0.3925427794456482, "step": 2441, "token_acc": 0.8647926914968377 }, { "epoch": 0.13176495980143527, "grad_norm": 0.503040075302124, "learning_rate": 1.946175043970765e-05, "loss": 0.48203349113464355, "step": 2442, "token_acc": 0.8382063720991215 }, { "epoch": 0.13181891760643177, "grad_norm": 0.5297937989234924, "learning_rate": 1.9461184687358176e-05, "loss": 0.4362162947654724, "step": 2443, "token_acc": 0.8528358208955223 }, { "epoch": 0.13187287541142825, "grad_norm": 0.54509437084198, "learning_rate": 1.94606186460666e-05, "loss": 0.40807366371154785, "step": 2444, "token_acc": 0.8565669095983873 }, { "epoch": 0.13192683321642476, "grad_norm": 0.454422265291214, "learning_rate": 1.946005231585021e-05, "loss": 0.4551762044429779, "step": 2445, "token_acc": 0.8486176159540716 }, { "epoch": 0.13198079102142124, "grad_norm": 0.47230276465415955, "learning_rate": 1.9459485696726298e-05, "loss": 0.42969492077827454, "step": 2446, "token_acc": 0.8615272195834855 }, { "epoch": 0.13203474882641775, "grad_norm": 0.47614461183547974, "learning_rate": 1.9458918788712174e-05, "loss": 0.42133447527885437, "step": 2447, "token_acc": 0.8577777777777778 }, { "epoch": 0.13208870663141423, "grad_norm": 0.5094934701919556, "learning_rate": 1.9458351591825145e-05, "loss": 0.4660455584526062, "step": 2448, "token_acc": 0.8431808702905602 }, { "epoch": 0.13214266443641073, "grad_norm": 0.5646176338195801, "learning_rate": 1.945778410608254e-05, "loss": 0.4176004230976105, "step": 2449, "token_acc": 0.8591344757478943 }, { "epoch": 0.1321966222414072, "grad_norm": 0.40084120631217957, "learning_rate": 1.9457216331501683e-05, "loss": 0.3886635899543762, "step": 2450, "token_acc": 0.8627677446451071 }, { "epoch": 0.13225058004640372, "grad_norm": 0.5126407146453857, "learning_rate": 1.9456648268099915e-05, "loss": 0.4959057569503784, "step": 2451, "token_acc": 0.833849821215733 }, { "epoch": 0.1323045378514002, "grad_norm": 0.49183493852615356, "learning_rate": 1.945607991589459e-05, "loss": 0.40883690118789673, "step": 2452, "token_acc": 0.8652788688138257 }, { "epoch": 0.1323584956563967, "grad_norm": 0.3822425305843353, "learning_rate": 1.945551127490306e-05, "loss": 0.41933369636535645, "step": 2453, "token_acc": 0.8524647476984034 }, { "epoch": 0.1324124534613932, "grad_norm": 0.5962256789207458, "learning_rate": 1.9454942345142694e-05, "loss": 0.5041122436523438, "step": 2454, "token_acc": 0.8365005992124637 }, { "epoch": 0.1324664112663897, "grad_norm": 0.39595288038253784, "learning_rate": 1.9454373126630864e-05, "loss": 0.35187196731567383, "step": 2455, "token_acc": 0.8735346848710522 }, { "epoch": 0.13252036907138617, "grad_norm": 0.5358037948608398, "learning_rate": 1.9453803619384957e-05, "loss": 0.3652827739715576, "step": 2456, "token_acc": 0.865277180919639 }, { "epoch": 0.13257432687638268, "grad_norm": 0.38516467809677124, "learning_rate": 1.9453233823422364e-05, "loss": 0.34685614705085754, "step": 2457, "token_acc": 0.8808765915768854 }, { "epoch": 0.13262828468137916, "grad_norm": 0.5830589532852173, "learning_rate": 1.9452663738760483e-05, "loss": 0.48052239418029785, "step": 2458, "token_acc": 0.840498766073516 }, { "epoch": 0.13268224248637567, "grad_norm": 0.5105530023574829, "learning_rate": 1.9452093365416733e-05, "loss": 0.45326220989227295, "step": 2459, "token_acc": 0.8445097079458312 }, { "epoch": 0.13273620029137215, "grad_norm": 0.37525779008865356, "learning_rate": 1.945152270340852e-05, "loss": 0.41536611318588257, "step": 2460, "token_acc": 0.8596354166666667 }, { "epoch": 0.13279015809636863, "grad_norm": 0.43846631050109863, "learning_rate": 1.9450951752753288e-05, "loss": 0.4535607099533081, "step": 2461, "token_acc": 0.8464632919879316 }, { "epoch": 0.13284411590136513, "grad_norm": 0.5498147010803223, "learning_rate": 1.945038051346846e-05, "loss": 0.4311690330505371, "step": 2462, "token_acc": 0.8504352790067076 }, { "epoch": 0.1328980737063616, "grad_norm": 0.40309765934944153, "learning_rate": 1.944980898557149e-05, "loss": 0.43363070487976074, "step": 2463, "token_acc": 0.8523937877037607 }, { "epoch": 0.13295203151135812, "grad_norm": 0.8511295318603516, "learning_rate": 1.9449237169079826e-05, "loss": 0.408486008644104, "step": 2464, "token_acc": 0.8596462225161183 }, { "epoch": 0.1330059893163546, "grad_norm": 0.40093594789505005, "learning_rate": 1.9448665064010937e-05, "loss": 0.4533929228782654, "step": 2465, "token_acc": 0.8530379746835443 }, { "epoch": 0.1330599471213511, "grad_norm": 0.5269439816474915, "learning_rate": 1.9448092670382294e-05, "loss": 0.4767410159111023, "step": 2466, "token_acc": 0.8402213384056718 }, { "epoch": 0.1331139049263476, "grad_norm": 0.38229337334632874, "learning_rate": 1.944751998821137e-05, "loss": 0.37513524293899536, "step": 2467, "token_acc": 0.866928632115548 }, { "epoch": 0.1331678627313441, "grad_norm": 0.5371728539466858, "learning_rate": 1.9446947017515663e-05, "loss": 0.4369649291038513, "step": 2468, "token_acc": 0.8505779934351363 }, { "epoch": 0.13322182053634057, "grad_norm": 0.432644248008728, "learning_rate": 1.944637375831267e-05, "loss": 0.45222118496894836, "step": 2469, "token_acc": 0.8460224499755978 }, { "epoch": 0.13327577834133708, "grad_norm": 0.5664594173431396, "learning_rate": 1.9445800210619895e-05, "loss": 0.5039874315261841, "step": 2470, "token_acc": 0.8364392009652768 }, { "epoch": 0.13332973614633356, "grad_norm": 0.41103580594062805, "learning_rate": 1.9445226374454857e-05, "loss": 0.45556437969207764, "step": 2471, "token_acc": 0.8500932256059665 }, { "epoch": 0.13338369395133007, "grad_norm": 0.4235178530216217, "learning_rate": 1.944465224983508e-05, "loss": 0.46889728307724, "step": 2472, "token_acc": 0.8444391465013709 }, { "epoch": 0.13343765175632655, "grad_norm": 0.41275453567504883, "learning_rate": 1.94440778367781e-05, "loss": 0.47354236245155334, "step": 2473, "token_acc": 0.8435456527004117 }, { "epoch": 0.13349160956132305, "grad_norm": 0.43485304713249207, "learning_rate": 1.9443503135301454e-05, "loss": 0.4324610233306885, "step": 2474, "token_acc": 0.848087764741734 }, { "epoch": 0.13354556736631953, "grad_norm": 0.4629819989204407, "learning_rate": 1.9442928145422695e-05, "loss": 0.3990921974182129, "step": 2475, "token_acc": 0.8671481357070877 }, { "epoch": 0.13359952517131604, "grad_norm": 0.536246120929718, "learning_rate": 1.9442352867159386e-05, "loss": 0.4715825915336609, "step": 2476, "token_acc": 0.8417200365965233 }, { "epoch": 0.13365348297631252, "grad_norm": 0.4777710437774658, "learning_rate": 1.9441777300529093e-05, "loss": 0.4416382908821106, "step": 2477, "token_acc": 0.8542044767090139 }, { "epoch": 0.13370744078130903, "grad_norm": 0.5052466988563538, "learning_rate": 1.9441201445549395e-05, "loss": 0.48138582706451416, "step": 2478, "token_acc": 0.8359066427289048 }, { "epoch": 0.1337613985863055, "grad_norm": 0.49243324995040894, "learning_rate": 1.944062530223788e-05, "loss": 0.4387035667896271, "step": 2479, "token_acc": 0.850211277866822 }, { "epoch": 0.13381535639130201, "grad_norm": 0.3747539818286896, "learning_rate": 1.944004887061214e-05, "loss": 0.38455671072006226, "step": 2480, "token_acc": 0.8651073845248602 }, { "epoch": 0.1338693141962985, "grad_norm": 0.6280308961868286, "learning_rate": 1.943947215068978e-05, "loss": 0.41293108463287354, "step": 2481, "token_acc": 0.8598956242472903 }, { "epoch": 0.13392327200129497, "grad_norm": 0.3236360251903534, "learning_rate": 1.9438895142488415e-05, "loss": 0.40093451738357544, "step": 2482, "token_acc": 0.8627581612258495 }, { "epoch": 0.13397722980629148, "grad_norm": 0.5004584193229675, "learning_rate": 1.9438317846025662e-05, "loss": 0.49490422010421753, "step": 2483, "token_acc": 0.8413552881570614 }, { "epoch": 0.13403118761128796, "grad_norm": 0.4892868995666504, "learning_rate": 1.9437740261319156e-05, "loss": 0.37244677543640137, "step": 2484, "token_acc": 0.8702310953097224 }, { "epoch": 0.13408514541628447, "grad_norm": 0.4004218578338623, "learning_rate": 1.9437162388386535e-05, "loss": 0.43690288066864014, "step": 2485, "token_acc": 0.8517282797277459 }, { "epoch": 0.13413910322128095, "grad_norm": 0.481459379196167, "learning_rate": 1.9436584227245444e-05, "loss": 0.42516452074050903, "step": 2486, "token_acc": 0.8565914489311164 }, { "epoch": 0.13419306102627745, "grad_norm": 0.37107378244400024, "learning_rate": 1.943600577791355e-05, "loss": 0.40268340706825256, "step": 2487, "token_acc": 0.8644894340180287 }, { "epoch": 0.13424701883127393, "grad_norm": 0.547710657119751, "learning_rate": 1.9435427040408504e-05, "loss": 0.4181314706802368, "step": 2488, "token_acc": 0.8518007662835249 }, { "epoch": 0.13430097663627044, "grad_norm": 0.3749978244304657, "learning_rate": 1.9434848014747992e-05, "loss": 0.40757715702056885, "step": 2489, "token_acc": 0.859821518098867 }, { "epoch": 0.13435493444126692, "grad_norm": 0.48198166489601135, "learning_rate": 1.943426870094969e-05, "loss": 0.40050065517425537, "step": 2490, "token_acc": 0.8594612138915937 }, { "epoch": 0.13440889224626343, "grad_norm": 0.39082086086273193, "learning_rate": 1.9433689099031297e-05, "loss": 0.45010238885879517, "step": 2491, "token_acc": 0.84469941822883 }, { "epoch": 0.1344628500512599, "grad_norm": 0.44341519474983215, "learning_rate": 1.943310920901051e-05, "loss": 0.42391279339790344, "step": 2492, "token_acc": 0.8500428449014568 }, { "epoch": 0.13451680785625642, "grad_norm": 0.4286985695362091, "learning_rate": 1.9432529030905037e-05, "loss": 0.4770337641239166, "step": 2493, "token_acc": 0.8413812867172947 }, { "epoch": 0.1345707656612529, "grad_norm": 0.5011358857154846, "learning_rate": 1.9431948564732598e-05, "loss": 0.4043794572353363, "step": 2494, "token_acc": 0.8609028084105279 }, { "epoch": 0.1346247234662494, "grad_norm": 0.508924663066864, "learning_rate": 1.943136781051092e-05, "loss": 0.4713740646839142, "step": 2495, "token_acc": 0.846142208774584 }, { "epoch": 0.13467868127124588, "grad_norm": 0.5337329506874084, "learning_rate": 1.9430786768257743e-05, "loss": 0.43740010261535645, "step": 2496, "token_acc": 0.8451602354480052 }, { "epoch": 0.1347326390762424, "grad_norm": 0.4538431763648987, "learning_rate": 1.943020543799081e-05, "loss": 0.4861264228820801, "step": 2497, "token_acc": 0.835145176826751 }, { "epoch": 0.13478659688123887, "grad_norm": 0.4919958710670471, "learning_rate": 1.942962381972787e-05, "loss": 0.4790438413619995, "step": 2498, "token_acc": 0.8368538076709283 }, { "epoch": 0.13484055468623538, "grad_norm": 0.45352140069007874, "learning_rate": 1.9429041913486685e-05, "loss": 0.43167591094970703, "step": 2499, "token_acc": 0.8494130346781811 }, { "epoch": 0.13489451249123185, "grad_norm": 0.5562938451766968, "learning_rate": 1.9428459719285034e-05, "loss": 0.41260531544685364, "step": 2500, "token_acc": 0.8573346760657939 }, { "epoch": 0.13494847029622836, "grad_norm": 0.4540629982948303, "learning_rate": 1.942787723714069e-05, "loss": 0.5079344511032104, "step": 2501, "token_acc": 0.8302567060859533 }, { "epoch": 0.13500242810122484, "grad_norm": 0.5550857782363892, "learning_rate": 1.9427294467071453e-05, "loss": 0.4427713453769684, "step": 2502, "token_acc": 0.8495970241785493 }, { "epoch": 0.13505638590622132, "grad_norm": 0.5064507722854614, "learning_rate": 1.9426711409095106e-05, "loss": 0.4991401135921478, "step": 2503, "token_acc": 0.8351554907677357 }, { "epoch": 0.13511034371121783, "grad_norm": 0.5375779271125793, "learning_rate": 1.9426128063229465e-05, "loss": 0.4663260579109192, "step": 2504, "token_acc": 0.8435840328936097 }, { "epoch": 0.1351643015162143, "grad_norm": 0.4656113088130951, "learning_rate": 1.942554442949234e-05, "loss": 0.4088920056819916, "step": 2505, "token_acc": 0.8599830795262268 }, { "epoch": 0.13521825932121082, "grad_norm": 0.46556180715560913, "learning_rate": 1.942496050790156e-05, "loss": 0.43809938430786133, "step": 2506, "token_acc": 0.8521677327647477 }, { "epoch": 0.1352722171262073, "grad_norm": 0.48896101117134094, "learning_rate": 1.942437629847495e-05, "loss": 0.4657062590122223, "step": 2507, "token_acc": 0.8366849482023156 }, { "epoch": 0.1353261749312038, "grad_norm": 0.40135353803634644, "learning_rate": 1.942379180123036e-05, "loss": 0.35696882009506226, "step": 2508, "token_acc": 0.8672694394213382 }, { "epoch": 0.13538013273620028, "grad_norm": 0.5860713720321655, "learning_rate": 1.9423207016185637e-05, "loss": 0.47191041707992554, "step": 2509, "token_acc": 0.8407859931981357 }, { "epoch": 0.1354340905411968, "grad_norm": 0.45412397384643555, "learning_rate": 1.9422621943358645e-05, "loss": 0.4643642008304596, "step": 2510, "token_acc": 0.8448642562241961 }, { "epoch": 0.13548804834619327, "grad_norm": 0.4794811010360718, "learning_rate": 1.942203658276724e-05, "loss": 0.4296308159828186, "step": 2511, "token_acc": 0.8520255063765941 }, { "epoch": 0.13554200615118978, "grad_norm": 0.51242595911026, "learning_rate": 1.942145093442931e-05, "loss": 0.39037102460861206, "step": 2512, "token_acc": 0.8628727107613138 }, { "epoch": 0.13559596395618626, "grad_norm": 0.45816275477409363, "learning_rate": 1.9420864998362733e-05, "loss": 0.44930073618888855, "step": 2513, "token_acc": 0.8502839442436758 }, { "epoch": 0.13564992176118276, "grad_norm": 0.4186379015445709, "learning_rate": 1.942027877458541e-05, "loss": 0.3824803829193115, "step": 2514, "token_acc": 0.8633806986382475 }, { "epoch": 0.13570387956617924, "grad_norm": 0.5159599184989929, "learning_rate": 1.9419692263115238e-05, "loss": 0.44653868675231934, "step": 2515, "token_acc": 0.8467767050763002 }, { "epoch": 0.13575783737117575, "grad_norm": 0.5167356133460999, "learning_rate": 1.9419105463970136e-05, "loss": 0.46115732192993164, "step": 2516, "token_acc": 0.8484363894811656 }, { "epoch": 0.13581179517617223, "grad_norm": 0.4015684425830841, "learning_rate": 1.941851837716802e-05, "loss": 0.447596937417984, "step": 2517, "token_acc": 0.8467574770506366 }, { "epoch": 0.13586575298116874, "grad_norm": 0.3368982672691345, "learning_rate": 1.9417931002726817e-05, "loss": 0.4459485411643982, "step": 2518, "token_acc": 0.8485700890764182 }, { "epoch": 0.13591971078616522, "grad_norm": 0.5728105902671814, "learning_rate": 1.941734334066447e-05, "loss": 0.4902283549308777, "step": 2519, "token_acc": 0.8386229155459924 }, { "epoch": 0.13597366859116172, "grad_norm": 0.5178183317184448, "learning_rate": 1.9416755390998923e-05, "loss": 0.4292912185192108, "step": 2520, "token_acc": 0.8507038035339922 }, { "epoch": 0.1360276263961582, "grad_norm": 0.3817043900489807, "learning_rate": 1.9416167153748137e-05, "loss": 0.3697773814201355, "step": 2521, "token_acc": 0.8714331516511703 }, { "epoch": 0.1360815842011547, "grad_norm": 0.4970298707485199, "learning_rate": 1.941557862893007e-05, "loss": 0.443445086479187, "step": 2522, "token_acc": 0.8524732426959791 }, { "epoch": 0.1361355420061512, "grad_norm": 0.4120490849018097, "learning_rate": 1.94149898165627e-05, "loss": 0.3964524269104004, "step": 2523, "token_acc": 0.8651523864289822 }, { "epoch": 0.1361894998111477, "grad_norm": 0.3685868978500366, "learning_rate": 1.9414400716664004e-05, "loss": 0.3590715229511261, "step": 2524, "token_acc": 0.8759720574667194 }, { "epoch": 0.13624345761614418, "grad_norm": 0.5212357044219971, "learning_rate": 1.941381132925198e-05, "loss": 0.45510801672935486, "step": 2525, "token_acc": 0.8401589770174529 }, { "epoch": 0.13629741542114066, "grad_norm": 0.5078661441802979, "learning_rate": 1.941322165434462e-05, "loss": 0.4941980242729187, "step": 2526, "token_acc": 0.8330256754489008 }, { "epoch": 0.13635137322613716, "grad_norm": 0.5655747056007385, "learning_rate": 1.941263169195994e-05, "loss": 0.49819955229759216, "step": 2527, "token_acc": 0.8329222167406677 }, { "epoch": 0.13640533103113364, "grad_norm": 0.5223336815834045, "learning_rate": 1.941204144211595e-05, "loss": 0.41870105266571045, "step": 2528, "token_acc": 0.8538948200869909 }, { "epoch": 0.13645928883613015, "grad_norm": 0.520287811756134, "learning_rate": 1.9411450904830683e-05, "loss": 0.5055787563323975, "step": 2529, "token_acc": 0.8331788693234476 }, { "epoch": 0.13651324664112663, "grad_norm": 0.47402501106262207, "learning_rate": 1.9410860080122173e-05, "loss": 0.49477338790893555, "step": 2530, "token_acc": 0.8362777946999695 }, { "epoch": 0.13656720444612314, "grad_norm": 0.49131667613983154, "learning_rate": 1.9410268968008458e-05, "loss": 0.4255654811859131, "step": 2531, "token_acc": 0.8536177331660393 }, { "epoch": 0.13662116225111962, "grad_norm": 0.3828069269657135, "learning_rate": 1.9409677568507598e-05, "loss": 0.40546631813049316, "step": 2532, "token_acc": 0.8612581750233572 }, { "epoch": 0.13667512005611612, "grad_norm": 0.5280492305755615, "learning_rate": 1.9409085881637647e-05, "loss": 0.45338916778564453, "step": 2533, "token_acc": 0.847615632843313 }, { "epoch": 0.1367290778611126, "grad_norm": 0.4625060558319092, "learning_rate": 1.9408493907416676e-05, "loss": 0.44074174761772156, "step": 2534, "token_acc": 0.8467270601987142 }, { "epoch": 0.1367830356661091, "grad_norm": 0.39871102571487427, "learning_rate": 1.940790164586277e-05, "loss": 0.46540242433547974, "step": 2535, "token_acc": 0.8433217993079585 }, { "epoch": 0.1368369934711056, "grad_norm": 0.475223183631897, "learning_rate": 1.9407309096994008e-05, "loss": 0.43961775302886963, "step": 2536, "token_acc": 0.8511511283337132 }, { "epoch": 0.1368909512761021, "grad_norm": 0.4592059254646301, "learning_rate": 1.9406716260828494e-05, "loss": 0.3945053815841675, "step": 2537, "token_acc": 0.8648956356736243 }, { "epoch": 0.13694490908109858, "grad_norm": 0.3471064269542694, "learning_rate": 1.9406123137384326e-05, "loss": 0.38141071796417236, "step": 2538, "token_acc": 0.871070293182621 }, { "epoch": 0.13699886688609508, "grad_norm": 0.5098990201950073, "learning_rate": 1.9405529726679627e-05, "loss": 0.47045180201530457, "step": 2539, "token_acc": 0.8383084577114428 }, { "epoch": 0.13705282469109156, "grad_norm": 0.4642274081707001, "learning_rate": 1.940493602873251e-05, "loss": 0.387077659368515, "step": 2540, "token_acc": 0.8677148278666306 }, { "epoch": 0.13710678249608807, "grad_norm": 0.43992170691490173, "learning_rate": 1.940434204356111e-05, "loss": 0.4245913028717041, "step": 2541, "token_acc": 0.8552572371381431 }, { "epoch": 0.13716074030108455, "grad_norm": 0.447599321603775, "learning_rate": 1.940374777118357e-05, "loss": 0.48729926347732544, "step": 2542, "token_acc": 0.836127355425601 }, { "epoch": 0.13721469810608106, "grad_norm": 0.3959340453147888, "learning_rate": 1.9403153211618035e-05, "loss": 0.3993508219718933, "step": 2543, "token_acc": 0.8565330080805 }, { "epoch": 0.13726865591107754, "grad_norm": 0.5806278586387634, "learning_rate": 1.9402558364882666e-05, "loss": 0.4236961007118225, "step": 2544, "token_acc": 0.8575798776342624 }, { "epoch": 0.13732261371607404, "grad_norm": 0.45893847942352295, "learning_rate": 1.9401963230995626e-05, "loss": 0.3598244786262512, "step": 2545, "token_acc": 0.8741858617950754 }, { "epoch": 0.13737657152107052, "grad_norm": 0.39056453108787537, "learning_rate": 1.940136780997509e-05, "loss": 0.45425504446029663, "step": 2546, "token_acc": 0.8442987880286915 }, { "epoch": 0.137430529326067, "grad_norm": 0.5110153555870056, "learning_rate": 1.9400772101839248e-05, "loss": 0.4917312264442444, "step": 2547, "token_acc": 0.8384502923976608 }, { "epoch": 0.1374844871310635, "grad_norm": 0.5377912521362305, "learning_rate": 1.9400176106606285e-05, "loss": 0.44380244612693787, "step": 2548, "token_acc": 0.8560126582278481 }, { "epoch": 0.13753844493606, "grad_norm": 0.5001197457313538, "learning_rate": 1.9399579824294408e-05, "loss": 0.40711459517478943, "step": 2549, "token_acc": 0.8540556115833453 }, { "epoch": 0.1375924027410565, "grad_norm": 0.5012162923812866, "learning_rate": 1.9398983254921825e-05, "loss": 0.45348766446113586, "step": 2550, "token_acc": 0.8450404992696853 }, { "epoch": 0.13764636054605298, "grad_norm": 0.4241403639316559, "learning_rate": 1.9398386398506757e-05, "loss": 0.36857277154922485, "step": 2551, "token_acc": 0.8700389105058366 }, { "epoch": 0.13770031835104948, "grad_norm": 0.4179314076900482, "learning_rate": 1.939778925506743e-05, "loss": 0.39276719093322754, "step": 2552, "token_acc": 0.8604805522368246 }, { "epoch": 0.13775427615604596, "grad_norm": 0.5085535049438477, "learning_rate": 1.9397191824622082e-05, "loss": 0.48161429166793823, "step": 2553, "token_acc": 0.840036563071298 }, { "epoch": 0.13780823396104247, "grad_norm": 0.5204922556877136, "learning_rate": 1.9396594107188957e-05, "loss": 0.4770095944404602, "step": 2554, "token_acc": 0.846177558569667 }, { "epoch": 0.13786219176603895, "grad_norm": 0.5931016206741333, "learning_rate": 1.9395996102786307e-05, "loss": 0.3872114419937134, "step": 2555, "token_acc": 0.8661193029490617 }, { "epoch": 0.13791614957103546, "grad_norm": 0.3688972294330597, "learning_rate": 1.93953978114324e-05, "loss": 0.3654208779335022, "step": 2556, "token_acc": 0.8703163562934709 }, { "epoch": 0.13797010737603194, "grad_norm": 0.5088863372802734, "learning_rate": 1.9394799233145504e-05, "loss": 0.46218353509902954, "step": 2557, "token_acc": 0.8417855629326176 }, { "epoch": 0.13802406518102844, "grad_norm": 0.4568045735359192, "learning_rate": 1.93942003679439e-05, "loss": 0.3832116723060608, "step": 2558, "token_acc": 0.8676745371156787 }, { "epoch": 0.13807802298602492, "grad_norm": 0.5099273920059204, "learning_rate": 1.9393601215845875e-05, "loss": 0.4923168122768402, "step": 2559, "token_acc": 0.8348214285714286 }, { "epoch": 0.13813198079102143, "grad_norm": 0.48634982109069824, "learning_rate": 1.9393001776869737e-05, "loss": 0.462802916765213, "step": 2560, "token_acc": 0.8479068197164078 }, { "epoch": 0.1381859385960179, "grad_norm": 0.5373853445053101, "learning_rate": 1.939240205103378e-05, "loss": 0.46427279710769653, "step": 2561, "token_acc": 0.8433409220261809 }, { "epoch": 0.13823989640101442, "grad_norm": 0.5218401551246643, "learning_rate": 1.9391802038356327e-05, "loss": 0.49934330582618713, "step": 2562, "token_acc": 0.8329779673063256 }, { "epoch": 0.1382938542060109, "grad_norm": 0.33387500047683716, "learning_rate": 1.9391201738855697e-05, "loss": 0.4391520321369171, "step": 2563, "token_acc": 0.8525891829689298 }, { "epoch": 0.1383478120110074, "grad_norm": 0.541904628276825, "learning_rate": 1.939060115255023e-05, "loss": 0.4308035373687744, "step": 2564, "token_acc": 0.8560257589696412 }, { "epoch": 0.13840176981600388, "grad_norm": 0.4037221670150757, "learning_rate": 1.9390000279458262e-05, "loss": 0.4259507656097412, "step": 2565, "token_acc": 0.8518420002659928 }, { "epoch": 0.1384557276210004, "grad_norm": 0.42672908306121826, "learning_rate": 1.9389399119598143e-05, "loss": 0.45376548171043396, "step": 2566, "token_acc": 0.8451277045713356 }, { "epoch": 0.13850968542599687, "grad_norm": 0.4552440643310547, "learning_rate": 1.9388797672988237e-05, "loss": 0.44626370072364807, "step": 2567, "token_acc": 0.8457271624198301 }, { "epoch": 0.13856364323099335, "grad_norm": 0.3283284306526184, "learning_rate": 1.938819593964691e-05, "loss": 0.5117509961128235, "step": 2568, "token_acc": 0.8333706606942889 }, { "epoch": 0.13861760103598986, "grad_norm": 0.4706229865550995, "learning_rate": 1.938759391959254e-05, "loss": 0.4427535831928253, "step": 2569, "token_acc": 0.8482252141982864 }, { "epoch": 0.13867155884098634, "grad_norm": 0.6101871132850647, "learning_rate": 1.9386991612843507e-05, "loss": 0.49917683005332947, "step": 2570, "token_acc": 0.8300190212692374 }, { "epoch": 0.13872551664598284, "grad_norm": 0.5385541319847107, "learning_rate": 1.938638901941821e-05, "loss": 0.4089534878730774, "step": 2571, "token_acc": 0.8602873738917762 }, { "epoch": 0.13877947445097932, "grad_norm": 0.41590416431427, "learning_rate": 1.9385786139335055e-05, "loss": 0.5140736699104309, "step": 2572, "token_acc": 0.8321286141575274 }, { "epoch": 0.13883343225597583, "grad_norm": 0.41609272360801697, "learning_rate": 1.938518297261245e-05, "loss": 0.39457157254219055, "step": 2573, "token_acc": 0.8637733574442436 }, { "epoch": 0.1388873900609723, "grad_norm": 0.5422682762145996, "learning_rate": 1.938457951926881e-05, "loss": 0.3939633369445801, "step": 2574, "token_acc": 0.8649706457925636 }, { "epoch": 0.13894134786596882, "grad_norm": 0.4943232238292694, "learning_rate": 1.938397577932257e-05, "loss": 0.4180234372615814, "step": 2575, "token_acc": 0.8520661157024794 }, { "epoch": 0.1389953056709653, "grad_norm": 0.4127463698387146, "learning_rate": 1.9383371752792172e-05, "loss": 0.372379869222641, "step": 2576, "token_acc": 0.8755667506297229 }, { "epoch": 0.1390492634759618, "grad_norm": 0.493503212928772, "learning_rate": 1.938276743969606e-05, "loss": 0.46762561798095703, "step": 2577, "token_acc": 0.84375 }, { "epoch": 0.13910322128095828, "grad_norm": 0.3705171048641205, "learning_rate": 1.9382162840052686e-05, "loss": 0.48318806290626526, "step": 2578, "token_acc": 0.8365796380715569 }, { "epoch": 0.1391571790859548, "grad_norm": 0.5344688296318054, "learning_rate": 1.9381557953880516e-05, "loss": 0.48977333307266235, "step": 2579, "token_acc": 0.8387189844200807 }, { "epoch": 0.13921113689095127, "grad_norm": 0.4358930289745331, "learning_rate": 1.9380952781198025e-05, "loss": 0.3828202784061432, "step": 2580, "token_acc": 0.8677991137370753 }, { "epoch": 0.13926509469594778, "grad_norm": 0.48691824078559875, "learning_rate": 1.938034732202369e-05, "loss": 0.4298097491264343, "step": 2581, "token_acc": 0.8587179487179487 }, { "epoch": 0.13931905250094426, "grad_norm": 0.4596503973007202, "learning_rate": 1.937974157637601e-05, "loss": 0.46362969279289246, "step": 2582, "token_acc": 0.846426370918053 }, { "epoch": 0.13937301030594076, "grad_norm": 0.5979052186012268, "learning_rate": 1.9379135544273476e-05, "loss": 0.5215393900871277, "step": 2583, "token_acc": 0.8295345980619626 }, { "epoch": 0.13942696811093724, "grad_norm": 0.5396813154220581, "learning_rate": 1.9378529225734603e-05, "loss": 0.44715529680252075, "step": 2584, "token_acc": 0.8451296426068675 }, { "epoch": 0.13948092591593375, "grad_norm": 0.6175671815872192, "learning_rate": 1.9377922620777904e-05, "loss": 0.4833000898361206, "step": 2585, "token_acc": 0.8371239911958914 }, { "epoch": 0.13953488372093023, "grad_norm": 0.47383663058280945, "learning_rate": 1.93773157294219e-05, "loss": 0.48894375562667847, "step": 2586, "token_acc": 0.8370011737089202 }, { "epoch": 0.13958884152592674, "grad_norm": 0.5168433785438538, "learning_rate": 1.9376708551685134e-05, "loss": 0.41587042808532715, "step": 2587, "token_acc": 0.8599495313626532 }, { "epoch": 0.13964279933092322, "grad_norm": 0.6535528302192688, "learning_rate": 1.9376101087586143e-05, "loss": 0.423020601272583, "step": 2588, "token_acc": 0.8529234478601567 }, { "epoch": 0.13969675713591972, "grad_norm": 0.38911816477775574, "learning_rate": 1.9375493337143482e-05, "loss": 0.3886520266532898, "step": 2589, "token_acc": 0.8605470716563182 }, { "epoch": 0.1397507149409162, "grad_norm": 0.4473106265068054, "learning_rate": 1.9374885300375714e-05, "loss": 0.4343593120574951, "step": 2590, "token_acc": 0.8506253553155202 }, { "epoch": 0.13980467274591268, "grad_norm": 0.3986169695854187, "learning_rate": 1.93742769773014e-05, "loss": 0.4254593551158905, "step": 2591, "token_acc": 0.8534429851479072 }, { "epoch": 0.1398586305509092, "grad_norm": 0.5289295315742493, "learning_rate": 1.9373668367939125e-05, "loss": 0.42621779441833496, "step": 2592, "token_acc": 0.850688924218336 }, { "epoch": 0.13991258835590567, "grad_norm": 0.4280970096588135, "learning_rate": 1.9373059472307473e-05, "loss": 0.36851733922958374, "step": 2593, "token_acc": 0.8777848504137492 }, { "epoch": 0.13996654616090218, "grad_norm": 0.4351949989795685, "learning_rate": 1.937245029042504e-05, "loss": 0.43713170289993286, "step": 2594, "token_acc": 0.8516105237275633 }, { "epoch": 0.14002050396589866, "grad_norm": 0.5840246677398682, "learning_rate": 1.937184082231043e-05, "loss": 0.4581949710845947, "step": 2595, "token_acc": 0.8453299057412168 }, { "epoch": 0.14007446177089516, "grad_norm": 0.4288381040096283, "learning_rate": 1.9371231067982258e-05, "loss": 0.4084379971027374, "step": 2596, "token_acc": 0.8583290751149719 }, { "epoch": 0.14012841957589164, "grad_norm": 0.43878403306007385, "learning_rate": 1.9370621027459142e-05, "loss": 0.46939617395401, "step": 2597, "token_acc": 0.8428408442669709 }, { "epoch": 0.14018237738088815, "grad_norm": 0.6017327904701233, "learning_rate": 1.9370010700759715e-05, "loss": 0.4280236065387726, "step": 2598, "token_acc": 0.8480833774951422 }, { "epoch": 0.14023633518588463, "grad_norm": 0.498361200094223, "learning_rate": 1.9369400087902617e-05, "loss": 0.42232394218444824, "step": 2599, "token_acc": 0.8515878969742435 }, { "epoch": 0.14029029299088114, "grad_norm": 0.48585236072540283, "learning_rate": 1.9368789188906497e-05, "loss": 0.42059236764907837, "step": 2600, "token_acc": 0.8538961038961039 }, { "epoch": 0.14034425079587762, "grad_norm": 0.41535481810569763, "learning_rate": 1.9368178003790007e-05, "loss": 0.41501641273498535, "step": 2601, "token_acc": 0.8592222463610689 }, { "epoch": 0.14039820860087412, "grad_norm": 0.4023301899433136, "learning_rate": 1.9367566532571814e-05, "loss": 0.3490051031112671, "step": 2602, "token_acc": 0.8749160284831385 }, { "epoch": 0.1404521664058706, "grad_norm": 0.4039035737514496, "learning_rate": 1.936695477527059e-05, "loss": 0.42090433835983276, "step": 2603, "token_acc": 0.8544443006857291 }, { "epoch": 0.1405061242108671, "grad_norm": 0.4340571463108063, "learning_rate": 1.9366342731905025e-05, "loss": 0.40484267473220825, "step": 2604, "token_acc": 0.8621851003637343 }, { "epoch": 0.1405600820158636, "grad_norm": 0.539503276348114, "learning_rate": 1.936573040249381e-05, "loss": 0.42635107040405273, "step": 2605, "token_acc": 0.8574412532637076 }, { "epoch": 0.1406140398208601, "grad_norm": 0.38505426049232483, "learning_rate": 1.9365117787055637e-05, "loss": 0.43825116753578186, "step": 2606, "token_acc": 0.8475250239956122 }, { "epoch": 0.14066799762585658, "grad_norm": 0.5262369513511658, "learning_rate": 1.9364504885609218e-05, "loss": 0.45540735125541687, "step": 2607, "token_acc": 0.8438169425511197 }, { "epoch": 0.14072195543085309, "grad_norm": 0.43794363737106323, "learning_rate": 1.9363891698173278e-05, "loss": 0.4223494231700897, "step": 2608, "token_acc": 0.8548900079512325 }, { "epoch": 0.14077591323584956, "grad_norm": 0.515745222568512, "learning_rate": 1.9363278224766536e-05, "loss": 0.3920148015022278, "step": 2609, "token_acc": 0.8661810911168237 }, { "epoch": 0.14082987104084607, "grad_norm": 0.5073748826980591, "learning_rate": 1.9362664465407733e-05, "loss": 0.49160629510879517, "step": 2610, "token_acc": 0.8378644879560502 }, { "epoch": 0.14088382884584255, "grad_norm": 0.5314208269119263, "learning_rate": 1.9362050420115608e-05, "loss": 0.4731847047805786, "step": 2611, "token_acc": 0.8442261679889183 }, { "epoch": 0.14093778665083903, "grad_norm": 0.4582515358924866, "learning_rate": 1.9361436088908914e-05, "loss": 0.40298256278038025, "step": 2612, "token_acc": 0.8625286697247706 }, { "epoch": 0.14099174445583554, "grad_norm": 0.5333710312843323, "learning_rate": 1.9360821471806415e-05, "loss": 0.49447736144065857, "step": 2613, "token_acc": 0.8400050352467271 }, { "epoch": 0.14104570226083202, "grad_norm": 0.45236602425575256, "learning_rate": 1.936020656882688e-05, "loss": 0.4401591420173645, "step": 2614, "token_acc": 0.8483688160514627 }, { "epoch": 0.14109966006582852, "grad_norm": 0.39854228496551514, "learning_rate": 1.9359591379989093e-05, "loss": 0.4154732823371887, "step": 2615, "token_acc": 0.8597110754414126 }, { "epoch": 0.141153617870825, "grad_norm": 0.43256786465644836, "learning_rate": 1.9358975905311833e-05, "loss": 0.4157566428184509, "step": 2616, "token_acc": 0.8605916829472509 }, { "epoch": 0.1412075756758215, "grad_norm": 0.4164918065071106, "learning_rate": 1.93583601448139e-05, "loss": 0.43105176091194153, "step": 2617, "token_acc": 0.8572804844481146 }, { "epoch": 0.141261533480818, "grad_norm": 0.4833596348762512, "learning_rate": 1.9357744098514102e-05, "loss": 0.45093798637390137, "step": 2618, "token_acc": 0.8416724738675958 }, { "epoch": 0.1413154912858145, "grad_norm": 0.4235715866088867, "learning_rate": 1.935712776643125e-05, "loss": 0.46385324001312256, "step": 2619, "token_acc": 0.8453369639210347 }, { "epoch": 0.14136944909081098, "grad_norm": 0.4400596022605896, "learning_rate": 1.9356511148584165e-05, "loss": 0.45429515838623047, "step": 2620, "token_acc": 0.8451008645533141 }, { "epoch": 0.14142340689580749, "grad_norm": 0.4107106328010559, "learning_rate": 1.9355894244991682e-05, "loss": 0.4524613618850708, "step": 2621, "token_acc": 0.8514202476329206 }, { "epoch": 0.14147736470080396, "grad_norm": 0.5312904119491577, "learning_rate": 1.9355277055672638e-05, "loss": 0.5059866309165955, "step": 2622, "token_acc": 0.8392532628448703 }, { "epoch": 0.14153132250580047, "grad_norm": 1.0132344961166382, "learning_rate": 1.935465958064589e-05, "loss": 0.4855760633945465, "step": 2623, "token_acc": 0.8385043754972156 }, { "epoch": 0.14158528031079695, "grad_norm": 0.469570130109787, "learning_rate": 1.9354041819930282e-05, "loss": 0.3979129493236542, "step": 2624, "token_acc": 0.8635418635418636 }, { "epoch": 0.14163923811579346, "grad_norm": 0.4270174205303192, "learning_rate": 1.935342377354469e-05, "loss": 0.4637373685836792, "step": 2625, "token_acc": 0.8484551250613046 }, { "epoch": 0.14169319592078994, "grad_norm": 0.44834861159324646, "learning_rate": 1.9352805441507984e-05, "loss": 0.4572720527648926, "step": 2626, "token_acc": 0.847592167930852 }, { "epoch": 0.14174715372578645, "grad_norm": 0.5755935311317444, "learning_rate": 1.9352186823839053e-05, "loss": 0.42884260416030884, "step": 2627, "token_acc": 0.8536774694352804 }, { "epoch": 0.14180111153078293, "grad_norm": 0.40489739179611206, "learning_rate": 1.9351567920556785e-05, "loss": 0.4563044607639313, "step": 2628, "token_acc": 0.848373325482114 }, { "epoch": 0.14185506933577943, "grad_norm": 0.5645225048065186, "learning_rate": 1.9350948731680083e-05, "loss": 0.500817060470581, "step": 2629, "token_acc": 0.8331927456769296 }, { "epoch": 0.1419090271407759, "grad_norm": 0.5148449540138245, "learning_rate": 1.935032925722786e-05, "loss": 0.4836093783378601, "step": 2630, "token_acc": 0.8385261446762395 }, { "epoch": 0.14196298494577242, "grad_norm": 0.43493542075157166, "learning_rate": 1.9349709497219026e-05, "loss": 0.3718348741531372, "step": 2631, "token_acc": 0.8696900982615269 }, { "epoch": 0.1420169427507689, "grad_norm": 0.3708043694496155, "learning_rate": 1.934908945167251e-05, "loss": 0.4055171608924866, "step": 2632, "token_acc": 0.8591149005278116 }, { "epoch": 0.14207090055576538, "grad_norm": 0.5567790269851685, "learning_rate": 1.9348469120607258e-05, "loss": 0.44991666078567505, "step": 2633, "token_acc": 0.8449131513647643 }, { "epoch": 0.14212485836076189, "grad_norm": 0.4923851191997528, "learning_rate": 1.9347848504042205e-05, "loss": 0.4471208453178406, "step": 2634, "token_acc": 0.8519350380096752 }, { "epoch": 0.14217881616575836, "grad_norm": 0.3573778569698334, "learning_rate": 1.9347227601996308e-05, "loss": 0.4217786192893982, "step": 2635, "token_acc": 0.8579419648059666 }, { "epoch": 0.14223277397075487, "grad_norm": 0.4135989248752594, "learning_rate": 1.934660641448853e-05, "loss": 0.42408597469329834, "step": 2636, "token_acc": 0.8564576088259983 }, { "epoch": 0.14228673177575135, "grad_norm": 0.5552331805229187, "learning_rate": 1.9345984941537837e-05, "loss": 0.4405093193054199, "step": 2637, "token_acc": 0.8525381704293358 }, { "epoch": 0.14234068958074786, "grad_norm": 0.33282434940338135, "learning_rate": 1.9345363183163216e-05, "loss": 0.40332943201065063, "step": 2638, "token_acc": 0.8566524783742963 }, { "epoch": 0.14239464738574434, "grad_norm": 0.4099656641483307, "learning_rate": 1.934474113938365e-05, "loss": 0.41146886348724365, "step": 2639, "token_acc": 0.8595963394759935 }, { "epoch": 0.14244860519074085, "grad_norm": 0.4312022924423218, "learning_rate": 1.9344118810218137e-05, "loss": 0.4394870698451996, "step": 2640, "token_acc": 0.8515677118078719 }, { "epoch": 0.14250256299573733, "grad_norm": 0.5559657216072083, "learning_rate": 1.9343496195685683e-05, "loss": 0.47145771980285645, "step": 2641, "token_acc": 0.8439409368635438 }, { "epoch": 0.14255652080073383, "grad_norm": 0.5313472151756287, "learning_rate": 1.9342873295805305e-05, "loss": 0.3763653635978699, "step": 2642, "token_acc": 0.8690854119425548 }, { "epoch": 0.1426104786057303, "grad_norm": 0.3981955349445343, "learning_rate": 1.9342250110596024e-05, "loss": 0.42197465896606445, "step": 2643, "token_acc": 0.8559477124183007 }, { "epoch": 0.14266443641072682, "grad_norm": 0.6187452673912048, "learning_rate": 1.9341626640076868e-05, "loss": 0.4121670722961426, "step": 2644, "token_acc": 0.8558384547848991 }, { "epoch": 0.1427183942157233, "grad_norm": 0.3957027196884155, "learning_rate": 1.9341002884266885e-05, "loss": 0.4155169129371643, "step": 2645, "token_acc": 0.857290723250097 }, { "epoch": 0.1427723520207198, "grad_norm": 0.4802405536174774, "learning_rate": 1.934037884318512e-05, "loss": 0.35061079263687134, "step": 2646, "token_acc": 0.876890891535243 }, { "epoch": 0.14282630982571629, "grad_norm": 0.41333943605422974, "learning_rate": 1.9339754516850632e-05, "loss": 0.3972152769565582, "step": 2647, "token_acc": 0.8653788107455478 }, { "epoch": 0.1428802676307128, "grad_norm": 0.3189782202243805, "learning_rate": 1.9339129905282487e-05, "loss": 0.39525482058525085, "step": 2648, "token_acc": 0.8629189839170299 }, { "epoch": 0.14293422543570927, "grad_norm": 0.5434022545814514, "learning_rate": 1.9338505008499764e-05, "loss": 0.446931928396225, "step": 2649, "token_acc": 0.8411654135338346 }, { "epoch": 0.14298818324070578, "grad_norm": 0.4459505081176758, "learning_rate": 1.9337879826521542e-05, "loss": 0.4492718577384949, "step": 2650, "token_acc": 0.849418381695002 }, { "epoch": 0.14304214104570226, "grad_norm": 0.43465456366539, "learning_rate": 1.9337254359366917e-05, "loss": 0.41036295890808105, "step": 2651, "token_acc": 0.8548241659152389 }, { "epoch": 0.14309609885069877, "grad_norm": 0.45508670806884766, "learning_rate": 1.933662860705499e-05, "loss": 0.4241902828216553, "step": 2652, "token_acc": 0.8543338683788122 }, { "epoch": 0.14315005665569525, "grad_norm": 0.46210917830467224, "learning_rate": 1.933600256960487e-05, "loss": 0.4459429979324341, "step": 2653, "token_acc": 0.8480066445182725 }, { "epoch": 0.14320401446069173, "grad_norm": 0.49522605538368225, "learning_rate": 1.933537624703568e-05, "loss": 0.40962356328964233, "step": 2654, "token_acc": 0.8586501423547145 }, { "epoch": 0.14325797226568823, "grad_norm": 0.5989717245101929, "learning_rate": 1.9334749639366544e-05, "loss": 0.5089826583862305, "step": 2655, "token_acc": 0.8317693226411875 }, { "epoch": 0.1433119300706847, "grad_norm": 0.4709008038043976, "learning_rate": 1.93341227466166e-05, "loss": 0.3897020220756531, "step": 2656, "token_acc": 0.8689719336331214 }, { "epoch": 0.14336588787568122, "grad_norm": 0.48668530583381653, "learning_rate": 1.933349556880499e-05, "loss": 0.5580721497535706, "step": 2657, "token_acc": 0.816006216006216 }, { "epoch": 0.1434198456806777, "grad_norm": 0.4277573525905609, "learning_rate": 1.933286810595087e-05, "loss": 0.41588544845581055, "step": 2658, "token_acc": 0.8602347056560216 }, { "epoch": 0.1434738034856742, "grad_norm": 0.3656996488571167, "learning_rate": 1.9332240358073404e-05, "loss": 0.4607694149017334, "step": 2659, "token_acc": 0.8426509453140882 }, { "epoch": 0.14352776129067069, "grad_norm": 0.5306016206741333, "learning_rate": 1.9331612325191763e-05, "loss": 0.45143428444862366, "step": 2660, "token_acc": 0.847249885092692 }, { "epoch": 0.1435817190956672, "grad_norm": 0.4943873882293701, "learning_rate": 1.9330984007325125e-05, "loss": 0.5118364095687866, "step": 2661, "token_acc": 0.8291293734743694 }, { "epoch": 0.14363567690066367, "grad_norm": 0.4901030361652374, "learning_rate": 1.933035540449268e-05, "loss": 0.40256619453430176, "step": 2662, "token_acc": 0.8588363749805022 }, { "epoch": 0.14368963470566018, "grad_norm": 0.4003426730632782, "learning_rate": 1.932972651671363e-05, "loss": 0.3910359740257263, "step": 2663, "token_acc": 0.8645554202192448 }, { "epoch": 0.14374359251065666, "grad_norm": 0.4948442876338959, "learning_rate": 1.9329097344007167e-05, "loss": 0.48610255122184753, "step": 2664, "token_acc": 0.8361610113107119 }, { "epoch": 0.14379755031565317, "grad_norm": 0.5127931237220764, "learning_rate": 1.9328467886392525e-05, "loss": 0.4149092137813568, "step": 2665, "token_acc": 0.8571837026447462 }, { "epoch": 0.14385150812064965, "grad_norm": 0.5183179378509521, "learning_rate": 1.9327838143888908e-05, "loss": 0.39455628395080566, "step": 2666, "token_acc": 0.8595591030026606 }, { "epoch": 0.14390546592564615, "grad_norm": 0.4571131765842438, "learning_rate": 1.9327208116515564e-05, "loss": 0.36241525411605835, "step": 2667, "token_acc": 0.8714814814814815 }, { "epoch": 0.14395942373064263, "grad_norm": 0.5997649431228638, "learning_rate": 1.9326577804291725e-05, "loss": 0.4093201160430908, "step": 2668, "token_acc": 0.8572618254497002 }, { "epoch": 0.14401338153563914, "grad_norm": 0.49672412872314453, "learning_rate": 1.9325947207236645e-05, "loss": 0.4748697876930237, "step": 2669, "token_acc": 0.8435374149659864 }, { "epoch": 0.14406733934063562, "grad_norm": 0.3436872661113739, "learning_rate": 1.9325316325369577e-05, "loss": 0.38823169469833374, "step": 2670, "token_acc": 0.8631064309890937 }, { "epoch": 0.14412129714563213, "grad_norm": 0.4494597017765045, "learning_rate": 1.9324685158709794e-05, "loss": 0.4981222450733185, "step": 2671, "token_acc": 0.8306557808796419 }, { "epoch": 0.1441752549506286, "grad_norm": 0.4668218791484833, "learning_rate": 1.9324053707276568e-05, "loss": 0.3862461745738983, "step": 2672, "token_acc": 0.8637611940298507 }, { "epoch": 0.1442292127556251, "grad_norm": 0.487235963344574, "learning_rate": 1.9323421971089185e-05, "loss": 0.4488886594772339, "step": 2673, "token_acc": 0.8499069967091143 }, { "epoch": 0.1442831705606216, "grad_norm": 0.5349023938179016, "learning_rate": 1.9322789950166938e-05, "loss": 0.46480488777160645, "step": 2674, "token_acc": 0.8399088448938897 }, { "epoch": 0.1443371283656181, "grad_norm": 0.5151306986808777, "learning_rate": 1.9322157644529123e-05, "loss": 0.48708102107048035, "step": 2675, "token_acc": 0.8380634390651085 }, { "epoch": 0.14439108617061458, "grad_norm": 0.4826628267765045, "learning_rate": 1.932152505419506e-05, "loss": 0.43052053451538086, "step": 2676, "token_acc": 0.8544257498171178 }, { "epoch": 0.14444504397561106, "grad_norm": 0.4336564242839813, "learning_rate": 1.9320892179184065e-05, "loss": 0.4114174246788025, "step": 2677, "token_acc": 0.8552009456264775 }, { "epoch": 0.14449900178060757, "grad_norm": 0.4760507345199585, "learning_rate": 1.9320259019515463e-05, "loss": 0.4317532777786255, "step": 2678, "token_acc": 0.8564808685290612 }, { "epoch": 0.14455295958560405, "grad_norm": 0.43017151951789856, "learning_rate": 1.9319625575208594e-05, "loss": 0.40212196111679077, "step": 2679, "token_acc": 0.8584862733201881 }, { "epoch": 0.14460691739060055, "grad_norm": 0.44244828820228577, "learning_rate": 1.93189918462828e-05, "loss": 0.3858657479286194, "step": 2680, "token_acc": 0.8659043659043659 }, { "epoch": 0.14466087519559703, "grad_norm": 0.4944821894168854, "learning_rate": 1.9318357832757434e-05, "loss": 0.46226662397384644, "step": 2681, "token_acc": 0.8470539647577092 }, { "epoch": 0.14471483300059354, "grad_norm": 0.390510618686676, "learning_rate": 1.9317723534651864e-05, "loss": 0.4247378706932068, "step": 2682, "token_acc": 0.853974330105832 }, { "epoch": 0.14476879080559002, "grad_norm": 0.4500361680984497, "learning_rate": 1.9317088951985458e-05, "loss": 0.45293423533439636, "step": 2683, "token_acc": 0.8465825755353477 }, { "epoch": 0.14482274861058653, "grad_norm": 0.45072630047798157, "learning_rate": 1.9316454084777594e-05, "loss": 0.50372713804245, "step": 2684, "token_acc": 0.8296493092454835 }, { "epoch": 0.144876706415583, "grad_norm": 0.4790728688240051, "learning_rate": 1.9315818933047664e-05, "loss": 0.42012983560562134, "step": 2685, "token_acc": 0.859057620561006 }, { "epoch": 0.1449306642205795, "grad_norm": 0.4754791557788849, "learning_rate": 1.9315183496815067e-05, "loss": 0.4888487458229065, "step": 2686, "token_acc": 0.8362958359229336 }, { "epoch": 0.144984622025576, "grad_norm": 0.4505822956562042, "learning_rate": 1.93145477760992e-05, "loss": 0.4892421364784241, "step": 2687, "token_acc": 0.8365431496830814 }, { "epoch": 0.1450385798305725, "grad_norm": 0.45724061131477356, "learning_rate": 1.931391177091949e-05, "loss": 0.44042208790779114, "step": 2688, "token_acc": 0.8501448180329934 }, { "epoch": 0.14509253763556898, "grad_norm": 0.4559488296508789, "learning_rate": 1.931327548129535e-05, "loss": 0.5079942941665649, "step": 2689, "token_acc": 0.8307086614173228 }, { "epoch": 0.1451464954405655, "grad_norm": 0.4091462790966034, "learning_rate": 1.9312638907246218e-05, "loss": 0.4485369324684143, "step": 2690, "token_acc": 0.8454607508532423 }, { "epoch": 0.14520045324556197, "grad_norm": 0.40013808012008667, "learning_rate": 1.9312002048791533e-05, "loss": 0.4704763889312744, "step": 2691, "token_acc": 0.8470033236839284 }, { "epoch": 0.14525441105055847, "grad_norm": 0.39886805415153503, "learning_rate": 1.9311364905950747e-05, "loss": 0.4378187954425812, "step": 2692, "token_acc": 0.8475503062117236 }, { "epoch": 0.14530836885555495, "grad_norm": 0.3844015598297119, "learning_rate": 1.9310727478743315e-05, "loss": 0.4313478171825409, "step": 2693, "token_acc": 0.8520335496122804 }, { "epoch": 0.14536232666055146, "grad_norm": 0.45598840713500977, "learning_rate": 1.93100897671887e-05, "loss": 0.47082430124282837, "step": 2694, "token_acc": 0.8448900388098318 }, { "epoch": 0.14541628446554794, "grad_norm": 0.5321032404899597, "learning_rate": 1.930945177130639e-05, "loss": 0.4297308921813965, "step": 2695, "token_acc": 0.8501252167212483 }, { "epoch": 0.14547024227054445, "grad_norm": 0.4761006832122803, "learning_rate": 1.930881349111586e-05, "loss": 0.47881585359573364, "step": 2696, "token_acc": 0.8354727398205659 }, { "epoch": 0.14552420007554093, "grad_norm": 0.44975051283836365, "learning_rate": 1.9308174926636604e-05, "loss": 0.359990656375885, "step": 2697, "token_acc": 0.8767123287671232 }, { "epoch": 0.1455781578805374, "grad_norm": 0.4406031668186188, "learning_rate": 1.930753607788812e-05, "loss": 0.4027942419052124, "step": 2698, "token_acc": 0.8598990029781173 }, { "epoch": 0.1456321156855339, "grad_norm": 0.4722142517566681, "learning_rate": 1.9306896944889927e-05, "loss": 0.4821806848049164, "step": 2699, "token_acc": 0.8409248234805482 }, { "epoch": 0.1456860734905304, "grad_norm": 0.5092549324035645, "learning_rate": 1.9306257527661537e-05, "loss": 0.40461015701293945, "step": 2700, "token_acc": 0.8584401709401709 }, { "epoch": 0.1457400312955269, "grad_norm": 0.45778989791870117, "learning_rate": 1.9305617826222483e-05, "loss": 0.4160726070404053, "step": 2701, "token_acc": 0.8542024013722127 }, { "epoch": 0.14579398910052338, "grad_norm": 0.4118803143501282, "learning_rate": 1.9304977840592294e-05, "loss": 0.4512472450733185, "step": 2702, "token_acc": 0.8467347224164452 }, { "epoch": 0.1458479469055199, "grad_norm": 0.5780115127563477, "learning_rate": 1.9304337570790525e-05, "loss": 0.4250210225582123, "step": 2703, "token_acc": 0.8529642716921869 }, { "epoch": 0.14590190471051637, "grad_norm": 0.46014276146888733, "learning_rate": 1.930369701683672e-05, "loss": 0.4008493423461914, "step": 2704, "token_acc": 0.8602513853223409 }, { "epoch": 0.14595586251551287, "grad_norm": 0.4230833351612091, "learning_rate": 1.9303056178750445e-05, "loss": 0.48463982343673706, "step": 2705, "token_acc": 0.8404669260700389 }, { "epoch": 0.14600982032050935, "grad_norm": 0.49471035599708557, "learning_rate": 1.9302415056551275e-05, "loss": 0.4366823136806488, "step": 2706, "token_acc": 0.849694856146469 }, { "epoch": 0.14606377812550586, "grad_norm": 0.5120330452919006, "learning_rate": 1.9301773650258782e-05, "loss": 0.512509822845459, "step": 2707, "token_acc": 0.8326577147200406 }, { "epoch": 0.14611773593050234, "grad_norm": 0.42606204748153687, "learning_rate": 1.9301131959892558e-05, "loss": 0.4339478611946106, "step": 2708, "token_acc": 0.8529278770065566 }, { "epoch": 0.14617169373549885, "grad_norm": 0.5048642158508301, "learning_rate": 1.9300489985472204e-05, "loss": 0.4745784401893616, "step": 2709, "token_acc": 0.8387138098898284 }, { "epoch": 0.14622565154049533, "grad_norm": 0.4878278970718384, "learning_rate": 1.929984772701732e-05, "loss": 0.4597453773021698, "step": 2710, "token_acc": 0.8458279845956355 }, { "epoch": 0.14627960934549183, "grad_norm": 0.4500597417354584, "learning_rate": 1.9299205184547524e-05, "loss": 0.4253334403038025, "step": 2711, "token_acc": 0.85198342214328 }, { "epoch": 0.14633356715048831, "grad_norm": 0.5021361708641052, "learning_rate": 1.9298562358082434e-05, "loss": 0.429914653301239, "step": 2712, "token_acc": 0.8558849427648958 }, { "epoch": 0.14638752495548482, "grad_norm": 0.5161751508712769, "learning_rate": 1.9297919247641692e-05, "loss": 0.4597357511520386, "step": 2713, "token_acc": 0.8418503269556793 }, { "epoch": 0.1464414827604813, "grad_norm": 0.4973299503326416, "learning_rate": 1.929727585324493e-05, "loss": 0.38601166009902954, "step": 2714, "token_acc": 0.865278934221482 }, { "epoch": 0.1464954405654778, "grad_norm": 0.5422950387001038, "learning_rate": 1.9296632174911797e-05, "loss": 0.3791979253292084, "step": 2715, "token_acc": 0.8707595565116664 }, { "epoch": 0.1465493983704743, "grad_norm": 0.4892357885837555, "learning_rate": 1.929598821266195e-05, "loss": 0.4058672785758972, "step": 2716, "token_acc": 0.8583815028901735 }, { "epoch": 0.1466033561754708, "grad_norm": 0.5139009952545166, "learning_rate": 1.9295343966515065e-05, "loss": 0.4472658634185791, "step": 2717, "token_acc": 0.8483606557377049 }, { "epoch": 0.14665731398046727, "grad_norm": 0.5747619867324829, "learning_rate": 1.9294699436490803e-05, "loss": 0.4444456398487091, "step": 2718, "token_acc": 0.8550320144475455 }, { "epoch": 0.14671127178546375, "grad_norm": 0.4283032715320587, "learning_rate": 1.929405462260886e-05, "loss": 0.44480955600738525, "step": 2719, "token_acc": 0.8471784086926043 }, { "epoch": 0.14676522959046026, "grad_norm": 0.42347100377082825, "learning_rate": 1.929340952488892e-05, "loss": 0.3706207871437073, "step": 2720, "token_acc": 0.8744593671750512 }, { "epoch": 0.14681918739545674, "grad_norm": 0.5812439322471619, "learning_rate": 1.929276414335069e-05, "loss": 0.4813547730445862, "step": 2721, "token_acc": 0.8333333333333334 }, { "epoch": 0.14687314520045325, "grad_norm": 0.3478457033634186, "learning_rate": 1.929211847801388e-05, "loss": 0.38820141553878784, "step": 2722, "token_acc": 0.8666818856294943 }, { "epoch": 0.14692710300544973, "grad_norm": 0.48757779598236084, "learning_rate": 1.9291472528898204e-05, "loss": 0.4806090295314789, "step": 2723, "token_acc": 0.8322549336784212 }, { "epoch": 0.14698106081044623, "grad_norm": 0.5212034583091736, "learning_rate": 1.9290826296023388e-05, "loss": 0.44371333718299866, "step": 2724, "token_acc": 0.847947157104002 }, { "epoch": 0.14703501861544271, "grad_norm": 0.5804018974304199, "learning_rate": 1.9290179779409174e-05, "loss": 0.4153733551502228, "step": 2725, "token_acc": 0.8520654715510522 }, { "epoch": 0.14708897642043922, "grad_norm": 0.45041775703430176, "learning_rate": 1.9289532979075304e-05, "loss": 0.43468302488327026, "step": 2726, "token_acc": 0.8543312543312543 }, { "epoch": 0.1471429342254357, "grad_norm": 0.4713682234287262, "learning_rate": 1.9288885895041528e-05, "loss": 0.5007508993148804, "step": 2727, "token_acc": 0.8340481689211481 }, { "epoch": 0.1471968920304322, "grad_norm": 0.38597211241722107, "learning_rate": 1.928823852732761e-05, "loss": 0.428169846534729, "step": 2728, "token_acc": 0.8557758979337123 }, { "epoch": 0.1472508498354287, "grad_norm": 0.5365422964096069, "learning_rate": 1.928759087595332e-05, "loss": 0.47040802240371704, "step": 2729, "token_acc": 0.8425448976750661 }, { "epoch": 0.1473048076404252, "grad_norm": 0.38519909977912903, "learning_rate": 1.928694294093844e-05, "loss": 0.37654203176498413, "step": 2730, "token_acc": 0.8708357685563998 }, { "epoch": 0.14735876544542167, "grad_norm": 0.4470045268535614, "learning_rate": 1.9286294722302754e-05, "loss": 0.4434490203857422, "step": 2731, "token_acc": 0.8520102651839179 }, { "epoch": 0.14741272325041818, "grad_norm": 0.5884626507759094, "learning_rate": 1.928564622006606e-05, "loss": 0.45497363805770874, "step": 2732, "token_acc": 0.8436652460286711 }, { "epoch": 0.14746668105541466, "grad_norm": 0.5425617098808289, "learning_rate": 1.9284997434248165e-05, "loss": 0.4403410255908966, "step": 2733, "token_acc": 0.8456483126110125 }, { "epoch": 0.14752063886041117, "grad_norm": 0.31785479187965393, "learning_rate": 1.9284348364868877e-05, "loss": 0.4376537799835205, "step": 2734, "token_acc": 0.8501323069521289 }, { "epoch": 0.14757459666540765, "grad_norm": 0.41193798184394836, "learning_rate": 1.9283699011948023e-05, "loss": 0.4191155433654785, "step": 2735, "token_acc": 0.8567231247839613 }, { "epoch": 0.14762855447040416, "grad_norm": 0.5123618841171265, "learning_rate": 1.9283049375505435e-05, "loss": 0.46389514207839966, "step": 2736, "token_acc": 0.8443009251178216 }, { "epoch": 0.14768251227540063, "grad_norm": 0.49219781160354614, "learning_rate": 1.928239945556095e-05, "loss": 0.4120548963546753, "step": 2737, "token_acc": 0.8578437872961145 }, { "epoch": 0.14773647008039714, "grad_norm": 0.4875769019126892, "learning_rate": 1.928174925213442e-05, "loss": 0.42561131715774536, "step": 2738, "token_acc": 0.8521883992124791 }, { "epoch": 0.14779042788539362, "grad_norm": 0.4264127314090729, "learning_rate": 1.9281098765245698e-05, "loss": 0.3757316470146179, "step": 2739, "token_acc": 0.8725518024411013 }, { "epoch": 0.14784438569039013, "grad_norm": 0.5222475528717041, "learning_rate": 1.9280447994914645e-05, "loss": 0.42609119415283203, "step": 2740, "token_acc": 0.8517943141214852 }, { "epoch": 0.1478983434953866, "grad_norm": 0.5153576135635376, "learning_rate": 1.9279796941161147e-05, "loss": 0.42846986651420593, "step": 2741, "token_acc": 0.8556790301630827 }, { "epoch": 0.1479523013003831, "grad_norm": 0.47883450984954834, "learning_rate": 1.9279145604005083e-05, "loss": 0.38310733437538147, "step": 2742, "token_acc": 0.8640625 }, { "epoch": 0.1480062591053796, "grad_norm": 0.41173282265663147, "learning_rate": 1.927849398346634e-05, "loss": 0.4426199793815613, "step": 2743, "token_acc": 0.846749226006192 }, { "epoch": 0.14806021691037607, "grad_norm": 0.3738730549812317, "learning_rate": 1.9277842079564822e-05, "loss": 0.41584092378616333, "step": 2744, "token_acc": 0.8582864929313575 }, { "epoch": 0.14811417471537258, "grad_norm": 0.3832660913467407, "learning_rate": 1.9277189892320437e-05, "loss": 0.4259582757949829, "step": 2745, "token_acc": 0.8594931670425899 }, { "epoch": 0.14816813252036906, "grad_norm": 0.4828963577747345, "learning_rate": 1.9276537421753102e-05, "loss": 0.4526129364967346, "step": 2746, "token_acc": 0.8441365660564077 }, { "epoch": 0.14822209032536557, "grad_norm": 0.462190717458725, "learning_rate": 1.9275884667882747e-05, "loss": 0.46084803342819214, "step": 2747, "token_acc": 0.8427878946678894 }, { "epoch": 0.14827604813036205, "grad_norm": 0.4632832109928131, "learning_rate": 1.9275231630729304e-05, "loss": 0.42053908109664917, "step": 2748, "token_acc": 0.8554889675837647 }, { "epoch": 0.14833000593535856, "grad_norm": 0.5718839168548584, "learning_rate": 1.9274578310312716e-05, "loss": 0.4570872485637665, "step": 2749, "token_acc": 0.8479636763896533 }, { "epoch": 0.14838396374035503, "grad_norm": 0.400463730096817, "learning_rate": 1.9273924706652934e-05, "loss": 0.4488106667995453, "step": 2750, "token_acc": 0.8445029624753128 }, { "epoch": 0.14843792154535154, "grad_norm": 0.5378624796867371, "learning_rate": 1.9273270819769923e-05, "loss": 0.44692376255989075, "step": 2751, "token_acc": 0.8457978802806388 }, { "epoch": 0.14849187935034802, "grad_norm": 0.48274827003479004, "learning_rate": 1.927261664968365e-05, "loss": 0.4093227982521057, "step": 2752, "token_acc": 0.8592233009708737 }, { "epoch": 0.14854583715534453, "grad_norm": 0.3738875091075897, "learning_rate": 1.927196219641409e-05, "loss": 0.42209863662719727, "step": 2753, "token_acc": 0.8564218766093316 }, { "epoch": 0.148599794960341, "grad_norm": 0.4108492434024811, "learning_rate": 1.9271307459981237e-05, "loss": 0.40807196497917175, "step": 2754, "token_acc": 0.8569032979318055 }, { "epoch": 0.14865375276533752, "grad_norm": 0.41296544671058655, "learning_rate": 1.9270652440405088e-05, "loss": 0.41156458854675293, "step": 2755, "token_acc": 0.8586888331242158 }, { "epoch": 0.148707710570334, "grad_norm": 0.46143603324890137, "learning_rate": 1.9269997137705635e-05, "loss": 0.37250572443008423, "step": 2756, "token_acc": 0.8743249877270496 }, { "epoch": 0.1487616683753305, "grad_norm": 0.425333708524704, "learning_rate": 1.9269341551902898e-05, "loss": 0.40919220447540283, "step": 2757, "token_acc": 0.8634404388714734 }, { "epoch": 0.14881562618032698, "grad_norm": 0.37031152844429016, "learning_rate": 1.9268685683016902e-05, "loss": 0.43917638063430786, "step": 2758, "token_acc": 0.85253152644457 }, { "epoch": 0.1488695839853235, "grad_norm": 0.4150794744491577, "learning_rate": 1.9268029531067667e-05, "loss": 0.4362350404262543, "step": 2759, "token_acc": 0.8548996801395755 }, { "epoch": 0.14892354179031997, "grad_norm": 0.4847869873046875, "learning_rate": 1.9267373096075245e-05, "loss": 0.43809670209884644, "step": 2760, "token_acc": 0.8516613374113722 }, { "epoch": 0.14897749959531648, "grad_norm": 0.40246784687042236, "learning_rate": 1.9266716378059675e-05, "loss": 0.4555293023586273, "step": 2761, "token_acc": 0.8432456012796278 }, { "epoch": 0.14903145740031296, "grad_norm": 0.3207762837409973, "learning_rate": 1.9266059377041012e-05, "loss": 0.48590725660324097, "step": 2762, "token_acc": 0.8385086376558059 }, { "epoch": 0.14908541520530944, "grad_norm": 0.5130422115325928, "learning_rate": 1.9265402093039324e-05, "loss": 0.43443745374679565, "step": 2763, "token_acc": 0.8516737891737892 }, { "epoch": 0.14913937301030594, "grad_norm": 0.49046197533607483, "learning_rate": 1.9264744526074683e-05, "loss": 0.421705961227417, "step": 2764, "token_acc": 0.8526772793053545 }, { "epoch": 0.14919333081530242, "grad_norm": 0.360808402299881, "learning_rate": 1.926408667616717e-05, "loss": 0.4296383559703827, "step": 2765, "token_acc": 0.8575660623296599 }, { "epoch": 0.14924728862029893, "grad_norm": 0.5385661721229553, "learning_rate": 1.926342854333688e-05, "loss": 0.48109203577041626, "step": 2766, "token_acc": 0.8338500563697858 }, { "epoch": 0.1493012464252954, "grad_norm": 0.3937542736530304, "learning_rate": 1.926277012760391e-05, "loss": 0.47477132081985474, "step": 2767, "token_acc": 0.8448723200873959 }, { "epoch": 0.14935520423029192, "grad_norm": 0.5227028727531433, "learning_rate": 1.926211142898836e-05, "loss": 0.4758285880088806, "step": 2768, "token_acc": 0.8412475414442259 }, { "epoch": 0.1494091620352884, "grad_norm": 0.3635508716106415, "learning_rate": 1.926145244751036e-05, "loss": 0.36107558012008667, "step": 2769, "token_acc": 0.8717379233759023 }, { "epoch": 0.1494631198402849, "grad_norm": 0.3924930989742279, "learning_rate": 1.9260793183190026e-05, "loss": 0.39601266384124756, "step": 2770, "token_acc": 0.8632647307583874 }, { "epoch": 0.14951707764528138, "grad_norm": 0.5363448858261108, "learning_rate": 1.9260133636047495e-05, "loss": 0.4837223291397095, "step": 2771, "token_acc": 0.8359128474830955 }, { "epoch": 0.1495710354502779, "grad_norm": 0.5165901184082031, "learning_rate": 1.9259473806102903e-05, "loss": 0.4293256998062134, "step": 2772, "token_acc": 0.8549979146392326 }, { "epoch": 0.14962499325527437, "grad_norm": 0.4607221186161041, "learning_rate": 1.9258813693376415e-05, "loss": 0.40395259857177734, "step": 2773, "token_acc": 0.8555515630614445 }, { "epoch": 0.14967895106027088, "grad_norm": 0.4410806894302368, "learning_rate": 1.925815329788818e-05, "loss": 0.4614957571029663, "step": 2774, "token_acc": 0.8425234387414587 }, { "epoch": 0.14973290886526736, "grad_norm": 0.41635116934776306, "learning_rate": 1.9257492619658365e-05, "loss": 0.40367648005485535, "step": 2775, "token_acc": 0.8605170762847112 }, { "epoch": 0.14978686667026386, "grad_norm": 0.4824385941028595, "learning_rate": 1.9256831658707155e-05, "loss": 0.42003166675567627, "step": 2776, "token_acc": 0.8532972511038314 }, { "epoch": 0.14984082447526034, "grad_norm": 0.46774792671203613, "learning_rate": 1.9256170415054725e-05, "loss": 0.4983764588832855, "step": 2777, "token_acc": 0.8334832134292566 }, { "epoch": 0.14989478228025685, "grad_norm": 0.43119287490844727, "learning_rate": 1.925550888872128e-05, "loss": 0.41686108708381653, "step": 2778, "token_acc": 0.8581245526127416 }, { "epoch": 0.14994874008525333, "grad_norm": 0.5091910362243652, "learning_rate": 1.9254847079727016e-05, "loss": 0.4259141683578491, "step": 2779, "token_acc": 0.8541524459613197 }, { "epoch": 0.15000269789024984, "grad_norm": 0.5860676169395447, "learning_rate": 1.9254184988092148e-05, "loss": 0.42096883058547974, "step": 2780, "token_acc": 0.8513513513513513 }, { "epoch": 0.15005665569524632, "grad_norm": 0.46313321590423584, "learning_rate": 1.9253522613836894e-05, "loss": 0.4419479966163635, "step": 2781, "token_acc": 0.8568965517241379 }, { "epoch": 0.15011061350024282, "grad_norm": 0.4241529703140259, "learning_rate": 1.9252859956981488e-05, "loss": 0.41720694303512573, "step": 2782, "token_acc": 0.8582214765100671 }, { "epoch": 0.1501645713052393, "grad_norm": 0.5469893217086792, "learning_rate": 1.9252197017546156e-05, "loss": 0.4917829632759094, "step": 2783, "token_acc": 0.8387261580381471 }, { "epoch": 0.15021852911023578, "grad_norm": 0.4668389856815338, "learning_rate": 1.9251533795551153e-05, "loss": 0.4738215208053589, "step": 2784, "token_acc": 0.8407984420642648 }, { "epoch": 0.1502724869152323, "grad_norm": 0.5549635887145996, "learning_rate": 1.9250870291016733e-05, "loss": 0.4300811290740967, "step": 2785, "token_acc": 0.855749240364625 }, { "epoch": 0.15032644472022877, "grad_norm": 0.39445170760154724, "learning_rate": 1.9250206503963157e-05, "loss": 0.4618651270866394, "step": 2786, "token_acc": 0.8433734939759037 }, { "epoch": 0.15038040252522528, "grad_norm": 0.3280743360519409, "learning_rate": 1.9249542434410697e-05, "loss": 0.4165632426738739, "step": 2787, "token_acc": 0.862298195631529 }, { "epoch": 0.15043436033022176, "grad_norm": 0.4104847013950348, "learning_rate": 1.9248878082379634e-05, "loss": 0.41735535860061646, "step": 2788, "token_acc": 0.860730915761253 }, { "epoch": 0.15048831813521826, "grad_norm": 0.4510835111141205, "learning_rate": 1.924821344789026e-05, "loss": 0.42043909430503845, "step": 2789, "token_acc": 0.8536427275084263 }, { "epoch": 0.15054227594021474, "grad_norm": 0.4926086664199829, "learning_rate": 1.9247548530962867e-05, "loss": 0.41044002771377563, "step": 2790, "token_acc": 0.8560677328316086 }, { "epoch": 0.15059623374521125, "grad_norm": 0.4936531186103821, "learning_rate": 1.9246883331617763e-05, "loss": 0.4670923352241516, "step": 2791, "token_acc": 0.8437886067261496 }, { "epoch": 0.15065019155020773, "grad_norm": 0.46862244606018066, "learning_rate": 1.924621784987527e-05, "loss": 0.4063207507133484, "step": 2792, "token_acc": 0.856038820992092 }, { "epoch": 0.15070414935520424, "grad_norm": 0.43483343720436096, "learning_rate": 1.9245552085755702e-05, "loss": 0.4559330344200134, "step": 2793, "token_acc": 0.8437452268214449 }, { "epoch": 0.15075810716020072, "grad_norm": 0.4053194522857666, "learning_rate": 1.9244886039279396e-05, "loss": 0.4268023669719696, "step": 2794, "token_acc": 0.8524716503357921 }, { "epoch": 0.15081206496519722, "grad_norm": 0.46881425380706787, "learning_rate": 1.9244219710466692e-05, "loss": 0.4057786166667938, "step": 2795, "token_acc": 0.859538784067086 }, { "epoch": 0.1508660227701937, "grad_norm": 0.5218108296394348, "learning_rate": 1.9243553099337938e-05, "loss": 0.4572598338127136, "step": 2796, "token_acc": 0.8392640166637737 }, { "epoch": 0.1509199805751902, "grad_norm": 0.5903075933456421, "learning_rate": 1.9242886205913496e-05, "loss": 0.4747084975242615, "step": 2797, "token_acc": 0.8409742120343839 }, { "epoch": 0.1509739383801867, "grad_norm": 0.4805097281932831, "learning_rate": 1.9242219030213732e-05, "loss": 0.47901836037635803, "step": 2798, "token_acc": 0.8488582224412683 }, { "epoch": 0.1510278961851832, "grad_norm": 0.3984416127204895, "learning_rate": 1.9241551572259015e-05, "loss": 0.49279388785362244, "step": 2799, "token_acc": 0.8411773069813735 }, { "epoch": 0.15108185399017968, "grad_norm": 0.502148449420929, "learning_rate": 1.9240883832069735e-05, "loss": 0.46379005908966064, "step": 2800, "token_acc": 0.8503807106598985 }, { "epoch": 0.15113581179517618, "grad_norm": 0.504173219203949, "learning_rate": 1.924021580966629e-05, "loss": 0.45221278071403503, "step": 2801, "token_acc": 0.8523816770658956 }, { "epoch": 0.15118976960017266, "grad_norm": 0.4461991786956787, "learning_rate": 1.923954750506907e-05, "loss": 0.4790879487991333, "step": 2802, "token_acc": 0.8416552145365704 }, { "epoch": 0.15124372740516917, "grad_norm": 0.40939220786094666, "learning_rate": 1.923887891829849e-05, "loss": 0.396912544965744, "step": 2803, "token_acc": 0.8602951317059716 }, { "epoch": 0.15129768521016565, "grad_norm": 0.503838062286377, "learning_rate": 1.9238210049374966e-05, "loss": 0.4622441530227661, "step": 2804, "token_acc": 0.8434399117971334 }, { "epoch": 0.15135164301516213, "grad_norm": 0.5384693741798401, "learning_rate": 1.9237540898318928e-05, "loss": 0.4243014454841614, "step": 2805, "token_acc": 0.8519243313763861 }, { "epoch": 0.15140560082015864, "grad_norm": 0.4077434837818146, "learning_rate": 1.9236871465150816e-05, "loss": 0.4321741759777069, "step": 2806, "token_acc": 0.8577712609970675 }, { "epoch": 0.15145955862515512, "grad_norm": 0.4439929723739624, "learning_rate": 1.9236201749891065e-05, "loss": 0.466137170791626, "step": 2807, "token_acc": 0.8411435674331883 }, { "epoch": 0.15151351643015162, "grad_norm": 0.3756852447986603, "learning_rate": 1.923553175256013e-05, "loss": 0.43009236454963684, "step": 2808, "token_acc": 0.8490853658536586 }, { "epoch": 0.1515674742351481, "grad_norm": 0.35528308153152466, "learning_rate": 1.9234861473178477e-05, "loss": 0.4203847646713257, "step": 2809, "token_acc": 0.8531491002570694 }, { "epoch": 0.1516214320401446, "grad_norm": 0.4006516635417938, "learning_rate": 1.9234190911766575e-05, "loss": 0.4179231524467468, "step": 2810, "token_acc": 0.8600998417143553 }, { "epoch": 0.1516753898451411, "grad_norm": 0.4771043360233307, "learning_rate": 1.9233520068344903e-05, "loss": 0.4540753662586212, "step": 2811, "token_acc": 0.853071389042612 }, { "epoch": 0.1517293476501376, "grad_norm": 0.5147551894187927, "learning_rate": 1.9232848942933944e-05, "loss": 0.5146593451499939, "step": 2812, "token_acc": 0.8310309728079104 }, { "epoch": 0.15178330545513408, "grad_norm": 0.5490332841873169, "learning_rate": 1.9232177535554198e-05, "loss": 0.44019758701324463, "step": 2813, "token_acc": 0.8482615268329554 }, { "epoch": 0.15183726326013058, "grad_norm": 0.5518410801887512, "learning_rate": 1.9231505846226166e-05, "loss": 0.4211376905441284, "step": 2814, "token_acc": 0.8524752475247525 }, { "epoch": 0.15189122106512706, "grad_norm": 0.49588021636009216, "learning_rate": 1.9230833874970366e-05, "loss": 0.38810354471206665, "step": 2815, "token_acc": 0.8683715682587438 }, { "epoch": 0.15194517887012357, "grad_norm": 0.5049809813499451, "learning_rate": 1.9230161621807318e-05, "loss": 0.45666101574897766, "step": 2816, "token_acc": 0.8485584218512898 }, { "epoch": 0.15199913667512005, "grad_norm": 0.5149545073509216, "learning_rate": 1.9229489086757552e-05, "loss": 0.4762251675128937, "step": 2817, "token_acc": 0.8397125880889872 }, { "epoch": 0.15205309448011656, "grad_norm": 0.49820661544799805, "learning_rate": 1.9228816269841605e-05, "loss": 0.38371938467025757, "step": 2818, "token_acc": 0.8680030257186082 }, { "epoch": 0.15210705228511304, "grad_norm": 0.42101505398750305, "learning_rate": 1.922814317108003e-05, "loss": 0.4543551206588745, "step": 2819, "token_acc": 0.8507012249245518 }, { "epoch": 0.15216101009010954, "grad_norm": 0.45958176255226135, "learning_rate": 1.922746979049338e-05, "loss": 0.40597474575042725, "step": 2820, "token_acc": 0.8622428666224287 }, { "epoch": 0.15221496789510602, "grad_norm": 0.46392861008644104, "learning_rate": 1.9226796128102213e-05, "loss": 0.4372352361679077, "step": 2821, "token_acc": 0.8473888496824277 }, { "epoch": 0.15226892570010253, "grad_norm": 0.5385838150978088, "learning_rate": 1.9226122183927115e-05, "loss": 0.41051363945007324, "step": 2822, "token_acc": 0.8607655502392344 }, { "epoch": 0.152322883505099, "grad_norm": 0.40470224618911743, "learning_rate": 1.9225447957988662e-05, "loss": 0.420203298330307, "step": 2823, "token_acc": 0.8589160624308714 }, { "epoch": 0.15237684131009552, "grad_norm": 0.5214347243309021, "learning_rate": 1.9224773450307447e-05, "loss": 0.4356220066547394, "step": 2824, "token_acc": 0.85794734619175 }, { "epoch": 0.152430799115092, "grad_norm": 0.43976518511772156, "learning_rate": 1.9224098660904066e-05, "loss": 0.47620338201522827, "step": 2825, "token_acc": 0.8393191980383672 }, { "epoch": 0.1524847569200885, "grad_norm": 0.3912181854248047, "learning_rate": 1.9223423589799127e-05, "loss": 0.46062952280044556, "step": 2826, "token_acc": 0.8449101796407186 }, { "epoch": 0.15253871472508498, "grad_norm": 0.4210067689418793, "learning_rate": 1.922274823701325e-05, "loss": 0.4179927110671997, "step": 2827, "token_acc": 0.8573645129114486 }, { "epoch": 0.15259267253008146, "grad_norm": 0.4245472848415375, "learning_rate": 1.9222072602567055e-05, "loss": 0.39684632420539856, "step": 2828, "token_acc": 0.863132800448745 }, { "epoch": 0.15264663033507797, "grad_norm": 0.3702889084815979, "learning_rate": 1.922139668648118e-05, "loss": 0.43312254548072815, "step": 2829, "token_acc": 0.8484479610468655 }, { "epoch": 0.15270058814007445, "grad_norm": 0.47884029150009155, "learning_rate": 1.9220720488776264e-05, "loss": 0.4442027509212494, "step": 2830, "token_acc": 0.8482109694193388 }, { "epoch": 0.15275454594507096, "grad_norm": 0.47148337960243225, "learning_rate": 1.922004400947296e-05, "loss": 0.38909322023391724, "step": 2831, "token_acc": 0.862751513168657 }, { "epoch": 0.15280850375006744, "grad_norm": 0.49669349193573, "learning_rate": 1.9219367248591933e-05, "loss": 0.4264863133430481, "step": 2832, "token_acc": 0.8522805567038721 }, { "epoch": 0.15286246155506394, "grad_norm": 0.5979148745536804, "learning_rate": 1.9218690206153842e-05, "loss": 0.4635867476463318, "step": 2833, "token_acc": 0.8420781008085326 }, { "epoch": 0.15291641936006042, "grad_norm": 0.5294798016548157, "learning_rate": 1.9218012882179364e-05, "loss": 0.39800506830215454, "step": 2834, "token_acc": 0.8611710068004644 }, { "epoch": 0.15297037716505693, "grad_norm": 0.4750816524028778, "learning_rate": 1.921733527668919e-05, "loss": 0.4140075445175171, "step": 2835, "token_acc": 0.8544466685730626 }, { "epoch": 0.1530243349700534, "grad_norm": 0.3902011215686798, "learning_rate": 1.921665738970401e-05, "loss": 0.4495084881782532, "step": 2836, "token_acc": 0.847121701369558 }, { "epoch": 0.15307829277504992, "grad_norm": 0.3889089524745941, "learning_rate": 1.921597922124453e-05, "loss": 0.4598507583141327, "step": 2837, "token_acc": 0.847207128885025 }, { "epoch": 0.1531322505800464, "grad_norm": 0.4165189266204834, "learning_rate": 1.9215300771331457e-05, "loss": 0.48719432950019836, "step": 2838, "token_acc": 0.839110686110447 }, { "epoch": 0.1531862083850429, "grad_norm": 0.46603140234947205, "learning_rate": 1.9214622039985512e-05, "loss": 0.40648308396339417, "step": 2839, "token_acc": 0.8554054054054054 }, { "epoch": 0.15324016619003938, "grad_norm": 0.5152875185012817, "learning_rate": 1.9213943027227427e-05, "loss": 0.35931044816970825, "step": 2840, "token_acc": 0.8654496281271129 }, { "epoch": 0.1532941239950359, "grad_norm": 0.5902023315429688, "learning_rate": 1.9213263733077932e-05, "loss": 0.4954487085342407, "step": 2841, "token_acc": 0.8399735493469995 }, { "epoch": 0.15334808180003237, "grad_norm": 0.43021056056022644, "learning_rate": 1.921258415755778e-05, "loss": 0.43855762481689453, "step": 2842, "token_acc": 0.84846547314578 }, { "epoch": 0.15340203960502888, "grad_norm": 0.6001092791557312, "learning_rate": 1.9211904300687717e-05, "loss": 0.4716913104057312, "step": 2843, "token_acc": 0.8428622797554837 }, { "epoch": 0.15345599741002536, "grad_norm": 0.434269517660141, "learning_rate": 1.921122416248851e-05, "loss": 0.4123518764972687, "step": 2844, "token_acc": 0.8539079620160701 }, { "epoch": 0.15350995521502186, "grad_norm": 0.44007012248039246, "learning_rate": 1.9210543742980933e-05, "loss": 0.4702775180339813, "step": 2845, "token_acc": 0.8393200769724182 }, { "epoch": 0.15356391302001834, "grad_norm": 0.4095743000507355, "learning_rate": 1.9209863042185764e-05, "loss": 0.4136289954185486, "step": 2846, "token_acc": 0.8549963530269876 }, { "epoch": 0.15361787082501485, "grad_norm": 0.43274685740470886, "learning_rate": 1.9209182060123787e-05, "loss": 0.4462334215641022, "step": 2847, "token_acc": 0.8488915518274416 }, { "epoch": 0.15367182863001133, "grad_norm": 0.45889097452163696, "learning_rate": 1.9208500796815804e-05, "loss": 0.43706047534942627, "step": 2848, "token_acc": 0.8522443890274314 }, { "epoch": 0.1537257864350078, "grad_norm": 0.4512205123901367, "learning_rate": 1.9207819252282615e-05, "loss": 0.3681887984275818, "step": 2849, "token_acc": 0.8718628490632733 }, { "epoch": 0.15377974424000432, "grad_norm": 0.4421086609363556, "learning_rate": 1.9207137426545042e-05, "loss": 0.4855296313762665, "step": 2850, "token_acc": 0.8396385357635167 }, { "epoch": 0.1538337020450008, "grad_norm": 0.42679813504219055, "learning_rate": 1.9206455319623905e-05, "loss": 0.3877667188644409, "step": 2851, "token_acc": 0.8628691983122363 }, { "epoch": 0.1538876598499973, "grad_norm": 0.4518094062805176, "learning_rate": 1.9205772931540034e-05, "loss": 0.47897395491600037, "step": 2852, "token_acc": 0.839279322276578 }, { "epoch": 0.15394161765499378, "grad_norm": 0.38239479064941406, "learning_rate": 1.9205090262314265e-05, "loss": 0.41569650173187256, "step": 2853, "token_acc": 0.8572974272661698 }, { "epoch": 0.1539955754599903, "grad_norm": 0.4367060363292694, "learning_rate": 1.9204407311967457e-05, "loss": 0.46734005212783813, "step": 2854, "token_acc": 0.8452035799823522 }, { "epoch": 0.15404953326498677, "grad_norm": 0.41918978095054626, "learning_rate": 1.9203724080520456e-05, "loss": 0.38245517015457153, "step": 2855, "token_acc": 0.8650074394697687 }, { "epoch": 0.15410349106998328, "grad_norm": 0.3593725264072418, "learning_rate": 1.9203040567994136e-05, "loss": 0.48303166031837463, "step": 2856, "token_acc": 0.8379937304075236 }, { "epoch": 0.15415744887497976, "grad_norm": 0.4611591100692749, "learning_rate": 1.9202356774409366e-05, "loss": 0.3751530051231384, "step": 2857, "token_acc": 0.8715194346289753 }, { "epoch": 0.15421140667997627, "grad_norm": 0.5323427319526672, "learning_rate": 1.920167269978703e-05, "loss": 0.47891759872436523, "step": 2858, "token_acc": 0.8349375383985255 }, { "epoch": 0.15426536448497274, "grad_norm": 0.4356142282485962, "learning_rate": 1.920098834414802e-05, "loss": 0.44130873680114746, "step": 2859, "token_acc": 0.8514599517814091 }, { "epoch": 0.15431932228996925, "grad_norm": 0.37488314509391785, "learning_rate": 1.920030370751324e-05, "loss": 0.3944082260131836, "step": 2860, "token_acc": 0.8650134518657153 }, { "epoch": 0.15437328009496573, "grad_norm": 0.47682681679725647, "learning_rate": 1.919961878990359e-05, "loss": 0.43024516105651855, "step": 2861, "token_acc": 0.8592247596153846 }, { "epoch": 0.15442723789996224, "grad_norm": 0.4057265520095825, "learning_rate": 1.9198933591339996e-05, "loss": 0.454730749130249, "step": 2862, "token_acc": 0.8485484049012032 }, { "epoch": 0.15448119570495872, "grad_norm": 0.40437906980514526, "learning_rate": 1.919824811184338e-05, "loss": 0.3826003074645996, "step": 2863, "token_acc": 0.8655530109120302 }, { "epoch": 0.15453515350995523, "grad_norm": 0.6105615496635437, "learning_rate": 1.9197562351434675e-05, "loss": 0.44227227568626404, "step": 2864, "token_acc": 0.8545190913484775 }, { "epoch": 0.1545891113149517, "grad_norm": 0.2585325539112091, "learning_rate": 1.9196876310134825e-05, "loss": 0.45488399267196655, "step": 2865, "token_acc": 0.8488416741328555 }, { "epoch": 0.1546430691199482, "grad_norm": 0.5036490559577942, "learning_rate": 1.919618998796478e-05, "loss": 0.38977575302124023, "step": 2866, "token_acc": 0.866382590168924 }, { "epoch": 0.1546970269249447, "grad_norm": 0.3687719404697418, "learning_rate": 1.9195503384945505e-05, "loss": 0.35623329877853394, "step": 2867, "token_acc": 0.8745883282834936 }, { "epoch": 0.1547509847299412, "grad_norm": 0.5140228271484375, "learning_rate": 1.9194816501097964e-05, "loss": 0.4598518908023834, "step": 2868, "token_acc": 0.8422136775611095 }, { "epoch": 0.15480494253493768, "grad_norm": 0.37121906876564026, "learning_rate": 1.9194129336443136e-05, "loss": 0.39326372742652893, "step": 2869, "token_acc": 0.8649234521067606 }, { "epoch": 0.15485890033993416, "grad_norm": 0.46864235401153564, "learning_rate": 1.9193441891002008e-05, "loss": 0.3650268316268921, "step": 2870, "token_acc": 0.8731642189586115 }, { "epoch": 0.15491285814493067, "grad_norm": 0.45280691981315613, "learning_rate": 1.919275416479557e-05, "loss": 0.41519707441329956, "step": 2871, "token_acc": 0.8555213004484304 }, { "epoch": 0.15496681594992714, "grad_norm": 0.4319911301136017, "learning_rate": 1.919206615784483e-05, "loss": 0.3951135575771332, "step": 2872, "token_acc": 0.8645797301971636 }, { "epoch": 0.15502077375492365, "grad_norm": 0.48041021823883057, "learning_rate": 1.91913778701708e-05, "loss": 0.42333725094795227, "step": 2873, "token_acc": 0.8571625724537676 }, { "epoch": 0.15507473155992013, "grad_norm": 0.5295830368995667, "learning_rate": 1.919068930179449e-05, "loss": 0.49626198410987854, "step": 2874, "token_acc": 0.8339371105636865 }, { "epoch": 0.15512868936491664, "grad_norm": 0.38291269540786743, "learning_rate": 1.9190000452736943e-05, "loss": 0.4653918743133545, "step": 2875, "token_acc": 0.8431372549019608 }, { "epoch": 0.15518264716991312, "grad_norm": 0.3952103555202484, "learning_rate": 1.9189311323019185e-05, "loss": 0.40802910923957825, "step": 2876, "token_acc": 0.8608169440242057 }, { "epoch": 0.15523660497490963, "grad_norm": 0.48029401898384094, "learning_rate": 1.918862191266227e-05, "loss": 0.5112794637680054, "step": 2877, "token_acc": 0.8345990279465371 }, { "epoch": 0.1552905627799061, "grad_norm": 0.4949905574321747, "learning_rate": 1.918793222168725e-05, "loss": 0.42017972469329834, "step": 2878, "token_acc": 0.8613752743233358 }, { "epoch": 0.1553445205849026, "grad_norm": 0.5263909101486206, "learning_rate": 1.9187242250115185e-05, "loss": 0.4523441195487976, "step": 2879, "token_acc": 0.8537317700886474 }, { "epoch": 0.1553984783898991, "grad_norm": 0.5189448595046997, "learning_rate": 1.9186551997967147e-05, "loss": 0.48316317796707153, "step": 2880, "token_acc": 0.8372122762148337 }, { "epoch": 0.1554524361948956, "grad_norm": 0.46061062812805176, "learning_rate": 1.9185861465264222e-05, "loss": 0.3969056010246277, "step": 2881, "token_acc": 0.857707509881423 }, { "epoch": 0.15550639399989208, "grad_norm": 0.42970457673072815, "learning_rate": 1.918517065202749e-05, "loss": 0.4487103223800659, "step": 2882, "token_acc": 0.8459502382212811 }, { "epoch": 0.15556035180488859, "grad_norm": 0.4629678428173065, "learning_rate": 1.9184479558278052e-05, "loss": 0.43735194206237793, "step": 2883, "token_acc": 0.85180690399137 }, { "epoch": 0.15561430960988507, "grad_norm": 0.4307819902896881, "learning_rate": 1.9183788184037015e-05, "loss": 0.4712144732475281, "step": 2884, "token_acc": 0.8436784379896348 }, { "epoch": 0.15566826741488157, "grad_norm": 0.5082705616950989, "learning_rate": 1.9183096529325493e-05, "loss": 0.43848538398742676, "step": 2885, "token_acc": 0.8500079529187212 }, { "epoch": 0.15572222521987805, "grad_norm": 0.4913020133972168, "learning_rate": 1.9182404594164612e-05, "loss": 0.42527228593826294, "step": 2886, "token_acc": 0.8523351648351648 }, { "epoch": 0.15577618302487456, "grad_norm": 0.4848913848400116, "learning_rate": 1.9181712378575497e-05, "loss": 0.46958112716674805, "step": 2887, "token_acc": 0.844275107463853 }, { "epoch": 0.15583014082987104, "grad_norm": 0.3962678611278534, "learning_rate": 1.918101988257929e-05, "loss": 0.44597405195236206, "step": 2888, "token_acc": 0.8492529348986126 }, { "epoch": 0.15588409863486755, "grad_norm": 0.5015215873718262, "learning_rate": 1.9180327106197146e-05, "loss": 0.42794567346572876, "step": 2889, "token_acc": 0.8529903107250251 }, { "epoch": 0.15593805643986403, "grad_norm": 0.39071667194366455, "learning_rate": 1.9179634049450212e-05, "loss": 0.4439396262168884, "step": 2890, "token_acc": 0.8515965166908563 }, { "epoch": 0.15599201424486053, "grad_norm": 0.416346937417984, "learning_rate": 1.917894071235966e-05, "loss": 0.45481014251708984, "step": 2891, "token_acc": 0.8473071324599709 }, { "epoch": 0.156045972049857, "grad_norm": 0.4696599245071411, "learning_rate": 1.917824709494666e-05, "loss": 0.46836012601852417, "step": 2892, "token_acc": 0.8448249027237354 }, { "epoch": 0.1560999298548535, "grad_norm": 0.42985326051712036, "learning_rate": 1.9177553197232407e-05, "loss": 0.47487902641296387, "step": 2893, "token_acc": 0.8440400586223742 }, { "epoch": 0.15615388765985, "grad_norm": 0.4734545052051544, "learning_rate": 1.9176859019238077e-05, "loss": 0.39540940523147583, "step": 2894, "token_acc": 0.8658001879110554 }, { "epoch": 0.15620784546484648, "grad_norm": 0.38066256046295166, "learning_rate": 1.917616456098488e-05, "loss": 0.39876675605773926, "step": 2895, "token_acc": 0.8622525755295751 }, { "epoch": 0.15626180326984299, "grad_norm": 0.3724753260612488, "learning_rate": 1.9175469822494018e-05, "loss": 0.45704054832458496, "step": 2896, "token_acc": 0.848476856125544 }, { "epoch": 0.15631576107483947, "grad_norm": 0.5250396728515625, "learning_rate": 1.917477480378671e-05, "loss": 0.43435347080230713, "step": 2897, "token_acc": 0.8472471367462399 }, { "epoch": 0.15636971887983597, "grad_norm": 0.5100932121276855, "learning_rate": 1.9174079504884186e-05, "loss": 0.4294694662094116, "step": 2898, "token_acc": 0.8521145975443384 }, { "epoch": 0.15642367668483245, "grad_norm": 0.45854249596595764, "learning_rate": 1.9173383925807677e-05, "loss": 0.388412743806839, "step": 2899, "token_acc": 0.8642573273720815 }, { "epoch": 0.15647763448982896, "grad_norm": 0.46869128942489624, "learning_rate": 1.9172688066578424e-05, "loss": 0.3985058069229126, "step": 2900, "token_acc": 0.8602816029707566 }, { "epoch": 0.15653159229482544, "grad_norm": 0.442769855260849, "learning_rate": 1.917199192721768e-05, "loss": 0.4356606602668762, "step": 2901, "token_acc": 0.8562845849802372 }, { "epoch": 0.15658555009982195, "grad_norm": 0.45568135380744934, "learning_rate": 1.917129550774671e-05, "loss": 0.4527207612991333, "step": 2902, "token_acc": 0.8476859988283538 }, { "epoch": 0.15663950790481843, "grad_norm": 0.5266080498695374, "learning_rate": 1.9170598808186773e-05, "loss": 0.4003790020942688, "step": 2903, "token_acc": 0.8610431180568631 }, { "epoch": 0.15669346570981493, "grad_norm": 0.41362544894218445, "learning_rate": 1.9169901828559156e-05, "loss": 0.39580532908439636, "step": 2904, "token_acc": 0.8664086102719033 }, { "epoch": 0.1567474235148114, "grad_norm": 0.352460652589798, "learning_rate": 1.9169204568885132e-05, "loss": 0.4540122151374817, "step": 2905, "token_acc": 0.84685221015032 }, { "epoch": 0.15680138131980792, "grad_norm": 0.6223880052566528, "learning_rate": 1.916850702918601e-05, "loss": 0.45758795738220215, "step": 2906, "token_acc": 0.8436734693877551 }, { "epoch": 0.1568553391248044, "grad_norm": 0.5053659677505493, "learning_rate": 1.916780920948308e-05, "loss": 0.42686933279037476, "step": 2907, "token_acc": 0.8549808747713288 }, { "epoch": 0.1569092969298009, "grad_norm": 0.29653623700141907, "learning_rate": 1.916711110979766e-05, "loss": 0.3778930902481079, "step": 2908, "token_acc": 0.8716036228023442 }, { "epoch": 0.15696325473479739, "grad_norm": 0.44618022441864014, "learning_rate": 1.916641273015107e-05, "loss": 0.4326525330543518, "step": 2909, "token_acc": 0.8546132339235788 }, { "epoch": 0.1570172125397939, "grad_norm": 0.4367034435272217, "learning_rate": 1.916571407056463e-05, "loss": 0.39419376850128174, "step": 2910, "token_acc": 0.8610118229309871 }, { "epoch": 0.15707117034479037, "grad_norm": 0.4660153388977051, "learning_rate": 1.916501513105969e-05, "loss": 0.45386266708374023, "step": 2911, "token_acc": 0.8504151530877011 }, { "epoch": 0.15712512814978688, "grad_norm": 0.4997435510158539, "learning_rate": 1.9164315911657587e-05, "loss": 0.3760060667991638, "step": 2912, "token_acc": 0.8706240487062404 }, { "epoch": 0.15717908595478336, "grad_norm": 0.5686206221580505, "learning_rate": 1.916361641237968e-05, "loss": 0.39620599150657654, "step": 2913, "token_acc": 0.8642003676470589 }, { "epoch": 0.15723304375977984, "grad_norm": 0.4303590953350067, "learning_rate": 1.9162916633247323e-05, "loss": 0.4865064024925232, "step": 2914, "token_acc": 0.8390706898994191 }, { "epoch": 0.15728700156477635, "grad_norm": 0.4347938895225525, "learning_rate": 1.9162216574281892e-05, "loss": 0.37775880098342896, "step": 2915, "token_acc": 0.865615141955836 }, { "epoch": 0.15734095936977283, "grad_norm": 0.3819311559200287, "learning_rate": 1.9161516235504768e-05, "loss": 0.43557655811309814, "step": 2916, "token_acc": 0.8514796203238414 }, { "epoch": 0.15739491717476933, "grad_norm": 0.4106729030609131, "learning_rate": 1.916081561693734e-05, "loss": 0.44307398796081543, "step": 2917, "token_acc": 0.8519968676585747 }, { "epoch": 0.1574488749797658, "grad_norm": 0.4070585370063782, "learning_rate": 1.9160114718601005e-05, "loss": 0.4382275342941284, "step": 2918, "token_acc": 0.8542547115933752 }, { "epoch": 0.15750283278476232, "grad_norm": 0.3923734128475189, "learning_rate": 1.9159413540517162e-05, "loss": 0.4162598252296448, "step": 2919, "token_acc": 0.861088323762647 }, { "epoch": 0.1575567905897588, "grad_norm": 0.48031437397003174, "learning_rate": 1.915871208270723e-05, "loss": 0.40514522790908813, "step": 2920, "token_acc": 0.8613787991104522 }, { "epoch": 0.1576107483947553, "grad_norm": 0.4476129412651062, "learning_rate": 1.9158010345192634e-05, "loss": 0.4164675772190094, "step": 2921, "token_acc": 0.8618865706729356 }, { "epoch": 0.1576647061997518, "grad_norm": 0.38718774914741516, "learning_rate": 1.9157308327994795e-05, "loss": 0.42280492186546326, "step": 2922, "token_acc": 0.8577289571201694 }, { "epoch": 0.1577186640047483, "grad_norm": 0.48952609300613403, "learning_rate": 1.915660603113516e-05, "loss": 0.5061637163162231, "step": 2923, "token_acc": 0.8287534349238072 }, { "epoch": 0.15777262180974477, "grad_norm": 0.48639047145843506, "learning_rate": 1.915590345463518e-05, "loss": 0.5080236792564392, "step": 2924, "token_acc": 0.834451595234835 }, { "epoch": 0.15782657961474128, "grad_norm": 0.43773844838142395, "learning_rate": 1.9155200598516302e-05, "loss": 0.3680156469345093, "step": 2925, "token_acc": 0.8715826806561253 }, { "epoch": 0.15788053741973776, "grad_norm": 0.3660047948360443, "learning_rate": 1.9154497462799998e-05, "loss": 0.4368237555027008, "step": 2926, "token_acc": 0.8475375263614936 }, { "epoch": 0.15793449522473427, "grad_norm": 0.5209965109825134, "learning_rate": 1.915379404750774e-05, "loss": 0.39954519271850586, "step": 2927, "token_acc": 0.8647265749920051 }, { "epoch": 0.15798845302973075, "grad_norm": 0.48249393701553345, "learning_rate": 1.915309035266101e-05, "loss": 0.47221285104751587, "step": 2928, "token_acc": 0.8448605273213603 }, { "epoch": 0.15804241083472725, "grad_norm": 0.4912915527820587, "learning_rate": 1.9152386378281297e-05, "loss": 0.47142988443374634, "step": 2929, "token_acc": 0.8417495776234278 }, { "epoch": 0.15809636863972373, "grad_norm": 0.5011036396026611, "learning_rate": 1.91516821243901e-05, "loss": 0.5151898860931396, "step": 2930, "token_acc": 0.8294562780269058 }, { "epoch": 0.15815032644472024, "grad_norm": 0.4232407510280609, "learning_rate": 1.9150977591008932e-05, "loss": 0.4052581191062927, "step": 2931, "token_acc": 0.8597023990282417 }, { "epoch": 0.15820428424971672, "grad_norm": 0.5374497771263123, "learning_rate": 1.91502727781593e-05, "loss": 0.4385126829147339, "step": 2932, "token_acc": 0.8523609314359638 }, { "epoch": 0.15825824205471323, "grad_norm": 0.4116773307323456, "learning_rate": 1.9149567685862744e-05, "loss": 0.41211050748825073, "step": 2933, "token_acc": 0.862483994878361 }, { "epoch": 0.1583121998597097, "grad_norm": 0.5242006182670593, "learning_rate": 1.9148862314140778e-05, "loss": 0.44158828258514404, "step": 2934, "token_acc": 0.8551173402868318 }, { "epoch": 0.1583661576647062, "grad_norm": 0.43852972984313965, "learning_rate": 1.9148156663014962e-05, "loss": 0.37037521600723267, "step": 2935, "token_acc": 0.8722586157789803 }, { "epoch": 0.1584201154697027, "grad_norm": 0.365204781293869, "learning_rate": 1.914745073250683e-05, "loss": 0.35110822319984436, "step": 2936, "token_acc": 0.8702899808994617 }, { "epoch": 0.15847407327469917, "grad_norm": 0.42481836676597595, "learning_rate": 1.9146744522637954e-05, "loss": 0.48567965626716614, "step": 2937, "token_acc": 0.8413781624500666 }, { "epoch": 0.15852803107969568, "grad_norm": 0.4014171361923218, "learning_rate": 1.9146038033429897e-05, "loss": 0.4744911193847656, "step": 2938, "token_acc": 0.8432203389830508 }, { "epoch": 0.15858198888469216, "grad_norm": 0.5589228272438049, "learning_rate": 1.914533126490423e-05, "loss": 0.4543496370315552, "step": 2939, "token_acc": 0.8471277399848829 }, { "epoch": 0.15863594668968867, "grad_norm": 0.3085022568702698, "learning_rate": 1.9144624217082546e-05, "loss": 0.4181462526321411, "step": 2940, "token_acc": 0.8529347826086957 }, { "epoch": 0.15868990449468515, "grad_norm": 0.44189050793647766, "learning_rate": 1.9143916889986434e-05, "loss": 0.3699932396411896, "step": 2941, "token_acc": 0.8722115997450606 }, { "epoch": 0.15874386229968165, "grad_norm": 0.46196117997169495, "learning_rate": 1.914320928363749e-05, "loss": 0.4169975519180298, "step": 2942, "token_acc": 0.859443402126329 }, { "epoch": 0.15879782010467813, "grad_norm": 0.5796804428100586, "learning_rate": 1.9142501398057338e-05, "loss": 0.4560551643371582, "step": 2943, "token_acc": 0.8466033601168736 }, { "epoch": 0.15885177790967464, "grad_norm": 0.47789934277534485, "learning_rate": 1.9141793233267583e-05, "loss": 0.45549702644348145, "step": 2944, "token_acc": 0.8487659433493457 }, { "epoch": 0.15890573571467112, "grad_norm": 0.4407688081264496, "learning_rate": 1.914108478928986e-05, "loss": 0.4266018271446228, "step": 2945, "token_acc": 0.8524413595021542 }, { "epoch": 0.15895969351966763, "grad_norm": 0.4810159504413605, "learning_rate": 1.91403760661458e-05, "loss": 0.49015289545059204, "step": 2946, "token_acc": 0.8398490159072526 }, { "epoch": 0.1590136513246641, "grad_norm": 0.47410666942596436, "learning_rate": 1.913966706385705e-05, "loss": 0.4372777044773102, "step": 2947, "token_acc": 0.8467603702434008 }, { "epoch": 0.15906760912966061, "grad_norm": 0.46422430872917175, "learning_rate": 1.913895778244526e-05, "loss": 0.4686344265937805, "step": 2948, "token_acc": 0.8452537662717566 }, { "epoch": 0.1591215669346571, "grad_norm": 0.3995368778705597, "learning_rate": 1.9138248221932096e-05, "loss": 0.4439813494682312, "step": 2949, "token_acc": 0.850079575596817 }, { "epoch": 0.1591755247396536, "grad_norm": 0.45576298236846924, "learning_rate": 1.9137538382339226e-05, "loss": 0.3838018774986267, "step": 2950, "token_acc": 0.870117049781413 }, { "epoch": 0.15922948254465008, "grad_norm": 0.4872741401195526, "learning_rate": 1.913682826368833e-05, "loss": 0.4530051052570343, "step": 2951, "token_acc": 0.8492929292929293 }, { "epoch": 0.1592834403496466, "grad_norm": 0.480971097946167, "learning_rate": 1.9136117866001086e-05, "loss": 0.390275239944458, "step": 2952, "token_acc": 0.8607302164216091 }, { "epoch": 0.15933739815464307, "grad_norm": 0.5203539133071899, "learning_rate": 1.9135407189299198e-05, "loss": 0.45875999331474304, "step": 2953, "token_acc": 0.8470571590265987 }, { "epoch": 0.15939135595963957, "grad_norm": 0.3436616063117981, "learning_rate": 1.913469623360437e-05, "loss": 0.4109957218170166, "step": 2954, "token_acc": 0.8581061692969871 }, { "epoch": 0.15944531376463605, "grad_norm": 0.50037682056427, "learning_rate": 1.9133984998938308e-05, "loss": 0.4735949635505676, "step": 2955, "token_acc": 0.8398245380832114 }, { "epoch": 0.15949927156963253, "grad_norm": 0.39143747091293335, "learning_rate": 1.9133273485322738e-05, "loss": 0.48422572016716003, "step": 2956, "token_acc": 0.8383314109957709 }, { "epoch": 0.15955322937462904, "grad_norm": 0.36990484595298767, "learning_rate": 1.913256169277939e-05, "loss": 0.33053261041641235, "step": 2957, "token_acc": 0.8822546972860126 }, { "epoch": 0.15960718717962552, "grad_norm": 0.445625364780426, "learning_rate": 1.913184962133e-05, "loss": 0.376920223236084, "step": 2958, "token_acc": 0.8662909033975262 }, { "epoch": 0.15966114498462203, "grad_norm": 0.3481610119342804, "learning_rate": 1.9131137270996314e-05, "loss": 0.4158834218978882, "step": 2959, "token_acc": 0.8579688285570638 }, { "epoch": 0.1597151027896185, "grad_norm": 0.4200608432292938, "learning_rate": 1.9130424641800088e-05, "loss": 0.3979843556880951, "step": 2960, "token_acc": 0.8621065540264341 }, { "epoch": 0.15976906059461501, "grad_norm": 0.439982533454895, "learning_rate": 1.9129711733763084e-05, "loss": 0.41400662064552307, "step": 2961, "token_acc": 0.856550006916586 }, { "epoch": 0.1598230183996115, "grad_norm": 0.3770403563976288, "learning_rate": 1.9128998546907077e-05, "loss": 0.4169524908065796, "step": 2962, "token_acc": 0.8559580436638614 }, { "epoch": 0.159876976204608, "grad_norm": 0.5746115446090698, "learning_rate": 1.9128285081253848e-05, "loss": 0.46372464299201965, "step": 2963, "token_acc": 0.8448673587081892 }, { "epoch": 0.15993093400960448, "grad_norm": 0.2918775975704193, "learning_rate": 1.9127571336825183e-05, "loss": 0.45437902212142944, "step": 2964, "token_acc": 0.8478675244222064 }, { "epoch": 0.159984891814601, "grad_norm": 0.5784512162208557, "learning_rate": 1.9126857313642877e-05, "loss": 0.4617273807525635, "step": 2965, "token_acc": 0.8437115936091766 }, { "epoch": 0.16003884961959747, "grad_norm": 0.48302575945854187, "learning_rate": 1.912614301172874e-05, "loss": 0.40257978439331055, "step": 2966, "token_acc": 0.8608952440161641 }, { "epoch": 0.16009280742459397, "grad_norm": 0.48083993792533875, "learning_rate": 1.9125428431104586e-05, "loss": 0.4323297142982483, "step": 2967, "token_acc": 0.8538011695906432 }, { "epoch": 0.16014676522959045, "grad_norm": 0.528530478477478, "learning_rate": 1.9124713571792243e-05, "loss": 0.4837818741798401, "step": 2968, "token_acc": 0.8383578245720346 }, { "epoch": 0.16020072303458696, "grad_norm": 0.5188760757446289, "learning_rate": 1.9123998433813534e-05, "loss": 0.3931277394294739, "step": 2969, "token_acc": 0.8639455782312925 }, { "epoch": 0.16025468083958344, "grad_norm": 0.3996782898902893, "learning_rate": 1.9123283017190304e-05, "loss": 0.3499692678451538, "step": 2970, "token_acc": 0.8743163172288059 }, { "epoch": 0.16030863864457995, "grad_norm": 0.42094892263412476, "learning_rate": 1.91225673219444e-05, "loss": 0.39868026971817017, "step": 2971, "token_acc": 0.8605271938605272 }, { "epoch": 0.16036259644957643, "grad_norm": 0.5016863346099854, "learning_rate": 1.912185134809768e-05, "loss": 0.39760473370552063, "step": 2972, "token_acc": 0.8607787687622362 }, { "epoch": 0.16041655425457294, "grad_norm": 0.6432170867919922, "learning_rate": 1.912113509567201e-05, "loss": 0.45730751752853394, "step": 2973, "token_acc": 0.8445344129554656 }, { "epoch": 0.16047051205956941, "grad_norm": 0.485247939825058, "learning_rate": 1.9120418564689263e-05, "loss": 0.5066729784011841, "step": 2974, "token_acc": 0.8354057591623036 }, { "epoch": 0.16052446986456592, "grad_norm": 0.4092993140220642, "learning_rate": 1.911970175517132e-05, "loss": 0.39257657527923584, "step": 2975, "token_acc": 0.8618716163959783 }, { "epoch": 0.1605784276695624, "grad_norm": 0.5004104971885681, "learning_rate": 1.911898466714008e-05, "loss": 0.41247719526290894, "step": 2976, "token_acc": 0.8582112807728264 }, { "epoch": 0.1606323854745589, "grad_norm": 0.5085257887840271, "learning_rate": 1.9118267300617434e-05, "loss": 0.3982161283493042, "step": 2977, "token_acc": 0.8628085106382979 }, { "epoch": 0.1606863432795554, "grad_norm": 0.37332889437675476, "learning_rate": 1.9117549655625292e-05, "loss": 0.39751505851745605, "step": 2978, "token_acc": 0.8663181478715459 }, { "epoch": 0.16074030108455187, "grad_norm": 0.38105708360671997, "learning_rate": 1.9116831732185574e-05, "loss": 0.3332323133945465, "step": 2979, "token_acc": 0.8792640343808756 }, { "epoch": 0.16079425888954837, "grad_norm": 0.5116437077522278, "learning_rate": 1.9116113530320203e-05, "loss": 0.4485173225402832, "step": 2980, "token_acc": 0.8522105263157894 }, { "epoch": 0.16084821669454485, "grad_norm": 0.5831711888313293, "learning_rate": 1.9115395050051112e-05, "loss": 0.43268883228302, "step": 2981, "token_acc": 0.8502099860009332 }, { "epoch": 0.16090217449954136, "grad_norm": 0.5119827389717102, "learning_rate": 1.9114676291400245e-05, "loss": 0.417450487613678, "step": 2982, "token_acc": 0.8569954601733388 }, { "epoch": 0.16095613230453784, "grad_norm": 0.46695277094841003, "learning_rate": 1.911395725438955e-05, "loss": 0.43186745047569275, "step": 2983, "token_acc": 0.8613138686131386 }, { "epoch": 0.16101009010953435, "grad_norm": 0.4854595959186554, "learning_rate": 1.911323793904099e-05, "loss": 0.42021825909614563, "step": 2984, "token_acc": 0.8566308243727598 }, { "epoch": 0.16106404791453083, "grad_norm": 0.48810163140296936, "learning_rate": 1.911251834537653e-05, "loss": 0.40674158930778503, "step": 2985, "token_acc": 0.8529459241323648 }, { "epoch": 0.16111800571952734, "grad_norm": 0.5187628269195557, "learning_rate": 1.9111798473418146e-05, "loss": 0.4312303066253662, "step": 2986, "token_acc": 0.85140625 }, { "epoch": 0.16117196352452381, "grad_norm": 0.47024109959602356, "learning_rate": 1.9111078323187824e-05, "loss": 0.46573710441589355, "step": 2987, "token_acc": 0.8387995441306826 }, { "epoch": 0.16122592132952032, "grad_norm": 0.5462977290153503, "learning_rate": 1.9110357894707558e-05, "loss": 0.3936096429824829, "step": 2988, "token_acc": 0.8720442410373761 }, { "epoch": 0.1612798791345168, "grad_norm": 0.6042134761810303, "learning_rate": 1.9109637187999348e-05, "loss": 0.4657411277294159, "step": 2989, "token_acc": 0.8415178571428571 }, { "epoch": 0.1613338369395133, "grad_norm": 0.4439103603363037, "learning_rate": 1.9108916203085205e-05, "loss": 0.445422500371933, "step": 2990, "token_acc": 0.846932321315623 }, { "epoch": 0.1613877947445098, "grad_norm": 0.4781888723373413, "learning_rate": 1.9108194939987147e-05, "loss": 0.4406825006008148, "step": 2991, "token_acc": 0.8505730451182358 }, { "epoch": 0.1614417525495063, "grad_norm": 0.5034564137458801, "learning_rate": 1.91074733987272e-05, "loss": 0.43271830677986145, "step": 2992, "token_acc": 0.8540495867768595 }, { "epoch": 0.16149571035450278, "grad_norm": 0.29077690839767456, "learning_rate": 1.9106751579327402e-05, "loss": 0.4058135151863098, "step": 2993, "token_acc": 0.860563840208991 }, { "epoch": 0.16154966815949928, "grad_norm": 0.4108930826187134, "learning_rate": 1.9106029481809795e-05, "loss": 0.434826135635376, "step": 2994, "token_acc": 0.8556031684196858 }, { "epoch": 0.16160362596449576, "grad_norm": 0.3148714303970337, "learning_rate": 1.9105307106196438e-05, "loss": 0.42936402559280396, "step": 2995, "token_acc": 0.8524402788890159 }, { "epoch": 0.16165758376949227, "grad_norm": 0.4438030421733856, "learning_rate": 1.910458445250938e-05, "loss": 0.3985790014266968, "step": 2996, "token_acc": 0.8630698309714872 }, { "epoch": 0.16171154157448875, "grad_norm": 0.39502426981925964, "learning_rate": 1.9103861520770705e-05, "loss": 0.4421175718307495, "step": 2997, "token_acc": 0.8538453784900716 }, { "epoch": 0.16176549937948526, "grad_norm": 0.46542656421661377, "learning_rate": 1.9103138311002483e-05, "loss": 0.4268924295902252, "step": 2998, "token_acc": 0.8564587108464924 }, { "epoch": 0.16181945718448174, "grad_norm": 0.5877998471260071, "learning_rate": 1.91024148232268e-05, "loss": 0.4292316138744354, "step": 2999, "token_acc": 0.8554941682013505 }, { "epoch": 0.16187341498947821, "grad_norm": 0.47354573011398315, "learning_rate": 1.910169105746575e-05, "loss": 0.46799421310424805, "step": 3000, "token_acc": 0.8459903655426466 }, { "epoch": 0.16192737279447472, "grad_norm": 0.3791797459125519, "learning_rate": 1.910096701374144e-05, "loss": 0.3460143208503723, "step": 3001, "token_acc": 0.8741529525653436 }, { "epoch": 0.1619813305994712, "grad_norm": 0.4727855324745178, "learning_rate": 1.9100242692075986e-05, "loss": 0.4142346978187561, "step": 3002, "token_acc": 0.8579890205143023 }, { "epoch": 0.1620352884044677, "grad_norm": 0.4584943950176239, "learning_rate": 1.9099518092491503e-05, "loss": 0.4639683663845062, "step": 3003, "token_acc": 0.8416208995101677 }, { "epoch": 0.1620892462094642, "grad_norm": 0.40184321999549866, "learning_rate": 1.909879321501012e-05, "loss": 0.44734326004981995, "step": 3004, "token_acc": 0.8522476478104309 }, { "epoch": 0.1621432040144607, "grad_norm": 0.3920314908027649, "learning_rate": 1.9098068059653974e-05, "loss": 0.41694721579551697, "step": 3005, "token_acc": 0.8542049824444072 }, { "epoch": 0.16219716181945718, "grad_norm": 0.4750271737575531, "learning_rate": 1.9097342626445218e-05, "loss": 0.41649529337882996, "step": 3006, "token_acc": 0.8602091359383599 }, { "epoch": 0.16225111962445368, "grad_norm": 0.4757634401321411, "learning_rate": 1.9096616915405995e-05, "loss": 0.45878687500953674, "step": 3007, "token_acc": 0.8435329143235197 }, { "epoch": 0.16230507742945016, "grad_norm": 0.5519686341285706, "learning_rate": 1.909589092655848e-05, "loss": 0.39733463525772095, "step": 3008, "token_acc": 0.8609049133969601 }, { "epoch": 0.16235903523444667, "grad_norm": 0.4562993049621582, "learning_rate": 1.9095164659924834e-05, "loss": 0.48769184947013855, "step": 3009, "token_acc": 0.8356498065625447 }, { "epoch": 0.16241299303944315, "grad_norm": 0.3870740830898285, "learning_rate": 1.9094438115527242e-05, "loss": 0.45663416385650635, "step": 3010, "token_acc": 0.8435578630960514 }, { "epoch": 0.16246695084443966, "grad_norm": 0.3796614110469818, "learning_rate": 1.9093711293387896e-05, "loss": 0.48071831464767456, "step": 3011, "token_acc": 0.8377479646829492 }, { "epoch": 0.16252090864943614, "grad_norm": 0.49516820907592773, "learning_rate": 1.9092984193528988e-05, "loss": 0.37667393684387207, "step": 3012, "token_acc": 0.8624260355029586 }, { "epoch": 0.16257486645443264, "grad_norm": 0.44870826601982117, "learning_rate": 1.9092256815972727e-05, "loss": 0.4895767569541931, "step": 3013, "token_acc": 0.8357716580608147 }, { "epoch": 0.16262882425942912, "grad_norm": 0.4378957450389862, "learning_rate": 1.909152916074132e-05, "loss": 0.4655248522758484, "step": 3014, "token_acc": 0.8335392217418159 }, { "epoch": 0.16268278206442563, "grad_norm": 0.5020362138748169, "learning_rate": 1.9090801227856995e-05, "loss": 0.4327974319458008, "step": 3015, "token_acc": 0.8554195174648902 }, { "epoch": 0.1627367398694221, "grad_norm": 0.4708307385444641, "learning_rate": 1.9090073017341985e-05, "loss": 0.44905000925064087, "step": 3016, "token_acc": 0.8461997375014915 }, { "epoch": 0.16279069767441862, "grad_norm": 0.43462520837783813, "learning_rate": 1.9089344529218526e-05, "loss": 0.4013892710208893, "step": 3017, "token_acc": 0.8590448625180898 }, { "epoch": 0.1628446554794151, "grad_norm": 0.42621472477912903, "learning_rate": 1.908861576350886e-05, "loss": 0.4302278161048889, "step": 3018, "token_acc": 0.8512947448591013 }, { "epoch": 0.1628986132844116, "grad_norm": 0.49041908979415894, "learning_rate": 1.9087886720235258e-05, "loss": 0.4460676312446594, "step": 3019, "token_acc": 0.850433356758023 }, { "epoch": 0.16295257108940808, "grad_norm": 0.5959005355834961, "learning_rate": 1.908715739941997e-05, "loss": 0.3852258622646332, "step": 3020, "token_acc": 0.8671689989235737 }, { "epoch": 0.16300652889440456, "grad_norm": 0.5197647213935852, "learning_rate": 1.908642780108528e-05, "loss": 0.3852550983428955, "step": 3021, "token_acc": 0.8649359185014788 }, { "epoch": 0.16306048669940107, "grad_norm": 0.4525311589241028, "learning_rate": 1.9085697925253463e-05, "loss": 0.4852067232131958, "step": 3022, "token_acc": 0.8363676486814521 }, { "epoch": 0.16311444450439755, "grad_norm": 0.4457262456417084, "learning_rate": 1.908496777194681e-05, "loss": 0.4899217188358307, "step": 3023, "token_acc": 0.8368357487922705 }, { "epoch": 0.16316840230939406, "grad_norm": 0.4356609880924225, "learning_rate": 1.9084237341187626e-05, "loss": 0.4420001804828644, "step": 3024, "token_acc": 0.8529543754674644 }, { "epoch": 0.16322236011439054, "grad_norm": 0.3721884787082672, "learning_rate": 1.908350663299821e-05, "loss": 0.3880351185798645, "step": 3025, "token_acc": 0.8663687150837989 }, { "epoch": 0.16327631791938704, "grad_norm": 0.3503791093826294, "learning_rate": 1.9082775647400883e-05, "loss": 0.38490039110183716, "step": 3026, "token_acc": 0.8661317029677748 }, { "epoch": 0.16333027572438352, "grad_norm": 0.30566033720970154, "learning_rate": 1.9082044384417963e-05, "loss": 0.3981873393058777, "step": 3027, "token_acc": 0.8653053749656625 }, { "epoch": 0.16338423352938003, "grad_norm": 0.345996230840683, "learning_rate": 1.9081312844071787e-05, "loss": 0.4355088472366333, "step": 3028, "token_acc": 0.8470040721349622 }, { "epoch": 0.1634381913343765, "grad_norm": 0.35697710514068604, "learning_rate": 1.90805810263847e-05, "loss": 0.39155006408691406, "step": 3029, "token_acc": 0.8655312658871378 }, { "epoch": 0.16349214913937302, "grad_norm": 0.37148353457450867, "learning_rate": 1.9079848931379046e-05, "loss": 0.3775790333747864, "step": 3030, "token_acc": 0.8738653156005911 }, { "epoch": 0.1635461069443695, "grad_norm": 0.4416675865650177, "learning_rate": 1.907911655907718e-05, "loss": 0.38174042105674744, "step": 3031, "token_acc": 0.8716323296354992 }, { "epoch": 0.163600064749366, "grad_norm": 0.4230559766292572, "learning_rate": 1.907838390950148e-05, "loss": 0.4150310158729553, "step": 3032, "token_acc": 0.856912070159243 }, { "epoch": 0.16365402255436248, "grad_norm": 0.39939436316490173, "learning_rate": 1.9077650982674308e-05, "loss": 0.3704567849636078, "step": 3033, "token_acc": 0.8701754385964913 }, { "epoch": 0.163707980359359, "grad_norm": 0.4451710283756256, "learning_rate": 1.9076917778618056e-05, "loss": 0.4116055965423584, "step": 3034, "token_acc": 0.8601674641148326 }, { "epoch": 0.16376193816435547, "grad_norm": 0.42115670442581177, "learning_rate": 1.9076184297355112e-05, "loss": 0.36093151569366455, "step": 3035, "token_acc": 0.8699296010727455 }, { "epoch": 0.16381589596935198, "grad_norm": 0.5147795677185059, "learning_rate": 1.9075450538907874e-05, "loss": 0.4667603671550751, "step": 3036, "token_acc": 0.846431332130894 }, { "epoch": 0.16386985377434846, "grad_norm": 0.4348134994506836, "learning_rate": 1.9074716503298757e-05, "loss": 0.40599024295806885, "step": 3037, "token_acc": 0.861145244531826 }, { "epoch": 0.16392381157934496, "grad_norm": 0.4893949329853058, "learning_rate": 1.9073982190550177e-05, "loss": 0.4161117672920227, "step": 3038, "token_acc": 0.8599320882852292 }, { "epoch": 0.16397776938434144, "grad_norm": 0.35341185331344604, "learning_rate": 1.9073247600684556e-05, "loss": 0.43591952323913574, "step": 3039, "token_acc": 0.8525684544014482 }, { "epoch": 0.16403172718933795, "grad_norm": 0.4746756851673126, "learning_rate": 1.907251273372433e-05, "loss": 0.5032060742378235, "step": 3040, "token_acc": 0.8376819194965255 }, { "epoch": 0.16408568499433443, "grad_norm": 0.5520493984222412, "learning_rate": 1.907177758969194e-05, "loss": 0.451505184173584, "step": 3041, "token_acc": 0.8445739257101238 }, { "epoch": 0.16413964279933094, "grad_norm": 0.4072582423686981, "learning_rate": 1.9071042168609842e-05, "loss": 0.4647718667984009, "step": 3042, "token_acc": 0.8420823823497446 }, { "epoch": 0.16419360060432742, "grad_norm": 0.4072710871696472, "learning_rate": 1.907030647050049e-05, "loss": 0.4526180624961853, "step": 3043, "token_acc": 0.8532579429186861 }, { "epoch": 0.1642475584093239, "grad_norm": 0.4565911889076233, "learning_rate": 1.9069570495386354e-05, "loss": 0.39150315523147583, "step": 3044, "token_acc": 0.8677248677248677 }, { "epoch": 0.1643015162143204, "grad_norm": 0.5181712508201599, "learning_rate": 1.906883424328991e-05, "loss": 0.48595887422561646, "step": 3045, "token_acc": 0.8375580149946448 }, { "epoch": 0.16435547401931688, "grad_norm": 0.3790384829044342, "learning_rate": 1.906809771423365e-05, "loss": 0.4665263295173645, "step": 3046, "token_acc": 0.843591793232081 }, { "epoch": 0.1644094318243134, "grad_norm": 0.5060017108917236, "learning_rate": 1.9067360908240057e-05, "loss": 0.4368601441383362, "step": 3047, "token_acc": 0.8516385302879841 }, { "epoch": 0.16446338962930987, "grad_norm": 0.410969078540802, "learning_rate": 1.906662382533164e-05, "loss": 0.3928323984146118, "step": 3048, "token_acc": 0.8638638638638638 }, { "epoch": 0.16451734743430638, "grad_norm": 0.5922902822494507, "learning_rate": 1.9065886465530905e-05, "loss": 0.38729873299598694, "step": 3049, "token_acc": 0.8621097445600757 }, { "epoch": 0.16457130523930286, "grad_norm": 0.4318084716796875, "learning_rate": 1.906514882886037e-05, "loss": 0.4690765142440796, "step": 3050, "token_acc": 0.8412854336233511 }, { "epoch": 0.16462526304429936, "grad_norm": 0.4047645628452301, "learning_rate": 1.9064410915342567e-05, "loss": 0.38935381174087524, "step": 3051, "token_acc": 0.8657103994064548 }, { "epoch": 0.16467922084929584, "grad_norm": 0.5199657678604126, "learning_rate": 1.9063672725000028e-05, "loss": 0.4456540644168854, "step": 3052, "token_acc": 0.845435029675464 }, { "epoch": 0.16473317865429235, "grad_norm": 0.4581655263900757, "learning_rate": 1.9062934257855298e-05, "loss": 0.40537816286087036, "step": 3053, "token_acc": 0.8617896468801417 }, { "epoch": 0.16478713645928883, "grad_norm": 0.46354949474334717, "learning_rate": 1.9062195513930934e-05, "loss": 0.4106414318084717, "step": 3054, "token_acc": 0.8682346990389479 }, { "epoch": 0.16484109426428534, "grad_norm": 0.4762539267539978, "learning_rate": 1.906145649324949e-05, "loss": 0.3874441385269165, "step": 3055, "token_acc": 0.8614414414414414 }, { "epoch": 0.16489505206928182, "grad_norm": 0.5644006133079529, "learning_rate": 1.9060717195833538e-05, "loss": 0.4249599277973175, "step": 3056, "token_acc": 0.8505707957062532 }, { "epoch": 0.16494900987427832, "grad_norm": 0.4558788239955902, "learning_rate": 1.9059977621705657e-05, "loss": 0.40296635031700134, "step": 3057, "token_acc": 0.8609617899171873 }, { "epoch": 0.1650029676792748, "grad_norm": 0.4701610505580902, "learning_rate": 1.9059237770888433e-05, "loss": 0.4420474171638489, "step": 3058, "token_acc": 0.8494433885419495 }, { "epoch": 0.1650569254842713, "grad_norm": 0.36933737993240356, "learning_rate": 1.905849764340446e-05, "loss": 0.4017281234264374, "step": 3059, "token_acc": 0.8628135916163862 }, { "epoch": 0.1651108832892678, "grad_norm": 0.49486592411994934, "learning_rate": 1.9057757239276343e-05, "loss": 0.4284570813179016, "step": 3060, "token_acc": 0.850625 }, { "epoch": 0.1651648410942643, "grad_norm": 0.38380396366119385, "learning_rate": 1.905701655852669e-05, "loss": 0.4171214699745178, "step": 3061, "token_acc": 0.8514193025141931 }, { "epoch": 0.16521879889926078, "grad_norm": 0.4828064739704132, "learning_rate": 1.905627560117813e-05, "loss": 0.44541528820991516, "step": 3062, "token_acc": 0.851972399427158 }, { "epoch": 0.16527275670425728, "grad_norm": 0.39965707063674927, "learning_rate": 1.9055534367253276e-05, "loss": 0.4025341272354126, "step": 3063, "token_acc": 0.8589725741333705 }, { "epoch": 0.16532671450925376, "grad_norm": 0.38271835446357727, "learning_rate": 1.905479285677478e-05, "loss": 0.4543739855289459, "step": 3064, "token_acc": 0.850853889943074 }, { "epoch": 0.16538067231425024, "grad_norm": 0.5160274505615234, "learning_rate": 1.9054051069765283e-05, "loss": 0.48552241921424866, "step": 3065, "token_acc": 0.8344727272727273 }, { "epoch": 0.16543463011924675, "grad_norm": 0.4581679403781891, "learning_rate": 1.9053309006247438e-05, "loss": 0.43725866079330444, "step": 3066, "token_acc": 0.8505392912172574 }, { "epoch": 0.16548858792424323, "grad_norm": 0.4334912598133087, "learning_rate": 1.9052566666243907e-05, "loss": 0.42254918813705444, "step": 3067, "token_acc": 0.8541187298665439 }, { "epoch": 0.16554254572923974, "grad_norm": 0.4284079372882843, "learning_rate": 1.905182404977736e-05, "loss": 0.40328311920166016, "step": 3068, "token_acc": 0.8622723330442325 }, { "epoch": 0.16559650353423622, "grad_norm": 0.36412155628204346, "learning_rate": 1.9051081156870477e-05, "loss": 0.4421577751636505, "step": 3069, "token_acc": 0.849477440734132 }, { "epoch": 0.16565046133923272, "grad_norm": 0.5095500946044922, "learning_rate": 1.905033798754595e-05, "loss": 0.3993757963180542, "step": 3070, "token_acc": 0.8638897491483432 }, { "epoch": 0.1657044191442292, "grad_norm": 0.4580039978027344, "learning_rate": 1.9049594541826473e-05, "loss": 0.407149076461792, "step": 3071, "token_acc": 0.8616495120973132 }, { "epoch": 0.1657583769492257, "grad_norm": 0.4309196472167969, "learning_rate": 1.9048850819734745e-05, "loss": 0.42896878719329834, "step": 3072, "token_acc": 0.8551810237203495 }, { "epoch": 0.1658123347542222, "grad_norm": 0.36311930418014526, "learning_rate": 1.9048106821293487e-05, "loss": 0.40284842252731323, "step": 3073, "token_acc": 0.8642293689320388 }, { "epoch": 0.1658662925592187, "grad_norm": 0.29335659742355347, "learning_rate": 1.9047362546525413e-05, "loss": 0.37815070152282715, "step": 3074, "token_acc": 0.8670877916720382 }, { "epoch": 0.16592025036421518, "grad_norm": 0.3340229094028473, "learning_rate": 1.904661799545326e-05, "loss": 0.41039860248565674, "step": 3075, "token_acc": 0.8613103196791054 }, { "epoch": 0.16597420816921168, "grad_norm": 0.5454989671707153, "learning_rate": 1.9045873168099763e-05, "loss": 0.39325278997421265, "step": 3076, "token_acc": 0.8685789276340458 }, { "epoch": 0.16602816597420816, "grad_norm": 0.47580328583717346, "learning_rate": 1.904512806448767e-05, "loss": 0.4013035297393799, "step": 3077, "token_acc": 0.866783523225241 }, { "epoch": 0.16608212377920467, "grad_norm": 0.5463937520980835, "learning_rate": 1.9044382684639734e-05, "loss": 0.4367370307445526, "step": 3078, "token_acc": 0.853083853083853 }, { "epoch": 0.16613608158420115, "grad_norm": 0.2669374942779541, "learning_rate": 1.904363702857872e-05, "loss": 0.3574444651603699, "step": 3079, "token_acc": 0.8758127438231469 }, { "epoch": 0.16619003938919766, "grad_norm": 0.4872378706932068, "learning_rate": 1.9042891096327398e-05, "loss": 0.37803834676742554, "step": 3080, "token_acc": 0.8713842975206612 }, { "epoch": 0.16624399719419414, "grad_norm": 0.3928927183151245, "learning_rate": 1.9042144887908557e-05, "loss": 0.4316314458847046, "step": 3081, "token_acc": 0.8523418491484185 }, { "epoch": 0.16629795499919064, "grad_norm": 0.4037330746650696, "learning_rate": 1.9041398403344975e-05, "loss": 0.4856170415878296, "step": 3082, "token_acc": 0.8369784395843027 }, { "epoch": 0.16635191280418712, "grad_norm": 0.308024138212204, "learning_rate": 1.9040651642659457e-05, "loss": 0.42385509610176086, "step": 3083, "token_acc": 0.8564318529862175 }, { "epoch": 0.16640587060918363, "grad_norm": 0.5605846643447876, "learning_rate": 1.90399046058748e-05, "loss": 0.4680020809173584, "step": 3084, "token_acc": 0.8430688753269399 }, { "epoch": 0.1664598284141801, "grad_norm": 0.4533941149711609, "learning_rate": 1.903915729301383e-05, "loss": 0.4744889736175537, "step": 3085, "token_acc": 0.8370794559770938 }, { "epoch": 0.1665137862191766, "grad_norm": 0.508815348148346, "learning_rate": 1.9038409704099365e-05, "loss": 0.5055464506149292, "step": 3086, "token_acc": 0.8333573175996546 }, { "epoch": 0.1665677440241731, "grad_norm": 0.41723284125328064, "learning_rate": 1.9037661839154232e-05, "loss": 0.4070398509502411, "step": 3087, "token_acc": 0.8614712706428279 }, { "epoch": 0.16662170182916958, "grad_norm": 0.5239371657371521, "learning_rate": 1.9036913698201276e-05, "loss": 0.47264283895492554, "step": 3088, "token_acc": 0.841294538665007 }, { "epoch": 0.16667565963416608, "grad_norm": 0.4791196584701538, "learning_rate": 1.9036165281263345e-05, "loss": 0.4445481300354004, "step": 3089, "token_acc": 0.8563517368686021 }, { "epoch": 0.16672961743916256, "grad_norm": 0.4059084951877594, "learning_rate": 1.903541658836329e-05, "loss": 0.44067925214767456, "step": 3090, "token_acc": 0.8507197153485363 }, { "epoch": 0.16678357524415907, "grad_norm": 0.4788905680179596, "learning_rate": 1.903466761952398e-05, "loss": 0.3434464931488037, "step": 3091, "token_acc": 0.8790632645927997 }, { "epoch": 0.16683753304915555, "grad_norm": 0.4125775396823883, "learning_rate": 1.9033918374768284e-05, "loss": 0.4040250778198242, "step": 3092, "token_acc": 0.8659486329743165 }, { "epoch": 0.16689149085415206, "grad_norm": 0.44799116253852844, "learning_rate": 1.9033168854119095e-05, "loss": 0.47848665714263916, "step": 3093, "token_acc": 0.8406209941603761 }, { "epoch": 0.16694544865914854, "grad_norm": 0.4202876389026642, "learning_rate": 1.903241905759929e-05, "loss": 0.5113622546195984, "step": 3094, "token_acc": 0.8233195376052519 }, { "epoch": 0.16699940646414504, "grad_norm": 0.43691521883010864, "learning_rate": 1.9031668985231775e-05, "loss": 0.3818235993385315, "step": 3095, "token_acc": 0.8674194897532413 }, { "epoch": 0.16705336426914152, "grad_norm": 0.45528489351272583, "learning_rate": 1.9030918637039456e-05, "loss": 0.4165036082267761, "step": 3096, "token_acc": 0.8580354548096484 }, { "epoch": 0.16710732207413803, "grad_norm": 0.37382322549819946, "learning_rate": 1.9030168013045248e-05, "loss": 0.47149235010147095, "step": 3097, "token_acc": 0.8405125829859503 }, { "epoch": 0.1671612798791345, "grad_norm": 0.4217591881752014, "learning_rate": 1.9029417113272074e-05, "loss": 0.40047067403793335, "step": 3098, "token_acc": 0.863102998696219 }, { "epoch": 0.16721523768413102, "grad_norm": 0.43519991636276245, "learning_rate": 1.9028665937742865e-05, "loss": 0.4188515543937683, "step": 3099, "token_acc": 0.8556472471979307 }, { "epoch": 0.1672691954891275, "grad_norm": 0.4825066924095154, "learning_rate": 1.9027914486480566e-05, "loss": 0.45050594210624695, "step": 3100, "token_acc": 0.8409515717926933 }, { "epoch": 0.167323153294124, "grad_norm": 0.3912605345249176, "learning_rate": 1.902716275950812e-05, "loss": 0.4174685776233673, "step": 3101, "token_acc": 0.8566737809883435 }, { "epoch": 0.16737711109912048, "grad_norm": 0.410499632358551, "learning_rate": 1.902641075684849e-05, "loss": 0.38698291778564453, "step": 3102, "token_acc": 0.8622466121626162 }, { "epoch": 0.167431068904117, "grad_norm": 0.4997471272945404, "learning_rate": 1.902565847852464e-05, "loss": 0.40593647956848145, "step": 3103, "token_acc": 0.8615945437441204 }, { "epoch": 0.16748502670911347, "grad_norm": 0.364497572183609, "learning_rate": 1.9024905924559544e-05, "loss": 0.3987576365470886, "step": 3104, "token_acc": 0.8638929798723614 }, { "epoch": 0.16753898451410998, "grad_norm": 0.4638570249080658, "learning_rate": 1.9024153094976186e-05, "loss": 0.47411632537841797, "step": 3105, "token_acc": 0.8409321175278622 }, { "epoch": 0.16759294231910646, "grad_norm": 0.3253595530986786, "learning_rate": 1.902339998979756e-05, "loss": 0.4359833300113678, "step": 3106, "token_acc": 0.8448803311139104 }, { "epoch": 0.16764690012410294, "grad_norm": 0.33060422539711, "learning_rate": 1.9022646609046656e-05, "loss": 0.4105335474014282, "step": 3107, "token_acc": 0.8568033887798326 }, { "epoch": 0.16770085792909944, "grad_norm": 0.47034114599227905, "learning_rate": 1.902189295274649e-05, "loss": 0.46629297733306885, "step": 3108, "token_acc": 0.8468091250360958 }, { "epoch": 0.16775481573409592, "grad_norm": 0.46299242973327637, "learning_rate": 1.9021139020920077e-05, "loss": 0.4968959391117096, "step": 3109, "token_acc": 0.8318629331287559 }, { "epoch": 0.16780877353909243, "grad_norm": 0.41635578870773315, "learning_rate": 1.9020384813590445e-05, "loss": 0.4211886525154114, "step": 3110, "token_acc": 0.8588999447208402 }, { "epoch": 0.1678627313440889, "grad_norm": 0.44598835706710815, "learning_rate": 1.9019630330780622e-05, "loss": 0.375880628824234, "step": 3111, "token_acc": 0.866787330316742 }, { "epoch": 0.16791668914908542, "grad_norm": 0.4326196312904358, "learning_rate": 1.901887557251365e-05, "loss": 0.45604294538497925, "step": 3112, "token_acc": 0.846987218502739 }, { "epoch": 0.1679706469540819, "grad_norm": 0.5744934678077698, "learning_rate": 1.901812053881258e-05, "loss": 0.40315115451812744, "step": 3113, "token_acc": 0.8618652187799425 }, { "epoch": 0.1680246047590784, "grad_norm": 0.518562912940979, "learning_rate": 1.9017365229700474e-05, "loss": 0.38895297050476074, "step": 3114, "token_acc": 0.8624265468371932 }, { "epoch": 0.16807856256407488, "grad_norm": 0.37697264552116394, "learning_rate": 1.9016609645200393e-05, "loss": 0.38120725750923157, "step": 3115, "token_acc": 0.8662642521797451 }, { "epoch": 0.1681325203690714, "grad_norm": 0.40374940633773804, "learning_rate": 1.9015853785335416e-05, "loss": 0.4385700821876526, "step": 3116, "token_acc": 0.8495225551531116 }, { "epoch": 0.16818647817406787, "grad_norm": 0.5640169978141785, "learning_rate": 1.9015097650128628e-05, "loss": 0.4706677198410034, "step": 3117, "token_acc": 0.8430528999362651 }, { "epoch": 0.16824043597906438, "grad_norm": 0.49986788630485535, "learning_rate": 1.9014341239603118e-05, "loss": 0.4588157534599304, "step": 3118, "token_acc": 0.8483381736043886 }, { "epoch": 0.16829439378406086, "grad_norm": 0.5069130063056946, "learning_rate": 1.901358455378199e-05, "loss": 0.43923163414001465, "step": 3119, "token_acc": 0.8551205203417931 }, { "epoch": 0.16834835158905737, "grad_norm": 0.5211819410324097, "learning_rate": 1.9012827592688346e-05, "loss": 0.405522882938385, "step": 3120, "token_acc": 0.8594867480016828 }, { "epoch": 0.16840230939405385, "grad_norm": 0.43332040309906006, "learning_rate": 1.9012070356345313e-05, "loss": 0.42361027002334595, "step": 3121, "token_acc": 0.8552467855661551 }, { "epoch": 0.16845626719905035, "grad_norm": 0.5084713101387024, "learning_rate": 1.901131284477601e-05, "loss": 0.4592382311820984, "step": 3122, "token_acc": 0.8427640156453716 }, { "epoch": 0.16851022500404683, "grad_norm": 0.38195061683654785, "learning_rate": 1.9010555058003574e-05, "loss": 0.38659560680389404, "step": 3123, "token_acc": 0.866755201416556 }, { "epoch": 0.16856418280904334, "grad_norm": 0.5358004570007324, "learning_rate": 1.900979699605115e-05, "loss": 0.4225449562072754, "step": 3124, "token_acc": 0.8513820522529345 }, { "epoch": 0.16861814061403982, "grad_norm": 0.46969884634017944, "learning_rate": 1.900903865894188e-05, "loss": 0.40483197569847107, "step": 3125, "token_acc": 0.8616352201257862 }, { "epoch": 0.16867209841903633, "grad_norm": 0.4740333557128906, "learning_rate": 1.9008280046698933e-05, "loss": 0.38142549991607666, "step": 3126, "token_acc": 0.8653899916130836 }, { "epoch": 0.1687260562240328, "grad_norm": 0.3600919842720032, "learning_rate": 1.9007521159345472e-05, "loss": 0.4026815891265869, "step": 3127, "token_acc": 0.8619184376794945 }, { "epoch": 0.1687800140290293, "grad_norm": 0.4411962330341339, "learning_rate": 1.9006761996904676e-05, "loss": 0.4731443524360657, "step": 3128, "token_acc": 0.844542447629548 }, { "epoch": 0.1688339718340258, "grad_norm": 0.4196885824203491, "learning_rate": 1.9006002559399728e-05, "loss": 0.4137226939201355, "step": 3129, "token_acc": 0.8645057128663686 }, { "epoch": 0.16888792963902227, "grad_norm": 0.3437550961971283, "learning_rate": 1.900524284685382e-05, "loss": 0.36204230785369873, "step": 3130, "token_acc": 0.8714776632302406 }, { "epoch": 0.16894188744401878, "grad_norm": 0.5837245583534241, "learning_rate": 1.9004482859290156e-05, "loss": 0.43051910400390625, "step": 3131, "token_acc": 0.854697869367796 }, { "epoch": 0.16899584524901526, "grad_norm": 0.4127880036830902, "learning_rate": 1.9003722596731942e-05, "loss": 0.40262216329574585, "step": 3132, "token_acc": 0.8589368656459659 }, { "epoch": 0.16904980305401177, "grad_norm": 0.3134278953075409, "learning_rate": 1.9002962059202402e-05, "loss": 0.34899449348449707, "step": 3133, "token_acc": 0.8761915077989602 }, { "epoch": 0.16910376085900825, "grad_norm": 0.4274430572986603, "learning_rate": 1.9002201246724755e-05, "loss": 0.4059963822364807, "step": 3134, "token_acc": 0.8645193633287379 }, { "epoch": 0.16915771866400475, "grad_norm": 0.4279491603374481, "learning_rate": 1.9001440159322242e-05, "loss": 0.43779486417770386, "step": 3135, "token_acc": 0.8533919954584162 }, { "epoch": 0.16921167646900123, "grad_norm": 0.5063381791114807, "learning_rate": 1.9000678797018107e-05, "loss": 0.4772273004055023, "step": 3136, "token_acc": 0.842468772961058 }, { "epoch": 0.16926563427399774, "grad_norm": 0.5887525081634521, "learning_rate": 1.8999917159835596e-05, "loss": 0.395871102809906, "step": 3137, "token_acc": 0.858215470577964 }, { "epoch": 0.16931959207899422, "grad_norm": 0.5086939930915833, "learning_rate": 1.8999155247797974e-05, "loss": 0.4360400438308716, "step": 3138, "token_acc": 0.8510329829648423 }, { "epoch": 0.16937354988399073, "grad_norm": 0.536510169506073, "learning_rate": 1.8998393060928508e-05, "loss": 0.41696715354919434, "step": 3139, "token_acc": 0.8618201020947016 }, { "epoch": 0.1694275076889872, "grad_norm": 0.38815057277679443, "learning_rate": 1.8997630599250475e-05, "loss": 0.428373247385025, "step": 3140, "token_acc": 0.8526022079340047 }, { "epoch": 0.1694814654939837, "grad_norm": 0.39951109886169434, "learning_rate": 1.899686786278716e-05, "loss": 0.4355195164680481, "step": 3141, "token_acc": 0.848071944406595 }, { "epoch": 0.1695354232989802, "grad_norm": 0.43599000573158264, "learning_rate": 1.8996104851561856e-05, "loss": 0.46309447288513184, "step": 3142, "token_acc": 0.8416897506925207 }, { "epoch": 0.1695893811039767, "grad_norm": 0.4618593454360962, "learning_rate": 1.899534156559787e-05, "loss": 0.46207088232040405, "step": 3143, "token_acc": 0.8500249958340277 }, { "epoch": 0.16964333890897318, "grad_norm": 0.49624091386795044, "learning_rate": 1.8994578004918506e-05, "loss": 0.4595290720462799, "step": 3144, "token_acc": 0.8370874803562074 }, { "epoch": 0.1696972967139697, "grad_norm": 0.2998047471046448, "learning_rate": 1.8993814169547088e-05, "loss": 0.42933690547943115, "step": 3145, "token_acc": 0.8567777450113044 }, { "epoch": 0.16975125451896617, "grad_norm": 0.3918190002441406, "learning_rate": 1.8993050059506944e-05, "loss": 0.3759266138076782, "step": 3146, "token_acc": 0.8706998813760379 }, { "epoch": 0.16980521232396267, "grad_norm": 0.41941535472869873, "learning_rate": 1.89922856748214e-05, "loss": 0.4131907820701599, "step": 3147, "token_acc": 0.8570783812246127 }, { "epoch": 0.16985917012895915, "grad_norm": 0.5238946080207825, "learning_rate": 1.8991521015513812e-05, "loss": 0.40619051456451416, "step": 3148, "token_acc": 0.8577289377289378 }, { "epoch": 0.16991312793395566, "grad_norm": 0.44682759046554565, "learning_rate": 1.899075608160753e-05, "loss": 0.3496789336204529, "step": 3149, "token_acc": 0.8783212971794523 }, { "epoch": 0.16996708573895214, "grad_norm": 0.48620927333831787, "learning_rate": 1.8989990873125904e-05, "loss": 0.4077920913696289, "step": 3150, "token_acc": 0.8562566465792272 }, { "epoch": 0.17002104354394862, "grad_norm": 0.5335637331008911, "learning_rate": 1.898922539009232e-05, "loss": 0.4372965395450592, "step": 3151, "token_acc": 0.854055019649875 }, { "epoch": 0.17007500134894513, "grad_norm": 0.48947280645370483, "learning_rate": 1.8988459632530145e-05, "loss": 0.4403541684150696, "step": 3152, "token_acc": 0.8462086659064995 }, { "epoch": 0.1701289591539416, "grad_norm": 0.5172683000564575, "learning_rate": 1.8987693600462766e-05, "loss": 0.4448082745075226, "step": 3153, "token_acc": 0.8491500468478116 }, { "epoch": 0.1701829169589381, "grad_norm": 0.4166067838668823, "learning_rate": 1.898692729391358e-05, "loss": 0.39454972743988037, "step": 3154, "token_acc": 0.8642903156538544 }, { "epoch": 0.1702368747639346, "grad_norm": 0.4942951798439026, "learning_rate": 1.8986160712905987e-05, "loss": 0.476574569940567, "step": 3155, "token_acc": 0.8446446737392821 }, { "epoch": 0.1702908325689311, "grad_norm": 0.41677629947662354, "learning_rate": 1.8985393857463403e-05, "loss": 0.40512579679489136, "step": 3156, "token_acc": 0.854679802955665 }, { "epoch": 0.17034479037392758, "grad_norm": 0.5262917280197144, "learning_rate": 1.8984626727609242e-05, "loss": 0.37892574071884155, "step": 3157, "token_acc": 0.8683417085427135 }, { "epoch": 0.1703987481789241, "grad_norm": 0.42711499333381653, "learning_rate": 1.8983859323366934e-05, "loss": 0.4281371533870697, "step": 3158, "token_acc": 0.8618042226487524 }, { "epoch": 0.17045270598392057, "grad_norm": 0.439480185508728, "learning_rate": 1.8983091644759917e-05, "loss": 0.4243488609790802, "step": 3159, "token_acc": 0.8568790397045245 }, { "epoch": 0.17050666378891707, "grad_norm": 0.38737180829048157, "learning_rate": 1.8982323691811635e-05, "loss": 0.45692044496536255, "step": 3160, "token_acc": 0.8451980326171369 }, { "epoch": 0.17056062159391355, "grad_norm": 0.4480702579021454, "learning_rate": 1.898155546454554e-05, "loss": 0.3698863983154297, "step": 3161, "token_acc": 0.8717948717948718 }, { "epoch": 0.17061457939891006, "grad_norm": 0.4715229868888855, "learning_rate": 1.8980786962985095e-05, "loss": 0.4560125470161438, "step": 3162, "token_acc": 0.8456664977192093 }, { "epoch": 0.17066853720390654, "grad_norm": 0.48538967967033386, "learning_rate": 1.8980018187153767e-05, "loss": 0.39685356616973877, "step": 3163, "token_acc": 0.8637073678729498 }, { "epoch": 0.17072249500890305, "grad_norm": 0.43192213773727417, "learning_rate": 1.8979249137075037e-05, "loss": 0.4055509567260742, "step": 3164, "token_acc": 0.8606831882116543 }, { "epoch": 0.17077645281389953, "grad_norm": 0.4887068271636963, "learning_rate": 1.897847981277239e-05, "loss": 0.4417978525161743, "step": 3165, "token_acc": 0.8501859394368692 }, { "epoch": 0.17083041061889603, "grad_norm": 0.46191588044166565, "learning_rate": 1.897771021426932e-05, "loss": 0.3783431351184845, "step": 3166, "token_acc": 0.8699072773250913 }, { "epoch": 0.1708843684238925, "grad_norm": 0.4812788665294647, "learning_rate": 1.8976940341589337e-05, "loss": 0.4340519309043884, "step": 3167, "token_acc": 0.8494875549048316 }, { "epoch": 0.17093832622888902, "grad_norm": 0.4416446089744568, "learning_rate": 1.8976170194755942e-05, "loss": 0.36841312050819397, "step": 3168, "token_acc": 0.8741721854304636 }, { "epoch": 0.1709922840338855, "grad_norm": 0.5319967865943909, "learning_rate": 1.8975399773792663e-05, "loss": 0.4271225929260254, "step": 3169, "token_acc": 0.8515479298769116 }, { "epoch": 0.171046241838882, "grad_norm": 0.35238751769065857, "learning_rate": 1.8974629078723024e-05, "loss": 0.3540220260620117, "step": 3170, "token_acc": 0.8730118335666115 }, { "epoch": 0.1711001996438785, "grad_norm": 0.39079949259757996, "learning_rate": 1.8973858109570566e-05, "loss": 0.42135435342788696, "step": 3171, "token_acc": 0.8586956521739131 }, { "epoch": 0.17115415744887497, "grad_norm": 0.42532381415367126, "learning_rate": 1.8973086866358835e-05, "loss": 0.46618953347206116, "step": 3172, "token_acc": 0.8487234550226677 }, { "epoch": 0.17120811525387147, "grad_norm": 0.44627001881599426, "learning_rate": 1.8972315349111377e-05, "loss": 0.40151509642601013, "step": 3173, "token_acc": 0.8587472318886429 }, { "epoch": 0.17126207305886795, "grad_norm": 0.35222116112709045, "learning_rate": 1.8971543557851763e-05, "loss": 0.3853715658187866, "step": 3174, "token_acc": 0.8673946957878315 }, { "epoch": 0.17131603086386446, "grad_norm": 0.5041956305503845, "learning_rate": 1.8970771492603556e-05, "loss": 0.47650349140167236, "step": 3175, "token_acc": 0.8408638382600705 }, { "epoch": 0.17136998866886094, "grad_norm": 0.557468831539154, "learning_rate": 1.896999915339034e-05, "loss": 0.45578470826148987, "step": 3176, "token_acc": 0.8544294180215772 }, { "epoch": 0.17142394647385745, "grad_norm": 0.4971217215061188, "learning_rate": 1.8969226540235695e-05, "loss": 0.4127199947834015, "step": 3177, "token_acc": 0.859125 }, { "epoch": 0.17147790427885393, "grad_norm": 0.5003228187561035, "learning_rate": 1.8968453653163224e-05, "loss": 0.39327120780944824, "step": 3178, "token_acc": 0.866121813582409 }, { "epoch": 0.17153186208385043, "grad_norm": 0.5060604214668274, "learning_rate": 1.8967680492196527e-05, "loss": 0.4354245960712433, "step": 3179, "token_acc": 0.8522415370539799 }, { "epoch": 0.1715858198888469, "grad_norm": 0.5690745115280151, "learning_rate": 1.896690705735922e-05, "loss": 0.3666948080062866, "step": 3180, "token_acc": 0.8701298701298701 }, { "epoch": 0.17163977769384342, "grad_norm": 0.47949135303497314, "learning_rate": 1.8966133348674917e-05, "loss": 0.4424729347229004, "step": 3181, "token_acc": 0.8479453588623894 }, { "epoch": 0.1716937354988399, "grad_norm": 0.4029514491558075, "learning_rate": 1.896535936616725e-05, "loss": 0.4059305191040039, "step": 3182, "token_acc": 0.8563171887643047 }, { "epoch": 0.1717476933038364, "grad_norm": 0.4744691848754883, "learning_rate": 1.8964585109859862e-05, "loss": 0.44472894072532654, "step": 3183, "token_acc": 0.8483304277859729 }, { "epoch": 0.1718016511088329, "grad_norm": 0.4751589000225067, "learning_rate": 1.8963810579776387e-05, "loss": 0.46224406361579895, "step": 3184, "token_acc": 0.8495831017231795 }, { "epoch": 0.1718556089138294, "grad_norm": 0.44704851508140564, "learning_rate": 1.896303577594049e-05, "loss": 0.40100473165512085, "step": 3185, "token_acc": 0.8576063896168726 }, { "epoch": 0.17190956671882587, "grad_norm": 0.45087742805480957, "learning_rate": 1.8962260698375826e-05, "loss": 0.41289567947387695, "step": 3186, "token_acc": 0.8609442060085837 }, { "epoch": 0.17196352452382238, "grad_norm": 0.42738860845565796, "learning_rate": 1.896148534710607e-05, "loss": 0.39462703466415405, "step": 3187, "token_acc": 0.8626626626626627 }, { "epoch": 0.17201748232881886, "grad_norm": 0.48623767495155334, "learning_rate": 1.8960709722154894e-05, "loss": 0.4562007784843445, "step": 3188, "token_acc": 0.843134476406405 }, { "epoch": 0.17207144013381537, "grad_norm": 0.4120423197746277, "learning_rate": 1.8959933823545992e-05, "loss": 0.4207670986652374, "step": 3189, "token_acc": 0.8526691546324842 }, { "epoch": 0.17212539793881185, "grad_norm": 0.3610650599002838, "learning_rate": 1.895915765130306e-05, "loss": 0.45046746730804443, "step": 3190, "token_acc": 0.8476420798065296 }, { "epoch": 0.17217935574380835, "grad_norm": 0.5599566102027893, "learning_rate": 1.89583812054498e-05, "loss": 0.4025228023529053, "step": 3191, "token_acc": 0.8614916286149162 }, { "epoch": 0.17223331354880483, "grad_norm": 0.609144926071167, "learning_rate": 1.8957604486009925e-05, "loss": 0.48287081718444824, "step": 3192, "token_acc": 0.835291774546903 }, { "epoch": 0.17228727135380134, "grad_norm": 0.43354475498199463, "learning_rate": 1.8956827493007157e-05, "loss": 0.469099223613739, "step": 3193, "token_acc": 0.8435667471541911 }, { "epoch": 0.17234122915879782, "grad_norm": 0.4303430914878845, "learning_rate": 1.895605022646522e-05, "loss": 0.3611160218715668, "step": 3194, "token_acc": 0.8754346001678456 }, { "epoch": 0.1723951869637943, "grad_norm": 0.549533486366272, "learning_rate": 1.8955272686407858e-05, "loss": 0.48456087708473206, "step": 3195, "token_acc": 0.8321952884943666 }, { "epoch": 0.1724491447687908, "grad_norm": 0.4877399802207947, "learning_rate": 1.8954494872858812e-05, "loss": 0.4420475661754608, "step": 3196, "token_acc": 0.8527193330686781 }, { "epoch": 0.1725031025737873, "grad_norm": 0.40819215774536133, "learning_rate": 1.895371678584184e-05, "loss": 0.4200184941291809, "step": 3197, "token_acc": 0.8583949602848535 }, { "epoch": 0.1725570603787838, "grad_norm": 0.4239858090877533, "learning_rate": 1.8952938425380702e-05, "loss": 0.46307578682899475, "step": 3198, "token_acc": 0.8450242372398061 }, { "epoch": 0.17261101818378027, "grad_norm": 0.4341135323047638, "learning_rate": 1.8952159791499173e-05, "loss": 0.3874431550502777, "step": 3199, "token_acc": 0.8683427059212703 }, { "epoch": 0.17266497598877678, "grad_norm": 0.45399224758148193, "learning_rate": 1.8951380884221026e-05, "loss": 0.35793977975845337, "step": 3200, "token_acc": 0.8714555765595463 }, { "epoch": 0.17271893379377326, "grad_norm": 0.46836450695991516, "learning_rate": 1.895060170357005e-05, "loss": 0.45609143376350403, "step": 3201, "token_acc": 0.8412621359223301 }, { "epoch": 0.17277289159876977, "grad_norm": 0.35654160380363464, "learning_rate": 1.8949822249570048e-05, "loss": 0.37616559863090515, "step": 3202, "token_acc": 0.8730633023461709 }, { "epoch": 0.17282684940376625, "grad_norm": 0.43095412850379944, "learning_rate": 1.8949042522244814e-05, "loss": 0.4505297839641571, "step": 3203, "token_acc": 0.8518287614297589 }, { "epoch": 0.17288080720876275, "grad_norm": 0.4015381634235382, "learning_rate": 1.8948262521618168e-05, "loss": 0.38376837968826294, "step": 3204, "token_acc": 0.8717781402936379 }, { "epoch": 0.17293476501375923, "grad_norm": 0.3500354290008545, "learning_rate": 1.894748224771393e-05, "loss": 0.3835756480693817, "step": 3205, "token_acc": 0.867704789322167 }, { "epoch": 0.17298872281875574, "grad_norm": 0.48961037397384644, "learning_rate": 1.8946701700555925e-05, "loss": 0.4029461741447449, "step": 3206, "token_acc": 0.8601912151863655 }, { "epoch": 0.17304268062375222, "grad_norm": 0.4386245608329773, "learning_rate": 1.8945920880167998e-05, "loss": 0.40357109904289246, "step": 3207, "token_acc": 0.8653055813336868 }, { "epoch": 0.17309663842874873, "grad_norm": 0.4615020155906677, "learning_rate": 1.8945139786573988e-05, "loss": 0.4048902094364166, "step": 3208, "token_acc": 0.8592604658739208 }, { "epoch": 0.1731505962337452, "grad_norm": 0.4222795069217682, "learning_rate": 1.894435841979775e-05, "loss": 0.4151199758052826, "step": 3209, "token_acc": 0.8588704650292981 }, { "epoch": 0.17320455403874171, "grad_norm": 0.4913344979286194, "learning_rate": 1.894357677986315e-05, "loss": 0.46420860290527344, "step": 3210, "token_acc": 0.843941537010844 }, { "epoch": 0.1732585118437382, "grad_norm": 0.4338860511779785, "learning_rate": 1.8942794866794065e-05, "loss": 0.4392614960670471, "step": 3211, "token_acc": 0.8495649733370755 }, { "epoch": 0.1733124696487347, "grad_norm": 0.4002130627632141, "learning_rate": 1.894201268061436e-05, "loss": 0.468217134475708, "step": 3212, "token_acc": 0.8404758685953765 }, { "epoch": 0.17336642745373118, "grad_norm": 0.3251212537288666, "learning_rate": 1.894123022134793e-05, "loss": 0.33943402767181396, "step": 3213, "token_acc": 0.8840871986939403 }, { "epoch": 0.1734203852587277, "grad_norm": 0.4756092429161072, "learning_rate": 1.8940447489018674e-05, "loss": 0.38915419578552246, "step": 3214, "token_acc": 0.8672577743510665 }, { "epoch": 0.17347434306372417, "grad_norm": 0.43986520171165466, "learning_rate": 1.8939664483650493e-05, "loss": 0.4147312045097351, "step": 3215, "token_acc": 0.8573439099283521 }, { "epoch": 0.17352830086872065, "grad_norm": 0.4640830457210541, "learning_rate": 1.8938881205267302e-05, "loss": 0.3977811634540558, "step": 3216, "token_acc": 0.8619847328244274 }, { "epoch": 0.17358225867371715, "grad_norm": 0.40696051716804504, "learning_rate": 1.893809765389302e-05, "loss": 0.4053109884262085, "step": 3217, "token_acc": 0.8607594936708861 }, { "epoch": 0.17363621647871363, "grad_norm": 0.4042190909385681, "learning_rate": 1.8937313829551574e-05, "loss": 0.3836534023284912, "step": 3218, "token_acc": 0.8676724137931034 }, { "epoch": 0.17369017428371014, "grad_norm": 0.4096800982952118, "learning_rate": 1.8936529732266908e-05, "loss": 0.4104767441749573, "step": 3219, "token_acc": 0.859511889862328 }, { "epoch": 0.17374413208870662, "grad_norm": 0.46506890654563904, "learning_rate": 1.8935745362062965e-05, "loss": 0.3854980766773224, "step": 3220, "token_acc": 0.8625384989463446 }, { "epoch": 0.17379808989370313, "grad_norm": 0.5126937627792358, "learning_rate": 1.8934960718963698e-05, "loss": 0.4173951745033264, "step": 3221, "token_acc": 0.853486646884273 }, { "epoch": 0.1738520476986996, "grad_norm": 0.5407857894897461, "learning_rate": 1.8934175802993072e-05, "loss": 0.43153268098831177, "step": 3222, "token_acc": 0.8556563103909618 }, { "epoch": 0.17390600550369611, "grad_norm": 0.5575805902481079, "learning_rate": 1.893339061417506e-05, "loss": 0.40529781579971313, "step": 3223, "token_acc": 0.8580974842767296 }, { "epoch": 0.1739599633086926, "grad_norm": 0.3945031762123108, "learning_rate": 1.8932605152533635e-05, "loss": 0.47264373302459717, "step": 3224, "token_acc": 0.8341232227488151 }, { "epoch": 0.1740139211136891, "grad_norm": 0.45611807703971863, "learning_rate": 1.893181941809279e-05, "loss": 0.4162033200263977, "step": 3225, "token_acc": 0.8641372795969773 }, { "epoch": 0.17406787891868558, "grad_norm": 0.4918416440486908, "learning_rate": 1.8931033410876522e-05, "loss": 0.4156489372253418, "step": 3226, "token_acc": 0.8548721492743607 }, { "epoch": 0.1741218367236821, "grad_norm": 0.4439191222190857, "learning_rate": 1.893024713090883e-05, "loss": 0.4216209053993225, "step": 3227, "token_acc": 0.8587067581374982 }, { "epoch": 0.17417579452867857, "grad_norm": 0.450234979391098, "learning_rate": 1.892946057821373e-05, "loss": 0.49439868330955505, "step": 3228, "token_acc": 0.831441393875396 }, { "epoch": 0.17422975233367508, "grad_norm": 0.3849770426750183, "learning_rate": 1.8928673752815247e-05, "loss": 0.3719853460788727, "step": 3229, "token_acc": 0.8696142798522773 }, { "epoch": 0.17428371013867155, "grad_norm": 0.5495262742042542, "learning_rate": 1.89278866547374e-05, "loss": 0.44230419397354126, "step": 3230, "token_acc": 0.8548641819330385 }, { "epoch": 0.17433766794366806, "grad_norm": 0.39584484696388245, "learning_rate": 1.892709928400424e-05, "loss": 0.42719024419784546, "step": 3231, "token_acc": 0.8549410698096102 }, { "epoch": 0.17439162574866454, "grad_norm": 0.39561623334884644, "learning_rate": 1.8926311640639802e-05, "loss": 0.44362345337867737, "step": 3232, "token_acc": 0.8489952375421072 }, { "epoch": 0.17444558355366105, "grad_norm": 0.4464161694049835, "learning_rate": 1.8925523724668145e-05, "loss": 0.4698230028152466, "step": 3233, "token_acc": 0.8408510638297872 }, { "epoch": 0.17449954135865753, "grad_norm": 0.4058562219142914, "learning_rate": 1.8924735536113337e-05, "loss": 0.4237537384033203, "step": 3234, "token_acc": 0.8571942446043166 }, { "epoch": 0.17455349916365404, "grad_norm": 0.4796210527420044, "learning_rate": 1.892394707499944e-05, "loss": 0.40352508425712585, "step": 3235, "token_acc": 0.8669664605583661 }, { "epoch": 0.17460745696865052, "grad_norm": 0.4400445520877838, "learning_rate": 1.8923158341350534e-05, "loss": 0.4661145806312561, "step": 3236, "token_acc": 0.8432049732343291 }, { "epoch": 0.174661414773647, "grad_norm": 0.5181483030319214, "learning_rate": 1.8922369335190716e-05, "loss": 0.4460141658782959, "step": 3237, "token_acc": 0.846214386903825 }, { "epoch": 0.1747153725786435, "grad_norm": 0.37012889981269836, "learning_rate": 1.892158005654407e-05, "loss": 0.35346290469169617, "step": 3238, "token_acc": 0.8752615062761506 }, { "epoch": 0.17476933038363998, "grad_norm": 0.4346592128276825, "learning_rate": 1.8920790505434713e-05, "loss": 0.4097927510738373, "step": 3239, "token_acc": 0.8621368213913647 }, { "epoch": 0.1748232881886365, "grad_norm": 0.44595858454704285, "learning_rate": 1.8920000681886742e-05, "loss": 0.430366188287735, "step": 3240, "token_acc": 0.8521968365553603 }, { "epoch": 0.17487724599363297, "grad_norm": 0.48395946621894836, "learning_rate": 1.891921058592429e-05, "loss": 0.4455573856830597, "step": 3241, "token_acc": 0.8473607038123168 }, { "epoch": 0.17493120379862948, "grad_norm": 0.45524007081985474, "learning_rate": 1.8918420217571488e-05, "loss": 0.4426691234111786, "step": 3242, "token_acc": 0.8503782937646752 }, { "epoch": 0.17498516160362595, "grad_norm": 0.49392247200012207, "learning_rate": 1.891762957685247e-05, "loss": 0.45761367678642273, "step": 3243, "token_acc": 0.8480495917750227 }, { "epoch": 0.17503911940862246, "grad_norm": 0.4485774040222168, "learning_rate": 1.891683866379137e-05, "loss": 0.4294213056564331, "step": 3244, "token_acc": 0.8532777115613825 }, { "epoch": 0.17509307721361894, "grad_norm": 0.5559445023536682, "learning_rate": 1.8916047478412362e-05, "loss": 0.4279617965221405, "step": 3245, "token_acc": 0.8573630136986301 }, { "epoch": 0.17514703501861545, "grad_norm": 0.37695664167404175, "learning_rate": 1.89152560207396e-05, "loss": 0.4066988229751587, "step": 3246, "token_acc": 0.8618914861677032 }, { "epoch": 0.17520099282361193, "grad_norm": 0.4207111597061157, "learning_rate": 1.891446429079725e-05, "loss": 0.4445301294326782, "step": 3247, "token_acc": 0.856655290102389 }, { "epoch": 0.17525495062860844, "grad_norm": 0.48177507519721985, "learning_rate": 1.8913672288609497e-05, "loss": 0.40704888105392456, "step": 3248, "token_acc": 0.8637941919191919 }, { "epoch": 0.17530890843360492, "grad_norm": 0.496910035610199, "learning_rate": 1.891288001420053e-05, "loss": 0.4026704430580139, "step": 3249, "token_acc": 0.8680409268733073 }, { "epoch": 0.17536286623860142, "grad_norm": 0.46919986605644226, "learning_rate": 1.891208746759454e-05, "loss": 0.40183883905410767, "step": 3250, "token_acc": 0.8629006664550937 }, { "epoch": 0.1754168240435979, "grad_norm": 0.5007449388504028, "learning_rate": 1.891129464881573e-05, "loss": 0.42082032561302185, "step": 3251, "token_acc": 0.8513363028953229 }, { "epoch": 0.1754707818485944, "grad_norm": 0.35915520787239075, "learning_rate": 1.8910501557888324e-05, "loss": 0.4037818908691406, "step": 3252, "token_acc": 0.8586448598130841 }, { "epoch": 0.1755247396535909, "grad_norm": 0.6086817383766174, "learning_rate": 1.890970819483653e-05, "loss": 0.4004799425601959, "step": 3253, "token_acc": 0.8626104023552502 }, { "epoch": 0.1755786974585874, "grad_norm": 0.4566987156867981, "learning_rate": 1.890891455968458e-05, "loss": 0.4435812532901764, "step": 3254, "token_acc": 0.8446805719532993 }, { "epoch": 0.17563265526358388, "grad_norm": 0.38791170716285706, "learning_rate": 1.8908120652456717e-05, "loss": 0.4453320801258087, "step": 3255, "token_acc": 0.8479166666666667 }, { "epoch": 0.17568661306858038, "grad_norm": 0.5486491322517395, "learning_rate": 1.890732647317718e-05, "loss": 0.4370645582675934, "step": 3256, "token_acc": 0.8483077341307598 }, { "epoch": 0.17574057087357686, "grad_norm": 0.4880150854587555, "learning_rate": 1.8906532021870228e-05, "loss": 0.40686899423599243, "step": 3257, "token_acc": 0.8555814370184555 }, { "epoch": 0.17579452867857334, "grad_norm": 0.6007476449012756, "learning_rate": 1.8905737298560122e-05, "loss": 0.41688570380210876, "step": 3258, "token_acc": 0.8491561181434599 }, { "epoch": 0.17584848648356985, "grad_norm": 0.4468817412853241, "learning_rate": 1.890494230327113e-05, "loss": 0.42016538977622986, "step": 3259, "token_acc": 0.8552216275559215 }, { "epoch": 0.17590244428856633, "grad_norm": 0.4113508462905884, "learning_rate": 1.8904147036027533e-05, "loss": 0.4168527126312256, "step": 3260, "token_acc": 0.85589127260474 }, { "epoch": 0.17595640209356284, "grad_norm": 0.468546986579895, "learning_rate": 1.8903351496853618e-05, "loss": 0.37343841791152954, "step": 3261, "token_acc": 0.8695709397209792 }, { "epoch": 0.17601035989855932, "grad_norm": 0.48838183283805847, "learning_rate": 1.8902555685773678e-05, "loss": 0.4221004247665405, "step": 3262, "token_acc": 0.8566198901769372 }, { "epoch": 0.17606431770355582, "grad_norm": 0.48253366351127625, "learning_rate": 1.8901759602812024e-05, "loss": 0.43149736523628235, "step": 3263, "token_acc": 0.8499210110584519 }, { "epoch": 0.1761182755085523, "grad_norm": 0.46934303641319275, "learning_rate": 1.8900963247992964e-05, "loss": 0.390555202960968, "step": 3264, "token_acc": 0.8585795097423005 }, { "epoch": 0.1761722333135488, "grad_norm": 0.40594053268432617, "learning_rate": 1.8900166621340815e-05, "loss": 0.3643888235092163, "step": 3265, "token_acc": 0.8721290460420895 }, { "epoch": 0.1762261911185453, "grad_norm": 0.5181409120559692, "learning_rate": 1.889936972287991e-05, "loss": 0.4415939450263977, "step": 3266, "token_acc": 0.8517972350230415 }, { "epoch": 0.1762801489235418, "grad_norm": 0.48019978404045105, "learning_rate": 1.8898572552634584e-05, "loss": 0.46471017599105835, "step": 3267, "token_acc": 0.8453542907846369 }, { "epoch": 0.17633410672853828, "grad_norm": 0.5534043908119202, "learning_rate": 1.8897775110629183e-05, "loss": 0.3903803825378418, "step": 3268, "token_acc": 0.8649649259547935 }, { "epoch": 0.17638806453353478, "grad_norm": 0.4200558364391327, "learning_rate": 1.8896977396888064e-05, "loss": 0.47837620973587036, "step": 3269, "token_acc": 0.8422120064899946 }, { "epoch": 0.17644202233853126, "grad_norm": 0.3766881823539734, "learning_rate": 1.889617941143558e-05, "loss": 0.4219745695590973, "step": 3270, "token_acc": 0.8578222778473091 }, { "epoch": 0.17649598014352777, "grad_norm": 0.5340601801872253, "learning_rate": 1.8895381154296115e-05, "loss": 0.39675724506378174, "step": 3271, "token_acc": 0.8645360450335936 }, { "epoch": 0.17654993794852425, "grad_norm": 0.3966578245162964, "learning_rate": 1.8894582625494036e-05, "loss": 0.42828649282455444, "step": 3272, "token_acc": 0.8564072557050907 }, { "epoch": 0.17660389575352076, "grad_norm": 0.381715327501297, "learning_rate": 1.8893783825053735e-05, "loss": 0.4578266143798828, "step": 3273, "token_acc": 0.8449885670731707 }, { "epoch": 0.17665785355851724, "grad_norm": 0.539746880531311, "learning_rate": 1.8892984752999605e-05, "loss": 0.39186373353004456, "step": 3274, "token_acc": 0.8644915932746197 }, { "epoch": 0.17671181136351374, "grad_norm": 0.44847431778907776, "learning_rate": 1.889218540935605e-05, "loss": 0.46193599700927734, "step": 3275, "token_acc": 0.8446981075398018 }, { "epoch": 0.17676576916851022, "grad_norm": 0.476217120885849, "learning_rate": 1.8891385794147482e-05, "loss": 0.42684659361839294, "step": 3276, "token_acc": 0.8575268817204301 }, { "epoch": 0.17681972697350673, "grad_norm": 0.3547223210334778, "learning_rate": 1.8890585907398323e-05, "loss": 0.4130432903766632, "step": 3277, "token_acc": 0.8583638583638583 }, { "epoch": 0.1768736847785032, "grad_norm": 0.3851824700832367, "learning_rate": 1.8889785749132997e-05, "loss": 0.4005359411239624, "step": 3278, "token_acc": 0.8636925613022872 }, { "epoch": 0.17692764258349972, "grad_norm": 0.4035947918891907, "learning_rate": 1.8888985319375942e-05, "loss": 0.4941556453704834, "step": 3279, "token_acc": 0.8373363832761469 }, { "epoch": 0.1769816003884962, "grad_norm": 0.5012121200561523, "learning_rate": 1.8888184618151607e-05, "loss": 0.42384231090545654, "step": 3280, "token_acc": 0.8541752648232219 }, { "epoch": 0.17703555819349268, "grad_norm": 0.3631686866283417, "learning_rate": 1.888738364548444e-05, "loss": 0.3950161933898926, "step": 3281, "token_acc": 0.8604651162790697 }, { "epoch": 0.17708951599848918, "grad_norm": 0.4529315233230591, "learning_rate": 1.888658240139891e-05, "loss": 0.464152991771698, "step": 3282, "token_acc": 0.8389856400855484 }, { "epoch": 0.17714347380348566, "grad_norm": 0.5074738264083862, "learning_rate": 1.8885780885919474e-05, "loss": 0.42906975746154785, "step": 3283, "token_acc": 0.8563445549952297 }, { "epoch": 0.17719743160848217, "grad_norm": 0.4623276889324188, "learning_rate": 1.888497909907062e-05, "loss": 0.322138249874115, "step": 3284, "token_acc": 0.8827838827838828 }, { "epoch": 0.17725138941347865, "grad_norm": 0.4201645255088806, "learning_rate": 1.8884177040876833e-05, "loss": 0.358661025762558, "step": 3285, "token_acc": 0.8776191111704257 }, { "epoch": 0.17730534721847516, "grad_norm": 0.5061196088790894, "learning_rate": 1.8883374711362604e-05, "loss": 0.39218103885650635, "step": 3286, "token_acc": 0.8611779810624298 }, { "epoch": 0.17735930502347164, "grad_norm": 0.5342753529548645, "learning_rate": 1.888257211055244e-05, "loss": 0.45021307468414307, "step": 3287, "token_acc": 0.8432055749128919 }, { "epoch": 0.17741326282846814, "grad_norm": 0.5153935551643372, "learning_rate": 1.8881769238470852e-05, "loss": 0.4774830937385559, "step": 3288, "token_acc": 0.845291768646142 }, { "epoch": 0.17746722063346462, "grad_norm": 0.3613833487033844, "learning_rate": 1.8880966095142357e-05, "loss": 0.41412973403930664, "step": 3289, "token_acc": 0.8581495098039216 }, { "epoch": 0.17752117843846113, "grad_norm": 0.46021807193756104, "learning_rate": 1.8880162680591483e-05, "loss": 0.454272598028183, "step": 3290, "token_acc": 0.8463735008566533 }, { "epoch": 0.1775751362434576, "grad_norm": 0.48529019951820374, "learning_rate": 1.8879358994842767e-05, "loss": 0.42397013306617737, "step": 3291, "token_acc": 0.8544197346290413 }, { "epoch": 0.17762909404845412, "grad_norm": 0.5211560130119324, "learning_rate": 1.8878555037920756e-05, "loss": 0.3900403380393982, "step": 3292, "token_acc": 0.8689524916460846 }, { "epoch": 0.1776830518534506, "grad_norm": 0.44128236174583435, "learning_rate": 1.887775080985e-05, "loss": 0.48853176832199097, "step": 3293, "token_acc": 0.8307317073170731 }, { "epoch": 0.1777370096584471, "grad_norm": 0.4270675778388977, "learning_rate": 1.8876946310655054e-05, "loss": 0.38003861904144287, "step": 3294, "token_acc": 0.8657002188183808 }, { "epoch": 0.17779096746344358, "grad_norm": 0.42115309834480286, "learning_rate": 1.88761415403605e-05, "loss": 0.44154712557792664, "step": 3295, "token_acc": 0.8494978858350951 }, { "epoch": 0.1778449252684401, "grad_norm": 0.5603165030479431, "learning_rate": 1.8875336498990904e-05, "loss": 0.45100992918014526, "step": 3296, "token_acc": 0.8462035541195476 }, { "epoch": 0.17789888307343657, "grad_norm": 0.3516680598258972, "learning_rate": 1.887453118657086e-05, "loss": 0.30873432755470276, "step": 3297, "token_acc": 0.8848167539267016 }, { "epoch": 0.17795284087843308, "grad_norm": 0.4715759754180908, "learning_rate": 1.887372560312496e-05, "loss": 0.46916574239730835, "step": 3298, "token_acc": 0.8430266203703703 }, { "epoch": 0.17800679868342956, "grad_norm": 0.5870843529701233, "learning_rate": 1.8872919748677798e-05, "loss": 0.39574354887008667, "step": 3299, "token_acc": 0.8593109728335104 }, { "epoch": 0.17806075648842606, "grad_norm": 0.45529231429100037, "learning_rate": 1.8872113623253996e-05, "loss": 0.4494096636772156, "step": 3300, "token_acc": 0.8517036654620547 }, { "epoch": 0.17811471429342254, "grad_norm": 0.4190133512020111, "learning_rate": 1.8871307226878167e-05, "loss": 0.41803956031799316, "step": 3301, "token_acc": 0.8590612146140818 }, { "epoch": 0.17816867209841902, "grad_norm": 0.5271034240722656, "learning_rate": 1.8870500559574937e-05, "loss": 0.4331246316432953, "step": 3302, "token_acc": 0.850828729281768 }, { "epoch": 0.17822262990341553, "grad_norm": 0.4529269337654114, "learning_rate": 1.8869693621368948e-05, "loss": 0.4532070457935333, "step": 3303, "token_acc": 0.848294434470377 }, { "epoch": 0.178276587708412, "grad_norm": 0.5139601230621338, "learning_rate": 1.8868886412284837e-05, "loss": 0.4868418872356415, "step": 3304, "token_acc": 0.8309050154104791 }, { "epoch": 0.17833054551340852, "grad_norm": 0.43115004897117615, "learning_rate": 1.8868078932347257e-05, "loss": 0.412844717502594, "step": 3305, "token_acc": 0.8591749644381224 }, { "epoch": 0.178384503318405, "grad_norm": 0.436084121465683, "learning_rate": 1.8867271181580874e-05, "loss": 0.38165542483329773, "step": 3306, "token_acc": 0.8672622846202935 }, { "epoch": 0.1784384611234015, "grad_norm": 0.41949278116226196, "learning_rate": 1.8866463160010346e-05, "loss": 0.44818106293678284, "step": 3307, "token_acc": 0.8514128352490421 }, { "epoch": 0.17849241892839798, "grad_norm": 0.5196791291236877, "learning_rate": 1.886565486766036e-05, "loss": 0.3932476043701172, "step": 3308, "token_acc": 0.8567302137492778 }, { "epoch": 0.1785463767333945, "grad_norm": 0.611030638217926, "learning_rate": 1.8864846304555594e-05, "loss": 0.43478089570999146, "step": 3309, "token_acc": 0.8516956920256645 }, { "epoch": 0.17860033453839097, "grad_norm": 0.5325126647949219, "learning_rate": 1.8864037470720748e-05, "loss": 0.4821234941482544, "step": 3310, "token_acc": 0.8418457897850523 }, { "epoch": 0.17865429234338748, "grad_norm": 0.4518466293811798, "learning_rate": 1.8863228366180518e-05, "loss": 0.40962716937065125, "step": 3311, "token_acc": 0.8566912539515279 }, { "epoch": 0.17870825014838396, "grad_norm": 0.43847930431365967, "learning_rate": 1.8862418990959613e-05, "loss": 0.40636008977890015, "step": 3312, "token_acc": 0.8616377040547657 }, { "epoch": 0.17876220795338046, "grad_norm": 0.4783676266670227, "learning_rate": 1.8861609345082758e-05, "loss": 0.4765089154243469, "step": 3313, "token_acc": 0.8442724886002487 }, { "epoch": 0.17881616575837694, "grad_norm": 0.46912387013435364, "learning_rate": 1.886079942857467e-05, "loss": 0.4245953857898712, "step": 3314, "token_acc": 0.8526902887139107 }, { "epoch": 0.17887012356337345, "grad_norm": 0.6485637426376343, "learning_rate": 1.885998924146009e-05, "loss": 0.45930489897727966, "step": 3315, "token_acc": 0.844147582697201 }, { "epoch": 0.17892408136836993, "grad_norm": 0.4641703963279724, "learning_rate": 1.8859178783763764e-05, "loss": 0.43099045753479004, "step": 3316, "token_acc": 0.8607853136155023 }, { "epoch": 0.17897803917336644, "grad_norm": 0.48698949813842773, "learning_rate": 1.8858368055510434e-05, "loss": 0.42919090390205383, "step": 3317, "token_acc": 0.8527872284742214 }, { "epoch": 0.17903199697836292, "grad_norm": 0.34756115078926086, "learning_rate": 1.8857557056724868e-05, "loss": 0.41141408681869507, "step": 3318, "token_acc": 0.8574194283111263 }, { "epoch": 0.17908595478335942, "grad_norm": 0.3703955411911011, "learning_rate": 1.8856745787431828e-05, "loss": 0.3613601624965668, "step": 3319, "token_acc": 0.8752577319587629 }, { "epoch": 0.1791399125883559, "grad_norm": 0.4189054071903229, "learning_rate": 1.885593424765609e-05, "loss": 0.40751251578330994, "step": 3320, "token_acc": 0.8582386782711314 }, { "epoch": 0.1791938703933524, "grad_norm": 0.45949581265449524, "learning_rate": 1.8855122437422443e-05, "loss": 0.4918515086174011, "step": 3321, "token_acc": 0.8370528388578931 }, { "epoch": 0.1792478281983489, "grad_norm": 0.44476786255836487, "learning_rate": 1.8854310356755672e-05, "loss": 0.3832548260688782, "step": 3322, "token_acc": 0.866110130036924 }, { "epoch": 0.17930178600334537, "grad_norm": 0.46147793531417847, "learning_rate": 1.8853498005680585e-05, "loss": 0.47338950634002686, "step": 3323, "token_acc": 0.8468079539508111 }, { "epoch": 0.17935574380834188, "grad_norm": 0.3546956777572632, "learning_rate": 1.885268538422199e-05, "loss": 0.4509344696998596, "step": 3324, "token_acc": 0.8495278439678757 }, { "epoch": 0.17940970161333836, "grad_norm": 0.46867337822914124, "learning_rate": 1.88518724924047e-05, "loss": 0.38469356298446655, "step": 3325, "token_acc": 0.8644483840174554 }, { "epoch": 0.17946365941833486, "grad_norm": 0.40294769406318665, "learning_rate": 1.8851059330253543e-05, "loss": 0.40734827518463135, "step": 3326, "token_acc": 0.8617220801364024 }, { "epoch": 0.17951761722333134, "grad_norm": 0.41632622480392456, "learning_rate": 1.8850245897793353e-05, "loss": 0.4459276497364044, "step": 3327, "token_acc": 0.8521225752038234 }, { "epoch": 0.17957157502832785, "grad_norm": 0.47574126720428467, "learning_rate": 1.8849432195048974e-05, "loss": 0.36446672677993774, "step": 3328, "token_acc": 0.8737331782688155 }, { "epoch": 0.17962553283332433, "grad_norm": 0.36798280477523804, "learning_rate": 1.8848618222045252e-05, "loss": 0.37973570823669434, "step": 3329, "token_acc": 0.8700771948187884 }, { "epoch": 0.17967949063832084, "grad_norm": 0.5888170599937439, "learning_rate": 1.8847803978807046e-05, "loss": 0.42605945467948914, "step": 3330, "token_acc": 0.8569001321502737 }, { "epoch": 0.17973344844331732, "grad_norm": 0.5458078384399414, "learning_rate": 1.8846989465359228e-05, "loss": 0.39137354493141174, "step": 3331, "token_acc": 0.8612136121361214 }, { "epoch": 0.17978740624831382, "grad_norm": 0.39788684248924255, "learning_rate": 1.8846174681726666e-05, "loss": 0.39532670378685, "step": 3332, "token_acc": 0.8634013957094857 }, { "epoch": 0.1798413640533103, "grad_norm": 0.49150946736335754, "learning_rate": 1.8845359627934247e-05, "loss": 0.4579111933708191, "step": 3333, "token_acc": 0.8421194553053796 }, { "epoch": 0.1798953218583068, "grad_norm": 0.4494048058986664, "learning_rate": 1.8844544304006866e-05, "loss": 0.41439756751060486, "step": 3334, "token_acc": 0.8544494720965309 }, { "epoch": 0.1799492796633033, "grad_norm": 0.42263317108154297, "learning_rate": 1.8843728709969417e-05, "loss": 0.446149080991745, "step": 3335, "token_acc": 0.8473797947055646 }, { "epoch": 0.1800032374682998, "grad_norm": 0.5816841721534729, "learning_rate": 1.8842912845846805e-05, "loss": 0.37411385774612427, "step": 3336, "token_acc": 0.8663604092712466 }, { "epoch": 0.18005719527329628, "grad_norm": 0.3665008246898651, "learning_rate": 1.8842096711663956e-05, "loss": 0.3915324807167053, "step": 3337, "token_acc": 0.8679265542789649 }, { "epoch": 0.18011115307829278, "grad_norm": 0.3383072316646576, "learning_rate": 1.884128030744579e-05, "loss": 0.3979032337665558, "step": 3338, "token_acc": 0.8643105446118192 }, { "epoch": 0.18016511088328926, "grad_norm": 0.4675232172012329, "learning_rate": 1.884046363321724e-05, "loss": 0.3954330086708069, "step": 3339, "token_acc": 0.8683744159819559 }, { "epoch": 0.18021906868828577, "grad_norm": 0.4132600724697113, "learning_rate": 1.8839646689003244e-05, "loss": 0.406806617975235, "step": 3340, "token_acc": 0.8623711950728414 }, { "epoch": 0.18027302649328225, "grad_norm": 0.47107744216918945, "learning_rate": 1.8838829474828753e-05, "loss": 0.40374764800071716, "step": 3341, "token_acc": 0.8603983491835636 }, { "epoch": 0.18032698429827876, "grad_norm": 0.5690560936927795, "learning_rate": 1.883801199071873e-05, "loss": 0.4931589365005493, "step": 3342, "token_acc": 0.8337433077702214 }, { "epoch": 0.18038094210327524, "grad_norm": 0.4614846706390381, "learning_rate": 1.883719423669813e-05, "loss": 0.39664995670318604, "step": 3343, "token_acc": 0.8636143018928976 }, { "epoch": 0.18043489990827175, "grad_norm": 0.4218025207519531, "learning_rate": 1.8836376212791938e-05, "loss": 0.47306597232818604, "step": 3344, "token_acc": 0.8413004077933847 }, { "epoch": 0.18048885771326822, "grad_norm": 0.4067704975605011, "learning_rate": 1.883555791902513e-05, "loss": 0.4981149137020111, "step": 3345, "token_acc": 0.838623326959847 }, { "epoch": 0.1805428155182647, "grad_norm": 0.3899487853050232, "learning_rate": 1.8834739355422697e-05, "loss": 0.3901726007461548, "step": 3346, "token_acc": 0.8686925795053003 }, { "epoch": 0.1805967733232612, "grad_norm": 0.4857258200645447, "learning_rate": 1.8833920522009638e-05, "loss": 0.38825923204421997, "step": 3347, "token_acc": 0.8612282309807516 }, { "epoch": 0.1806507311282577, "grad_norm": 0.3135257363319397, "learning_rate": 1.8833101418810962e-05, "loss": 0.4171522557735443, "step": 3348, "token_acc": 0.8574369531652084 }, { "epoch": 0.1807046889332542, "grad_norm": 0.42850247025489807, "learning_rate": 1.883228204585168e-05, "loss": 0.4160439968109131, "step": 3349, "token_acc": 0.8580315839846866 }, { "epoch": 0.18075864673825068, "grad_norm": 0.4390909969806671, "learning_rate": 1.883146240315682e-05, "loss": 0.4037435054779053, "step": 3350, "token_acc": 0.8620433436532507 }, { "epoch": 0.18081260454324719, "grad_norm": 0.4993371367454529, "learning_rate": 1.8830642490751412e-05, "loss": 0.39835894107818604, "step": 3351, "token_acc": 0.868417104276069 }, { "epoch": 0.18086656234824366, "grad_norm": 0.45052292943000793, "learning_rate": 1.8829822308660494e-05, "loss": 0.4752246141433716, "step": 3352, "token_acc": 0.8374086565486228 }, { "epoch": 0.18092052015324017, "grad_norm": 0.5042456388473511, "learning_rate": 1.8829001856909116e-05, "loss": 0.41173118352890015, "step": 3353, "token_acc": 0.8557737627651217 }, { "epoch": 0.18097447795823665, "grad_norm": 0.31341174244880676, "learning_rate": 1.8828181135522336e-05, "loss": 0.3804049789905548, "step": 3354, "token_acc": 0.8706688666074375 }, { "epoch": 0.18102843576323316, "grad_norm": 0.44798743724823, "learning_rate": 1.8827360144525216e-05, "loss": 0.41719120740890503, "step": 3355, "token_acc": 0.8571428571428571 }, { "epoch": 0.18108239356822964, "grad_norm": 0.4911371171474457, "learning_rate": 1.8826538883942832e-05, "loss": 0.42418456077575684, "step": 3356, "token_acc": 0.8571895424836601 }, { "epoch": 0.18113635137322615, "grad_norm": 0.390931099653244, "learning_rate": 1.882571735380026e-05, "loss": 0.3770160377025604, "step": 3357, "token_acc": 0.8635456638526477 }, { "epoch": 0.18119030917822262, "grad_norm": 0.44653841853141785, "learning_rate": 1.882489555412259e-05, "loss": 0.3832358717918396, "step": 3358, "token_acc": 0.8666666666666667 }, { "epoch": 0.18124426698321913, "grad_norm": 0.41606172919273376, "learning_rate": 1.8824073484934925e-05, "loss": 0.43538564443588257, "step": 3359, "token_acc": 0.8582526338193041 }, { "epoch": 0.1812982247882156, "grad_norm": 0.40502628684043884, "learning_rate": 1.8823251146262365e-05, "loss": 0.5280282497406006, "step": 3360, "token_acc": 0.8264502846070001 }, { "epoch": 0.18135218259321212, "grad_norm": 0.4961110055446625, "learning_rate": 1.8822428538130028e-05, "loss": 0.48520466685295105, "step": 3361, "token_acc": 0.8396790663749089 }, { "epoch": 0.1814061403982086, "grad_norm": 0.4730583727359772, "learning_rate": 1.8821605660563035e-05, "loss": 0.41159695386886597, "step": 3362, "token_acc": 0.8611485979907032 }, { "epoch": 0.1814600982032051, "grad_norm": 0.34203675389289856, "learning_rate": 1.8820782513586513e-05, "loss": 0.37363776564598083, "step": 3363, "token_acc": 0.8715654952076677 }, { "epoch": 0.18151405600820159, "grad_norm": 0.47689947485923767, "learning_rate": 1.881995909722561e-05, "loss": 0.4519069790840149, "step": 3364, "token_acc": 0.8487551590999867 }, { "epoch": 0.1815680138131981, "grad_norm": 0.4474797546863556, "learning_rate": 1.881913541150546e-05, "loss": 0.48990899324417114, "step": 3365, "token_acc": 0.8353598422609267 }, { "epoch": 0.18162197161819457, "grad_norm": 0.471190869808197, "learning_rate": 1.8818311456451228e-05, "loss": 0.3918519616127014, "step": 3366, "token_acc": 0.863505989539396 }, { "epoch": 0.18167592942319105, "grad_norm": 0.4726957678794861, "learning_rate": 1.8817487232088073e-05, "loss": 0.4239690601825714, "step": 3367, "token_acc": 0.8594223711504707 }, { "epoch": 0.18172988722818756, "grad_norm": 0.4430675506591797, "learning_rate": 1.881666273844117e-05, "loss": 0.36181747913360596, "step": 3368, "token_acc": 0.8696821927022028 }, { "epoch": 0.18178384503318404, "grad_norm": 0.5092227458953857, "learning_rate": 1.8815837975535697e-05, "loss": 0.3995873034000397, "step": 3369, "token_acc": 0.8620378719567178 }, { "epoch": 0.18183780283818055, "grad_norm": 0.45983925461769104, "learning_rate": 1.8815012943396838e-05, "loss": 0.4030524492263794, "step": 3370, "token_acc": 0.8634427461789025 }, { "epoch": 0.18189176064317703, "grad_norm": 0.35855332016944885, "learning_rate": 1.8814187642049796e-05, "loss": 0.37384283542633057, "step": 3371, "token_acc": 0.8681454097350585 }, { "epoch": 0.18194571844817353, "grad_norm": 0.41800862550735474, "learning_rate": 1.881336207151977e-05, "loss": 0.478219598531723, "step": 3372, "token_acc": 0.8382193268186754 }, { "epoch": 0.18199967625317, "grad_norm": 0.44959864020347595, "learning_rate": 1.8812536231831974e-05, "loss": 0.40209370851516724, "step": 3373, "token_acc": 0.8584321359051496 }, { "epoch": 0.18205363405816652, "grad_norm": 0.522994875907898, "learning_rate": 1.8811710123011636e-05, "loss": 0.4238823652267456, "step": 3374, "token_acc": 0.8536975349766822 }, { "epoch": 0.182107591863163, "grad_norm": 0.4569295048713684, "learning_rate": 1.8810883745083974e-05, "loss": 0.4104708433151245, "step": 3375, "token_acc": 0.8612255085486085 }, { "epoch": 0.1821615496681595, "grad_norm": 0.44490617513656616, "learning_rate": 1.8810057098074232e-05, "loss": 0.35433363914489746, "step": 3376, "token_acc": 0.8754818166582873 }, { "epoch": 0.18221550747315599, "grad_norm": 0.40308281779289246, "learning_rate": 1.8809230182007653e-05, "loss": 0.43086305260658264, "step": 3377, "token_acc": 0.8504635761589404 }, { "epoch": 0.1822694652781525, "grad_norm": 0.38822057843208313, "learning_rate": 1.8808402996909494e-05, "loss": 0.4385319948196411, "step": 3378, "token_acc": 0.856260162601626 }, { "epoch": 0.18232342308314897, "grad_norm": 0.47460246086120605, "learning_rate": 1.8807575542805016e-05, "loss": 0.464337557554245, "step": 3379, "token_acc": 0.846869578930768 }, { "epoch": 0.18237738088814548, "grad_norm": 0.37898942828178406, "learning_rate": 1.8806747819719485e-05, "loss": 0.418662965297699, "step": 3380, "token_acc": 0.854903078677309 }, { "epoch": 0.18243133869314196, "grad_norm": 0.39933836460113525, "learning_rate": 1.8805919827678184e-05, "loss": 0.39829695224761963, "step": 3381, "token_acc": 0.861652739090065 }, { "epoch": 0.18248529649813847, "grad_norm": 0.3433338701725006, "learning_rate": 1.8805091566706397e-05, "loss": 0.3936275243759155, "step": 3382, "token_acc": 0.8670303443573133 }, { "epoch": 0.18253925430313495, "grad_norm": 0.42706161737442017, "learning_rate": 1.880426303682942e-05, "loss": 0.4017215371131897, "step": 3383, "token_acc": 0.8598814918089926 }, { "epoch": 0.18259321210813145, "grad_norm": 0.359742134809494, "learning_rate": 1.880343423807256e-05, "loss": 0.42086124420166016, "step": 3384, "token_acc": 0.8580847723704866 }, { "epoch": 0.18264716991312793, "grad_norm": 0.35549047589302063, "learning_rate": 1.880260517046112e-05, "loss": 0.43183964490890503, "step": 3385, "token_acc": 0.8540485370832388 }, { "epoch": 0.18270112771812444, "grad_norm": 0.48070576786994934, "learning_rate": 1.880177583402043e-05, "loss": 0.36318522691726685, "step": 3386, "token_acc": 0.8734838016275142 }, { "epoch": 0.18275508552312092, "grad_norm": 0.4859277606010437, "learning_rate": 1.8800946228775807e-05, "loss": 0.4134848117828369, "step": 3387, "token_acc": 0.8586420734615996 }, { "epoch": 0.1828090433281174, "grad_norm": 0.476186603307724, "learning_rate": 1.8800116354752594e-05, "loss": 0.46134573221206665, "step": 3388, "token_acc": 0.8412927427499639 }, { "epoch": 0.1828630011331139, "grad_norm": 0.30692192912101746, "learning_rate": 1.879928621197613e-05, "loss": 0.4065176546573639, "step": 3389, "token_acc": 0.862776025236593 }, { "epoch": 0.18291695893811039, "grad_norm": 0.4467954635620117, "learning_rate": 1.879845580047177e-05, "loss": 0.4193430542945862, "step": 3390, "token_acc": 0.857815897559797 }, { "epoch": 0.1829709167431069, "grad_norm": 0.565118134021759, "learning_rate": 1.8797625120264878e-05, "loss": 0.42452627420425415, "step": 3391, "token_acc": 0.8582012405237767 }, { "epoch": 0.18302487454810337, "grad_norm": 0.4726950228214264, "learning_rate": 1.879679417138082e-05, "loss": 0.34148144721984863, "step": 3392, "token_acc": 0.8754122389153536 }, { "epoch": 0.18307883235309988, "grad_norm": 0.4320606589317322, "learning_rate": 1.8795962953844968e-05, "loss": 0.42013609409332275, "step": 3393, "token_acc": 0.8541916886860437 }, { "epoch": 0.18313279015809636, "grad_norm": 0.3438376486301422, "learning_rate": 1.8795131467682717e-05, "loss": 0.4227428436279297, "step": 3394, "token_acc": 0.856670341786108 }, { "epoch": 0.18318674796309287, "grad_norm": 0.3365902602672577, "learning_rate": 1.879429971291945e-05, "loss": 0.3947907090187073, "step": 3395, "token_acc": 0.8631206411991257 }, { "epoch": 0.18324070576808935, "grad_norm": 0.4617250859737396, "learning_rate": 1.8793467689580576e-05, "loss": 0.39879512786865234, "step": 3396, "token_acc": 0.8611361587015329 }, { "epoch": 0.18329466357308585, "grad_norm": 0.406441867351532, "learning_rate": 1.8792635397691502e-05, "loss": 0.39097678661346436, "step": 3397, "token_acc": 0.8617021276595744 }, { "epoch": 0.18334862137808233, "grad_norm": 0.3986489772796631, "learning_rate": 1.879180283727765e-05, "loss": 0.40796270966529846, "step": 3398, "token_acc": 0.8604594330400782 }, { "epoch": 0.18340257918307884, "grad_norm": 0.4833254814147949, "learning_rate": 1.8790970008364438e-05, "loss": 0.3996659815311432, "step": 3399, "token_acc": 0.867399438727783 }, { "epoch": 0.18345653698807532, "grad_norm": 0.5381468534469604, "learning_rate": 1.8790136910977304e-05, "loss": 0.39065730571746826, "step": 3400, "token_acc": 0.8623336745138178 }, { "epoch": 0.18351049479307183, "grad_norm": 0.42564657330513, "learning_rate": 1.8789303545141692e-05, "loss": 0.4205382764339447, "step": 3401, "token_acc": 0.8606683197543984 }, { "epoch": 0.1835644525980683, "grad_norm": 0.4038833677768707, "learning_rate": 1.8788469910883053e-05, "loss": 0.33444923162460327, "step": 3402, "token_acc": 0.8836797164491703 }, { "epoch": 0.1836184104030648, "grad_norm": 0.4032156467437744, "learning_rate": 1.8787636008226846e-05, "loss": 0.4207857847213745, "step": 3403, "token_acc": 0.8559872533398701 }, { "epoch": 0.1836723682080613, "grad_norm": 0.40918242931365967, "learning_rate": 1.878680183719854e-05, "loss": 0.4573497176170349, "step": 3404, "token_acc": 0.8465930719451846 }, { "epoch": 0.1837263260130578, "grad_norm": 0.36892107129096985, "learning_rate": 1.8785967397823602e-05, "loss": 0.3396800756454468, "step": 3405, "token_acc": 0.8768097853220169 }, { "epoch": 0.18378028381805428, "grad_norm": 0.4149342179298401, "learning_rate": 1.8785132690127526e-05, "loss": 0.4327641725540161, "step": 3406, "token_acc": 0.8488630535134211 }, { "epoch": 0.1838342416230508, "grad_norm": 0.4644538164138794, "learning_rate": 1.8784297714135796e-05, "loss": 0.3891403079032898, "step": 3407, "token_acc": 0.8652712264150944 }, { "epoch": 0.18388819942804727, "grad_norm": 0.37698066234588623, "learning_rate": 1.8783462469873916e-05, "loss": 0.42974430322647095, "step": 3408, "token_acc": 0.8556219163853545 }, { "epoch": 0.18394215723304375, "grad_norm": 0.3662627637386322, "learning_rate": 1.8782626957367394e-05, "loss": 0.43229585886001587, "step": 3409, "token_acc": 0.8521882741535921 }, { "epoch": 0.18399611503804025, "grad_norm": 0.47175097465515137, "learning_rate": 1.8781791176641744e-05, "loss": 0.41543081402778625, "step": 3410, "token_acc": 0.856548558313011 }, { "epoch": 0.18405007284303673, "grad_norm": 0.47255465388298035, "learning_rate": 1.878095512772249e-05, "loss": 0.4075276255607605, "step": 3411, "token_acc": 0.8584562012142237 }, { "epoch": 0.18410403064803324, "grad_norm": 0.35025128722190857, "learning_rate": 1.878011881063517e-05, "loss": 0.40049290657043457, "step": 3412, "token_acc": 0.8607207457910115 }, { "epoch": 0.18415798845302972, "grad_norm": 0.4877271056175232, "learning_rate": 1.8779282225405322e-05, "loss": 0.3907157778739929, "step": 3413, "token_acc": 0.8644572335780909 }, { "epoch": 0.18421194625802623, "grad_norm": 0.45096534490585327, "learning_rate": 1.8778445372058493e-05, "loss": 0.38589221239089966, "step": 3414, "token_acc": 0.8629349470499244 }, { "epoch": 0.1842659040630227, "grad_norm": 0.44584783911705017, "learning_rate": 1.8777608250620244e-05, "loss": 0.4349234104156494, "step": 3415, "token_acc": 0.8555474629764506 }, { "epoch": 0.1843198618680192, "grad_norm": 0.48559820652008057, "learning_rate": 1.8776770861116135e-05, "loss": 0.43357568979263306, "step": 3416, "token_acc": 0.853779021900214 }, { "epoch": 0.1843738196730157, "grad_norm": 0.40690648555755615, "learning_rate": 1.8775933203571746e-05, "loss": 0.4518730640411377, "step": 3417, "token_acc": 0.8478152929493545 }, { "epoch": 0.1844277774780122, "grad_norm": 0.4626676142215729, "learning_rate": 1.8775095278012655e-05, "loss": 0.4747311770915985, "step": 3418, "token_acc": 0.8390945243193637 }, { "epoch": 0.18448173528300868, "grad_norm": 0.4359486997127533, "learning_rate": 1.877425708446445e-05, "loss": 0.46538230776786804, "step": 3419, "token_acc": 0.8452141642183393 }, { "epoch": 0.1845356930880052, "grad_norm": 0.3871725797653198, "learning_rate": 1.8773418622952737e-05, "loss": 0.42207998037338257, "step": 3420, "token_acc": 0.8561470215462611 }, { "epoch": 0.18458965089300167, "grad_norm": 0.5051628351211548, "learning_rate": 1.8772579893503116e-05, "loss": 0.4927695393562317, "step": 3421, "token_acc": 0.8332434860736747 }, { "epoch": 0.18464360869799817, "grad_norm": 0.48658886551856995, "learning_rate": 1.8771740896141205e-05, "loss": 0.4727562963962555, "step": 3422, "token_acc": 0.8445868945868946 }, { "epoch": 0.18469756650299465, "grad_norm": 0.3325451910495758, "learning_rate": 1.8770901630892622e-05, "loss": 0.4348660707473755, "step": 3423, "token_acc": 0.854413610375122 }, { "epoch": 0.18475152430799116, "grad_norm": 0.45801547169685364, "learning_rate": 1.8770062097783e-05, "loss": 0.437023401260376, "step": 3424, "token_acc": 0.8518631643249848 }, { "epoch": 0.18480548211298764, "grad_norm": 0.43190088868141174, "learning_rate": 1.876922229683798e-05, "loss": 0.39754098653793335, "step": 3425, "token_acc": 0.859762450510523 }, { "epoch": 0.18485943991798415, "grad_norm": 0.38654041290283203, "learning_rate": 1.876838222808321e-05, "loss": 0.4666937291622162, "step": 3426, "token_acc": 0.839613091890676 }, { "epoch": 0.18491339772298063, "grad_norm": 0.42334309220314026, "learning_rate": 1.876754189154434e-05, "loss": 0.4333457350730896, "step": 3427, "token_acc": 0.8510849524890086 }, { "epoch": 0.18496735552797713, "grad_norm": 0.47443243861198425, "learning_rate": 1.8766701287247043e-05, "loss": 0.4166189432144165, "step": 3428, "token_acc": 0.8556366585563666 }, { "epoch": 0.1850213133329736, "grad_norm": 0.36981257796287537, "learning_rate": 1.8765860415216984e-05, "loss": 0.3855259120464325, "step": 3429, "token_acc": 0.8673965936739659 }, { "epoch": 0.18507527113797012, "grad_norm": 0.5884237289428711, "learning_rate": 1.876501927547984e-05, "loss": 0.4440123438835144, "step": 3430, "token_acc": 0.8474849684704502 }, { "epoch": 0.1851292289429666, "grad_norm": 0.5269393920898438, "learning_rate": 1.8764177868061305e-05, "loss": 0.4771035313606262, "step": 3431, "token_acc": 0.8388386905643609 }, { "epoch": 0.18518318674796308, "grad_norm": 0.4261573851108551, "learning_rate": 1.8763336192987078e-05, "loss": 0.4178769290447235, "step": 3432, "token_acc": 0.8572394213870488 }, { "epoch": 0.1852371445529596, "grad_norm": 0.48328474164009094, "learning_rate": 1.8762494250282856e-05, "loss": 0.3911362886428833, "step": 3433, "token_acc": 0.8654094132817537 }, { "epoch": 0.18529110235795607, "grad_norm": 0.4544932246208191, "learning_rate": 1.8761652039974353e-05, "loss": 0.49547240138053894, "step": 3434, "token_acc": 0.8452613922281241 }, { "epoch": 0.18534506016295257, "grad_norm": 0.3932752013206482, "learning_rate": 1.8760809562087296e-05, "loss": 0.42256414890289307, "step": 3435, "token_acc": 0.8537762634866554 }, { "epoch": 0.18539901796794905, "grad_norm": 0.6095821857452393, "learning_rate": 1.8759966816647406e-05, "loss": 0.42927879095077515, "step": 3436, "token_acc": 0.8555699481865285 }, { "epoch": 0.18545297577294556, "grad_norm": 0.3722276985645294, "learning_rate": 1.8759123803680426e-05, "loss": 0.48749154806137085, "step": 3437, "token_acc": 0.8404613789528252 }, { "epoch": 0.18550693357794204, "grad_norm": 0.5582732558250427, "learning_rate": 1.87582805232121e-05, "loss": 0.4627319574356079, "step": 3438, "token_acc": 0.8444753946146704 }, { "epoch": 0.18556089138293855, "grad_norm": 0.49394136667251587, "learning_rate": 1.8757436975268182e-05, "loss": 0.44940823316574097, "step": 3439, "token_acc": 0.8450456218985113 }, { "epoch": 0.18561484918793503, "grad_norm": 0.3706246316432953, "learning_rate": 1.8756593159874433e-05, "loss": 0.37696969509124756, "step": 3440, "token_acc": 0.8707261938400678 }, { "epoch": 0.18566880699293153, "grad_norm": 0.48614224791526794, "learning_rate": 1.875574907705662e-05, "loss": 0.45409056544303894, "step": 3441, "token_acc": 0.8441233676021117 }, { "epoch": 0.185722764797928, "grad_norm": 0.3037472367286682, "learning_rate": 1.8754904726840527e-05, "loss": 0.36700206995010376, "step": 3442, "token_acc": 0.8730356133408705 }, { "epoch": 0.18577672260292452, "grad_norm": 0.47492504119873047, "learning_rate": 1.8754060109251937e-05, "loss": 0.409450501203537, "step": 3443, "token_acc": 0.8613951789627465 }, { "epoch": 0.185830680407921, "grad_norm": 0.5178325772285461, "learning_rate": 1.8753215224316642e-05, "loss": 0.350883424282074, "step": 3444, "token_acc": 0.8810093939952109 }, { "epoch": 0.1858846382129175, "grad_norm": 0.40637093782424927, "learning_rate": 1.8752370072060452e-05, "loss": 0.4642895460128784, "step": 3445, "token_acc": 0.8476964769647697 }, { "epoch": 0.185938596017914, "grad_norm": 0.36852961778640747, "learning_rate": 1.8751524652509165e-05, "loss": 0.3948363959789276, "step": 3446, "token_acc": 0.8668544600938967 }, { "epoch": 0.1859925538229105, "grad_norm": 0.35438284277915955, "learning_rate": 1.8750678965688612e-05, "loss": 0.3596085011959076, "step": 3447, "token_acc": 0.8789327404113396 }, { "epoch": 0.18604651162790697, "grad_norm": 0.5255045294761658, "learning_rate": 1.8749833011624617e-05, "loss": 0.4532296061515808, "step": 3448, "token_acc": 0.8456014362657092 }, { "epoch": 0.18610046943290348, "grad_norm": 0.4111255407333374, "learning_rate": 1.8748986790343012e-05, "loss": 0.38938888907432556, "step": 3449, "token_acc": 0.8670615333991445 }, { "epoch": 0.18615442723789996, "grad_norm": 0.44025614857673645, "learning_rate": 1.8748140301869644e-05, "loss": 0.47879162430763245, "step": 3450, "token_acc": 0.8379562043795621 }, { "epoch": 0.18620838504289647, "grad_norm": 0.4702818691730499, "learning_rate": 1.8747293546230362e-05, "loss": 0.43753159046173096, "step": 3451, "token_acc": 0.846524646244752 }, { "epoch": 0.18626234284789295, "grad_norm": 0.6193479895591736, "learning_rate": 1.8746446523451026e-05, "loss": 0.4882729649543762, "step": 3452, "token_acc": 0.8360203941022462 }, { "epoch": 0.18631630065288943, "grad_norm": 0.34273144602775574, "learning_rate": 1.8745599233557507e-05, "loss": 0.4042413830757141, "step": 3453, "token_acc": 0.8584083440491499 }, { "epoch": 0.18637025845788593, "grad_norm": 0.49220356345176697, "learning_rate": 1.8744751676575674e-05, "loss": 0.4607917070388794, "step": 3454, "token_acc": 0.8455166400232524 }, { "epoch": 0.18642421626288241, "grad_norm": 0.4402506947517395, "learning_rate": 1.874390385253142e-05, "loss": 0.41396278142929077, "step": 3455, "token_acc": 0.8627618119824647 }, { "epoch": 0.18647817406787892, "grad_norm": 0.44778287410736084, "learning_rate": 1.874305576145063e-05, "loss": 0.38135826587677, "step": 3456, "token_acc": 0.8644738845743871 }, { "epoch": 0.1865321318728754, "grad_norm": 0.4189932644367218, "learning_rate": 1.8742207403359208e-05, "loss": 0.3950164318084717, "step": 3457, "token_acc": 0.8623935454952936 }, { "epoch": 0.1865860896778719, "grad_norm": 0.47120168805122375, "learning_rate": 1.8741358778283062e-05, "loss": 0.4147224724292755, "step": 3458, "token_acc": 0.8554626611874734 }, { "epoch": 0.1866400474828684, "grad_norm": 0.3894632160663605, "learning_rate": 1.8740509886248112e-05, "loss": 0.44480764865875244, "step": 3459, "token_acc": 0.8479387805480129 }, { "epoch": 0.1866940052878649, "grad_norm": 0.4164074957370758, "learning_rate": 1.8739660727280276e-05, "loss": 0.42361384630203247, "step": 3460, "token_acc": 0.8518299007260335 }, { "epoch": 0.18674796309286137, "grad_norm": 0.3140204846858978, "learning_rate": 1.873881130140549e-05, "loss": 0.459593802690506, "step": 3461, "token_acc": 0.8472375690607735 }, { "epoch": 0.18680192089785788, "grad_norm": 0.46053001284599304, "learning_rate": 1.87379616086497e-05, "loss": 0.4841994047164917, "step": 3462, "token_acc": 0.8415927917453858 }, { "epoch": 0.18685587870285436, "grad_norm": 0.4280294179916382, "learning_rate": 1.8737111649038847e-05, "loss": 0.41909295320510864, "step": 3463, "token_acc": 0.8592082151060667 }, { "epoch": 0.18690983650785087, "grad_norm": 0.47759732604026794, "learning_rate": 1.8736261422598895e-05, "loss": 0.40619075298309326, "step": 3464, "token_acc": 0.862882096069869 }, { "epoch": 0.18696379431284735, "grad_norm": 0.42245346307754517, "learning_rate": 1.873541092935581e-05, "loss": 0.4442087411880493, "step": 3465, "token_acc": 0.8484492481203008 }, { "epoch": 0.18701775211784386, "grad_norm": 0.4670504033565521, "learning_rate": 1.8734560169335563e-05, "loss": 0.46850961446762085, "step": 3466, "token_acc": 0.8410104011887073 }, { "epoch": 0.18707170992284033, "grad_norm": 0.5428774356842041, "learning_rate": 1.8733709142564133e-05, "loss": 0.4402109980583191, "step": 3467, "token_acc": 0.8505862646566165 }, { "epoch": 0.18712566772783684, "grad_norm": 0.4641821086406708, "learning_rate": 1.8732857849067518e-05, "loss": 0.4427494406700134, "step": 3468, "token_acc": 0.8481417343812442 }, { "epoch": 0.18717962553283332, "grad_norm": 0.4375078082084656, "learning_rate": 1.8732006288871712e-05, "loss": 0.46414753794670105, "step": 3469, "token_acc": 0.8418681839734471 }, { "epoch": 0.18723358333782983, "grad_norm": 0.4551434814929962, "learning_rate": 1.873115446200272e-05, "loss": 0.4310867190361023, "step": 3470, "token_acc": 0.8492590700051099 }, { "epoch": 0.1872875411428263, "grad_norm": 0.4521028995513916, "learning_rate": 1.8730302368486554e-05, "loss": 0.4643322229385376, "step": 3471, "token_acc": 0.8415887310934073 }, { "epoch": 0.18734149894782282, "grad_norm": 0.3368867039680481, "learning_rate": 1.8729450008349245e-05, "loss": 0.38342416286468506, "step": 3472, "token_acc": 0.8688888888888889 }, { "epoch": 0.1873954567528193, "grad_norm": 0.4447701871395111, "learning_rate": 1.8728597381616817e-05, "loss": 0.4531744718551636, "step": 3473, "token_acc": 0.8461012311901505 }, { "epoch": 0.18744941455781577, "grad_norm": 0.3799300193786621, "learning_rate": 1.8727744488315315e-05, "loss": 0.3697837293148041, "step": 3474, "token_acc": 0.8711596842744384 }, { "epoch": 0.18750337236281228, "grad_norm": 0.6361986398696899, "learning_rate": 1.8726891328470782e-05, "loss": 0.45358359813690186, "step": 3475, "token_acc": 0.8461736998322364 }, { "epoch": 0.18755733016780876, "grad_norm": 0.37595587968826294, "learning_rate": 1.8726037902109274e-05, "loss": 0.40464287996292114, "step": 3476, "token_acc": 0.8579678758346869 }, { "epoch": 0.18761128797280527, "grad_norm": 0.4919371008872986, "learning_rate": 1.872518420925685e-05, "loss": 0.3914754092693329, "step": 3477, "token_acc": 0.8659268692013826 }, { "epoch": 0.18766524577780175, "grad_norm": 0.4415012001991272, "learning_rate": 1.872433024993959e-05, "loss": 0.4406944215297699, "step": 3478, "token_acc": 0.8536205186339805 }, { "epoch": 0.18771920358279826, "grad_norm": 0.45309603214263916, "learning_rate": 1.8723476024183572e-05, "loss": 0.35608887672424316, "step": 3479, "token_acc": 0.8712465878070974 }, { "epoch": 0.18777316138779473, "grad_norm": 0.5565283894538879, "learning_rate": 1.872262153201488e-05, "loss": 0.538133442401886, "step": 3480, "token_acc": 0.8248630553864882 }, { "epoch": 0.18782711919279124, "grad_norm": 0.39207586646080017, "learning_rate": 1.8721766773459606e-05, "loss": 0.3562515676021576, "step": 3481, "token_acc": 0.872394417255755 }, { "epoch": 0.18788107699778772, "grad_norm": 0.4630539119243622, "learning_rate": 1.8720911748543865e-05, "loss": 0.4242056906223297, "step": 3482, "token_acc": 0.8554483837330553 }, { "epoch": 0.18793503480278423, "grad_norm": 0.39569398760795593, "learning_rate": 1.872005645729376e-05, "loss": 0.3839920163154602, "step": 3483, "token_acc": 0.8660260033917467 }, { "epoch": 0.1879889926077807, "grad_norm": 0.4063309133052826, "learning_rate": 1.871920089973542e-05, "loss": 0.4208875298500061, "step": 3484, "token_acc": 0.8548295989455846 }, { "epoch": 0.18804295041277722, "grad_norm": 0.4467357099056244, "learning_rate": 1.8718345075894965e-05, "loss": 0.45354318618774414, "step": 3485, "token_acc": 0.8475677169707021 }, { "epoch": 0.1880969082177737, "grad_norm": 0.42873573303222656, "learning_rate": 1.8717488985798535e-05, "loss": 0.4638023376464844, "step": 3486, "token_acc": 0.8444857496902106 }, { "epoch": 0.1881508660227702, "grad_norm": 0.39216771721839905, "learning_rate": 1.8716632629472274e-05, "loss": 0.47576630115509033, "step": 3487, "token_acc": 0.8385678391959799 }, { "epoch": 0.18820482382776668, "grad_norm": 0.40706944465637207, "learning_rate": 1.8715776006942342e-05, "loss": 0.43439850211143494, "step": 3488, "token_acc": 0.8496055864476917 }, { "epoch": 0.1882587816327632, "grad_norm": 0.5007027983665466, "learning_rate": 1.8714919118234888e-05, "loss": 0.416892409324646, "step": 3489, "token_acc": 0.8578415389631562 }, { "epoch": 0.18831273943775967, "grad_norm": 0.35097721219062805, "learning_rate": 1.8714061963376088e-05, "loss": 0.3866609036922455, "step": 3490, "token_acc": 0.8639531642040702 }, { "epoch": 0.18836669724275618, "grad_norm": 0.37539681792259216, "learning_rate": 1.8713204542392123e-05, "loss": 0.3695797622203827, "step": 3491, "token_acc": 0.8724981467753892 }, { "epoch": 0.18842065504775266, "grad_norm": 0.4387291967868805, "learning_rate": 1.871234685530917e-05, "loss": 0.37119579315185547, "step": 3492, "token_acc": 0.8683032686830326 }, { "epoch": 0.18847461285274916, "grad_norm": 0.3874102830886841, "learning_rate": 1.8711488902153426e-05, "loss": 0.42030882835388184, "step": 3493, "token_acc": 0.8553213192791568 }, { "epoch": 0.18852857065774564, "grad_norm": 0.35516732931137085, "learning_rate": 1.8710630682951094e-05, "loss": 0.36481767892837524, "step": 3494, "token_acc": 0.8722730042656917 }, { "epoch": 0.18858252846274215, "grad_norm": 0.46069663763046265, "learning_rate": 1.870977219772838e-05, "loss": 0.4035584330558777, "step": 3495, "token_acc": 0.861236802413273 }, { "epoch": 0.18863648626773863, "grad_norm": 0.3852735161781311, "learning_rate": 1.8708913446511513e-05, "loss": 0.4616759419441223, "step": 3496, "token_acc": 0.8470695970695971 }, { "epoch": 0.1886904440727351, "grad_norm": 0.315870463848114, "learning_rate": 1.8708054429326702e-05, "loss": 0.47565823793411255, "step": 3497, "token_acc": 0.8428442028985508 }, { "epoch": 0.18874440187773162, "grad_norm": 0.4474492073059082, "learning_rate": 1.8707195146200193e-05, "loss": 0.491049587726593, "step": 3498, "token_acc": 0.837491919844861 }, { "epoch": 0.1887983596827281, "grad_norm": 0.44643914699554443, "learning_rate": 1.8706335597158224e-05, "loss": 0.49000075459480286, "step": 3499, "token_acc": 0.8394396551724138 }, { "epoch": 0.1888523174877246, "grad_norm": 0.47994667291641235, "learning_rate": 1.870547578222705e-05, "loss": 0.44924163818359375, "step": 3500, "token_acc": 0.8496082553028855 }, { "epoch": 0.18890627529272108, "grad_norm": 0.4612659513950348, "learning_rate": 1.8704615701432925e-05, "loss": 0.4392133355140686, "step": 3501, "token_acc": 0.8507232994767621 }, { "epoch": 0.1889602330977176, "grad_norm": 0.48420971632003784, "learning_rate": 1.8703755354802114e-05, "loss": 0.4883406162261963, "step": 3502, "token_acc": 0.8342513770655984 }, { "epoch": 0.18901419090271407, "grad_norm": 0.3204919993877411, "learning_rate": 1.87028947423609e-05, "loss": 0.35974037647247314, "step": 3503, "token_acc": 0.8763868789194404 }, { "epoch": 0.18906814870771058, "grad_norm": 0.4552333354949951, "learning_rate": 1.8702033864135557e-05, "loss": 0.35699260234832764, "step": 3504, "token_acc": 0.874688382915074 }, { "epoch": 0.18912210651270706, "grad_norm": 0.40883877873420715, "learning_rate": 1.8701172720152375e-05, "loss": 0.38688042759895325, "step": 3505, "token_acc": 0.8716814159292036 }, { "epoch": 0.18917606431770356, "grad_norm": 0.4349863827228546, "learning_rate": 1.870031131043766e-05, "loss": 0.4323861598968506, "step": 3506, "token_acc": 0.8530047008004066 }, { "epoch": 0.18923002212270004, "grad_norm": 0.4617287814617157, "learning_rate": 1.869944963501772e-05, "loss": 0.46236276626586914, "step": 3507, "token_acc": 0.8400230182707524 }, { "epoch": 0.18928397992769655, "grad_norm": 0.42376309633255005, "learning_rate": 1.869858769391887e-05, "loss": 0.4305346608161926, "step": 3508, "token_acc": 0.8534667619728378 }, { "epoch": 0.18933793773269303, "grad_norm": 0.486673504114151, "learning_rate": 1.8697725487167426e-05, "loss": 0.42488113045692444, "step": 3509, "token_acc": 0.8545218145044785 }, { "epoch": 0.18939189553768954, "grad_norm": 0.322231262922287, "learning_rate": 1.8696863014789723e-05, "loss": 0.3772001564502716, "step": 3510, "token_acc": 0.8639518741323461 }, { "epoch": 0.18944585334268602, "grad_norm": 0.48187604546546936, "learning_rate": 1.86960002768121e-05, "loss": 0.43629539012908936, "step": 3511, "token_acc": 0.8550567034332763 }, { "epoch": 0.18949981114768252, "grad_norm": 0.40500354766845703, "learning_rate": 1.8695137273260914e-05, "loss": 0.35738593339920044, "step": 3512, "token_acc": 0.8705002337540907 }, { "epoch": 0.189553768952679, "grad_norm": 0.40573209524154663, "learning_rate": 1.8694274004162507e-05, "loss": 0.4398716390132904, "step": 3513, "token_acc": 0.846306917944516 }, { "epoch": 0.1896077267576755, "grad_norm": 0.48897984623908997, "learning_rate": 1.869341046954325e-05, "loss": 0.4432457387447357, "step": 3514, "token_acc": 0.8493859082094376 }, { "epoch": 0.189661684562672, "grad_norm": 0.34088343381881714, "learning_rate": 1.869254666942952e-05, "loss": 0.43040961027145386, "step": 3515, "token_acc": 0.8509522495169749 }, { "epoch": 0.1897156423676685, "grad_norm": 0.44733721017837524, "learning_rate": 1.8691682603847688e-05, "loss": 0.4322575330734253, "step": 3516, "token_acc": 0.852832674571805 }, { "epoch": 0.18976960017266498, "grad_norm": 0.42265909910202026, "learning_rate": 1.8690818272824145e-05, "loss": 0.44587254524230957, "step": 3517, "token_acc": 0.8558134191176471 }, { "epoch": 0.18982355797766146, "grad_norm": 0.48873651027679443, "learning_rate": 1.8689953676385292e-05, "loss": 0.5088940262794495, "step": 3518, "token_acc": 0.8346854431348538 }, { "epoch": 0.18987751578265796, "grad_norm": 0.41040077805519104, "learning_rate": 1.8689088814557525e-05, "loss": 0.40587514638900757, "step": 3519, "token_acc": 0.8611620795107033 }, { "epoch": 0.18993147358765444, "grad_norm": 0.4503386616706848, "learning_rate": 1.8688223687367265e-05, "loss": 0.352351576089859, "step": 3520, "token_acc": 0.8758193736343772 }, { "epoch": 0.18998543139265095, "grad_norm": 0.503811776638031, "learning_rate": 1.8687358294840932e-05, "loss": 0.39278846979141235, "step": 3521, "token_acc": 0.8601336302895323 }, { "epoch": 0.19003938919764743, "grad_norm": 0.5266345739364624, "learning_rate": 1.868649263700495e-05, "loss": 0.43615472316741943, "step": 3522, "token_acc": 0.8544841970158832 }, { "epoch": 0.19009334700264394, "grad_norm": 0.6702849864959717, "learning_rate": 1.868562671388576e-05, "loss": 0.4519382119178772, "step": 3523, "token_acc": 0.8472098413264396 }, { "epoch": 0.19014730480764042, "grad_norm": 0.3500654995441437, "learning_rate": 1.86847605255098e-05, "loss": 0.4098091125488281, "step": 3524, "token_acc": 0.8581856100104275 }, { "epoch": 0.19020126261263692, "grad_norm": 0.4669816493988037, "learning_rate": 1.8683894071903532e-05, "loss": 0.4043405055999756, "step": 3525, "token_acc": 0.8560812035526554 }, { "epoch": 0.1902552204176334, "grad_norm": 0.37711384892463684, "learning_rate": 1.8683027353093413e-05, "loss": 0.42950260639190674, "step": 3526, "token_acc": 0.8519875186541853 }, { "epoch": 0.1903091782226299, "grad_norm": 0.5440546870231628, "learning_rate": 1.8682160369105916e-05, "loss": 0.41273996233940125, "step": 3527, "token_acc": 0.8575809852405597 }, { "epoch": 0.1903631360276264, "grad_norm": 0.5053227543830872, "learning_rate": 1.8681293119967517e-05, "loss": 0.47579115629196167, "step": 3528, "token_acc": 0.8396375665180498 }, { "epoch": 0.1904170938326229, "grad_norm": 0.49399441480636597, "learning_rate": 1.86804256057047e-05, "loss": 0.5097519755363464, "step": 3529, "token_acc": 0.8313739252500438 }, { "epoch": 0.19047105163761938, "grad_norm": 0.4738190472126007, "learning_rate": 1.8679557826343956e-05, "loss": 0.45263445377349854, "step": 3530, "token_acc": 0.8426000537201181 }, { "epoch": 0.19052500944261588, "grad_norm": 0.4797190725803375, "learning_rate": 1.8678689781911788e-05, "loss": 0.43554776906967163, "step": 3531, "token_acc": 0.8484122228879568 }, { "epoch": 0.19057896724761236, "grad_norm": 0.46275123953819275, "learning_rate": 1.867782147243471e-05, "loss": 0.3878326117992401, "step": 3532, "token_acc": 0.8668812170860152 }, { "epoch": 0.19063292505260887, "grad_norm": 0.3842436969280243, "learning_rate": 1.8676952897939237e-05, "loss": 0.33964794874191284, "step": 3533, "token_acc": 0.8787772721969431 }, { "epoch": 0.19068688285760535, "grad_norm": 0.41555166244506836, "learning_rate": 1.86760840584519e-05, "loss": 0.4086739122867584, "step": 3534, "token_acc": 0.8605514912774339 }, { "epoch": 0.19074084066260186, "grad_norm": 0.4676637649536133, "learning_rate": 1.867521495399922e-05, "loss": 0.4632883071899414, "step": 3535, "token_acc": 0.8441542441788839 }, { "epoch": 0.19079479846759834, "grad_norm": 0.39833611249923706, "learning_rate": 1.8674345584607752e-05, "loss": 0.4367094337940216, "step": 3536, "token_acc": 0.8510211524434719 }, { "epoch": 0.19084875627259484, "grad_norm": 0.4996267259120941, "learning_rate": 1.8673475950304046e-05, "loss": 0.44452494382858276, "step": 3537, "token_acc": 0.8471377320230853 }, { "epoch": 0.19090271407759132, "grad_norm": 0.47510629892349243, "learning_rate": 1.867260605111465e-05, "loss": 0.38787204027175903, "step": 3538, "token_acc": 0.8656663093962129 }, { "epoch": 0.1909566718825878, "grad_norm": 0.42393457889556885, "learning_rate": 1.8671735887066145e-05, "loss": 0.4052249789237976, "step": 3539, "token_acc": 0.859359075982335 }, { "epoch": 0.1910106296875843, "grad_norm": 0.4778251349925995, "learning_rate": 1.8670865458185094e-05, "loss": 0.4235328733921051, "step": 3540, "token_acc": 0.8537559592835975 }, { "epoch": 0.1910645874925808, "grad_norm": 0.46202465891838074, "learning_rate": 1.866999476449808e-05, "loss": 0.4622996747493744, "step": 3541, "token_acc": 0.8405832921403855 }, { "epoch": 0.1911185452975773, "grad_norm": 0.45012137293815613, "learning_rate": 1.86691238060317e-05, "loss": 0.4058876633644104, "step": 3542, "token_acc": 0.8618712535835288 }, { "epoch": 0.19117250310257378, "grad_norm": 0.3731214702129364, "learning_rate": 1.866825258281255e-05, "loss": 0.4006679058074951, "step": 3543, "token_acc": 0.8594084329767149 }, { "epoch": 0.19122646090757028, "grad_norm": 0.39644569158554077, "learning_rate": 1.8667381094867238e-05, "loss": 0.3736860454082489, "step": 3544, "token_acc": 0.871928071928072 }, { "epoch": 0.19128041871256676, "grad_norm": 0.5089066624641418, "learning_rate": 1.8666509342222374e-05, "loss": 0.5259762406349182, "step": 3545, "token_acc": 0.8261755714077741 }, { "epoch": 0.19133437651756327, "grad_norm": 0.4364057779312134, "learning_rate": 1.8665637324904587e-05, "loss": 0.3750119209289551, "step": 3546, "token_acc": 0.8704193952905114 }, { "epoch": 0.19138833432255975, "grad_norm": 0.4168016314506531, "learning_rate": 1.866476504294051e-05, "loss": 0.4342251718044281, "step": 3547, "token_acc": 0.8538976014760148 }, { "epoch": 0.19144229212755626, "grad_norm": 0.3732960522174835, "learning_rate": 1.8663892496356775e-05, "loss": 0.4524889588356018, "step": 3548, "token_acc": 0.8478845337484081 }, { "epoch": 0.19149624993255274, "grad_norm": 0.4454995095729828, "learning_rate": 1.8663019685180034e-05, "loss": 0.4149710536003113, "step": 3549, "token_acc": 0.8576818593419862 }, { "epoch": 0.19155020773754924, "grad_norm": 0.4905838668346405, "learning_rate": 1.866214660943694e-05, "loss": 0.4624326229095459, "step": 3550, "token_acc": 0.8410868124585819 }, { "epoch": 0.19160416554254572, "grad_norm": 0.3344806432723999, "learning_rate": 1.8661273269154158e-05, "loss": 0.4160066246986389, "step": 3551, "token_acc": 0.8525125970311862 }, { "epoch": 0.19165812334754223, "grad_norm": 0.4823290705680847, "learning_rate": 1.8660399664358358e-05, "loss": 0.3694777190685272, "step": 3552, "token_acc": 0.8689259458490227 }, { "epoch": 0.1917120811525387, "grad_norm": 0.4375137686729431, "learning_rate": 1.8659525795076222e-05, "loss": 0.4407288134098053, "step": 3553, "token_acc": 0.8509628802679319 }, { "epoch": 0.19176603895753522, "grad_norm": 0.504543125629425, "learning_rate": 1.865865166133444e-05, "loss": 0.4142496883869171, "step": 3554, "token_acc": 0.863448275862069 }, { "epoch": 0.1918199967625317, "grad_norm": 0.38803666830062866, "learning_rate": 1.86577772631597e-05, "loss": 0.46288633346557617, "step": 3555, "token_acc": 0.8388544891640867 }, { "epoch": 0.1918739545675282, "grad_norm": 0.38277992606163025, "learning_rate": 1.865690260057871e-05, "loss": 0.4022263288497925, "step": 3556, "token_acc": 0.8628864960979946 }, { "epoch": 0.19192791237252468, "grad_norm": 0.3823951184749603, "learning_rate": 1.8656027673618184e-05, "loss": 0.42303788661956787, "step": 3557, "token_acc": 0.8590986002153514 }, { "epoch": 0.1919818701775212, "grad_norm": 0.5193372368812561, "learning_rate": 1.8655152482304842e-05, "loss": 0.4227094054222107, "step": 3558, "token_acc": 0.8550628722700199 }, { "epoch": 0.19203582798251767, "grad_norm": 0.5047735571861267, "learning_rate": 1.865427702666541e-05, "loss": 0.4483051598072052, "step": 3559, "token_acc": 0.8495425279566249 }, { "epoch": 0.19208978578751415, "grad_norm": 0.532084047794342, "learning_rate": 1.8653401306726625e-05, "loss": 0.4990692138671875, "step": 3560, "token_acc": 0.8387096774193549 }, { "epoch": 0.19214374359251066, "grad_norm": 0.4467169940471649, "learning_rate": 1.8652525322515228e-05, "loss": 0.39755114912986755, "step": 3561, "token_acc": 0.8658831106102077 }, { "epoch": 0.19219770139750714, "grad_norm": 0.3429766595363617, "learning_rate": 1.8651649074057975e-05, "loss": 0.3810734748840332, "step": 3562, "token_acc": 0.8703845732442205 }, { "epoch": 0.19225165920250364, "grad_norm": 0.34358668327331543, "learning_rate": 1.8650772561381627e-05, "loss": 0.42855527997016907, "step": 3563, "token_acc": 0.8541260558804419 }, { "epoch": 0.19230561700750012, "grad_norm": 0.407745897769928, "learning_rate": 1.864989578451295e-05, "loss": 0.41650575399398804, "step": 3564, "token_acc": 0.8579142240049368 }, { "epoch": 0.19235957481249663, "grad_norm": 0.4251258075237274, "learning_rate": 1.8649018743478727e-05, "loss": 0.43521004915237427, "step": 3565, "token_acc": 0.8505673027333677 }, { "epoch": 0.1924135326174931, "grad_norm": 0.36936479806900024, "learning_rate": 1.864814143830573e-05, "loss": 0.3904186487197876, "step": 3566, "token_acc": 0.8626849009210159 }, { "epoch": 0.19246749042248962, "grad_norm": 0.41852498054504395, "learning_rate": 1.8647263869020763e-05, "loss": 0.3954131007194519, "step": 3567, "token_acc": 0.8623595505617978 }, { "epoch": 0.1925214482274861, "grad_norm": 0.37967541813850403, "learning_rate": 1.8646386035650624e-05, "loss": 0.39642927050590515, "step": 3568, "token_acc": 0.8664580931994073 }, { "epoch": 0.1925754060324826, "grad_norm": 0.46026837825775146, "learning_rate": 1.8645507938222118e-05, "loss": 0.4371034502983093, "step": 3569, "token_acc": 0.8521638106302643 }, { "epoch": 0.19262936383747908, "grad_norm": 0.44365137815475464, "learning_rate": 1.8644629576762064e-05, "loss": 0.5103979110717773, "step": 3570, "token_acc": 0.8375576036866359 }, { "epoch": 0.1926833216424756, "grad_norm": 0.5789327621459961, "learning_rate": 1.8643750951297285e-05, "loss": 0.4419517517089844, "step": 3571, "token_acc": 0.846906675987417 }, { "epoch": 0.19273727944747207, "grad_norm": 0.4270215928554535, "learning_rate": 1.8642872061854622e-05, "loss": 0.46520817279815674, "step": 3572, "token_acc": 0.8457190128222805 }, { "epoch": 0.19279123725246858, "grad_norm": 0.36681994795799255, "learning_rate": 1.8641992908460905e-05, "loss": 0.35860675573349, "step": 3573, "token_acc": 0.8746875 }, { "epoch": 0.19284519505746506, "grad_norm": 0.41845837235450745, "learning_rate": 1.8641113491142994e-05, "loss": 0.4154130220413208, "step": 3574, "token_acc": 0.8621083343904605 }, { "epoch": 0.19289915286246156, "grad_norm": 0.399835467338562, "learning_rate": 1.8640233809927736e-05, "loss": 0.4434032440185547, "step": 3575, "token_acc": 0.8510739102969046 }, { "epoch": 0.19295311066745804, "grad_norm": 0.2512519359588623, "learning_rate": 1.8639353864842006e-05, "loss": 0.43797045946121216, "step": 3576, "token_acc": 0.852342640795078 }, { "epoch": 0.19300706847245455, "grad_norm": 0.4777609407901764, "learning_rate": 1.8638473655912665e-05, "loss": 0.4424689710140228, "step": 3577, "token_acc": 0.8492801439712058 }, { "epoch": 0.19306102627745103, "grad_norm": 0.5986273288726807, "learning_rate": 1.8637593183166606e-05, "loss": 0.4597805440425873, "step": 3578, "token_acc": 0.8443254101104788 }, { "epoch": 0.19311498408244754, "grad_norm": 0.43255817890167236, "learning_rate": 1.8636712446630712e-05, "loss": 0.4554691016674042, "step": 3579, "token_acc": 0.846856528747985 }, { "epoch": 0.19316894188744402, "grad_norm": 0.5409525036811829, "learning_rate": 1.8635831446331886e-05, "loss": 0.41809988021850586, "step": 3580, "token_acc": 0.8571665285832643 }, { "epoch": 0.19322289969244053, "grad_norm": 0.48923179507255554, "learning_rate": 1.863495018229703e-05, "loss": 0.32500582933425903, "step": 3581, "token_acc": 0.8867897927534188 }, { "epoch": 0.193276857497437, "grad_norm": 0.3079930245876312, "learning_rate": 1.8634068654553052e-05, "loss": 0.39118897914886475, "step": 3582, "token_acc": 0.8651382333108564 }, { "epoch": 0.19333081530243348, "grad_norm": 0.39043909311294556, "learning_rate": 1.8633186863126883e-05, "loss": 0.3971441686153412, "step": 3583, "token_acc": 0.8649223004383052 }, { "epoch": 0.19338477310743, "grad_norm": 0.46718695759773254, "learning_rate": 1.8632304808045446e-05, "loss": 0.4447179436683655, "step": 3584, "token_acc": 0.8442211055276382 }, { "epoch": 0.19343873091242647, "grad_norm": 0.46630799770355225, "learning_rate": 1.8631422489335683e-05, "loss": 0.41803184151649475, "step": 3585, "token_acc": 0.8566377938836839 }, { "epoch": 0.19349268871742298, "grad_norm": 0.4850969910621643, "learning_rate": 1.863053990702454e-05, "loss": 0.4147147536277771, "step": 3586, "token_acc": 0.8586473588941912 }, { "epoch": 0.19354664652241946, "grad_norm": 0.4313125014305115, "learning_rate": 1.8629657061138968e-05, "loss": 0.37848323583602905, "step": 3587, "token_acc": 0.8707155742633794 }, { "epoch": 0.19360060432741596, "grad_norm": 0.5929298996925354, "learning_rate": 1.862877395170593e-05, "loss": 0.5077915191650391, "step": 3588, "token_acc": 0.8354320451584889 }, { "epoch": 0.19365456213241244, "grad_norm": 0.4714655578136444, "learning_rate": 1.8627890578752396e-05, "loss": 0.43556728959083557, "step": 3589, "token_acc": 0.8490415335463258 }, { "epoch": 0.19370851993740895, "grad_norm": 0.42066314816474915, "learning_rate": 1.8627006942305342e-05, "loss": 0.42546993494033813, "step": 3590, "token_acc": 0.8576137112722478 }, { "epoch": 0.19376247774240543, "grad_norm": 0.42705991864204407, "learning_rate": 1.862612304239176e-05, "loss": 0.3863673210144043, "step": 3591, "token_acc": 0.8654636412363766 }, { "epoch": 0.19381643554740194, "grad_norm": 0.46329188346862793, "learning_rate": 1.862523887903864e-05, "loss": 0.4361191391944885, "step": 3592, "token_acc": 0.8545410975317518 }, { "epoch": 0.19387039335239842, "grad_norm": 0.47306278347969055, "learning_rate": 1.862435445227298e-05, "loss": 0.37338024377822876, "step": 3593, "token_acc": 0.87225718659636 }, { "epoch": 0.19392435115739493, "grad_norm": 0.4918678402900696, "learning_rate": 1.8623469762121795e-05, "loss": 0.44226935505867004, "step": 3594, "token_acc": 0.8502247353921996 }, { "epoch": 0.1939783089623914, "grad_norm": 0.48753923177719116, "learning_rate": 1.8622584808612102e-05, "loss": 0.39767754077911377, "step": 3595, "token_acc": 0.8638015484278717 }, { "epoch": 0.1940322667673879, "grad_norm": 0.442680686712265, "learning_rate": 1.862169959177093e-05, "loss": 0.49164098501205444, "step": 3596, "token_acc": 0.8373231773667029 }, { "epoch": 0.1940862245723844, "grad_norm": 0.3449092209339142, "learning_rate": 1.862081411162531e-05, "loss": 0.39864087104797363, "step": 3597, "token_acc": 0.8624751819986763 }, { "epoch": 0.1941401823773809, "grad_norm": 0.43544894456863403, "learning_rate": 1.8619928368202286e-05, "loss": 0.438681423664093, "step": 3598, "token_acc": 0.8552798176753609 }, { "epoch": 0.19419414018237738, "grad_norm": 0.3632051944732666, "learning_rate": 1.8619042361528913e-05, "loss": 0.3898521065711975, "step": 3599, "token_acc": 0.8682931767564285 }, { "epoch": 0.19424809798737389, "grad_norm": 0.5316213965415955, "learning_rate": 1.8618156091632235e-05, "loss": 0.3861997723579407, "step": 3600, "token_acc": 0.8665867785100859 }, { "epoch": 0.19430205579237037, "grad_norm": 0.4459529221057892, "learning_rate": 1.861726955853933e-05, "loss": 0.40804803371429443, "step": 3601, "token_acc": 0.8582028029678483 }, { "epoch": 0.19435601359736687, "grad_norm": 0.5406402349472046, "learning_rate": 1.8616382762277273e-05, "loss": 0.43806520104408264, "step": 3602, "token_acc": 0.8513454545454545 }, { "epoch": 0.19440997140236335, "grad_norm": 0.4146791100502014, "learning_rate": 1.861549570287314e-05, "loss": 0.41466259956359863, "step": 3603, "token_acc": 0.8552399075710208 }, { "epoch": 0.19446392920735983, "grad_norm": 0.415284126996994, "learning_rate": 1.861460838035403e-05, "loss": 0.45730501413345337, "step": 3604, "token_acc": 0.8442897654034545 }, { "epoch": 0.19451788701235634, "grad_norm": 0.2849695086479187, "learning_rate": 1.8613720794747036e-05, "loss": 0.37327077984809875, "step": 3605, "token_acc": 0.873660922217047 }, { "epoch": 0.19457184481735282, "grad_norm": 0.5503785014152527, "learning_rate": 1.861283294607926e-05, "loss": 0.4249820411205292, "step": 3606, "token_acc": 0.8520508687085796 }, { "epoch": 0.19462580262234933, "grad_norm": 0.43242156505584717, "learning_rate": 1.8611944834377825e-05, "loss": 0.5166841745376587, "step": 3607, "token_acc": 0.8285981059090303 }, { "epoch": 0.1946797604273458, "grad_norm": 0.44025200605392456, "learning_rate": 1.8611056459669854e-05, "loss": 0.4577323794364929, "step": 3608, "token_acc": 0.8494793726110452 }, { "epoch": 0.1947337182323423, "grad_norm": 0.3883156180381775, "learning_rate": 1.861016782198247e-05, "loss": 0.3652905523777008, "step": 3609, "token_acc": 0.8689898544331716 }, { "epoch": 0.1947876760373388, "grad_norm": 0.47448134422302246, "learning_rate": 1.860927892134282e-05, "loss": 0.5024015307426453, "step": 3610, "token_acc": 0.832367460828504 }, { "epoch": 0.1948416338423353, "grad_norm": 0.402444988489151, "learning_rate": 1.8608389757778046e-05, "loss": 0.46006184816360474, "step": 3611, "token_acc": 0.8467432950191571 }, { "epoch": 0.19489559164733178, "grad_norm": 0.4909261167049408, "learning_rate": 1.8607500331315302e-05, "loss": 0.40714943408966064, "step": 3612, "token_acc": 0.8566056245912361 }, { "epoch": 0.19494954945232829, "grad_norm": 0.389728307723999, "learning_rate": 1.8606610641981754e-05, "loss": 0.38014477491378784, "step": 3613, "token_acc": 0.8670049563370309 }, { "epoch": 0.19500350725732477, "grad_norm": 0.38090601563453674, "learning_rate": 1.8605720689804572e-05, "loss": 0.4322531521320343, "step": 3614, "token_acc": 0.8529443083803642 }, { "epoch": 0.19505746506232127, "grad_norm": 0.44051095843315125, "learning_rate": 1.8604830474810933e-05, "loss": 0.4671824276447296, "step": 3615, "token_acc": 0.844005956813105 }, { "epoch": 0.19511142286731775, "grad_norm": 0.4015144407749176, "learning_rate": 1.8603939997028025e-05, "loss": 0.45596498250961304, "step": 3616, "token_acc": 0.8463702116260381 }, { "epoch": 0.19516538067231426, "grad_norm": 0.4745039641857147, "learning_rate": 1.8603049256483045e-05, "loss": 0.4026654362678528, "step": 3617, "token_acc": 0.8638706980632577 }, { "epoch": 0.19521933847731074, "grad_norm": 0.4536495506763458, "learning_rate": 1.8602158253203194e-05, "loss": 0.41397520899772644, "step": 3618, "token_acc": 0.8571952113364281 }, { "epoch": 0.19527329628230725, "grad_norm": 0.4192812144756317, "learning_rate": 1.8601266987215685e-05, "loss": 0.41916054487228394, "step": 3619, "token_acc": 0.8559837728194726 }, { "epoch": 0.19532725408730373, "grad_norm": 0.615163266658783, "learning_rate": 1.8600375458547738e-05, "loss": 0.3797133266925812, "step": 3620, "token_acc": 0.8666666666666667 }, { "epoch": 0.19538121189230023, "grad_norm": 0.35709425806999207, "learning_rate": 1.859948366722657e-05, "loss": 0.44887685775756836, "step": 3621, "token_acc": 0.8478118876551274 }, { "epoch": 0.1954351696972967, "grad_norm": 0.406461238861084, "learning_rate": 1.859859161327943e-05, "loss": 0.42863327264785767, "step": 3622, "token_acc": 0.853325046612803 }, { "epoch": 0.19548912750229322, "grad_norm": 0.39285168051719666, "learning_rate": 1.8597699296733548e-05, "loss": 0.4442133903503418, "step": 3623, "token_acc": 0.8499000999000998 }, { "epoch": 0.1955430853072897, "grad_norm": 0.4272175431251526, "learning_rate": 1.8596806717616187e-05, "loss": 0.3811684250831604, "step": 3624, "token_acc": 0.8680457923277767 }, { "epoch": 0.19559704311228618, "grad_norm": 0.3921755850315094, "learning_rate": 1.8595913875954597e-05, "loss": 0.3896600604057312, "step": 3625, "token_acc": 0.8636195638782802 }, { "epoch": 0.19565100091728269, "grad_norm": 0.5043006539344788, "learning_rate": 1.8595020771776054e-05, "loss": 0.4643990397453308, "step": 3626, "token_acc": 0.8432615747567089 }, { "epoch": 0.19570495872227917, "grad_norm": 0.35755085945129395, "learning_rate": 1.8594127405107825e-05, "loss": 0.4587949514389038, "step": 3627, "token_acc": 0.847431871137676 }, { "epoch": 0.19575891652727567, "grad_norm": 0.44119569659233093, "learning_rate": 1.8593233775977197e-05, "loss": 0.3838232755661011, "step": 3628, "token_acc": 0.8653909061171082 }, { "epoch": 0.19581287433227215, "grad_norm": 0.35416311025619507, "learning_rate": 1.8592339884411462e-05, "loss": 0.44826382398605347, "step": 3629, "token_acc": 0.8443437914273089 }, { "epoch": 0.19586683213726866, "grad_norm": 0.40088632702827454, "learning_rate": 1.8591445730437916e-05, "loss": 0.3796234726905823, "step": 3630, "token_acc": 0.8649263324433897 }, { "epoch": 0.19592078994226514, "grad_norm": 0.42866483330726624, "learning_rate": 1.859055131408387e-05, "loss": 0.41635662317276, "step": 3631, "token_acc": 0.8549506271684014 }, { "epoch": 0.19597474774726165, "grad_norm": 0.4435775578022003, "learning_rate": 1.8589656635376634e-05, "loss": 0.4195117950439453, "step": 3632, "token_acc": 0.8583250988142292 }, { "epoch": 0.19602870555225813, "grad_norm": 0.4316646158695221, "learning_rate": 1.8588761694343536e-05, "loss": 0.41558191180229187, "step": 3633, "token_acc": 0.8553253955309085 }, { "epoch": 0.19608266335725463, "grad_norm": 0.36239439249038696, "learning_rate": 1.8587866491011907e-05, "loss": 0.4083610773086548, "step": 3634, "token_acc": 0.8579814120332627 }, { "epoch": 0.1961366211622511, "grad_norm": 0.4535978436470032, "learning_rate": 1.8586971025409085e-05, "loss": 0.4250469207763672, "step": 3635, "token_acc": 0.8593730485824903 }, { "epoch": 0.19619057896724762, "grad_norm": 0.45459839701652527, "learning_rate": 1.8586075297562417e-05, "loss": 0.41811585426330566, "step": 3636, "token_acc": 0.85768779342723 }, { "epoch": 0.1962445367722441, "grad_norm": 0.5363936424255371, "learning_rate": 1.8585179307499257e-05, "loss": 0.39468634128570557, "step": 3637, "token_acc": 0.8617710583153347 }, { "epoch": 0.1962984945772406, "grad_norm": 0.43351230025291443, "learning_rate": 1.8584283055246973e-05, "loss": 0.4407302141189575, "step": 3638, "token_acc": 0.855731775180857 }, { "epoch": 0.19635245238223709, "grad_norm": 0.49371251463890076, "learning_rate": 1.8583386540832933e-05, "loss": 0.42147576808929443, "step": 3639, "token_acc": 0.8544969392560038 }, { "epoch": 0.1964064101872336, "grad_norm": 0.4208751618862152, "learning_rate": 1.8582489764284513e-05, "loss": 0.4408813714981079, "step": 3640, "token_acc": 0.8484508621898219 }, { "epoch": 0.19646036799223007, "grad_norm": 0.42051777243614197, "learning_rate": 1.8581592725629107e-05, "loss": 0.42710673809051514, "step": 3641, "token_acc": 0.8488908095649669 }, { "epoch": 0.19651432579722658, "grad_norm": 0.43519386649131775, "learning_rate": 1.8580695424894108e-05, "loss": 0.4368189573287964, "step": 3642, "token_acc": 0.8544007490636704 }, { "epoch": 0.19656828360222306, "grad_norm": 0.43984562158584595, "learning_rate": 1.8579797862106915e-05, "loss": 0.4437694251537323, "step": 3643, "token_acc": 0.8472521551724138 }, { "epoch": 0.19662224140721957, "grad_norm": 0.42433831095695496, "learning_rate": 1.8578900037294945e-05, "loss": 0.4121500253677368, "step": 3644, "token_acc": 0.854955680902498 }, { "epoch": 0.19667619921221605, "grad_norm": 0.42427879571914673, "learning_rate": 1.8578001950485614e-05, "loss": 0.48691022396087646, "step": 3645, "token_acc": 0.8374066530889341 }, { "epoch": 0.19673015701721255, "grad_norm": 0.47764068841934204, "learning_rate": 1.857710360170635e-05, "loss": 0.405322402715683, "step": 3646, "token_acc": 0.8598870056497175 }, { "epoch": 0.19678411482220903, "grad_norm": 0.4203750789165497, "learning_rate": 1.8576204990984595e-05, "loss": 0.4312458634376526, "step": 3647, "token_acc": 0.8574139976275208 }, { "epoch": 0.1968380726272055, "grad_norm": 0.3915880024433136, "learning_rate": 1.8575306118347777e-05, "loss": 0.38530224561691284, "step": 3648, "token_acc": 0.8712226226781259 }, { "epoch": 0.19689203043220202, "grad_norm": 0.4860322177410126, "learning_rate": 1.8574406983823367e-05, "loss": 0.4271514415740967, "step": 3649, "token_acc": 0.855252409736971 }, { "epoch": 0.1969459882371985, "grad_norm": 0.4671199917793274, "learning_rate": 1.8573507587438806e-05, "loss": 0.38220787048339844, "step": 3650, "token_acc": 0.8695341526150329 }, { "epoch": 0.196999946042195, "grad_norm": 0.3192707300186157, "learning_rate": 1.8572607929221572e-05, "loss": 0.4018336832523346, "step": 3651, "token_acc": 0.863944782405241 }, { "epoch": 0.19705390384719149, "grad_norm": 0.3878091871738434, "learning_rate": 1.8571708009199136e-05, "loss": 0.41977906227111816, "step": 3652, "token_acc": 0.854257873181754 }, { "epoch": 0.197107861652188, "grad_norm": 0.3933209776878357, "learning_rate": 1.8570807827398982e-05, "loss": 0.43115103244781494, "step": 3653, "token_acc": 0.8553876397591842 }, { "epoch": 0.19716181945718447, "grad_norm": 0.5574842691421509, "learning_rate": 1.8569907383848605e-05, "loss": 0.48445039987564087, "step": 3654, "token_acc": 0.8424291497975709 }, { "epoch": 0.19721577726218098, "grad_norm": 0.36997485160827637, "learning_rate": 1.85690066785755e-05, "loss": 0.4159776568412781, "step": 3655, "token_acc": 0.8577813896789923 }, { "epoch": 0.19726973506717746, "grad_norm": 0.47463712096214294, "learning_rate": 1.8568105711607178e-05, "loss": 0.4186927378177643, "step": 3656, "token_acc": 0.8509821296706542 }, { "epoch": 0.19732369287217397, "grad_norm": 0.3937109410762787, "learning_rate": 1.8567204482971148e-05, "loss": 0.42980092763900757, "step": 3657, "token_acc": 0.8508722566122678 }, { "epoch": 0.19737765067717045, "grad_norm": 0.4446794390678406, "learning_rate": 1.856630299269494e-05, "loss": 0.3794001340866089, "step": 3658, "token_acc": 0.8699504430094609 }, { "epoch": 0.19743160848216695, "grad_norm": 0.39287933707237244, "learning_rate": 1.8565401240806086e-05, "loss": 0.4227449893951416, "step": 3659, "token_acc": 0.8542931483087598 }, { "epoch": 0.19748556628716343, "grad_norm": 0.3772543966770172, "learning_rate": 1.856449922733212e-05, "loss": 0.40616869926452637, "step": 3660, "token_acc": 0.8596047794117647 }, { "epoch": 0.19753952409215994, "grad_norm": 0.41795140504837036, "learning_rate": 1.8563596952300588e-05, "loss": 0.43194517493247986, "step": 3661, "token_acc": 0.8498614958448754 }, { "epoch": 0.19759348189715642, "grad_norm": 0.518215000629425, "learning_rate": 1.856269441573905e-05, "loss": 0.42335599660873413, "step": 3662, "token_acc": 0.8569553805774278 }, { "epoch": 0.19764743970215293, "grad_norm": 0.4027385711669922, "learning_rate": 1.8561791617675068e-05, "loss": 0.3449746370315552, "step": 3663, "token_acc": 0.8722419928825623 }, { "epoch": 0.1977013975071494, "grad_norm": 0.434267520904541, "learning_rate": 1.856088855813621e-05, "loss": 0.4334339499473572, "step": 3664, "token_acc": 0.8485462172718068 }, { "epoch": 0.19775535531214591, "grad_norm": 0.47467100620269775, "learning_rate": 1.8559985237150063e-05, "loss": 0.42010506987571716, "step": 3665, "token_acc": 0.8556930693069307 }, { "epoch": 0.1978093131171424, "grad_norm": 0.436420738697052, "learning_rate": 1.8559081654744204e-05, "loss": 0.44756990671157837, "step": 3666, "token_acc": 0.8525950699863744 }, { "epoch": 0.1978632709221389, "grad_norm": 0.4561963975429535, "learning_rate": 1.855817781094624e-05, "loss": 0.4208434820175171, "step": 3667, "token_acc": 0.8563903173209583 }, { "epoch": 0.19791722872713538, "grad_norm": 0.4090895652770996, "learning_rate": 1.855727370578376e-05, "loss": 0.38452598452568054, "step": 3668, "token_acc": 0.8650802082226707 }, { "epoch": 0.19797118653213186, "grad_norm": 0.5015734434127808, "learning_rate": 1.855636933928439e-05, "loss": 0.42930686473846436, "step": 3669, "token_acc": 0.8532853285328533 }, { "epoch": 0.19802514433712837, "grad_norm": 0.5084209442138672, "learning_rate": 1.8555464711475736e-05, "loss": 0.4154827892780304, "step": 3670, "token_acc": 0.8594110115236876 }, { "epoch": 0.19807910214212485, "grad_norm": 0.4698235094547272, "learning_rate": 1.8554559822385435e-05, "loss": 0.41125351190567017, "step": 3671, "token_acc": 0.8608582574772432 }, { "epoch": 0.19813305994712135, "grad_norm": 0.3826805353164673, "learning_rate": 1.8553654672041116e-05, "loss": 0.4107828736305237, "step": 3672, "token_acc": 0.8601369578750778 }, { "epoch": 0.19818701775211783, "grad_norm": 0.4273127317428589, "learning_rate": 1.8552749260470424e-05, "loss": 0.4172426164150238, "step": 3673, "token_acc": 0.8499480017827961 }, { "epoch": 0.19824097555711434, "grad_norm": 0.37594616413116455, "learning_rate": 1.8551843587701012e-05, "loss": 0.4777785539627075, "step": 3674, "token_acc": 0.8369005379420854 }, { "epoch": 0.19829493336211082, "grad_norm": 0.44058024883270264, "learning_rate": 1.8550937653760534e-05, "loss": 0.40880852937698364, "step": 3675, "token_acc": 0.8575438596491228 }, { "epoch": 0.19834889116710733, "grad_norm": 0.5054468512535095, "learning_rate": 1.8550031458676664e-05, "loss": 0.4461382031440735, "step": 3676, "token_acc": 0.853519598535196 }, { "epoch": 0.1984028489721038, "grad_norm": 0.4606744945049286, "learning_rate": 1.854912500247707e-05, "loss": 0.43914732336997986, "step": 3677, "token_acc": 0.8495399858457183 }, { "epoch": 0.19845680677710031, "grad_norm": 0.40861570835113525, "learning_rate": 1.854821828518944e-05, "loss": 0.4021736681461334, "step": 3678, "token_acc": 0.8638514743356389 }, { "epoch": 0.1985107645820968, "grad_norm": 0.3226676881313324, "learning_rate": 1.854731130684146e-05, "loss": 0.37763428688049316, "step": 3679, "token_acc": 0.8677410391775493 }, { "epoch": 0.1985647223870933, "grad_norm": 0.45499011874198914, "learning_rate": 1.8546404067460835e-05, "loss": 0.3981095254421234, "step": 3680, "token_acc": 0.8549510056730273 }, { "epoch": 0.19861868019208978, "grad_norm": 0.5581316351890564, "learning_rate": 1.8545496567075266e-05, "loss": 0.37943872809410095, "step": 3681, "token_acc": 0.8700353045013239 }, { "epoch": 0.1986726379970863, "grad_norm": 0.3351121246814728, "learning_rate": 1.8544588805712474e-05, "loss": 0.4132722318172455, "step": 3682, "token_acc": 0.8597333973820104 }, { "epoch": 0.19872659580208277, "grad_norm": 0.4640573263168335, "learning_rate": 1.854368078340017e-05, "loss": 0.3950428068637848, "step": 3683, "token_acc": 0.8632779498732751 }, { "epoch": 0.19878055360707927, "grad_norm": 0.4890476167201996, "learning_rate": 1.8542772500166103e-05, "loss": 0.4236840009689331, "step": 3684, "token_acc": 0.856020007145409 }, { "epoch": 0.19883451141207575, "grad_norm": 0.468388170003891, "learning_rate": 1.8541863956038e-05, "loss": 0.41296496987342834, "step": 3685, "token_acc": 0.8564849205170109 }, { "epoch": 0.19888846921707226, "grad_norm": 0.5119103193283081, "learning_rate": 1.8540955151043607e-05, "loss": 0.4063704013824463, "step": 3686, "token_acc": 0.8607212916855289 }, { "epoch": 0.19894242702206874, "grad_norm": 0.5124491453170776, "learning_rate": 1.8540046085210682e-05, "loss": 0.4095068573951721, "step": 3687, "token_acc": 0.8612214863870493 }, { "epoch": 0.19899638482706525, "grad_norm": 0.44170770049095154, "learning_rate": 1.853913675856698e-05, "loss": 0.45773178339004517, "step": 3688, "token_acc": 0.8484616562069494 }, { "epoch": 0.19905034263206173, "grad_norm": 0.4150424003601074, "learning_rate": 1.853822717114029e-05, "loss": 0.40122777223587036, "step": 3689, "token_acc": 0.8586313714446635 }, { "epoch": 0.1991043004370582, "grad_norm": 0.36469054222106934, "learning_rate": 1.853731732295837e-05, "loss": 0.3289691209793091, "step": 3690, "token_acc": 0.8842059838895282 }, { "epoch": 0.19915825824205471, "grad_norm": 0.4817710816860199, "learning_rate": 1.853640721404902e-05, "loss": 0.46072959899902344, "step": 3691, "token_acc": 0.8449718754287282 }, { "epoch": 0.1992122160470512, "grad_norm": 0.3765263557434082, "learning_rate": 1.8535496844440032e-05, "loss": 0.4551156461238861, "step": 3692, "token_acc": 0.8481308411214953 }, { "epoch": 0.1992661738520477, "grad_norm": 0.4786602258682251, "learning_rate": 1.85345862141592e-05, "loss": 0.37053146958351135, "step": 3693, "token_acc": 0.8685222486724044 }, { "epoch": 0.19932013165704418, "grad_norm": 0.4741894602775574, "learning_rate": 1.8533675323234342e-05, "loss": 0.41369327902793884, "step": 3694, "token_acc": 0.8626013724267 }, { "epoch": 0.1993740894620407, "grad_norm": 0.38190028071403503, "learning_rate": 1.8532764171693278e-05, "loss": 0.4234132766723633, "step": 3695, "token_acc": 0.8555506672532629 }, { "epoch": 0.19942804726703717, "grad_norm": 0.4669104814529419, "learning_rate": 1.8531852759563832e-05, "loss": 0.44860559701919556, "step": 3696, "token_acc": 0.8435330419130882 }, { "epoch": 0.19948200507203367, "grad_norm": 0.5233108401298523, "learning_rate": 1.8530941086873838e-05, "loss": 0.41120025515556335, "step": 3697, "token_acc": 0.8592726766058242 }, { "epoch": 0.19953596287703015, "grad_norm": 0.44421422481536865, "learning_rate": 1.8530029153651133e-05, "loss": 0.386809766292572, "step": 3698, "token_acc": 0.8651657897956687 }, { "epoch": 0.19958992068202666, "grad_norm": 0.49828973412513733, "learning_rate": 1.8529116959923576e-05, "loss": 0.34954357147216797, "step": 3699, "token_acc": 0.8718001228752816 }, { "epoch": 0.19964387848702314, "grad_norm": 0.549451470375061, "learning_rate": 1.852820450571902e-05, "loss": 0.41658616065979004, "step": 3700, "token_acc": 0.8543707262982813 }, { "epoch": 0.19969783629201965, "grad_norm": 0.43121081590652466, "learning_rate": 1.8527291791065333e-05, "loss": 0.3903627395629883, "step": 3701, "token_acc": 0.8600729261622607 }, { "epoch": 0.19975179409701613, "grad_norm": 0.5177670121192932, "learning_rate": 1.8526378815990388e-05, "loss": 0.4298953115940094, "step": 3702, "token_acc": 0.8496764559482329 }, { "epoch": 0.19980575190201263, "grad_norm": 0.33055055141448975, "learning_rate": 1.8525465580522072e-05, "loss": 0.45351940393447876, "step": 3703, "token_acc": 0.8437031051598753 }, { "epoch": 0.19985970970700911, "grad_norm": 0.4013242721557617, "learning_rate": 1.8524552084688266e-05, "loss": 0.41669490933418274, "step": 3704, "token_acc": 0.8535919540229885 }, { "epoch": 0.19991366751200562, "grad_norm": 0.4595080018043518, "learning_rate": 1.8523638328516874e-05, "loss": 0.46859318017959595, "step": 3705, "token_acc": 0.8469604863221885 }, { "epoch": 0.1999676253170021, "grad_norm": 0.4165586233139038, "learning_rate": 1.8522724312035798e-05, "loss": 0.45216506719589233, "step": 3706, "token_acc": 0.8465868090633569 }, { "epoch": 0.2000215831219986, "grad_norm": 0.40240299701690674, "learning_rate": 1.8521810035272956e-05, "loss": 0.44572916626930237, "step": 3707, "token_acc": 0.8437954275142064 }, { "epoch": 0.2000755409269951, "grad_norm": 0.5566118955612183, "learning_rate": 1.852089549825627e-05, "loss": 0.42032110691070557, "step": 3708, "token_acc": 0.8500543281419776 }, { "epoch": 0.2001294987319916, "grad_norm": 0.37720975279808044, "learning_rate": 1.8519980701013663e-05, "loss": 0.3388141691684723, "step": 3709, "token_acc": 0.8811317853298478 }, { "epoch": 0.20018345653698807, "grad_norm": 0.4652339220046997, "learning_rate": 1.851906564357308e-05, "loss": 0.4093690514564514, "step": 3710, "token_acc": 0.8554083885209713 }, { "epoch": 0.20023741434198455, "grad_norm": 0.39209839701652527, "learning_rate": 1.8518150325962464e-05, "loss": 0.43878263235092163, "step": 3711, "token_acc": 0.8538860103626943 }, { "epoch": 0.20029137214698106, "grad_norm": 0.42141976952552795, "learning_rate": 1.851723474820977e-05, "loss": 0.37632501125335693, "step": 3712, "token_acc": 0.8728448275862069 }, { "epoch": 0.20034532995197754, "grad_norm": 0.30376291275024414, "learning_rate": 1.8516318910342955e-05, "loss": 0.32383236289024353, "step": 3713, "token_acc": 0.882345769324555 }, { "epoch": 0.20039928775697405, "grad_norm": 0.37981483340263367, "learning_rate": 1.8515402812389992e-05, "loss": 0.43267831206321716, "step": 3714, "token_acc": 0.8533996683250414 }, { "epoch": 0.20045324556197053, "grad_norm": 0.3110583424568176, "learning_rate": 1.851448645437886e-05, "loss": 0.3643684983253479, "step": 3715, "token_acc": 0.8725922086896433 }, { "epoch": 0.20050720336696704, "grad_norm": 0.4485093355178833, "learning_rate": 1.851356983633754e-05, "loss": 0.39170706272125244, "step": 3716, "token_acc": 0.8608388867110937 }, { "epoch": 0.20056116117196351, "grad_norm": 0.3659255802631378, "learning_rate": 1.8512652958294027e-05, "loss": 0.4432642459869385, "step": 3717, "token_acc": 0.8508342742441349 }, { "epoch": 0.20061511897696002, "grad_norm": 0.3852287828922272, "learning_rate": 1.8511735820276325e-05, "loss": 0.4569152891635895, "step": 3718, "token_acc": 0.8428124638016912 }, { "epoch": 0.2006690767819565, "grad_norm": 0.46807676553726196, "learning_rate": 1.8510818422312438e-05, "loss": 0.452452152967453, "step": 3719, "token_acc": 0.8508857581252616 }, { "epoch": 0.200723034586953, "grad_norm": 0.45055434107780457, "learning_rate": 1.850990076443039e-05, "loss": 0.4097192585468292, "step": 3720, "token_acc": 0.8598695721329728 }, { "epoch": 0.2007769923919495, "grad_norm": 0.4304478168487549, "learning_rate": 1.85089828466582e-05, "loss": 0.4405742287635803, "step": 3721, "token_acc": 0.8556576766611421 }, { "epoch": 0.200830950196946, "grad_norm": 0.4477534890174866, "learning_rate": 1.85080646690239e-05, "loss": 0.4367666244506836, "step": 3722, "token_acc": 0.8543959687044158 }, { "epoch": 0.20088490800194247, "grad_norm": 0.35703638195991516, "learning_rate": 1.8507146231555535e-05, "loss": 0.3886452913284302, "step": 3723, "token_acc": 0.8637705111557077 }, { "epoch": 0.20093886580693898, "grad_norm": 0.429915189743042, "learning_rate": 1.8506227534281156e-05, "loss": 0.429433673620224, "step": 3724, "token_acc": 0.855253490080823 }, { "epoch": 0.20099282361193546, "grad_norm": 0.4198862910270691, "learning_rate": 1.8505308577228812e-05, "loss": 0.4117172360420227, "step": 3725, "token_acc": 0.8601116625310173 }, { "epoch": 0.20104678141693197, "grad_norm": 0.4412955343723297, "learning_rate": 1.8504389360426575e-05, "loss": 0.3688974976539612, "step": 3726, "token_acc": 0.8706011730205279 }, { "epoch": 0.20110073922192845, "grad_norm": 0.4644286632537842, "learning_rate": 1.8503469883902515e-05, "loss": 0.37277454137802124, "step": 3727, "token_acc": 0.8692704495210022 }, { "epoch": 0.20115469702692496, "grad_norm": 0.44281288981437683, "learning_rate": 1.8502550147684712e-05, "loss": 0.3783216178417206, "step": 3728, "token_acc": 0.870324514531474 }, { "epoch": 0.20120865483192144, "grad_norm": 0.4853478670120239, "learning_rate": 1.8501630151801255e-05, "loss": 0.4942762553691864, "step": 3729, "token_acc": 0.8370307167235495 }, { "epoch": 0.20126261263691794, "grad_norm": 0.42682066559791565, "learning_rate": 1.8500709896280238e-05, "loss": 0.47574907541275024, "step": 3730, "token_acc": 0.8397982932505819 }, { "epoch": 0.20131657044191442, "grad_norm": 0.428741455078125, "learning_rate": 1.849978938114977e-05, "loss": 0.47458821535110474, "step": 3731, "token_acc": 0.8423681504333774 }, { "epoch": 0.20137052824691093, "grad_norm": 0.4092988669872284, "learning_rate": 1.849886860643796e-05, "loss": 0.4252282977104187, "step": 3732, "token_acc": 0.8579682739783591 }, { "epoch": 0.2014244860519074, "grad_norm": 0.44358348846435547, "learning_rate": 1.849794757217293e-05, "loss": 0.4291452169418335, "step": 3733, "token_acc": 0.8579953885799538 }, { "epoch": 0.2014784438569039, "grad_norm": 0.41002288460731506, "learning_rate": 1.849702627838281e-05, "loss": 0.4330114722251892, "step": 3734, "token_acc": 0.8564006926868256 }, { "epoch": 0.2015324016619004, "grad_norm": 0.408873587846756, "learning_rate": 1.8496104725095726e-05, "loss": 0.4609299898147583, "step": 3735, "token_acc": 0.8431731902081424 }, { "epoch": 0.20158635946689688, "grad_norm": 0.4180372953414917, "learning_rate": 1.8495182912339834e-05, "loss": 0.43568742275238037, "step": 3736, "token_acc": 0.8526112185686654 }, { "epoch": 0.20164031727189338, "grad_norm": 0.5064629912376404, "learning_rate": 1.849426084014328e-05, "loss": 0.49666568636894226, "step": 3737, "token_acc": 0.832446379616191 }, { "epoch": 0.20169427507688986, "grad_norm": 0.46477919816970825, "learning_rate": 1.8493338508534226e-05, "loss": 0.3933773636817932, "step": 3738, "token_acc": 0.8676167582417582 }, { "epoch": 0.20174823288188637, "grad_norm": 0.3952394127845764, "learning_rate": 1.849241591754084e-05, "loss": 0.4033104479312897, "step": 3739, "token_acc": 0.8625832223701732 }, { "epoch": 0.20180219068688285, "grad_norm": 0.3353475332260132, "learning_rate": 1.8491493067191297e-05, "loss": 0.364208459854126, "step": 3740, "token_acc": 0.8718083016891449 }, { "epoch": 0.20185614849187936, "grad_norm": 0.5406894683837891, "learning_rate": 1.8490569957513776e-05, "loss": 0.4907342791557312, "step": 3741, "token_acc": 0.8314765416601286 }, { "epoch": 0.20191010629687584, "grad_norm": 0.46321719884872437, "learning_rate": 1.8489646588536475e-05, "loss": 0.47840994596481323, "step": 3742, "token_acc": 0.8384801480005692 }, { "epoch": 0.20196406410187234, "grad_norm": 0.44381949305534363, "learning_rate": 1.8488722960287593e-05, "loss": 0.42478686571121216, "step": 3743, "token_acc": 0.8576487252124646 }, { "epoch": 0.20201802190686882, "grad_norm": 0.34471774101257324, "learning_rate": 1.8487799072795333e-05, "loss": 0.46942150592803955, "step": 3744, "token_acc": 0.8430071009570855 }, { "epoch": 0.20207197971186533, "grad_norm": 0.4451018273830414, "learning_rate": 1.8486874926087918e-05, "loss": 0.44637054204940796, "step": 3745, "token_acc": 0.84654106141234 }, { "epoch": 0.2021259375168618, "grad_norm": 0.4430844485759735, "learning_rate": 1.8485950520193557e-05, "loss": 0.3959980309009552, "step": 3746, "token_acc": 0.8597979068928184 }, { "epoch": 0.20217989532185832, "grad_norm": 0.4323963224887848, "learning_rate": 1.8485025855140498e-05, "loss": 0.3918601870536804, "step": 3747, "token_acc": 0.8660604019310655 }, { "epoch": 0.2022338531268548, "grad_norm": 0.42637068033218384, "learning_rate": 1.8484100930956966e-05, "loss": 0.3980879783630371, "step": 3748, "token_acc": 0.8618475805492807 }, { "epoch": 0.2022878109318513, "grad_norm": 0.33505335450172424, "learning_rate": 1.8483175747671217e-05, "loss": 0.37081053853034973, "step": 3749, "token_acc": 0.8664967870596055 }, { "epoch": 0.20234176873684778, "grad_norm": 0.46343329548835754, "learning_rate": 1.8482250305311505e-05, "loss": 0.41576123237609863, "step": 3750, "token_acc": 0.8553110653791357 }, { "epoch": 0.2023957265418443, "grad_norm": 0.5681836605072021, "learning_rate": 1.848132460390609e-05, "loss": 0.4495561718940735, "step": 3751, "token_acc": 0.8483486079889331 }, { "epoch": 0.20244968434684077, "grad_norm": 0.4935106337070465, "learning_rate": 1.848039864348324e-05, "loss": 0.3868701756000519, "step": 3752, "token_acc": 0.8653846153846154 }, { "epoch": 0.20250364215183728, "grad_norm": 0.3882846534252167, "learning_rate": 1.847947242407124e-05, "loss": 0.40499573945999146, "step": 3753, "token_acc": 0.8645233749824512 }, { "epoch": 0.20255759995683376, "grad_norm": 0.48834890127182007, "learning_rate": 1.8478545945698373e-05, "loss": 0.4351157546043396, "step": 3754, "token_acc": 0.8534634458486642 }, { "epoch": 0.20261155776183024, "grad_norm": 0.39052513241767883, "learning_rate": 1.8477619208392937e-05, "loss": 0.3980550169944763, "step": 3755, "token_acc": 0.8645242596093258 }, { "epoch": 0.20266551556682674, "grad_norm": 0.5531231164932251, "learning_rate": 1.8476692212183224e-05, "loss": 0.4469790458679199, "step": 3756, "token_acc": 0.8471054718477399 }, { "epoch": 0.20271947337182322, "grad_norm": 0.5773835778236389, "learning_rate": 1.847576495709756e-05, "loss": 0.3985598385334015, "step": 3757, "token_acc": 0.861190423896332 }, { "epoch": 0.20277343117681973, "grad_norm": 0.4928325116634369, "learning_rate": 1.8474837443164246e-05, "loss": 0.4225764274597168, "step": 3758, "token_acc": 0.8523832297146501 }, { "epoch": 0.2028273889818162, "grad_norm": 0.46584099531173706, "learning_rate": 1.8473909670411623e-05, "loss": 0.3904995322227478, "step": 3759, "token_acc": 0.8598081952920663 }, { "epoch": 0.20288134678681272, "grad_norm": 0.4029273986816406, "learning_rate": 1.8472981638868016e-05, "loss": 0.436276912689209, "step": 3760, "token_acc": 0.8484719260838663 }, { "epoch": 0.2029353045918092, "grad_norm": 0.44334807991981506, "learning_rate": 1.847205334856177e-05, "loss": 0.42971715331077576, "step": 3761, "token_acc": 0.8545480755369756 }, { "epoch": 0.2029892623968057, "grad_norm": 0.42753124237060547, "learning_rate": 1.8471124799521232e-05, "loss": 0.42756709456443787, "step": 3762, "token_acc": 0.8549424509140149 }, { "epoch": 0.20304322020180218, "grad_norm": 0.37073269486427307, "learning_rate": 1.8470195991774762e-05, "loss": 0.3643565773963928, "step": 3763, "token_acc": 0.8720912775859481 }, { "epoch": 0.2030971780067987, "grad_norm": 0.4322650134563446, "learning_rate": 1.8469266925350728e-05, "loss": 0.4697653651237488, "step": 3764, "token_acc": 0.8440225035161744 }, { "epoch": 0.20315113581179517, "grad_norm": 0.3978573977947235, "learning_rate": 1.84683376002775e-05, "loss": 0.3960466980934143, "step": 3765, "token_acc": 0.8575963718820862 }, { "epoch": 0.20320509361679168, "grad_norm": 0.4086661636829376, "learning_rate": 1.8467408016583456e-05, "loss": 0.4378581643104553, "step": 3766, "token_acc": 0.8520068480121742 }, { "epoch": 0.20325905142178816, "grad_norm": 0.38385009765625, "learning_rate": 1.8466478174296995e-05, "loss": 0.40327590703964233, "step": 3767, "token_acc": 0.8586363636363636 }, { "epoch": 0.20331300922678466, "grad_norm": 0.44281575083732605, "learning_rate": 1.8465548073446507e-05, "loss": 0.3904381990432739, "step": 3768, "token_acc": 0.86309684380901 }, { "epoch": 0.20336696703178114, "grad_norm": 0.44044962525367737, "learning_rate": 1.84646177140604e-05, "loss": 0.3782637119293213, "step": 3769, "token_acc": 0.8650827933765298 }, { "epoch": 0.20342092483677765, "grad_norm": 0.42638835310935974, "learning_rate": 1.846368709616708e-05, "loss": 0.43560677766799927, "step": 3770, "token_acc": 0.8555951320866726 }, { "epoch": 0.20347488264177413, "grad_norm": 0.4576450288295746, "learning_rate": 1.846275621979498e-05, "loss": 0.4194134473800659, "step": 3771, "token_acc": 0.853602383531961 }, { "epoch": 0.20352884044677064, "grad_norm": 0.5332369804382324, "learning_rate": 1.8461825084972514e-05, "loss": 0.5091894268989563, "step": 3772, "token_acc": 0.8357305071915215 }, { "epoch": 0.20358279825176712, "grad_norm": 0.47912371158599854, "learning_rate": 1.8460893691728134e-05, "loss": 0.4034026861190796, "step": 3773, "token_acc": 0.8558943845295649 }, { "epoch": 0.20363675605676362, "grad_norm": 0.47647833824157715, "learning_rate": 1.845996204009027e-05, "loss": 0.4460008442401886, "step": 3774, "token_acc": 0.8446069469835467 }, { "epoch": 0.2036907138617601, "grad_norm": 0.49088767170906067, "learning_rate": 1.8459030130087387e-05, "loss": 0.5081210732460022, "step": 3775, "token_acc": 0.8342079370285339 }, { "epoch": 0.20374467166675658, "grad_norm": 0.395702600479126, "learning_rate": 1.8458097961747938e-05, "loss": 0.40201109647750854, "step": 3776, "token_acc": 0.8622375954198473 }, { "epoch": 0.2037986294717531, "grad_norm": 0.3275816738605499, "learning_rate": 1.8457165535100394e-05, "loss": 0.41396018862724304, "step": 3777, "token_acc": 0.8615166261151662 }, { "epoch": 0.20385258727674957, "grad_norm": 0.49336978793144226, "learning_rate": 1.845623285017323e-05, "loss": 0.4217669367790222, "step": 3778, "token_acc": 0.8559102674719585 }, { "epoch": 0.20390654508174608, "grad_norm": 0.45061764121055603, "learning_rate": 1.845529990699493e-05, "loss": 0.4379328787326813, "step": 3779, "token_acc": 0.8501291989664083 }, { "epoch": 0.20396050288674256, "grad_norm": 0.40892231464385986, "learning_rate": 1.8454366705593984e-05, "loss": 0.46107643842697144, "step": 3780, "token_acc": 0.8422908731838059 }, { "epoch": 0.20401446069173906, "grad_norm": 0.5060403347015381, "learning_rate": 1.8453433245998895e-05, "loss": 0.4073014557361603, "step": 3781, "token_acc": 0.8597076580870671 }, { "epoch": 0.20406841849673554, "grad_norm": 0.41560766100883484, "learning_rate": 1.845249952823817e-05, "loss": 0.3909544050693512, "step": 3782, "token_acc": 0.8672496025437202 }, { "epoch": 0.20412237630173205, "grad_norm": 0.36954665184020996, "learning_rate": 1.845156555234032e-05, "loss": 0.4213908910751343, "step": 3783, "token_acc": 0.8582241630276565 }, { "epoch": 0.20417633410672853, "grad_norm": 0.4803847670555115, "learning_rate": 1.8450631318333874e-05, "loss": 0.36912912130355835, "step": 3784, "token_acc": 0.8708677685950413 }, { "epoch": 0.20423029191172504, "grad_norm": 0.4886991083621979, "learning_rate": 1.844969682624736e-05, "loss": 0.4194037914276123, "step": 3785, "token_acc": 0.8488864347505982 }, { "epoch": 0.20428424971672152, "grad_norm": 0.42963624000549316, "learning_rate": 1.8448762076109327e-05, "loss": 0.4224188029766083, "step": 3786, "token_acc": 0.8585611139762764 }, { "epoch": 0.20433820752171802, "grad_norm": 0.34161892533302307, "learning_rate": 1.8447827067948306e-05, "loss": 0.3679034113883972, "step": 3787, "token_acc": 0.8727659042619672 }, { "epoch": 0.2043921653267145, "grad_norm": 0.35345888137817383, "learning_rate": 1.844689180179286e-05, "loss": 0.45418646931648254, "step": 3788, "token_acc": 0.8503147586850082 }, { "epoch": 0.204446123131711, "grad_norm": 0.404325008392334, "learning_rate": 1.844595627767155e-05, "loss": 0.40976694226264954, "step": 3789, "token_acc": 0.8583732057416268 }, { "epoch": 0.2045000809367075, "grad_norm": 0.5303424596786499, "learning_rate": 1.8445020495612952e-05, "loss": 0.38270509243011475, "step": 3790, "token_acc": 0.8637758232767437 }, { "epoch": 0.204554038741704, "grad_norm": 0.3558744192123413, "learning_rate": 1.8444084455645638e-05, "loss": 0.4548954963684082, "step": 3791, "token_acc": 0.8428663653970303 }, { "epoch": 0.20460799654670048, "grad_norm": 0.44157153367996216, "learning_rate": 1.8443148157798194e-05, "loss": 0.42171019315719604, "step": 3792, "token_acc": 0.8519014910370246 }, { "epoch": 0.20466195435169698, "grad_norm": 0.3791407644748688, "learning_rate": 1.844221160209922e-05, "loss": 0.4109572768211365, "step": 3793, "token_acc": 0.8632872503840245 }, { "epoch": 0.20471591215669346, "grad_norm": 0.524894118309021, "learning_rate": 1.8441274788577314e-05, "loss": 0.3855549097061157, "step": 3794, "token_acc": 0.8668910076649841 }, { "epoch": 0.20476986996168997, "grad_norm": 0.3229164779186249, "learning_rate": 1.844033771726109e-05, "loss": 0.4606269896030426, "step": 3795, "token_acc": 0.847992700729927 }, { "epoch": 0.20482382776668645, "grad_norm": 0.4166024923324585, "learning_rate": 1.8439400388179162e-05, "loss": 0.43014055490493774, "step": 3796, "token_acc": 0.8531849103277674 }, { "epoch": 0.20487778557168296, "grad_norm": 0.3677985370159149, "learning_rate": 1.8438462801360154e-05, "loss": 0.4424731433391571, "step": 3797, "token_acc": 0.851902682470368 }, { "epoch": 0.20493174337667944, "grad_norm": 0.48853710293769836, "learning_rate": 1.8437524956832703e-05, "loss": 0.4172332286834717, "step": 3798, "token_acc": 0.8557837511325883 }, { "epoch": 0.20498570118167592, "grad_norm": 0.4226495325565338, "learning_rate": 1.8436586854625453e-05, "loss": 0.4425273537635803, "step": 3799, "token_acc": 0.8487885133113969 }, { "epoch": 0.20503965898667242, "grad_norm": 0.42182421684265137, "learning_rate": 1.843564849476705e-05, "loss": 0.4133675694465637, "step": 3800, "token_acc": 0.8587693141772839 }, { "epoch": 0.2050936167916689, "grad_norm": 0.41404786705970764, "learning_rate": 1.843470987728615e-05, "loss": 0.40113043785095215, "step": 3801, "token_acc": 0.8659368963296845 }, { "epoch": 0.2051475745966654, "grad_norm": 0.3806251287460327, "learning_rate": 1.843377100221142e-05, "loss": 0.3906305432319641, "step": 3802, "token_acc": 0.86578208726716 }, { "epoch": 0.2052015324016619, "grad_norm": 0.5912054181098938, "learning_rate": 1.8432831869571534e-05, "loss": 0.43643903732299805, "step": 3803, "token_acc": 0.8543689320388349 }, { "epoch": 0.2052554902066584, "grad_norm": 0.3973846435546875, "learning_rate": 1.8431892479395173e-05, "loss": 0.44009292125701904, "step": 3804, "token_acc": 0.8478405315614618 }, { "epoch": 0.20530944801165488, "grad_norm": 0.4610911011695862, "learning_rate": 1.8430952831711025e-05, "loss": 0.41408830881118774, "step": 3805, "token_acc": 0.8574275640042925 }, { "epoch": 0.20536340581665138, "grad_norm": 0.483884334564209, "learning_rate": 1.8430012926547783e-05, "loss": 0.4338398277759552, "step": 3806, "token_acc": 0.8512334536702768 }, { "epoch": 0.20541736362164786, "grad_norm": 0.38089096546173096, "learning_rate": 1.8429072763934156e-05, "loss": 0.4137125611305237, "step": 3807, "token_acc": 0.8667391895567038 }, { "epoch": 0.20547132142664437, "grad_norm": 0.45139896869659424, "learning_rate": 1.8428132343898858e-05, "loss": 0.4549321234226227, "step": 3808, "token_acc": 0.8446745562130178 }, { "epoch": 0.20552527923164085, "grad_norm": 0.35521039366722107, "learning_rate": 1.84271916664706e-05, "loss": 0.44991806149482727, "step": 3809, "token_acc": 0.8489747634069401 }, { "epoch": 0.20557923703663736, "grad_norm": 0.451561838388443, "learning_rate": 1.842625073167812e-05, "loss": 0.45788970589637756, "step": 3810, "token_acc": 0.8415727088753975 }, { "epoch": 0.20563319484163384, "grad_norm": 0.3538282811641693, "learning_rate": 1.842530953955015e-05, "loss": 0.4160671830177307, "step": 3811, "token_acc": 0.8593272171253823 }, { "epoch": 0.20568715264663034, "grad_norm": 0.5737684369087219, "learning_rate": 1.8424368090115434e-05, "loss": 0.42658504843711853, "step": 3812, "token_acc": 0.8511507052709726 }, { "epoch": 0.20574111045162682, "grad_norm": 0.46377891302108765, "learning_rate": 1.8423426383402723e-05, "loss": 0.4137172996997833, "step": 3813, "token_acc": 0.858242815894404 }, { "epoch": 0.20579506825662333, "grad_norm": 0.4325103163719177, "learning_rate": 1.8422484419440776e-05, "loss": 0.37087374925613403, "step": 3814, "token_acc": 0.8688900891169322 }, { "epoch": 0.2058490260616198, "grad_norm": 0.40092363953590393, "learning_rate": 1.8421542198258363e-05, "loss": 0.4040747880935669, "step": 3815, "token_acc": 0.8573279924599434 }, { "epoch": 0.20590298386661632, "grad_norm": 0.5027853846549988, "learning_rate": 1.8420599719884254e-05, "loss": 0.38826873898506165, "step": 3816, "token_acc": 0.8630764823832713 }, { "epoch": 0.2059569416716128, "grad_norm": 0.46122288703918457, "learning_rate": 1.8419656984347236e-05, "loss": 0.44997677206993103, "step": 3817, "token_acc": 0.8495341398970936 }, { "epoch": 0.2060108994766093, "grad_norm": 0.4382401704788208, "learning_rate": 1.84187139916761e-05, "loss": 0.38386160135269165, "step": 3818, "token_acc": 0.8639736191261336 }, { "epoch": 0.20606485728160578, "grad_norm": 0.5275210738182068, "learning_rate": 1.8417770741899648e-05, "loss": 0.419184148311615, "step": 3819, "token_acc": 0.8558282208588958 }, { "epoch": 0.20611881508660226, "grad_norm": 0.4422931373119354, "learning_rate": 1.841682723504668e-05, "loss": 0.40614891052246094, "step": 3820, "token_acc": 0.8596134282807731 }, { "epoch": 0.20617277289159877, "grad_norm": 0.4536811411380768, "learning_rate": 1.841588347114601e-05, "loss": 0.4433903098106384, "step": 3821, "token_acc": 0.8439855290925535 }, { "epoch": 0.20622673069659525, "grad_norm": 0.44121673703193665, "learning_rate": 1.8414939450226467e-05, "loss": 0.3959607481956482, "step": 3822, "token_acc": 0.8631360078277887 }, { "epoch": 0.20628068850159176, "grad_norm": 0.503661036491394, "learning_rate": 1.841399517231688e-05, "loss": 0.4126180410385132, "step": 3823, "token_acc": 0.8529874213836478 }, { "epoch": 0.20633464630658824, "grad_norm": 0.3950260281562805, "learning_rate": 1.841305063744608e-05, "loss": 0.4280208349227905, "step": 3824, "token_acc": 0.8542458808618505 }, { "epoch": 0.20638860411158474, "grad_norm": 0.4413492679595947, "learning_rate": 1.841210584564292e-05, "loss": 0.3870498239994049, "step": 3825, "token_acc": 0.8665091553455405 }, { "epoch": 0.20644256191658122, "grad_norm": 0.5641450881958008, "learning_rate": 1.8411160796936246e-05, "loss": 0.4177800714969635, "step": 3826, "token_acc": 0.8542565712043939 }, { "epoch": 0.20649651972157773, "grad_norm": 0.50039142370224, "learning_rate": 1.841021549135493e-05, "loss": 0.40737903118133545, "step": 3827, "token_acc": 0.858751846381093 }, { "epoch": 0.2065504775265742, "grad_norm": 0.4731719195842743, "learning_rate": 1.8409269928927835e-05, "loss": 0.3795812726020813, "step": 3828, "token_acc": 0.8683817427385893 }, { "epoch": 0.20660443533157072, "grad_norm": 0.4394840598106384, "learning_rate": 1.8408324109683838e-05, "loss": 0.4345824718475342, "step": 3829, "token_acc": 0.8507776868202549 }, { "epoch": 0.2066583931365672, "grad_norm": 0.37623369693756104, "learning_rate": 1.840737803365183e-05, "loss": 0.4350890517234802, "step": 3830, "token_acc": 0.8460348561410745 }, { "epoch": 0.2067123509415637, "grad_norm": 0.3942546844482422, "learning_rate": 1.8406431700860694e-05, "loss": 0.4204024076461792, "step": 3831, "token_acc": 0.8547873077456006 }, { "epoch": 0.20676630874656018, "grad_norm": 0.43717026710510254, "learning_rate": 1.8405485111339338e-05, "loss": 0.4523083567619324, "step": 3832, "token_acc": 0.8494123724654526 }, { "epoch": 0.2068202665515567, "grad_norm": 0.4611395001411438, "learning_rate": 1.8404538265116668e-05, "loss": 0.4836949110031128, "step": 3833, "token_acc": 0.8374676609344087 }, { "epoch": 0.20687422435655317, "grad_norm": 0.41196301579475403, "learning_rate": 1.8403591162221602e-05, "loss": 0.4166983366012573, "step": 3834, "token_acc": 0.8578254132231405 }, { "epoch": 0.20692818216154968, "grad_norm": 0.4391477108001709, "learning_rate": 1.840264380268306e-05, "loss": 0.4315876364707947, "step": 3835, "token_acc": 0.8489795918367347 }, { "epoch": 0.20698213996654616, "grad_norm": 0.3344557285308838, "learning_rate": 1.8401696186529983e-05, "loss": 0.45714879035949707, "step": 3836, "token_acc": 0.847009534816527 }, { "epoch": 0.20703609777154267, "grad_norm": 0.2950139045715332, "learning_rate": 1.84007483137913e-05, "loss": 0.369291752576828, "step": 3837, "token_acc": 0.8687434371718586 }, { "epoch": 0.20709005557653914, "grad_norm": 0.44927915930747986, "learning_rate": 1.839980018449597e-05, "loss": 0.4129580855369568, "step": 3838, "token_acc": 0.8574912891986063 }, { "epoch": 0.20714401338153565, "grad_norm": 0.4527749717235565, "learning_rate": 1.839885179867294e-05, "loss": 0.3988440930843353, "step": 3839, "token_acc": 0.8639160839160839 }, { "epoch": 0.20719797118653213, "grad_norm": 0.48657265305519104, "learning_rate": 1.8397903156351176e-05, "loss": 0.4628196954727173, "step": 3840, "token_acc": 0.847924135538337 }, { "epoch": 0.2072519289915286, "grad_norm": 0.3700147569179535, "learning_rate": 1.8396954257559647e-05, "loss": 0.4118008017539978, "step": 3841, "token_acc": 0.8552498482702812 }, { "epoch": 0.20730588679652512, "grad_norm": 0.47318223118782043, "learning_rate": 1.8396005102327338e-05, "loss": 0.39037325978279114, "step": 3842, "token_acc": 0.8614354662930095 }, { "epoch": 0.2073598446015216, "grad_norm": 0.400684118270874, "learning_rate": 1.839505569068323e-05, "loss": 0.4047275185585022, "step": 3843, "token_acc": 0.8558222752348292 }, { "epoch": 0.2074138024065181, "grad_norm": 0.4005488455295563, "learning_rate": 1.8394106022656326e-05, "loss": 0.3722267746925354, "step": 3844, "token_acc": 0.8735280623948616 }, { "epoch": 0.20746776021151458, "grad_norm": 0.43915024399757385, "learning_rate": 1.839315609827562e-05, "loss": 0.39848989248275757, "step": 3845, "token_acc": 0.8648007590132827 }, { "epoch": 0.2075217180165111, "grad_norm": 0.44280746579170227, "learning_rate": 1.8392205917570124e-05, "loss": 0.4393000602722168, "step": 3846, "token_acc": 0.8440219142765066 }, { "epoch": 0.20757567582150757, "grad_norm": 0.4237792193889618, "learning_rate": 1.839125548056886e-05, "loss": 0.41284671425819397, "step": 3847, "token_acc": 0.8586516026034631 }, { "epoch": 0.20762963362650408, "grad_norm": 0.500539243221283, "learning_rate": 1.8390304787300848e-05, "loss": 0.37311482429504395, "step": 3848, "token_acc": 0.8690909090909091 }, { "epoch": 0.20768359143150056, "grad_norm": 0.39695146679878235, "learning_rate": 1.8389353837795127e-05, "loss": 0.3557473421096802, "step": 3849, "token_acc": 0.8766533864541832 }, { "epoch": 0.20773754923649707, "grad_norm": 0.4465285837650299, "learning_rate": 1.838840263208074e-05, "loss": 0.4229225218296051, "step": 3850, "token_acc": 0.8582883577486508 }, { "epoch": 0.20779150704149355, "grad_norm": 0.4926846921443939, "learning_rate": 1.8387451170186733e-05, "loss": 0.42990976572036743, "step": 3851, "token_acc": 0.8609152041987863 }, { "epoch": 0.20784546484649005, "grad_norm": 0.3893192708492279, "learning_rate": 1.8386499452142164e-05, "loss": 0.3916095495223999, "step": 3852, "token_acc": 0.8635830618892508 }, { "epoch": 0.20789942265148653, "grad_norm": 0.5137760043144226, "learning_rate": 1.83855474779761e-05, "loss": 0.42340177297592163, "step": 3853, "token_acc": 0.85701411835386 }, { "epoch": 0.20795338045648304, "grad_norm": 0.4626105725765228, "learning_rate": 1.838459524771761e-05, "loss": 0.3913595676422119, "step": 3854, "token_acc": 0.8617870352345727 }, { "epoch": 0.20800733826147952, "grad_norm": 0.4018542766571045, "learning_rate": 1.8383642761395778e-05, "loss": 0.4070535898208618, "step": 3855, "token_acc": 0.8575774336283186 }, { "epoch": 0.20806129606647603, "grad_norm": 0.4757767617702484, "learning_rate": 1.8382690019039697e-05, "loss": 0.4344814121723175, "step": 3856, "token_acc": 0.8523386161577117 }, { "epoch": 0.2081152538714725, "grad_norm": 0.5268154740333557, "learning_rate": 1.838173702067845e-05, "loss": 0.46113884449005127, "step": 3857, "token_acc": 0.8494291470785762 }, { "epoch": 0.208169211676469, "grad_norm": 0.5591811537742615, "learning_rate": 1.838078376634116e-05, "loss": 0.46860992908477783, "step": 3858, "token_acc": 0.8432982917214192 }, { "epoch": 0.2082231694814655, "grad_norm": 0.41314056515693665, "learning_rate": 1.8379830256056923e-05, "loss": 0.47523242235183716, "step": 3859, "token_acc": 0.8362668849774867 }, { "epoch": 0.208277127286462, "grad_norm": 0.40235742926597595, "learning_rate": 1.8378876489854866e-05, "loss": 0.4554019570350647, "step": 3860, "token_acc": 0.8463141025641026 }, { "epoch": 0.20833108509145848, "grad_norm": 0.35547542572021484, "learning_rate": 1.837792246776412e-05, "loss": 0.43739062547683716, "step": 3861, "token_acc": 0.8516113221418944 }, { "epoch": 0.20838504289645496, "grad_norm": 0.28269317746162415, "learning_rate": 1.837696818981381e-05, "loss": 0.39398419857025146, "step": 3862, "token_acc": 0.8591256072172103 }, { "epoch": 0.20843900070145147, "grad_norm": 0.49038130044937134, "learning_rate": 1.837601365603309e-05, "loss": 0.4712362289428711, "step": 3863, "token_acc": 0.8431061806656102 }, { "epoch": 0.20849295850644795, "grad_norm": 0.471757709980011, "learning_rate": 1.8375058866451104e-05, "loss": 0.4433193802833557, "step": 3864, "token_acc": 0.8514421730607673 }, { "epoch": 0.20854691631144445, "grad_norm": 0.36413875222206116, "learning_rate": 1.837410382109702e-05, "loss": 0.42968976497650146, "step": 3865, "token_acc": 0.85415337501595 }, { "epoch": 0.20860087411644093, "grad_norm": 0.4345724284648895, "learning_rate": 1.8373148519999994e-05, "loss": 0.3775785565376282, "step": 3866, "token_acc": 0.8670605612998523 }, { "epoch": 0.20865483192143744, "grad_norm": 0.4785427451133728, "learning_rate": 1.837219296318921e-05, "loss": 0.4021070897579193, "step": 3867, "token_acc": 0.8604400677027235 }, { "epoch": 0.20870878972643392, "grad_norm": 0.4571473300457001, "learning_rate": 1.8371237150693842e-05, "loss": 0.3913080394268036, "step": 3868, "token_acc": 0.8644560791157649 }, { "epoch": 0.20876274753143043, "grad_norm": 0.3161587417125702, "learning_rate": 1.8370281082543084e-05, "loss": 0.42316460609436035, "step": 3869, "token_acc": 0.8570868457165262 }, { "epoch": 0.2088167053364269, "grad_norm": 0.3755764067173004, "learning_rate": 1.8369324758766137e-05, "loss": 0.4130823612213135, "step": 3870, "token_acc": 0.857579088799316 }, { "epoch": 0.2088706631414234, "grad_norm": 0.3984343111515045, "learning_rate": 1.8368368179392204e-05, "loss": 0.3918590843677521, "step": 3871, "token_acc": 0.8686844326540456 }, { "epoch": 0.2089246209464199, "grad_norm": 0.44775521755218506, "learning_rate": 1.8367411344450498e-05, "loss": 0.38094672560691833, "step": 3872, "token_acc": 0.8642370845014017 }, { "epoch": 0.2089785787514164, "grad_norm": 0.40654614567756653, "learning_rate": 1.8366454253970244e-05, "loss": 0.47690528631210327, "step": 3873, "token_acc": 0.8368572174367006 }, { "epoch": 0.20903253655641288, "grad_norm": 0.49125567078590393, "learning_rate": 1.8365496907980666e-05, "loss": 0.4576396346092224, "step": 3874, "token_acc": 0.8490217792543374 }, { "epoch": 0.2090864943614094, "grad_norm": 0.4356318712234497, "learning_rate": 1.8364539306511005e-05, "loss": 0.46198779344558716, "step": 3875, "token_acc": 0.8417089288458584 }, { "epoch": 0.20914045216640587, "grad_norm": 0.4720188081264496, "learning_rate": 1.8363581449590505e-05, "loss": 0.46923622488975525, "step": 3876, "token_acc": 0.8431070678796361 }, { "epoch": 0.20919440997140237, "grad_norm": 0.4370291531085968, "learning_rate": 1.8362623337248417e-05, "loss": 0.45152848958969116, "step": 3877, "token_acc": 0.8481353901452748 }, { "epoch": 0.20924836777639885, "grad_norm": 0.4022461473941803, "learning_rate": 1.8361664969514005e-05, "loss": 0.3830094039440155, "step": 3878, "token_acc": 0.86888720666162 }, { "epoch": 0.20930232558139536, "grad_norm": 0.40738019347190857, "learning_rate": 1.8360706346416535e-05, "loss": 0.38016390800476074, "step": 3879, "token_acc": 0.8637836086815679 }, { "epoch": 0.20935628338639184, "grad_norm": 0.5127236247062683, "learning_rate": 1.835974746798528e-05, "loss": 0.4379579424858093, "step": 3880, "token_acc": 0.849524097295664 }, { "epoch": 0.20941024119138835, "grad_norm": 0.44920673966407776, "learning_rate": 1.835878833424953e-05, "loss": 0.39116406440734863, "step": 3881, "token_acc": 0.8623890784982935 }, { "epoch": 0.20946419899638483, "grad_norm": 0.3772052526473999, "learning_rate": 1.8357828945238576e-05, "loss": 0.4371964633464813, "step": 3882, "token_acc": 0.8533811475409836 }, { "epoch": 0.20951815680138133, "grad_norm": 0.47407856583595276, "learning_rate": 1.8356869300981715e-05, "loss": 0.4277961254119873, "step": 3883, "token_acc": 0.8555071561916615 }, { "epoch": 0.2095721146063778, "grad_norm": 0.4254858195781708, "learning_rate": 1.8355909401508253e-05, "loss": 0.4266932010650635, "step": 3884, "token_acc": 0.8537435586541376 }, { "epoch": 0.2096260724113743, "grad_norm": 0.4262836277484894, "learning_rate": 1.8354949246847505e-05, "loss": 0.3682267963886261, "step": 3885, "token_acc": 0.8724299065420561 }, { "epoch": 0.2096800302163708, "grad_norm": 0.44797906279563904, "learning_rate": 1.8353988837028798e-05, "loss": 0.3952181935310364, "step": 3886, "token_acc": 0.8655544651619235 }, { "epoch": 0.20973398802136728, "grad_norm": 0.49956047534942627, "learning_rate": 1.835302817208146e-05, "loss": 0.4340037703514099, "step": 3887, "token_acc": 0.8482522275531186 }, { "epoch": 0.2097879458263638, "grad_norm": 0.4180818498134613, "learning_rate": 1.835206725203483e-05, "loss": 0.4278780221939087, "step": 3888, "token_acc": 0.8550448735382105 }, { "epoch": 0.20984190363136027, "grad_norm": 0.47607678174972534, "learning_rate": 1.8351106076918253e-05, "loss": 0.3960038721561432, "step": 3889, "token_acc": 0.8620317002881844 }, { "epoch": 0.20989586143635677, "grad_norm": 0.451585590839386, "learning_rate": 1.8350144646761086e-05, "loss": 0.4505164325237274, "step": 3890, "token_acc": 0.8501790458588425 }, { "epoch": 0.20994981924135325, "grad_norm": 0.47130057215690613, "learning_rate": 1.8349182961592688e-05, "loss": 0.41332489252090454, "step": 3891, "token_acc": 0.8618094475580464 }, { "epoch": 0.21000377704634976, "grad_norm": 0.39930084347724915, "learning_rate": 1.8348221021442428e-05, "loss": 0.43698710203170776, "step": 3892, "token_acc": 0.8523815772208372 }, { "epoch": 0.21005773485134624, "grad_norm": 0.45557475090026855, "learning_rate": 1.834725882633969e-05, "loss": 0.4421054720878601, "step": 3893, "token_acc": 0.85425782564793 }, { "epoch": 0.21011169265634275, "grad_norm": 0.4485514163970947, "learning_rate": 1.8346296376313848e-05, "loss": 0.35525673627853394, "step": 3894, "token_acc": 0.8719753629564452 }, { "epoch": 0.21016565046133923, "grad_norm": 0.35096561908721924, "learning_rate": 1.8345333671394305e-05, "loss": 0.40265101194381714, "step": 3895, "token_acc": 0.8620263128303159 }, { "epoch": 0.21021960826633573, "grad_norm": 0.41024526953697205, "learning_rate": 1.8344370711610456e-05, "loss": 0.41042736172676086, "step": 3896, "token_acc": 0.8605552776388194 }, { "epoch": 0.2102735660713322, "grad_norm": 0.4840020537376404, "learning_rate": 1.8343407496991713e-05, "loss": 0.37445396184921265, "step": 3897, "token_acc": 0.8679418515684775 }, { "epoch": 0.21032752387632872, "grad_norm": 0.47523388266563416, "learning_rate": 1.834244402756749e-05, "loss": 0.4433235824108124, "step": 3898, "token_acc": 0.8526877308165778 }, { "epoch": 0.2103814816813252, "grad_norm": 0.4907536804676056, "learning_rate": 1.834148030336721e-05, "loss": 0.3924087882041931, "step": 3899, "token_acc": 0.8699198834668609 }, { "epoch": 0.2104354394863217, "grad_norm": 0.3902684152126312, "learning_rate": 1.834051632442031e-05, "loss": 0.36466166377067566, "step": 3900, "token_acc": 0.8715497509088461 }, { "epoch": 0.2104893972913182, "grad_norm": 0.5330954790115356, "learning_rate": 1.8339552090756224e-05, "loss": 0.44772467017173767, "step": 3901, "token_acc": 0.8458672086720868 }, { "epoch": 0.2105433550963147, "grad_norm": 0.4450390934944153, "learning_rate": 1.8338587602404403e-05, "loss": 0.3932553231716156, "step": 3902, "token_acc": 0.8676618343619923 }, { "epoch": 0.21059731290131117, "grad_norm": 0.38071587681770325, "learning_rate": 1.83376228593943e-05, "loss": 0.378839373588562, "step": 3903, "token_acc": 0.8694244074782864 }, { "epoch": 0.21065127070630768, "grad_norm": 0.41593456268310547, "learning_rate": 1.8336657861755382e-05, "loss": 0.4129400849342346, "step": 3904, "token_acc": 0.8624223602484472 }, { "epoch": 0.21070522851130416, "grad_norm": 0.47009503841400146, "learning_rate": 1.8335692609517112e-05, "loss": 0.40040212869644165, "step": 3905, "token_acc": 0.8621149042464613 }, { "epoch": 0.21075918631630064, "grad_norm": 0.5180153846740723, "learning_rate": 1.8334727102708972e-05, "loss": 0.46356767416000366, "step": 3906, "token_acc": 0.8453014184397163 }, { "epoch": 0.21081314412129715, "grad_norm": 0.40513500571250916, "learning_rate": 1.8333761341360455e-05, "loss": 0.41654467582702637, "step": 3907, "token_acc": 0.8596033402922756 }, { "epoch": 0.21086710192629363, "grad_norm": 0.4815917909145355, "learning_rate": 1.8332795325501048e-05, "loss": 0.4503268003463745, "step": 3908, "token_acc": 0.8479081929110982 }, { "epoch": 0.21092105973129013, "grad_norm": 0.39833298325538635, "learning_rate": 1.8331829055160258e-05, "loss": 0.37712255120277405, "step": 3909, "token_acc": 0.8652904128761372 }, { "epoch": 0.2109750175362866, "grad_norm": 0.3629244267940521, "learning_rate": 1.8330862530367586e-05, "loss": 0.40855181217193604, "step": 3910, "token_acc": 0.8596467754883597 }, { "epoch": 0.21102897534128312, "grad_norm": 0.47411492466926575, "learning_rate": 1.8329895751152557e-05, "loss": 0.3958485424518585, "step": 3911, "token_acc": 0.8618152085036794 }, { "epoch": 0.2110829331462796, "grad_norm": 0.46865901350975037, "learning_rate": 1.8328928717544693e-05, "loss": 0.40524446964263916, "step": 3912, "token_acc": 0.8632895294616363 }, { "epoch": 0.2111368909512761, "grad_norm": 0.5128245949745178, "learning_rate": 1.832796142957353e-05, "loss": 0.39375925064086914, "step": 3913, "token_acc": 0.8624102154828411 }, { "epoch": 0.2111908487562726, "grad_norm": 0.42629554867744446, "learning_rate": 1.8326993887268605e-05, "loss": 0.42563319206237793, "step": 3914, "token_acc": 0.8569850039463299 }, { "epoch": 0.2112448065612691, "grad_norm": 0.4010469317436218, "learning_rate": 1.832602609065947e-05, "loss": 0.4262152314186096, "step": 3915, "token_acc": 0.8512732385564755 }, { "epoch": 0.21129876436626557, "grad_norm": 0.39223048090934753, "learning_rate": 1.8325058039775675e-05, "loss": 0.3671064078807831, "step": 3916, "token_acc": 0.8740749727041126 }, { "epoch": 0.21135272217126208, "grad_norm": 0.4016273319721222, "learning_rate": 1.832408973464679e-05, "loss": 0.43308424949645996, "step": 3917, "token_acc": 0.8503490976155974 }, { "epoch": 0.21140667997625856, "grad_norm": 0.2714990973472595, "learning_rate": 1.8323121175302384e-05, "loss": 0.384507954120636, "step": 3918, "token_acc": 0.8679942331951703 }, { "epoch": 0.21146063778125507, "grad_norm": 0.3792840242385864, "learning_rate": 1.8322152361772042e-05, "loss": 0.4568939507007599, "step": 3919, "token_acc": 0.8457326508748958 }, { "epoch": 0.21151459558625155, "grad_norm": 0.4887814223766327, "learning_rate": 1.8321183294085347e-05, "loss": 0.45336443185806274, "step": 3920, "token_acc": 0.8423715620134925 }, { "epoch": 0.21156855339124805, "grad_norm": 0.4286600649356842, "learning_rate": 1.8320213972271888e-05, "loss": 0.4315122365951538, "step": 3921, "token_acc": 0.856592729513247 }, { "epoch": 0.21162251119624453, "grad_norm": 0.44339632987976074, "learning_rate": 1.831924439636128e-05, "loss": 0.4089134931564331, "step": 3922, "token_acc": 0.8572655998281602 }, { "epoch": 0.21167646900124104, "grad_norm": 0.42153099179267883, "learning_rate": 1.831827456638312e-05, "loss": 0.4152337312698364, "step": 3923, "token_acc": 0.8568912373403786 }, { "epoch": 0.21173042680623752, "grad_norm": 0.464531809091568, "learning_rate": 1.831730448236704e-05, "loss": 0.4258645176887512, "step": 3924, "token_acc": 0.8526457802428421 }, { "epoch": 0.21178438461123403, "grad_norm": 0.538783073425293, "learning_rate": 1.8316334144342663e-05, "loss": 0.4818642735481262, "step": 3925, "token_acc": 0.8405156072286112 }, { "epoch": 0.2118383424162305, "grad_norm": 0.4578881859779358, "learning_rate": 1.8315363552339614e-05, "loss": 0.422231525182724, "step": 3926, "token_acc": 0.8538908820982308 }, { "epoch": 0.211892300221227, "grad_norm": 0.3736870288848877, "learning_rate": 1.831439270638754e-05, "loss": 0.4032061696052551, "step": 3927, "token_acc": 0.8578155507063282 }, { "epoch": 0.2119462580262235, "grad_norm": 0.3420395255088806, "learning_rate": 1.8313421606516096e-05, "loss": 0.3547786474227905, "step": 3928, "token_acc": 0.8731017027151403 }, { "epoch": 0.21200021583121997, "grad_norm": 0.43269988894462585, "learning_rate": 1.831245025275493e-05, "loss": 0.3509657084941864, "step": 3929, "token_acc": 0.8707349081364829 }, { "epoch": 0.21205417363621648, "grad_norm": 0.34373244643211365, "learning_rate": 1.8311478645133712e-05, "loss": 0.42503613233566284, "step": 3930, "token_acc": 0.8554468362687541 }, { "epoch": 0.21210813144121296, "grad_norm": 0.3407756984233856, "learning_rate": 1.8310506783682113e-05, "loss": 0.43811583518981934, "step": 3931, "token_acc": 0.8547651337689557 }, { "epoch": 0.21216208924620947, "grad_norm": 0.4965348541736603, "learning_rate": 1.8309534668429814e-05, "loss": 0.39840519428253174, "step": 3932, "token_acc": 0.8578822833481219 }, { "epoch": 0.21221604705120595, "grad_norm": 0.4320782721042633, "learning_rate": 1.83085622994065e-05, "loss": 0.42711079120635986, "step": 3933, "token_acc": 0.8514137334102712 }, { "epoch": 0.21227000485620245, "grad_norm": 0.4382365047931671, "learning_rate": 1.8307589676641876e-05, "loss": 0.37533652782440186, "step": 3934, "token_acc": 0.8655330438977328 }, { "epoch": 0.21232396266119893, "grad_norm": 0.35382524132728577, "learning_rate": 1.8306616800165637e-05, "loss": 0.3780268430709839, "step": 3935, "token_acc": 0.8757938266134987 }, { "epoch": 0.21237792046619544, "grad_norm": 0.44388478994369507, "learning_rate": 1.8305643670007497e-05, "loss": 0.33244383335113525, "step": 3936, "token_acc": 0.8821040755887745 }, { "epoch": 0.21243187827119192, "grad_norm": 0.46646249294281006, "learning_rate": 1.8304670286197175e-05, "loss": 0.4177853465080261, "step": 3937, "token_acc": 0.8577726975832789 }, { "epoch": 0.21248583607618843, "grad_norm": 0.41585949063301086, "learning_rate": 1.8303696648764396e-05, "loss": 0.406254380941391, "step": 3938, "token_acc": 0.8601044727391446 }, { "epoch": 0.2125397938811849, "grad_norm": 0.42223426699638367, "learning_rate": 1.83027227577389e-05, "loss": 0.46453621983528137, "step": 3939, "token_acc": 0.8381634276622344 }, { "epoch": 0.21259375168618141, "grad_norm": 0.38042664527893066, "learning_rate": 1.8301748613150423e-05, "loss": 0.43495166301727295, "step": 3940, "token_acc": 0.852570564516129 }, { "epoch": 0.2126477094911779, "grad_norm": 0.41756364703178406, "learning_rate": 1.830077421502872e-05, "loss": 0.34576156735420227, "step": 3941, "token_acc": 0.8773452814337721 }, { "epoch": 0.2127016672961744, "grad_norm": 0.421064555644989, "learning_rate": 1.8299799563403542e-05, "loss": 0.4228849411010742, "step": 3942, "token_acc": 0.8530557834290402 }, { "epoch": 0.21275562510117088, "grad_norm": 0.5609126091003418, "learning_rate": 1.829882465830467e-05, "loss": 0.42325541377067566, "step": 3943, "token_acc": 0.856036866359447 }, { "epoch": 0.2128095829061674, "grad_norm": 0.5209319591522217, "learning_rate": 1.829784949976186e-05, "loss": 0.424386203289032, "step": 3944, "token_acc": 0.8523769808173478 }, { "epoch": 0.21286354071116387, "grad_norm": 0.5023404955863953, "learning_rate": 1.82968740878049e-05, "loss": 0.4427074193954468, "step": 3945, "token_acc": 0.8504672897196262 }, { "epoch": 0.21291749851616038, "grad_norm": 2.7879409790039062, "learning_rate": 1.8295898422463578e-05, "loss": 0.41526567935943604, "step": 3946, "token_acc": 0.8611466598413104 }, { "epoch": 0.21297145632115685, "grad_norm": 0.36615800857543945, "learning_rate": 1.8294922503767692e-05, "loss": 0.3677678406238556, "step": 3947, "token_acc": 0.8691335740072202 }, { "epoch": 0.21302541412615336, "grad_norm": 0.4389493763446808, "learning_rate": 1.829394633174705e-05, "loss": 0.4482235908508301, "step": 3948, "token_acc": 0.8514452473596442 }, { "epoch": 0.21307937193114984, "grad_norm": 0.42481711506843567, "learning_rate": 1.8292969906431456e-05, "loss": 0.4395714998245239, "step": 3949, "token_acc": 0.8502250716136953 }, { "epoch": 0.21313332973614632, "grad_norm": 0.407898873090744, "learning_rate": 1.8291993227850736e-05, "loss": 0.36649656295776367, "step": 3950, "token_acc": 0.8669039145907473 }, { "epoch": 0.21318728754114283, "grad_norm": 0.4630869925022125, "learning_rate": 1.829101629603471e-05, "loss": 0.37086939811706543, "step": 3951, "token_acc": 0.8716452742123687 }, { "epoch": 0.2132412453461393, "grad_norm": 0.3671571612358093, "learning_rate": 1.829003911101322e-05, "loss": 0.42661648988723755, "step": 3952, "token_acc": 0.8554856895056374 }, { "epoch": 0.21329520315113581, "grad_norm": 0.3914039433002472, "learning_rate": 1.828906167281611e-05, "loss": 0.4487963318824768, "step": 3953, "token_acc": 0.8492360910925338 }, { "epoch": 0.2133491609561323, "grad_norm": 0.36192625761032104, "learning_rate": 1.828808398147323e-05, "loss": 0.4481073021888733, "step": 3954, "token_acc": 0.8431592039800995 }, { "epoch": 0.2134031187611288, "grad_norm": 0.44980037212371826, "learning_rate": 1.8287106037014428e-05, "loss": 0.4084872007369995, "step": 3955, "token_acc": 0.8570977917981073 }, { "epoch": 0.21345707656612528, "grad_norm": 0.5236430168151855, "learning_rate": 1.8286127839469588e-05, "loss": 0.48490241169929504, "step": 3956, "token_acc": 0.8419021151767794 }, { "epoch": 0.2135110343711218, "grad_norm": 0.46501991152763367, "learning_rate": 1.8285149388868567e-05, "loss": 0.40582194924354553, "step": 3957, "token_acc": 0.8579921861729234 }, { "epoch": 0.21356499217611827, "grad_norm": 0.4401121437549591, "learning_rate": 1.8284170685241257e-05, "loss": 0.40737468004226685, "step": 3958, "token_acc": 0.8608289313287282 }, { "epoch": 0.21361894998111478, "grad_norm": 0.4240461587905884, "learning_rate": 1.8283191728617545e-05, "loss": 0.424256294965744, "step": 3959, "token_acc": 0.8577065037546138 }, { "epoch": 0.21367290778611125, "grad_norm": 0.32308295369148254, "learning_rate": 1.8282212519027328e-05, "loss": 0.43040138483047485, "step": 3960, "token_acc": 0.8520361990950226 }, { "epoch": 0.21372686559110776, "grad_norm": 0.4194265305995941, "learning_rate": 1.828123305650051e-05, "loss": 0.4237635135650635, "step": 3961, "token_acc": 0.8546433378196501 }, { "epoch": 0.21378082339610424, "grad_norm": 0.41982418298721313, "learning_rate": 1.8280253341067e-05, "loss": 0.4491044282913208, "step": 3962, "token_acc": 0.8474087087463923 }, { "epoch": 0.21383478120110075, "grad_norm": 0.44201338291168213, "learning_rate": 1.8279273372756726e-05, "loss": 0.4066973924636841, "step": 3963, "token_acc": 0.8571428571428571 }, { "epoch": 0.21388873900609723, "grad_norm": 0.43759337067604065, "learning_rate": 1.8278293151599614e-05, "loss": 0.43644005060195923, "step": 3964, "token_acc": 0.8516155308172685 }, { "epoch": 0.21394269681109374, "grad_norm": 0.44374608993530273, "learning_rate": 1.8277312677625594e-05, "loss": 0.41833966970443726, "step": 3965, "token_acc": 0.8580300155252717 }, { "epoch": 0.21399665461609021, "grad_norm": 0.39866167306900024, "learning_rate": 1.8276331950864616e-05, "loss": 0.45686155557632446, "step": 3966, "token_acc": 0.844315802438724 }, { "epoch": 0.21405061242108672, "grad_norm": 0.44637787342071533, "learning_rate": 1.8275350971346624e-05, "loss": 0.4248087406158447, "step": 3967, "token_acc": 0.8548005908419498 }, { "epoch": 0.2141045702260832, "grad_norm": 0.3512602150440216, "learning_rate": 1.8274369739101584e-05, "loss": 0.41101717948913574, "step": 3968, "token_acc": 0.8615744805384841 }, { "epoch": 0.2141585280310797, "grad_norm": 0.4640073776245117, "learning_rate": 1.827338825415946e-05, "loss": 0.43708452582359314, "step": 3969, "token_acc": 0.8475213675213675 }, { "epoch": 0.2142124858360762, "grad_norm": 0.379470556974411, "learning_rate": 1.8272406516550227e-05, "loss": 0.37963390350341797, "step": 3970, "token_acc": 0.8656670746634026 }, { "epoch": 0.21426644364107267, "grad_norm": 0.45483773946762085, "learning_rate": 1.8271424526303862e-05, "loss": 0.4122718274593353, "step": 3971, "token_acc": 0.8598953681255582 }, { "epoch": 0.21432040144606918, "grad_norm": 0.5672799348831177, "learning_rate": 1.8270442283450362e-05, "loss": 0.3738257884979248, "step": 3972, "token_acc": 0.8700396825396826 }, { "epoch": 0.21437435925106565, "grad_norm": 0.37709590792655945, "learning_rate": 1.826945978801972e-05, "loss": 0.4530002474784851, "step": 3973, "token_acc": 0.8490566037735849 }, { "epoch": 0.21442831705606216, "grad_norm": 0.4455846846103668, "learning_rate": 1.8268477040041944e-05, "loss": 0.4073988199234009, "step": 3974, "token_acc": 0.8584719481251762 }, { "epoch": 0.21448227486105864, "grad_norm": 0.4912164509296417, "learning_rate": 1.8267494039547044e-05, "loss": 0.4211537837982178, "step": 3975, "token_acc": 0.85619754680439 }, { "epoch": 0.21453623266605515, "grad_norm": 0.3960840106010437, "learning_rate": 1.826651078656504e-05, "loss": 0.4092041254043579, "step": 3976, "token_acc": 0.8566142460684552 }, { "epoch": 0.21459019047105163, "grad_norm": 0.44100284576416016, "learning_rate": 1.8265527281125967e-05, "loss": 0.3954947888851166, "step": 3977, "token_acc": 0.8581384543905839 }, { "epoch": 0.21464414827604814, "grad_norm": 0.4687315821647644, "learning_rate": 1.8264543523259853e-05, "loss": 0.43782728910446167, "step": 3978, "token_acc": 0.8539642745220933 }, { "epoch": 0.21469810608104462, "grad_norm": 0.48729121685028076, "learning_rate": 1.8263559512996743e-05, "loss": 0.4563707113265991, "step": 3979, "token_acc": 0.8430411649397928 }, { "epoch": 0.21475206388604112, "grad_norm": 0.38535916805267334, "learning_rate": 1.8262575250366695e-05, "loss": 0.37603503465652466, "step": 3980, "token_acc": 0.8687799483966089 }, { "epoch": 0.2148060216910376, "grad_norm": 0.43990230560302734, "learning_rate": 1.8261590735399762e-05, "loss": 0.42695537209510803, "step": 3981, "token_acc": 0.8523868240267747 }, { "epoch": 0.2148599794960341, "grad_norm": 0.4879468083381653, "learning_rate": 1.8260605968126007e-05, "loss": 0.4953082799911499, "step": 3982, "token_acc": 0.8375466819879345 }, { "epoch": 0.2149139373010306, "grad_norm": 0.45199334621429443, "learning_rate": 1.8259620948575513e-05, "loss": 0.4352985918521881, "step": 3983, "token_acc": 0.8519593613933236 }, { "epoch": 0.2149678951060271, "grad_norm": 0.40926411747932434, "learning_rate": 1.825863567677836e-05, "loss": 0.4055977761745453, "step": 3984, "token_acc": 0.8618134965372846 }, { "epoch": 0.21502185291102358, "grad_norm": 0.4245382845401764, "learning_rate": 1.8257650152764638e-05, "loss": 0.42110297083854675, "step": 3985, "token_acc": 0.8546933667083855 }, { "epoch": 0.21507581071602008, "grad_norm": 0.36593812704086304, "learning_rate": 1.825666437656444e-05, "loss": 0.38720905780792236, "step": 3986, "token_acc": 0.8687451459003661 }, { "epoch": 0.21512976852101656, "grad_norm": 0.4581277668476105, "learning_rate": 1.825567834820788e-05, "loss": 0.38603657484054565, "step": 3987, "token_acc": 0.8634345586929497 }, { "epoch": 0.21518372632601307, "grad_norm": 0.44831573963165283, "learning_rate": 1.8254692067725063e-05, "loss": 0.43042850494384766, "step": 3988, "token_acc": 0.8490355491790212 }, { "epoch": 0.21523768413100955, "grad_norm": 0.488808810710907, "learning_rate": 1.825370553514611e-05, "loss": 0.39724066853523254, "step": 3989, "token_acc": 0.8591618734593263 }, { "epoch": 0.21529164193600606, "grad_norm": 0.4082251489162445, "learning_rate": 1.8252718750501157e-05, "loss": 0.38289812207221985, "step": 3990, "token_acc": 0.8659138252384607 }, { "epoch": 0.21534559974100254, "grad_norm": 0.5021889805793762, "learning_rate": 1.825173171382033e-05, "loss": 0.4238710403442383, "step": 3991, "token_acc": 0.8573107049608355 }, { "epoch": 0.21539955754599902, "grad_norm": 0.41171950101852417, "learning_rate": 1.8250744425133783e-05, "loss": 0.4265075922012329, "step": 3992, "token_acc": 0.8515151515151516 }, { "epoch": 0.21545351535099552, "grad_norm": 0.36103618144989014, "learning_rate": 1.824975688447166e-05, "loss": 0.37885427474975586, "step": 3993, "token_acc": 0.8704860743407815 }, { "epoch": 0.215507473155992, "grad_norm": 0.40819013118743896, "learning_rate": 1.8248769091864124e-05, "loss": 0.4547263979911804, "step": 3994, "token_acc": 0.850665140895437 }, { "epoch": 0.2155614309609885, "grad_norm": 0.5697661638259888, "learning_rate": 1.8247781047341342e-05, "loss": 0.40651923418045044, "step": 3995, "token_acc": 0.8605108055009824 }, { "epoch": 0.215615388765985, "grad_norm": 0.37866681814193726, "learning_rate": 1.8246792750933483e-05, "loss": 0.3660890460014343, "step": 3996, "token_acc": 0.8738277919863597 }, { "epoch": 0.2156693465709815, "grad_norm": 0.41027164459228516, "learning_rate": 1.8245804202670737e-05, "loss": 0.4387228488922119, "step": 3997, "token_acc": 0.850810669456067 }, { "epoch": 0.21572330437597798, "grad_norm": 0.4740930497646332, "learning_rate": 1.8244815402583294e-05, "loss": 0.43282851576805115, "step": 3998, "token_acc": 0.8571428571428571 }, { "epoch": 0.21577726218097448, "grad_norm": 0.4621020257472992, "learning_rate": 1.8243826350701345e-05, "loss": 0.3747934103012085, "step": 3999, "token_acc": 0.8722615436467812 }, { "epoch": 0.21583121998597096, "grad_norm": 0.3800727128982544, "learning_rate": 1.8242837047055095e-05, "loss": 0.3624814450740814, "step": 4000, "token_acc": 0.8746066327765675 }, { "epoch": 0.21588517779096747, "grad_norm": 0.39556509256362915, "learning_rate": 1.8241847491674766e-05, "loss": 0.4636164903640747, "step": 4001, "token_acc": 0.8462411705348133 }, { "epoch": 0.21593913559596395, "grad_norm": 0.543632447719574, "learning_rate": 1.824085768459057e-05, "loss": 0.43643149733543396, "step": 4002, "token_acc": 0.8567858816179915 }, { "epoch": 0.21599309340096046, "grad_norm": 0.4608803689479828, "learning_rate": 1.8239867625832747e-05, "loss": 0.4529202878475189, "step": 4003, "token_acc": 0.8470873786407767 }, { "epoch": 0.21604705120595694, "grad_norm": 0.5267379879951477, "learning_rate": 1.8238877315431515e-05, "loss": 0.4392588436603546, "step": 4004, "token_acc": 0.8466407010710808 }, { "epoch": 0.21610100901095344, "grad_norm": 0.43350765109062195, "learning_rate": 1.8237886753417133e-05, "loss": 0.3972274959087372, "step": 4005, "token_acc": 0.8626905041031653 }, { "epoch": 0.21615496681594992, "grad_norm": 0.37920430302619934, "learning_rate": 1.823689593981985e-05, "loss": 0.3605467677116394, "step": 4006, "token_acc": 0.8719575349559814 }, { "epoch": 0.21620892462094643, "grad_norm": 0.48454171419143677, "learning_rate": 1.8235904874669918e-05, "loss": 0.42370036244392395, "step": 4007, "token_acc": 0.8525084204928204 }, { "epoch": 0.2162628824259429, "grad_norm": 0.471123605966568, "learning_rate": 1.823491355799761e-05, "loss": 0.41859006881713867, "step": 4008, "token_acc": 0.8579209461562403 }, { "epoch": 0.21631684023093942, "grad_norm": 0.3836732804775238, "learning_rate": 1.82339219898332e-05, "loss": 0.3881373405456543, "step": 4009, "token_acc": 0.8705950317735413 }, { "epoch": 0.2163707980359359, "grad_norm": 0.45517298579216003, "learning_rate": 1.823293017020697e-05, "loss": 0.4641009271144867, "step": 4010, "token_acc": 0.84492293210966 }, { "epoch": 0.2164247558409324, "grad_norm": 0.454094260931015, "learning_rate": 1.823193809914921e-05, "loss": 0.4126129746437073, "step": 4011, "token_acc": 0.8548214011957688 }, { "epoch": 0.21647871364592888, "grad_norm": 0.4212048649787903, "learning_rate": 1.8230945776690214e-05, "loss": 0.37592750787734985, "step": 4012, "token_acc": 0.8712453066332916 }, { "epoch": 0.21653267145092536, "grad_norm": 0.4699175953865051, "learning_rate": 1.8229953202860293e-05, "loss": 0.3956981301307678, "step": 4013, "token_acc": 0.860630144762986 }, { "epoch": 0.21658662925592187, "grad_norm": 0.4547670781612396, "learning_rate": 1.8228960377689753e-05, "loss": 0.39554303884506226, "step": 4014, "token_acc": 0.8636729438274585 }, { "epoch": 0.21664058706091835, "grad_norm": 0.3503051698207855, "learning_rate": 1.8227967301208923e-05, "loss": 0.3959709405899048, "step": 4015, "token_acc": 0.8631756756756757 }, { "epoch": 0.21669454486591486, "grad_norm": 0.45719781517982483, "learning_rate": 1.822697397344813e-05, "loss": 0.391732394695282, "step": 4016, "token_acc": 0.8627251597908193 }, { "epoch": 0.21674850267091134, "grad_norm": 0.3903180956840515, "learning_rate": 1.82259803944377e-05, "loss": 0.4771510064601898, "step": 4017, "token_acc": 0.843818082788671 }, { "epoch": 0.21680246047590784, "grad_norm": 0.3661738932132721, "learning_rate": 1.8224986564207986e-05, "loss": 0.39289724826812744, "step": 4018, "token_acc": 0.8637845895835274 }, { "epoch": 0.21685641828090432, "grad_norm": 0.4653840661048889, "learning_rate": 1.8223992482789337e-05, "loss": 0.4356594681739807, "step": 4019, "token_acc": 0.8473972602739726 }, { "epoch": 0.21691037608590083, "grad_norm": 0.3802664279937744, "learning_rate": 1.822299815021211e-05, "loss": 0.3769667148590088, "step": 4020, "token_acc": 0.8643261316872428 }, { "epoch": 0.2169643338908973, "grad_norm": 0.40357816219329834, "learning_rate": 1.8222003566506678e-05, "loss": 0.4033011198043823, "step": 4021, "token_acc": 0.8643410852713178 }, { "epoch": 0.21701829169589382, "grad_norm": 0.4298226833343506, "learning_rate": 1.822100873170341e-05, "loss": 0.44523680210113525, "step": 4022, "token_acc": 0.8466812090994079 }, { "epoch": 0.2170722495008903, "grad_norm": 0.45463377237319946, "learning_rate": 1.822001364583269e-05, "loss": 0.3815106153488159, "step": 4023, "token_acc": 0.8684661525278492 }, { "epoch": 0.2171262073058868, "grad_norm": 0.5205407738685608, "learning_rate": 1.8219018308924902e-05, "loss": 0.4061731696128845, "step": 4024, "token_acc": 0.8629057187017002 }, { "epoch": 0.21718016511088328, "grad_norm": 0.366334468126297, "learning_rate": 1.821802272101045e-05, "loss": 0.38044559955596924, "step": 4025, "token_acc": 0.8695652173913043 }, { "epoch": 0.2172341229158798, "grad_norm": 0.38218724727630615, "learning_rate": 1.821702688211974e-05, "loss": 0.4394229054450989, "step": 4026, "token_acc": 0.849307774227902 }, { "epoch": 0.21728808072087627, "grad_norm": 0.49193716049194336, "learning_rate": 1.821603079228318e-05, "loss": 0.3500690758228302, "step": 4027, "token_acc": 0.8736932305055698 }, { "epoch": 0.21734203852587278, "grad_norm": 0.4440787434577942, "learning_rate": 1.8215034451531188e-05, "loss": 0.4210025668144226, "step": 4028, "token_acc": 0.8590440755580996 }, { "epoch": 0.21739599633086926, "grad_norm": 0.5817440748214722, "learning_rate": 1.82140378598942e-05, "loss": 0.3826597034931183, "step": 4029, "token_acc": 0.8720636769559309 }, { "epoch": 0.21744995413586576, "grad_norm": 0.3722723424434662, "learning_rate": 1.8213041017402643e-05, "loss": 0.3718395233154297, "step": 4030, "token_acc": 0.8747557639703009 }, { "epoch": 0.21750391194086224, "grad_norm": 0.4673904478549957, "learning_rate": 1.821204392408697e-05, "loss": 0.45877906680107117, "step": 4031, "token_acc": 0.8444144953578916 }, { "epoch": 0.21755786974585875, "grad_norm": 0.46061667799949646, "learning_rate": 1.8211046579977626e-05, "loss": 0.4356943964958191, "step": 4032, "token_acc": 0.8518852606891575 }, { "epoch": 0.21761182755085523, "grad_norm": 0.4610794186592102, "learning_rate": 1.821004898510507e-05, "loss": 0.3996904492378235, "step": 4033, "token_acc": 0.8620085946204042 }, { "epoch": 0.21766578535585174, "grad_norm": 0.42726418375968933, "learning_rate": 1.8209051139499768e-05, "loss": 0.3910858631134033, "step": 4034, "token_acc": 0.8676899462778204 }, { "epoch": 0.21771974316084822, "grad_norm": 0.4716208875179291, "learning_rate": 1.8208053043192193e-05, "loss": 0.4022632837295532, "step": 4035, "token_acc": 0.8632363907531693 }, { "epoch": 0.2177737009658447, "grad_norm": 0.38160616159439087, "learning_rate": 1.820705469621283e-05, "loss": 0.34807151556015015, "step": 4036, "token_acc": 0.876975476839237 }, { "epoch": 0.2178276587708412, "grad_norm": 0.45360639691352844, "learning_rate": 1.8206056098592166e-05, "loss": 0.42792823910713196, "step": 4037, "token_acc": 0.8550615800540703 }, { "epoch": 0.21788161657583768, "grad_norm": 0.37701892852783203, "learning_rate": 1.82050572503607e-05, "loss": 0.4111882448196411, "step": 4038, "token_acc": 0.857515598411798 }, { "epoch": 0.2179355743808342, "grad_norm": 0.4518354535102844, "learning_rate": 1.8204058151548935e-05, "loss": 0.4229029417037964, "step": 4039, "token_acc": 0.8544034090909091 }, { "epoch": 0.21798953218583067, "grad_norm": 0.529783308506012, "learning_rate": 1.8203058802187385e-05, "loss": 0.45867881178855896, "step": 4040, "token_acc": 0.8478552278820375 }, { "epoch": 0.21804348999082718, "grad_norm": 0.4605710208415985, "learning_rate": 1.8202059202306563e-05, "loss": 0.45703810453414917, "step": 4041, "token_acc": 0.8458162253999717 }, { "epoch": 0.21809744779582366, "grad_norm": 0.44793492555618286, "learning_rate": 1.8201059351937e-05, "loss": 0.4093093276023865, "step": 4042, "token_acc": 0.8601911244927346 }, { "epoch": 0.21815140560082016, "grad_norm": 0.4044716954231262, "learning_rate": 1.820005925110924e-05, "loss": 0.40627261996269226, "step": 4043, "token_acc": 0.8596214511041009 }, { "epoch": 0.21820536340581664, "grad_norm": 0.5033040642738342, "learning_rate": 1.8199058899853815e-05, "loss": 0.45012152194976807, "step": 4044, "token_acc": 0.8478664192949907 }, { "epoch": 0.21825932121081315, "grad_norm": 0.4584594666957855, "learning_rate": 1.819805829820128e-05, "loss": 0.418099045753479, "step": 4045, "token_acc": 0.8573813830527341 }, { "epoch": 0.21831327901580963, "grad_norm": 0.38099876046180725, "learning_rate": 1.8197057446182192e-05, "loss": 0.4045487642288208, "step": 4046, "token_acc": 0.8644013972689742 }, { "epoch": 0.21836723682080614, "grad_norm": 0.3654380142688751, "learning_rate": 1.8196056343827116e-05, "loss": 0.4350082278251648, "step": 4047, "token_acc": 0.8551418232753589 }, { "epoch": 0.21842119462580262, "grad_norm": 0.5142551064491272, "learning_rate": 1.8195054991166627e-05, "loss": 0.4389549493789673, "step": 4048, "token_acc": 0.8529357965977684 }, { "epoch": 0.21847515243079912, "grad_norm": 0.3541904091835022, "learning_rate": 1.8194053388231308e-05, "loss": 0.3635663390159607, "step": 4049, "token_acc": 0.8675675675675676 }, { "epoch": 0.2185291102357956, "grad_norm": 0.44691163301467896, "learning_rate": 1.819305153505174e-05, "loss": 0.4483180642127991, "step": 4050, "token_acc": 0.8468725257323833 }, { "epoch": 0.2185830680407921, "grad_norm": 0.35809245705604553, "learning_rate": 1.8192049431658526e-05, "loss": 0.44929632544517517, "step": 4051, "token_acc": 0.8439935809261806 }, { "epoch": 0.2186370258457886, "grad_norm": 0.4313889145851135, "learning_rate": 1.819104707808227e-05, "loss": 0.4549994468688965, "step": 4052, "token_acc": 0.8509933774834437 }, { "epoch": 0.2186909836507851, "grad_norm": 0.39836418628692627, "learning_rate": 1.8190044474353583e-05, "loss": 0.4204358756542206, "step": 4053, "token_acc": 0.8564088696592753 }, { "epoch": 0.21874494145578158, "grad_norm": 0.4365541338920593, "learning_rate": 1.818904162050308e-05, "loss": 0.43049001693725586, "step": 4054, "token_acc": 0.852957906712173 }, { "epoch": 0.21879889926077808, "grad_norm": 0.3904320299625397, "learning_rate": 1.818803851656139e-05, "loss": 0.3925556242465973, "step": 4055, "token_acc": 0.8648418905134309 }, { "epoch": 0.21885285706577456, "grad_norm": 0.3085077404975891, "learning_rate": 1.8187035162559154e-05, "loss": 0.388089120388031, "step": 4056, "token_acc": 0.8684312962242118 }, { "epoch": 0.21890681487077104, "grad_norm": 0.48846274614334106, "learning_rate": 1.8186031558527006e-05, "loss": 0.40636005997657776, "step": 4057, "token_acc": 0.862827801210343 }, { "epoch": 0.21896077267576755, "grad_norm": 0.41670510172843933, "learning_rate": 1.8185027704495598e-05, "loss": 0.4228927493095398, "step": 4058, "token_acc": 0.8561556181213207 }, { "epoch": 0.21901473048076403, "grad_norm": 0.3553660213947296, "learning_rate": 1.8184023600495592e-05, "loss": 0.35736575722694397, "step": 4059, "token_acc": 0.8748541045260018 }, { "epoch": 0.21906868828576054, "grad_norm": 0.3997543752193451, "learning_rate": 1.8183019246557644e-05, "loss": 0.35105639696121216, "step": 4060, "token_acc": 0.8731550489551366 }, { "epoch": 0.21912264609075702, "grad_norm": 0.4296444356441498, "learning_rate": 1.8182014642712434e-05, "loss": 0.3451133370399475, "step": 4061, "token_acc": 0.87532722513089 }, { "epoch": 0.21917660389575352, "grad_norm": 0.46438756585121155, "learning_rate": 1.818100978899064e-05, "loss": 0.4218129515647888, "step": 4062, "token_acc": 0.8639044306668683 }, { "epoch": 0.21923056170075, "grad_norm": 0.48542553186416626, "learning_rate": 1.818000468542295e-05, "loss": 0.365216463804245, "step": 4063, "token_acc": 0.870626386755419 }, { "epoch": 0.2192845195057465, "grad_norm": 0.3731532394886017, "learning_rate": 1.817899933204006e-05, "loss": 0.35891029238700867, "step": 4064, "token_acc": 0.875 }, { "epoch": 0.219338477310743, "grad_norm": 0.33615097403526306, "learning_rate": 1.8177993728872673e-05, "loss": 0.4223022758960724, "step": 4065, "token_acc": 0.8582608695652174 }, { "epoch": 0.2193924351157395, "grad_norm": 0.35768648982048035, "learning_rate": 1.81769878759515e-05, "loss": 0.3964092433452606, "step": 4066, "token_acc": 0.8656423290203327 }, { "epoch": 0.21944639292073598, "grad_norm": 0.3790445029735565, "learning_rate": 1.817598177330726e-05, "loss": 0.3935672640800476, "step": 4067, "token_acc": 0.8606706524564596 }, { "epoch": 0.21950035072573248, "grad_norm": 0.35610195994377136, "learning_rate": 1.817497542097068e-05, "loss": 0.3757684528827667, "step": 4068, "token_acc": 0.8679752066115702 }, { "epoch": 0.21955430853072896, "grad_norm": 0.5140461921691895, "learning_rate": 1.817396881897249e-05, "loss": 0.4483625888824463, "step": 4069, "token_acc": 0.8507138423339541 }, { "epoch": 0.21960826633572547, "grad_norm": 0.5557931661605835, "learning_rate": 1.817296196734343e-05, "loss": 0.48636794090270996, "step": 4070, "token_acc": 0.8387905604719764 }, { "epoch": 0.21966222414072195, "grad_norm": 0.37340742349624634, "learning_rate": 1.8171954866114254e-05, "loss": 0.38347330689430237, "step": 4071, "token_acc": 0.8700288545979175 }, { "epoch": 0.21971618194571846, "grad_norm": 0.455689013004303, "learning_rate": 1.8170947515315724e-05, "loss": 0.4508526027202606, "step": 4072, "token_acc": 0.8509743981658388 }, { "epoch": 0.21977013975071494, "grad_norm": 0.4347185492515564, "learning_rate": 1.816993991497859e-05, "loss": 0.47759777307510376, "step": 4073, "token_acc": 0.8435699399288954 }, { "epoch": 0.21982409755571145, "grad_norm": 0.3118297755718231, "learning_rate": 1.8168932065133634e-05, "loss": 0.3074713945388794, "step": 4074, "token_acc": 0.8921859545004945 }, { "epoch": 0.21987805536070792, "grad_norm": 0.5640309453010559, "learning_rate": 1.816792396581163e-05, "loss": 0.39425912499427795, "step": 4075, "token_acc": 0.8666347992351816 }, { "epoch": 0.21993201316570443, "grad_norm": 0.45142319798469543, "learning_rate": 1.816691561704337e-05, "loss": 0.4362836182117462, "step": 4076, "token_acc": 0.8547806155861166 }, { "epoch": 0.2199859709707009, "grad_norm": 0.3531453311443329, "learning_rate": 1.8165907018859647e-05, "loss": 0.38439762592315674, "step": 4077, "token_acc": 0.8621557595498963 }, { "epoch": 0.2200399287756974, "grad_norm": 0.32945677638053894, "learning_rate": 1.8164898171291266e-05, "loss": 0.3962542414665222, "step": 4078, "token_acc": 0.8627684964200477 }, { "epoch": 0.2200938865806939, "grad_norm": 0.4888354539871216, "learning_rate": 1.816388907436903e-05, "loss": 0.40987053513526917, "step": 4079, "token_acc": 0.8557868442964196 }, { "epoch": 0.22014784438569038, "grad_norm": 0.43210142850875854, "learning_rate": 1.816287972812376e-05, "loss": 0.3823862671852112, "step": 4080, "token_acc": 0.8676010511825804 }, { "epoch": 0.22020180219068688, "grad_norm": 0.3887166380882263, "learning_rate": 1.816187013258628e-05, "loss": 0.4256094694137573, "step": 4081, "token_acc": 0.8494549627079747 }, { "epoch": 0.22025575999568336, "grad_norm": 0.41030752658843994, "learning_rate": 1.816086028778743e-05, "loss": 0.3925371766090393, "step": 4082, "token_acc": 0.8638670388226338 }, { "epoch": 0.22030971780067987, "grad_norm": 0.3764514923095703, "learning_rate": 1.8159850193758037e-05, "loss": 0.43454664945602417, "step": 4083, "token_acc": 0.8528040040950973 }, { "epoch": 0.22036367560567635, "grad_norm": 0.44280874729156494, "learning_rate": 1.8158839850528964e-05, "loss": 0.40193870663642883, "step": 4084, "token_acc": 0.8609138904059712 }, { "epoch": 0.22041763341067286, "grad_norm": 0.3169417083263397, "learning_rate": 1.8157829258131055e-05, "loss": 0.3719732165336609, "step": 4085, "token_acc": 0.8698884758364313 }, { "epoch": 0.22047159121566934, "grad_norm": 0.4703620672225952, "learning_rate": 1.815681841659518e-05, "loss": 0.3735697865486145, "step": 4086, "token_acc": 0.8672268907563025 }, { "epoch": 0.22052554902066585, "grad_norm": 0.3912816345691681, "learning_rate": 1.8155807325952202e-05, "loss": 0.4484219551086426, "step": 4087, "token_acc": 0.8492462311557789 }, { "epoch": 0.22057950682566232, "grad_norm": 0.5195890665054321, "learning_rate": 1.815479598623301e-05, "loss": 0.4392603039741516, "step": 4088, "token_acc": 0.850098231827112 }, { "epoch": 0.22063346463065883, "grad_norm": 0.4626937210559845, "learning_rate": 1.8153784397468484e-05, "loss": 0.3983643651008606, "step": 4089, "token_acc": 0.8605255558764078 }, { "epoch": 0.2206874224356553, "grad_norm": 0.524025022983551, "learning_rate": 1.815277255968952e-05, "loss": 0.38854023814201355, "step": 4090, "token_acc": 0.862584797356062 }, { "epoch": 0.22074138024065182, "grad_norm": 0.5661459565162659, "learning_rate": 1.8151760472927014e-05, "loss": 0.4275684356689453, "step": 4091, "token_acc": 0.8541917973462002 }, { "epoch": 0.2207953380456483, "grad_norm": 0.3988458812236786, "learning_rate": 1.815074813721188e-05, "loss": 0.41180720925331116, "step": 4092, "token_acc": 0.8614195514913999 }, { "epoch": 0.2208492958506448, "grad_norm": 0.3873578906059265, "learning_rate": 1.8149735552575034e-05, "loss": 0.37515056133270264, "step": 4093, "token_acc": 0.8691363964397402 }, { "epoch": 0.22090325365564129, "grad_norm": 0.3856096565723419, "learning_rate": 1.81487227190474e-05, "loss": 0.4064609408378601, "step": 4094, "token_acc": 0.8633780404637418 }, { "epoch": 0.2209572114606378, "grad_norm": 0.33739006519317627, "learning_rate": 1.8147709636659904e-05, "loss": 0.4233078956604004, "step": 4095, "token_acc": 0.8524433524433525 }, { "epoch": 0.22101116926563427, "grad_norm": 0.5231806039810181, "learning_rate": 1.8146696305443495e-05, "loss": 0.3550659716129303, "step": 4096, "token_acc": 0.8784434598343287 }, { "epoch": 0.22106512707063078, "grad_norm": 0.3874082863330841, "learning_rate": 1.8145682725429115e-05, "loss": 0.36626750230789185, "step": 4097, "token_acc": 0.8715644131822327 }, { "epoch": 0.22111908487562726, "grad_norm": 0.4713234305381775, "learning_rate": 1.8144668896647718e-05, "loss": 0.38548606634140015, "step": 4098, "token_acc": 0.8680738786279684 }, { "epoch": 0.22117304268062377, "grad_norm": 0.4608663022518158, "learning_rate": 1.8143654819130264e-05, "loss": 0.4470287561416626, "step": 4099, "token_acc": 0.8471417778897004 }, { "epoch": 0.22122700048562025, "grad_norm": 0.6404604911804199, "learning_rate": 1.8142640492907728e-05, "loss": 0.5082217454910278, "step": 4100, "token_acc": 0.8306428773718494 }, { "epoch": 0.22128095829061672, "grad_norm": 0.4839755892753601, "learning_rate": 1.8141625918011084e-05, "loss": 0.4180585443973541, "step": 4101, "token_acc": 0.8528870908422214 }, { "epoch": 0.22133491609561323, "grad_norm": 0.35005608201026917, "learning_rate": 1.8140611094471313e-05, "loss": 0.4016188681125641, "step": 4102, "token_acc": 0.8609618988132417 }, { "epoch": 0.2213888739006097, "grad_norm": 0.7101733684539795, "learning_rate": 1.8139596022319414e-05, "loss": 0.3931732773780823, "step": 4103, "token_acc": 0.8665328650275966 }, { "epoch": 0.22144283170560622, "grad_norm": 0.6977906823158264, "learning_rate": 1.8138580701586387e-05, "loss": 0.4610777497291565, "step": 4104, "token_acc": 0.8446006580915345 }, { "epoch": 0.2214967895106027, "grad_norm": 0.3612821102142334, "learning_rate": 1.8137565132303238e-05, "loss": 0.4677370488643646, "step": 4105, "token_acc": 0.8427240143369176 }, { "epoch": 0.2215507473155992, "grad_norm": 0.34216517210006714, "learning_rate": 1.813654931450098e-05, "loss": 0.43823421001434326, "step": 4106, "token_acc": 0.853424905114766 }, { "epoch": 0.22160470512059569, "grad_norm": 0.38925209641456604, "learning_rate": 1.8135533248210638e-05, "loss": 0.37401121854782104, "step": 4107, "token_acc": 0.8618606524365686 }, { "epoch": 0.2216586629255922, "grad_norm": 0.3593102693557739, "learning_rate": 1.8134516933463238e-05, "loss": 0.3514827489852905, "step": 4108, "token_acc": 0.8770491803278688 }, { "epoch": 0.22171262073058867, "grad_norm": 0.42976775765419006, "learning_rate": 1.8133500370289826e-05, "loss": 0.34933143854141235, "step": 4109, "token_acc": 0.8758758758758759 }, { "epoch": 0.22176657853558518, "grad_norm": 0.3607112169265747, "learning_rate": 1.8132483558721445e-05, "loss": 0.3669741153717041, "step": 4110, "token_acc": 0.8714161598609904 }, { "epoch": 0.22182053634058166, "grad_norm": 0.3830077648162842, "learning_rate": 1.8131466498789142e-05, "loss": 0.3887314796447754, "step": 4111, "token_acc": 0.8696969696969697 }, { "epoch": 0.22187449414557817, "grad_norm": 0.4613679349422455, "learning_rate": 1.8130449190523984e-05, "loss": 0.4060685932636261, "step": 4112, "token_acc": 0.8632918245804007 }, { "epoch": 0.22192845195057465, "grad_norm": 0.4734930098056793, "learning_rate": 1.8129431633957037e-05, "loss": 0.41285890340805054, "step": 4113, "token_acc": 0.8631119864940912 }, { "epoch": 0.22198240975557115, "grad_norm": 0.3698655664920807, "learning_rate": 1.812841382911938e-05, "loss": 0.4095720648765564, "step": 4114, "token_acc": 0.8564043653072947 }, { "epoch": 0.22203636756056763, "grad_norm": 0.44201773405075073, "learning_rate": 1.812739577604209e-05, "loss": 0.408877432346344, "step": 4115, "token_acc": 0.8597602975616476 }, { "epoch": 0.22209032536556414, "grad_norm": 0.45184242725372314, "learning_rate": 1.8126377474756265e-05, "loss": 0.44334277510643005, "step": 4116, "token_acc": 0.8495019784418065 }, { "epoch": 0.22214428317056062, "grad_norm": 0.4937397241592407, "learning_rate": 1.8125358925293e-05, "loss": 0.4133172333240509, "step": 4117, "token_acc": 0.8632478632478633 }, { "epoch": 0.22219824097555713, "grad_norm": 0.5817916393280029, "learning_rate": 1.8124340127683405e-05, "loss": 0.43558937311172485, "step": 4118, "token_acc": 0.8529642716921869 }, { "epoch": 0.2222521987805536, "grad_norm": 0.3948866128921509, "learning_rate": 1.8123321081958587e-05, "loss": 0.3980712890625, "step": 4119, "token_acc": 0.8611111111111112 }, { "epoch": 0.2223061565855501, "grad_norm": 0.4116281569004059, "learning_rate": 1.812230178814967e-05, "loss": 0.3540763556957245, "step": 4120, "token_acc": 0.8757529065695475 }, { "epoch": 0.2223601143905466, "grad_norm": 0.40491923689842224, "learning_rate": 1.812128224628779e-05, "loss": 0.378650426864624, "step": 4121, "token_acc": 0.8626153578269198 }, { "epoch": 0.22241407219554307, "grad_norm": 0.44732916355133057, "learning_rate": 1.8120262456404072e-05, "loss": 0.429688036441803, "step": 4122, "token_acc": 0.854644495412844 }, { "epoch": 0.22246803000053958, "grad_norm": 0.5418030619621277, "learning_rate": 1.8119242418529668e-05, "loss": 0.4091644287109375, "step": 4123, "token_acc": 0.8603266417148691 }, { "epoch": 0.22252198780553606, "grad_norm": 0.4087655544281006, "learning_rate": 1.8118222132695726e-05, "loss": 0.4172351360321045, "step": 4124, "token_acc": 0.8599293286219081 }, { "epoch": 0.22257594561053257, "grad_norm": 0.5048866868019104, "learning_rate": 1.811720159893341e-05, "loss": 0.4556400179862976, "step": 4125, "token_acc": 0.8494331789877055 }, { "epoch": 0.22262990341552905, "grad_norm": 0.4193824529647827, "learning_rate": 1.811618081727388e-05, "loss": 0.3703731596469879, "step": 4126, "token_acc": 0.8749438370525685 }, { "epoch": 0.22268386122052555, "grad_norm": 0.316907674074173, "learning_rate": 1.8115159787748315e-05, "loss": 0.38289496302604675, "step": 4127, "token_acc": 0.8678943154523618 }, { "epoch": 0.22273781902552203, "grad_norm": 0.48822158575057983, "learning_rate": 1.81141385103879e-05, "loss": 0.43785107135772705, "step": 4128, "token_acc": 0.853542425091011 }, { "epoch": 0.22279177683051854, "grad_norm": 0.5608576536178589, "learning_rate": 1.8113116985223813e-05, "loss": 0.49711596965789795, "step": 4129, "token_acc": 0.829153605015674 }, { "epoch": 0.22284573463551502, "grad_norm": 0.4044996201992035, "learning_rate": 1.8112095212287265e-05, "loss": 0.4785948097705841, "step": 4130, "token_acc": 0.8431719099799694 }, { "epoch": 0.22289969244051153, "grad_norm": 0.5052391290664673, "learning_rate": 1.811107319160945e-05, "loss": 0.3689936399459839, "step": 4131, "token_acc": 0.8710033076074972 }, { "epoch": 0.222953650245508, "grad_norm": 0.4650820195674896, "learning_rate": 1.8110050923221587e-05, "loss": 0.42126381397247314, "step": 4132, "token_acc": 0.8558454425363277 }, { "epoch": 0.2230076080505045, "grad_norm": 0.5222991108894348, "learning_rate": 1.8109028407154893e-05, "loss": 0.3624085485935211, "step": 4133, "token_acc": 0.8749779580320931 }, { "epoch": 0.223061565855501, "grad_norm": 0.5796713829040527, "learning_rate": 1.8108005643440596e-05, "loss": 0.4228202700614929, "step": 4134, "token_acc": 0.8526315789473684 }, { "epoch": 0.2231155236604975, "grad_norm": 0.44468578696250916, "learning_rate": 1.810698263210993e-05, "loss": 0.36562836170196533, "step": 4135, "token_acc": 0.8712041884816754 }, { "epoch": 0.22316948146549398, "grad_norm": 0.4412277638912201, "learning_rate": 1.8105959373194138e-05, "loss": 0.4153112769126892, "step": 4136, "token_acc": 0.8584542493337282 }, { "epoch": 0.2232234392704905, "grad_norm": 0.2938624620437622, "learning_rate": 1.810493586672447e-05, "loss": 0.347286581993103, "step": 4137, "token_acc": 0.8788582486695694 }, { "epoch": 0.22327739707548697, "grad_norm": 0.46346816420555115, "learning_rate": 1.8103912112732185e-05, "loss": 0.36409080028533936, "step": 4138, "token_acc": 0.869727257326783 }, { "epoch": 0.22333135488048347, "grad_norm": 0.3758333623409271, "learning_rate": 1.8102888111248548e-05, "loss": 0.3768809735774994, "step": 4139, "token_acc": 0.8649220195611949 }, { "epoch": 0.22338531268547995, "grad_norm": 0.34471017122268677, "learning_rate": 1.810186386230483e-05, "loss": 0.36884891986846924, "step": 4140, "token_acc": 0.8702822708531557 }, { "epoch": 0.22343927049047646, "grad_norm": 0.4590306282043457, "learning_rate": 1.810083936593231e-05, "loss": 0.4618891179561615, "step": 4141, "token_acc": 0.8475056689342404 }, { "epoch": 0.22349322829547294, "grad_norm": 0.5306953191757202, "learning_rate": 1.809981462216228e-05, "loss": 0.4344981908798218, "step": 4142, "token_acc": 0.8547375371409707 }, { "epoch": 0.22354718610046942, "grad_norm": 0.4021781086921692, "learning_rate": 1.809878963102603e-05, "loss": 0.4599221348762512, "step": 4143, "token_acc": 0.8415788771836245 }, { "epoch": 0.22360114390546593, "grad_norm": 0.4446215033531189, "learning_rate": 1.8097764392554868e-05, "loss": 0.448015421628952, "step": 4144, "token_acc": 0.8529862174578867 }, { "epoch": 0.2236551017104624, "grad_norm": 0.4559081196784973, "learning_rate": 1.8096738906780103e-05, "loss": 0.35649117827415466, "step": 4145, "token_acc": 0.8766630708378281 }, { "epoch": 0.2237090595154589, "grad_norm": 0.3760671615600586, "learning_rate": 1.8095713173733057e-05, "loss": 0.43277642130851746, "step": 4146, "token_acc": 0.8551372048500319 }, { "epoch": 0.2237630173204554, "grad_norm": 0.379475474357605, "learning_rate": 1.8094687193445045e-05, "loss": 0.38269105553627014, "step": 4147, "token_acc": 0.8622062923138192 }, { "epoch": 0.2238169751254519, "grad_norm": 0.5361934900283813, "learning_rate": 1.809366096594741e-05, "loss": 0.43258851766586304, "step": 4148, "token_acc": 0.8530522966792603 }, { "epoch": 0.22387093293044838, "grad_norm": 0.3413238227367401, "learning_rate": 1.809263449127149e-05, "loss": 0.35457471013069153, "step": 4149, "token_acc": 0.8738046715786173 }, { "epoch": 0.2239248907354449, "grad_norm": 0.4739707112312317, "learning_rate": 1.809160776944863e-05, "loss": 0.43942153453826904, "step": 4150, "token_acc": 0.8507953922106418 }, { "epoch": 0.22397884854044137, "grad_norm": 0.44862350821495056, "learning_rate": 1.809058080051019e-05, "loss": 0.3882107734680176, "step": 4151, "token_acc": 0.8649847094801223 }, { "epoch": 0.22403280634543787, "grad_norm": 0.38523945212364197, "learning_rate": 1.808955358448753e-05, "loss": 0.4282247722148895, "step": 4152, "token_acc": 0.8540833137208755 }, { "epoch": 0.22408676415043435, "grad_norm": 0.4249350130558014, "learning_rate": 1.8088526121412028e-05, "loss": 0.3724777102470398, "step": 4153, "token_acc": 0.8706857573474002 }, { "epoch": 0.22414072195543086, "grad_norm": 0.5063494443893433, "learning_rate": 1.8087498411315053e-05, "loss": 0.40840762853622437, "step": 4154, "token_acc": 0.8586189082142243 }, { "epoch": 0.22419467976042734, "grad_norm": 0.4018750786781311, "learning_rate": 1.8086470454227993e-05, "loss": 0.43955641984939575, "step": 4155, "token_acc": 0.8483117755753402 }, { "epoch": 0.22424863756542385, "grad_norm": 0.44440582394599915, "learning_rate": 1.8085442250182246e-05, "loss": 0.44906681776046753, "step": 4156, "token_acc": 0.8484610552763819 }, { "epoch": 0.22430259537042033, "grad_norm": 0.4251209795475006, "learning_rate": 1.8084413799209214e-05, "loss": 0.4410459101200104, "step": 4157, "token_acc": 0.8490701859628075 }, { "epoch": 0.22435655317541683, "grad_norm": 0.42275911569595337, "learning_rate": 1.80833851013403e-05, "loss": 0.46057945489883423, "step": 4158, "token_acc": 0.842391304347826 }, { "epoch": 0.2244105109804133, "grad_norm": 0.4529253840446472, "learning_rate": 1.808235615660692e-05, "loss": 0.3527713119983673, "step": 4159, "token_acc": 0.8808645229309436 }, { "epoch": 0.22446446878540982, "grad_norm": 0.5425334572792053, "learning_rate": 1.8081326965040504e-05, "loss": 0.45712679624557495, "step": 4160, "token_acc": 0.8507649078988448 }, { "epoch": 0.2245184265904063, "grad_norm": 0.33317455649375916, "learning_rate": 1.8080297526672478e-05, "loss": 0.37458252906799316, "step": 4161, "token_acc": 0.8666205145955925 }, { "epoch": 0.2245723843954028, "grad_norm": 0.4197564125061035, "learning_rate": 1.8079267841534278e-05, "loss": 0.3214259445667267, "step": 4162, "token_acc": 0.8912371134020619 }, { "epoch": 0.2246263422003993, "grad_norm": 0.40677526593208313, "learning_rate": 1.807823790965736e-05, "loss": 0.39762070775032043, "step": 4163, "token_acc": 0.8624736286919831 }, { "epoch": 0.22468030000539577, "grad_norm": 0.303067147731781, "learning_rate": 1.807720773107317e-05, "loss": 0.36478710174560547, "step": 4164, "token_acc": 0.8728293510902206 }, { "epoch": 0.22473425781039227, "grad_norm": 0.4804857075214386, "learning_rate": 1.8076177305813174e-05, "loss": 0.4306643009185791, "step": 4165, "token_acc": 0.8507660577489687 }, { "epoch": 0.22478821561538875, "grad_norm": 0.45616424083709717, "learning_rate": 1.8075146633908838e-05, "loss": 0.43306979537010193, "step": 4166, "token_acc": 0.8552490146900752 }, { "epoch": 0.22484217342038526, "grad_norm": 0.48708513379096985, "learning_rate": 1.8074115715391638e-05, "loss": 0.4318724274635315, "step": 4167, "token_acc": 0.8561881188118812 }, { "epoch": 0.22489613122538174, "grad_norm": 0.4153985381126404, "learning_rate": 1.807308455029306e-05, "loss": 0.4369008541107178, "step": 4168, "token_acc": 0.8520760198523181 }, { "epoch": 0.22495008903037825, "grad_norm": 0.4499325454235077, "learning_rate": 1.8072053138644597e-05, "loss": 0.4169866740703583, "step": 4169, "token_acc": 0.8582439678284183 }, { "epoch": 0.22500404683537473, "grad_norm": 0.3894256353378296, "learning_rate": 1.8071021480477743e-05, "loss": 0.4104239344596863, "step": 4170, "token_acc": 0.8602773988176444 }, { "epoch": 0.22505800464037123, "grad_norm": 0.4204785227775574, "learning_rate": 1.8069989575824007e-05, "loss": 0.44837862253189087, "step": 4171, "token_acc": 0.8519807923169268 }, { "epoch": 0.2251119624453677, "grad_norm": 0.44988197088241577, "learning_rate": 1.8068957424714905e-05, "loss": 0.3968903422355652, "step": 4172, "token_acc": 0.867068216837092 }, { "epoch": 0.22516592025036422, "grad_norm": 0.486461877822876, "learning_rate": 1.8067925027181957e-05, "loss": 0.43782714009284973, "step": 4173, "token_acc": 0.8505670446964643 }, { "epoch": 0.2252198780553607, "grad_norm": 0.34880924224853516, "learning_rate": 1.8066892383256692e-05, "loss": 0.3994641602039337, "step": 4174, "token_acc": 0.8639183251486172 }, { "epoch": 0.2252738358603572, "grad_norm": 0.4394029378890991, "learning_rate": 1.8065859492970645e-05, "loss": 0.3968248665332794, "step": 4175, "token_acc": 0.8580246913580247 }, { "epoch": 0.2253277936653537, "grad_norm": 0.3522910177707672, "learning_rate": 1.806482635635536e-05, "loss": 0.41045913100242615, "step": 4176, "token_acc": 0.8626591230551627 }, { "epoch": 0.2253817514703502, "grad_norm": 0.3833593726158142, "learning_rate": 1.80637929734424e-05, "loss": 0.4447546601295471, "step": 4177, "token_acc": 0.8506143070386956 }, { "epoch": 0.22543570927534667, "grad_norm": 0.4461929500102997, "learning_rate": 1.8062759344263304e-05, "loss": 0.3888666331768036, "step": 4178, "token_acc": 0.8660498793242156 }, { "epoch": 0.22548966708034318, "grad_norm": 0.31819087266921997, "learning_rate": 1.8061725468849655e-05, "loss": 0.39010006189346313, "step": 4179, "token_acc": 0.8650530503978779 }, { "epoch": 0.22554362488533966, "grad_norm": 0.4540398418903351, "learning_rate": 1.8060691347233025e-05, "loss": 0.47077155113220215, "step": 4180, "token_acc": 0.8443534230544177 }, { "epoch": 0.22559758269033617, "grad_norm": 0.33382123708724976, "learning_rate": 1.805965697944499e-05, "loss": 0.3683502674102783, "step": 4181, "token_acc": 0.8697449220595181 }, { "epoch": 0.22565154049533265, "grad_norm": 0.38959842920303345, "learning_rate": 1.8058622365517143e-05, "loss": 0.44364452362060547, "step": 4182, "token_acc": 0.8526672311600338 }, { "epoch": 0.22570549830032915, "grad_norm": 0.3825739324092865, "learning_rate": 1.8057587505481076e-05, "loss": 0.4581860899925232, "step": 4183, "token_acc": 0.8516653313494335 }, { "epoch": 0.22575945610532563, "grad_norm": 0.31839123368263245, "learning_rate": 1.8056552399368402e-05, "loss": 0.39265596866607666, "step": 4184, "token_acc": 0.8642303689140742 }, { "epoch": 0.22581341391032214, "grad_norm": 0.4172237515449524, "learning_rate": 1.8055517047210726e-05, "loss": 0.39833545684814453, "step": 4185, "token_acc": 0.8614326455292736 }, { "epoch": 0.22586737171531862, "grad_norm": 0.348908394575119, "learning_rate": 1.8054481449039673e-05, "loss": 0.3896767497062683, "step": 4186, "token_acc": 0.865633423180593 }, { "epoch": 0.2259213295203151, "grad_norm": 0.4049991965293884, "learning_rate": 1.805344560488686e-05, "loss": 0.3963931202888489, "step": 4187, "token_acc": 0.8635972461273667 }, { "epoch": 0.2259752873253116, "grad_norm": 0.4145544171333313, "learning_rate": 1.8052409514783936e-05, "loss": 0.43631255626678467, "step": 4188, "token_acc": 0.853423882286361 }, { "epoch": 0.2260292451303081, "grad_norm": 0.452017605304718, "learning_rate": 1.8051373178762527e-05, "loss": 0.4405363202095032, "step": 4189, "token_acc": 0.8461774639238563 }, { "epoch": 0.2260832029353046, "grad_norm": 0.38777539134025574, "learning_rate": 1.8050336596854293e-05, "loss": 0.41335195302963257, "step": 4190, "token_acc": 0.8573861700919723 }, { "epoch": 0.22613716074030107, "grad_norm": 0.49096783995628357, "learning_rate": 1.8049299769090894e-05, "loss": 0.4633154273033142, "step": 4191, "token_acc": 0.8440907751252579 }, { "epoch": 0.22619111854529758, "grad_norm": 0.42528730630874634, "learning_rate": 1.8048262695503978e-05, "loss": 0.39197468757629395, "step": 4192, "token_acc": 0.8649619903248099 }, { "epoch": 0.22624507635029406, "grad_norm": 0.36419156193733215, "learning_rate": 1.804722537612524e-05, "loss": 0.3543844521045685, "step": 4193, "token_acc": 0.8745810548597637 }, { "epoch": 0.22629903415529057, "grad_norm": 0.40214213728904724, "learning_rate": 1.8046187810986337e-05, "loss": 0.4277936816215515, "step": 4194, "token_acc": 0.8581963249516441 }, { "epoch": 0.22635299196028705, "grad_norm": 0.45194071531295776, "learning_rate": 1.8045150000118972e-05, "loss": 0.41041678190231323, "step": 4195, "token_acc": 0.8621992165640738 }, { "epoch": 0.22640694976528355, "grad_norm": 0.321658730506897, "learning_rate": 1.804411194355483e-05, "loss": 0.44703373312950134, "step": 4196, "token_acc": 0.8480515227595241 }, { "epoch": 0.22646090757028003, "grad_norm": 0.3432711958885193, "learning_rate": 1.8043073641325615e-05, "loss": 0.3430948555469513, "step": 4197, "token_acc": 0.875249500998004 }, { "epoch": 0.22651486537527654, "grad_norm": 0.3186691701412201, "learning_rate": 1.8042035093463042e-05, "loss": 0.41475558280944824, "step": 4198, "token_acc": 0.8595931392102114 }, { "epoch": 0.22656882318027302, "grad_norm": 0.5230706334114075, "learning_rate": 1.8040996299998824e-05, "loss": 0.4396975040435791, "step": 4199, "token_acc": 0.8462926755903143 }, { "epoch": 0.22662278098526953, "grad_norm": 0.333344429731369, "learning_rate": 1.8039957260964684e-05, "loss": 0.34064045548439026, "step": 4200, "token_acc": 0.8802923387096774 }, { "epoch": 0.226676738790266, "grad_norm": 0.40723681449890137, "learning_rate": 1.8038917976392355e-05, "loss": 0.42123448848724365, "step": 4201, "token_acc": 0.8568682871678914 }, { "epoch": 0.22673069659526252, "grad_norm": 0.39437469840049744, "learning_rate": 1.8037878446313578e-05, "loss": 0.44048506021499634, "step": 4202, "token_acc": 0.8492706645056726 }, { "epoch": 0.226784654400259, "grad_norm": 0.38437286019325256, "learning_rate": 1.8036838670760096e-05, "loss": 0.45111149549484253, "step": 4203, "token_acc": 0.8510359609430816 }, { "epoch": 0.2268386122052555, "grad_norm": 0.429375022649765, "learning_rate": 1.8035798649763672e-05, "loss": 0.3079964220523834, "step": 4204, "token_acc": 0.8843792633015006 }, { "epoch": 0.22689257001025198, "grad_norm": 0.4086490273475647, "learning_rate": 1.803475838335606e-05, "loss": 0.40292543172836304, "step": 4205, "token_acc": 0.8566557960272754 }, { "epoch": 0.2269465278152485, "grad_norm": 0.4625053107738495, "learning_rate": 1.8033717871569028e-05, "loss": 0.388106107711792, "step": 4206, "token_acc": 0.8581835089894606 }, { "epoch": 0.22700048562024497, "grad_norm": 0.36997106671333313, "learning_rate": 1.8032677114434363e-05, "loss": 0.43349283933639526, "step": 4207, "token_acc": 0.8537977174340811 }, { "epoch": 0.22705444342524145, "grad_norm": 0.38989734649658203, "learning_rate": 1.8031636111983838e-05, "loss": 0.4245774745941162, "step": 4208, "token_acc": 0.8547062539481997 }, { "epoch": 0.22710840123023796, "grad_norm": 0.510091245174408, "learning_rate": 1.803059486424925e-05, "loss": 0.5051025748252869, "step": 4209, "token_acc": 0.8276444744846232 }, { "epoch": 0.22716235903523443, "grad_norm": 0.4059855341911316, "learning_rate": 1.8029553371262404e-05, "loss": 0.3493887782096863, "step": 4210, "token_acc": 0.8801737353091467 }, { "epoch": 0.22721631684023094, "grad_norm": 0.4131545424461365, "learning_rate": 1.8028511633055097e-05, "loss": 0.40835505723953247, "step": 4211, "token_acc": 0.860044785668586 }, { "epoch": 0.22727027464522742, "grad_norm": 0.40784645080566406, "learning_rate": 1.8027469649659148e-05, "loss": 0.36038893461227417, "step": 4212, "token_acc": 0.874498997995992 }, { "epoch": 0.22732423245022393, "grad_norm": 0.5419365763664246, "learning_rate": 1.802642742110638e-05, "loss": 0.4093194603919983, "step": 4213, "token_acc": 0.8665721487082546 }, { "epoch": 0.2273781902552204, "grad_norm": 0.4352028965950012, "learning_rate": 1.802538494742862e-05, "loss": 0.4245701730251312, "step": 4214, "token_acc": 0.8528174936921783 }, { "epoch": 0.22743214806021692, "grad_norm": 0.42492035031318665, "learning_rate": 1.8024342228657707e-05, "loss": 0.4168720841407776, "step": 4215, "token_acc": 0.856305766413637 }, { "epoch": 0.2274861058652134, "grad_norm": 0.49514415860176086, "learning_rate": 1.8023299264825486e-05, "loss": 0.4823736548423767, "step": 4216, "token_acc": 0.8396916123886747 }, { "epoch": 0.2275400636702099, "grad_norm": 0.42532047629356384, "learning_rate": 1.8022256055963807e-05, "loss": 0.40655070543289185, "step": 4217, "token_acc": 0.8642314530467109 }, { "epoch": 0.22759402147520638, "grad_norm": 0.31248876452445984, "learning_rate": 1.8021212602104527e-05, "loss": 0.3953811824321747, "step": 4218, "token_acc": 0.8639652677279306 }, { "epoch": 0.2276479792802029, "grad_norm": 0.44912517070770264, "learning_rate": 1.8020168903279516e-05, "loss": 0.4222285747528076, "step": 4219, "token_acc": 0.8553849902534113 }, { "epoch": 0.22770193708519937, "grad_norm": 0.33203476667404175, "learning_rate": 1.801912495952065e-05, "loss": 0.3361348509788513, "step": 4220, "token_acc": 0.8796125461254612 }, { "epoch": 0.22775589489019588, "grad_norm": 0.3869483172893524, "learning_rate": 1.8018080770859808e-05, "loss": 0.4017966389656067, "step": 4221, "token_acc": 0.8619414483821264 }, { "epoch": 0.22780985269519236, "grad_norm": 0.4879925847053528, "learning_rate": 1.8017036337328878e-05, "loss": 0.44298794865608215, "step": 4222, "token_acc": 0.8452787258248009 }, { "epoch": 0.22786381050018886, "grad_norm": 0.35397621989250183, "learning_rate": 1.8015991658959758e-05, "loss": 0.3497769832611084, "step": 4223, "token_acc": 0.8765698852126941 }, { "epoch": 0.22791776830518534, "grad_norm": 0.4261466860771179, "learning_rate": 1.8014946735784353e-05, "loss": 0.4614799916744232, "step": 4224, "token_acc": 0.8474178403755869 }, { "epoch": 0.22797172611018185, "grad_norm": 0.31873974204063416, "learning_rate": 1.8013901567834574e-05, "loss": 0.37560731172561646, "step": 4225, "token_acc": 0.8715238454762804 }, { "epoch": 0.22802568391517833, "grad_norm": 0.35688820481300354, "learning_rate": 1.8012856155142346e-05, "loss": 0.38890254497528076, "step": 4226, "token_acc": 0.8652155713687736 }, { "epoch": 0.22807964172017484, "grad_norm": 0.5093340873718262, "learning_rate": 1.8011810497739582e-05, "loss": 0.46531909704208374, "step": 4227, "token_acc": 0.8423207227555054 }, { "epoch": 0.22813359952517132, "grad_norm": 0.3556145429611206, "learning_rate": 1.801076459565823e-05, "loss": 0.39563876390457153, "step": 4228, "token_acc": 0.8668089647812166 }, { "epoch": 0.2281875573301678, "grad_norm": 0.33650434017181396, "learning_rate": 1.8009718448930224e-05, "loss": 0.3866056203842163, "step": 4229, "token_acc": 0.8680130849096857 }, { "epoch": 0.2282415151351643, "grad_norm": 0.4753347933292389, "learning_rate": 1.8008672057587513e-05, "loss": 0.37202388048171997, "step": 4230, "token_acc": 0.869062119366626 }, { "epoch": 0.22829547294016078, "grad_norm": 0.4137212634086609, "learning_rate": 1.8007625421662056e-05, "loss": 0.42315059900283813, "step": 4231, "token_acc": 0.8543130254520327 }, { "epoch": 0.2283494307451573, "grad_norm": 0.35203325748443604, "learning_rate": 1.8006578541185818e-05, "loss": 0.41038942337036133, "step": 4232, "token_acc": 0.8580502981591911 }, { "epoch": 0.22840338855015377, "grad_norm": 0.41127124428749084, "learning_rate": 1.800553141619077e-05, "loss": 0.39533981680870056, "step": 4233, "token_acc": 0.8636441696719904 }, { "epoch": 0.22845734635515028, "grad_norm": 0.4238082468509674, "learning_rate": 1.8004484046708886e-05, "loss": 0.3962010145187378, "step": 4234, "token_acc": 0.86522374186695 }, { "epoch": 0.22851130416014676, "grad_norm": 0.37460818886756897, "learning_rate": 1.8003436432772158e-05, "loss": 0.4051513075828552, "step": 4235, "token_acc": 0.860394073621506 }, { "epoch": 0.22856526196514326, "grad_norm": 0.38856440782546997, "learning_rate": 1.8002388574412577e-05, "loss": 0.4005913734436035, "step": 4236, "token_acc": 0.8572565969062784 }, { "epoch": 0.22861921977013974, "grad_norm": 0.4226197600364685, "learning_rate": 1.8001340471662144e-05, "loss": 0.34566688537597656, "step": 4237, "token_acc": 0.8781619518036222 }, { "epoch": 0.22867317757513625, "grad_norm": 0.4734206795692444, "learning_rate": 1.800029212455287e-05, "loss": 0.4440106153488159, "step": 4238, "token_acc": 0.843443354148082 }, { "epoch": 0.22872713538013273, "grad_norm": 0.44653087854385376, "learning_rate": 1.7999243533116773e-05, "loss": 0.45757192373275757, "step": 4239, "token_acc": 0.8482525659347798 }, { "epoch": 0.22878109318512924, "grad_norm": 0.49160829186439514, "learning_rate": 1.7998194697385875e-05, "loss": 0.4483218789100647, "step": 4240, "token_acc": 0.8532904848400168 }, { "epoch": 0.22883505099012572, "grad_norm": 0.46670475602149963, "learning_rate": 1.7997145617392202e-05, "loss": 0.3527721166610718, "step": 4241, "token_acc": 0.8728497706422018 }, { "epoch": 0.22888900879512222, "grad_norm": 0.5379999279975891, "learning_rate": 1.7996096293167798e-05, "loss": 0.4161665439605713, "step": 4242, "token_acc": 0.862144420131291 }, { "epoch": 0.2289429666001187, "grad_norm": 0.4335636794567108, "learning_rate": 1.7995046724744706e-05, "loss": 0.39805227518081665, "step": 4243, "token_acc": 0.8575019638648861 }, { "epoch": 0.2289969244051152, "grad_norm": 0.4733755886554718, "learning_rate": 1.7993996912154985e-05, "loss": 0.4247482717037201, "step": 4244, "token_acc": 0.8531050955414012 }, { "epoch": 0.2290508822101117, "grad_norm": 0.31236007809638977, "learning_rate": 1.7992946855430694e-05, "loss": 0.36236241459846497, "step": 4245, "token_acc": 0.8698893698893699 }, { "epoch": 0.2291048400151082, "grad_norm": 0.3639155328273773, "learning_rate": 1.7991896554603894e-05, "loss": 0.4065781533718109, "step": 4246, "token_acc": 0.8620617477760335 }, { "epoch": 0.22915879782010468, "grad_norm": 0.44178977608680725, "learning_rate": 1.7990846009706672e-05, "loss": 0.3605600893497467, "step": 4247, "token_acc": 0.8718443197755961 }, { "epoch": 0.22921275562510118, "grad_norm": 0.408366858959198, "learning_rate": 1.7989795220771103e-05, "loss": 0.425400972366333, "step": 4248, "token_acc": 0.8540561031084155 }, { "epoch": 0.22926671343009766, "grad_norm": 0.45121824741363525, "learning_rate": 1.798874418782928e-05, "loss": 0.3490447402000427, "step": 4249, "token_acc": 0.8748518204911092 }, { "epoch": 0.22932067123509417, "grad_norm": 0.3978724777698517, "learning_rate": 1.7987692910913304e-05, "loss": 0.36831119656562805, "step": 4250, "token_acc": 0.8782415411212645 }, { "epoch": 0.22937462904009065, "grad_norm": 0.4890492260456085, "learning_rate": 1.798664139005528e-05, "loss": 0.40342459082603455, "step": 4251, "token_acc": 0.8626998223801066 }, { "epoch": 0.22942858684508713, "grad_norm": 0.38113391399383545, "learning_rate": 1.798558962528732e-05, "loss": 0.4000183939933777, "step": 4252, "token_acc": 0.8637674728200577 }, { "epoch": 0.22948254465008364, "grad_norm": 0.41845738887786865, "learning_rate": 1.7984537616641546e-05, "loss": 0.4406493902206421, "step": 4253, "token_acc": 0.8514675767918088 }, { "epoch": 0.22953650245508012, "grad_norm": 0.35609573125839233, "learning_rate": 1.7983485364150083e-05, "loss": 0.38592731952667236, "step": 4254, "token_acc": 0.8614165779827362 }, { "epoch": 0.22959046026007662, "grad_norm": 0.4128319025039673, "learning_rate": 1.798243286784507e-05, "loss": 0.336983323097229, "step": 4255, "token_acc": 0.8767178658043654 }, { "epoch": 0.2296444180650731, "grad_norm": 0.41538020968437195, "learning_rate": 1.7981380127758642e-05, "loss": 0.40122270584106445, "step": 4256, "token_acc": 0.8637688841770474 }, { "epoch": 0.2296983758700696, "grad_norm": 0.453637957572937, "learning_rate": 1.798032714392296e-05, "loss": 0.3882802724838257, "step": 4257, "token_acc": 0.8662076665426126 }, { "epoch": 0.2297523336750661, "grad_norm": 0.5607038140296936, "learning_rate": 1.7979273916370182e-05, "loss": 0.46914201974868774, "step": 4258, "token_acc": 0.8488448844884489 }, { "epoch": 0.2298062914800626, "grad_norm": 0.4426459074020386, "learning_rate": 1.7978220445132465e-05, "loss": 0.408162385225296, "step": 4259, "token_acc": 0.8654353562005277 }, { "epoch": 0.22986024928505908, "grad_norm": 0.37883007526397705, "learning_rate": 1.7977166730241986e-05, "loss": 0.4336881637573242, "step": 4260, "token_acc": 0.8504742885671492 }, { "epoch": 0.22991420709005558, "grad_norm": 0.3794102072715759, "learning_rate": 1.7976112771730924e-05, "loss": 0.4726000428199768, "step": 4261, "token_acc": 0.8402506799101336 }, { "epoch": 0.22996816489505206, "grad_norm": 0.4171156883239746, "learning_rate": 1.7975058569631468e-05, "loss": 0.499645859003067, "step": 4262, "token_acc": 0.8349069702860317 }, { "epoch": 0.23002212270004857, "grad_norm": 0.31095361709594727, "learning_rate": 1.7974004123975813e-05, "loss": 0.4111519753932953, "step": 4263, "token_acc": 0.8566886213945037 }, { "epoch": 0.23007608050504505, "grad_norm": 0.3681529462337494, "learning_rate": 1.797294943479616e-05, "loss": 0.4391859471797943, "step": 4264, "token_acc": 0.8527722553567484 }, { "epoch": 0.23013003831004156, "grad_norm": 0.4132155776023865, "learning_rate": 1.797189450212472e-05, "loss": 0.4167041778564453, "step": 4265, "token_acc": 0.8568615384615385 }, { "epoch": 0.23018399611503804, "grad_norm": 0.45466336607933044, "learning_rate": 1.797083932599371e-05, "loss": 0.3748003840446472, "step": 4266, "token_acc": 0.8727099690697121 }, { "epoch": 0.23023795392003454, "grad_norm": 0.42962706089019775, "learning_rate": 1.7969783906435353e-05, "loss": 0.46771010756492615, "step": 4267, "token_acc": 0.8383024442653774 }, { "epoch": 0.23029191172503102, "grad_norm": 0.3700888752937317, "learning_rate": 1.7968728243481887e-05, "loss": 0.42291975021362305, "step": 4268, "token_acc": 0.855980570734669 }, { "epoch": 0.23034586953002753, "grad_norm": 0.39908480644226074, "learning_rate": 1.7967672337165548e-05, "loss": 0.4020758867263794, "step": 4269, "token_acc": 0.8587479935794543 }, { "epoch": 0.230399827335024, "grad_norm": 0.45177406072616577, "learning_rate": 1.7966616187518582e-05, "loss": 0.421074241399765, "step": 4270, "token_acc": 0.860887415127112 }, { "epoch": 0.23045378514002052, "grad_norm": 0.39849936962127686, "learning_rate": 1.7965559794573243e-05, "loss": 0.3573240041732788, "step": 4271, "token_acc": 0.8701776852889387 }, { "epoch": 0.230507742945017, "grad_norm": 0.3580402731895447, "learning_rate": 1.7964503158361797e-05, "loss": 0.35981541872024536, "step": 4272, "token_acc": 0.8722358722358723 }, { "epoch": 0.23056170075001348, "grad_norm": 0.45039603114128113, "learning_rate": 1.796344627891651e-05, "loss": 0.4244900643825531, "step": 4273, "token_acc": 0.8582688053097345 }, { "epoch": 0.23061565855500998, "grad_norm": 0.5093483328819275, "learning_rate": 1.7962389156269657e-05, "loss": 0.44788748025894165, "step": 4274, "token_acc": 0.8554251774590758 }, { "epoch": 0.23066961636000646, "grad_norm": 0.47527700662612915, "learning_rate": 1.796133179045353e-05, "loss": 0.37683090567588806, "step": 4275, "token_acc": 0.87037889246817 }, { "epoch": 0.23072357416500297, "grad_norm": 0.3596142828464508, "learning_rate": 1.796027418150041e-05, "loss": 0.4048866629600525, "step": 4276, "token_acc": 0.86321094312455 }, { "epoch": 0.23077753196999945, "grad_norm": 0.44332772493362427, "learning_rate": 1.7959216329442605e-05, "loss": 0.36935263872146606, "step": 4277, "token_acc": 0.8748802452577122 }, { "epoch": 0.23083148977499596, "grad_norm": 0.4077065885066986, "learning_rate": 1.7958158234312422e-05, "loss": 0.3937453627586365, "step": 4278, "token_acc": 0.8642985985847094 }, { "epoch": 0.23088544757999244, "grad_norm": 0.4142276644706726, "learning_rate": 1.7957099896142164e-05, "loss": 0.3800996243953705, "step": 4279, "token_acc": 0.8698356807511737 }, { "epoch": 0.23093940538498894, "grad_norm": 0.4612438678741455, "learning_rate": 1.7956041314964164e-05, "loss": 0.3883851170539856, "step": 4280, "token_acc": 0.8677988677988678 }, { "epoch": 0.23099336318998542, "grad_norm": 0.45189815759658813, "learning_rate": 1.795498249081075e-05, "loss": 0.42752063274383545, "step": 4281, "token_acc": 0.8507313829787234 }, { "epoch": 0.23104732099498193, "grad_norm": 0.4120579957962036, "learning_rate": 1.7953923423714246e-05, "loss": 0.45040735602378845, "step": 4282, "token_acc": 0.8489731888191672 }, { "epoch": 0.2311012787999784, "grad_norm": 0.43599846959114075, "learning_rate": 1.795286411370701e-05, "loss": 0.4055406451225281, "step": 4283, "token_acc": 0.8583954405962297 }, { "epoch": 0.23115523660497492, "grad_norm": 0.5022183060646057, "learning_rate": 1.7951804560821386e-05, "loss": 0.43612363934516907, "step": 4284, "token_acc": 0.8474745458175285 }, { "epoch": 0.2312091944099714, "grad_norm": 0.48094627261161804, "learning_rate": 1.7950744765089738e-05, "loss": 0.4357123374938965, "step": 4285, "token_acc": 0.8514607532558958 }, { "epoch": 0.2312631522149679, "grad_norm": 0.4781145453453064, "learning_rate": 1.7949684726544423e-05, "loss": 0.3757134675979614, "step": 4286, "token_acc": 0.8677275838466804 }, { "epoch": 0.23131711001996438, "grad_norm": 0.36998099088668823, "learning_rate": 1.794862444521782e-05, "loss": 0.3544262647628784, "step": 4287, "token_acc": 0.8742901681327246 }, { "epoch": 0.2313710678249609, "grad_norm": 0.46643853187561035, "learning_rate": 1.7947563921142306e-05, "loss": 0.43022292852401733, "step": 4288, "token_acc": 0.8559508633677135 }, { "epoch": 0.23142502562995737, "grad_norm": 0.4753096401691437, "learning_rate": 1.7946503154350274e-05, "loss": 0.45310497283935547, "step": 4289, "token_acc": 0.8477673605593516 }, { "epoch": 0.23147898343495388, "grad_norm": 0.4458692669868469, "learning_rate": 1.7945442144874117e-05, "loss": 0.38018548488616943, "step": 4290, "token_acc": 0.8645308180191901 }, { "epoch": 0.23153294123995036, "grad_norm": 0.4383488595485687, "learning_rate": 1.794438089274624e-05, "loss": 0.39718347787857056, "step": 4291, "token_acc": 0.8561460010735373 }, { "epoch": 0.23158689904494686, "grad_norm": 0.45200735330581665, "learning_rate": 1.794331939799905e-05, "loss": 0.4382818341255188, "step": 4292, "token_acc": 0.8469467133411124 }, { "epoch": 0.23164085684994334, "grad_norm": 0.39297646284103394, "learning_rate": 1.794225766066497e-05, "loss": 0.4220084846019745, "step": 4293, "token_acc": 0.8545977011494252 }, { "epoch": 0.23169481465493982, "grad_norm": 0.4161844849586487, "learning_rate": 1.794119568077642e-05, "loss": 0.4499402642250061, "step": 4294, "token_acc": 0.8493994943109987 }, { "epoch": 0.23174877245993633, "grad_norm": 0.477694571018219, "learning_rate": 1.7940133458365826e-05, "loss": 0.4197206497192383, "step": 4295, "token_acc": 0.8575685339690108 }, { "epoch": 0.2318027302649328, "grad_norm": 0.4008007347583771, "learning_rate": 1.7939070993465645e-05, "loss": 0.3788549304008484, "step": 4296, "token_acc": 0.868409833735967 }, { "epoch": 0.23185668806992932, "grad_norm": 0.41584518551826477, "learning_rate": 1.7938008286108314e-05, "loss": 0.4365716278553009, "step": 4297, "token_acc": 0.8482443158886048 }, { "epoch": 0.2319106458749258, "grad_norm": 0.36054983735084534, "learning_rate": 1.7936945336326288e-05, "loss": 0.3777630925178528, "step": 4298, "token_acc": 0.8699984430951269 }, { "epoch": 0.2319646036799223, "grad_norm": 0.5245702862739563, "learning_rate": 1.7935882144152034e-05, "loss": 0.45461946725845337, "step": 4299, "token_acc": 0.8454230131201931 }, { "epoch": 0.23201856148491878, "grad_norm": 0.48048001527786255, "learning_rate": 1.7934818709618015e-05, "loss": 0.4397471249103546, "step": 4300, "token_acc": 0.8518627595346783 }, { "epoch": 0.2320725192899153, "grad_norm": 0.3194105327129364, "learning_rate": 1.7933755032756713e-05, "loss": 0.37291914224624634, "step": 4301, "token_acc": 0.8649933165008169 }, { "epoch": 0.23212647709491177, "grad_norm": 0.37052974104881287, "learning_rate": 1.793269111360061e-05, "loss": 0.3660035729408264, "step": 4302, "token_acc": 0.8689138576779026 }, { "epoch": 0.23218043489990828, "grad_norm": 0.3122917413711548, "learning_rate": 1.79316269521822e-05, "loss": 0.36266788840293884, "step": 4303, "token_acc": 0.8749570987301224 }, { "epoch": 0.23223439270490476, "grad_norm": 0.3624105453491211, "learning_rate": 1.7930562548533976e-05, "loss": 0.4160544276237488, "step": 4304, "token_acc": 0.8591306223708338 }, { "epoch": 0.23228835050990126, "grad_norm": 0.4084450304508209, "learning_rate": 1.7929497902688453e-05, "loss": 0.41664159297943115, "step": 4305, "token_acc": 0.8576752440106478 }, { "epoch": 0.23234230831489774, "grad_norm": 0.39763057231903076, "learning_rate": 1.792843301467814e-05, "loss": 0.45326483249664307, "step": 4306, "token_acc": 0.8500071561471304 }, { "epoch": 0.23239626611989425, "grad_norm": 0.4071663022041321, "learning_rate": 1.792736788453556e-05, "loss": 0.3821934461593628, "step": 4307, "token_acc": 0.8646417959722681 }, { "epoch": 0.23245022392489073, "grad_norm": 0.37015751004219055, "learning_rate": 1.792630251229324e-05, "loss": 0.3776448369026184, "step": 4308, "token_acc": 0.8690222789839648 }, { "epoch": 0.23250418172988724, "grad_norm": 0.3724079430103302, "learning_rate": 1.792523689798372e-05, "loss": 0.3964790105819702, "step": 4309, "token_acc": 0.863167636171338 }, { "epoch": 0.23255813953488372, "grad_norm": 0.433921754360199, "learning_rate": 1.7924171041639544e-05, "loss": 0.4497356712818146, "step": 4310, "token_acc": 0.8486230165967537 }, { "epoch": 0.23261209733988022, "grad_norm": 0.455664724111557, "learning_rate": 1.7923104943293257e-05, "loss": 0.40721410512924194, "step": 4311, "token_acc": 0.8622972096041531 }, { "epoch": 0.2326660551448767, "grad_norm": 0.4810631573200226, "learning_rate": 1.7922038602977418e-05, "loss": 0.3858126401901245, "step": 4312, "token_acc": 0.8743693239152371 }, { "epoch": 0.2327200129498732, "grad_norm": 0.3959459066390991, "learning_rate": 1.79209720207246e-05, "loss": 0.4556795656681061, "step": 4313, "token_acc": 0.8463615023474178 }, { "epoch": 0.2327739707548697, "grad_norm": 0.41203874349594116, "learning_rate": 1.791990519656737e-05, "loss": 0.4567985534667969, "step": 4314, "token_acc": 0.8428298800436205 }, { "epoch": 0.2328279285598662, "grad_norm": 0.405842125415802, "learning_rate": 1.791883813053831e-05, "loss": 0.4217228889465332, "step": 4315, "token_acc": 0.8557812322705095 }, { "epoch": 0.23288188636486268, "grad_norm": 0.4218384921550751, "learning_rate": 1.7917770822670007e-05, "loss": 0.4103833734989166, "step": 4316, "token_acc": 0.8576857305719171 }, { "epoch": 0.23293584416985916, "grad_norm": 0.4040548801422119, "learning_rate": 1.791670327299506e-05, "loss": 0.3931928277015686, "step": 4317, "token_acc": 0.8659079016221873 }, { "epoch": 0.23298980197485566, "grad_norm": 0.3310251235961914, "learning_rate": 1.7915635481546065e-05, "loss": 0.3946211636066437, "step": 4318, "token_acc": 0.8651032076751624 }, { "epoch": 0.23304375977985214, "grad_norm": 0.5393021106719971, "learning_rate": 1.7914567448355636e-05, "loss": 0.4081171751022339, "step": 4319, "token_acc": 0.8632766458999832 }, { "epoch": 0.23309771758484865, "grad_norm": 0.5388458967208862, "learning_rate": 1.7913499173456394e-05, "loss": 0.372078001499176, "step": 4320, "token_acc": 0.8692191053828658 }, { "epoch": 0.23315167538984513, "grad_norm": 0.4089651107788086, "learning_rate": 1.7912430656880956e-05, "loss": 0.38132017850875854, "step": 4321, "token_acc": 0.8715261235508972 }, { "epoch": 0.23320563319484164, "grad_norm": 0.5941669344902039, "learning_rate": 1.791136189866196e-05, "loss": 0.4310513734817505, "step": 4322, "token_acc": 0.8513420509291122 }, { "epoch": 0.23325959099983812, "grad_norm": 0.4449816942214966, "learning_rate": 1.7910292898832046e-05, "loss": 0.4004395008087158, "step": 4323, "token_acc": 0.8605150214592274 }, { "epoch": 0.23331354880483463, "grad_norm": 0.47763490676879883, "learning_rate": 1.7909223657423857e-05, "loss": 0.40981245040893555, "step": 4324, "token_acc": 0.8603848706038487 }, { "epoch": 0.2333675066098311, "grad_norm": 0.3562223017215729, "learning_rate": 1.790815417447005e-05, "loss": 0.3757381737232208, "step": 4325, "token_acc": 0.8668587896253602 }, { "epoch": 0.2334214644148276, "grad_norm": 0.44932231307029724, "learning_rate": 1.790708445000329e-05, "loss": 0.3832266926765442, "step": 4326, "token_acc": 0.862872769891913 }, { "epoch": 0.2334754222198241, "grad_norm": 0.4616783857345581, "learning_rate": 1.7906014484056235e-05, "loss": 0.4091171622276306, "step": 4327, "token_acc": 0.8603901915160194 }, { "epoch": 0.2335293800248206, "grad_norm": 0.4247942268848419, "learning_rate": 1.7904944276661576e-05, "loss": 0.4357469081878662, "step": 4328, "token_acc": 0.853827606992164 }, { "epoch": 0.23358333782981708, "grad_norm": 0.5575767159461975, "learning_rate": 1.7903873827851984e-05, "loss": 0.4353766441345215, "step": 4329, "token_acc": 0.8549511854951185 }, { "epoch": 0.23363729563481359, "grad_norm": 0.4164562523365021, "learning_rate": 1.790280313766016e-05, "loss": 0.4256419837474823, "step": 4330, "token_acc": 0.8559340904030283 }, { "epoch": 0.23369125343981006, "grad_norm": 0.3934004306793213, "learning_rate": 1.7901732206118796e-05, "loss": 0.4020727276802063, "step": 4331, "token_acc": 0.8636305651231024 }, { "epoch": 0.23374521124480657, "grad_norm": 0.41011008620262146, "learning_rate": 1.79006610332606e-05, "loss": 0.44381213188171387, "step": 4332, "token_acc": 0.8494094488188977 }, { "epoch": 0.23379916904980305, "grad_norm": 0.39185449481010437, "learning_rate": 1.7899589619118286e-05, "loss": 0.4414519667625427, "step": 4333, "token_acc": 0.8486964618249534 }, { "epoch": 0.23385312685479956, "grad_norm": 0.5615269541740417, "learning_rate": 1.7898517963724578e-05, "loss": 0.43583759665489197, "step": 4334, "token_acc": 0.8553019717435181 }, { "epoch": 0.23390708465979604, "grad_norm": 0.44827887415885925, "learning_rate": 1.7897446067112197e-05, "loss": 0.35318201780319214, "step": 4335, "token_acc": 0.8720789074355083 }, { "epoch": 0.23396104246479255, "grad_norm": 0.4699915945529938, "learning_rate": 1.7896373929313883e-05, "loss": 0.388942688703537, "step": 4336, "token_acc": 0.8662008733624454 }, { "epoch": 0.23401500026978903, "grad_norm": 0.4088856875896454, "learning_rate": 1.7895301550362375e-05, "loss": 0.4076841175556183, "step": 4337, "token_acc": 0.854952652938524 }, { "epoch": 0.2340689580747855, "grad_norm": 0.41328009963035583, "learning_rate": 1.789422893029043e-05, "loss": 0.42796361446380615, "step": 4338, "token_acc": 0.8538178363614772 }, { "epoch": 0.234122915879782, "grad_norm": 0.5978410243988037, "learning_rate": 1.78931560691308e-05, "loss": 0.4089367985725403, "step": 4339, "token_acc": 0.8638031183124427 }, { "epoch": 0.2341768736847785, "grad_norm": 0.40635430812835693, "learning_rate": 1.789208296691625e-05, "loss": 0.42071765661239624, "step": 4340, "token_acc": 0.8512062256809338 }, { "epoch": 0.234230831489775, "grad_norm": 0.4380785822868347, "learning_rate": 1.7891009623679554e-05, "loss": 0.43918099999427795, "step": 4341, "token_acc": 0.8414355628058727 }, { "epoch": 0.23428478929477148, "grad_norm": 0.4522695541381836, "learning_rate": 1.7889936039453495e-05, "loss": 0.42220747470855713, "step": 4342, "token_acc": 0.8522026934489842 }, { "epoch": 0.23433874709976799, "grad_norm": 0.3437645435333252, "learning_rate": 1.7888862214270852e-05, "loss": 0.35122084617614746, "step": 4343, "token_acc": 0.8759219088937094 }, { "epoch": 0.23439270490476447, "grad_norm": 0.46417078375816345, "learning_rate": 1.7887788148164425e-05, "loss": 0.3849842846393585, "step": 4344, "token_acc": 0.8678320535860153 }, { "epoch": 0.23444666270976097, "grad_norm": 0.3971646726131439, "learning_rate": 1.7886713841167015e-05, "loss": 0.4410274922847748, "step": 4345, "token_acc": 0.8486214669672273 }, { "epoch": 0.23450062051475745, "grad_norm": 0.4668950140476227, "learning_rate": 1.7885639293311427e-05, "loss": 0.4375433921813965, "step": 4346, "token_acc": 0.8539765319426337 }, { "epoch": 0.23455457831975396, "grad_norm": 0.463440865278244, "learning_rate": 1.7884564504630483e-05, "loss": 0.407817542552948, "step": 4347, "token_acc": 0.8581860107609531 }, { "epoch": 0.23460853612475044, "grad_norm": 0.46953633427619934, "learning_rate": 1.788348947515701e-05, "loss": 0.4483221769332886, "step": 4348, "token_acc": 0.8496927682369624 }, { "epoch": 0.23466249392974695, "grad_norm": 0.47803011536598206, "learning_rate": 1.7882414204923824e-05, "loss": 0.3913307189941406, "step": 4349, "token_acc": 0.8634412797502926 }, { "epoch": 0.23471645173474343, "grad_norm": 0.44882887601852417, "learning_rate": 1.788133869396378e-05, "loss": 0.39863288402557373, "step": 4350, "token_acc": 0.8691605512797566 }, { "epoch": 0.23477040953973993, "grad_norm": 0.4423937499523163, "learning_rate": 1.7880262942309712e-05, "loss": 0.3843197226524353, "step": 4351, "token_acc": 0.8645149525893508 }, { "epoch": 0.2348243673447364, "grad_norm": 0.4601918160915375, "learning_rate": 1.7879186949994483e-05, "loss": 0.389143168926239, "step": 4352, "token_acc": 0.8694148169501297 }, { "epoch": 0.23487832514973292, "grad_norm": 0.5534675717353821, "learning_rate": 1.7878110717050945e-05, "loss": 0.37858980894088745, "step": 4353, "token_acc": 0.865867982147052 }, { "epoch": 0.2349322829547294, "grad_norm": 0.5504299402236938, "learning_rate": 1.7877034243511972e-05, "loss": 0.5149513483047485, "step": 4354, "token_acc": 0.8327503974562798 }, { "epoch": 0.2349862407597259, "grad_norm": 0.4269694685935974, "learning_rate": 1.7875957529410434e-05, "loss": 0.4479745626449585, "step": 4355, "token_acc": 0.8480422241315821 }, { "epoch": 0.23504019856472239, "grad_norm": 0.4879125654697418, "learning_rate": 1.7874880574779217e-05, "loss": 0.3689616918563843, "step": 4356, "token_acc": 0.8678832116788321 }, { "epoch": 0.2350941563697189, "grad_norm": 0.48349276185035706, "learning_rate": 1.7873803379651212e-05, "loss": 0.3811788260936737, "step": 4357, "token_acc": 0.8733497083205404 }, { "epoch": 0.23514811417471537, "grad_norm": 0.36777204275131226, "learning_rate": 1.7872725944059312e-05, "loss": 0.40428221225738525, "step": 4358, "token_acc": 0.8588343841304014 }, { "epoch": 0.23520207197971185, "grad_norm": 0.41557440161705017, "learning_rate": 1.7871648268036422e-05, "loss": 0.4343692660331726, "step": 4359, "token_acc": 0.8545379764189746 }, { "epoch": 0.23525602978470836, "grad_norm": 0.37519514560699463, "learning_rate": 1.787057035161546e-05, "loss": 0.4489549398422241, "step": 4360, "token_acc": 0.8514432522291069 }, { "epoch": 0.23530998758970484, "grad_norm": 0.41493070125579834, "learning_rate": 1.7869492194829342e-05, "loss": 0.43008413910865784, "step": 4361, "token_acc": 0.854875283446712 }, { "epoch": 0.23536394539470135, "grad_norm": 0.47341328859329224, "learning_rate": 1.7868413797710993e-05, "loss": 0.4124738574028015, "step": 4362, "token_acc": 0.8600424113904878 }, { "epoch": 0.23541790319969783, "grad_norm": 0.43331506848335266, "learning_rate": 1.786733516029335e-05, "loss": 0.3911600708961487, "step": 4363, "token_acc": 0.860803116571771 }, { "epoch": 0.23547186100469433, "grad_norm": 0.4766436815261841, "learning_rate": 1.7866256282609346e-05, "loss": 0.425373375415802, "step": 4364, "token_acc": 0.8539775760811532 }, { "epoch": 0.2355258188096908, "grad_norm": 0.3672964870929718, "learning_rate": 1.786517716469194e-05, "loss": 0.4080020785331726, "step": 4365, "token_acc": 0.8616126638634677 }, { "epoch": 0.23557977661468732, "grad_norm": 0.33528414368629456, "learning_rate": 1.7864097806574082e-05, "loss": 0.3411439061164856, "step": 4366, "token_acc": 0.8780586724347709 }, { "epoch": 0.2356337344196838, "grad_norm": 0.41276034712791443, "learning_rate": 1.786301820828874e-05, "loss": 0.3887937068939209, "step": 4367, "token_acc": 0.8663955414972134 }, { "epoch": 0.2356876922246803, "grad_norm": 0.49000129103660583, "learning_rate": 1.786193836986888e-05, "loss": 0.38217416405677795, "step": 4368, "token_acc": 0.8687252814116215 }, { "epoch": 0.23574165002967679, "grad_norm": 0.41686198115348816, "learning_rate": 1.786085829134748e-05, "loss": 0.3774392604827881, "step": 4369, "token_acc": 0.8651510432886951 }, { "epoch": 0.2357956078346733, "grad_norm": 0.43458279967308044, "learning_rate": 1.7859777972757528e-05, "loss": 0.41168373823165894, "step": 4370, "token_acc": 0.8529079616036138 }, { "epoch": 0.23584956563966977, "grad_norm": 0.4335038661956787, "learning_rate": 1.7858697414132017e-05, "loss": 0.3960488438606262, "step": 4371, "token_acc": 0.8582314881380302 }, { "epoch": 0.23590352344466628, "grad_norm": 0.42848628759384155, "learning_rate": 1.7857616615503945e-05, "loss": 0.4149545431137085, "step": 4372, "token_acc": 0.8613636363636363 }, { "epoch": 0.23595748124966276, "grad_norm": 0.4059930443763733, "learning_rate": 1.785653557690632e-05, "loss": 0.2975602149963379, "step": 4373, "token_acc": 0.8957194041108806 }, { "epoch": 0.23601143905465927, "grad_norm": 0.3801708519458771, "learning_rate": 1.7855454298372156e-05, "loss": 0.3455800712108612, "step": 4374, "token_acc": 0.8788838612368024 }, { "epoch": 0.23606539685965575, "grad_norm": 0.42193564772605896, "learning_rate": 1.7854372779934477e-05, "loss": 0.4314488470554352, "step": 4375, "token_acc": 0.8522693116922152 }, { "epoch": 0.23611935466465225, "grad_norm": 0.3119502663612366, "learning_rate": 1.785329102162631e-05, "loss": 0.38441526889801025, "step": 4376, "token_acc": 0.8672298575211197 }, { "epoch": 0.23617331246964873, "grad_norm": 0.36404019594192505, "learning_rate": 1.785220902348069e-05, "loss": 0.40508443117141724, "step": 4377, "token_acc": 0.8624572405929305 }, { "epoch": 0.23622727027464524, "grad_norm": 0.434068500995636, "learning_rate": 1.7851126785530668e-05, "loss": 0.4927588701248169, "step": 4378, "token_acc": 0.8384203480589023 }, { "epoch": 0.23628122807964172, "grad_norm": 0.4030050039291382, "learning_rate": 1.7850044307809287e-05, "loss": 0.38117748498916626, "step": 4379, "token_acc": 0.8713212273011898 }, { "epoch": 0.2363351858846382, "grad_norm": 0.41387882828712463, "learning_rate": 1.7848961590349612e-05, "loss": 0.376364529132843, "step": 4380, "token_acc": 0.8660741187104549 }, { "epoch": 0.2363891436896347, "grad_norm": 0.43607625365257263, "learning_rate": 1.7847878633184706e-05, "loss": 0.37959814071655273, "step": 4381, "token_acc": 0.8627450980392157 }, { "epoch": 0.23644310149463119, "grad_norm": 0.40428054332733154, "learning_rate": 1.784679543634764e-05, "loss": 0.3544366955757141, "step": 4382, "token_acc": 0.8738047138047138 }, { "epoch": 0.2364970592996277, "grad_norm": 0.5818629860877991, "learning_rate": 1.78457119998715e-05, "loss": 0.46520477533340454, "step": 4383, "token_acc": 0.843764855015053 }, { "epoch": 0.23655101710462417, "grad_norm": 0.41184839606285095, "learning_rate": 1.784462832378937e-05, "loss": 0.42195069789886475, "step": 4384, "token_acc": 0.8540556115833453 }, { "epoch": 0.23660497490962068, "grad_norm": 0.37271374464035034, "learning_rate": 1.7843544408134345e-05, "loss": 0.38892340660095215, "step": 4385, "token_acc": 0.8614747528720278 }, { "epoch": 0.23665893271461716, "grad_norm": 0.38205254077911377, "learning_rate": 1.784246025293953e-05, "loss": 0.40577155351638794, "step": 4386, "token_acc": 0.8592313489073097 }, { "epoch": 0.23671289051961367, "grad_norm": 0.4966987073421478, "learning_rate": 1.7841375858238028e-05, "loss": 0.4200565218925476, "step": 4387, "token_acc": 0.8531039640987285 }, { "epoch": 0.23676684832461015, "grad_norm": 0.3745121359825134, "learning_rate": 1.7840291224062967e-05, "loss": 0.45955899357795715, "step": 4388, "token_acc": 0.8464106844741235 }, { "epoch": 0.23682080612960665, "grad_norm": 0.3994835317134857, "learning_rate": 1.7839206350447464e-05, "loss": 0.41051656007766724, "step": 4389, "token_acc": 0.860090897924088 }, { "epoch": 0.23687476393460313, "grad_norm": 0.40584614872932434, "learning_rate": 1.7838121237424652e-05, "loss": 0.38697636127471924, "step": 4390, "token_acc": 0.8678585308977846 }, { "epoch": 0.23692872173959964, "grad_norm": 0.446748286485672, "learning_rate": 1.7837035885027675e-05, "loss": 0.34383177757263184, "step": 4391, "token_acc": 0.8775222237900381 }, { "epoch": 0.23698267954459612, "grad_norm": 0.43564385175704956, "learning_rate": 1.783595029328967e-05, "loss": 0.42329496145248413, "step": 4392, "token_acc": 0.8553277505053422 }, { "epoch": 0.23703663734959263, "grad_norm": 0.4723224937915802, "learning_rate": 1.7834864462243798e-05, "loss": 0.4366428852081299, "step": 4393, "token_acc": 0.8517076623606441 }, { "epoch": 0.2370905951545891, "grad_norm": 0.4471103549003601, "learning_rate": 1.7833778391923217e-05, "loss": 0.4254133105278015, "step": 4394, "token_acc": 0.8494269529706231 }, { "epoch": 0.2371445529595856, "grad_norm": 0.45785555243492126, "learning_rate": 1.7832692082361097e-05, "loss": 0.47420012950897217, "step": 4395, "token_acc": 0.8416940789473685 }, { "epoch": 0.2371985107645821, "grad_norm": 0.33118799328804016, "learning_rate": 1.783160553359061e-05, "loss": 0.38903677463531494, "step": 4396, "token_acc": 0.8677241740954379 }, { "epoch": 0.2372524685695786, "grad_norm": 0.4172709286212921, "learning_rate": 1.7830518745644947e-05, "loss": 0.3702002465724945, "step": 4397, "token_acc": 0.8696415141639509 }, { "epoch": 0.23730642637457508, "grad_norm": 0.39832112193107605, "learning_rate": 1.7829431718557288e-05, "loss": 0.439931720495224, "step": 4398, "token_acc": 0.8504569371365273 }, { "epoch": 0.2373603841795716, "grad_norm": 0.3865768611431122, "learning_rate": 1.7828344452360836e-05, "loss": 0.3646750748157501, "step": 4399, "token_acc": 0.8711768617021277 }, { "epoch": 0.23741434198456807, "grad_norm": 0.4150640070438385, "learning_rate": 1.7827256947088796e-05, "loss": 0.42183053493499756, "step": 4400, "token_acc": 0.8531080902731952 }, { "epoch": 0.23746829978956457, "grad_norm": 0.44834262132644653, "learning_rate": 1.782616920277438e-05, "loss": 0.3932040333747864, "step": 4401, "token_acc": 0.8664616173266693 }, { "epoch": 0.23752225759456105, "grad_norm": 0.4504476487636566, "learning_rate": 1.7825081219450808e-05, "loss": 0.36468368768692017, "step": 4402, "token_acc": 0.8744618802944035 }, { "epoch": 0.23757621539955753, "grad_norm": 0.423936128616333, "learning_rate": 1.78239929971513e-05, "loss": 0.48316526412963867, "step": 4403, "token_acc": 0.8354613346181643 }, { "epoch": 0.23763017320455404, "grad_norm": 0.3757568895816803, "learning_rate": 1.78229045359091e-05, "loss": 0.4501779079437256, "step": 4404, "token_acc": 0.851165831335803 }, { "epoch": 0.23768413100955052, "grad_norm": 0.5591847896575928, "learning_rate": 1.7821815835757445e-05, "loss": 0.4632386565208435, "step": 4405, "token_acc": 0.8445454545454546 }, { "epoch": 0.23773808881454703, "grad_norm": 0.5163339972496033, "learning_rate": 1.7820726896729585e-05, "loss": 0.39440029859542847, "step": 4406, "token_acc": 0.8662191695285669 }, { "epoch": 0.2377920466195435, "grad_norm": 0.3710334599018097, "learning_rate": 1.781963771885877e-05, "loss": 0.4144894480705261, "step": 4407, "token_acc": 0.8532232285562067 }, { "epoch": 0.23784600442454001, "grad_norm": 0.43470853567123413, "learning_rate": 1.7818548302178266e-05, "loss": 0.39762091636657715, "step": 4408, "token_acc": 0.8638284551573017 }, { "epoch": 0.2378999622295365, "grad_norm": 0.41972431540489197, "learning_rate": 1.781745864672135e-05, "loss": 0.43221744894981384, "step": 4409, "token_acc": 0.8545966228893058 }, { "epoch": 0.237953920034533, "grad_norm": 0.33799803256988525, "learning_rate": 1.7816368752521292e-05, "loss": 0.45251351594924927, "step": 4410, "token_acc": 0.8474055092889173 }, { "epoch": 0.23800787783952948, "grad_norm": 0.4358648359775543, "learning_rate": 1.7815278619611382e-05, "loss": 0.43822774291038513, "step": 4411, "token_acc": 0.8473546069899157 }, { "epoch": 0.238061835644526, "grad_norm": 0.4049029052257538, "learning_rate": 1.7814188248024908e-05, "loss": 0.40559932589530945, "step": 4412, "token_acc": 0.8615249780893953 }, { "epoch": 0.23811579344952247, "grad_norm": 0.5159847736358643, "learning_rate": 1.7813097637795175e-05, "loss": 0.39182841777801514, "step": 4413, "token_acc": 0.86082295988935 }, { "epoch": 0.23816975125451897, "grad_norm": 0.40224868059158325, "learning_rate": 1.7812006788955484e-05, "loss": 0.3545714318752289, "step": 4414, "token_acc": 0.871127162136476 }, { "epoch": 0.23822370905951545, "grad_norm": 0.4582771062850952, "learning_rate": 1.7810915701539152e-05, "loss": 0.4059945344924927, "step": 4415, "token_acc": 0.8608645863154881 }, { "epoch": 0.23827766686451196, "grad_norm": 0.615139365196228, "learning_rate": 1.7809824375579504e-05, "loss": 0.39262622594833374, "step": 4416, "token_acc": 0.8606285379660356 }, { "epoch": 0.23833162466950844, "grad_norm": 0.45391151309013367, "learning_rate": 1.7808732811109862e-05, "loss": 0.41404759883880615, "step": 4417, "token_acc": 0.8571582928146948 }, { "epoch": 0.23838558247450495, "grad_norm": 0.37003836035728455, "learning_rate": 1.780764100816357e-05, "loss": 0.42695358395576477, "step": 4418, "token_acc": 0.8492941176470589 }, { "epoch": 0.23843954027950143, "grad_norm": 0.4086146950721741, "learning_rate": 1.780654896677396e-05, "loss": 0.36128106713294983, "step": 4419, "token_acc": 0.8747625680638217 }, { "epoch": 0.23849349808449793, "grad_norm": 0.43160080909729004, "learning_rate": 1.7805456686974397e-05, "loss": 0.468616783618927, "step": 4420, "token_acc": 0.8414548518466507 }, { "epoch": 0.23854745588949441, "grad_norm": 0.5808324813842773, "learning_rate": 1.7804364168798227e-05, "loss": 0.38962632417678833, "step": 4421, "token_acc": 0.8702430077945896 }, { "epoch": 0.23860141369449092, "grad_norm": 0.45461615920066833, "learning_rate": 1.780327141227882e-05, "loss": 0.39215075969696045, "step": 4422, "token_acc": 0.8657157766568099 }, { "epoch": 0.2386553714994874, "grad_norm": 0.44352927803993225, "learning_rate": 1.780217841744955e-05, "loss": 0.37904560565948486, "step": 4423, "token_acc": 0.8685676827269747 }, { "epoch": 0.23870932930448388, "grad_norm": 0.36133477091789246, "learning_rate": 1.7801085184343792e-05, "loss": 0.37222057580947876, "step": 4424, "token_acc": 0.8673539518900344 }, { "epoch": 0.2387632871094804, "grad_norm": 0.38170087337493896, "learning_rate": 1.7799991712994938e-05, "loss": 0.3712362051010132, "step": 4425, "token_acc": 0.8708025042686397 }, { "epoch": 0.23881724491447687, "grad_norm": 0.3439621925354004, "learning_rate": 1.779889800343638e-05, "loss": 0.3554811179637909, "step": 4426, "token_acc": 0.8742720109626585 }, { "epoch": 0.23887120271947337, "grad_norm": 0.39894285798072815, "learning_rate": 1.7797804055701522e-05, "loss": 0.4414559602737427, "step": 4427, "token_acc": 0.8473693995166388 }, { "epoch": 0.23892516052446985, "grad_norm": 0.4014798700809479, "learning_rate": 1.7796709869823768e-05, "loss": 0.4278240203857422, "step": 4428, "token_acc": 0.854256233877902 }, { "epoch": 0.23897911832946636, "grad_norm": 0.4511876404285431, "learning_rate": 1.779561544583654e-05, "loss": 0.4284402132034302, "step": 4429, "token_acc": 0.8548493222792473 }, { "epoch": 0.23903307613446284, "grad_norm": 0.4284054636955261, "learning_rate": 1.7794520783773256e-05, "loss": 0.36960577964782715, "step": 4430, "token_acc": 0.8714393368500377 }, { "epoch": 0.23908703393945935, "grad_norm": 0.4693312346935272, "learning_rate": 1.779342588366735e-05, "loss": 0.3539576828479767, "step": 4431, "token_acc": 0.8784046692607004 }, { "epoch": 0.23914099174445583, "grad_norm": 0.438040167093277, "learning_rate": 1.779233074555226e-05, "loss": 0.4238824248313904, "step": 4432, "token_acc": 0.8537660747091244 }, { "epoch": 0.23919494954945233, "grad_norm": 0.3843827545642853, "learning_rate": 1.779123536946143e-05, "loss": 0.3688547611236572, "step": 4433, "token_acc": 0.8728306839725828 }, { "epoch": 0.23924890735444881, "grad_norm": 0.4457017779350281, "learning_rate": 1.779013975542831e-05, "loss": 0.44479304552078247, "step": 4434, "token_acc": 0.8494814410480349 }, { "epoch": 0.23930286515944532, "grad_norm": 0.5254439115524292, "learning_rate": 1.7789043903486365e-05, "loss": 0.4339633584022522, "step": 4435, "token_acc": 0.8556537102473498 }, { "epoch": 0.2393568229644418, "grad_norm": 0.40528514981269836, "learning_rate": 1.778794781366906e-05, "loss": 0.36837881803512573, "step": 4436, "token_acc": 0.8707692307692307 }, { "epoch": 0.2394107807694383, "grad_norm": 0.40348026156425476, "learning_rate": 1.778685148600987e-05, "loss": 0.3544408679008484, "step": 4437, "token_acc": 0.867867473047594 }, { "epoch": 0.2394647385744348, "grad_norm": 0.5258001089096069, "learning_rate": 1.7785754920542274e-05, "loss": 0.3932231068611145, "step": 4438, "token_acc": 0.8647383380705568 }, { "epoch": 0.2395186963794313, "grad_norm": 0.47913435101509094, "learning_rate": 1.778465811729976e-05, "loss": 0.432186484336853, "step": 4439, "token_acc": 0.8532901833872708 }, { "epoch": 0.23957265418442777, "grad_norm": 0.40964922308921814, "learning_rate": 1.778356107631583e-05, "loss": 0.39851248264312744, "step": 4440, "token_acc": 0.8601084225836308 }, { "epoch": 0.23962661198942428, "grad_norm": 0.3762763440608978, "learning_rate": 1.778246379762398e-05, "loss": 0.4081290066242218, "step": 4441, "token_acc": 0.8603834728598434 }, { "epoch": 0.23968056979442076, "grad_norm": 0.42221081256866455, "learning_rate": 1.778136628125773e-05, "loss": 0.413354754447937, "step": 4442, "token_acc": 0.858010737496468 }, { "epoch": 0.23973452759941727, "grad_norm": 0.4558332860469818, "learning_rate": 1.778026852725059e-05, "loss": 0.43428727984428406, "step": 4443, "token_acc": 0.8539806775100893 }, { "epoch": 0.23978848540441375, "grad_norm": 0.458730548620224, "learning_rate": 1.7779170535636082e-05, "loss": 0.4381389915943146, "step": 4444, "token_acc": 0.8461196682464455 }, { "epoch": 0.23984244320941023, "grad_norm": 0.38584351539611816, "learning_rate": 1.777807230644775e-05, "loss": 0.4214183986186981, "step": 4445, "token_acc": 0.8565312970033553 }, { "epoch": 0.23989640101440673, "grad_norm": 0.3958120048046112, "learning_rate": 1.7776973839719125e-05, "loss": 0.4072796106338501, "step": 4446, "token_acc": 0.8548300776023549 }, { "epoch": 0.23995035881940321, "grad_norm": 0.35702797770500183, "learning_rate": 1.777587513548376e-05, "loss": 0.36955586075782776, "step": 4447, "token_acc": 0.8686534216335541 }, { "epoch": 0.24000431662439972, "grad_norm": 0.35893478989601135, "learning_rate": 1.7774776193775198e-05, "loss": 0.3625395894050598, "step": 4448, "token_acc": 0.8704612365063789 }, { "epoch": 0.2400582744293962, "grad_norm": 0.47671186923980713, "learning_rate": 1.7773677014627015e-05, "loss": 0.3901599943637848, "step": 4449, "token_acc": 0.865257793764988 }, { "epoch": 0.2401122322343927, "grad_norm": 0.5054813623428345, "learning_rate": 1.7772577598072766e-05, "loss": 0.39207378029823303, "step": 4450, "token_acc": 0.869928169287517 }, { "epoch": 0.2401661900393892, "grad_norm": 0.46275150775909424, "learning_rate": 1.7771477944146038e-05, "loss": 0.5180090665817261, "step": 4451, "token_acc": 0.8277432083775991 }, { "epoch": 0.2402201478443857, "grad_norm": 0.31320932507514954, "learning_rate": 1.7770378052880407e-05, "loss": 0.3769109845161438, "step": 4452, "token_acc": 0.8714442559137638 }, { "epoch": 0.24027410564938217, "grad_norm": 0.3220228850841522, "learning_rate": 1.7769277924309464e-05, "loss": 0.374020516872406, "step": 4453, "token_acc": 0.8708676291998381 }, { "epoch": 0.24032806345437868, "grad_norm": 0.45451655983924866, "learning_rate": 1.7768177558466814e-05, "loss": 0.38847965002059937, "step": 4454, "token_acc": 0.869466920514041 }, { "epoch": 0.24038202125937516, "grad_norm": 0.41973355412483215, "learning_rate": 1.7767076955386055e-05, "loss": 0.414700984954834, "step": 4455, "token_acc": 0.8609539207760711 }, { "epoch": 0.24043597906437167, "grad_norm": 0.4619777202606201, "learning_rate": 1.7765976115100797e-05, "loss": 0.4003170132637024, "step": 4456, "token_acc": 0.8652287744670241 }, { "epoch": 0.24048993686936815, "grad_norm": 0.3996327817440033, "learning_rate": 1.7764875037644667e-05, "loss": 0.4000330865383148, "step": 4457, "token_acc": 0.861128526645768 }, { "epoch": 0.24054389467436466, "grad_norm": 0.3805612027645111, "learning_rate": 1.776377372305128e-05, "loss": 0.3705442547798157, "step": 4458, "token_acc": 0.870272783632982 }, { "epoch": 0.24059785247936114, "grad_norm": 0.4604959487915039, "learning_rate": 1.7762672171354286e-05, "loss": 0.4430888891220093, "step": 4459, "token_acc": 0.8472688229767816 }, { "epoch": 0.24065181028435764, "grad_norm": 0.4198048412799835, "learning_rate": 1.7761570382587314e-05, "loss": 0.41243064403533936, "step": 4460, "token_acc": 0.858011596662424 }, { "epoch": 0.24070576808935412, "grad_norm": 0.43207889795303345, "learning_rate": 1.7760468356784016e-05, "loss": 0.4333186447620392, "step": 4461, "token_acc": 0.8520127795527157 }, { "epoch": 0.24075972589435063, "grad_norm": 0.4213940501213074, "learning_rate": 1.7759366093978046e-05, "loss": 0.32910436391830444, "step": 4462, "token_acc": 0.8800237283108409 }, { "epoch": 0.2408136836993471, "grad_norm": 0.33606860041618347, "learning_rate": 1.775826359420307e-05, "loss": 0.3514978885650635, "step": 4463, "token_acc": 0.8775487214148924 }, { "epoch": 0.24086764150434362, "grad_norm": 0.4981566071510315, "learning_rate": 1.7757160857492754e-05, "loss": 0.41542762517929077, "step": 4464, "token_acc": 0.8607629427792916 }, { "epoch": 0.2409215993093401, "grad_norm": 0.3288263976573944, "learning_rate": 1.775605788388078e-05, "loss": 0.4283648133277893, "step": 4465, "token_acc": 0.8549654121233439 }, { "epoch": 0.2409755571143366, "grad_norm": 0.3856937885284424, "learning_rate": 1.7754954673400832e-05, "loss": 0.4231109917163849, "step": 4466, "token_acc": 0.8567919075144509 }, { "epoch": 0.24102951491933308, "grad_norm": 0.41835853457450867, "learning_rate": 1.7753851226086594e-05, "loss": 0.42282626032829285, "step": 4467, "token_acc": 0.8611190408221524 }, { "epoch": 0.24108347272432956, "grad_norm": 0.45012909173965454, "learning_rate": 1.7752747541971774e-05, "loss": 0.4394673705101013, "step": 4468, "token_acc": 0.8518838575872797 }, { "epoch": 0.24113743052932607, "grad_norm": 0.41113370656967163, "learning_rate": 1.7751643621090075e-05, "loss": 0.411015123128891, "step": 4469, "token_acc": 0.8580758203249442 }, { "epoch": 0.24119138833432255, "grad_norm": 0.4768316149711609, "learning_rate": 1.7750539463475208e-05, "loss": 0.4299739897251129, "step": 4470, "token_acc": 0.855238849300607 }, { "epoch": 0.24124534613931906, "grad_norm": 0.41716113686561584, "learning_rate": 1.77494350691609e-05, "loss": 0.3999153971672058, "step": 4471, "token_acc": 0.8570247933884297 }, { "epoch": 0.24129930394431554, "grad_norm": 0.41477248072624207, "learning_rate": 1.7748330438180873e-05, "loss": 0.4350875914096832, "step": 4472, "token_acc": 0.8563293198828507 }, { "epoch": 0.24135326174931204, "grad_norm": 0.4124147593975067, "learning_rate": 1.7747225570568868e-05, "loss": 0.41713035106658936, "step": 4473, "token_acc": 0.8578969895848195 }, { "epoch": 0.24140721955430852, "grad_norm": 0.477457195520401, "learning_rate": 1.7746120466358618e-05, "loss": 0.39560848474502563, "step": 4474, "token_acc": 0.8576512455516014 }, { "epoch": 0.24146117735930503, "grad_norm": 0.4112545847892761, "learning_rate": 1.774501512558388e-05, "loss": 0.3664989471435547, "step": 4475, "token_acc": 0.8681204569055037 }, { "epoch": 0.2415151351643015, "grad_norm": 0.4149380624294281, "learning_rate": 1.774390954827841e-05, "loss": 0.44288796186447144, "step": 4476, "token_acc": 0.8503349573690622 }, { "epoch": 0.24156909296929802, "grad_norm": 0.3426521420478821, "learning_rate": 1.774280373447597e-05, "loss": 0.339489221572876, "step": 4477, "token_acc": 0.8851497769279796 }, { "epoch": 0.2416230507742945, "grad_norm": 0.3438592851161957, "learning_rate": 1.7741697684210333e-05, "loss": 0.41044145822525024, "step": 4478, "token_acc": 0.8576954069298952 }, { "epoch": 0.241677008579291, "grad_norm": 0.5952073335647583, "learning_rate": 1.7740591397515276e-05, "loss": 0.4190552234649658, "step": 4479, "token_acc": 0.8645619573796369 }, { "epoch": 0.24173096638428748, "grad_norm": 0.41448912024497986, "learning_rate": 1.7739484874424585e-05, "loss": 0.37805187702178955, "step": 4480, "token_acc": 0.8710154673839946 }, { "epoch": 0.241784924189284, "grad_norm": 0.5121203064918518, "learning_rate": 1.7738378114972054e-05, "loss": 0.37338483333587646, "step": 4481, "token_acc": 0.870722433460076 }, { "epoch": 0.24183888199428047, "grad_norm": 0.4262445271015167, "learning_rate": 1.7737271119191482e-05, "loss": 0.4162054657936096, "step": 4482, "token_acc": 0.8632698576085092 }, { "epoch": 0.24189283979927698, "grad_norm": 0.4940265715122223, "learning_rate": 1.773616388711668e-05, "loss": 0.4381757080554962, "step": 4483, "token_acc": 0.8463320463320463 }, { "epoch": 0.24194679760427346, "grad_norm": 0.4655505120754242, "learning_rate": 1.7735056418781456e-05, "loss": 0.4493112862110138, "step": 4484, "token_acc": 0.8453077137287663 }, { "epoch": 0.24200075540926996, "grad_norm": 0.43696510791778564, "learning_rate": 1.773394871421964e-05, "loss": 0.3575465679168701, "step": 4485, "token_acc": 0.8726439123790117 }, { "epoch": 0.24205471321426644, "grad_norm": 0.36881139874458313, "learning_rate": 1.773284077346505e-05, "loss": 0.41069379448890686, "step": 4486, "token_acc": 0.8594497607655502 }, { "epoch": 0.24210867101926295, "grad_norm": 0.4215715527534485, "learning_rate": 1.773173259655153e-05, "loss": 0.35751235485076904, "step": 4487, "token_acc": 0.8760276224926011 }, { "epoch": 0.24216262882425943, "grad_norm": 0.5816308856010437, "learning_rate": 1.7730624183512922e-05, "loss": 0.36399635672569275, "step": 4488, "token_acc": 0.8693247292002766 }, { "epoch": 0.2422165866292559, "grad_norm": 0.482634961605072, "learning_rate": 1.772951553438308e-05, "loss": 0.41498512029647827, "step": 4489, "token_acc": 0.8600201409869084 }, { "epoch": 0.24227054443425242, "grad_norm": 0.44091668725013733, "learning_rate": 1.7728406649195854e-05, "loss": 0.4255390465259552, "step": 4490, "token_acc": 0.8609958506224067 }, { "epoch": 0.2423245022392489, "grad_norm": 0.39092233777046204, "learning_rate": 1.7727297527985117e-05, "loss": 0.4012553095817566, "step": 4491, "token_acc": 0.8648331595411888 }, { "epoch": 0.2423784600442454, "grad_norm": 0.48428934812545776, "learning_rate": 1.7726188170784736e-05, "loss": 0.37647396326065063, "step": 4492, "token_acc": 0.8738497574033797 }, { "epoch": 0.24243241784924188, "grad_norm": 0.4221004843711853, "learning_rate": 1.7725078577628593e-05, "loss": 0.43495112657546997, "step": 4493, "token_acc": 0.8546684066911852 }, { "epoch": 0.2424863756542384, "grad_norm": 0.4548679292201996, "learning_rate": 1.7723968748550575e-05, "loss": 0.38350942730903625, "step": 4494, "token_acc": 0.8623420387531592 }, { "epoch": 0.24254033345923487, "grad_norm": 0.41485661268234253, "learning_rate": 1.7722858683584575e-05, "loss": 0.3808635473251343, "step": 4495, "token_acc": 0.8653035143769968 }, { "epoch": 0.24259429126423138, "grad_norm": 0.42953333258628845, "learning_rate": 1.772174838276449e-05, "loss": 0.3648572266101837, "step": 4496, "token_acc": 0.8707660239708181 }, { "epoch": 0.24264824906922786, "grad_norm": 0.3306657075881958, "learning_rate": 1.7720637846124236e-05, "loss": 0.37130677700042725, "step": 4497, "token_acc": 0.8699770378874856 }, { "epoch": 0.24270220687422436, "grad_norm": 0.3819544315338135, "learning_rate": 1.7719527073697724e-05, "loss": 0.3776337504386902, "step": 4498, "token_acc": 0.8670283143688806 }, { "epoch": 0.24275616467922084, "grad_norm": 0.3273582458496094, "learning_rate": 1.7718416065518873e-05, "loss": 0.4375782608985901, "step": 4499, "token_acc": 0.8492129246064622 }, { "epoch": 0.24281012248421735, "grad_norm": 0.42921724915504456, "learning_rate": 1.771730482162162e-05, "loss": 0.4067752957344055, "step": 4500, "token_acc": 0.8562329390354868 }, { "epoch": 0.24286408028921383, "grad_norm": 0.43494459986686707, "learning_rate": 1.7716193342039903e-05, "loss": 0.3953135013580322, "step": 4501, "token_acc": 0.8666171739668399 }, { "epoch": 0.24291803809421034, "grad_norm": 0.46998119354248047, "learning_rate": 1.771508162680766e-05, "loss": 0.3998759388923645, "step": 4502, "token_acc": 0.8607553366174056 }, { "epoch": 0.24297199589920682, "grad_norm": 0.4480235278606415, "learning_rate": 1.7713969675958847e-05, "loss": 0.4087211489677429, "step": 4503, "token_acc": 0.8614881439084219 }, { "epoch": 0.24302595370420332, "grad_norm": 0.4461272656917572, "learning_rate": 1.771285748952742e-05, "loss": 0.4426031708717346, "step": 4504, "token_acc": 0.8554382114936223 }, { "epoch": 0.2430799115091998, "grad_norm": 0.4198840856552124, "learning_rate": 1.7711745067547342e-05, "loss": 0.45183542370796204, "step": 4505, "token_acc": 0.848348190875721 }, { "epoch": 0.2431338693141963, "grad_norm": 0.4207153022289276, "learning_rate": 1.7710632410052594e-05, "loss": 0.4758340120315552, "step": 4506, "token_acc": 0.8383690538421328 }, { "epoch": 0.2431878271191928, "grad_norm": 0.42621415853500366, "learning_rate": 1.770951951707715e-05, "loss": 0.3798965513706207, "step": 4507, "token_acc": 0.8706287024651251 }, { "epoch": 0.2432417849241893, "grad_norm": 0.4202480614185333, "learning_rate": 1.7708406388655e-05, "loss": 0.3950966000556946, "step": 4508, "token_acc": 0.8625859940024696 }, { "epoch": 0.24329574272918578, "grad_norm": 0.3164609372615814, "learning_rate": 1.770729302482014e-05, "loss": 0.4124104082584381, "step": 4509, "token_acc": 0.8587984547755428 }, { "epoch": 0.24334970053418226, "grad_norm": 0.38610804080963135, "learning_rate": 1.7706179425606567e-05, "loss": 0.39614540338516235, "step": 4510, "token_acc": 0.8615589768456783 }, { "epoch": 0.24340365833917876, "grad_norm": 0.4866405427455902, "learning_rate": 1.77050655910483e-05, "loss": 0.41719233989715576, "step": 4511, "token_acc": 0.8562192118226601 }, { "epoch": 0.24345761614417524, "grad_norm": 0.4399382472038269, "learning_rate": 1.770395152117934e-05, "loss": 0.4469990134239197, "step": 4512, "token_acc": 0.8501394393072068 }, { "epoch": 0.24351157394917175, "grad_norm": 0.408426970243454, "learning_rate": 1.7702837216033724e-05, "loss": 0.4488857388496399, "step": 4513, "token_acc": 0.8513340323896074 }, { "epoch": 0.24356553175416823, "grad_norm": 0.3693303167819977, "learning_rate": 1.7701722675645473e-05, "loss": 0.39349138736724854, "step": 4514, "token_acc": 0.8661602933473023 }, { "epoch": 0.24361948955916474, "grad_norm": 0.37887585163116455, "learning_rate": 1.770060790004863e-05, "loss": 0.4382963180541992, "step": 4515, "token_acc": 0.8541514741118866 }, { "epoch": 0.24367344736416122, "grad_norm": 0.44082877039909363, "learning_rate": 1.7699492889277242e-05, "loss": 0.4270589351654053, "step": 4516, "token_acc": 0.8523683045595396 }, { "epoch": 0.24372740516915772, "grad_norm": 0.48933643102645874, "learning_rate": 1.7698377643365356e-05, "loss": 0.4656398296356201, "step": 4517, "token_acc": 0.83909026297086 }, { "epoch": 0.2437813629741542, "grad_norm": 0.5523622632026672, "learning_rate": 1.7697262162347034e-05, "loss": 0.4668433666229248, "step": 4518, "token_acc": 0.8429111531190926 }, { "epoch": 0.2438353207791507, "grad_norm": 0.38058221340179443, "learning_rate": 1.769614644625634e-05, "loss": 0.4295574426651001, "step": 4519, "token_acc": 0.8552531162554058 }, { "epoch": 0.2438892785841472, "grad_norm": 0.4879971146583557, "learning_rate": 1.769503049512735e-05, "loss": 0.3958670496940613, "step": 4520, "token_acc": 0.8616147528281419 }, { "epoch": 0.2439432363891437, "grad_norm": 0.43460747599601746, "learning_rate": 1.769391430899415e-05, "loss": 0.4469001591205597, "step": 4521, "token_acc": 0.8517314487632509 }, { "epoch": 0.24399719419414018, "grad_norm": 0.588962972164154, "learning_rate": 1.7692797887890818e-05, "loss": 0.42915603518486023, "step": 4522, "token_acc": 0.8493005510809665 }, { "epoch": 0.24405115199913668, "grad_norm": 0.39009585976600647, "learning_rate": 1.7691681231851454e-05, "loss": 0.38114193081855774, "step": 4523, "token_acc": 0.8668373879641486 }, { "epoch": 0.24410510980413316, "grad_norm": 0.4040619134902954, "learning_rate": 1.7690564340910163e-05, "loss": 0.35133111476898193, "step": 4524, "token_acc": 0.8714395688991532 }, { "epoch": 0.24415906760912967, "grad_norm": 0.3490005135536194, "learning_rate": 1.7689447215101048e-05, "loss": 0.415173202753067, "step": 4525, "token_acc": 0.8623455799775389 }, { "epoch": 0.24421302541412615, "grad_norm": 0.30444225668907166, "learning_rate": 1.7688329854458232e-05, "loss": 0.38449591398239136, "step": 4526, "token_acc": 0.8648912301238904 }, { "epoch": 0.24426698321912266, "grad_norm": 0.4539410173892975, "learning_rate": 1.768721225901583e-05, "loss": 0.4643135070800781, "step": 4527, "token_acc": 0.848816717019134 }, { "epoch": 0.24432094102411914, "grad_norm": 0.4022965431213379, "learning_rate": 1.7686094428807984e-05, "loss": 0.4056732654571533, "step": 4528, "token_acc": 0.861749776052553 }, { "epoch": 0.24437489882911564, "grad_norm": 0.5593093633651733, "learning_rate": 1.7684976363868832e-05, "loss": 0.3904379606246948, "step": 4529, "token_acc": 0.8683152392440692 }, { "epoch": 0.24442885663411212, "grad_norm": 0.3148002624511719, "learning_rate": 1.7683858064232508e-05, "loss": 0.3586413264274597, "step": 4530, "token_acc": 0.8727636682096834 }, { "epoch": 0.2444828144391086, "grad_norm": 0.4583123028278351, "learning_rate": 1.7682739529933176e-05, "loss": 0.3570592701435089, "step": 4531, "token_acc": 0.8718938885157824 }, { "epoch": 0.2445367722441051, "grad_norm": 0.4767757058143616, "learning_rate": 1.768162076100499e-05, "loss": 0.4003910422325134, "step": 4532, "token_acc": 0.8670368205727644 }, { "epoch": 0.2445907300491016, "grad_norm": 0.43717607855796814, "learning_rate": 1.7680501757482117e-05, "loss": 0.41794538497924805, "step": 4533, "token_acc": 0.8553703703703703 }, { "epoch": 0.2446446878540981, "grad_norm": 0.4238196611404419, "learning_rate": 1.7679382519398736e-05, "loss": 0.3736949861049652, "step": 4534, "token_acc": 0.8655913978494624 }, { "epoch": 0.24469864565909458, "grad_norm": 0.476813405752182, "learning_rate": 1.767826304678902e-05, "loss": 0.4226481318473816, "step": 4535, "token_acc": 0.8561794458290574 }, { "epoch": 0.24475260346409108, "grad_norm": 0.4233434796333313, "learning_rate": 1.7677143339687163e-05, "loss": 0.4440908432006836, "step": 4536, "token_acc": 0.8505223680685775 }, { "epoch": 0.24480656126908756, "grad_norm": 0.47257259488105774, "learning_rate": 1.7676023398127363e-05, "loss": 0.3920513987541199, "step": 4537, "token_acc": 0.863671814069568 }, { "epoch": 0.24486051907408407, "grad_norm": 0.4231422245502472, "learning_rate": 1.7674903222143817e-05, "loss": 0.41909635066986084, "step": 4538, "token_acc": 0.8563091687200104 }, { "epoch": 0.24491447687908055, "grad_norm": 0.3756173849105835, "learning_rate": 1.7673782811770736e-05, "loss": 0.3888404071331024, "step": 4539, "token_acc": 0.8656578947368421 }, { "epoch": 0.24496843468407706, "grad_norm": 0.40667831897735596, "learning_rate": 1.767266216704234e-05, "loss": 0.4223315119743347, "step": 4540, "token_acc": 0.8564779874213837 }, { "epoch": 0.24502239248907354, "grad_norm": 0.4954284131526947, "learning_rate": 1.767154128799285e-05, "loss": 0.4850836992263794, "step": 4541, "token_acc": 0.8361910160124699 }, { "epoch": 0.24507635029407004, "grad_norm": 0.4325561225414276, "learning_rate": 1.76704201746565e-05, "loss": 0.36359402537345886, "step": 4542, "token_acc": 0.8700466977985324 }, { "epoch": 0.24513030809906652, "grad_norm": 0.46070799231529236, "learning_rate": 1.7669298827067525e-05, "loss": 0.46647775173187256, "step": 4543, "token_acc": 0.8448655049906953 }, { "epoch": 0.24518426590406303, "grad_norm": 0.3769592046737671, "learning_rate": 1.7668177245260177e-05, "loss": 0.39967650175094604, "step": 4544, "token_acc": 0.869243156199678 }, { "epoch": 0.2452382237090595, "grad_norm": 0.4635387063026428, "learning_rate": 1.76670554292687e-05, "loss": 0.4569927752017975, "step": 4545, "token_acc": 0.845258473125642 }, { "epoch": 0.24529218151405602, "grad_norm": 0.4096243977546692, "learning_rate": 1.766593337912736e-05, "loss": 0.42361685633659363, "step": 4546, "token_acc": 0.8532863849765259 }, { "epoch": 0.2453461393190525, "grad_norm": 0.44776397943496704, "learning_rate": 1.7664811094870425e-05, "loss": 0.33604344725608826, "step": 4547, "token_acc": 0.8805346700083542 }, { "epoch": 0.245400097124049, "grad_norm": 0.40432244539260864, "learning_rate": 1.7663688576532165e-05, "loss": 0.3573903441429138, "step": 4548, "token_acc": 0.875417567302024 }, { "epoch": 0.24545405492904548, "grad_norm": 0.5434532761573792, "learning_rate": 1.7662565824146866e-05, "loss": 0.42605024576187134, "step": 4549, "token_acc": 0.8525858814647037 }, { "epoch": 0.245508012734042, "grad_norm": 0.4146707057952881, "learning_rate": 1.7661442837748814e-05, "loss": 0.3607901930809021, "step": 4550, "token_acc": 0.8763819095477386 }, { "epoch": 0.24556197053903847, "grad_norm": 0.35617098212242126, "learning_rate": 1.76603196173723e-05, "loss": 0.33692604303359985, "step": 4551, "token_acc": 0.8802288620470439 }, { "epoch": 0.24561592834403498, "grad_norm": 0.42083820700645447, "learning_rate": 1.7659196163051634e-05, "loss": 0.4241052269935608, "step": 4552, "token_acc": 0.8559488692232055 }, { "epoch": 0.24566988614903146, "grad_norm": 0.4243085980415344, "learning_rate": 1.7658072474821123e-05, "loss": 0.40300968289375305, "step": 4553, "token_acc": 0.8622576610381488 }, { "epoch": 0.24572384395402794, "grad_norm": 0.4073024392127991, "learning_rate": 1.765694855271509e-05, "loss": 0.50859135389328, "step": 4554, "token_acc": 0.831327234793773 }, { "epoch": 0.24577780175902444, "grad_norm": 0.446353942155838, "learning_rate": 1.7655824396767848e-05, "loss": 0.41398242115974426, "step": 4555, "token_acc": 0.8589762501843929 }, { "epoch": 0.24583175956402092, "grad_norm": 0.3111810088157654, "learning_rate": 1.7654700007013737e-05, "loss": 0.3764285743236542, "step": 4556, "token_acc": 0.8665274151436031 }, { "epoch": 0.24588571736901743, "grad_norm": 0.39433273673057556, "learning_rate": 1.765357538348709e-05, "loss": 0.3900022506713867, "step": 4557, "token_acc": 0.8635575862612258 }, { "epoch": 0.2459396751740139, "grad_norm": 0.4977606534957886, "learning_rate": 1.7652450526222262e-05, "loss": 0.46924588084220886, "step": 4558, "token_acc": 0.8409595119245702 }, { "epoch": 0.24599363297901042, "grad_norm": 0.39033597707748413, "learning_rate": 1.7651325435253596e-05, "loss": 0.4295390844345093, "step": 4559, "token_acc": 0.856026262310458 }, { "epoch": 0.2460475907840069, "grad_norm": 0.4731713831424713, "learning_rate": 1.7650200110615455e-05, "loss": 0.4129176139831543, "step": 4560, "token_acc": 0.8564364876385336 }, { "epoch": 0.2461015485890034, "grad_norm": 0.4373677670955658, "learning_rate": 1.7649074552342208e-05, "loss": 0.3623555302619934, "step": 4561, "token_acc": 0.8732261116367077 }, { "epoch": 0.24615550639399988, "grad_norm": 0.5353870987892151, "learning_rate": 1.7647948760468225e-05, "loss": 0.39791053533554077, "step": 4562, "token_acc": 0.8565594691168964 }, { "epoch": 0.2462094641989964, "grad_norm": 0.5228036642074585, "learning_rate": 1.7646822735027892e-05, "loss": 0.41287726163864136, "step": 4563, "token_acc": 0.864036332671019 }, { "epoch": 0.24626342200399287, "grad_norm": 0.4559837281703949, "learning_rate": 1.7645696476055598e-05, "loss": 0.42225778102874756, "step": 4564, "token_acc": 0.8554712207463631 }, { "epoch": 0.24631737980898938, "grad_norm": 0.39929455518722534, "learning_rate": 1.7644569983585733e-05, "loss": 0.4162693917751312, "step": 4565, "token_acc": 0.8564500484966052 }, { "epoch": 0.24637133761398586, "grad_norm": 0.4271198809146881, "learning_rate": 1.7643443257652707e-05, "loss": 0.37004321813583374, "step": 4566, "token_acc": 0.8693773011045302 }, { "epoch": 0.24642529541898237, "grad_norm": 0.4975563585758209, "learning_rate": 1.7642316298290922e-05, "loss": 0.37277960777282715, "step": 4567, "token_acc": 0.868939883645766 }, { "epoch": 0.24647925322397884, "grad_norm": 0.2845257520675659, "learning_rate": 1.7641189105534803e-05, "loss": 0.3939207196235657, "step": 4568, "token_acc": 0.863001870772773 }, { "epoch": 0.24653321102897535, "grad_norm": 0.42554596066474915, "learning_rate": 1.764006167941877e-05, "loss": 0.4798012971878052, "step": 4569, "token_acc": 0.8427178815655464 }, { "epoch": 0.24658716883397183, "grad_norm": 0.36987411975860596, "learning_rate": 1.7638934019977253e-05, "loss": 0.36321765184402466, "step": 4570, "token_acc": 0.8715142038050561 }, { "epoch": 0.24664112663896834, "grad_norm": 0.372439980506897, "learning_rate": 1.7637806127244693e-05, "loss": 0.3856613337993622, "step": 4571, "token_acc": 0.8669316375198728 }, { "epoch": 0.24669508444396482, "grad_norm": 0.5275586247444153, "learning_rate": 1.7636678001255533e-05, "loss": 0.4456651508808136, "step": 4572, "token_acc": 0.8473909935668335 }, { "epoch": 0.24674904224896133, "grad_norm": 0.5260984897613525, "learning_rate": 1.7635549642044233e-05, "loss": 0.4851503074169159, "step": 4573, "token_acc": 0.8384862091084028 }, { "epoch": 0.2468030000539578, "grad_norm": 0.40194982290267944, "learning_rate": 1.7634421049645244e-05, "loss": 0.3251636028289795, "step": 4574, "token_acc": 0.8874751051117504 }, { "epoch": 0.24685695785895428, "grad_norm": 0.287394255399704, "learning_rate": 1.7633292224093033e-05, "loss": 0.3112197518348694, "step": 4575, "token_acc": 0.8878197320341048 }, { "epoch": 0.2469109156639508, "grad_norm": 0.4943197965621948, "learning_rate": 1.763216316542208e-05, "loss": 0.4684170186519623, "step": 4576, "token_acc": 0.8416874908128766 }, { "epoch": 0.24696487346894727, "grad_norm": 0.5286778807640076, "learning_rate": 1.763103387366686e-05, "loss": 0.40513139963150024, "step": 4577, "token_acc": 0.857626443647949 }, { "epoch": 0.24701883127394378, "grad_norm": 0.45398062467575073, "learning_rate": 1.762990434886187e-05, "loss": 0.5098147392272949, "step": 4578, "token_acc": 0.8327101509486221 }, { "epoch": 0.24707278907894026, "grad_norm": 0.42549312114715576, "learning_rate": 1.7628774591041598e-05, "loss": 0.35909032821655273, "step": 4579, "token_acc": 0.8738179669030733 }, { "epoch": 0.24712674688393677, "grad_norm": 0.3393349349498749, "learning_rate": 1.7627644600240548e-05, "loss": 0.36387747526168823, "step": 4580, "token_acc": 0.8713355048859935 }, { "epoch": 0.24718070468893324, "grad_norm": 0.3719209134578705, "learning_rate": 1.762651437649323e-05, "loss": 0.4504817724227905, "step": 4581, "token_acc": 0.8441770647653001 }, { "epoch": 0.24723466249392975, "grad_norm": 0.4744122624397278, "learning_rate": 1.7625383919834158e-05, "loss": 0.42204469442367554, "step": 4582, "token_acc": 0.8555771365149833 }, { "epoch": 0.24728862029892623, "grad_norm": 0.36405128240585327, "learning_rate": 1.762425323029786e-05, "loss": 0.43029993772506714, "step": 4583, "token_acc": 0.851406165494862 }, { "epoch": 0.24734257810392274, "grad_norm": 0.43860089778900146, "learning_rate": 1.762312230791887e-05, "loss": 0.4976244866847992, "step": 4584, "token_acc": 0.8338535754824064 }, { "epoch": 0.24739653590891922, "grad_norm": 0.3155834674835205, "learning_rate": 1.762199115273172e-05, "loss": 0.38478031754493713, "step": 4585, "token_acc": 0.8673926014319809 }, { "epoch": 0.24745049371391573, "grad_norm": 0.3589397072792053, "learning_rate": 1.762085976477095e-05, "loss": 0.3743242025375366, "step": 4586, "token_acc": 0.8718692022263451 }, { "epoch": 0.2475044515189122, "grad_norm": 0.4577193260192871, "learning_rate": 1.7619728144071126e-05, "loss": 0.4048464298248291, "step": 4587, "token_acc": 0.8616589327146171 }, { "epoch": 0.2475584093239087, "grad_norm": 0.4262358546257019, "learning_rate": 1.76185962906668e-05, "loss": 0.40082791447639465, "step": 4588, "token_acc": 0.8610510046367852 }, { "epoch": 0.2476123671289052, "grad_norm": 0.4698401093482971, "learning_rate": 1.761746420459254e-05, "loss": 0.43166065216064453, "step": 4589, "token_acc": 0.8513833992094861 }, { "epoch": 0.2476663249339017, "grad_norm": 0.3838556110858917, "learning_rate": 1.7616331885882917e-05, "loss": 0.4202267527580261, "step": 4590, "token_acc": 0.8540395158787628 }, { "epoch": 0.24772028273889818, "grad_norm": 0.4004960358142853, "learning_rate": 1.7615199334572513e-05, "loss": 0.39222055673599243, "step": 4591, "token_acc": 0.8589133880962675 }, { "epoch": 0.24777424054389469, "grad_norm": 0.3562585711479187, "learning_rate": 1.7614066550695918e-05, "loss": 0.41582250595092773, "step": 4592, "token_acc": 0.8587441506148656 }, { "epoch": 0.24782819834889117, "grad_norm": 0.4232325553894043, "learning_rate": 1.7612933534287725e-05, "loss": 0.46019265055656433, "step": 4593, "token_acc": 0.8473101265822784 }, { "epoch": 0.24788215615388767, "grad_norm": 0.483195424079895, "learning_rate": 1.7611800285382533e-05, "loss": 0.42466965317726135, "step": 4594, "token_acc": 0.850343115783326 }, { "epoch": 0.24793611395888415, "grad_norm": 0.3218859136104584, "learning_rate": 1.7610666804014957e-05, "loss": 0.36671900749206543, "step": 4595, "token_acc": 0.8725944655148195 }, { "epoch": 0.24799007176388063, "grad_norm": 0.4949539601802826, "learning_rate": 1.760953309021961e-05, "loss": 0.4615652561187744, "step": 4596, "token_acc": 0.8454622084855297 }, { "epoch": 0.24804402956887714, "grad_norm": 0.4208400845527649, "learning_rate": 1.7608399144031115e-05, "loss": 0.34018057584762573, "step": 4597, "token_acc": 0.877989706327581 }, { "epoch": 0.24809798737387362, "grad_norm": 0.38680654764175415, "learning_rate": 1.7607264965484106e-05, "loss": 0.390252023935318, "step": 4598, "token_acc": 0.8650031989763276 }, { "epoch": 0.24815194517887013, "grad_norm": 0.30741962790489197, "learning_rate": 1.7606130554613212e-05, "loss": 0.44516825675964355, "step": 4599, "token_acc": 0.8506767573861156 }, { "epoch": 0.2482059029838666, "grad_norm": 0.4711011052131653, "learning_rate": 1.760499591145309e-05, "loss": 0.4398397207260132, "step": 4600, "token_acc": 0.8564450474898236 }, { "epoch": 0.2482598607888631, "grad_norm": 0.36082378029823303, "learning_rate": 1.760386103603838e-05, "loss": 0.40432387590408325, "step": 4601, "token_acc": 0.8598520511096167 }, { "epoch": 0.2483138185938596, "grad_norm": 0.44671064615249634, "learning_rate": 1.760272592840375e-05, "loss": 0.43915852904319763, "step": 4602, "token_acc": 0.8466474173885146 }, { "epoch": 0.2483677763988561, "grad_norm": 0.4884985685348511, "learning_rate": 1.760159058858386e-05, "loss": 0.40997231006622314, "step": 4603, "token_acc": 0.8604361370716511 }, { "epoch": 0.24842173420385258, "grad_norm": 0.3475654423236847, "learning_rate": 1.7600455016613385e-05, "loss": 0.41070544719696045, "step": 4604, "token_acc": 0.8549707602339182 }, { "epoch": 0.2484756920088491, "grad_norm": 0.4257566034793854, "learning_rate": 1.7599319212527005e-05, "loss": 0.44914114475250244, "step": 4605, "token_acc": 0.8500058092250494 }, { "epoch": 0.24852964981384557, "grad_norm": 0.5118705630302429, "learning_rate": 1.7598183176359406e-05, "loss": 0.44200223684310913, "step": 4606, "token_acc": 0.8496936885596239 }, { "epoch": 0.24858360761884207, "grad_norm": 0.44269970059394836, "learning_rate": 1.7597046908145284e-05, "loss": 0.3852381110191345, "step": 4607, "token_acc": 0.8644901610017889 }, { "epoch": 0.24863756542383855, "grad_norm": 0.4360080659389496, "learning_rate": 1.759591040791934e-05, "loss": 0.3519124984741211, "step": 4608, "token_acc": 0.8782926055220565 }, { "epoch": 0.24869152322883506, "grad_norm": 0.41493865847587585, "learning_rate": 1.759477367571628e-05, "loss": 0.3745613992214203, "step": 4609, "token_acc": 0.8672526140324291 }, { "epoch": 0.24874548103383154, "grad_norm": 0.46563562750816345, "learning_rate": 1.7593636711570826e-05, "loss": 0.39783233404159546, "step": 4610, "token_acc": 0.8629224776982033 }, { "epoch": 0.24879943883882805, "grad_norm": 0.46970120072364807, "learning_rate": 1.759249951551769e-05, "loss": 0.3820728063583374, "step": 4611, "token_acc": 0.8673386441229068 }, { "epoch": 0.24885339664382453, "grad_norm": 0.46248751878738403, "learning_rate": 1.759136208759161e-05, "loss": 0.3924691677093506, "step": 4612, "token_acc": 0.8645316706815928 }, { "epoch": 0.24890735444882103, "grad_norm": 0.4270506203174591, "learning_rate": 1.759022442782732e-05, "loss": 0.358356237411499, "step": 4613, "token_acc": 0.8796692750627492 }, { "epoch": 0.2489613122538175, "grad_norm": 0.37372398376464844, "learning_rate": 1.7589086536259567e-05, "loss": 0.4321836829185486, "step": 4614, "token_acc": 0.8518009929227844 }, { "epoch": 0.24901527005881402, "grad_norm": 0.2961322069168091, "learning_rate": 1.7587948412923096e-05, "loss": 0.4346759021282196, "step": 4615, "token_acc": 0.8520613186486085 }, { "epoch": 0.2490692278638105, "grad_norm": 0.528847336769104, "learning_rate": 1.7586810057852672e-05, "loss": 0.39501985907554626, "step": 4616, "token_acc": 0.8647902869757175 }, { "epoch": 0.249123185668807, "grad_norm": 0.3708072602748871, "learning_rate": 1.7585671471083053e-05, "loss": 0.40552282333374023, "step": 4617, "token_acc": 0.8579478443192242 }, { "epoch": 0.2491771434738035, "grad_norm": 0.3885432779788971, "learning_rate": 1.758453265264902e-05, "loss": 0.38580426573753357, "step": 4618, "token_acc": 0.8653168012740302 }, { "epoch": 0.24923110127879997, "grad_norm": 0.4262012839317322, "learning_rate": 1.758339360258534e-05, "loss": 0.3818814158439636, "step": 4619, "token_acc": 0.8591394902713072 }, { "epoch": 0.24928505908379647, "grad_norm": 0.430080384016037, "learning_rate": 1.7582254320926808e-05, "loss": 0.4149661064147949, "step": 4620, "token_acc": 0.8604651162790697 }, { "epoch": 0.24933901688879295, "grad_norm": 0.4136263430118561, "learning_rate": 1.7581114807708218e-05, "loss": 0.3916938006877899, "step": 4621, "token_acc": 0.8645346206064263 }, { "epoch": 0.24939297469378946, "grad_norm": 0.35260796546936035, "learning_rate": 1.7579975062964366e-05, "loss": 0.38618600368499756, "step": 4622, "token_acc": 0.8657432072456047 }, { "epoch": 0.24944693249878594, "grad_norm": 0.46812644600868225, "learning_rate": 1.757883508673006e-05, "loss": 0.4381731450557709, "step": 4623, "token_acc": 0.8540931248949404 }, { "epoch": 0.24950089030378245, "grad_norm": 0.4043491780757904, "learning_rate": 1.7577694879040116e-05, "loss": 0.3837961256504059, "step": 4624, "token_acc": 0.8636890951276102 }, { "epoch": 0.24955484810877893, "grad_norm": 0.4530501067638397, "learning_rate": 1.7576554439929358e-05, "loss": 0.3981924057006836, "step": 4625, "token_acc": 0.8603460451977402 }, { "epoch": 0.24960880591377543, "grad_norm": 0.40138015151023865, "learning_rate": 1.757541376943261e-05, "loss": 0.4015657305717468, "step": 4626, "token_acc": 0.8558927150812592 }, { "epoch": 0.2496627637187719, "grad_norm": 0.3800729811191559, "learning_rate": 1.7574272867584706e-05, "loss": 0.39283639192581177, "step": 4627, "token_acc": 0.8672134781644868 }, { "epoch": 0.24971672152376842, "grad_norm": 0.4629940986633301, "learning_rate": 1.75731317344205e-05, "loss": 0.3640134334564209, "step": 4628, "token_acc": 0.8705633056683737 }, { "epoch": 0.2497706793287649, "grad_norm": 0.3677978217601776, "learning_rate": 1.7571990369974833e-05, "loss": 0.42638349533081055, "step": 4629, "token_acc": 0.8513893225101468 }, { "epoch": 0.2498246371337614, "grad_norm": 0.3981497883796692, "learning_rate": 1.7570848774282564e-05, "loss": 0.4131152629852295, "step": 4630, "token_acc": 0.8571062411892861 }, { "epoch": 0.2498785949387579, "grad_norm": 0.37649857997894287, "learning_rate": 1.7569706947378554e-05, "loss": 0.42175084352493286, "step": 4631, "token_acc": 0.8533834586466166 }, { "epoch": 0.2499325527437544, "grad_norm": 0.4698161780834198, "learning_rate": 1.7568564889297677e-05, "loss": 0.44567033648490906, "step": 4632, "token_acc": 0.8491148068669528 }, { "epoch": 0.24998651054875087, "grad_norm": 0.3835331201553345, "learning_rate": 1.756742260007481e-05, "loss": 0.43624159693717957, "step": 4633, "token_acc": 0.851357789423535 }, { "epoch": 0.2500404683537474, "grad_norm": 0.4053375720977783, "learning_rate": 1.7566280079744845e-05, "loss": 0.4150238037109375, "step": 4634, "token_acc": 0.8581903888961634 }, { "epoch": 0.2500944261587439, "grad_norm": 0.5507175922393799, "learning_rate": 1.7565137328342663e-05, "loss": 0.4476434588432312, "step": 4635, "token_acc": 0.843937575030012 }, { "epoch": 0.25014838396374034, "grad_norm": 0.394047349691391, "learning_rate": 1.756399434590317e-05, "loss": 0.3942108154296875, "step": 4636, "token_acc": 0.8646439456913272 }, { "epoch": 0.25020234176873685, "grad_norm": 0.4921019375324249, "learning_rate": 1.7562851132461272e-05, "loss": 0.405163049697876, "step": 4637, "token_acc": 0.8640906062624917 }, { "epoch": 0.25025629957373335, "grad_norm": 0.34732872247695923, "learning_rate": 1.756170768805188e-05, "loss": 0.39945030212402344, "step": 4638, "token_acc": 0.8638918629550322 }, { "epoch": 0.2503102573787298, "grad_norm": 0.41905316710472107, "learning_rate": 1.7560564012709916e-05, "loss": 0.4092874526977539, "step": 4639, "token_acc": 0.8593395512982103 }, { "epoch": 0.2503642151837263, "grad_norm": 0.482611745595932, "learning_rate": 1.755942010647031e-05, "loss": 0.4401174783706665, "step": 4640, "token_acc": 0.8462156516149767 }, { "epoch": 0.2504181729887228, "grad_norm": 0.3726690113544464, "learning_rate": 1.7558275969367994e-05, "loss": 0.3836677670478821, "step": 4641, "token_acc": 0.8655205655526992 }, { "epoch": 0.2504721307937193, "grad_norm": 0.2946544289588928, "learning_rate": 1.7557131601437912e-05, "loss": 0.371762752532959, "step": 4642, "token_acc": 0.8705063780440665 }, { "epoch": 0.2505260885987158, "grad_norm": 0.4530419707298279, "learning_rate": 1.7555987002715005e-05, "loss": 0.32857629656791687, "step": 4643, "token_acc": 0.8826581854703716 }, { "epoch": 0.2505800464037123, "grad_norm": 0.4197542667388916, "learning_rate": 1.7554842173234238e-05, "loss": 0.3934047222137451, "step": 4644, "token_acc": 0.866148926450434 }, { "epoch": 0.2506340042087088, "grad_norm": 0.3980659246444702, "learning_rate": 1.755369711303057e-05, "loss": 0.39909279346466064, "step": 4645, "token_acc": 0.8639269406392694 }, { "epoch": 0.2506879620137053, "grad_norm": 0.3500540554523468, "learning_rate": 1.755255182213897e-05, "loss": 0.36195409297943115, "step": 4646, "token_acc": 0.876741440377804 }, { "epoch": 0.25074191981870175, "grad_norm": 0.45340150594711304, "learning_rate": 1.7551406300594414e-05, "loss": 0.4404070973396301, "step": 4647, "token_acc": 0.8484377209105047 }, { "epoch": 0.25079587762369826, "grad_norm": 0.3806701600551605, "learning_rate": 1.7550260548431893e-05, "loss": 0.41346460580825806, "step": 4648, "token_acc": 0.8523301516002246 }, { "epoch": 0.25084983542869477, "grad_norm": 0.26968929171562195, "learning_rate": 1.7549114565686386e-05, "loss": 0.40546542406082153, "step": 4649, "token_acc": 0.8611138014527845 }, { "epoch": 0.2509037932336913, "grad_norm": 0.4531516432762146, "learning_rate": 1.7547968352392904e-05, "loss": 0.4415401220321655, "step": 4650, "token_acc": 0.8463421658986175 }, { "epoch": 0.2509577510386877, "grad_norm": 0.30238983035087585, "learning_rate": 1.7546821908586442e-05, "loss": 0.36762097477912903, "step": 4651, "token_acc": 0.8682943032718817 }, { "epoch": 0.25101170884368423, "grad_norm": 0.41495099663734436, "learning_rate": 1.7545675234302017e-05, "loss": 0.37616217136383057, "step": 4652, "token_acc": 0.8687493660614667 }, { "epoch": 0.25106566664868074, "grad_norm": 0.46203717589378357, "learning_rate": 1.7544528329574646e-05, "loss": 0.43627649545669556, "step": 4653, "token_acc": 0.8550890377389315 }, { "epoch": 0.25111962445367725, "grad_norm": 0.4603657126426697, "learning_rate": 1.754338119443936e-05, "loss": 0.38032570481300354, "step": 4654, "token_acc": 0.8620378719567178 }, { "epoch": 0.2511735822586737, "grad_norm": 0.47813504934310913, "learning_rate": 1.7542233828931186e-05, "loss": 0.442550927400589, "step": 4655, "token_acc": 0.8531125071387778 }, { "epoch": 0.2512275400636702, "grad_norm": 0.2832843065261841, "learning_rate": 1.7541086233085167e-05, "loss": 0.34282398223876953, "step": 4656, "token_acc": 0.881268081002893 }, { "epoch": 0.2512814978686667, "grad_norm": 0.5105071663856506, "learning_rate": 1.7539938406936348e-05, "loss": 0.44440531730651855, "step": 4657, "token_acc": 0.849198739898644 }, { "epoch": 0.2513354556736632, "grad_norm": 0.2531467080116272, "learning_rate": 1.753879035051979e-05, "loss": 0.3865467607975006, "step": 4658, "token_acc": 0.8731551655364979 }, { "epoch": 0.2513894134786597, "grad_norm": 0.5324456095695496, "learning_rate": 1.753764206387054e-05, "loss": 0.43186524510383606, "step": 4659, "token_acc": 0.8562624916722186 }, { "epoch": 0.2514433712836562, "grad_norm": 0.42848801612854004, "learning_rate": 1.7536493547023684e-05, "loss": 0.3659895658493042, "step": 4660, "token_acc": 0.8722989727240524 }, { "epoch": 0.2514973290886527, "grad_norm": 0.5030124187469482, "learning_rate": 1.7535344800014286e-05, "loss": 0.4409976601600647, "step": 4661, "token_acc": 0.8482364993414313 }, { "epoch": 0.25155128689364914, "grad_norm": 0.44293326139450073, "learning_rate": 1.7534195822877436e-05, "loss": 0.47119492292404175, "step": 4662, "token_acc": 0.8369214902668557 }, { "epoch": 0.25160524469864565, "grad_norm": 0.32317519187927246, "learning_rate": 1.7533046615648217e-05, "loss": 0.3611607551574707, "step": 4663, "token_acc": 0.8778019586507073 }, { "epoch": 0.25165920250364215, "grad_norm": 0.39164289832115173, "learning_rate": 1.753189717836173e-05, "loss": 0.3662185072898865, "step": 4664, "token_acc": 0.8712672133684047 }, { "epoch": 0.25171316030863866, "grad_norm": 0.3793175518512726, "learning_rate": 1.7530747511053066e-05, "loss": 0.35541456937789917, "step": 4665, "token_acc": 0.8741760148028218 }, { "epoch": 0.2517671181136351, "grad_norm": 0.47180303931236267, "learning_rate": 1.7529597613757357e-05, "loss": 0.44698283076286316, "step": 4666, "token_acc": 0.8527324343506033 }, { "epoch": 0.2518210759186316, "grad_norm": 0.46672677993774414, "learning_rate": 1.752844748650971e-05, "loss": 0.3978338837623596, "step": 4667, "token_acc": 0.8625235404896422 }, { "epoch": 0.25187503372362813, "grad_norm": 0.30036264657974243, "learning_rate": 1.752729712934524e-05, "loss": 0.518255889415741, "step": 4668, "token_acc": 0.831082041802929 }, { "epoch": 0.25192899152862464, "grad_norm": 0.35452330112457275, "learning_rate": 1.7526146542299093e-05, "loss": 0.35617488622665405, "step": 4669, "token_acc": 0.8744620863629619 }, { "epoch": 0.2519829493336211, "grad_norm": 0.46474507451057434, "learning_rate": 1.75249957254064e-05, "loss": 0.4368654489517212, "step": 4670, "token_acc": 0.8480168480168481 }, { "epoch": 0.2520369071386176, "grad_norm": 0.42810770869255066, "learning_rate": 1.7523844678702313e-05, "loss": 0.44140172004699707, "step": 4671, "token_acc": 0.8491530373831776 }, { "epoch": 0.2520908649436141, "grad_norm": 0.39053040742874146, "learning_rate": 1.752269340222198e-05, "loss": 0.36183086037635803, "step": 4672, "token_acc": 0.8779952750590617 }, { "epoch": 0.2521448227486106, "grad_norm": 0.4726703464984894, "learning_rate": 1.7521541896000552e-05, "loss": 0.43889933824539185, "step": 4673, "token_acc": 0.8498929990828493 }, { "epoch": 0.25219878055360706, "grad_norm": 0.3422589898109436, "learning_rate": 1.7520390160073217e-05, "loss": 0.3545853793621063, "step": 4674, "token_acc": 0.8785999724404023 }, { "epoch": 0.25225273835860357, "grad_norm": 0.4690801501274109, "learning_rate": 1.751923819447513e-05, "loss": 0.4538193941116333, "step": 4675, "token_acc": 0.8472046837332109 }, { "epoch": 0.2523066961636001, "grad_norm": 0.41904884576797485, "learning_rate": 1.7518085999241478e-05, "loss": 0.4335106611251831, "step": 4676, "token_acc": 0.8548130096541382 }, { "epoch": 0.2523606539685966, "grad_norm": 0.41737866401672363, "learning_rate": 1.751693357440745e-05, "loss": 0.4061664044857025, "step": 4677, "token_acc": 0.8604290149590742 }, { "epoch": 0.25241461177359303, "grad_norm": 0.507903516292572, "learning_rate": 1.7515780920008242e-05, "loss": 0.4278568923473358, "step": 4678, "token_acc": 0.855319803198032 }, { "epoch": 0.25246856957858954, "grad_norm": 0.3789869546890259, "learning_rate": 1.7514628036079053e-05, "loss": 0.3965766429901123, "step": 4679, "token_acc": 0.8650306748466258 }, { "epoch": 0.25252252738358605, "grad_norm": 0.5357512831687927, "learning_rate": 1.7513474922655093e-05, "loss": 0.3668235242366791, "step": 4680, "token_acc": 0.8723061430010071 }, { "epoch": 0.25257648518858256, "grad_norm": 0.35192909836769104, "learning_rate": 1.751232157977157e-05, "loss": 0.4465702176094055, "step": 4681, "token_acc": 0.8505648197955891 }, { "epoch": 0.252630442993579, "grad_norm": 0.3347746431827545, "learning_rate": 1.7511168007463724e-05, "loss": 0.4281577467918396, "step": 4682, "token_acc": 0.8568575233022636 }, { "epoch": 0.2526844007985755, "grad_norm": 0.33705565333366394, "learning_rate": 1.751001420576677e-05, "loss": 0.35684239864349365, "step": 4683, "token_acc": 0.8735955056179775 }, { "epoch": 0.252738358603572, "grad_norm": 0.47099819779396057, "learning_rate": 1.750886017471595e-05, "loss": 0.49728357791900635, "step": 4684, "token_acc": 0.8313139472267097 }, { "epoch": 0.2527923164085685, "grad_norm": 0.35188284516334534, "learning_rate": 1.7507705914346505e-05, "loss": 0.36244720220565796, "step": 4685, "token_acc": 0.8730525100980958 }, { "epoch": 0.252846274213565, "grad_norm": 0.41268110275268555, "learning_rate": 1.750655142469369e-05, "loss": 0.4528788924217224, "step": 4686, "token_acc": 0.8433277027027027 }, { "epoch": 0.2529002320185615, "grad_norm": 0.4715268909931183, "learning_rate": 1.750539670579276e-05, "loss": 0.38269123435020447, "step": 4687, "token_acc": 0.8661430481283422 }, { "epoch": 0.252954189823558, "grad_norm": 0.4690808355808258, "learning_rate": 1.7504241757678984e-05, "loss": 0.42655664682388306, "step": 4688, "token_acc": 0.85336770866903 }, { "epoch": 0.25300814762855445, "grad_norm": 0.39252814650535583, "learning_rate": 1.7503086580387626e-05, "loss": 0.4199727475643158, "step": 4689, "token_acc": 0.8565285379202502 }, { "epoch": 0.25306210543355095, "grad_norm": 0.3873444199562073, "learning_rate": 1.7501931173953975e-05, "loss": 0.4859626889228821, "step": 4690, "token_acc": 0.8414994096812278 }, { "epoch": 0.25311606323854746, "grad_norm": 0.435167133808136, "learning_rate": 1.7500775538413307e-05, "loss": 0.3494076728820801, "step": 4691, "token_acc": 0.8788713007570543 }, { "epoch": 0.25317002104354397, "grad_norm": 0.4420851469039917, "learning_rate": 1.749961967380092e-05, "loss": 0.3729332685470581, "step": 4692, "token_acc": 0.8717669055780617 }, { "epoch": 0.2532239788485404, "grad_norm": 0.37035778164863586, "learning_rate": 1.749846358015211e-05, "loss": 0.4149784743785858, "step": 4693, "token_acc": 0.8590604026845637 }, { "epoch": 0.25327793665353693, "grad_norm": 0.36001163721084595, "learning_rate": 1.7497307257502192e-05, "loss": 0.3541603684425354, "step": 4694, "token_acc": 0.874291246024063 }, { "epoch": 0.25333189445853344, "grad_norm": 0.5226549506187439, "learning_rate": 1.749615070588647e-05, "loss": 0.4253188967704773, "step": 4695, "token_acc": 0.8550505813075645 }, { "epoch": 0.25338585226352994, "grad_norm": 0.3707594573497772, "learning_rate": 1.7494993925340274e-05, "loss": 0.4103003144264221, "step": 4696, "token_acc": 0.860301366004788 }, { "epoch": 0.2534398100685264, "grad_norm": 0.48638227581977844, "learning_rate": 1.749383691589892e-05, "loss": 0.4462358355522156, "step": 4697, "token_acc": 0.8504016064257028 }, { "epoch": 0.2534937678735229, "grad_norm": 0.4757959544658661, "learning_rate": 1.7492679677597757e-05, "loss": 0.4325302541255951, "step": 4698, "token_acc": 0.8500889071262481 }, { "epoch": 0.2535477256785194, "grad_norm": 0.49320095777511597, "learning_rate": 1.7491522210472112e-05, "loss": 0.3832506537437439, "step": 4699, "token_acc": 0.8655423883318141 }, { "epoch": 0.2536016834835159, "grad_norm": 0.42727288603782654, "learning_rate": 1.749036451455735e-05, "loss": 0.3776404857635498, "step": 4700, "token_acc": 0.862804416880956 }, { "epoch": 0.25365564128851237, "grad_norm": 0.39387059211730957, "learning_rate": 1.7489206589888813e-05, "loss": 0.4111025929450989, "step": 4701, "token_acc": 0.8600591715976331 }, { "epoch": 0.2537095990935089, "grad_norm": 0.4567168653011322, "learning_rate": 1.748804843650187e-05, "loss": 0.45178574323654175, "step": 4702, "token_acc": 0.8483843223983291 }, { "epoch": 0.2537635568985054, "grad_norm": 0.3966210186481476, "learning_rate": 1.7486890054431885e-05, "loss": 0.38028839230537415, "step": 4703, "token_acc": 0.8672537508153947 }, { "epoch": 0.25381751470350183, "grad_norm": 0.365710586309433, "learning_rate": 1.748573144371424e-05, "loss": 0.33161547780036926, "step": 4704, "token_acc": 0.8817263798589017 }, { "epoch": 0.25387147250849834, "grad_norm": 0.4491858184337616, "learning_rate": 1.7484572604384326e-05, "loss": 0.4183828830718994, "step": 4705, "token_acc": 0.8591090184715683 }, { "epoch": 0.25392543031349485, "grad_norm": 0.39154401421546936, "learning_rate": 1.7483413536477518e-05, "loss": 0.40499722957611084, "step": 4706, "token_acc": 0.8647609280861428 }, { "epoch": 0.25397938811849136, "grad_norm": 0.4436517655849457, "learning_rate": 1.748225424002922e-05, "loss": 0.40784943103790283, "step": 4707, "token_acc": 0.8603763294245977 }, { "epoch": 0.2540333459234878, "grad_norm": 0.42893287539482117, "learning_rate": 1.748109471507484e-05, "loss": 0.40763938426971436, "step": 4708, "token_acc": 0.8572615486872716 }, { "epoch": 0.2540873037284843, "grad_norm": 0.41634947061538696, "learning_rate": 1.7479934961649788e-05, "loss": 0.3486836552619934, "step": 4709, "token_acc": 0.8755673814368445 }, { "epoch": 0.2541412615334808, "grad_norm": 0.3711242079734802, "learning_rate": 1.747877497978948e-05, "loss": 0.4081662893295288, "step": 4710, "token_acc": 0.8589283504516768 }, { "epoch": 0.25419521933847733, "grad_norm": 0.4252033233642578, "learning_rate": 1.7477614769529343e-05, "loss": 0.35192573070526123, "step": 4711, "token_acc": 0.8778977871443625 }, { "epoch": 0.2542491771434738, "grad_norm": 0.5423402786254883, "learning_rate": 1.7476454330904814e-05, "loss": 0.4619732201099396, "step": 4712, "token_acc": 0.8460706560922855 }, { "epoch": 0.2543031349484703, "grad_norm": 0.470809668302536, "learning_rate": 1.7475293663951322e-05, "loss": 0.4661634862422943, "step": 4713, "token_acc": 0.8422827496757458 }, { "epoch": 0.2543570927534668, "grad_norm": 0.4661286175251007, "learning_rate": 1.7474132768704324e-05, "loss": 0.47517043352127075, "step": 4714, "token_acc": 0.838399266839774 }, { "epoch": 0.2544110505584633, "grad_norm": 0.3750297427177429, "learning_rate": 1.7472971645199268e-05, "loss": 0.4241308271884918, "step": 4715, "token_acc": 0.8550079491255962 }, { "epoch": 0.25446500836345975, "grad_norm": 0.4428500533103943, "learning_rate": 1.7471810293471615e-05, "loss": 0.37063223123550415, "step": 4716, "token_acc": 0.8692675159235669 }, { "epoch": 0.25451896616845626, "grad_norm": 0.31567469239234924, "learning_rate": 1.7470648713556834e-05, "loss": 0.3621174693107605, "step": 4717, "token_acc": 0.8709844559585492 }, { "epoch": 0.25457292397345277, "grad_norm": 0.4776032865047455, "learning_rate": 1.7469486905490402e-05, "loss": 0.4060817360877991, "step": 4718, "token_acc": 0.8621995390187686 }, { "epoch": 0.2546268817784493, "grad_norm": 0.4097009003162384, "learning_rate": 1.746832486930779e-05, "loss": 0.3888031840324402, "step": 4719, "token_acc": 0.8696161734544389 }, { "epoch": 0.25468083958344573, "grad_norm": 0.4240131080150604, "learning_rate": 1.7467162605044495e-05, "loss": 0.43433988094329834, "step": 4720, "token_acc": 0.8490804183195095 }, { "epoch": 0.25473479738844224, "grad_norm": 0.4894740581512451, "learning_rate": 1.746600011273601e-05, "loss": 0.4138528108596802, "step": 4721, "token_acc": 0.8526315789473684 }, { "epoch": 0.25478875519343874, "grad_norm": 0.4461696147918701, "learning_rate": 1.746483739241784e-05, "loss": 0.4039558470249176, "step": 4722, "token_acc": 0.8603071948261924 }, { "epoch": 0.25484271299843525, "grad_norm": 0.3979165256023407, "learning_rate": 1.746367444412549e-05, "loss": 0.3875347375869751, "step": 4723, "token_acc": 0.86499922324064 }, { "epoch": 0.2548966708034317, "grad_norm": 0.3807286024093628, "learning_rate": 1.7462511267894476e-05, "loss": 0.3530200719833374, "step": 4724, "token_acc": 0.8767033327860778 }, { "epoch": 0.2549506286084282, "grad_norm": 0.32242467999458313, "learning_rate": 1.7461347863760324e-05, "loss": 0.3493489921092987, "step": 4725, "token_acc": 0.8763590744354613 }, { "epoch": 0.2550045864134247, "grad_norm": 0.4193006753921509, "learning_rate": 1.7460184231758564e-05, "loss": 0.44637805223464966, "step": 4726, "token_acc": 0.8464458698782563 }, { "epoch": 0.25505854421842117, "grad_norm": 0.3721640408039093, "learning_rate": 1.745902037192473e-05, "loss": 0.42710235714912415, "step": 4727, "token_acc": 0.8530259365994236 }, { "epoch": 0.2551125020234177, "grad_norm": 0.44354909658432007, "learning_rate": 1.7457856284294368e-05, "loss": 0.40843409299850464, "step": 4728, "token_acc": 0.8590495197704877 }, { "epoch": 0.2551664598284142, "grad_norm": 0.547638475894928, "learning_rate": 1.7456691968903032e-05, "loss": 0.4341358542442322, "step": 4729, "token_acc": 0.8500869565217392 }, { "epoch": 0.2552204176334107, "grad_norm": 0.38338524103164673, "learning_rate": 1.7455527425786274e-05, "loss": 0.4848925471305847, "step": 4730, "token_acc": 0.8393412415063918 }, { "epoch": 0.25527437543840714, "grad_norm": 0.3856782019138336, "learning_rate": 1.745436265497966e-05, "loss": 0.41116440296173096, "step": 4731, "token_acc": 0.858041958041958 }, { "epoch": 0.25532833324340365, "grad_norm": 0.5192503929138184, "learning_rate": 1.7453197656518767e-05, "loss": 0.42506372928619385, "step": 4732, "token_acc": 0.8491773308957953 }, { "epoch": 0.25538229104840016, "grad_norm": 0.3016210198402405, "learning_rate": 1.7452032430439168e-05, "loss": 0.4434007406234741, "step": 4733, "token_acc": 0.8507328362046799 }, { "epoch": 0.25543624885339666, "grad_norm": 0.47182586789131165, "learning_rate": 1.745086697677645e-05, "loss": 0.42526543140411377, "step": 4734, "token_acc": 0.8524507326932794 }, { "epoch": 0.2554902066583931, "grad_norm": 0.4441986382007599, "learning_rate": 1.7449701295566207e-05, "loss": 0.45717158913612366, "step": 4735, "token_acc": 0.8450106157112527 }, { "epoch": 0.2555441644633896, "grad_norm": 0.4173774719238281, "learning_rate": 1.744853538684404e-05, "loss": 0.38243117928504944, "step": 4736, "token_acc": 0.8727979274611399 }, { "epoch": 0.25559812226838613, "grad_norm": 0.4444233775138855, "learning_rate": 1.744736925064555e-05, "loss": 0.41781896352767944, "step": 4737, "token_acc": 0.8567526969821112 }, { "epoch": 0.25565208007338264, "grad_norm": 0.4389871060848236, "learning_rate": 1.7446202887006354e-05, "loss": 0.4347511827945709, "step": 4738, "token_acc": 0.851656314699793 }, { "epoch": 0.2557060378783791, "grad_norm": 0.4360969662666321, "learning_rate": 1.7445036295962075e-05, "loss": 0.3967764973640442, "step": 4739, "token_acc": 0.8645989661497415 }, { "epoch": 0.2557599956833756, "grad_norm": 0.48588547110557556, "learning_rate": 1.744386947754834e-05, "loss": 0.43084320425987244, "step": 4740, "token_acc": 0.8554070473876063 }, { "epoch": 0.2558139534883721, "grad_norm": 0.5462056994438171, "learning_rate": 1.7442702431800778e-05, "loss": 0.45626822113990784, "step": 4741, "token_acc": 0.8522330453960669 }, { "epoch": 0.2558679112933686, "grad_norm": 0.4917547106742859, "learning_rate": 1.7441535158755033e-05, "loss": 0.41106128692626953, "step": 4742, "token_acc": 0.8592414930295308 }, { "epoch": 0.25592186909836506, "grad_norm": 0.32365599274635315, "learning_rate": 1.7440367658446753e-05, "loss": 0.3511267900466919, "step": 4743, "token_acc": 0.8740401288085212 }, { "epoch": 0.25597582690336157, "grad_norm": 0.3901020884513855, "learning_rate": 1.7439199930911596e-05, "loss": 0.42962509393692017, "step": 4744, "token_acc": 0.8509984639016898 }, { "epoch": 0.2560297847083581, "grad_norm": 0.42542073130607605, "learning_rate": 1.743803197618522e-05, "loss": 0.4292345643043518, "step": 4745, "token_acc": 0.8545283018867924 }, { "epoch": 0.2560837425133546, "grad_norm": 0.44162267446517944, "learning_rate": 1.7436863794303298e-05, "loss": 0.41104525327682495, "step": 4746, "token_acc": 0.8554428404213812 }, { "epoch": 0.25613770031835104, "grad_norm": 0.5057851672172546, "learning_rate": 1.7435695385301504e-05, "loss": 0.42591145634651184, "step": 4747, "token_acc": 0.8520327765521588 }, { "epoch": 0.25619165812334754, "grad_norm": 0.38183513283729553, "learning_rate": 1.7434526749215517e-05, "loss": 0.3809380531311035, "step": 4748, "token_acc": 0.869038208168643 }, { "epoch": 0.25624561592834405, "grad_norm": 0.502592921257019, "learning_rate": 1.7433357886081035e-05, "loss": 0.42951005697250366, "step": 4749, "token_acc": 0.8532910388580491 }, { "epoch": 0.2562995737333405, "grad_norm": 0.5020634531974792, "learning_rate": 1.743218879593375e-05, "loss": 0.34292182326316833, "step": 4750, "token_acc": 0.8781496803309515 }, { "epoch": 0.256353531538337, "grad_norm": 0.4010133147239685, "learning_rate": 1.7431019478809363e-05, "loss": 0.39011555910110474, "step": 4751, "token_acc": 0.8659985152190052 }, { "epoch": 0.2564074893433335, "grad_norm": 0.43913161754608154, "learning_rate": 1.742984993474359e-05, "loss": 0.4291795790195465, "step": 4752, "token_acc": 0.8482597331132206 }, { "epoch": 0.25646144714833, "grad_norm": 0.31614500284194946, "learning_rate": 1.7428680163772147e-05, "loss": 0.42449086904525757, "step": 4753, "token_acc": 0.8559063757162014 }, { "epoch": 0.2565154049533265, "grad_norm": 0.47213512659072876, "learning_rate": 1.7427510165930755e-05, "loss": 0.3843541741371155, "step": 4754, "token_acc": 0.8678469241773963 }, { "epoch": 0.256569362758323, "grad_norm": 0.39413824677467346, "learning_rate": 1.742633994125515e-05, "loss": 0.4221992790699005, "step": 4755, "token_acc": 0.8510175547615348 }, { "epoch": 0.2566233205633195, "grad_norm": 0.4247035086154938, "learning_rate": 1.742516948978107e-05, "loss": 0.3934325575828552, "step": 4756, "token_acc": 0.8649138712601995 }, { "epoch": 0.256677278368316, "grad_norm": 0.4842270314693451, "learning_rate": 1.7423998811544258e-05, "loss": 0.3677980303764343, "step": 4757, "token_acc": 0.8699791037845368 }, { "epoch": 0.25673123617331245, "grad_norm": 0.3862427771091461, "learning_rate": 1.7422827906580467e-05, "loss": 0.44482460618019104, "step": 4758, "token_acc": 0.8546186535450682 }, { "epoch": 0.25678519397830896, "grad_norm": 0.4455907642841339, "learning_rate": 1.742165677492546e-05, "loss": 0.4185698628425598, "step": 4759, "token_acc": 0.8525619273837801 }, { "epoch": 0.25683915178330546, "grad_norm": 0.4060839116573334, "learning_rate": 1.742048541661499e-05, "loss": 0.3361583948135376, "step": 4760, "token_acc": 0.8808083408418071 }, { "epoch": 0.25689310958830197, "grad_norm": 0.38007572293281555, "learning_rate": 1.741931383168485e-05, "loss": 0.37236398458480835, "step": 4761, "token_acc": 0.8701432664756447 }, { "epoch": 0.2569470673932984, "grad_norm": 0.32455044984817505, "learning_rate": 1.7418142020170802e-05, "loss": 0.3821396827697754, "step": 4762, "token_acc": 0.8683949801849405 }, { "epoch": 0.25700102519829493, "grad_norm": 0.41313448548316956, "learning_rate": 1.741696998210864e-05, "loss": 0.450491338968277, "step": 4763, "token_acc": 0.8497493564557648 }, { "epoch": 0.25705498300329144, "grad_norm": 0.45973002910614014, "learning_rate": 1.741579771753416e-05, "loss": 0.38468798995018005, "step": 4764, "token_acc": 0.8612757550098786 }, { "epoch": 0.25710894080828794, "grad_norm": 0.4397798180580139, "learning_rate": 1.7414625226483162e-05, "loss": 0.44654667377471924, "step": 4765, "token_acc": 0.8468126930637462 }, { "epoch": 0.2571628986132844, "grad_norm": 0.332851767539978, "learning_rate": 1.741345250899145e-05, "loss": 0.3258538246154785, "step": 4766, "token_acc": 0.8806675527987003 }, { "epoch": 0.2572168564182809, "grad_norm": 0.3689658045768738, "learning_rate": 1.741227956509484e-05, "loss": 0.4576234817504883, "step": 4767, "token_acc": 0.8501661609593989 }, { "epoch": 0.2572708142232774, "grad_norm": 0.5125846862792969, "learning_rate": 1.741110639482915e-05, "loss": 0.4109191596508026, "step": 4768, "token_acc": 0.8609271523178808 }, { "epoch": 0.25732477202827386, "grad_norm": 0.2995905876159668, "learning_rate": 1.740993299823022e-05, "loss": 0.338733971118927, "step": 4769, "token_acc": 0.8813268257411424 }, { "epoch": 0.25737872983327037, "grad_norm": 0.4563558101654053, "learning_rate": 1.7408759375333875e-05, "loss": 0.372756689786911, "step": 4770, "token_acc": 0.8743786714866697 }, { "epoch": 0.2574326876382669, "grad_norm": 0.5083668828010559, "learning_rate": 1.7407585526175957e-05, "loss": 0.373518705368042, "step": 4771, "token_acc": 0.8724214535068232 }, { "epoch": 0.2574866454432634, "grad_norm": 0.36759692430496216, "learning_rate": 1.740641145079232e-05, "loss": 0.4238107204437256, "step": 4772, "token_acc": 0.853319338756232 }, { "epoch": 0.25754060324825984, "grad_norm": 0.44409266114234924, "learning_rate": 1.7405237149218815e-05, "loss": 0.4175347089767456, "step": 4773, "token_acc": 0.857219730941704 }, { "epoch": 0.25759456105325634, "grad_norm": 0.4003322422504425, "learning_rate": 1.740406262149131e-05, "loss": 0.4665975570678711, "step": 4774, "token_acc": 0.8453983668064445 }, { "epoch": 0.25764851885825285, "grad_norm": 0.39752650260925293, "learning_rate": 1.740288786764567e-05, "loss": 0.35476839542388916, "step": 4775, "token_acc": 0.8782301777385605 }, { "epoch": 0.25770247666324936, "grad_norm": 0.4033268690109253, "learning_rate": 1.7401712887717775e-05, "loss": 0.45094579458236694, "step": 4776, "token_acc": 0.8428769017980636 }, { "epoch": 0.2577564344682458, "grad_norm": 0.4042663276195526, "learning_rate": 1.7400537681743505e-05, "loss": 0.37664905190467834, "step": 4777, "token_acc": 0.8640750305831181 }, { "epoch": 0.2578103922732423, "grad_norm": 0.3471052348613739, "learning_rate": 1.7399362249758755e-05, "loss": 0.3964351415634155, "step": 4778, "token_acc": 0.862796260056534 }, { "epoch": 0.2578643500782388, "grad_norm": 0.3820227384567261, "learning_rate": 1.739818659179942e-05, "loss": 0.3933444321155548, "step": 4779, "token_acc": 0.864329268292683 }, { "epoch": 0.25791830788323533, "grad_norm": 0.4303337037563324, "learning_rate": 1.7397010707901406e-05, "loss": 0.4438532590866089, "step": 4780, "token_acc": 0.8510134245854172 }, { "epoch": 0.2579722656882318, "grad_norm": 0.4588789641857147, "learning_rate": 1.7395834598100623e-05, "loss": 0.3714962303638458, "step": 4781, "token_acc": 0.8720613709477852 }, { "epoch": 0.2580262234932283, "grad_norm": 0.5262742042541504, "learning_rate": 1.7394658262432986e-05, "loss": 0.49112409353256226, "step": 4782, "token_acc": 0.8358862144420132 }, { "epoch": 0.2580801812982248, "grad_norm": 0.4124775230884552, "learning_rate": 1.7393481700934424e-05, "loss": 0.30909615755081177, "step": 4783, "token_acc": 0.8881724392041267 }, { "epoch": 0.2581341391032213, "grad_norm": 0.5285614132881165, "learning_rate": 1.7392304913640868e-05, "loss": 0.42838311195373535, "step": 4784, "token_acc": 0.855815615508308 }, { "epoch": 0.25818809690821776, "grad_norm": 0.4496205747127533, "learning_rate": 1.7391127900588256e-05, "loss": 0.3820730149745941, "step": 4785, "token_acc": 0.8612297975106817 }, { "epoch": 0.25824205471321426, "grad_norm": 0.35634827613830566, "learning_rate": 1.7389950661812535e-05, "loss": 0.3970494866371155, "step": 4786, "token_acc": 0.8625405656003708 }, { "epoch": 0.25829601251821077, "grad_norm": 0.5224217176437378, "learning_rate": 1.738877319734966e-05, "loss": 0.41167429089546204, "step": 4787, "token_acc": 0.8581670218949816 }, { "epoch": 0.2583499703232073, "grad_norm": 0.494118869304657, "learning_rate": 1.738759550723558e-05, "loss": 0.41339147090911865, "step": 4788, "token_acc": 0.8526440879382056 }, { "epoch": 0.25840392812820373, "grad_norm": 0.3542513847351074, "learning_rate": 1.738641759150627e-05, "loss": 0.3773595690727234, "step": 4789, "token_acc": 0.8687896405919662 }, { "epoch": 0.25845788593320024, "grad_norm": 0.35414162278175354, "learning_rate": 1.7385239450197704e-05, "loss": 0.3517913222312927, "step": 4790, "token_acc": 0.873088274751811 }, { "epoch": 0.25851184373819674, "grad_norm": 0.3643169701099396, "learning_rate": 1.738406108334586e-05, "loss": 0.4138637185096741, "step": 4791, "token_acc": 0.8574746363525242 }, { "epoch": 0.2585658015431932, "grad_norm": 0.32097336649894714, "learning_rate": 1.7382882490986724e-05, "loss": 0.4547923505306244, "step": 4792, "token_acc": 0.8463946069579873 }, { "epoch": 0.2586197593481897, "grad_norm": 0.41627371311187744, "learning_rate": 1.738170367315629e-05, "loss": 0.420610249042511, "step": 4793, "token_acc": 0.8611033714126498 }, { "epoch": 0.2586737171531862, "grad_norm": 0.4297351837158203, "learning_rate": 1.7380524629890562e-05, "loss": 0.40046659111976624, "step": 4794, "token_acc": 0.862162495376649 }, { "epoch": 0.2587276749581827, "grad_norm": 0.39300939440727234, "learning_rate": 1.737934536122554e-05, "loss": 0.39530307054519653, "step": 4795, "token_acc": 0.8636951833213515 }, { "epoch": 0.25878163276317917, "grad_norm": 0.363237589597702, "learning_rate": 1.737816586719725e-05, "loss": 0.4383508563041687, "step": 4796, "token_acc": 0.8528748590755355 }, { "epoch": 0.2588355905681757, "grad_norm": 0.3626827299594879, "learning_rate": 1.7376986147841705e-05, "loss": 0.41053229570388794, "step": 4797, "token_acc": 0.8603889010268735 }, { "epoch": 0.2588895483731722, "grad_norm": 0.38500475883483887, "learning_rate": 1.7375806203194937e-05, "loss": 0.35174238681793213, "step": 4798, "token_acc": 0.8766287773773219 }, { "epoch": 0.2589435061781687, "grad_norm": 0.40574222803115845, "learning_rate": 1.7374626033292975e-05, "loss": 0.3680134415626526, "step": 4799, "token_acc": 0.8618893425313248 }, { "epoch": 0.25899746398316514, "grad_norm": 0.34360429644584656, "learning_rate": 1.737344563817187e-05, "loss": 0.37754663825035095, "step": 4800, "token_acc": 0.8701750448833034 }, { "epoch": 0.25905142178816165, "grad_norm": 0.34398046135902405, "learning_rate": 1.7372265017867666e-05, "loss": 0.42444825172424316, "step": 4801, "token_acc": 0.8578737919272313 }, { "epoch": 0.25910537959315816, "grad_norm": 0.3734283447265625, "learning_rate": 1.737108417241642e-05, "loss": 0.424191951751709, "step": 4802, "token_acc": 0.8534140969162995 }, { "epoch": 0.25915933739815467, "grad_norm": 0.4068639278411865, "learning_rate": 1.7369903101854193e-05, "loss": 0.3484352231025696, "step": 4803, "token_acc": 0.879110251450677 }, { "epoch": 0.2592132952031511, "grad_norm": 0.38434773683547974, "learning_rate": 1.7368721806217058e-05, "loss": 0.34661829471588135, "step": 4804, "token_acc": 0.8764705882352941 }, { "epoch": 0.2592672530081476, "grad_norm": 0.27744659781455994, "learning_rate": 1.7367540285541087e-05, "loss": 0.3574206233024597, "step": 4805, "token_acc": 0.875737152485257 }, { "epoch": 0.25932121081314413, "grad_norm": 0.4445578157901764, "learning_rate": 1.7366358539862367e-05, "loss": 0.37738168239593506, "step": 4806, "token_acc": 0.8741211465657112 }, { "epoch": 0.25937516861814064, "grad_norm": 0.542834460735321, "learning_rate": 1.7365176569216985e-05, "loss": 0.4136587977409363, "step": 4807, "token_acc": 0.8554434030281183 }, { "epoch": 0.2594291264231371, "grad_norm": 0.3902430236339569, "learning_rate": 1.736399437364104e-05, "loss": 0.3895927667617798, "step": 4808, "token_acc": 0.8641549797955788 }, { "epoch": 0.2594830842281336, "grad_norm": 0.33264926075935364, "learning_rate": 1.736281195317064e-05, "loss": 0.3756272792816162, "step": 4809, "token_acc": 0.8686151704940849 }, { "epoch": 0.2595370420331301, "grad_norm": 0.3312918543815613, "learning_rate": 1.736162930784189e-05, "loss": 0.3487529754638672, "step": 4810, "token_acc": 0.8772186260179579 }, { "epoch": 0.2595909998381266, "grad_norm": 0.45674002170562744, "learning_rate": 1.736044643769091e-05, "loss": 0.37083855271339417, "step": 4811, "token_acc": 0.8696420680512594 }, { "epoch": 0.25964495764312306, "grad_norm": 0.44103723764419556, "learning_rate": 1.7359263342753824e-05, "loss": 0.3401745557785034, "step": 4812, "token_acc": 0.8771089356384086 }, { "epoch": 0.25969891544811957, "grad_norm": 0.34529662132263184, "learning_rate": 1.7358080023066758e-05, "loss": 0.36823707818984985, "step": 4813, "token_acc": 0.8693613220187584 }, { "epoch": 0.2597528732531161, "grad_norm": 0.42876186966896057, "learning_rate": 1.7356896478665863e-05, "loss": 0.3289526700973511, "step": 4814, "token_acc": 0.8804405755906911 }, { "epoch": 0.25980683105811253, "grad_norm": 0.3284105658531189, "learning_rate": 1.7355712709587272e-05, "loss": 0.408072292804718, "step": 4815, "token_acc": 0.8586407766990292 }, { "epoch": 0.25986078886310904, "grad_norm": 0.4627867043018341, "learning_rate": 1.7354528715867142e-05, "loss": 0.43548864126205444, "step": 4816, "token_acc": 0.8523357794880974 }, { "epoch": 0.25991474666810555, "grad_norm": 0.47165611386299133, "learning_rate": 1.7353344497541633e-05, "loss": 0.43518829345703125, "step": 4817, "token_acc": 0.8519766688269604 }, { "epoch": 0.25996870447310205, "grad_norm": 0.4330067038536072, "learning_rate": 1.735216005464691e-05, "loss": 0.3724729120731354, "step": 4818, "token_acc": 0.8689666342727568 }, { "epoch": 0.2600226622780985, "grad_norm": 0.4357307255268097, "learning_rate": 1.7350975387219145e-05, "loss": 0.393848180770874, "step": 4819, "token_acc": 0.8594608406796985 }, { "epoch": 0.260076620083095, "grad_norm": 0.5036042928695679, "learning_rate": 1.7349790495294517e-05, "loss": 0.4182252585887909, "step": 4820, "token_acc": 0.8606954225352113 }, { "epoch": 0.2601305778880915, "grad_norm": 0.44057536125183105, "learning_rate": 1.734860537890921e-05, "loss": 0.3917243182659149, "step": 4821, "token_acc": 0.8644524236983842 }, { "epoch": 0.260184535693088, "grad_norm": 0.38267025351524353, "learning_rate": 1.7347420038099424e-05, "loss": 0.3743460178375244, "step": 4822, "token_acc": 0.864844198050086 }, { "epoch": 0.2602384934980845, "grad_norm": 0.3721991777420044, "learning_rate": 1.734623447290135e-05, "loss": 0.3640785813331604, "step": 4823, "token_acc": 0.8740215264187867 }, { "epoch": 0.260292451303081, "grad_norm": 0.37934422492980957, "learning_rate": 1.7345048683351205e-05, "loss": 0.3161405920982361, "step": 4824, "token_acc": 0.884088200238379 }, { "epoch": 0.2603464091080775, "grad_norm": 0.37780874967575073, "learning_rate": 1.7343862669485193e-05, "loss": 0.34292250871658325, "step": 4825, "token_acc": 0.8804511278195488 }, { "epoch": 0.260400366913074, "grad_norm": 0.42178332805633545, "learning_rate": 1.734267643133954e-05, "loss": 0.4517413377761841, "step": 4826, "token_acc": 0.8490400301167023 }, { "epoch": 0.26045432471807045, "grad_norm": 0.3223591148853302, "learning_rate": 1.7341489968950475e-05, "loss": 0.3264225125312805, "step": 4827, "token_acc": 0.8824264297612437 }, { "epoch": 0.26050828252306696, "grad_norm": 0.4870744049549103, "learning_rate": 1.7340303282354227e-05, "loss": 0.3906653821468353, "step": 4828, "token_acc": 0.8625458650860852 }, { "epoch": 0.26056224032806347, "grad_norm": 0.3583768606185913, "learning_rate": 1.733911637158704e-05, "loss": 0.4135285019874573, "step": 4829, "token_acc": 0.8588016720854621 }, { "epoch": 0.26061619813306, "grad_norm": 0.46926796436309814, "learning_rate": 1.733792923668516e-05, "loss": 0.38717931509017944, "step": 4830, "token_acc": 0.8663601092110935 }, { "epoch": 0.2606701559380564, "grad_norm": 0.37742510437965393, "learning_rate": 1.7336741877684845e-05, "loss": 0.36883533000946045, "step": 4831, "token_acc": 0.8725367922175106 }, { "epoch": 0.26072411374305293, "grad_norm": 0.4108066260814667, "learning_rate": 1.7335554294622357e-05, "loss": 0.488262802362442, "step": 4832, "token_acc": 0.8374884579870729 }, { "epoch": 0.26077807154804944, "grad_norm": 0.3155818581581116, "learning_rate": 1.733436648753396e-05, "loss": 0.4458492696285248, "step": 4833, "token_acc": 0.8485456369107321 }, { "epoch": 0.2608320293530459, "grad_norm": 0.41082867980003357, "learning_rate": 1.7333178456455933e-05, "loss": 0.3542846441268921, "step": 4834, "token_acc": 0.8775332947307469 }, { "epoch": 0.2608859871580424, "grad_norm": 0.5488861799240112, "learning_rate": 1.733199020142456e-05, "loss": 0.42218413949012756, "step": 4835, "token_acc": 0.8564573336115168 }, { "epoch": 0.2609399449630389, "grad_norm": 0.37419572472572327, "learning_rate": 1.7330801722476122e-05, "loss": 0.3600516617298126, "step": 4836, "token_acc": 0.8767789806592872 }, { "epoch": 0.2609939027680354, "grad_norm": 0.4648229479789734, "learning_rate": 1.732961301964692e-05, "loss": 0.4033772945404053, "step": 4837, "token_acc": 0.8593977981164611 }, { "epoch": 0.26104786057303186, "grad_norm": 0.46094125509262085, "learning_rate": 1.7328424092973257e-05, "loss": 0.4309372007846832, "step": 4838, "token_acc": 0.8483457123565159 }, { "epoch": 0.26110181837802837, "grad_norm": 0.3381204307079315, "learning_rate": 1.7327234942491445e-05, "loss": 0.36227649450302124, "step": 4839, "token_acc": 0.8709124472573839 }, { "epoch": 0.2611557761830249, "grad_norm": 0.4118225872516632, "learning_rate": 1.7326045568237795e-05, "loss": 0.3882949948310852, "step": 4840, "token_acc": 0.8630814912570108 }, { "epoch": 0.2612097339880214, "grad_norm": 0.40327101945877075, "learning_rate": 1.7324855970248632e-05, "loss": 0.3327561914920807, "step": 4841, "token_acc": 0.8824056111028205 }, { "epoch": 0.26126369179301784, "grad_norm": 0.4024568200111389, "learning_rate": 1.7323666148560287e-05, "loss": 0.39473700523376465, "step": 4842, "token_acc": 0.8651856691712948 }, { "epoch": 0.26131764959801435, "grad_norm": 0.46460777521133423, "learning_rate": 1.7322476103209098e-05, "loss": 0.4628719091415405, "step": 4843, "token_acc": 0.8448348559381588 }, { "epoch": 0.26137160740301085, "grad_norm": 0.41445446014404297, "learning_rate": 1.7321285834231405e-05, "loss": 0.3633483648300171, "step": 4844, "token_acc": 0.879600655639994 }, { "epoch": 0.26142556520800736, "grad_norm": 0.4376887381076813, "learning_rate": 1.732009534166356e-05, "loss": 0.37288254499435425, "step": 4845, "token_acc": 0.8688819577735125 }, { "epoch": 0.2614795230130038, "grad_norm": 0.3381775915622711, "learning_rate": 1.7318904625541925e-05, "loss": 0.4367417097091675, "step": 4846, "token_acc": 0.8493387909319899 }, { "epoch": 0.2615334808180003, "grad_norm": 0.5329595804214478, "learning_rate": 1.7317713685902853e-05, "loss": 0.45623892545700073, "step": 4847, "token_acc": 0.8425641853705796 }, { "epoch": 0.2615874386229968, "grad_norm": 0.47664982080459595, "learning_rate": 1.7316522522782727e-05, "loss": 0.40969714522361755, "step": 4848, "token_acc": 0.8553504212224413 }, { "epoch": 0.26164139642799333, "grad_norm": 0.43482136726379395, "learning_rate": 1.731533113621792e-05, "loss": 0.4028923213481903, "step": 4849, "token_acc": 0.8615408178894165 }, { "epoch": 0.2616953542329898, "grad_norm": 0.409322053194046, "learning_rate": 1.7314139526244817e-05, "loss": 0.41223376989364624, "step": 4850, "token_acc": 0.8597204161248374 }, { "epoch": 0.2617493120379863, "grad_norm": 0.354139119386673, "learning_rate": 1.7312947692899805e-05, "loss": 0.38901233673095703, "step": 4851, "token_acc": 0.8653759057971014 }, { "epoch": 0.2618032698429828, "grad_norm": 0.3152567744255066, "learning_rate": 1.7311755636219287e-05, "loss": 0.38696885108947754, "step": 4852, "token_acc": 0.8628652886671418 }, { "epoch": 0.2618572276479793, "grad_norm": 0.36949846148490906, "learning_rate": 1.7310563356239668e-05, "loss": 0.41395604610443115, "step": 4853, "token_acc": 0.8597938144329897 }, { "epoch": 0.26191118545297576, "grad_norm": 0.4806232750415802, "learning_rate": 1.7309370852997362e-05, "loss": 0.43398696184158325, "step": 4854, "token_acc": 0.8540372670807453 }, { "epoch": 0.26196514325797227, "grad_norm": 0.44320181012153625, "learning_rate": 1.730817812652878e-05, "loss": 0.4135248363018036, "step": 4855, "token_acc": 0.8582797673706765 }, { "epoch": 0.2620191010629688, "grad_norm": 0.4304939806461334, "learning_rate": 1.730698517687036e-05, "loss": 0.3509618937969208, "step": 4856, "token_acc": 0.875 }, { "epoch": 0.2620730588679652, "grad_norm": 0.41228124499320984, "learning_rate": 1.730579200405852e-05, "loss": 0.3496274948120117, "step": 4857, "token_acc": 0.8758452291510143 }, { "epoch": 0.26212701667296173, "grad_norm": 0.43214401602745056, "learning_rate": 1.7304598608129708e-05, "loss": 0.35410141944885254, "step": 4858, "token_acc": 0.8746016180436381 }, { "epoch": 0.26218097447795824, "grad_norm": 0.4009705185890198, "learning_rate": 1.7303404989120368e-05, "loss": 0.3674447536468506, "step": 4859, "token_acc": 0.8656024014190203 }, { "epoch": 0.26223493228295475, "grad_norm": 0.36358878016471863, "learning_rate": 1.730221114706695e-05, "loss": 0.3748239278793335, "step": 4860, "token_acc": 0.8736220025002841 }, { "epoch": 0.2622888900879512, "grad_norm": 0.2872354984283447, "learning_rate": 1.7301017082005924e-05, "loss": 0.353532075881958, "step": 4861, "token_acc": 0.8753354804079442 }, { "epoch": 0.2623428478929477, "grad_norm": 0.4331808388233185, "learning_rate": 1.7299822793973743e-05, "loss": 0.3986431658267975, "step": 4862, "token_acc": 0.8648723640399556 }, { "epoch": 0.2623968056979442, "grad_norm": 0.4456188678741455, "learning_rate": 1.7298628283006888e-05, "loss": 0.43752753734588623, "step": 4863, "token_acc": 0.8528465744612416 }, { "epoch": 0.2624507635029407, "grad_norm": 0.4931572675704956, "learning_rate": 1.729743354914184e-05, "loss": 0.41894620656967163, "step": 4864, "token_acc": 0.8600892719716793 }, { "epoch": 0.26250472130793717, "grad_norm": 0.4169425070285797, "learning_rate": 1.7296238592415077e-05, "loss": 0.3726404011249542, "step": 4865, "token_acc": 0.8743791641429437 }, { "epoch": 0.2625586791129337, "grad_norm": 0.4158458709716797, "learning_rate": 1.72950434128631e-05, "loss": 0.41521263122558594, "step": 4866, "token_acc": 0.8551927688618336 }, { "epoch": 0.2626126369179302, "grad_norm": 0.3870933949947357, "learning_rate": 1.729384801052241e-05, "loss": 0.44402486085891724, "step": 4867, "token_acc": 0.8510581301901956 }, { "epoch": 0.2626665947229267, "grad_norm": 0.3547615706920624, "learning_rate": 1.7292652385429515e-05, "loss": 0.37590664625167847, "step": 4868, "token_acc": 0.8693172817994614 }, { "epoch": 0.26272055252792315, "grad_norm": 0.4120177626609802, "learning_rate": 1.7291456537620924e-05, "loss": 0.40753549337387085, "step": 4869, "token_acc": 0.8621904237754541 }, { "epoch": 0.26277451033291965, "grad_norm": 0.4133910834789276, "learning_rate": 1.729026046713316e-05, "loss": 0.4038642942905426, "step": 4870, "token_acc": 0.8629550321199143 }, { "epoch": 0.26282846813791616, "grad_norm": 0.37976840138435364, "learning_rate": 1.728906417400275e-05, "loss": 0.36810731887817383, "step": 4871, "token_acc": 0.873147661586968 }, { "epoch": 0.26288242594291267, "grad_norm": 0.3917754888534546, "learning_rate": 1.7287867658266235e-05, "loss": 0.3304198682308197, "step": 4872, "token_acc": 0.8814137554585153 }, { "epoch": 0.2629363837479091, "grad_norm": 0.4567127823829651, "learning_rate": 1.7286670919960146e-05, "loss": 0.44169872999191284, "step": 4873, "token_acc": 0.8486300344362928 }, { "epoch": 0.2629903415529056, "grad_norm": 0.34877079725265503, "learning_rate": 1.7285473959121036e-05, "loss": 0.41243189573287964, "step": 4874, "token_acc": 0.8565800992121388 }, { "epoch": 0.26304429935790213, "grad_norm": 0.4483069181442261, "learning_rate": 1.7284276775785463e-05, "loss": 0.3903198540210724, "step": 4875, "token_acc": 0.8624193326925718 }, { "epoch": 0.2630982571628986, "grad_norm": 0.3435267210006714, "learning_rate": 1.728307936998998e-05, "loss": 0.353463739156723, "step": 4876, "token_acc": 0.8765674573198369 }, { "epoch": 0.2631522149678951, "grad_norm": 0.2992454171180725, "learning_rate": 1.7281881741771166e-05, "loss": 0.33816206455230713, "step": 4877, "token_acc": 0.8791029817088449 }, { "epoch": 0.2632061727728916, "grad_norm": 0.4311549961566925, "learning_rate": 1.7280683891165592e-05, "loss": 0.3318060040473938, "step": 4878, "token_acc": 0.8825360883102179 }, { "epoch": 0.2632601305778881, "grad_norm": 0.4461393356323242, "learning_rate": 1.7279485818209834e-05, "loss": 0.4157045781612396, "step": 4879, "token_acc": 0.8623308022449654 }, { "epoch": 0.26331408838288456, "grad_norm": 0.35238391160964966, "learning_rate": 1.7278287522940487e-05, "loss": 0.38404881954193115, "step": 4880, "token_acc": 0.8650593738754948 }, { "epoch": 0.26336804618788107, "grad_norm": 0.42941543459892273, "learning_rate": 1.727708900539415e-05, "loss": 0.4069540500640869, "step": 4881, "token_acc": 0.8616497383810403 }, { "epoch": 0.2634220039928776, "grad_norm": 0.41899824142456055, "learning_rate": 1.727589026560742e-05, "loss": 0.3746933341026306, "step": 4882, "token_acc": 0.8679108534154738 }, { "epoch": 0.2634759617978741, "grad_norm": 0.4519379436969757, "learning_rate": 1.7274691303616905e-05, "loss": 0.4209514260292053, "step": 4883, "token_acc": 0.8549739047920291 }, { "epoch": 0.26352991960287053, "grad_norm": 0.33327242732048035, "learning_rate": 1.727349211945922e-05, "loss": 0.4674800634384155, "step": 4884, "token_acc": 0.8421633554083885 }, { "epoch": 0.26358387740786704, "grad_norm": 0.4522097706794739, "learning_rate": 1.7272292713171e-05, "loss": 0.35952454805374146, "step": 4885, "token_acc": 0.8743115379183731 }, { "epoch": 0.26363783521286355, "grad_norm": 0.31515589356422424, "learning_rate": 1.7271093084788857e-05, "loss": 0.3543332517147064, "step": 4886, "token_acc": 0.8768656716417911 }, { "epoch": 0.26369179301786005, "grad_norm": 0.4264526069164276, "learning_rate": 1.7269893234349442e-05, "loss": 0.4460033178329468, "step": 4887, "token_acc": 0.8457640332640333 }, { "epoch": 0.2637457508228565, "grad_norm": 0.38253945112228394, "learning_rate": 1.726869316188939e-05, "loss": 0.36016762256622314, "step": 4888, "token_acc": 0.8755200532534532 }, { "epoch": 0.263799708627853, "grad_norm": 0.39431342482566833, "learning_rate": 1.7267492867445352e-05, "loss": 0.3586936295032501, "step": 4889, "token_acc": 0.8776803776129467 }, { "epoch": 0.2638536664328495, "grad_norm": 0.31686580181121826, "learning_rate": 1.7266292351053987e-05, "loss": 0.38545042276382446, "step": 4890, "token_acc": 0.8634167140420693 }, { "epoch": 0.26390762423784603, "grad_norm": 0.4608730971813202, "learning_rate": 1.7265091612751956e-05, "loss": 0.4730004370212555, "step": 4891, "token_acc": 0.837852206273259 }, { "epoch": 0.2639615820428425, "grad_norm": 0.40899917483329773, "learning_rate": 1.7263890652575933e-05, "loss": 0.5151286721229553, "step": 4892, "token_acc": 0.8307340816529254 }, { "epoch": 0.264015539847839, "grad_norm": 0.38759902119636536, "learning_rate": 1.726268947056259e-05, "loss": 0.406945139169693, "step": 4893, "token_acc": 0.8584679832435668 }, { "epoch": 0.2640694976528355, "grad_norm": 0.3774932324886322, "learning_rate": 1.7261488066748613e-05, "loss": 0.37242013216018677, "step": 4894, "token_acc": 0.8683238636363636 }, { "epoch": 0.264123455457832, "grad_norm": 0.4082447290420532, "learning_rate": 1.7260286441170693e-05, "loss": 0.43063414096832275, "step": 4895, "token_acc": 0.8560148629818858 }, { "epoch": 0.26417741326282845, "grad_norm": 0.43041592836380005, "learning_rate": 1.7259084593865528e-05, "loss": 0.4003055989742279, "step": 4896, "token_acc": 0.863113735239279 }, { "epoch": 0.26423137106782496, "grad_norm": 0.4703722298145294, "learning_rate": 1.7257882524869817e-05, "loss": 0.352392315864563, "step": 4897, "token_acc": 0.877036581616969 }, { "epoch": 0.26428532887282147, "grad_norm": 0.4705088436603546, "learning_rate": 1.7256680234220282e-05, "loss": 0.3519476056098938, "step": 4898, "token_acc": 0.8757070135746606 }, { "epoch": 0.2643392866778179, "grad_norm": 0.34604156017303467, "learning_rate": 1.725547772195363e-05, "loss": 0.39290502667427063, "step": 4899, "token_acc": 0.8650345260514752 }, { "epoch": 0.2643932444828144, "grad_norm": 0.39875710010528564, "learning_rate": 1.7254274988106587e-05, "loss": 0.4106818437576294, "step": 4900, "token_acc": 0.8578410234296392 }, { "epoch": 0.26444720228781093, "grad_norm": 0.41839444637298584, "learning_rate": 1.7253072032715888e-05, "loss": 0.4613768756389618, "step": 4901, "token_acc": 0.8466559921172558 }, { "epoch": 0.26450116009280744, "grad_norm": 0.4338415861129761, "learning_rate": 1.725186885581827e-05, "loss": 0.4435291588306427, "step": 4902, "token_acc": 0.8518923649092103 }, { "epoch": 0.2645551178978039, "grad_norm": 0.4568834602832794, "learning_rate": 1.725066545745048e-05, "loss": 0.33596086502075195, "step": 4903, "token_acc": 0.8781238681637088 }, { "epoch": 0.2646090757028004, "grad_norm": 0.43902382254600525, "learning_rate": 1.724946183764926e-05, "loss": 0.36211687326431274, "step": 4904, "token_acc": 0.8733236151603498 }, { "epoch": 0.2646630335077969, "grad_norm": 0.4290444254875183, "learning_rate": 1.7248257996451376e-05, "loss": 0.4244305193424225, "step": 4905, "token_acc": 0.855170343689029 }, { "epoch": 0.2647169913127934, "grad_norm": 0.4118037521839142, "learning_rate": 1.7247053933893595e-05, "loss": 0.39488399028778076, "step": 4906, "token_acc": 0.864654594232059 }, { "epoch": 0.26477094911778987, "grad_norm": 0.46416735649108887, "learning_rate": 1.724584965001268e-05, "loss": 0.362084299325943, "step": 4907, "token_acc": 0.8738777255237281 }, { "epoch": 0.2648249069227864, "grad_norm": 0.48016485571861267, "learning_rate": 1.724464514484542e-05, "loss": 0.4014766812324524, "step": 4908, "token_acc": 0.8587063051290428 }, { "epoch": 0.2648788647277829, "grad_norm": 0.37981414794921875, "learning_rate": 1.7243440418428595e-05, "loss": 0.3672531843185425, "step": 4909, "token_acc": 0.8749646393210749 }, { "epoch": 0.2649328225327794, "grad_norm": 0.3531219959259033, "learning_rate": 1.7242235470798993e-05, "loss": 0.39756491780281067, "step": 4910, "token_acc": 0.8622881355932204 }, { "epoch": 0.26498678033777584, "grad_norm": 0.4642468988895416, "learning_rate": 1.7241030301993417e-05, "loss": 0.43584775924682617, "step": 4911, "token_acc": 0.8493723849372385 }, { "epoch": 0.26504073814277235, "grad_norm": 0.4243176579475403, "learning_rate": 1.723982491204868e-05, "loss": 0.429773211479187, "step": 4912, "token_acc": 0.85479375696767 }, { "epoch": 0.26509469594776885, "grad_norm": 0.40877968072891235, "learning_rate": 1.7238619301001577e-05, "loss": 0.4254755973815918, "step": 4913, "token_acc": 0.8501712328767124 }, { "epoch": 0.26514865375276536, "grad_norm": 0.5364354252815247, "learning_rate": 1.7237413468888943e-05, "loss": 0.4103412926197052, "step": 4914, "token_acc": 0.8607677293428757 }, { "epoch": 0.2652026115577618, "grad_norm": 0.3089594542980194, "learning_rate": 1.7236207415747594e-05, "loss": 0.45539629459381104, "step": 4915, "token_acc": 0.8448726772195457 }, { "epoch": 0.2652565693627583, "grad_norm": 0.5862076878547668, "learning_rate": 1.723500114161437e-05, "loss": 0.4212827980518341, "step": 4916, "token_acc": 0.8532402791625124 }, { "epoch": 0.26531052716775483, "grad_norm": 0.5198596119880676, "learning_rate": 1.7233794646526104e-05, "loss": 0.39563286304473877, "step": 4917, "token_acc": 0.8650793650793651 }, { "epoch": 0.26536448497275134, "grad_norm": 0.3810890018939972, "learning_rate": 1.7232587930519645e-05, "loss": 0.40622639656066895, "step": 4918, "token_acc": 0.8616100064143681 }, { "epoch": 0.2654184427777478, "grad_norm": 0.37834903597831726, "learning_rate": 1.7231380993631844e-05, "loss": 0.36809590458869934, "step": 4919, "token_acc": 0.8765544434334244 }, { "epoch": 0.2654724005827443, "grad_norm": 0.48117566108703613, "learning_rate": 1.7230173835899562e-05, "loss": 0.39982494711875916, "step": 4920, "token_acc": 0.8579463281375542 }, { "epoch": 0.2655263583877408, "grad_norm": 0.3937844932079315, "learning_rate": 1.7228966457359666e-05, "loss": 0.3850346505641937, "step": 4921, "token_acc": 0.8638194150063586 }, { "epoch": 0.26558031619273725, "grad_norm": 0.4398564398288727, "learning_rate": 1.722775885804903e-05, "loss": 0.42552879452705383, "step": 4922, "token_acc": 0.8531213777115416 }, { "epoch": 0.26563427399773376, "grad_norm": 0.52197265625, "learning_rate": 1.722655103800453e-05, "loss": 0.420284628868103, "step": 4923, "token_acc": 0.8536729857819905 }, { "epoch": 0.26568823180273027, "grad_norm": 0.5206055641174316, "learning_rate": 1.7225342997263054e-05, "loss": 0.4255051910877228, "step": 4924, "token_acc": 0.8522593896713615 }, { "epoch": 0.2657421896077268, "grad_norm": 0.3108028769493103, "learning_rate": 1.72241347358615e-05, "loss": 0.3796848654747009, "step": 4925, "token_acc": 0.8690378047344247 }, { "epoch": 0.2657961474127232, "grad_norm": 0.47651222348213196, "learning_rate": 1.7222926253836755e-05, "loss": 0.4290011525154114, "step": 4926, "token_acc": 0.8580487804878049 }, { "epoch": 0.26585010521771973, "grad_norm": 0.37811079621315, "learning_rate": 1.722171755122574e-05, "loss": 0.41113290190696716, "step": 4927, "token_acc": 0.8557350123186422 }, { "epoch": 0.26590406302271624, "grad_norm": 0.3297254741191864, "learning_rate": 1.7220508628065363e-05, "loss": 0.34117591381073, "step": 4928, "token_acc": 0.8782435129740519 }, { "epoch": 0.26595802082771275, "grad_norm": 0.5174132585525513, "learning_rate": 1.7219299484392542e-05, "loss": 0.40776392817497253, "step": 4929, "token_acc": 0.8634596695821186 }, { "epoch": 0.2660119786327092, "grad_norm": 0.48143118619918823, "learning_rate": 1.7218090120244207e-05, "loss": 0.3881068229675293, "step": 4930, "token_acc": 0.861005949161709 }, { "epoch": 0.2660659364377057, "grad_norm": 0.4315734803676605, "learning_rate": 1.7216880535657293e-05, "loss": 0.42781326174736023, "step": 4931, "token_acc": 0.8576918597880517 }, { "epoch": 0.2661198942427022, "grad_norm": 0.3887142539024353, "learning_rate": 1.7215670730668733e-05, "loss": 0.4414873719215393, "step": 4932, "token_acc": 0.8521467159510947 }, { "epoch": 0.2661738520476987, "grad_norm": 0.42201897501945496, "learning_rate": 1.7214460705315482e-05, "loss": 0.405132919549942, "step": 4933, "token_acc": 0.8584218512898331 }, { "epoch": 0.2662278098526952, "grad_norm": 0.4732248783111572, "learning_rate": 1.721325045963449e-05, "loss": 0.4456573724746704, "step": 4934, "token_acc": 0.8521707272394211 }, { "epoch": 0.2662817676576917, "grad_norm": 0.37034353613853455, "learning_rate": 1.7212039993662723e-05, "loss": 0.31786566972732544, "step": 4935, "token_acc": 0.8880246748021993 }, { "epoch": 0.2663357254626882, "grad_norm": 0.31312721967697144, "learning_rate": 1.7210829307437138e-05, "loss": 0.37592023611068726, "step": 4936, "token_acc": 0.8684381075826313 }, { "epoch": 0.2663896832676847, "grad_norm": 0.335750937461853, "learning_rate": 1.7209618400994716e-05, "loss": 0.42772552371025085, "step": 4937, "token_acc": 0.854329332245659 }, { "epoch": 0.26644364107268115, "grad_norm": 0.3613234758377075, "learning_rate": 1.720840727437244e-05, "loss": 0.4168550670146942, "step": 4938, "token_acc": 0.8550192374956278 }, { "epoch": 0.26649759887767765, "grad_norm": 0.4239935874938965, "learning_rate": 1.7207195927607288e-05, "loss": 0.3759746551513672, "step": 4939, "token_acc": 0.8701713134027544 }, { "epoch": 0.26655155668267416, "grad_norm": 0.37841445207595825, "learning_rate": 1.7205984360736266e-05, "loss": 0.4140033423900604, "step": 4940, "token_acc": 0.8539928486293206 }, { "epoch": 0.2666055144876706, "grad_norm": 0.5295560359954834, "learning_rate": 1.720477257379637e-05, "loss": 0.4462054371833801, "step": 4941, "token_acc": 0.8473920130399348 }, { "epoch": 0.2666594722926671, "grad_norm": 0.3747962415218353, "learning_rate": 1.7203560566824605e-05, "loss": 0.4521542489528656, "step": 4942, "token_acc": 0.8504237992354994 }, { "epoch": 0.26671343009766363, "grad_norm": 0.49014970660209656, "learning_rate": 1.7202348339857985e-05, "loss": 0.39784473180770874, "step": 4943, "token_acc": 0.8587981146897093 }, { "epoch": 0.26676738790266014, "grad_norm": 0.4640493392944336, "learning_rate": 1.7201135892933533e-05, "loss": 0.3784407079219818, "step": 4944, "token_acc": 0.8744323790720632 }, { "epoch": 0.2668213457076566, "grad_norm": 0.27857092022895813, "learning_rate": 1.7199923226088283e-05, "loss": 0.4357047975063324, "step": 4945, "token_acc": 0.8504356873455586 }, { "epoch": 0.2668753035126531, "grad_norm": 0.37215521931648254, "learning_rate": 1.719871033935926e-05, "loss": 0.4317837357521057, "step": 4946, "token_acc": 0.8577605826126536 }, { "epoch": 0.2669292613176496, "grad_norm": 0.46293264627456665, "learning_rate": 1.719749723278351e-05, "loss": 0.35560715198516846, "step": 4947, "token_acc": 0.8793593773387217 }, { "epoch": 0.2669832191226461, "grad_norm": 0.36266621947288513, "learning_rate": 1.7196283906398078e-05, "loss": 0.4081002473831177, "step": 4948, "token_acc": 0.8584417620316664 }, { "epoch": 0.26703717692764256, "grad_norm": 0.4968108534812927, "learning_rate": 1.719507036024002e-05, "loss": 0.4511862099170685, "step": 4949, "token_acc": 0.8489887640449438 }, { "epoch": 0.26709113473263907, "grad_norm": 0.3520800769329071, "learning_rate": 1.71938565943464e-05, "loss": 0.3575907349586487, "step": 4950, "token_acc": 0.8737704918032787 }, { "epoch": 0.2671450925376356, "grad_norm": 0.4283534288406372, "learning_rate": 1.719264260875429e-05, "loss": 0.3616376221179962, "step": 4951, "token_acc": 0.8776814888478477 }, { "epoch": 0.2671990503426321, "grad_norm": 0.4323655962944031, "learning_rate": 1.7191428403500752e-05, "loss": 0.41626662015914917, "step": 4952, "token_acc": 0.8571255453223461 }, { "epoch": 0.26725300814762853, "grad_norm": 0.33079713582992554, "learning_rate": 1.7190213978622872e-05, "loss": 0.3505420684814453, "step": 4953, "token_acc": 0.8736278097229483 }, { "epoch": 0.26730696595262504, "grad_norm": 0.386027991771698, "learning_rate": 1.718899933415775e-05, "loss": 0.3440523147583008, "step": 4954, "token_acc": 0.8799656694458068 }, { "epoch": 0.26736092375762155, "grad_norm": 0.44939523935317993, "learning_rate": 1.7187784470142464e-05, "loss": 0.3489970862865448, "step": 4955, "token_acc": 0.8766760212036171 }, { "epoch": 0.26741488156261806, "grad_norm": 0.392910897731781, "learning_rate": 1.7186569386614126e-05, "loss": 0.4515562057495117, "step": 4956, "token_acc": 0.8478293301506789 }, { "epoch": 0.2674688393676145, "grad_norm": 0.31273898482322693, "learning_rate": 1.718535408360984e-05, "loss": 0.3314597010612488, "step": 4957, "token_acc": 0.882473322613653 }, { "epoch": 0.267522797172611, "grad_norm": 0.41925644874572754, "learning_rate": 1.7184138561166725e-05, "loss": 0.3934336006641388, "step": 4958, "token_acc": 0.8575611184306836 }, { "epoch": 0.2675767549776075, "grad_norm": 0.36108309030532837, "learning_rate": 1.7182922819321896e-05, "loss": 0.4174196720123291, "step": 4959, "token_acc": 0.8589813058791655 }, { "epoch": 0.26763071278260403, "grad_norm": 0.46689972281455994, "learning_rate": 1.718170685811249e-05, "loss": 0.35217317938804626, "step": 4960, "token_acc": 0.8779090583601862 }, { "epoch": 0.2676846705876005, "grad_norm": 0.4250125288963318, "learning_rate": 1.7180490677575636e-05, "loss": 0.425668329000473, "step": 4961, "token_acc": 0.8515696434014021 }, { "epoch": 0.267738628392597, "grad_norm": 0.4241272211074829, "learning_rate": 1.717927427774848e-05, "loss": 0.43559837341308594, "step": 4962, "token_acc": 0.8536550542547116 }, { "epoch": 0.2677925861975935, "grad_norm": 0.3751981556415558, "learning_rate": 1.7178057658668164e-05, "loss": 0.3215133547782898, "step": 4963, "token_acc": 0.8859721082854799 }, { "epoch": 0.26784654400258995, "grad_norm": 0.35855549573898315, "learning_rate": 1.717684082037185e-05, "loss": 0.41452473402023315, "step": 4964, "token_acc": 0.8582722939193117 }, { "epoch": 0.26790050180758646, "grad_norm": 0.36583709716796875, "learning_rate": 1.71756237628967e-05, "loss": 0.45823293924331665, "step": 4965, "token_acc": 0.8413538658966956 }, { "epoch": 0.26795445961258296, "grad_norm": 0.39341041445732117, "learning_rate": 1.7174406486279876e-05, "loss": 0.4732286334037781, "step": 4966, "token_acc": 0.8408592722490136 }, { "epoch": 0.26800841741757947, "grad_norm": 0.38404273986816406, "learning_rate": 1.717318899055856e-05, "loss": 0.4036126136779785, "step": 4967, "token_acc": 0.8609221466364324 }, { "epoch": 0.2680623752225759, "grad_norm": 0.39991939067840576, "learning_rate": 1.717197127576993e-05, "loss": 0.397717148065567, "step": 4968, "token_acc": 0.8604977216964599 }, { "epoch": 0.26811633302757243, "grad_norm": 0.4813118577003479, "learning_rate": 1.7170753341951175e-05, "loss": 0.4269064664840698, "step": 4969, "token_acc": 0.8561015118790497 }, { "epoch": 0.26817029083256894, "grad_norm": 0.35052022337913513, "learning_rate": 1.7169535189139494e-05, "loss": 0.4614278972148895, "step": 4970, "token_acc": 0.8439086294416244 }, { "epoch": 0.26822424863756544, "grad_norm": 0.4987426698207855, "learning_rate": 1.7168316817372086e-05, "loss": 0.459946870803833, "step": 4971, "token_acc": 0.8465909090909091 }, { "epoch": 0.2682782064425619, "grad_norm": 0.4572800099849701, "learning_rate": 1.7167098226686157e-05, "loss": 0.43555396795272827, "step": 4972, "token_acc": 0.8536824076339613 }, { "epoch": 0.2683321642475584, "grad_norm": 0.42622846364974976, "learning_rate": 1.716587941711893e-05, "loss": 0.41476258635520935, "step": 4973, "token_acc": 0.8578214059531349 }, { "epoch": 0.2683861220525549, "grad_norm": 0.31570589542388916, "learning_rate": 1.716466038870762e-05, "loss": 0.3947729170322418, "step": 4974, "token_acc": 0.8654727793696275 }, { "epoch": 0.2684400798575514, "grad_norm": 0.39747175574302673, "learning_rate": 1.716344114148946e-05, "loss": 0.37675637006759644, "step": 4975, "token_acc": 0.8681968114348543 }, { "epoch": 0.26849403766254787, "grad_norm": 0.3862161636352539, "learning_rate": 1.716222167550168e-05, "loss": 0.335879385471344, "step": 4976, "token_acc": 0.8820678513731826 }, { "epoch": 0.2685479954675444, "grad_norm": 0.3970366418361664, "learning_rate": 1.7161001990781533e-05, "loss": 0.3757104277610779, "step": 4977, "token_acc": 0.8711048158640227 }, { "epoch": 0.2686019532725409, "grad_norm": 0.41827884316444397, "learning_rate": 1.7159782087366257e-05, "loss": 0.3849882185459137, "step": 4978, "token_acc": 0.8688479134895328 }, { "epoch": 0.2686559110775374, "grad_norm": 0.4474323093891144, "learning_rate": 1.715856196529311e-05, "loss": 0.34700003266334534, "step": 4979, "token_acc": 0.8799927047236914 }, { "epoch": 0.26870986888253384, "grad_norm": 0.31332719326019287, "learning_rate": 1.7157341624599362e-05, "loss": 0.4636971354484558, "step": 4980, "token_acc": 0.8426624321820828 }, { "epoch": 0.26876382668753035, "grad_norm": 0.4940878450870514, "learning_rate": 1.7156121065322274e-05, "loss": 0.3851965665817261, "step": 4981, "token_acc": 0.8680885972108285 }, { "epoch": 0.26881778449252686, "grad_norm": 0.4996647536754608, "learning_rate": 1.7154900287499117e-05, "loss": 0.4432680308818817, "step": 4982, "token_acc": 0.8492584141471763 }, { "epoch": 0.26887174229752336, "grad_norm": 0.3881065845489502, "learning_rate": 1.7153679291167187e-05, "loss": 0.40142685174942017, "step": 4983, "token_acc": 0.8585950736348235 }, { "epoch": 0.2689257001025198, "grad_norm": 0.3544860780239105, "learning_rate": 1.715245807636376e-05, "loss": 0.38610604405403137, "step": 4984, "token_acc": 0.8710683649597089 }, { "epoch": 0.2689796579075163, "grad_norm": 0.5165706872940063, "learning_rate": 1.715123664312614e-05, "loss": 0.4096980690956116, "step": 4985, "token_acc": 0.8643356643356643 }, { "epoch": 0.26903361571251283, "grad_norm": 0.32765287160873413, "learning_rate": 1.715001499149163e-05, "loss": 0.4091705083847046, "step": 4986, "token_acc": 0.8584590579292871 }, { "epoch": 0.2690875735175093, "grad_norm": 0.41611239314079285, "learning_rate": 1.714879312149753e-05, "loss": 0.4277779757976532, "step": 4987, "token_acc": 0.8561015118790497 }, { "epoch": 0.2691415313225058, "grad_norm": 0.39588063955307007, "learning_rate": 1.714757103318116e-05, "loss": 0.36224043369293213, "step": 4988, "token_acc": 0.8669369051815298 }, { "epoch": 0.2691954891275023, "grad_norm": 0.3128434717655182, "learning_rate": 1.7146348726579844e-05, "loss": 0.4092239737510681, "step": 4989, "token_acc": 0.8582964601769911 }, { "epoch": 0.2692494469324988, "grad_norm": 0.37619301676750183, "learning_rate": 1.7145126201730912e-05, "loss": 0.4126727879047394, "step": 4990, "token_acc": 0.8538768598040402 }, { "epoch": 0.26930340473749526, "grad_norm": 0.31459611654281616, "learning_rate": 1.7143903458671697e-05, "loss": 0.40370863676071167, "step": 4991, "token_acc": 0.858939659289922 }, { "epoch": 0.26935736254249176, "grad_norm": 0.37677186727523804, "learning_rate": 1.7142680497439542e-05, "loss": 0.3895125687122345, "step": 4992, "token_acc": 0.8615617266978279 }, { "epoch": 0.26941132034748827, "grad_norm": 0.46390488743782043, "learning_rate": 1.7141457318071796e-05, "loss": 0.4926665127277374, "step": 4993, "token_acc": 0.8389386328892772 }, { "epoch": 0.2694652781524848, "grad_norm": 0.43107691407203674, "learning_rate": 1.714023392060581e-05, "loss": 0.45679041743278503, "step": 4994, "token_acc": 0.8430503731343284 }, { "epoch": 0.26951923595748123, "grad_norm": 0.34945058822631836, "learning_rate": 1.7139010305078957e-05, "loss": 0.4024834632873535, "step": 4995, "token_acc": 0.8593052109181142 }, { "epoch": 0.26957319376247774, "grad_norm": 0.42224156856536865, "learning_rate": 1.7137786471528594e-05, "loss": 0.4200945496559143, "step": 4996, "token_acc": 0.8609523809523809 }, { "epoch": 0.26962715156747424, "grad_norm": 0.4760347306728363, "learning_rate": 1.7136562419992106e-05, "loss": 0.4476684629917145, "step": 4997, "token_acc": 0.8449197860962567 }, { "epoch": 0.26968110937247075, "grad_norm": 0.46293142437934875, "learning_rate": 1.713533815050687e-05, "loss": 0.40122362971305847, "step": 4998, "token_acc": 0.8665865384615384 }, { "epoch": 0.2697350671774672, "grad_norm": 0.4020744264125824, "learning_rate": 1.7134113663110278e-05, "loss": 0.3930877447128296, "step": 4999, "token_acc": 0.8616183615345954 }, { "epoch": 0.2697890249824637, "grad_norm": 0.3317982852458954, "learning_rate": 1.713288895783972e-05, "loss": 0.3595965802669525, "step": 5000, "token_acc": 0.8732588397367212 }, { "epoch": 0.2698429827874602, "grad_norm": 0.4767909348011017, "learning_rate": 1.7131664034732602e-05, "loss": 0.41176918148994446, "step": 5001, "token_acc": 0.8570198105081827 }, { "epoch": 0.2698969405924567, "grad_norm": 0.3736586570739746, "learning_rate": 1.7130438893826332e-05, "loss": 0.4448827803134918, "step": 5002, "token_acc": 0.8526182432432432 }, { "epoch": 0.2699508983974532, "grad_norm": 0.4088464081287384, "learning_rate": 1.7129213535158327e-05, "loss": 0.4396289885044098, "step": 5003, "token_acc": 0.8517945544554455 }, { "epoch": 0.2700048562024497, "grad_norm": 0.3680356442928314, "learning_rate": 1.712798795876601e-05, "loss": 0.42338043451309204, "step": 5004, "token_acc": 0.8540557974520239 }, { "epoch": 0.2700588140074462, "grad_norm": 0.4268566370010376, "learning_rate": 1.712676216468681e-05, "loss": 0.42447495460510254, "step": 5005, "token_acc": 0.8522129570237331 }, { "epoch": 0.27011277181244264, "grad_norm": 0.40454843640327454, "learning_rate": 1.7125536152958154e-05, "loss": 0.3990604281425476, "step": 5006, "token_acc": 0.8606800547695116 }, { "epoch": 0.27016672961743915, "grad_norm": 0.4878167510032654, "learning_rate": 1.7124309923617498e-05, "loss": 0.45147326588630676, "step": 5007, "token_acc": 0.8462892741061755 }, { "epoch": 0.27022068742243566, "grad_norm": 0.3854515850543976, "learning_rate": 1.7123083476702278e-05, "loss": 0.380606472492218, "step": 5008, "token_acc": 0.8673610239678755 }, { "epoch": 0.27027464522743216, "grad_norm": 0.4823910593986511, "learning_rate": 1.7121856812249955e-05, "loss": 0.3940827250480652, "step": 5009, "token_acc": 0.8643737832576249 }, { "epoch": 0.2703286030324286, "grad_norm": 0.411811500787735, "learning_rate": 1.7120629930297993e-05, "loss": 0.3911171555519104, "step": 5010, "token_acc": 0.8673890608875129 }, { "epoch": 0.2703825608374251, "grad_norm": 0.3979279100894928, "learning_rate": 1.7119402830883857e-05, "loss": 0.41009676456451416, "step": 5011, "token_acc": 0.8568484464172479 }, { "epoch": 0.27043651864242163, "grad_norm": 0.42595937848091125, "learning_rate": 1.711817551404502e-05, "loss": 0.43523114919662476, "step": 5012, "token_acc": 0.8505081874647092 }, { "epoch": 0.27049047644741814, "grad_norm": 0.4693267345428467, "learning_rate": 1.7116947979818972e-05, "loss": 0.44159844517707825, "step": 5013, "token_acc": 0.8503993154592128 }, { "epoch": 0.2705444342524146, "grad_norm": 0.46228280663490295, "learning_rate": 1.7115720228243196e-05, "loss": 0.4574253559112549, "step": 5014, "token_acc": 0.8420877105219277 }, { "epoch": 0.2705983920574111, "grad_norm": 0.3402658998966217, "learning_rate": 1.7114492259355188e-05, "loss": 0.38080817461013794, "step": 5015, "token_acc": 0.8666312433581297 }, { "epoch": 0.2706523498624076, "grad_norm": 0.41825586557388306, "learning_rate": 1.711326407319245e-05, "loss": 0.45968616008758545, "step": 5016, "token_acc": 0.8443935926773455 }, { "epoch": 0.2707063076674041, "grad_norm": 0.4318256974220276, "learning_rate": 1.7112035669792493e-05, "loss": 0.36429837346076965, "step": 5017, "token_acc": 0.8686384225952571 }, { "epoch": 0.27076026547240056, "grad_norm": 0.4941459596157074, "learning_rate": 1.711080704919283e-05, "loss": 0.4360591173171997, "step": 5018, "token_acc": 0.8484533402651416 }, { "epoch": 0.27081422327739707, "grad_norm": 0.507337749004364, "learning_rate": 1.7109578211430977e-05, "loss": 0.35952460765838623, "step": 5019, "token_acc": 0.8715945512820513 }, { "epoch": 0.2708681810823936, "grad_norm": 0.33041539788246155, "learning_rate": 1.7108349156544473e-05, "loss": 0.39602261781692505, "step": 5020, "token_acc": 0.8601175268267757 }, { "epoch": 0.2709221388873901, "grad_norm": 0.39589759707450867, "learning_rate": 1.7107119884570845e-05, "loss": 0.3541412055492401, "step": 5021, "token_acc": 0.8706090879793748 }, { "epoch": 0.27097609669238654, "grad_norm": 0.4551314413547516, "learning_rate": 1.710589039554764e-05, "loss": 0.4111256003379822, "step": 5022, "token_acc": 0.8566413107080164 }, { "epoch": 0.27103005449738304, "grad_norm": 0.5324865579605103, "learning_rate": 1.71046606895124e-05, "loss": 0.4683656096458435, "step": 5023, "token_acc": 0.8437830487989122 }, { "epoch": 0.27108401230237955, "grad_norm": 0.4042784869670868, "learning_rate": 1.7103430766502687e-05, "loss": 0.39381712675094604, "step": 5024, "token_acc": 0.8657243816254417 }, { "epoch": 0.27113797010737606, "grad_norm": 0.36449357867240906, "learning_rate": 1.710220062655606e-05, "loss": 0.43751260638237, "step": 5025, "token_acc": 0.8527696793002916 }, { "epoch": 0.2711919279123725, "grad_norm": 0.43394267559051514, "learning_rate": 1.7100970269710085e-05, "loss": 0.41346704959869385, "step": 5026, "token_acc": 0.8632028937849392 }, { "epoch": 0.271245885717369, "grad_norm": 0.40810126066207886, "learning_rate": 1.7099739696002336e-05, "loss": 0.43821823596954346, "step": 5027, "token_acc": 0.8480039067268954 }, { "epoch": 0.2712998435223655, "grad_norm": 0.34687650203704834, "learning_rate": 1.70985089054704e-05, "loss": 0.42803841829299927, "step": 5028, "token_acc": 0.8507048050210927 }, { "epoch": 0.271353801327362, "grad_norm": 0.6003568172454834, "learning_rate": 1.7097277898151857e-05, "loss": 0.45969057083129883, "step": 5029, "token_acc": 0.843412211510371 }, { "epoch": 0.2714077591323585, "grad_norm": 0.3517879545688629, "learning_rate": 1.709604667408431e-05, "loss": 0.3444751501083374, "step": 5030, "token_acc": 0.8758723088344469 }, { "epoch": 0.271461716937355, "grad_norm": 0.3691844642162323, "learning_rate": 1.7094815233305358e-05, "loss": 0.40383368730545044, "step": 5031, "token_acc": 0.86412830877559 }, { "epoch": 0.2715156747423515, "grad_norm": 0.35192349553108215, "learning_rate": 1.7093583575852607e-05, "loss": 0.41614896059036255, "step": 5032, "token_acc": 0.8576469156831731 }, { "epoch": 0.27156963254734795, "grad_norm": 0.4632619023323059, "learning_rate": 1.7092351701763668e-05, "loss": 0.43078458309173584, "step": 5033, "token_acc": 0.8549388900020036 }, { "epoch": 0.27162359035234446, "grad_norm": 0.4122612476348877, "learning_rate": 1.7091119611076167e-05, "loss": 0.3914225697517395, "step": 5034, "token_acc": 0.8648853037559869 }, { "epoch": 0.27167754815734096, "grad_norm": 0.3189431130886078, "learning_rate": 1.708988730382773e-05, "loss": 0.34263885021209717, "step": 5035, "token_acc": 0.8774803569050473 }, { "epoch": 0.27173150596233747, "grad_norm": 0.4156874418258667, "learning_rate": 1.7088654780055994e-05, "loss": 0.38757359981536865, "step": 5036, "token_acc": 0.867277610996998 }, { "epoch": 0.2717854637673339, "grad_norm": 0.4655454456806183, "learning_rate": 1.7087422039798597e-05, "loss": 0.41748863458633423, "step": 5037, "token_acc": 0.8589403973509934 }, { "epoch": 0.27183942157233043, "grad_norm": 0.347538024187088, "learning_rate": 1.708618908309319e-05, "loss": 0.3280564844608307, "step": 5038, "token_acc": 0.887888418079096 }, { "epoch": 0.27189337937732694, "grad_norm": 0.4156647026538849, "learning_rate": 1.7084955909977423e-05, "loss": 0.39685794711112976, "step": 5039, "token_acc": 0.8567935731749913 }, { "epoch": 0.27194733718232345, "grad_norm": 0.3745264410972595, "learning_rate": 1.708372252048896e-05, "loss": 0.4064171016216278, "step": 5040, "token_acc": 0.858440486020998 }, { "epoch": 0.2720012949873199, "grad_norm": 0.43641534447669983, "learning_rate": 1.7082488914665464e-05, "loss": 0.38135457038879395, "step": 5041, "token_acc": 0.8607226107226107 }, { "epoch": 0.2720552527923164, "grad_norm": 0.36853542923927307, "learning_rate": 1.7081255092544613e-05, "loss": 0.4717470407485962, "step": 5042, "token_acc": 0.8468947193623381 }, { "epoch": 0.2721092105973129, "grad_norm": 0.4277070164680481, "learning_rate": 1.7080021054164088e-05, "loss": 0.33733317255973816, "step": 5043, "token_acc": 0.8732826895102634 }, { "epoch": 0.2721631684023094, "grad_norm": 0.4390522837638855, "learning_rate": 1.7078786799561573e-05, "loss": 0.43631836771965027, "step": 5044, "token_acc": 0.8484802888955763 }, { "epoch": 0.27221712620730587, "grad_norm": 0.39378485083580017, "learning_rate": 1.7077552328774764e-05, "loss": 0.3352855443954468, "step": 5045, "token_acc": 0.8826416152551879 }, { "epoch": 0.2722710840123024, "grad_norm": 0.3637506663799286, "learning_rate": 1.707631764184136e-05, "loss": 0.3569087088108063, "step": 5046, "token_acc": 0.8759875846501128 }, { "epoch": 0.2723250418172989, "grad_norm": 0.6241990923881531, "learning_rate": 1.707508273879907e-05, "loss": 0.4250554144382477, "step": 5047, "token_acc": 0.8535082488570861 }, { "epoch": 0.2723789996222954, "grad_norm": 0.35154277086257935, "learning_rate": 1.707384761968561e-05, "loss": 0.3593333959579468, "step": 5048, "token_acc": 0.874443950177936 }, { "epoch": 0.27243295742729184, "grad_norm": 0.4696514904499054, "learning_rate": 1.707261228453869e-05, "loss": 0.4186926484107971, "step": 5049, "token_acc": 0.8538754764930114 }, { "epoch": 0.27248691523228835, "grad_norm": 0.4402649998664856, "learning_rate": 1.707137673339605e-05, "loss": 0.42326536774635315, "step": 5050, "token_acc": 0.8556583629893239 }, { "epoch": 0.27254087303728486, "grad_norm": 0.35595911741256714, "learning_rate": 1.7070140966295414e-05, "loss": 0.4506901502609253, "step": 5051, "token_acc": 0.8445953467661138 }, { "epoch": 0.2725948308422813, "grad_norm": 0.36678680777549744, "learning_rate": 1.7068904983274524e-05, "loss": 0.39124590158462524, "step": 5052, "token_acc": 0.8651268549545237 }, { "epoch": 0.2726487886472778, "grad_norm": 0.37783941626548767, "learning_rate": 1.706766878437113e-05, "loss": 0.3835044503211975, "step": 5053, "token_acc": 0.8694516971279374 }, { "epoch": 0.2727027464522743, "grad_norm": 0.374986857175827, "learning_rate": 1.706643236962298e-05, "loss": 0.3962143659591675, "step": 5054, "token_acc": 0.8581209031318281 }, { "epoch": 0.27275670425727083, "grad_norm": 0.3316324055194855, "learning_rate": 1.7065195739067838e-05, "loss": 0.4062361419200897, "step": 5055, "token_acc": 0.8572137328835087 }, { "epoch": 0.2728106620622673, "grad_norm": 0.5317568182945251, "learning_rate": 1.706395889274347e-05, "loss": 0.45082759857177734, "step": 5056, "token_acc": 0.8472277097326485 }, { "epoch": 0.2728646198672638, "grad_norm": 0.42854592204093933, "learning_rate": 1.7062721830687647e-05, "loss": 0.3563157916069031, "step": 5057, "token_acc": 0.8791923911229768 }, { "epoch": 0.2729185776722603, "grad_norm": 0.38988468050956726, "learning_rate": 1.7061484552938146e-05, "loss": 0.40164315700531006, "step": 5058, "token_acc": 0.859866178512814 }, { "epoch": 0.2729725354772568, "grad_norm": 0.4740150570869446, "learning_rate": 1.706024705953276e-05, "loss": 0.390847384929657, "step": 5059, "token_acc": 0.8680072573329302 }, { "epoch": 0.27302649328225326, "grad_norm": 0.39220738410949707, "learning_rate": 1.705900935050928e-05, "loss": 0.40496665239334106, "step": 5060, "token_acc": 0.8633440514469454 }, { "epoch": 0.27308045108724976, "grad_norm": 0.3745850622653961, "learning_rate": 1.70577714259055e-05, "loss": 0.34580814838409424, "step": 5061, "token_acc": 0.8820648577101258 }, { "epoch": 0.27313440889224627, "grad_norm": 0.41896510124206543, "learning_rate": 1.7056533285759235e-05, "loss": 0.4145580530166626, "step": 5062, "token_acc": 0.8604821931220169 }, { "epoch": 0.2731883666972428, "grad_norm": 0.40909337997436523, "learning_rate": 1.7055294930108285e-05, "loss": 0.4068368077278137, "step": 5063, "token_acc": 0.8564721663313213 }, { "epoch": 0.27324232450223923, "grad_norm": 0.29502072930336, "learning_rate": 1.705405635899048e-05, "loss": 0.42086777091026306, "step": 5064, "token_acc": 0.8519812116102613 }, { "epoch": 0.27329628230723574, "grad_norm": 0.4742250144481659, "learning_rate": 1.705281757244364e-05, "loss": 0.35681766271591187, "step": 5065, "token_acc": 0.8707169594010942 }, { "epoch": 0.27335024011223225, "grad_norm": 0.4420795440673828, "learning_rate": 1.7051578570505602e-05, "loss": 0.4445613920688629, "step": 5066, "token_acc": 0.8522325730806296 }, { "epoch": 0.27340419791722875, "grad_norm": 0.4206627905368805, "learning_rate": 1.70503393532142e-05, "loss": 0.3858526349067688, "step": 5067, "token_acc": 0.8630735615440641 }, { "epoch": 0.2734581557222252, "grad_norm": 0.4713928699493408, "learning_rate": 1.7049099920607282e-05, "loss": 0.40608900785446167, "step": 5068, "token_acc": 0.861878453038674 }, { "epoch": 0.2735121135272217, "grad_norm": 0.4360540211200714, "learning_rate": 1.70478602727227e-05, "loss": 0.3595510721206665, "step": 5069, "token_acc": 0.8721170669635756 }, { "epoch": 0.2735660713322182, "grad_norm": 0.33516764640808105, "learning_rate": 1.7046620409598308e-05, "loss": 0.35587966442108154, "step": 5070, "token_acc": 0.8764044943820225 }, { "epoch": 0.27362002913721467, "grad_norm": 0.32120734453201294, "learning_rate": 1.7045380331271973e-05, "loss": 0.41007134318351746, "step": 5071, "token_acc": 0.8617165373722449 }, { "epoch": 0.2736739869422112, "grad_norm": 0.4733087420463562, "learning_rate": 1.7044140037781574e-05, "loss": 0.4495604634284973, "step": 5072, "token_acc": 0.8482746955345061 }, { "epoch": 0.2737279447472077, "grad_norm": 0.39205825328826904, "learning_rate": 1.704289952916498e-05, "loss": 0.36574795842170715, "step": 5073, "token_acc": 0.8675637393767706 }, { "epoch": 0.2737819025522042, "grad_norm": 0.5297762155532837, "learning_rate": 1.704165880546008e-05, "loss": 0.3868459463119507, "step": 5074, "token_acc": 0.8688051595383571 }, { "epoch": 0.27383586035720064, "grad_norm": 0.43251073360443115, "learning_rate": 1.7040417866704766e-05, "loss": 0.4453783631324768, "step": 5075, "token_acc": 0.8470194239785667 }, { "epoch": 0.27388981816219715, "grad_norm": 0.39750969409942627, "learning_rate": 1.7039176712936934e-05, "loss": 0.42430853843688965, "step": 5076, "token_acc": 0.8523916788127733 }, { "epoch": 0.27394377596719366, "grad_norm": 0.41420695185661316, "learning_rate": 1.703793534419449e-05, "loss": 0.36607903242111206, "step": 5077, "token_acc": 0.8733001658374793 }, { "epoch": 0.27399773377219017, "grad_norm": 0.4317189157009125, "learning_rate": 1.7036693760515342e-05, "loss": 0.4088454842567444, "step": 5078, "token_acc": 0.8645189761694616 }, { "epoch": 0.2740516915771866, "grad_norm": 0.40157464146614075, "learning_rate": 1.7035451961937414e-05, "loss": 0.47766581177711487, "step": 5079, "token_acc": 0.8412669518521889 }, { "epoch": 0.2741056493821831, "grad_norm": 0.47874200344085693, "learning_rate": 1.7034209948498622e-05, "loss": 0.4369502365589142, "step": 5080, "token_acc": 0.8501228501228502 }, { "epoch": 0.27415960718717963, "grad_norm": 0.42285874485969543, "learning_rate": 1.70329677202369e-05, "loss": 0.420040488243103, "step": 5081, "token_acc": 0.8534873519520397 }, { "epoch": 0.27421356499217614, "grad_norm": 0.3586660623550415, "learning_rate": 1.7031725277190194e-05, "loss": 0.3459389805793762, "step": 5082, "token_acc": 0.8719723183391004 }, { "epoch": 0.2742675227971726, "grad_norm": 0.31629467010498047, "learning_rate": 1.703048261939643e-05, "loss": 0.4281346797943115, "step": 5083, "token_acc": 0.8527771322333256 }, { "epoch": 0.2743214806021691, "grad_norm": 0.3868926167488098, "learning_rate": 1.7029239746893574e-05, "loss": 0.3844800591468811, "step": 5084, "token_acc": 0.8685226019845645 }, { "epoch": 0.2743754384071656, "grad_norm": 0.311748206615448, "learning_rate": 1.7027996659719576e-05, "loss": 0.38428765535354614, "step": 5085, "token_acc": 0.8687698833510075 }, { "epoch": 0.2744293962121621, "grad_norm": 0.41342175006866455, "learning_rate": 1.7026753357912406e-05, "loss": 0.4476553201675415, "step": 5086, "token_acc": 0.8498861696072851 }, { "epoch": 0.27448335401715857, "grad_norm": 0.39864158630371094, "learning_rate": 1.702550984151002e-05, "loss": 0.3668692708015442, "step": 5087, "token_acc": 0.8707612046478539 }, { "epoch": 0.2745373118221551, "grad_norm": 0.33221280574798584, "learning_rate": 1.702426611055041e-05, "loss": 0.38419973850250244, "step": 5088, "token_acc": 0.8690327418145464 }, { "epoch": 0.2745912696271516, "grad_norm": 0.39239928126335144, "learning_rate": 1.7023022165071552e-05, "loss": 0.4512261152267456, "step": 5089, "token_acc": 0.8499710368797065 }, { "epoch": 0.2746452274321481, "grad_norm": 0.5420443415641785, "learning_rate": 1.7021778005111437e-05, "loss": 0.4505923092365265, "step": 5090, "token_acc": 0.8486089776412357 }, { "epoch": 0.27469918523714454, "grad_norm": 0.32232069969177246, "learning_rate": 1.7020533630708062e-05, "loss": 0.3869743347167969, "step": 5091, "token_acc": 0.8660714285714286 }, { "epoch": 0.27475314304214105, "grad_norm": 0.5057581067085266, "learning_rate": 1.7019289041899428e-05, "loss": 0.41001439094543457, "step": 5092, "token_acc": 0.8640038040893961 }, { "epoch": 0.27480710084713755, "grad_norm": 0.36383047699928284, "learning_rate": 1.7018044238723547e-05, "loss": 0.42017292976379395, "step": 5093, "token_acc": 0.8583208395802099 }, { "epoch": 0.274861058652134, "grad_norm": 0.35229650139808655, "learning_rate": 1.701679922121843e-05, "loss": 0.39578670263290405, "step": 5094, "token_acc": 0.8624271997481505 }, { "epoch": 0.2749150164571305, "grad_norm": 0.4203754961490631, "learning_rate": 1.7015553989422105e-05, "loss": 0.37781092524528503, "step": 5095, "token_acc": 0.8712806181160612 }, { "epoch": 0.274968974262127, "grad_norm": 0.36288249492645264, "learning_rate": 1.70143085433726e-05, "loss": 0.36421483755111694, "step": 5096, "token_acc": 0.8732843477055316 }, { "epoch": 0.2750229320671235, "grad_norm": 0.4269673228263855, "learning_rate": 1.7013062883107946e-05, "loss": 0.39376911520957947, "step": 5097, "token_acc": 0.8617092416744684 }, { "epoch": 0.27507688987212, "grad_norm": 0.4674246907234192, "learning_rate": 1.7011817008666192e-05, "loss": 0.3782665729522705, "step": 5098, "token_acc": 0.8655361425145598 }, { "epoch": 0.2751308476771165, "grad_norm": 0.3625301718711853, "learning_rate": 1.7010570920085382e-05, "loss": 0.3997398912906647, "step": 5099, "token_acc": 0.8598700782182156 }, { "epoch": 0.275184805482113, "grad_norm": 0.42532339692115784, "learning_rate": 1.7009324617403575e-05, "loss": 0.4191772937774658, "step": 5100, "token_acc": 0.8575170246202201 }, { "epoch": 0.2752387632871095, "grad_norm": 0.38788744807243347, "learning_rate": 1.7008078100658828e-05, "loss": 0.4182512164115906, "step": 5101, "token_acc": 0.8518134715025907 }, { "epoch": 0.27529272109210595, "grad_norm": 0.46921831369400024, "learning_rate": 1.700683136988921e-05, "loss": 0.3993527889251709, "step": 5102, "token_acc": 0.8626972506913941 }, { "epoch": 0.27534667889710246, "grad_norm": 0.5229337811470032, "learning_rate": 1.7005584425132804e-05, "loss": 0.4155564606189728, "step": 5103, "token_acc": 0.8542317173377157 }, { "epoch": 0.27540063670209897, "grad_norm": 0.4477894604206085, "learning_rate": 1.7004337266427678e-05, "loss": 0.4037516117095947, "step": 5104, "token_acc": 0.8611853832442068 }, { "epoch": 0.2754545945070955, "grad_norm": 0.4392285645008087, "learning_rate": 1.7003089893811927e-05, "loss": 0.4182182550430298, "step": 5105, "token_acc": 0.8554423855687815 }, { "epoch": 0.2755085523120919, "grad_norm": 0.45344290137290955, "learning_rate": 1.700184230732365e-05, "loss": 0.4625418186187744, "step": 5106, "token_acc": 0.8404304029304029 }, { "epoch": 0.27556251011708843, "grad_norm": 0.37178122997283936, "learning_rate": 1.700059450700094e-05, "loss": 0.41660362482070923, "step": 5107, "token_acc": 0.8539638386648123 }, { "epoch": 0.27561646792208494, "grad_norm": 0.40035897493362427, "learning_rate": 1.699934649288191e-05, "loss": 0.41939011216163635, "step": 5108, "token_acc": 0.8557439694291856 }, { "epoch": 0.27567042572708145, "grad_norm": 0.4765312373638153, "learning_rate": 1.699809826500467e-05, "loss": 0.4073432385921478, "step": 5109, "token_acc": 0.8609112709832134 }, { "epoch": 0.2757243835320779, "grad_norm": 0.4245451092720032, "learning_rate": 1.6996849823407342e-05, "loss": 0.3576938509941101, "step": 5110, "token_acc": 0.8725380899293943 }, { "epoch": 0.2757783413370744, "grad_norm": 0.395275354385376, "learning_rate": 1.6995601168128054e-05, "loss": 0.42743486166000366, "step": 5111, "token_acc": 0.8574821852731591 }, { "epoch": 0.2758322991420709, "grad_norm": 0.4381592273712158, "learning_rate": 1.699435229920494e-05, "loss": 0.4543778896331787, "step": 5112, "token_acc": 0.8468212203100555 }, { "epoch": 0.2758862569470674, "grad_norm": 0.35460367798805237, "learning_rate": 1.6993103216676137e-05, "loss": 0.3671468496322632, "step": 5113, "token_acc": 0.8685754517718846 }, { "epoch": 0.2759402147520639, "grad_norm": 0.48943227529525757, "learning_rate": 1.6991853920579792e-05, "loss": 0.463166207075119, "step": 5114, "token_acc": 0.8459519748064559 }, { "epoch": 0.2759941725570604, "grad_norm": 0.434814453125, "learning_rate": 1.6990604410954064e-05, "loss": 0.505845844745636, "step": 5115, "token_acc": 0.8272727272727273 }, { "epoch": 0.2760481303620569, "grad_norm": 0.37614551186561584, "learning_rate": 1.6989354687837107e-05, "loss": 0.4061349034309387, "step": 5116, "token_acc": 0.8574650428506991 }, { "epoch": 0.27610208816705334, "grad_norm": 0.4311831295490265, "learning_rate": 1.6988104751267086e-05, "loss": 0.3822645843029022, "step": 5117, "token_acc": 0.8715182740299469 }, { "epoch": 0.27615604597204985, "grad_norm": 0.43266522884368896, "learning_rate": 1.698685460128218e-05, "loss": 0.4126794636249542, "step": 5118, "token_acc": 0.8542116630669546 }, { "epoch": 0.27621000377704635, "grad_norm": 0.34777531027793884, "learning_rate": 1.6985604237920567e-05, "loss": 0.34781134128570557, "step": 5119, "token_acc": 0.8784606547960943 }, { "epoch": 0.27626396158204286, "grad_norm": 0.42492103576660156, "learning_rate": 1.6984353661220425e-05, "loss": 0.4270072877407074, "step": 5120, "token_acc": 0.8524705221785513 }, { "epoch": 0.2763179193870393, "grad_norm": 0.45171570777893066, "learning_rate": 1.6983102871219955e-05, "loss": 0.4192469120025635, "step": 5121, "token_acc": 0.8614173228346457 }, { "epoch": 0.2763718771920358, "grad_norm": 0.38674411177635193, "learning_rate": 1.698185186795735e-05, "loss": 0.44710636138916016, "step": 5122, "token_acc": 0.8501604542088373 }, { "epoch": 0.2764258349970323, "grad_norm": 0.4808773696422577, "learning_rate": 1.6980600651470822e-05, "loss": 0.43041175603866577, "step": 5123, "token_acc": 0.8555161194912748 }, { "epoch": 0.27647979280202883, "grad_norm": 0.38555359840393066, "learning_rate": 1.6979349221798575e-05, "loss": 0.3218342661857605, "step": 5124, "token_acc": 0.889165241722369 }, { "epoch": 0.2765337506070253, "grad_norm": 0.46230703592300415, "learning_rate": 1.6978097578978835e-05, "loss": 0.3998470902442932, "step": 5125, "token_acc": 0.8623257073236767 }, { "epoch": 0.2765877084120218, "grad_norm": 0.3896760940551758, "learning_rate": 1.697684572304982e-05, "loss": 0.45102426409721375, "step": 5126, "token_acc": 0.8501083554057308 }, { "epoch": 0.2766416662170183, "grad_norm": 0.4907539486885071, "learning_rate": 1.6975593654049766e-05, "loss": 0.34962043166160583, "step": 5127, "token_acc": 0.8799166087560806 }, { "epoch": 0.2766956240220148, "grad_norm": 0.40455228090286255, "learning_rate": 1.6974341372016908e-05, "loss": 0.3689248561859131, "step": 5128, "token_acc": 0.8713968957871396 }, { "epoch": 0.27674958182701126, "grad_norm": 0.40554237365722656, "learning_rate": 1.6973088876989493e-05, "loss": 0.4092986285686493, "step": 5129, "token_acc": 0.8603965262985417 }, { "epoch": 0.27680353963200777, "grad_norm": 0.35594412684440613, "learning_rate": 1.6971836169005768e-05, "loss": 0.41580623388290405, "step": 5130, "token_acc": 0.8598895262375186 }, { "epoch": 0.2768574974370043, "grad_norm": 0.4030422866344452, "learning_rate": 1.6970583248103994e-05, "loss": 0.39354702830314636, "step": 5131, "token_acc": 0.8679676985195155 }, { "epoch": 0.2769114552420008, "grad_norm": 0.4095991849899292, "learning_rate": 1.6969330114322433e-05, "loss": 0.4138452708721161, "step": 5132, "token_acc": 0.8564558437773244 }, { "epoch": 0.27696541304699723, "grad_norm": 0.3723956346511841, "learning_rate": 1.6968076767699356e-05, "loss": 0.44570761919021606, "step": 5133, "token_acc": 0.8504719949653871 }, { "epoch": 0.27701937085199374, "grad_norm": 0.36190974712371826, "learning_rate": 1.696682320827304e-05, "loss": 0.38315922021865845, "step": 5134, "token_acc": 0.8688182599091913 }, { "epoch": 0.27707332865699025, "grad_norm": 0.5712571144104004, "learning_rate": 1.696556943608177e-05, "loss": 0.39666008949279785, "step": 5135, "token_acc": 0.8616148119870204 }, { "epoch": 0.2771272864619867, "grad_norm": 0.35270383954048157, "learning_rate": 1.6964315451163833e-05, "loss": 0.39590713381767273, "step": 5136, "token_acc": 0.861029719547928 }, { "epoch": 0.2771812442669832, "grad_norm": 0.3257060647010803, "learning_rate": 1.6963061253557524e-05, "loss": 0.38892650604248047, "step": 5137, "token_acc": 0.8650513950073422 }, { "epoch": 0.2772352020719797, "grad_norm": 0.34167566895484924, "learning_rate": 1.6961806843301154e-05, "loss": 0.41710400581359863, "step": 5138, "token_acc": 0.8567422890644915 }, { "epoch": 0.2772891598769762, "grad_norm": 0.4024210274219513, "learning_rate": 1.6960552220433023e-05, "loss": 0.4561273455619812, "step": 5139, "token_acc": 0.8522299838796346 }, { "epoch": 0.2773431176819727, "grad_norm": 0.38945069909095764, "learning_rate": 1.695929738499145e-05, "loss": 0.37037625908851624, "step": 5140, "token_acc": 0.8708446866485013 }, { "epoch": 0.2773970754869692, "grad_norm": 0.3765919804573059, "learning_rate": 1.695804233701476e-05, "loss": 0.401114821434021, "step": 5141, "token_acc": 0.8608617594254937 }, { "epoch": 0.2774510332919657, "grad_norm": 0.49056169390678406, "learning_rate": 1.695678707654128e-05, "loss": 0.3671365976333618, "step": 5142, "token_acc": 0.8699444885011896 }, { "epoch": 0.2775049910969622, "grad_norm": 0.36756432056427, "learning_rate": 1.6955531603609345e-05, "loss": 0.3857702314853668, "step": 5143, "token_acc": 0.8646125116713352 }, { "epoch": 0.27755894890195865, "grad_norm": 0.46802210807800293, "learning_rate": 1.6954275918257294e-05, "loss": 0.4049907326698303, "step": 5144, "token_acc": 0.8579072532699168 }, { "epoch": 0.27761290670695515, "grad_norm": 0.4035334885120392, "learning_rate": 1.695302002052348e-05, "loss": 0.3857805132865906, "step": 5145, "token_acc": 0.8629990856446206 }, { "epoch": 0.27766686451195166, "grad_norm": 0.4334518313407898, "learning_rate": 1.6951763910446257e-05, "loss": 0.3729429841041565, "step": 5146, "token_acc": 0.872343101496667 }, { "epoch": 0.27772082231694817, "grad_norm": 0.353263795375824, "learning_rate": 1.6950507588063986e-05, "loss": 0.41389554738998413, "step": 5147, "token_acc": 0.858735158710928 }, { "epoch": 0.2777747801219446, "grad_norm": 0.434773325920105, "learning_rate": 1.694925105341503e-05, "loss": 0.3805966377258301, "step": 5148, "token_acc": 0.866304347826087 }, { "epoch": 0.2778287379269411, "grad_norm": 0.37872394919395447, "learning_rate": 1.694799430653777e-05, "loss": 0.43715909123420715, "step": 5149, "token_acc": 0.8489556361125496 }, { "epoch": 0.27788269573193763, "grad_norm": 0.4137064814567566, "learning_rate": 1.6946737347470583e-05, "loss": 0.4156753420829773, "step": 5150, "token_acc": 0.8613612874649343 }, { "epoch": 0.27793665353693414, "grad_norm": 0.48710429668426514, "learning_rate": 1.694548017625186e-05, "loss": 0.3157440721988678, "step": 5151, "token_acc": 0.8880911503043546 }, { "epoch": 0.2779906113419306, "grad_norm": 0.41240695118904114, "learning_rate": 1.694422279291999e-05, "loss": 0.3890675902366638, "step": 5152, "token_acc": 0.8654103180967656 }, { "epoch": 0.2780445691469271, "grad_norm": 0.41619715094566345, "learning_rate": 1.6942965197513373e-05, "loss": 0.3887050449848175, "step": 5153, "token_acc": 0.8674069235793599 }, { "epoch": 0.2780985269519236, "grad_norm": 0.405585378408432, "learning_rate": 1.6941707390070422e-05, "loss": 0.4775526523590088, "step": 5154, "token_acc": 0.8410651716888577 }, { "epoch": 0.2781524847569201, "grad_norm": 0.3785957098007202, "learning_rate": 1.6940449370629538e-05, "loss": 0.3410159647464752, "step": 5155, "token_acc": 0.8765003615328995 }, { "epoch": 0.27820644256191657, "grad_norm": 0.4333224892616272, "learning_rate": 1.6939191139229158e-05, "loss": 0.4096817076206207, "step": 5156, "token_acc": 0.8631261517842185 }, { "epoch": 0.2782604003669131, "grad_norm": 0.487820029258728, "learning_rate": 1.6937932695907694e-05, "loss": 0.4462253451347351, "step": 5157, "token_acc": 0.8521057786483839 }, { "epoch": 0.2783143581719096, "grad_norm": 0.38077491521835327, "learning_rate": 1.693667404070358e-05, "loss": 0.4266187846660614, "step": 5158, "token_acc": 0.85382326007326 }, { "epoch": 0.27836831597690603, "grad_norm": 0.39485839009284973, "learning_rate": 1.6935415173655264e-05, "loss": 0.4681366980075836, "step": 5159, "token_acc": 0.8338987473903967 }, { "epoch": 0.27842227378190254, "grad_norm": 0.40441203117370605, "learning_rate": 1.6934156094801182e-05, "loss": 0.4141864776611328, "step": 5160, "token_acc": 0.8562365182083492 }, { "epoch": 0.27847623158689905, "grad_norm": 0.47627922892570496, "learning_rate": 1.693289680417979e-05, "loss": 0.4774389863014221, "step": 5161, "token_acc": 0.8381240544629349 }, { "epoch": 0.27853018939189556, "grad_norm": 0.41428330540657043, "learning_rate": 1.6931637301829545e-05, "loss": 0.37643444538116455, "step": 5162, "token_acc": 0.8718833908707326 }, { "epoch": 0.278584147196892, "grad_norm": 0.42634981870651245, "learning_rate": 1.6930377587788912e-05, "loss": 0.37951725721359253, "step": 5163, "token_acc": 0.8685154525386314 }, { "epoch": 0.2786381050018885, "grad_norm": 0.36259064078330994, "learning_rate": 1.6929117662096368e-05, "loss": 0.37049052119255066, "step": 5164, "token_acc": 0.870004524204494 }, { "epoch": 0.278692062806885, "grad_norm": 0.2858598530292511, "learning_rate": 1.6927857524790382e-05, "loss": 0.39737367630004883, "step": 5165, "token_acc": 0.8616915422885573 }, { "epoch": 0.27874602061188153, "grad_norm": 0.5971993207931519, "learning_rate": 1.692659717590944e-05, "loss": 0.4233494997024536, "step": 5166, "token_acc": 0.857282121271203 }, { "epoch": 0.278799978416878, "grad_norm": 0.5257777571678162, "learning_rate": 1.692533661549204e-05, "loss": 0.41573649644851685, "step": 5167, "token_acc": 0.8546206896551725 }, { "epoch": 0.2788539362218745, "grad_norm": 0.40893226861953735, "learning_rate": 1.6924075843576668e-05, "loss": 0.3240686357021332, "step": 5168, "token_acc": 0.8812659303313509 }, { "epoch": 0.278907894026871, "grad_norm": 0.3964528739452362, "learning_rate": 1.6922814860201838e-05, "loss": 0.4074600040912628, "step": 5169, "token_acc": 0.8553532072845378 }, { "epoch": 0.2789618518318675, "grad_norm": 0.4946848452091217, "learning_rate": 1.6921553665406053e-05, "loss": 0.392848402261734, "step": 5170, "token_acc": 0.8669557675628794 }, { "epoch": 0.27901580963686395, "grad_norm": 0.4288675785064697, "learning_rate": 1.6920292259227833e-05, "loss": 0.35146546363830566, "step": 5171, "token_acc": 0.8782268578878748 }, { "epoch": 0.27906976744186046, "grad_norm": 0.3333207964897156, "learning_rate": 1.6919030641705704e-05, "loss": 0.3778165578842163, "step": 5172, "token_acc": 0.8679072205477657 }, { "epoch": 0.27912372524685697, "grad_norm": 0.4590946137905121, "learning_rate": 1.6917768812878185e-05, "loss": 0.4163931906223297, "step": 5173, "token_acc": 0.8534731722146223 }, { "epoch": 0.2791776830518535, "grad_norm": 0.42866477370262146, "learning_rate": 1.6916506772783824e-05, "loss": 0.5167873501777649, "step": 5174, "token_acc": 0.8279210442534225 }, { "epoch": 0.2792316408568499, "grad_norm": 0.3854069411754608, "learning_rate": 1.6915244521461157e-05, "loss": 0.3746379613876343, "step": 5175, "token_acc": 0.8659567079523093 }, { "epoch": 0.27928559866184643, "grad_norm": 0.371352881193161, "learning_rate": 1.691398205894873e-05, "loss": 0.35237032175064087, "step": 5176, "token_acc": 0.8779791636934494 }, { "epoch": 0.27933955646684294, "grad_norm": 0.5316415429115295, "learning_rate": 1.6912719385285106e-05, "loss": 0.36463475227355957, "step": 5177, "token_acc": 0.8747979162924375 }, { "epoch": 0.27939351427183945, "grad_norm": 0.3648591637611389, "learning_rate": 1.6911456500508843e-05, "loss": 0.4329952597618103, "step": 5178, "token_acc": 0.8579598145285935 }, { "epoch": 0.2794474720768359, "grad_norm": 0.49089205265045166, "learning_rate": 1.691019340465851e-05, "loss": 0.40100157260894775, "step": 5179, "token_acc": 0.8623804463336876 }, { "epoch": 0.2795014298818324, "grad_norm": 0.4284211993217468, "learning_rate": 1.6908930097772676e-05, "loss": 0.4247105121612549, "step": 5180, "token_acc": 0.8546897546897547 }, { "epoch": 0.2795553876868289, "grad_norm": 0.4933818280696869, "learning_rate": 1.690766657988993e-05, "loss": 0.4244159460067749, "step": 5181, "token_acc": 0.8596918085969181 }, { "epoch": 0.27960934549182537, "grad_norm": 0.3334897458553314, "learning_rate": 1.6906402851048856e-05, "loss": 0.39448678493499756, "step": 5182, "token_acc": 0.8637538779731128 }, { "epoch": 0.2796633032968219, "grad_norm": 0.4228649437427521, "learning_rate": 1.690513891128805e-05, "loss": 0.4858713746070862, "step": 5183, "token_acc": 0.8414859252422704 }, { "epoch": 0.2797172611018184, "grad_norm": 0.4930940568447113, "learning_rate": 1.6903874760646105e-05, "loss": 0.39262130856513977, "step": 5184, "token_acc": 0.8645333333333334 }, { "epoch": 0.2797712189068149, "grad_norm": 0.3930024802684784, "learning_rate": 1.6902610399161637e-05, "loss": 0.3520428538322449, "step": 5185, "token_acc": 0.8725085910652921 }, { "epoch": 0.27982517671181134, "grad_norm": 0.3801051378250122, "learning_rate": 1.6901345826873258e-05, "loss": 0.359241247177124, "step": 5186, "token_acc": 0.8715658564160542 }, { "epoch": 0.27987913451680785, "grad_norm": 0.426431804895401, "learning_rate": 1.690008104381958e-05, "loss": 0.42685359716415405, "step": 5187, "token_acc": 0.8566947098246115 }, { "epoch": 0.27993309232180436, "grad_norm": 0.4076748192310333, "learning_rate": 1.6898816050039236e-05, "loss": 0.40986111760139465, "step": 5188, "token_acc": 0.8546775658492279 }, { "epoch": 0.27998705012680086, "grad_norm": 0.4208644926548004, "learning_rate": 1.689755084557086e-05, "loss": 0.4633784890174866, "step": 5189, "token_acc": 0.8470785149117468 }, { "epoch": 0.2800410079317973, "grad_norm": 0.3884761929512024, "learning_rate": 1.689628543045309e-05, "loss": 0.4137324392795563, "step": 5190, "token_acc": 0.8533369953309531 }, { "epoch": 0.2800949657367938, "grad_norm": 0.4065825343132019, "learning_rate": 1.689501980472456e-05, "loss": 0.367756724357605, "step": 5191, "token_acc": 0.8709090909090909 }, { "epoch": 0.28014892354179033, "grad_norm": 0.6261574029922485, "learning_rate": 1.6893753968423938e-05, "loss": 0.35946783423423767, "step": 5192, "token_acc": 0.8702174279453901 }, { "epoch": 0.28020288134678684, "grad_norm": 0.4638383984565735, "learning_rate": 1.6892487921589874e-05, "loss": 0.37965506315231323, "step": 5193, "token_acc": 0.8706655710764174 }, { "epoch": 0.2802568391517833, "grad_norm": 0.37083107233047485, "learning_rate": 1.6891221664261038e-05, "loss": 0.43793052434921265, "step": 5194, "token_acc": 0.8520885642547363 }, { "epoch": 0.2803107969567798, "grad_norm": 0.5678310394287109, "learning_rate": 1.6889955196476095e-05, "loss": 0.4134776294231415, "step": 5195, "token_acc": 0.8535930154466085 }, { "epoch": 0.2803647547617763, "grad_norm": 0.31202343106269836, "learning_rate": 1.6888688518273724e-05, "loss": 0.4329894185066223, "step": 5196, "token_acc": 0.8524777636594664 }, { "epoch": 0.2804187125667728, "grad_norm": 0.3955792486667633, "learning_rate": 1.6887421629692612e-05, "loss": 0.4200611114501953, "step": 5197, "token_acc": 0.8522511097019657 }, { "epoch": 0.28047267037176926, "grad_norm": 0.5257490277290344, "learning_rate": 1.6886154530771446e-05, "loss": 0.4156906306743622, "step": 5198, "token_acc": 0.8544103992571959 }, { "epoch": 0.28052662817676577, "grad_norm": 0.4288984537124634, "learning_rate": 1.6884887221548927e-05, "loss": 0.41312944889068604, "step": 5199, "token_acc": 0.8623295791345584 }, { "epoch": 0.2805805859817623, "grad_norm": 0.42714738845825195, "learning_rate": 1.6883619702063754e-05, "loss": 0.3695065379142761, "step": 5200, "token_acc": 0.8711839166046166 }, { "epoch": 0.28063454378675873, "grad_norm": 0.44207876920700073, "learning_rate": 1.6882351972354642e-05, "loss": 0.3941658139228821, "step": 5201, "token_acc": 0.8582488479262673 }, { "epoch": 0.28068850159175524, "grad_norm": 0.4466996490955353, "learning_rate": 1.68810840324603e-05, "loss": 0.4246137738227844, "step": 5202, "token_acc": 0.8551569506726457 }, { "epoch": 0.28074245939675174, "grad_norm": 0.5014163851737976, "learning_rate": 1.6879815882419458e-05, "loss": 0.378385066986084, "step": 5203, "token_acc": 0.8661322039025866 }, { "epoch": 0.28079641720174825, "grad_norm": 0.32671308517456055, "learning_rate": 1.6878547522270835e-05, "loss": 0.36285626888275146, "step": 5204, "token_acc": 0.8752464599390571 }, { "epoch": 0.2808503750067447, "grad_norm": 0.37860921025276184, "learning_rate": 1.687727895205318e-05, "loss": 0.4467363655567169, "step": 5205, "token_acc": 0.85041761579347 }, { "epoch": 0.2809043328117412, "grad_norm": 0.5031213760375977, "learning_rate": 1.687601017180522e-05, "loss": 0.4479934573173523, "step": 5206, "token_acc": 0.8499754058042303 }, { "epoch": 0.2809582906167377, "grad_norm": 0.4616261422634125, "learning_rate": 1.6874741181565712e-05, "loss": 0.4150361716747284, "step": 5207, "token_acc": 0.8522525192649674 }, { "epoch": 0.2810122484217342, "grad_norm": 0.3959789574146271, "learning_rate": 1.6873471981373414e-05, "loss": 0.3889296352863312, "step": 5208, "token_acc": 0.8612523839796568 }, { "epoch": 0.2810662062267307, "grad_norm": 0.4036095440387726, "learning_rate": 1.687220257126708e-05, "loss": 0.427996426820755, "step": 5209, "token_acc": 0.8490745781164943 }, { "epoch": 0.2811201640317272, "grad_norm": 0.4229339063167572, "learning_rate": 1.687093295128548e-05, "loss": 0.41729849576950073, "step": 5210, "token_acc": 0.854988726042841 }, { "epoch": 0.2811741218367237, "grad_norm": 0.5062504410743713, "learning_rate": 1.6869663121467385e-05, "loss": 0.4321215748786926, "step": 5211, "token_acc": 0.8488684311469121 }, { "epoch": 0.2812280796417202, "grad_norm": 0.40091797709465027, "learning_rate": 1.686839308185158e-05, "loss": 0.4348651170730591, "step": 5212, "token_acc": 0.8472205026618794 }, { "epoch": 0.28128203744671665, "grad_norm": 0.4389173090457916, "learning_rate": 1.686712283247685e-05, "loss": 0.33430084586143494, "step": 5213, "token_acc": 0.8812966063004108 }, { "epoch": 0.28133599525171316, "grad_norm": 0.3840039372444153, "learning_rate": 1.6865852373381985e-05, "loss": 0.44386062026023865, "step": 5214, "token_acc": 0.8491852536692477 }, { "epoch": 0.28138995305670966, "grad_norm": 0.44439348578453064, "learning_rate": 1.6864581704605793e-05, "loss": 0.3591749370098114, "step": 5215, "token_acc": 0.8726861373920197 }, { "epoch": 0.28144391086170617, "grad_norm": 0.4224569797515869, "learning_rate": 1.686331082618707e-05, "loss": 0.420330286026001, "step": 5216, "token_acc": 0.8593329747175901 }, { "epoch": 0.2814978686667026, "grad_norm": 0.41744306683540344, "learning_rate": 1.686203973816463e-05, "loss": 0.44096213579177856, "step": 5217, "token_acc": 0.8483167806010576 }, { "epoch": 0.28155182647169913, "grad_norm": 0.4379005432128906, "learning_rate": 1.6860768440577295e-05, "loss": 0.40202632546424866, "step": 5218, "token_acc": 0.8655632984901278 }, { "epoch": 0.28160578427669564, "grad_norm": 0.34731361269950867, "learning_rate": 1.6859496933463894e-05, "loss": 0.3902776539325714, "step": 5219, "token_acc": 0.8673274017344296 }, { "epoch": 0.28165974208169214, "grad_norm": 0.45826709270477295, "learning_rate": 1.685822521686325e-05, "loss": 0.35796433687210083, "step": 5220, "token_acc": 0.8749138525155066 }, { "epoch": 0.2817136998866886, "grad_norm": 0.3912590742111206, "learning_rate": 1.6856953290814203e-05, "loss": 0.3661353886127472, "step": 5221, "token_acc": 0.8750572781426608 }, { "epoch": 0.2817676576916851, "grad_norm": 0.3889918327331543, "learning_rate": 1.68556811553556e-05, "loss": 0.3308354318141937, "step": 5222, "token_acc": 0.881215073792807 }, { "epoch": 0.2818216154966816, "grad_norm": 0.3655654191970825, "learning_rate": 1.685440881052629e-05, "loss": 0.43586158752441406, "step": 5223, "token_acc": 0.8517718785723358 }, { "epoch": 0.28187557330167806, "grad_norm": 0.356601744890213, "learning_rate": 1.685313625636513e-05, "loss": 0.3616541624069214, "step": 5224, "token_acc": 0.8766179540709812 }, { "epoch": 0.28192953110667457, "grad_norm": 0.38036394119262695, "learning_rate": 1.685186349291099e-05, "loss": 0.41309040784835815, "step": 5225, "token_acc": 0.8573246786305205 }, { "epoch": 0.2819834889116711, "grad_norm": 0.40049540996551514, "learning_rate": 1.6850590520202727e-05, "loss": 0.40284693241119385, "step": 5226, "token_acc": 0.8609027503618897 }, { "epoch": 0.2820374467166676, "grad_norm": 0.386486291885376, "learning_rate": 1.6849317338279228e-05, "loss": 0.40854203701019287, "step": 5227, "token_acc": 0.8603885039191185 }, { "epoch": 0.28209140452166404, "grad_norm": 0.3409065902233124, "learning_rate": 1.684804394717937e-05, "loss": 0.3777501583099365, "step": 5228, "token_acc": 0.8617363344051447 }, { "epoch": 0.28214536232666054, "grad_norm": 0.498260498046875, "learning_rate": 1.6846770346942043e-05, "loss": 0.4193662405014038, "step": 5229, "token_acc": 0.8540019286403085 }, { "epoch": 0.28219932013165705, "grad_norm": 0.43202051520347595, "learning_rate": 1.6845496537606142e-05, "loss": 0.3878617584705353, "step": 5230, "token_acc": 0.8664457332228667 }, { "epoch": 0.28225327793665356, "grad_norm": 0.3719000816345215, "learning_rate": 1.6844222519210577e-05, "loss": 0.4413210153579712, "step": 5231, "token_acc": 0.8527346243881677 }, { "epoch": 0.28230723574165, "grad_norm": 0.40661734342575073, "learning_rate": 1.6842948291794243e-05, "loss": 0.37665072083473206, "step": 5232, "token_acc": 0.8704186474467106 }, { "epoch": 0.2823611935466465, "grad_norm": 0.4507793188095093, "learning_rate": 1.684167385539606e-05, "loss": 0.39669904112815857, "step": 5233, "token_acc": 0.8629475660453265 }, { "epoch": 0.282415151351643, "grad_norm": 0.3628445863723755, "learning_rate": 1.6840399210054955e-05, "loss": 0.3811877965927124, "step": 5234, "token_acc": 0.8609443231441049 }, { "epoch": 0.28246910915663953, "grad_norm": 0.4293725788593292, "learning_rate": 1.6839124355809845e-05, "loss": 0.401500403881073, "step": 5235, "token_acc": 0.8603516878449493 }, { "epoch": 0.282523066961636, "grad_norm": 0.429325133562088, "learning_rate": 1.683784929269967e-05, "loss": 0.3681126832962036, "step": 5236, "token_acc": 0.8732466643859049 }, { "epoch": 0.2825770247666325, "grad_norm": 0.3592863380908966, "learning_rate": 1.683657402076337e-05, "loss": 0.3989189863204956, "step": 5237, "token_acc": 0.8661353505335675 }, { "epoch": 0.282630982571629, "grad_norm": 0.41733020544052124, "learning_rate": 1.683529854003989e-05, "loss": 0.36118367314338684, "step": 5238, "token_acc": 0.8752636759949374 }, { "epoch": 0.2826849403766255, "grad_norm": 0.4644823968410492, "learning_rate": 1.6834022850568184e-05, "loss": 0.4462607502937317, "step": 5239, "token_acc": 0.8446583965932235 }, { "epoch": 0.28273889818162196, "grad_norm": 0.43895936012268066, "learning_rate": 1.6832746952387208e-05, "loss": 0.41724851727485657, "step": 5240, "token_acc": 0.860032362459547 }, { "epoch": 0.28279285598661846, "grad_norm": 0.3502581715583801, "learning_rate": 1.6831470845535933e-05, "loss": 0.3893858790397644, "step": 5241, "token_acc": 0.8655243317340644 }, { "epoch": 0.28284681379161497, "grad_norm": 0.28044751286506653, "learning_rate": 1.6830194530053327e-05, "loss": 0.37841373682022095, "step": 5242, "token_acc": 0.8732964400759891 }, { "epoch": 0.2829007715966114, "grad_norm": 0.4249906837940216, "learning_rate": 1.682891800597837e-05, "loss": 0.4091093838214874, "step": 5243, "token_acc": 0.8607309890871297 }, { "epoch": 0.28295472940160793, "grad_norm": 0.4180140793323517, "learning_rate": 1.6827641273350045e-05, "loss": 0.3808137774467468, "step": 5244, "token_acc": 0.87034677990092 }, { "epoch": 0.28300868720660444, "grad_norm": 0.3803809583187103, "learning_rate": 1.6826364332207342e-05, "loss": 0.3674519658088684, "step": 5245, "token_acc": 0.8689284842567733 }, { "epoch": 0.28306264501160094, "grad_norm": 0.4012754559516907, "learning_rate": 1.6825087182589262e-05, "loss": 0.414824903011322, "step": 5246, "token_acc": 0.8504060407465451 }, { "epoch": 0.2831166028165974, "grad_norm": 0.354797899723053, "learning_rate": 1.6823809824534808e-05, "loss": 0.39481231570243835, "step": 5247, "token_acc": 0.8623082542001461 }, { "epoch": 0.2831705606215939, "grad_norm": 0.5367869138717651, "learning_rate": 1.682253225808299e-05, "loss": 0.45767778158187866, "step": 5248, "token_acc": 0.8456462256558829 }, { "epoch": 0.2832245184265904, "grad_norm": 0.4662196934223175, "learning_rate": 1.6821254483272824e-05, "loss": 0.43908172845840454, "step": 5249, "token_acc": 0.8503371445323952 }, { "epoch": 0.2832784762315869, "grad_norm": 0.5003737807273865, "learning_rate": 1.6819976500143332e-05, "loss": 0.41986536979675293, "step": 5250, "token_acc": 0.8518318965517241 }, { "epoch": 0.28333243403658337, "grad_norm": 0.45536601543426514, "learning_rate": 1.6818698308733545e-05, "loss": 0.3973779082298279, "step": 5251, "token_acc": 0.8630408892417626 }, { "epoch": 0.2833863918415799, "grad_norm": 0.3421337902545929, "learning_rate": 1.68174199090825e-05, "loss": 0.41816186904907227, "step": 5252, "token_acc": 0.8594336403957694 }, { "epoch": 0.2834403496465764, "grad_norm": 0.4591836631298065, "learning_rate": 1.6816141301229235e-05, "loss": 0.415574312210083, "step": 5253, "token_acc": 0.8581864379863428 }, { "epoch": 0.2834943074515729, "grad_norm": 0.4223935604095459, "learning_rate": 1.6814862485212798e-05, "loss": 0.35077664256095886, "step": 5254, "token_acc": 0.8739957716701903 }, { "epoch": 0.28354826525656934, "grad_norm": 0.38476577401161194, "learning_rate": 1.6813583461072245e-05, "loss": 0.3367907702922821, "step": 5255, "token_acc": 0.882225656877898 }, { "epoch": 0.28360222306156585, "grad_norm": 0.4421820342540741, "learning_rate": 1.6812304228846645e-05, "loss": 0.3827628195285797, "step": 5256, "token_acc": 0.8656040268456375 }, { "epoch": 0.28365618086656236, "grad_norm": 0.444306343793869, "learning_rate": 1.6811024788575054e-05, "loss": 0.3784046471118927, "step": 5257, "token_acc": 0.8673373574782025 }, { "epoch": 0.28371013867155886, "grad_norm": 0.4733126759529114, "learning_rate": 1.680974514029655e-05, "loss": 0.363342821598053, "step": 5258, "token_acc": 0.8724373576309795 }, { "epoch": 0.2837640964765553, "grad_norm": 0.4885809123516083, "learning_rate": 1.6808465284050216e-05, "loss": 0.3245108127593994, "step": 5259, "token_acc": 0.8838945098689667 }, { "epoch": 0.2838180542815518, "grad_norm": 0.44244372844696045, "learning_rate": 1.680718521987513e-05, "loss": 0.4236332178115845, "step": 5260, "token_acc": 0.8536472045841722 }, { "epoch": 0.28387201208654833, "grad_norm": 0.4627673029899597, "learning_rate": 1.68059049478104e-05, "loss": 0.37799328565597534, "step": 5261, "token_acc": 0.868070652173913 }, { "epoch": 0.28392596989154484, "grad_norm": 0.4633757770061493, "learning_rate": 1.680462446789511e-05, "loss": 0.38759636878967285, "step": 5262, "token_acc": 0.8679296715278725 }, { "epoch": 0.2839799276965413, "grad_norm": 0.4250091314315796, "learning_rate": 1.680334378016837e-05, "loss": 0.3971906304359436, "step": 5263, "token_acc": 0.864380075142407 }, { "epoch": 0.2840338855015378, "grad_norm": 0.4345170259475708, "learning_rate": 1.6802062884669293e-05, "loss": 0.415298193693161, "step": 5264, "token_acc": 0.8578925562767573 }, { "epoch": 0.2840878433065343, "grad_norm": 0.33611875772476196, "learning_rate": 1.6800781781437e-05, "loss": 0.33680588006973267, "step": 5265, "token_acc": 0.8781790437436419 }, { "epoch": 0.28414180111153076, "grad_norm": 0.46978774666786194, "learning_rate": 1.679950047051061e-05, "loss": 0.37150683999061584, "step": 5266, "token_acc": 0.8713602781399391 }, { "epoch": 0.28419575891652726, "grad_norm": 0.30836328864097595, "learning_rate": 1.679821895192926e-05, "loss": 0.37458598613739014, "step": 5267, "token_acc": 0.8651449722393584 }, { "epoch": 0.28424971672152377, "grad_norm": 0.3839823007583618, "learning_rate": 1.6796937225732085e-05, "loss": 0.3956785202026367, "step": 5268, "token_acc": 0.8622540250447227 }, { "epoch": 0.2843036745265203, "grad_norm": 0.44503772258758545, "learning_rate": 1.6795655291958223e-05, "loss": 0.4018474221229553, "step": 5269, "token_acc": 0.866044090630741 }, { "epoch": 0.28435763233151673, "grad_norm": 0.47483474016189575, "learning_rate": 1.679437315064683e-05, "loss": 0.3554823398590088, "step": 5270, "token_acc": 0.8715083798882681 }, { "epoch": 0.28441159013651324, "grad_norm": 0.5105794072151184, "learning_rate": 1.6793090801837064e-05, "loss": 0.3605395257472992, "step": 5271, "token_acc": 0.8705315799222079 }, { "epoch": 0.28446554794150974, "grad_norm": 0.44170400500297546, "learning_rate": 1.679180824556808e-05, "loss": 0.41831135749816895, "step": 5272, "token_acc": 0.8600761715515813 }, { "epoch": 0.28451950574650625, "grad_norm": 0.3821255564689636, "learning_rate": 1.6790525481879056e-05, "loss": 0.3311213552951813, "step": 5273, "token_acc": 0.8799946366318048 }, { "epoch": 0.2845734635515027, "grad_norm": 0.45015949010849, "learning_rate": 1.678924251080916e-05, "loss": 0.42670783400535583, "step": 5274, "token_acc": 0.8529411764705882 }, { "epoch": 0.2846274213564992, "grad_norm": 0.42971086502075195, "learning_rate": 1.6787959332397576e-05, "loss": 0.36651575565338135, "step": 5275, "token_acc": 0.8755879059350504 }, { "epoch": 0.2846813791614957, "grad_norm": 0.36675867438316345, "learning_rate": 1.6786675946683495e-05, "loss": 0.42536991834640503, "step": 5276, "token_acc": 0.8533768516193824 }, { "epoch": 0.2847353369664922, "grad_norm": 0.42388206720352173, "learning_rate": 1.6785392353706107e-05, "loss": 0.38032278418540955, "step": 5277, "token_acc": 0.8697214734950584 }, { "epoch": 0.2847892947714887, "grad_norm": 0.3442012369632721, "learning_rate": 1.6784108553504612e-05, "loss": 0.4078293740749359, "step": 5278, "token_acc": 0.8617980124797782 }, { "epoch": 0.2848432525764852, "grad_norm": 0.3636183738708496, "learning_rate": 1.678282454611822e-05, "loss": 0.3910250663757324, "step": 5279, "token_acc": 0.8626540453652438 }, { "epoch": 0.2848972103814817, "grad_norm": 0.40496253967285156, "learning_rate": 1.6781540331586146e-05, "loss": 0.35189634561538696, "step": 5280, "token_acc": 0.8778701225524722 }, { "epoch": 0.2849511681864782, "grad_norm": 0.40793004631996155, "learning_rate": 1.6780255909947603e-05, "loss": 0.3832892179489136, "step": 5281, "token_acc": 0.8623968675709691 }, { "epoch": 0.28500512599147465, "grad_norm": 0.43300241231918335, "learning_rate": 1.6778971281241824e-05, "loss": 0.427747905254364, "step": 5282, "token_acc": 0.856513940975053 }, { "epoch": 0.28505908379647116, "grad_norm": 0.3369784355163574, "learning_rate": 1.6777686445508036e-05, "loss": 0.3403048515319824, "step": 5283, "token_acc": 0.8723574448634441 }, { "epoch": 0.28511304160146766, "grad_norm": 0.3887532651424408, "learning_rate": 1.6776401402785478e-05, "loss": 0.4189772605895996, "step": 5284, "token_acc": 0.8567353014140412 }, { "epoch": 0.28516699940646417, "grad_norm": 0.33805030584335327, "learning_rate": 1.6775116153113398e-05, "loss": 0.44853925704956055, "step": 5285, "token_acc": 0.8500951776649747 }, { "epoch": 0.2852209572114606, "grad_norm": 0.4646049737930298, "learning_rate": 1.6773830696531046e-05, "loss": 0.4311101734638214, "step": 5286, "token_acc": 0.8604545454545455 }, { "epoch": 0.28527491501645713, "grad_norm": 0.3842288553714752, "learning_rate": 1.677254503307768e-05, "loss": 0.3858780860900879, "step": 5287, "token_acc": 0.8643187877087134 }, { "epoch": 0.28532887282145364, "grad_norm": 0.4398355782032013, "learning_rate": 1.677125916279256e-05, "loss": 0.4342743456363678, "step": 5288, "token_acc": 0.8517422748191978 }, { "epoch": 0.2853828306264501, "grad_norm": 0.37403392791748047, "learning_rate": 1.676997308571496e-05, "loss": 0.39568784832954407, "step": 5289, "token_acc": 0.8628960285586792 }, { "epoch": 0.2854367884314466, "grad_norm": 0.3453122079372406, "learning_rate": 1.676868680188416e-05, "loss": 0.3672913610935211, "step": 5290, "token_acc": 0.8725278121137207 }, { "epoch": 0.2854907462364431, "grad_norm": 0.3706391751766205, "learning_rate": 1.676740031133943e-05, "loss": 0.4006575345993042, "step": 5291, "token_acc": 0.8578538102643857 }, { "epoch": 0.2855447040414396, "grad_norm": 0.30120229721069336, "learning_rate": 1.6766113614120074e-05, "loss": 0.4189978241920471, "step": 5292, "token_acc": 0.8586355255070682 }, { "epoch": 0.28559866184643606, "grad_norm": 0.3970964252948761, "learning_rate": 1.6764826710265377e-05, "loss": 0.43918538093566895, "step": 5293, "token_acc": 0.8501043462480719 }, { "epoch": 0.28565261965143257, "grad_norm": 0.31226444244384766, "learning_rate": 1.6763539599814645e-05, "loss": 0.3662875294685364, "step": 5294, "token_acc": 0.8679448513571736 }, { "epoch": 0.2857065774564291, "grad_norm": 0.4099082052707672, "learning_rate": 1.6762252282807187e-05, "loss": 0.38591644167900085, "step": 5295, "token_acc": 0.8630154474658496 }, { "epoch": 0.2857605352614256, "grad_norm": 0.4217277467250824, "learning_rate": 1.6760964759282315e-05, "loss": 0.3789133131504059, "step": 5296, "token_acc": 0.8670378921635203 }, { "epoch": 0.28581449306642204, "grad_norm": 0.30070725083351135, "learning_rate": 1.6759677029279346e-05, "loss": 0.3758094310760498, "step": 5297, "token_acc": 0.8652331920048455 }, { "epoch": 0.28586845087141854, "grad_norm": 0.3403048515319824, "learning_rate": 1.675838909283762e-05, "loss": 0.35049647092819214, "step": 5298, "token_acc": 0.8777225378787878 }, { "epoch": 0.28592240867641505, "grad_norm": 0.33234336972236633, "learning_rate": 1.6757100949996455e-05, "loss": 0.39785265922546387, "step": 5299, "token_acc": 0.8659915214319359 }, { "epoch": 0.28597636648141156, "grad_norm": 0.5235170722007751, "learning_rate": 1.6755812600795197e-05, "loss": 0.37493830919265747, "step": 5300, "token_acc": 0.8704280155642024 }, { "epoch": 0.286030324286408, "grad_norm": 0.47024568915367126, "learning_rate": 1.6754524045273195e-05, "loss": 0.3928877115249634, "step": 5301, "token_acc": 0.8640804190853745 }, { "epoch": 0.2860842820914045, "grad_norm": 0.3473447859287262, "learning_rate": 1.6753235283469798e-05, "loss": 0.39434385299682617, "step": 5302, "token_acc": 0.8677820550784721 }, { "epoch": 0.286138239896401, "grad_norm": 0.3557027280330658, "learning_rate": 1.675194631542436e-05, "loss": 0.36751696467399597, "step": 5303, "token_acc": 0.8722678965309806 }, { "epoch": 0.28619219770139753, "grad_norm": 0.39864352345466614, "learning_rate": 1.6750657141176256e-05, "loss": 0.37020590901374817, "step": 5304, "token_acc": 0.8671043034281546 }, { "epoch": 0.286246155506394, "grad_norm": 0.49795782566070557, "learning_rate": 1.6749367760764853e-05, "loss": 0.39442548155784607, "step": 5305, "token_acc": 0.8662674650698603 }, { "epoch": 0.2863001133113905, "grad_norm": 0.373992383480072, "learning_rate": 1.674807817422952e-05, "loss": 0.3963994085788727, "step": 5306, "token_acc": 0.8641793047913032 }, { "epoch": 0.286354071116387, "grad_norm": 0.4672809839248657, "learning_rate": 1.6746788381609648e-05, "loss": 0.4528074562549591, "step": 5307, "token_acc": 0.8443081363882374 }, { "epoch": 0.28640802892138345, "grad_norm": 0.44872692227363586, "learning_rate": 1.674549838294463e-05, "loss": 0.35246121883392334, "step": 5308, "token_acc": 0.8799317147192717 }, { "epoch": 0.28646198672637996, "grad_norm": 0.38166046142578125, "learning_rate": 1.674420817827386e-05, "loss": 0.3810739815235138, "step": 5309, "token_acc": 0.8686967113276493 }, { "epoch": 0.28651594453137647, "grad_norm": 0.5196138024330139, "learning_rate": 1.6742917767636732e-05, "loss": 0.4354424476623535, "step": 5310, "token_acc": 0.8553312197563825 }, { "epoch": 0.286569902336373, "grad_norm": 0.39020299911499023, "learning_rate": 1.674162715107267e-05, "loss": 0.3859952986240387, "step": 5311, "token_acc": 0.863126002495099 }, { "epoch": 0.2866238601413694, "grad_norm": 0.4302190840244293, "learning_rate": 1.6740336328621073e-05, "loss": 0.3908860385417938, "step": 5312, "token_acc": 0.8637969804047543 }, { "epoch": 0.28667781794636593, "grad_norm": 0.3147837817668915, "learning_rate": 1.6739045300321376e-05, "loss": 0.37582892179489136, "step": 5313, "token_acc": 0.8671939558493684 }, { "epoch": 0.28673177575136244, "grad_norm": 0.44700852036476135, "learning_rate": 1.6737754066212998e-05, "loss": 0.42682945728302, "step": 5314, "token_acc": 0.8475657178410329 }, { "epoch": 0.28678573355635895, "grad_norm": 0.3992807865142822, "learning_rate": 1.6736462626335376e-05, "loss": 0.442853182554245, "step": 5315, "token_acc": 0.8513950073421439 }, { "epoch": 0.2868396913613554, "grad_norm": 0.4959181845188141, "learning_rate": 1.673517098072795e-05, "loss": 0.37585359811782837, "step": 5316, "token_acc": 0.8643980848153214 }, { "epoch": 0.2868936491663519, "grad_norm": 0.47566187381744385, "learning_rate": 1.673387912943017e-05, "loss": 0.4549199342727661, "step": 5317, "token_acc": 0.8434374052743255 }, { "epoch": 0.2869476069713484, "grad_norm": 0.4032091200351715, "learning_rate": 1.6732587072481484e-05, "loss": 0.3334040343761444, "step": 5318, "token_acc": 0.8786818851251841 }, { "epoch": 0.2870015647763449, "grad_norm": 0.4027092158794403, "learning_rate": 1.673129480992135e-05, "loss": 0.426239937543869, "step": 5319, "token_acc": 0.857471528816289 }, { "epoch": 0.28705552258134137, "grad_norm": 0.3671339750289917, "learning_rate": 1.6730002341789235e-05, "loss": 0.3571361303329468, "step": 5320, "token_acc": 0.8779612347451543 }, { "epoch": 0.2871094803863379, "grad_norm": 0.50581294298172, "learning_rate": 1.6728709668124616e-05, "loss": 0.45689499378204346, "step": 5321, "token_acc": 0.8481205295997565 }, { "epoch": 0.2871634381913344, "grad_norm": 0.34852731227874756, "learning_rate": 1.6727416788966964e-05, "loss": 0.415103018283844, "step": 5322, "token_acc": 0.8594602565993217 }, { "epoch": 0.2872173959963309, "grad_norm": 0.3181157410144806, "learning_rate": 1.6726123704355765e-05, "loss": 0.3987679183483124, "step": 5323, "token_acc": 0.8644309206266995 }, { "epoch": 0.28727135380132734, "grad_norm": 0.4304732382297516, "learning_rate": 1.672483041433051e-05, "loss": 0.4027784466743469, "step": 5324, "token_acc": 0.8614993646759848 }, { "epoch": 0.28732531160632385, "grad_norm": 0.37567463517189026, "learning_rate": 1.6723536918930698e-05, "loss": 0.36265671253204346, "step": 5325, "token_acc": 0.8738841097224477 }, { "epoch": 0.28737926941132036, "grad_norm": 0.3997870683670044, "learning_rate": 1.6722243218195828e-05, "loss": 0.3947690427303314, "step": 5326, "token_acc": 0.8684467600130251 }, { "epoch": 0.28743322721631687, "grad_norm": 0.43674150109291077, "learning_rate": 1.6720949312165412e-05, "loss": 0.3838599920272827, "step": 5327, "token_acc": 0.8641627078384798 }, { "epoch": 0.2874871850213133, "grad_norm": 0.47370481491088867, "learning_rate": 1.6719655200878965e-05, "loss": 0.3796469271183014, "step": 5328, "token_acc": 0.8703484237971083 }, { "epoch": 0.2875411428263098, "grad_norm": 0.4115867018699646, "learning_rate": 1.6718360884376004e-05, "loss": 0.412681519985199, "step": 5329, "token_acc": 0.8572060123784262 }, { "epoch": 0.28759510063130633, "grad_norm": 0.4287578761577606, "learning_rate": 1.6717066362696066e-05, "loss": 0.4171442985534668, "step": 5330, "token_acc": 0.8511252163877668 }, { "epoch": 0.2876490584363028, "grad_norm": 0.4735126495361328, "learning_rate": 1.6715771635878683e-05, "loss": 0.4827778935432434, "step": 5331, "token_acc": 0.8392882766537524 }, { "epoch": 0.2877030162412993, "grad_norm": 0.3450487554073334, "learning_rate": 1.671447670396339e-05, "loss": 0.39440762996673584, "step": 5332, "token_acc": 0.8700528415830907 }, { "epoch": 0.2877569740462958, "grad_norm": 0.44652920961380005, "learning_rate": 1.671318156698974e-05, "loss": 0.390603244304657, "step": 5333, "token_acc": 0.8625620954388078 }, { "epoch": 0.2878109318512923, "grad_norm": 0.37328335642814636, "learning_rate": 1.6711886224997277e-05, "loss": 0.4306844472885132, "step": 5334, "token_acc": 0.8519488683989941 }, { "epoch": 0.28786488965628876, "grad_norm": 0.5050148963928223, "learning_rate": 1.6710590678025573e-05, "loss": 0.409138560295105, "step": 5335, "token_acc": 0.8553930530164534 }, { "epoch": 0.28791884746128527, "grad_norm": 0.3497459888458252, "learning_rate": 1.6709294926114186e-05, "loss": 0.40553656220436096, "step": 5336, "token_acc": 0.8641655886157826 }, { "epoch": 0.2879728052662818, "grad_norm": 0.3619745075702667, "learning_rate": 1.670799896930269e-05, "loss": 0.35619884729385376, "step": 5337, "token_acc": 0.8729294841457643 }, { "epoch": 0.2880267630712783, "grad_norm": 0.44383057951927185, "learning_rate": 1.670670280763066e-05, "loss": 0.46078750491142273, "step": 5338, "token_acc": 0.8450134770889488 }, { "epoch": 0.28808072087627473, "grad_norm": 0.41093483567237854, "learning_rate": 1.6705406441137687e-05, "loss": 0.35617297887802124, "step": 5339, "token_acc": 0.8727346772551415 }, { "epoch": 0.28813467868127124, "grad_norm": 0.4237924814224243, "learning_rate": 1.6704109869863358e-05, "loss": 0.43734705448150635, "step": 5340, "token_acc": 0.8489548213081591 }, { "epoch": 0.28818863648626775, "grad_norm": 0.4454770088195801, "learning_rate": 1.670281309384727e-05, "loss": 0.35495322942733765, "step": 5341, "token_acc": 0.8766084963863917 }, { "epoch": 0.28824259429126425, "grad_norm": 0.37252041697502136, "learning_rate": 1.6701516113129022e-05, "loss": 0.4437388777732849, "step": 5342, "token_acc": 0.8459199686151432 }, { "epoch": 0.2882965520962607, "grad_norm": 0.3989388644695282, "learning_rate": 1.670021892774823e-05, "loss": 0.38021159172058105, "step": 5343, "token_acc": 0.8674632605411344 }, { "epoch": 0.2883505099012572, "grad_norm": 0.3959357440471649, "learning_rate": 1.669892153774451e-05, "loss": 0.43532466888427734, "step": 5344, "token_acc": 0.8542697694008634 }, { "epoch": 0.2884044677062537, "grad_norm": 0.36479973793029785, "learning_rate": 1.6697623943157475e-05, "loss": 0.4351137578487396, "step": 5345, "token_acc": 0.8472712180950405 }, { "epoch": 0.2884584255112502, "grad_norm": 0.3971833288669586, "learning_rate": 1.669632614402676e-05, "loss": 0.38511037826538086, "step": 5346, "token_acc": 0.8637809588729835 }, { "epoch": 0.2885123833162467, "grad_norm": 0.4585055112838745, "learning_rate": 1.6695028140392003e-05, "loss": 0.43777960538864136, "step": 5347, "token_acc": 0.8499912785627072 }, { "epoch": 0.2885663411212432, "grad_norm": 0.40459224581718445, "learning_rate": 1.669372993229284e-05, "loss": 0.4298764765262604, "step": 5348, "token_acc": 0.852435150873478 }, { "epoch": 0.2886202989262397, "grad_norm": 0.41666463017463684, "learning_rate": 1.6692431519768918e-05, "loss": 0.38145413994789124, "step": 5349, "token_acc": 0.8662128712871288 }, { "epoch": 0.2886742567312362, "grad_norm": 0.46656501293182373, "learning_rate": 1.669113290285989e-05, "loss": 0.45708873867988586, "step": 5350, "token_acc": 0.847937786418553 }, { "epoch": 0.28872821453623265, "grad_norm": 0.3719303607940674, "learning_rate": 1.6689834081605414e-05, "loss": 0.35750848054885864, "step": 5351, "token_acc": 0.8698162729658793 }, { "epoch": 0.28878217234122916, "grad_norm": 0.39008602499961853, "learning_rate": 1.668853505604516e-05, "loss": 0.3489469289779663, "step": 5352, "token_acc": 0.8778054862842892 }, { "epoch": 0.28883613014622567, "grad_norm": 0.5350364446640015, "learning_rate": 1.6687235826218797e-05, "loss": 0.40937480330467224, "step": 5353, "token_acc": 0.8579142760715491 }, { "epoch": 0.2888900879512221, "grad_norm": 0.4997212886810303, "learning_rate": 1.6685936392166003e-05, "loss": 0.44741788506507874, "step": 5354, "token_acc": 0.8462387345962847 }, { "epoch": 0.2889440457562186, "grad_norm": 0.4037880003452301, "learning_rate": 1.6684636753926464e-05, "loss": 0.44577667117118835, "step": 5355, "token_acc": 0.8390457910611462 }, { "epoch": 0.28899800356121513, "grad_norm": 0.3193359673023224, "learning_rate": 1.6683336911539868e-05, "loss": 0.3463667631149292, "step": 5356, "token_acc": 0.8755142947568308 }, { "epoch": 0.28905196136621164, "grad_norm": 0.4960751235485077, "learning_rate": 1.6682036865045915e-05, "loss": 0.3997947573661804, "step": 5357, "token_acc": 0.8601317957166392 }, { "epoch": 0.2891059191712081, "grad_norm": 0.5062166452407837, "learning_rate": 1.6680736614484307e-05, "loss": 0.4419595003128052, "step": 5358, "token_acc": 0.8469605403483825 }, { "epoch": 0.2891598769762046, "grad_norm": 0.2945152819156647, "learning_rate": 1.6679436159894752e-05, "loss": 0.39367440342903137, "step": 5359, "token_acc": 0.8629483158325378 }, { "epoch": 0.2892138347812011, "grad_norm": 0.36241745948791504, "learning_rate": 1.6678135501316966e-05, "loss": 0.38703835010528564, "step": 5360, "token_acc": 0.8665911878479102 }, { "epoch": 0.2892677925861976, "grad_norm": 0.3607863783836365, "learning_rate": 1.6676834638790672e-05, "loss": 0.4461752474308014, "step": 5361, "token_acc": 0.8490453460620525 }, { "epoch": 0.28932175039119407, "grad_norm": 0.3096359670162201, "learning_rate": 1.66755335723556e-05, "loss": 0.4261135160923004, "step": 5362, "token_acc": 0.8549063116370809 }, { "epoch": 0.2893757081961906, "grad_norm": 0.4708009660243988, "learning_rate": 1.6674232302051476e-05, "loss": 0.3960748016834259, "step": 5363, "token_acc": 0.86064 }, { "epoch": 0.2894296660011871, "grad_norm": 0.40117108821868896, "learning_rate": 1.667293082791805e-05, "loss": 0.37099286913871765, "step": 5364, "token_acc": 0.8693061440677966 }, { "epoch": 0.2894836238061836, "grad_norm": 0.3457116484642029, "learning_rate": 1.667162914999506e-05, "loss": 0.4543166756629944, "step": 5365, "token_acc": 0.8462368492042083 }, { "epoch": 0.28953758161118004, "grad_norm": 0.46690380573272705, "learning_rate": 1.667032726832227e-05, "loss": 0.4308862090110779, "step": 5366, "token_acc": 0.853599516031458 }, { "epoch": 0.28959153941617655, "grad_norm": 0.3775443434715271, "learning_rate": 1.666902518293943e-05, "loss": 0.3668028712272644, "step": 5367, "token_acc": 0.8725827245380318 }, { "epoch": 0.28964549722117305, "grad_norm": 0.5332104563713074, "learning_rate": 1.6667722893886304e-05, "loss": 0.41672512888908386, "step": 5368, "token_acc": 0.8527799530148786 }, { "epoch": 0.28969945502616956, "grad_norm": 0.37920692563056946, "learning_rate": 1.6666420401202673e-05, "loss": 0.36640188097953796, "step": 5369, "token_acc": 0.8742028985507246 }, { "epoch": 0.289753412831166, "grad_norm": 0.4838995039463043, "learning_rate": 1.6665117704928304e-05, "loss": 0.3650749623775482, "step": 5370, "token_acc": 0.8638202247191011 }, { "epoch": 0.2898073706361625, "grad_norm": 0.4671616554260254, "learning_rate": 1.666381480510299e-05, "loss": 0.40013617277145386, "step": 5371, "token_acc": 0.8626524990161354 }, { "epoch": 0.289861328441159, "grad_norm": 0.35143327713012695, "learning_rate": 1.6662511701766514e-05, "loss": 0.31881362199783325, "step": 5372, "token_acc": 0.8891439373480962 }, { "epoch": 0.2899152862461555, "grad_norm": 0.4604019820690155, "learning_rate": 1.6661208394958684e-05, "loss": 0.44593092799186707, "step": 5373, "token_acc": 0.8504870817450233 }, { "epoch": 0.289969244051152, "grad_norm": 0.40191158652305603, "learning_rate": 1.6659904884719287e-05, "loss": 0.3956160545349121, "step": 5374, "token_acc": 0.8618079584775087 }, { "epoch": 0.2900232018561485, "grad_norm": 0.4652888774871826, "learning_rate": 1.6658601171088142e-05, "loss": 0.3852273225784302, "step": 5375, "token_acc": 0.8622775800711744 }, { "epoch": 0.290077159661145, "grad_norm": 0.318817675113678, "learning_rate": 1.6657297254105062e-05, "loss": 0.4007217288017273, "step": 5376, "token_acc": 0.8625347636074692 }, { "epoch": 0.29013111746614145, "grad_norm": 0.3793073892593384, "learning_rate": 1.6655993133809866e-05, "loss": 0.41772621870040894, "step": 5377, "token_acc": 0.8539382695226851 }, { "epoch": 0.29018507527113796, "grad_norm": 0.38938814401626587, "learning_rate": 1.665468881024238e-05, "loss": 0.390333890914917, "step": 5378, "token_acc": 0.8663006177076183 }, { "epoch": 0.29023903307613447, "grad_norm": 0.4389747977256775, "learning_rate": 1.665338428344245e-05, "loss": 0.4381975531578064, "step": 5379, "token_acc": 0.8498225034727581 }, { "epoch": 0.290292990881131, "grad_norm": 0.42560282349586487, "learning_rate": 1.6652079553449903e-05, "loss": 0.39524340629577637, "step": 5380, "token_acc": 0.8633453614247468 }, { "epoch": 0.2903469486861274, "grad_norm": 0.36443647742271423, "learning_rate": 1.6650774620304588e-05, "loss": 0.4164484143257141, "step": 5381, "token_acc": 0.8561606902129955 }, { "epoch": 0.29040090649112393, "grad_norm": 0.3977176249027252, "learning_rate": 1.664946948404636e-05, "loss": 0.4268263578414917, "step": 5382, "token_acc": 0.8563494446112279 }, { "epoch": 0.29045486429612044, "grad_norm": 0.41822201013565063, "learning_rate": 1.6648164144715077e-05, "loss": 0.3764064311981201, "step": 5383, "token_acc": 0.8694820717131474 }, { "epoch": 0.29050882210111695, "grad_norm": 0.3283812701702118, "learning_rate": 1.6646858602350597e-05, "loss": 0.3590756952762604, "step": 5384, "token_acc": 0.8742685169704063 }, { "epoch": 0.2905627799061134, "grad_norm": 0.5428935885429382, "learning_rate": 1.6645552856992802e-05, "loss": 0.3940090239048004, "step": 5385, "token_acc": 0.8630450070323488 }, { "epoch": 0.2906167377111099, "grad_norm": 0.4236809313297272, "learning_rate": 1.6644246908681562e-05, "loss": 0.3993648588657379, "step": 5386, "token_acc": 0.863111599780099 }, { "epoch": 0.2906706955161064, "grad_norm": 0.4677956700325012, "learning_rate": 1.664294075745676e-05, "loss": 0.3749462962150574, "step": 5387, "token_acc": 0.8685631976953547 }, { "epoch": 0.2907246533211029, "grad_norm": 0.2809295356273651, "learning_rate": 1.664163440335829e-05, "loss": 0.36272764205932617, "step": 5388, "token_acc": 0.8728513958307936 }, { "epoch": 0.2907786111260994, "grad_norm": 0.4352147579193115, "learning_rate": 1.6640327846426042e-05, "loss": 0.4137079119682312, "step": 5389, "token_acc": 0.8588989146070355 }, { "epoch": 0.2908325689310959, "grad_norm": 0.5509915351867676, "learning_rate": 1.6639021086699926e-05, "loss": 0.4161182641983032, "step": 5390, "token_acc": 0.8568698237417711 }, { "epoch": 0.2908865267360924, "grad_norm": 0.4906843900680542, "learning_rate": 1.663771412421984e-05, "loss": 0.466276079416275, "step": 5391, "token_acc": 0.8479457856840322 }, { "epoch": 0.2909404845410889, "grad_norm": 0.3609129786491394, "learning_rate": 1.6636406959025707e-05, "loss": 0.35355767607688904, "step": 5392, "token_acc": 0.8766083376222337 }, { "epoch": 0.29099444234608535, "grad_norm": 0.42408236861228943, "learning_rate": 1.6635099591157443e-05, "loss": 0.3400188684463501, "step": 5393, "token_acc": 0.8792503346720214 }, { "epoch": 0.29104840015108185, "grad_norm": 0.4131942391395569, "learning_rate": 1.6633792020654978e-05, "loss": 0.43594813346862793, "step": 5394, "token_acc": 0.8547622687662437 }, { "epoch": 0.29110235795607836, "grad_norm": 0.4540643095970154, "learning_rate": 1.6632484247558237e-05, "loss": 0.37331539392471313, "step": 5395, "token_acc": 0.8679955394480067 }, { "epoch": 0.2911563157610748, "grad_norm": 0.4222448766231537, "learning_rate": 1.663117627190717e-05, "loss": 0.4312816858291626, "step": 5396, "token_acc": 0.8466538276360135 }, { "epoch": 0.2912102735660713, "grad_norm": 0.37797412276268005, "learning_rate": 1.6629868093741713e-05, "loss": 0.373393177986145, "step": 5397, "token_acc": 0.8640451150739217 }, { "epoch": 0.2912642313710678, "grad_norm": 0.4237290322780609, "learning_rate": 1.6628559713101822e-05, "loss": 0.44349563121795654, "step": 5398, "token_acc": 0.8514612835191323 }, { "epoch": 0.29131818917606433, "grad_norm": 0.4764014780521393, "learning_rate": 1.6627251130027454e-05, "loss": 0.4334968030452728, "step": 5399, "token_acc": 0.8514821536600121 }, { "epoch": 0.2913721469810608, "grad_norm": 0.4608491063117981, "learning_rate": 1.6625942344558574e-05, "loss": 0.42633360624313354, "step": 5400, "token_acc": 0.855763770584895 }, { "epoch": 0.2914261047860573, "grad_norm": 0.34818556904792786, "learning_rate": 1.6624633356735153e-05, "loss": 0.430022656917572, "step": 5401, "token_acc": 0.8556837298726268 }, { "epoch": 0.2914800625910538, "grad_norm": 0.38295555114746094, "learning_rate": 1.662332416659716e-05, "loss": 0.36965906620025635, "step": 5402, "token_acc": 0.8717326096271961 }, { "epoch": 0.2915340203960503, "grad_norm": 0.49763673543930054, "learning_rate": 1.6622014774184583e-05, "loss": 0.47512298822402954, "step": 5403, "token_acc": 0.8384573403674732 }, { "epoch": 0.29158797820104676, "grad_norm": 0.4195703864097595, "learning_rate": 1.6620705179537407e-05, "loss": 0.44653573632240295, "step": 5404, "token_acc": 0.8467539704653106 }, { "epoch": 0.29164193600604327, "grad_norm": 0.34521758556365967, "learning_rate": 1.6619395382695637e-05, "loss": 0.4144074022769928, "step": 5405, "token_acc": 0.8584584332029772 }, { "epoch": 0.2916958938110398, "grad_norm": 0.418811559677124, "learning_rate": 1.661808538369926e-05, "loss": 0.3998190462589264, "step": 5406, "token_acc": 0.8587253170653395 }, { "epoch": 0.2917498516160363, "grad_norm": 0.356820672750473, "learning_rate": 1.661677518258829e-05, "loss": 0.4267510771751404, "step": 5407, "token_acc": 0.8477673261974268 }, { "epoch": 0.29180380942103273, "grad_norm": 0.41532400250434875, "learning_rate": 1.6615464779402735e-05, "loss": 0.4072362780570984, "step": 5408, "token_acc": 0.8592188919164396 }, { "epoch": 0.29185776722602924, "grad_norm": 0.4406529664993286, "learning_rate": 1.6614154174182627e-05, "loss": 0.42142796516418457, "step": 5409, "token_acc": 0.8549705924544542 }, { "epoch": 0.29191172503102575, "grad_norm": 0.42068013548851013, "learning_rate": 1.661284336696798e-05, "loss": 0.42751890420913696, "step": 5410, "token_acc": 0.8552361396303901 }, { "epoch": 0.29196568283602226, "grad_norm": 0.5227024555206299, "learning_rate": 1.661153235779883e-05, "loss": 0.43530768156051636, "step": 5411, "token_acc": 0.8527378060175668 }, { "epoch": 0.2920196406410187, "grad_norm": 0.3831176459789276, "learning_rate": 1.661022114671521e-05, "loss": 0.5021636486053467, "step": 5412, "token_acc": 0.8328641280189658 }, { "epoch": 0.2920735984460152, "grad_norm": 0.43442463874816895, "learning_rate": 1.660890973375717e-05, "loss": 0.378204345703125, "step": 5413, "token_acc": 0.8683354546740206 }, { "epoch": 0.2921275562510117, "grad_norm": 0.30815085768699646, "learning_rate": 1.6607598118964755e-05, "loss": 0.3761243224143982, "step": 5414, "token_acc": 0.8693619709412508 }, { "epoch": 0.29218151405600823, "grad_norm": 0.41100025177001953, "learning_rate": 1.660628630237803e-05, "loss": 0.38331156969070435, "step": 5415, "token_acc": 0.8681894776259127 }, { "epoch": 0.2922354718610047, "grad_norm": 0.36325255036354065, "learning_rate": 1.660497428403705e-05, "loss": 0.33824968338012695, "step": 5416, "token_acc": 0.8802731411229135 }, { "epoch": 0.2922894296660012, "grad_norm": 0.4041789770126343, "learning_rate": 1.660366206398188e-05, "loss": 0.41196590662002563, "step": 5417, "token_acc": 0.855585464333782 }, { "epoch": 0.2923433874709977, "grad_norm": 0.38906383514404297, "learning_rate": 1.6602349642252608e-05, "loss": 0.4204636514186859, "step": 5418, "token_acc": 0.8572291886991993 }, { "epoch": 0.29239734527599415, "grad_norm": 0.3901921808719635, "learning_rate": 1.6601037018889306e-05, "loss": 0.44187694787979126, "step": 5419, "token_acc": 0.8443579766536965 }, { "epoch": 0.29245130308099065, "grad_norm": 0.43063533306121826, "learning_rate": 1.659972419393206e-05, "loss": 0.3737232983112335, "step": 5420, "token_acc": 0.8735512630014859 }, { "epoch": 0.29250526088598716, "grad_norm": 0.4144243896007538, "learning_rate": 1.659841116742097e-05, "loss": 0.38322097063064575, "step": 5421, "token_acc": 0.8721912122513239 }, { "epoch": 0.29255921869098367, "grad_norm": 0.4500638544559479, "learning_rate": 1.6597097939396126e-05, "loss": 0.4004352390766144, "step": 5422, "token_acc": 0.8616754801001948 }, { "epoch": 0.2926131764959801, "grad_norm": 0.533541738986969, "learning_rate": 1.6595784509897647e-05, "loss": 0.40848836302757263, "step": 5423, "token_acc": 0.8621307072515667 }, { "epoch": 0.29266713430097663, "grad_norm": 0.3242558538913727, "learning_rate": 1.6594470878965628e-05, "loss": 0.36743998527526855, "step": 5424, "token_acc": 0.870657479184611 }, { "epoch": 0.29272109210597314, "grad_norm": 0.3735352158546448, "learning_rate": 1.6593157046640204e-05, "loss": 0.417629599571228, "step": 5425, "token_acc": 0.8593865941132822 }, { "epoch": 0.29277504991096964, "grad_norm": 0.43018561601638794, "learning_rate": 1.6591843012961485e-05, "loss": 0.4233321249485016, "step": 5426, "token_acc": 0.8581367211131277 }, { "epoch": 0.2928290077159661, "grad_norm": 0.4347005784511566, "learning_rate": 1.659052877796961e-05, "loss": 0.3832571804523468, "step": 5427, "token_acc": 0.8649072380454664 }, { "epoch": 0.2928829655209626, "grad_norm": 0.4042221009731293, "learning_rate": 1.658921434170471e-05, "loss": 0.3811573386192322, "step": 5428, "token_acc": 0.8687448728465955 }, { "epoch": 0.2929369233259591, "grad_norm": 0.36855143308639526, "learning_rate": 1.6587899704206934e-05, "loss": 0.39473816752433777, "step": 5429, "token_acc": 0.8634605683537918 }, { "epoch": 0.2929908811309556, "grad_norm": 0.442545086145401, "learning_rate": 1.6586584865516423e-05, "loss": 0.36113911867141724, "step": 5430, "token_acc": 0.8740671641791045 }, { "epoch": 0.29304483893595207, "grad_norm": 0.36627161502838135, "learning_rate": 1.6585269825673338e-05, "loss": 0.38268357515335083, "step": 5431, "token_acc": 0.8637076537013801 }, { "epoch": 0.2930987967409486, "grad_norm": 0.4139486849308014, "learning_rate": 1.6583954584717838e-05, "loss": 0.40213334560394287, "step": 5432, "token_acc": 0.8604870384917518 }, { "epoch": 0.2931527545459451, "grad_norm": 0.45135053992271423, "learning_rate": 1.6582639142690086e-05, "loss": 0.4733661115169525, "step": 5433, "token_acc": 0.8416435826408125 }, { "epoch": 0.2932067123509416, "grad_norm": 0.4393117129802704, "learning_rate": 1.6581323499630264e-05, "loss": 0.3381692171096802, "step": 5434, "token_acc": 0.8740416210295728 }, { "epoch": 0.29326067015593804, "grad_norm": 0.42218253016471863, "learning_rate": 1.6580007655578544e-05, "loss": 0.41354677081108093, "step": 5435, "token_acc": 0.8584761321909424 }, { "epoch": 0.29331462796093455, "grad_norm": 0.4227144122123718, "learning_rate": 1.6578691610575114e-05, "loss": 0.4088969826698303, "step": 5436, "token_acc": 0.8589873784999275 }, { "epoch": 0.29336858576593106, "grad_norm": 0.4608992338180542, "learning_rate": 1.6577375364660165e-05, "loss": 0.39537400007247925, "step": 5437, "token_acc": 0.858041958041958 }, { "epoch": 0.2934225435709275, "grad_norm": 0.47358083724975586, "learning_rate": 1.6576058917873898e-05, "loss": 0.45505914092063904, "step": 5438, "token_acc": 0.8500290641348576 }, { "epoch": 0.293476501375924, "grad_norm": 0.39683106541633606, "learning_rate": 1.657474227025651e-05, "loss": 0.42925935983657837, "step": 5439, "token_acc": 0.852394916911046 }, { "epoch": 0.2935304591809205, "grad_norm": 0.36350059509277344, "learning_rate": 1.657342542184822e-05, "loss": 0.38385313749313354, "step": 5440, "token_acc": 0.8646110899632027 }, { "epoch": 0.29358441698591703, "grad_norm": 0.42471054196357727, "learning_rate": 1.6572108372689237e-05, "loss": 0.42741358280181885, "step": 5441, "token_acc": 0.8527648234510327 }, { "epoch": 0.2936383747909135, "grad_norm": 0.47720083594322205, "learning_rate": 1.6570791122819783e-05, "loss": 0.41963934898376465, "step": 5442, "token_acc": 0.8596436870642913 }, { "epoch": 0.29369233259591, "grad_norm": 0.375750869512558, "learning_rate": 1.6569473672280097e-05, "loss": 0.41021856665611267, "step": 5443, "token_acc": 0.8612660683967984 }, { "epoch": 0.2937462904009065, "grad_norm": 0.4759637117385864, "learning_rate": 1.6568156021110404e-05, "loss": 0.4425056576728821, "step": 5444, "token_acc": 0.8478455440480757 }, { "epoch": 0.293800248205903, "grad_norm": 0.3880325257778168, "learning_rate": 1.6566838169350942e-05, "loss": 0.4200742542743683, "step": 5445, "token_acc": 0.8556647697878945 }, { "epoch": 0.29385420601089945, "grad_norm": 0.39377179741859436, "learning_rate": 1.6565520117041963e-05, "loss": 0.3973962664604187, "step": 5446, "token_acc": 0.8621930042173158 }, { "epoch": 0.29390816381589596, "grad_norm": 0.36368024349212646, "learning_rate": 1.6564201864223724e-05, "loss": 0.3533887267112732, "step": 5447, "token_acc": 0.8733491811938722 }, { "epoch": 0.29396212162089247, "grad_norm": 0.4365871548652649, "learning_rate": 1.6562883410936476e-05, "loss": 0.45408838987350464, "step": 5448, "token_acc": 0.8506833381797034 }, { "epoch": 0.294016079425889, "grad_norm": 0.4391222596168518, "learning_rate": 1.656156475722049e-05, "loss": 0.3841876685619354, "step": 5449, "token_acc": 0.8615598885793871 }, { "epoch": 0.29407003723088543, "grad_norm": 0.32493674755096436, "learning_rate": 1.6560245903116033e-05, "loss": 0.3729200065135956, "step": 5450, "token_acc": 0.8720403541280626 }, { "epoch": 0.29412399503588194, "grad_norm": 0.4232448637485504, "learning_rate": 1.6558926848663387e-05, "loss": 0.36764365434646606, "step": 5451, "token_acc": 0.8702337023370234 }, { "epoch": 0.29417795284087844, "grad_norm": 0.3139270842075348, "learning_rate": 1.6557607593902834e-05, "loss": 0.33887168765068054, "step": 5452, "token_acc": 0.8833067517278044 }, { "epoch": 0.29423191064587495, "grad_norm": 0.32168611884117126, "learning_rate": 1.655628813887466e-05, "loss": 0.38572072982788086, "step": 5453, "token_acc": 0.8687534935718279 }, { "epoch": 0.2942858684508714, "grad_norm": 0.40334758162498474, "learning_rate": 1.6554968483619166e-05, "loss": 0.3781426250934601, "step": 5454, "token_acc": 0.8676750380517504 }, { "epoch": 0.2943398262558679, "grad_norm": 0.38586127758026123, "learning_rate": 1.655364862817665e-05, "loss": 0.36116814613342285, "step": 5455, "token_acc": 0.8738999080520163 }, { "epoch": 0.2943937840608644, "grad_norm": 0.3766021132469177, "learning_rate": 1.6552328572587422e-05, "loss": 0.4373242259025574, "step": 5456, "token_acc": 0.8562687462507499 }, { "epoch": 0.2944477418658609, "grad_norm": 0.5030026435852051, "learning_rate": 1.65510083168918e-05, "loss": 0.3907957673072815, "step": 5457, "token_acc": 0.8616364338093401 }, { "epoch": 0.2945016996708574, "grad_norm": 0.3278408646583557, "learning_rate": 1.6549687861130092e-05, "loss": 0.31318098306655884, "step": 5458, "token_acc": 0.8866708255354544 }, { "epoch": 0.2945556574758539, "grad_norm": 0.4728720486164093, "learning_rate": 1.654836720534264e-05, "loss": 0.394046813249588, "step": 5459, "token_acc": 0.8657460682777138 }, { "epoch": 0.2946096152808504, "grad_norm": 0.3588753938674927, "learning_rate": 1.6547046349569767e-05, "loss": 0.4106403887271881, "step": 5460, "token_acc": 0.8596469872185027 }, { "epoch": 0.29466357308584684, "grad_norm": 0.39691162109375, "learning_rate": 1.654572529385181e-05, "loss": 0.38964325189590454, "step": 5461, "token_acc": 0.8669397463002114 }, { "epoch": 0.29471753089084335, "grad_norm": 0.42226579785346985, "learning_rate": 1.654440403822912e-05, "loss": 0.3576767146587372, "step": 5462, "token_acc": 0.8742104313300848 }, { "epoch": 0.29477148869583986, "grad_norm": 0.4540005326271057, "learning_rate": 1.6543082582742047e-05, "loss": 0.4089711308479309, "step": 5463, "token_acc": 0.8608560041587247 }, { "epoch": 0.29482544650083636, "grad_norm": 0.3621355891227722, "learning_rate": 1.654176092743094e-05, "loss": 0.39193594455718994, "step": 5464, "token_acc": 0.8603610323735641 }, { "epoch": 0.2948794043058328, "grad_norm": 0.39690297842025757, "learning_rate": 1.6540439072336175e-05, "loss": 0.4246825575828552, "step": 5465, "token_acc": 0.8608174214928199 }, { "epoch": 0.2949333621108293, "grad_norm": 0.430197536945343, "learning_rate": 1.653911701749811e-05, "loss": 0.3837983310222626, "step": 5466, "token_acc": 0.8652894606630381 }, { "epoch": 0.29498731991582583, "grad_norm": 0.49561572074890137, "learning_rate": 1.6537794762957127e-05, "loss": 0.3796612322330475, "step": 5467, "token_acc": 0.8709480122324159 }, { "epoch": 0.29504127772082234, "grad_norm": 0.42193418741226196, "learning_rate": 1.6536472308753603e-05, "loss": 0.39900651574134827, "step": 5468, "token_acc": 0.8641899182981347 }, { "epoch": 0.2950952355258188, "grad_norm": 0.4120798408985138, "learning_rate": 1.6535149654927926e-05, "loss": 0.40862739086151123, "step": 5469, "token_acc": 0.8609728688393 }, { "epoch": 0.2951491933308153, "grad_norm": 0.4709225296974182, "learning_rate": 1.653382680152049e-05, "loss": 0.39960312843322754, "step": 5470, "token_acc": 0.8663686040735221 }, { "epoch": 0.2952031511358118, "grad_norm": 0.3136325478553772, "learning_rate": 1.65325037485717e-05, "loss": 0.4313264489173889, "step": 5471, "token_acc": 0.8537776812852801 }, { "epoch": 0.2952571089408083, "grad_norm": 0.3955362141132355, "learning_rate": 1.653118049612195e-05, "loss": 0.3937825560569763, "step": 5472, "token_acc": 0.8648415340979159 }, { "epoch": 0.29531106674580476, "grad_norm": 0.427682489156723, "learning_rate": 1.6529857044211662e-05, "loss": 0.44011908769607544, "step": 5473, "token_acc": 0.8487406983400114 }, { "epoch": 0.29536502455080127, "grad_norm": 0.29076042771339417, "learning_rate": 1.6528533392881253e-05, "loss": 0.39639028906822205, "step": 5474, "token_acc": 0.8612538540596094 }, { "epoch": 0.2954189823557978, "grad_norm": 0.3339579999446869, "learning_rate": 1.6527209542171137e-05, "loss": 0.3941090703010559, "step": 5475, "token_acc": 0.8617962941107046 }, { "epoch": 0.2954729401607943, "grad_norm": 0.44590386748313904, "learning_rate": 1.652588549212176e-05, "loss": 0.33840250968933105, "step": 5476, "token_acc": 0.879524886877828 }, { "epoch": 0.29552689796579074, "grad_norm": 0.3711310625076294, "learning_rate": 1.6524561242773545e-05, "loss": 0.3605872392654419, "step": 5477, "token_acc": 0.8747772365450873 }, { "epoch": 0.29558085577078724, "grad_norm": 0.39730894565582275, "learning_rate": 1.652323679416694e-05, "loss": 0.46773412823677063, "step": 5478, "token_acc": 0.8454285714285714 }, { "epoch": 0.29563481357578375, "grad_norm": 0.5317535400390625, "learning_rate": 1.6521912146342392e-05, "loss": 0.4552110433578491, "step": 5479, "token_acc": 0.8473611590203518 }, { "epoch": 0.29568877138078026, "grad_norm": 0.3305690884590149, "learning_rate": 1.6520587299340356e-05, "loss": 0.42274925112724304, "step": 5480, "token_acc": 0.8568273831431726 }, { "epoch": 0.2957427291857767, "grad_norm": 0.46177059412002563, "learning_rate": 1.651926225320129e-05, "loss": 0.38120460510253906, "step": 5481, "token_acc": 0.8640893989731199 }, { "epoch": 0.2957966869907732, "grad_norm": 0.38678181171417236, "learning_rate": 1.6517937007965665e-05, "loss": 0.35956478118896484, "step": 5482, "token_acc": 0.8711247045738912 }, { "epoch": 0.2958506447957697, "grad_norm": 0.48682379722595215, "learning_rate": 1.651661156367395e-05, "loss": 0.43268883228302, "step": 5483, "token_acc": 0.8577330946150667 }, { "epoch": 0.2959046026007662, "grad_norm": 0.33015838265419006, "learning_rate": 1.6515285920366627e-05, "loss": 0.3115159273147583, "step": 5484, "token_acc": 0.8885990086094443 }, { "epoch": 0.2959585604057627, "grad_norm": 0.39929482340812683, "learning_rate": 1.6513960078084178e-05, "loss": 0.3870365023612976, "step": 5485, "token_acc": 0.8662276575729069 }, { "epoch": 0.2960125182107592, "grad_norm": 0.4809959828853607, "learning_rate": 1.6512634036867094e-05, "loss": 0.41257646679878235, "step": 5486, "token_acc": 0.8547008547008547 }, { "epoch": 0.2960664760157557, "grad_norm": 0.46523499488830566, "learning_rate": 1.6511307796755868e-05, "loss": 0.45923906564712524, "step": 5487, "token_acc": 0.8419098143236075 }, { "epoch": 0.29612043382075215, "grad_norm": 0.37759852409362793, "learning_rate": 1.6509981357791014e-05, "loss": 0.39056307077407837, "step": 5488, "token_acc": 0.8676308539944904 }, { "epoch": 0.29617439162574866, "grad_norm": 0.43715164065361023, "learning_rate": 1.6508654720013033e-05, "loss": 0.41662806272506714, "step": 5489, "token_acc": 0.8538541127780652 }, { "epoch": 0.29622834943074516, "grad_norm": 0.47316089272499084, "learning_rate": 1.650732788346244e-05, "loss": 0.44607698917388916, "step": 5490, "token_acc": 0.8492778249787596 }, { "epoch": 0.29628230723574167, "grad_norm": 0.449371337890625, "learning_rate": 1.6506000848179763e-05, "loss": 0.42920035123825073, "step": 5491, "token_acc": 0.8542958954731424 }, { "epoch": 0.2963362650407381, "grad_norm": 0.45210668444633484, "learning_rate": 1.650467361420552e-05, "loss": 0.40057018399238586, "step": 5492, "token_acc": 0.8620643069440692 }, { "epoch": 0.29639022284573463, "grad_norm": 0.39946335554122925, "learning_rate": 1.650334618158025e-05, "loss": 0.3754512369632721, "step": 5493, "token_acc": 0.8751637411579775 }, { "epoch": 0.29644418065073114, "grad_norm": 0.4158490002155304, "learning_rate": 1.650201855034449e-05, "loss": 0.4229496717453003, "step": 5494, "token_acc": 0.858612503297283 }, { "epoch": 0.29649813845572764, "grad_norm": 0.32763537764549255, "learning_rate": 1.6500690720538787e-05, "loss": 0.38620665669441223, "step": 5495, "token_acc": 0.8688488816148391 }, { "epoch": 0.2965520962607241, "grad_norm": 0.3981391489505768, "learning_rate": 1.6499362692203696e-05, "loss": 0.3711049258708954, "step": 5496, "token_acc": 0.8662274805055122 }, { "epoch": 0.2966060540657206, "grad_norm": 0.4144884943962097, "learning_rate": 1.6498034465379766e-05, "loss": 0.38156044483184814, "step": 5497, "token_acc": 0.8697792362768496 }, { "epoch": 0.2966600118707171, "grad_norm": 0.34676870703697205, "learning_rate": 1.649670604010757e-05, "loss": 0.37638869881629944, "step": 5498, "token_acc": 0.8707126436781609 }, { "epoch": 0.2967139696757136, "grad_norm": 0.40291300415992737, "learning_rate": 1.649537741642767e-05, "loss": 0.4165424704551697, "step": 5499, "token_acc": 0.8569484936831876 }, { "epoch": 0.29676792748071007, "grad_norm": 0.4432699382305145, "learning_rate": 1.6494048594380644e-05, "loss": 0.406392902135849, "step": 5500, "token_acc": 0.8605583392984968 }, { "epoch": 0.2968218852857066, "grad_norm": 0.40079939365386963, "learning_rate": 1.649271957400708e-05, "loss": 0.39770832657814026, "step": 5501, "token_acc": 0.8633279881583816 }, { "epoch": 0.2968758430907031, "grad_norm": 0.4079775810241699, "learning_rate": 1.6491390355347562e-05, "loss": 0.37071824073791504, "step": 5502, "token_acc": 0.8730212573496156 }, { "epoch": 0.29692980089569954, "grad_norm": 0.388156920671463, "learning_rate": 1.6490060938442677e-05, "loss": 0.3959397077560425, "step": 5503, "token_acc": 0.864385866517106 }, { "epoch": 0.29698375870069604, "grad_norm": 0.3380526900291443, "learning_rate": 1.6488731323333033e-05, "loss": 0.35760900378227234, "step": 5504, "token_acc": 0.8720777515103756 }, { "epoch": 0.29703771650569255, "grad_norm": 0.42837536334991455, "learning_rate": 1.6487401510059236e-05, "loss": 0.393499493598938, "step": 5505, "token_acc": 0.8683269124931206 }, { "epoch": 0.29709167431068906, "grad_norm": 0.3628774583339691, "learning_rate": 1.6486071498661895e-05, "loss": 0.4086167514324188, "step": 5506, "token_acc": 0.8593524139924834 }, { "epoch": 0.2971456321156855, "grad_norm": 0.32075071334838867, "learning_rate": 1.6484741289181633e-05, "loss": 0.4378761649131775, "step": 5507, "token_acc": 0.8494503965493252 }, { "epoch": 0.297199589920682, "grad_norm": 0.30981215834617615, "learning_rate": 1.6483410881659064e-05, "loss": 0.40488824248313904, "step": 5508, "token_acc": 0.8619621723260701 }, { "epoch": 0.2972535477256785, "grad_norm": 0.3950309753417969, "learning_rate": 1.648208027613483e-05, "loss": 0.4269823431968689, "step": 5509, "token_acc": 0.8559340904030283 }, { "epoch": 0.29730750553067503, "grad_norm": 0.5019435286521912, "learning_rate": 1.6480749472649562e-05, "loss": 0.40811285376548767, "step": 5510, "token_acc": 0.8567354470726388 }, { "epoch": 0.2973614633356715, "grad_norm": 0.4391302466392517, "learning_rate": 1.6479418471243903e-05, "loss": 0.4237598180770874, "step": 5511, "token_acc": 0.858409421728199 }, { "epoch": 0.297415421140668, "grad_norm": 0.35858792066574097, "learning_rate": 1.6478087271958503e-05, "loss": 0.4106789231300354, "step": 5512, "token_acc": 0.852765435881722 }, { "epoch": 0.2974693789456645, "grad_norm": 0.46761927008628845, "learning_rate": 1.6476755874834013e-05, "loss": 0.43689531087875366, "step": 5513, "token_acc": 0.8526743398781313 }, { "epoch": 0.297523336750661, "grad_norm": 0.34017518162727356, "learning_rate": 1.647542427991109e-05, "loss": 0.32800018787384033, "step": 5514, "token_acc": 0.881768953068592 }, { "epoch": 0.29757729455565746, "grad_norm": 0.444522500038147, "learning_rate": 1.6474092487230414e-05, "loss": 0.433552086353302, "step": 5515, "token_acc": 0.8554015531308365 }, { "epoch": 0.29763125236065396, "grad_norm": 0.35654211044311523, "learning_rate": 1.6472760496832643e-05, "loss": 0.4236040413379669, "step": 5516, "token_acc": 0.8600881558367696 }, { "epoch": 0.29768521016565047, "grad_norm": 0.4341507852077484, "learning_rate": 1.6471428308758467e-05, "loss": 0.3851543068885803, "step": 5517, "token_acc": 0.8647342995169082 }, { "epoch": 0.297739167970647, "grad_norm": 0.32207754254341125, "learning_rate": 1.647009592304856e-05, "loss": 0.42667466402053833, "step": 5518, "token_acc": 0.8585493519441675 }, { "epoch": 0.29779312577564343, "grad_norm": 0.3994005620479584, "learning_rate": 1.6468763339743624e-05, "loss": 0.4069663882255554, "step": 5519, "token_acc": 0.8607928151130381 }, { "epoch": 0.29784708358063994, "grad_norm": 0.4609006941318512, "learning_rate": 1.646743055888435e-05, "loss": 0.37785327434539795, "step": 5520, "token_acc": 0.8668100358422939 }, { "epoch": 0.29790104138563644, "grad_norm": 0.5136622786521912, "learning_rate": 1.646609758051144e-05, "loss": 0.3847375214099884, "step": 5521, "token_acc": 0.8722413793103448 }, { "epoch": 0.29795499919063295, "grad_norm": 0.39825016260147095, "learning_rate": 1.64647644046656e-05, "loss": 0.38069891929626465, "step": 5522, "token_acc": 0.8710615608337373 }, { "epoch": 0.2980089569956294, "grad_norm": 0.3626258075237274, "learning_rate": 1.6463431031387555e-05, "loss": 0.3968532085418701, "step": 5523, "token_acc": 0.8664683782091421 }, { "epoch": 0.2980629148006259, "grad_norm": 0.4326876401901245, "learning_rate": 1.6462097460718013e-05, "loss": 0.32444095611572266, "step": 5524, "token_acc": 0.8826636785657116 }, { "epoch": 0.2981168726056224, "grad_norm": 0.36669766902923584, "learning_rate": 1.6460763692697712e-05, "loss": 0.3681143522262573, "step": 5525, "token_acc": 0.8690614136732329 }, { "epoch": 0.29817083041061887, "grad_norm": 0.4660140573978424, "learning_rate": 1.6459429727367378e-05, "loss": 0.4211469292640686, "step": 5526, "token_acc": 0.8519146531015968 }, { "epoch": 0.2982247882156154, "grad_norm": 0.39222171902656555, "learning_rate": 1.6458095564767753e-05, "loss": 0.4373503029346466, "step": 5527, "token_acc": 0.8561765179578031 }, { "epoch": 0.2982787460206119, "grad_norm": 0.27826809883117676, "learning_rate": 1.6456761204939582e-05, "loss": 0.36202675104141235, "step": 5528, "token_acc": 0.8707110241356817 }, { "epoch": 0.2983327038256084, "grad_norm": 0.36266598105430603, "learning_rate": 1.6455426647923612e-05, "loss": 0.4214307963848114, "step": 5529, "token_acc": 0.8593586005830903 }, { "epoch": 0.29838666163060484, "grad_norm": 0.43969449400901794, "learning_rate": 1.6454091893760605e-05, "loss": 0.42383837699890137, "step": 5530, "token_acc": 0.8535589965190253 }, { "epoch": 0.29844061943560135, "grad_norm": 0.42391490936279297, "learning_rate": 1.6452756942491327e-05, "loss": 0.4263615608215332, "step": 5531, "token_acc": 0.8451028553883942 }, { "epoch": 0.29849457724059786, "grad_norm": 0.34688055515289307, "learning_rate": 1.6451421794156537e-05, "loss": 0.3687085211277008, "step": 5532, "token_acc": 0.8714808880750744 }, { "epoch": 0.29854853504559437, "grad_norm": 0.5224451422691345, "learning_rate": 1.6450086448797014e-05, "loss": 0.39859241247177124, "step": 5533, "token_acc": 0.8624020411882631 }, { "epoch": 0.2986024928505908, "grad_norm": 0.3947922885417938, "learning_rate": 1.644875090645355e-05, "loss": 0.39559873938560486, "step": 5534, "token_acc": 0.8587672052663076 }, { "epoch": 0.2986564506555873, "grad_norm": 0.34774723649024963, "learning_rate": 1.644741516716691e-05, "loss": 0.4020196795463562, "step": 5535, "token_acc": 0.8634769539078156 }, { "epoch": 0.29871040846058383, "grad_norm": 0.4676208198070526, "learning_rate": 1.644607923097791e-05, "loss": 0.34963786602020264, "step": 5536, "token_acc": 0.8753969736596301 }, { "epoch": 0.29876436626558034, "grad_norm": 0.3887799084186554, "learning_rate": 1.6444743097927335e-05, "loss": 0.4031771123409271, "step": 5537, "token_acc": 0.8637395912187736 }, { "epoch": 0.2988183240705768, "grad_norm": 0.3270588219165802, "learning_rate": 1.6443406768055994e-05, "loss": 0.3774825930595398, "step": 5538, "token_acc": 0.8695046243506905 }, { "epoch": 0.2988722818755733, "grad_norm": 0.33518895506858826, "learning_rate": 1.6442070241404696e-05, "loss": 0.39359161257743835, "step": 5539, "token_acc": 0.8616341902734567 }, { "epoch": 0.2989262396805698, "grad_norm": 0.3745992183685303, "learning_rate": 1.6440733518014265e-05, "loss": 0.3898514211177826, "step": 5540, "token_acc": 0.8704216354344123 }, { "epoch": 0.2989801974855663, "grad_norm": 0.38109222054481506, "learning_rate": 1.6439396597925517e-05, "loss": 0.3477705717086792, "step": 5541, "token_acc": 0.8757468975026812 }, { "epoch": 0.29903415529056276, "grad_norm": 0.4023952782154083, "learning_rate": 1.6438059481179284e-05, "loss": 0.3943936228752136, "step": 5542, "token_acc": 0.8602130786186627 }, { "epoch": 0.29908811309555927, "grad_norm": 0.5204648375511169, "learning_rate": 1.64367221678164e-05, "loss": 0.44201546907424927, "step": 5543, "token_acc": 0.8511301636788776 }, { "epoch": 0.2991420709005558, "grad_norm": 0.3896077871322632, "learning_rate": 1.6435384657877708e-05, "loss": 0.371864914894104, "step": 5544, "token_acc": 0.867300131061599 }, { "epoch": 0.29919602870555223, "grad_norm": 0.501075029373169, "learning_rate": 1.6434046951404054e-05, "loss": 0.44851598143577576, "step": 5545, "token_acc": 0.8498663498663499 }, { "epoch": 0.29924998651054874, "grad_norm": 0.4258939325809479, "learning_rate": 1.6432709048436292e-05, "loss": 0.3896586298942566, "step": 5546, "token_acc": 0.863352142748208 }, { "epoch": 0.29930394431554525, "grad_norm": 0.38984936475753784, "learning_rate": 1.643137094901528e-05, "loss": 0.3600814938545227, "step": 5547, "token_acc": 0.8754021643755484 }, { "epoch": 0.29935790212054175, "grad_norm": 0.38111257553100586, "learning_rate": 1.6430032653181886e-05, "loss": 0.3734254837036133, "step": 5548, "token_acc": 0.8744058764222958 }, { "epoch": 0.2994118599255382, "grad_norm": 0.3956143260002136, "learning_rate": 1.642869416097698e-05, "loss": 0.39346563816070557, "step": 5549, "token_acc": 0.8662873399715505 }, { "epoch": 0.2994658177305347, "grad_norm": 0.45016157627105713, "learning_rate": 1.6427355472441434e-05, "loss": 0.4370276927947998, "step": 5550, "token_acc": 0.8501167921100441 }, { "epoch": 0.2995197755355312, "grad_norm": 0.3678445518016815, "learning_rate": 1.642601658761614e-05, "loss": 0.3891103267669678, "step": 5551, "token_acc": 0.8672694978201195 }, { "epoch": 0.2995737333405277, "grad_norm": 0.5073769092559814, "learning_rate": 1.642467750654198e-05, "loss": 0.4551635980606079, "step": 5552, "token_acc": 0.8437654830718414 }, { "epoch": 0.2996276911455242, "grad_norm": 0.35476061701774597, "learning_rate": 1.6423338229259855e-05, "loss": 0.3478584885597229, "step": 5553, "token_acc": 0.8762886597938144 }, { "epoch": 0.2996816489505207, "grad_norm": 0.39230847358703613, "learning_rate": 1.642199875581066e-05, "loss": 0.3118937611579895, "step": 5554, "token_acc": 0.8930838079739626 }, { "epoch": 0.2997356067555172, "grad_norm": 0.44938766956329346, "learning_rate": 1.6420659086235308e-05, "loss": 0.38740164041519165, "step": 5555, "token_acc": 0.8665186624115443 }, { "epoch": 0.2997895645605137, "grad_norm": 0.48617634177207947, "learning_rate": 1.6419319220574707e-05, "loss": 0.46734514832496643, "step": 5556, "token_acc": 0.8434250764525993 }, { "epoch": 0.29984352236551015, "grad_norm": 0.34754514694213867, "learning_rate": 1.6417979158869778e-05, "loss": 0.3830123841762543, "step": 5557, "token_acc": 0.8620234797520117 }, { "epoch": 0.29989748017050666, "grad_norm": 0.48964473605155945, "learning_rate": 1.6416638901161448e-05, "loss": 0.42446720600128174, "step": 5558, "token_acc": 0.8548748599178184 }, { "epoch": 0.29995143797550317, "grad_norm": 0.35574281215667725, "learning_rate": 1.641529844749065e-05, "loss": 0.4172866940498352, "step": 5559, "token_acc": 0.8536443148688047 }, { "epoch": 0.3000053957804997, "grad_norm": 0.3630579113960266, "learning_rate": 1.6413957797898316e-05, "loss": 0.4155225157737732, "step": 5560, "token_acc": 0.8565523720056365 }, { "epoch": 0.3000593535854961, "grad_norm": 0.4532102942466736, "learning_rate": 1.6412616952425387e-05, "loss": 0.3859401345252991, "step": 5561, "token_acc": 0.8623154623154623 }, { "epoch": 0.30011331139049263, "grad_norm": 0.35761746764183044, "learning_rate": 1.6411275911112824e-05, "loss": 0.38501739501953125, "step": 5562, "token_acc": 0.8690069379724552 }, { "epoch": 0.30016726919548914, "grad_norm": 0.344585657119751, "learning_rate": 1.6409934674001568e-05, "loss": 0.4032474160194397, "step": 5563, "token_acc": 0.8597110754414126 }, { "epoch": 0.30022122700048565, "grad_norm": 0.4000619947910309, "learning_rate": 1.6408593241132585e-05, "loss": 0.33336204290390015, "step": 5564, "token_acc": 0.8787354513998112 }, { "epoch": 0.3002751848054821, "grad_norm": 0.4551219642162323, "learning_rate": 1.640725161254685e-05, "loss": 0.399399995803833, "step": 5565, "token_acc": 0.8631994733377222 }, { "epoch": 0.3003291426104786, "grad_norm": 0.36173856258392334, "learning_rate": 1.6405909788285323e-05, "loss": 0.4011915326118469, "step": 5566, "token_acc": 0.8651067638409411 }, { "epoch": 0.3003831004154751, "grad_norm": 0.3307805061340332, "learning_rate": 1.6404567768388993e-05, "loss": 0.3840739130973816, "step": 5567, "token_acc": 0.8678320899677097 }, { "epoch": 0.30043705822047156, "grad_norm": 0.3758959174156189, "learning_rate": 1.640322555289884e-05, "loss": 0.398189902305603, "step": 5568, "token_acc": 0.8610354223433242 }, { "epoch": 0.30049101602546807, "grad_norm": 0.36225059628486633, "learning_rate": 1.6401883141855853e-05, "loss": 0.4019903540611267, "step": 5569, "token_acc": 0.8565926597190757 }, { "epoch": 0.3005449738304646, "grad_norm": 0.36931219696998596, "learning_rate": 1.6400540535301037e-05, "loss": 0.37398332357406616, "step": 5570, "token_acc": 0.8723404255319149 }, { "epoch": 0.3005989316354611, "grad_norm": 0.3330010771751404, "learning_rate": 1.6399197733275385e-05, "loss": 0.4029136598110199, "step": 5571, "token_acc": 0.8620538646814101 }, { "epoch": 0.30065288944045754, "grad_norm": 0.47307708859443665, "learning_rate": 1.6397854735819915e-05, "loss": 0.4427487850189209, "step": 5572, "token_acc": 0.8476997578692494 }, { "epoch": 0.30070684724545405, "grad_norm": 0.3919290602207184, "learning_rate": 1.6396511542975636e-05, "loss": 0.4451811909675598, "step": 5573, "token_acc": 0.8498626373626373 }, { "epoch": 0.30076080505045055, "grad_norm": 0.4470428228378296, "learning_rate": 1.639516815478357e-05, "loss": 0.35257887840270996, "step": 5574, "token_acc": 0.8754515243461928 }, { "epoch": 0.30081476285544706, "grad_norm": 0.40844210982322693, "learning_rate": 1.6393824571284744e-05, "loss": 0.3860529065132141, "step": 5575, "token_acc": 0.8638097660223805 }, { "epoch": 0.3008687206604435, "grad_norm": 0.3326355516910553, "learning_rate": 1.6392480792520188e-05, "loss": 0.47523432970046997, "step": 5576, "token_acc": 0.8395817109800868 }, { "epoch": 0.30092267846544, "grad_norm": 0.39941707253456116, "learning_rate": 1.6391136818530947e-05, "loss": 0.4071929454803467, "step": 5577, "token_acc": 0.8644815631894106 }, { "epoch": 0.3009766362704365, "grad_norm": 0.4600086212158203, "learning_rate": 1.6389792649358058e-05, "loss": 0.4590502083301544, "step": 5578, "token_acc": 0.8457774049217002 }, { "epoch": 0.30103059407543303, "grad_norm": 0.45697665214538574, "learning_rate": 1.6388448285042577e-05, "loss": 0.37077596783638, "step": 5579, "token_acc": 0.8715953307392996 }, { "epoch": 0.3010845518804295, "grad_norm": 0.5042998194694519, "learning_rate": 1.638710372562556e-05, "loss": 0.4206366539001465, "step": 5580, "token_acc": 0.8559015206372194 }, { "epoch": 0.301138509685426, "grad_norm": 0.4132435619831085, "learning_rate": 1.638575897114807e-05, "loss": 0.43132439255714417, "step": 5581, "token_acc": 0.8560691469372417 }, { "epoch": 0.3011924674904225, "grad_norm": 0.4048815965652466, "learning_rate": 1.638441402165117e-05, "loss": 0.4086911082267761, "step": 5582, "token_acc": 0.857653791130186 }, { "epoch": 0.301246425295419, "grad_norm": 0.44802311062812805, "learning_rate": 1.6383068877175938e-05, "loss": 0.38952404260635376, "step": 5583, "token_acc": 0.8620146152743946 }, { "epoch": 0.30130038310041546, "grad_norm": 0.4899073839187622, "learning_rate": 1.6381723537763457e-05, "loss": 0.4053846597671509, "step": 5584, "token_acc": 0.8648856133886074 }, { "epoch": 0.30135434090541197, "grad_norm": 0.48028868436813354, "learning_rate": 1.638037800345481e-05, "loss": 0.43364620208740234, "step": 5585, "token_acc": 0.8480069324090121 }, { "epoch": 0.3014082987104085, "grad_norm": 0.288546085357666, "learning_rate": 1.6379032274291092e-05, "loss": 0.35409441590309143, "step": 5586, "token_acc": 0.8742270591145189 }, { "epoch": 0.301462256515405, "grad_norm": 0.4378550052642822, "learning_rate": 1.6377686350313395e-05, "loss": 0.3911554217338562, "step": 5587, "token_acc": 0.8667965238237938 }, { "epoch": 0.30151621432040143, "grad_norm": 0.3467414379119873, "learning_rate": 1.6376340231562833e-05, "loss": 0.3513762354850769, "step": 5588, "token_acc": 0.8747085531967113 }, { "epoch": 0.30157017212539794, "grad_norm": 0.3944709300994873, "learning_rate": 1.637499391808051e-05, "loss": 0.3305153548717499, "step": 5589, "token_acc": 0.8768115942028986 }, { "epoch": 0.30162412993039445, "grad_norm": 0.40211784839630127, "learning_rate": 1.637364740990754e-05, "loss": 0.4232887923717499, "step": 5590, "token_acc": 0.8550280074686583 }, { "epoch": 0.3016780877353909, "grad_norm": 0.48906421661376953, "learning_rate": 1.6372300707085048e-05, "loss": 0.3469938039779663, "step": 5591, "token_acc": 0.8737001323501608 }, { "epoch": 0.3017320455403874, "grad_norm": 0.4001055657863617, "learning_rate": 1.6370953809654164e-05, "loss": 0.3940907418727875, "step": 5592, "token_acc": 0.864204236827811 }, { "epoch": 0.3017860033453839, "grad_norm": 0.35095348954200745, "learning_rate": 1.6369606717656016e-05, "loss": 0.36415714025497437, "step": 5593, "token_acc": 0.8751143043762247 }, { "epoch": 0.3018399611503804, "grad_norm": 0.38329923152923584, "learning_rate": 1.636825943113175e-05, "loss": 0.3779756724834442, "step": 5594, "token_acc": 0.8732292045041773 }, { "epoch": 0.30189391895537687, "grad_norm": 0.4036763310432434, "learning_rate": 1.636691195012251e-05, "loss": 0.3182257413864136, "step": 5595, "token_acc": 0.8853503184713376 }, { "epoch": 0.3019478767603734, "grad_norm": 0.4666918218135834, "learning_rate": 1.6365564274669443e-05, "loss": 0.42752784490585327, "step": 5596, "token_acc": 0.8545484838387204 }, { "epoch": 0.3020018345653699, "grad_norm": 0.5171011090278625, "learning_rate": 1.6364216404813714e-05, "loss": 0.4399079382419586, "step": 5597, "token_acc": 0.8505406348099058 }, { "epoch": 0.3020557923703664, "grad_norm": 0.38270217180252075, "learning_rate": 1.6362868340596483e-05, "loss": 0.3794052004814148, "step": 5598, "token_acc": 0.8684292058246411 }, { "epoch": 0.30210975017536285, "grad_norm": 0.31597474217414856, "learning_rate": 1.6361520082058923e-05, "loss": 0.3612842261791229, "step": 5599, "token_acc": 0.8702756464904803 }, { "epoch": 0.30216370798035935, "grad_norm": 0.3492033779621124, "learning_rate": 1.63601716292422e-05, "loss": 0.38296210765838623, "step": 5600, "token_acc": 0.86608442503639 }, { "epoch": 0.30221766578535586, "grad_norm": 0.38242366909980774, "learning_rate": 1.6358822982187503e-05, "loss": 0.43269771337509155, "step": 5601, "token_acc": 0.8532920394585914 }, { "epoch": 0.30227162359035237, "grad_norm": 0.4078570306301117, "learning_rate": 1.635747414093602e-05, "loss": 0.3479320704936981, "step": 5602, "token_acc": 0.8795162040626454 }, { "epoch": 0.3023255813953488, "grad_norm": 0.3980373442173004, "learning_rate": 1.635612510552894e-05, "loss": 0.4091051518917084, "step": 5603, "token_acc": 0.8608400509121765 }, { "epoch": 0.3023795392003453, "grad_norm": 0.4359710216522217, "learning_rate": 1.6354775876007467e-05, "loss": 0.39566919207572937, "step": 5604, "token_acc": 0.8640208197787899 }, { "epoch": 0.30243349700534183, "grad_norm": 0.4005633592605591, "learning_rate": 1.6353426452412802e-05, "loss": 0.40019142627716064, "step": 5605, "token_acc": 0.8625168484254381 }, { "epoch": 0.30248745481033834, "grad_norm": 0.4569682776927948, "learning_rate": 1.635207683478616e-05, "loss": 0.4308709502220154, "step": 5606, "token_acc": 0.8530469857189578 }, { "epoch": 0.3025414126153348, "grad_norm": 0.4318772852420807, "learning_rate": 1.635072702316875e-05, "loss": 0.4373849630355835, "step": 5607, "token_acc": 0.8461651483984719 }, { "epoch": 0.3025953704203313, "grad_norm": 0.4217773973941803, "learning_rate": 1.6349377017601806e-05, "loss": 0.41265416145324707, "step": 5608, "token_acc": 0.8572053675612602 }, { "epoch": 0.3026493282253278, "grad_norm": 0.40981537103652954, "learning_rate": 1.6348026818126546e-05, "loss": 0.36320483684539795, "step": 5609, "token_acc": 0.8714191419141915 }, { "epoch": 0.30270328603032426, "grad_norm": 0.31193938851356506, "learning_rate": 1.6346676424784212e-05, "loss": 0.32412946224212646, "step": 5610, "token_acc": 0.8853286064903191 }, { "epoch": 0.30275724383532077, "grad_norm": 0.36631518602371216, "learning_rate": 1.6345325837616044e-05, "loss": 0.4572770595550537, "step": 5611, "token_acc": 0.849195710455764 }, { "epoch": 0.3028112016403173, "grad_norm": 0.4113641679286957, "learning_rate": 1.6343975056663285e-05, "loss": 0.42805540561676025, "step": 5612, "token_acc": 0.8504913076341648 }, { "epoch": 0.3028651594453138, "grad_norm": 0.48631715774536133, "learning_rate": 1.6342624081967193e-05, "loss": 0.35082608461380005, "step": 5613, "token_acc": 0.8789819652789482 }, { "epoch": 0.30291911725031023, "grad_norm": 0.3189461827278137, "learning_rate": 1.634127291356902e-05, "loss": 0.40579527616500854, "step": 5614, "token_acc": 0.858745537990821 }, { "epoch": 0.30297307505530674, "grad_norm": 0.422967791557312, "learning_rate": 1.6339921551510033e-05, "loss": 0.40217146277427673, "step": 5615, "token_acc": 0.8637387387387387 }, { "epoch": 0.30302703286030325, "grad_norm": 0.4161340892314911, "learning_rate": 1.6338569995831505e-05, "loss": 0.4056856632232666, "step": 5616, "token_acc": 0.8594240837696335 }, { "epoch": 0.30308099066529975, "grad_norm": 0.27259308099746704, "learning_rate": 1.633721824657471e-05, "loss": 0.3623068332672119, "step": 5617, "token_acc": 0.8732489073181665 }, { "epoch": 0.3031349484702962, "grad_norm": 0.4135127067565918, "learning_rate": 1.6335866303780927e-05, "loss": 0.3790771961212158, "step": 5618, "token_acc": 0.8674142480211082 }, { "epoch": 0.3031889062752927, "grad_norm": 0.4941443204879761, "learning_rate": 1.6334514167491447e-05, "loss": 0.4592931866645813, "step": 5619, "token_acc": 0.8451795559529132 }, { "epoch": 0.3032428640802892, "grad_norm": 0.4192775785923004, "learning_rate": 1.6333161837747567e-05, "loss": 0.37000659108161926, "step": 5620, "token_acc": 0.8715151515151515 }, { "epoch": 0.30329682188528573, "grad_norm": 0.3887791037559509, "learning_rate": 1.6331809314590584e-05, "loss": 0.3590444326400757, "step": 5621, "token_acc": 0.8726510552182711 }, { "epoch": 0.3033507796902822, "grad_norm": 0.3469613492488861, "learning_rate": 1.63304565980618e-05, "loss": 0.35695797204971313, "step": 5622, "token_acc": 0.8791239257000277 }, { "epoch": 0.3034047374952787, "grad_norm": 0.3269638121128082, "learning_rate": 1.632910368820253e-05, "loss": 0.41256609559059143, "step": 5623, "token_acc": 0.8619140625 }, { "epoch": 0.3034586953002752, "grad_norm": 0.3439342677593231, "learning_rate": 1.6327750585054093e-05, "loss": 0.3313395380973816, "step": 5624, "token_acc": 0.8830052329927734 }, { "epoch": 0.3035126531052717, "grad_norm": 0.42066293954849243, "learning_rate": 1.632639728865781e-05, "loss": 0.4539617598056793, "step": 5625, "token_acc": 0.8453620879794143 }, { "epoch": 0.30356661091026815, "grad_norm": 0.42357727885246277, "learning_rate": 1.632504379905501e-05, "loss": 0.4105016589164734, "step": 5626, "token_acc": 0.8570989078603292 }, { "epoch": 0.30362056871526466, "grad_norm": 0.4162557125091553, "learning_rate": 1.6323690116287033e-05, "loss": 0.40336573123931885, "step": 5627, "token_acc": 0.8614855176369371 }, { "epoch": 0.30367452652026117, "grad_norm": 0.4129386842250824, "learning_rate": 1.6322336240395214e-05, "loss": 0.38628512620925903, "step": 5628, "token_acc": 0.8672616517920385 }, { "epoch": 0.3037284843252577, "grad_norm": 0.3923902213573456, "learning_rate": 1.63209821714209e-05, "loss": 0.4413028657436371, "step": 5629, "token_acc": 0.8526725922318276 }, { "epoch": 0.3037824421302541, "grad_norm": 0.43173009157180786, "learning_rate": 1.6319627909405447e-05, "loss": 0.39805686473846436, "step": 5630, "token_acc": 0.8622620380739082 }, { "epoch": 0.30383639993525063, "grad_norm": 0.3506447374820709, "learning_rate": 1.631827345439022e-05, "loss": 0.3669009804725647, "step": 5631, "token_acc": 0.8710473727518514 }, { "epoch": 0.30389035774024714, "grad_norm": 0.433554083108902, "learning_rate": 1.6316918806416568e-05, "loss": 0.4269762635231018, "step": 5632, "token_acc": 0.855621301775148 }, { "epoch": 0.3039443155452436, "grad_norm": 0.4604679048061371, "learning_rate": 1.6315563965525876e-05, "loss": 0.4574703872203827, "step": 5633, "token_acc": 0.8469848215506632 }, { "epoch": 0.3039982733502401, "grad_norm": 0.35024207830429077, "learning_rate": 1.6314208931759514e-05, "loss": 0.2991713881492615, "step": 5634, "token_acc": 0.8924214897482481 }, { "epoch": 0.3040522311552366, "grad_norm": 0.41983500123023987, "learning_rate": 1.631285370515886e-05, "loss": 0.360800176858902, "step": 5635, "token_acc": 0.8777960526315789 }, { "epoch": 0.3041061889602331, "grad_norm": 0.3648024797439575, "learning_rate": 1.6311498285765314e-05, "loss": 0.36706721782684326, "step": 5636, "token_acc": 0.8772619984264359 }, { "epoch": 0.30416014676522957, "grad_norm": 0.3791193962097168, "learning_rate": 1.6310142673620264e-05, "loss": 0.38618624210357666, "step": 5637, "token_acc": 0.8650192639829946 }, { "epoch": 0.3042141045702261, "grad_norm": 0.406675785779953, "learning_rate": 1.6308786868765106e-05, "loss": 0.36450356245040894, "step": 5638, "token_acc": 0.8771494415883708 }, { "epoch": 0.3042680623752226, "grad_norm": 0.47960910201072693, "learning_rate": 1.6307430871241252e-05, "loss": 0.44707778096199036, "step": 5639, "token_acc": 0.8450873216904413 }, { "epoch": 0.3043220201802191, "grad_norm": 0.3792031407356262, "learning_rate": 1.630607468109011e-05, "loss": 0.39564499258995056, "step": 5640, "token_acc": 0.8610561558362502 }, { "epoch": 0.30437597798521554, "grad_norm": 0.42715924978256226, "learning_rate": 1.63047182983531e-05, "loss": 0.4405108094215393, "step": 5641, "token_acc": 0.849841381741276 }, { "epoch": 0.30442993579021205, "grad_norm": 0.43561458587646484, "learning_rate": 1.6303361723071644e-05, "loss": 0.4317087233066559, "step": 5642, "token_acc": 0.8574332565388264 }, { "epoch": 0.30448389359520855, "grad_norm": 0.4940134584903717, "learning_rate": 1.6302004955287175e-05, "loss": 0.368944376707077, "step": 5643, "token_acc": 0.8740359897172236 }, { "epoch": 0.30453785140020506, "grad_norm": 0.3737511932849884, "learning_rate": 1.6300647995041123e-05, "loss": 0.3913520574569702, "step": 5644, "token_acc": 0.865377074892171 }, { "epoch": 0.3045918092052015, "grad_norm": 0.48919183015823364, "learning_rate": 1.6299290842374934e-05, "loss": 0.3703456223011017, "step": 5645, "token_acc": 0.8763191219924018 }, { "epoch": 0.304645767010198, "grad_norm": 0.36990657448768616, "learning_rate": 1.629793349733005e-05, "loss": 0.4201892018318176, "step": 5646, "token_acc": 0.8562075943340078 }, { "epoch": 0.30469972481519453, "grad_norm": 0.45984429121017456, "learning_rate": 1.629657595994793e-05, "loss": 0.465869277715683, "step": 5647, "token_acc": 0.8483007209062822 }, { "epoch": 0.30475368262019104, "grad_norm": 0.38982900977134705, "learning_rate": 1.6295218230270025e-05, "loss": 0.41009020805358887, "step": 5648, "token_acc": 0.8585237258347979 }, { "epoch": 0.3048076404251875, "grad_norm": 0.5665732622146606, "learning_rate": 1.6293860308337808e-05, "loss": 0.42861542105674744, "step": 5649, "token_acc": 0.8511240632805995 }, { "epoch": 0.304861598230184, "grad_norm": 0.3662937879562378, "learning_rate": 1.6292502194192744e-05, "loss": 0.3865041136741638, "step": 5650, "token_acc": 0.8692036290322581 }, { "epoch": 0.3049155560351805, "grad_norm": 0.3481125831604004, "learning_rate": 1.6291143887876312e-05, "loss": 0.4372946619987488, "step": 5651, "token_acc": 0.8534059945504087 }, { "epoch": 0.304969513840177, "grad_norm": 0.3450036644935608, "learning_rate": 1.6289785389429994e-05, "loss": 0.4516725540161133, "step": 5652, "token_acc": 0.8512184047222643 }, { "epoch": 0.30502347164517346, "grad_norm": 0.4207155108451843, "learning_rate": 1.628842669889528e-05, "loss": 0.40630069375038147, "step": 5653, "token_acc": 0.8640426532852439 }, { "epoch": 0.30507742945016997, "grad_norm": 0.45260515809059143, "learning_rate": 1.6287067816313656e-05, "loss": 0.445000559091568, "step": 5654, "token_acc": 0.8496306718255364 }, { "epoch": 0.3051313872551665, "grad_norm": 0.32622525095939636, "learning_rate": 1.6285708741726637e-05, "loss": 0.4430311918258667, "step": 5655, "token_acc": 0.8490925386511315 }, { "epoch": 0.3051853450601629, "grad_norm": 0.4412849545478821, "learning_rate": 1.6284349475175712e-05, "loss": 0.4238581955432892, "step": 5656, "token_acc": 0.8554175880251743 }, { "epoch": 0.30523930286515943, "grad_norm": 0.33882176876068115, "learning_rate": 1.6282990016702405e-05, "loss": 0.4512135088443756, "step": 5657, "token_acc": 0.849373618275608 }, { "epoch": 0.30529326067015594, "grad_norm": 0.3692954480648041, "learning_rate": 1.6281630366348223e-05, "loss": 0.4225183129310608, "step": 5658, "token_acc": 0.8576212327917648 }, { "epoch": 0.30534721847515245, "grad_norm": 0.44571375846862793, "learning_rate": 1.6280270524154703e-05, "loss": 0.4036964774131775, "step": 5659, "token_acc": 0.8584118438761776 }, { "epoch": 0.3054011762801489, "grad_norm": 0.4208593964576721, "learning_rate": 1.627891049016336e-05, "loss": 0.4065132141113281, "step": 5660, "token_acc": 0.8564800494896381 }, { "epoch": 0.3054551340851454, "grad_norm": 0.4595111906528473, "learning_rate": 1.627755026441574e-05, "loss": 0.41700154542922974, "step": 5661, "token_acc": 0.8518225039619651 }, { "epoch": 0.3055090918901419, "grad_norm": 0.35927310585975647, "learning_rate": 1.6276189846953375e-05, "loss": 0.380462110042572, "step": 5662, "token_acc": 0.8640528159525734 }, { "epoch": 0.3055630496951384, "grad_norm": 0.45278728008270264, "learning_rate": 1.627482923781782e-05, "loss": 0.4090440273284912, "step": 5663, "token_acc": 0.862022854364211 }, { "epoch": 0.3056170075001349, "grad_norm": 0.31406521797180176, "learning_rate": 1.6273468437050622e-05, "loss": 0.4156675338745117, "step": 5664, "token_acc": 0.8536699392239364 }, { "epoch": 0.3056709653051314, "grad_norm": 0.40034446120262146, "learning_rate": 1.6272107444693343e-05, "loss": 0.38706856966018677, "step": 5665, "token_acc": 0.8669660398540556 }, { "epoch": 0.3057249231101279, "grad_norm": 0.4546594023704529, "learning_rate": 1.6270746260787542e-05, "loss": 0.45898258686065674, "step": 5666, "token_acc": 0.8498063226290141 }, { "epoch": 0.3057788809151244, "grad_norm": 0.5409457087516785, "learning_rate": 1.6269384885374795e-05, "loss": 0.3533461391925812, "step": 5667, "token_acc": 0.877260083449235 }, { "epoch": 0.30583283872012085, "grad_norm": 0.411670446395874, "learning_rate": 1.6268023318496674e-05, "loss": 0.4213545620441437, "step": 5668, "token_acc": 0.8556910569105691 }, { "epoch": 0.30588679652511735, "grad_norm": 0.5398237705230713, "learning_rate": 1.626666156019477e-05, "loss": 0.48391833901405334, "step": 5669, "token_acc": 0.845962526944122 }, { "epoch": 0.30594075433011386, "grad_norm": 0.40631356835365295, "learning_rate": 1.6265299610510658e-05, "loss": 0.37744390964508057, "step": 5670, "token_acc": 0.8658940397350994 }, { "epoch": 0.30599471213511037, "grad_norm": 0.3585869073867798, "learning_rate": 1.6263937469485934e-05, "loss": 0.4075256586074829, "step": 5671, "token_acc": 0.8561255631448917 }, { "epoch": 0.3060486699401068, "grad_norm": 0.3881710171699524, "learning_rate": 1.6262575137162208e-05, "loss": 0.3736981153488159, "step": 5672, "token_acc": 0.8680997420464316 }, { "epoch": 0.30610262774510333, "grad_norm": 0.527802050113678, "learning_rate": 1.626121261358107e-05, "loss": 0.40304791927337646, "step": 5673, "token_acc": 0.8649679584986268 }, { "epoch": 0.30615658555009984, "grad_norm": 0.30004310607910156, "learning_rate": 1.6259849898784143e-05, "loss": 0.3211895823478699, "step": 5674, "token_acc": 0.8820700709869476 }, { "epoch": 0.3062105433550963, "grad_norm": 0.3464709520339966, "learning_rate": 1.6258486992813042e-05, "loss": 0.3425508141517639, "step": 5675, "token_acc": 0.8802858844550328 }, { "epoch": 0.3062645011600928, "grad_norm": 0.4134211242198944, "learning_rate": 1.6257123895709386e-05, "loss": 0.41039741039276123, "step": 5676, "token_acc": 0.8627784872995601 }, { "epoch": 0.3063184589650893, "grad_norm": 0.4465876519680023, "learning_rate": 1.6255760607514807e-05, "loss": 0.42274338006973267, "step": 5677, "token_acc": 0.8504684572142411 }, { "epoch": 0.3063724167700858, "grad_norm": 0.4171488285064697, "learning_rate": 1.6254397128270936e-05, "loss": 0.3718856871128082, "step": 5678, "token_acc": 0.8711696440213847 }, { "epoch": 0.30642637457508226, "grad_norm": 0.3619720935821533, "learning_rate": 1.6253033458019417e-05, "loss": 0.36860769987106323, "step": 5679, "token_acc": 0.8743464936001443 }, { "epoch": 0.30648033238007877, "grad_norm": 0.38220298290252686, "learning_rate": 1.625166959680189e-05, "loss": 0.3923576772212982, "step": 5680, "token_acc": 0.8630478087649402 }, { "epoch": 0.3065342901850753, "grad_norm": 0.35407012701034546, "learning_rate": 1.6250305544660013e-05, "loss": 0.44577282667160034, "step": 5681, "token_acc": 0.8507133743185246 }, { "epoch": 0.3065882479900718, "grad_norm": 0.551214873790741, "learning_rate": 1.6248941301635444e-05, "loss": 0.44755277037620544, "step": 5682, "token_acc": 0.8537492253666598 }, { "epoch": 0.30664220579506823, "grad_norm": 0.45811590552330017, "learning_rate": 1.6247576867769843e-05, "loss": 0.3831702172756195, "step": 5683, "token_acc": 0.8661130536130536 }, { "epoch": 0.30669616360006474, "grad_norm": 0.41748884320259094, "learning_rate": 1.6246212243104883e-05, "loss": 0.41881343722343445, "step": 5684, "token_acc": 0.8530451866404715 }, { "epoch": 0.30675012140506125, "grad_norm": 0.3519338071346283, "learning_rate": 1.6244847427682235e-05, "loss": 0.3529439866542816, "step": 5685, "token_acc": 0.8791637950838502 }, { "epoch": 0.30680407921005776, "grad_norm": 0.3506169617176056, "learning_rate": 1.6243482421543583e-05, "loss": 0.3896005153656006, "step": 5686, "token_acc": 0.8652674339878131 }, { "epoch": 0.3068580370150542, "grad_norm": 0.3096030354499817, "learning_rate": 1.6242117224730612e-05, "loss": 0.4061174988746643, "step": 5687, "token_acc": 0.8639170453221484 }, { "epoch": 0.3069119948200507, "grad_norm": 0.4420887529850006, "learning_rate": 1.624075183728502e-05, "loss": 0.4336814284324646, "step": 5688, "token_acc": 0.8472711888309125 }, { "epoch": 0.3069659526250472, "grad_norm": 0.3482394516468048, "learning_rate": 1.6239386259248502e-05, "loss": 0.32237708568573, "step": 5689, "token_acc": 0.882598235765838 }, { "epoch": 0.30701991043004373, "grad_norm": 0.42078080773353577, "learning_rate": 1.6238020490662755e-05, "loss": 0.3546251654624939, "step": 5690, "token_acc": 0.878866684916507 }, { "epoch": 0.3070738682350402, "grad_norm": 0.4090077579021454, "learning_rate": 1.6236654531569503e-05, "loss": 0.3729665279388428, "step": 5691, "token_acc": 0.8700443166846329 }, { "epoch": 0.3071278260400367, "grad_norm": 0.4618332087993622, "learning_rate": 1.6235288382010454e-05, "loss": 0.42014503479003906, "step": 5692, "token_acc": 0.8566528512219522 }, { "epoch": 0.3071817838450332, "grad_norm": 0.4865798056125641, "learning_rate": 1.623392204202733e-05, "loss": 0.4080185294151306, "step": 5693, "token_acc": 0.8601102941176471 }, { "epoch": 0.3072357416500297, "grad_norm": 0.5049293637275696, "learning_rate": 1.6232555511661864e-05, "loss": 0.4527485966682434, "step": 5694, "token_acc": 0.853599898772618 }, { "epoch": 0.30728969945502616, "grad_norm": 0.45987218618392944, "learning_rate": 1.623118879095578e-05, "loss": 0.41314420104026794, "step": 5695, "token_acc": 0.8583191368540601 }, { "epoch": 0.30734365726002266, "grad_norm": 0.2762027382850647, "learning_rate": 1.6229821879950825e-05, "loss": 0.39212435483932495, "step": 5696, "token_acc": 0.8624028825582705 }, { "epoch": 0.30739761506501917, "grad_norm": 0.38176247477531433, "learning_rate": 1.6228454778688742e-05, "loss": 0.3752603828907013, "step": 5697, "token_acc": 0.8682170542635659 }, { "epoch": 0.3074515728700156, "grad_norm": 0.4361152648925781, "learning_rate": 1.622708748721128e-05, "loss": 0.43840301036834717, "step": 5698, "token_acc": 0.8517789340458445 }, { "epoch": 0.30750553067501213, "grad_norm": 0.5218179225921631, "learning_rate": 1.62257200055602e-05, "loss": 0.42451047897338867, "step": 5699, "token_acc": 0.8521935933147632 }, { "epoch": 0.30755948848000864, "grad_norm": 0.327580064535141, "learning_rate": 1.6224352333777262e-05, "loss": 0.3999124765396118, "step": 5700, "token_acc": 0.8618217599100365 }, { "epoch": 0.30761344628500514, "grad_norm": 0.39348989725112915, "learning_rate": 1.6222984471904234e-05, "loss": 0.37131085991859436, "step": 5701, "token_acc": 0.8709158216642524 }, { "epoch": 0.3076674040900016, "grad_norm": 0.4683910310268402, "learning_rate": 1.6221616419982887e-05, "loss": 0.3665432631969452, "step": 5702, "token_acc": 0.873585426442175 }, { "epoch": 0.3077213618949981, "grad_norm": 0.3957746624946594, "learning_rate": 1.6220248178055005e-05, "loss": 0.46885794401168823, "step": 5703, "token_acc": 0.8465666712536122 }, { "epoch": 0.3077753196999946, "grad_norm": 0.47857242822647095, "learning_rate": 1.6218879746162376e-05, "loss": 0.3807727098464966, "step": 5704, "token_acc": 0.8684524825115169 }, { "epoch": 0.3078292775049911, "grad_norm": 0.3167330026626587, "learning_rate": 1.6217511124346787e-05, "loss": 0.41780346632003784, "step": 5705, "token_acc": 0.8560684947356242 }, { "epoch": 0.30788323530998757, "grad_norm": 0.3274070918560028, "learning_rate": 1.621614231265004e-05, "loss": 0.3611924350261688, "step": 5706, "token_acc": 0.8765703924362129 }, { "epoch": 0.3079371931149841, "grad_norm": 0.42804667353630066, "learning_rate": 1.6214773311113933e-05, "loss": 0.4030691385269165, "step": 5707, "token_acc": 0.8587443946188341 }, { "epoch": 0.3079911509199806, "grad_norm": 0.3246958255767822, "learning_rate": 1.6213404119780275e-05, "loss": 0.39935174584388733, "step": 5708, "token_acc": 0.8621434078643022 }, { "epoch": 0.3080451087249771, "grad_norm": 0.4269948899745941, "learning_rate": 1.6212034738690884e-05, "loss": 0.4653962254524231, "step": 5709, "token_acc": 0.847194769651817 }, { "epoch": 0.30809906652997354, "grad_norm": 0.4051326513290405, "learning_rate": 1.621066516788758e-05, "loss": 0.3598596751689911, "step": 5710, "token_acc": 0.8734645552760101 }, { "epoch": 0.30815302433497005, "grad_norm": 0.5064133405685425, "learning_rate": 1.620929540741219e-05, "loss": 0.374674528837204, "step": 5711, "token_acc": 0.8725803020633908 }, { "epoch": 0.30820698213996656, "grad_norm": 0.3878020644187927, "learning_rate": 1.6207925457306545e-05, "loss": 0.3905409872531891, "step": 5712, "token_acc": 0.8617655023016517 }, { "epoch": 0.30826093994496306, "grad_norm": 0.42641007900238037, "learning_rate": 1.620655531761248e-05, "loss": 0.40168172121047974, "step": 5713, "token_acc": 0.8608981380065718 }, { "epoch": 0.3083148977499595, "grad_norm": 0.4752657115459442, "learning_rate": 1.620518498837184e-05, "loss": 0.4204166531562805, "step": 5714, "token_acc": 0.8541438481197517 }, { "epoch": 0.308368855554956, "grad_norm": 0.4340956509113312, "learning_rate": 1.6203814469626485e-05, "loss": 0.4067745804786682, "step": 5715, "token_acc": 0.8635464733025708 }, { "epoch": 0.30842281335995253, "grad_norm": 0.4124613404273987, "learning_rate": 1.6202443761418255e-05, "loss": 0.4218870997428894, "step": 5716, "token_acc": 0.8558380270969707 }, { "epoch": 0.30847677116494904, "grad_norm": 0.4761415421962738, "learning_rate": 1.6201072863789017e-05, "loss": 0.4317772090435028, "step": 5717, "token_acc": 0.8545014688094004 }, { "epoch": 0.3085307289699455, "grad_norm": 0.4228428304195404, "learning_rate": 1.6199701776780637e-05, "loss": 0.3979068994522095, "step": 5718, "token_acc": 0.8614929785661493 }, { "epoch": 0.308584686774942, "grad_norm": 0.3621617257595062, "learning_rate": 1.6198330500434993e-05, "loss": 0.36963123083114624, "step": 5719, "token_acc": 0.8678808904307603 }, { "epoch": 0.3086386445799385, "grad_norm": 0.3929027318954468, "learning_rate": 1.6196959034793954e-05, "loss": 0.38030874729156494, "step": 5720, "token_acc": 0.8668513927373487 }, { "epoch": 0.30869260238493496, "grad_norm": 0.3835499882698059, "learning_rate": 1.6195587379899412e-05, "loss": 0.3594425320625305, "step": 5721, "token_acc": 0.8800830593528292 }, { "epoch": 0.30874656018993146, "grad_norm": 0.1944892853498459, "learning_rate": 1.6194215535793255e-05, "loss": 0.3961394429206848, "step": 5722, "token_acc": 0.8634656022540502 }, { "epoch": 0.30880051799492797, "grad_norm": 0.37358686327934265, "learning_rate": 1.619284350251738e-05, "loss": 0.43675583600997925, "step": 5723, "token_acc": 0.8473957602133932 }, { "epoch": 0.3088544757999245, "grad_norm": 0.3645040690898895, "learning_rate": 1.6191471280113687e-05, "loss": 0.37932509183883667, "step": 5724, "token_acc": 0.8644246719951741 }, { "epoch": 0.30890843360492093, "grad_norm": 0.4019537568092346, "learning_rate": 1.619009886862408e-05, "loss": 0.39019903540611267, "step": 5725, "token_acc": 0.8632953181272509 }, { "epoch": 0.30896239140991744, "grad_norm": 0.4255508780479431, "learning_rate": 1.6188726268090476e-05, "loss": 0.3888998329639435, "step": 5726, "token_acc": 0.8647810925997675 }, { "epoch": 0.30901634921491394, "grad_norm": 0.3783467411994934, "learning_rate": 1.6187353478554792e-05, "loss": 0.3991661071777344, "step": 5727, "token_acc": 0.8618133686300463 }, { "epoch": 0.30907030701991045, "grad_norm": 0.41373804211616516, "learning_rate": 1.6185980500058954e-05, "loss": 0.42815977334976196, "step": 5728, "token_acc": 0.8492378048780488 }, { "epoch": 0.3091242648249069, "grad_norm": 0.3843533992767334, "learning_rate": 1.6184607332644892e-05, "loss": 0.4108623266220093, "step": 5729, "token_acc": 0.8591194968553459 }, { "epoch": 0.3091782226299034, "grad_norm": 0.35639411211013794, "learning_rate": 1.6183233976354544e-05, "loss": 0.4134759306907654, "step": 5730, "token_acc": 0.8601941747572815 }, { "epoch": 0.3092321804348999, "grad_norm": 0.4417221248149872, "learning_rate": 1.618186043122985e-05, "loss": 0.4496973156929016, "step": 5731, "token_acc": 0.8538326506729081 }, { "epoch": 0.3092861382398964, "grad_norm": 0.3160912096500397, "learning_rate": 1.6180486697312752e-05, "loss": 0.3401445746421814, "step": 5732, "token_acc": 0.87968 }, { "epoch": 0.3093400960448929, "grad_norm": 0.48516523838043213, "learning_rate": 1.617911277464522e-05, "loss": 0.3765099048614502, "step": 5733, "token_acc": 0.8694917640433619 }, { "epoch": 0.3093940538498894, "grad_norm": 0.3857617676258087, "learning_rate": 1.6177738663269193e-05, "loss": 0.3721705377101898, "step": 5734, "token_acc": 0.8702341137123746 }, { "epoch": 0.3094480116548859, "grad_norm": 0.28420770168304443, "learning_rate": 1.6176364363226652e-05, "loss": 0.39634931087493896, "step": 5735, "token_acc": 0.8622754491017964 }, { "epoch": 0.3095019694598824, "grad_norm": 0.4296339154243469, "learning_rate": 1.6174989874559556e-05, "loss": 0.3551945686340332, "step": 5736, "token_acc": 0.8766873957227362 }, { "epoch": 0.30955592726487885, "grad_norm": 0.29134753346443176, "learning_rate": 1.6173615197309893e-05, "loss": 0.37294796109199524, "step": 5737, "token_acc": 0.8668604025400926 }, { "epoch": 0.30960988506987536, "grad_norm": 0.4199736714363098, "learning_rate": 1.6172240331519632e-05, "loss": 0.3774246573448181, "step": 5738, "token_acc": 0.8656223052601322 }, { "epoch": 0.30966384287487186, "grad_norm": 0.40669485926628113, "learning_rate": 1.6170865277230776e-05, "loss": 0.3690352141857147, "step": 5739, "token_acc": 0.8692163223493256 }, { "epoch": 0.3097178006798683, "grad_norm": 0.3530752658843994, "learning_rate": 1.6169490034485304e-05, "loss": 0.420073926448822, "step": 5740, "token_acc": 0.8557347670250897 }, { "epoch": 0.3097717584848648, "grad_norm": 0.329714834690094, "learning_rate": 1.6168114603325224e-05, "loss": 0.402879536151886, "step": 5741, "token_acc": 0.857550564586177 }, { "epoch": 0.30982571628986133, "grad_norm": 0.33735033869743347, "learning_rate": 1.6166738983792542e-05, "loss": 0.34535759687423706, "step": 5742, "token_acc": 0.8810096153846154 }, { "epoch": 0.30987967409485784, "grad_norm": 0.46615350246429443, "learning_rate": 1.6165363175929266e-05, "loss": 0.3961600959300995, "step": 5743, "token_acc": 0.8672909698996656 }, { "epoch": 0.3099336318998543, "grad_norm": 0.4049608111381531, "learning_rate": 1.6163987179777418e-05, "loss": 0.43656331300735474, "step": 5744, "token_acc": 0.8493341788205453 }, { "epoch": 0.3099875897048508, "grad_norm": 0.33816149830818176, "learning_rate": 1.616261099537901e-05, "loss": 0.4166337251663208, "step": 5745, "token_acc": 0.8569107452339688 }, { "epoch": 0.3100415475098473, "grad_norm": 0.4711812436580658, "learning_rate": 1.616123462277608e-05, "loss": 0.35885971784591675, "step": 5746, "token_acc": 0.8680934701899978 }, { "epoch": 0.3100955053148438, "grad_norm": 0.5006232857704163, "learning_rate": 1.6159858062010655e-05, "loss": 0.4351835250854492, "step": 5747, "token_acc": 0.8503529411764705 }, { "epoch": 0.31014946311984026, "grad_norm": 0.5097646713256836, "learning_rate": 1.615848131312478e-05, "loss": 0.3295254707336426, "step": 5748, "token_acc": 0.8818543504583982 }, { "epoch": 0.31020342092483677, "grad_norm": 0.36196082830429077, "learning_rate": 1.61571043761605e-05, "loss": 0.4577348232269287, "step": 5749, "token_acc": 0.8490538990825688 }, { "epoch": 0.3102573787298333, "grad_norm": 0.5628235936164856, "learning_rate": 1.6155727251159867e-05, "loss": 0.4469718337059021, "step": 5750, "token_acc": 0.8490783410138248 }, { "epoch": 0.3103113365348298, "grad_norm": 0.3657977283000946, "learning_rate": 1.6154349938164934e-05, "loss": 0.33425378799438477, "step": 5751, "token_acc": 0.8848355744907469 }, { "epoch": 0.31036529433982624, "grad_norm": 0.3159433901309967, "learning_rate": 1.6152972437217766e-05, "loss": 0.32618826627731323, "step": 5752, "token_acc": 0.880965005302227 }, { "epoch": 0.31041925214482274, "grad_norm": 0.3370809257030487, "learning_rate": 1.6151594748360433e-05, "loss": 0.41174471378326416, "step": 5753, "token_acc": 0.8551236749116607 }, { "epoch": 0.31047320994981925, "grad_norm": 0.37098249793052673, "learning_rate": 1.6150216871635006e-05, "loss": 0.4473300278186798, "step": 5754, "token_acc": 0.8518912349992405 }, { "epoch": 0.31052716775481576, "grad_norm": 0.3322104215621948, "learning_rate": 1.6148838807083568e-05, "loss": 0.33395087718963623, "step": 5755, "token_acc": 0.8825121819166215 }, { "epoch": 0.3105811255598122, "grad_norm": 0.36568862199783325, "learning_rate": 1.6147460554748204e-05, "loss": 0.38043543696403503, "step": 5756, "token_acc": 0.8652207591014718 }, { "epoch": 0.3106350833648087, "grad_norm": 0.39969953894615173, "learning_rate": 1.6146082114671004e-05, "loss": 0.32344233989715576, "step": 5757, "token_acc": 0.8829787234042553 }, { "epoch": 0.3106890411698052, "grad_norm": 0.4083506166934967, "learning_rate": 1.6144703486894067e-05, "loss": 0.372633159160614, "step": 5758, "token_acc": 0.867510959571359 }, { "epoch": 0.31074299897480173, "grad_norm": 0.3852914869785309, "learning_rate": 1.6143324671459498e-05, "loss": 0.40298494696617126, "step": 5759, "token_acc": 0.862312030075188 }, { "epoch": 0.3107969567797982, "grad_norm": 0.4373384118080139, "learning_rate": 1.6141945668409398e-05, "loss": 0.3986874222755432, "step": 5760, "token_acc": 0.8618987341772152 }, { "epoch": 0.3108509145847947, "grad_norm": 0.3223852217197418, "learning_rate": 1.6140566477785888e-05, "loss": 0.39942729473114014, "step": 5761, "token_acc": 0.8647011308562197 }, { "epoch": 0.3109048723897912, "grad_norm": 0.3647797703742981, "learning_rate": 1.6139187099631086e-05, "loss": 0.4197692573070526, "step": 5762, "token_acc": 0.8576078112286412 }, { "epoch": 0.31095883019478765, "grad_norm": 0.6150936484336853, "learning_rate": 1.6137807533987116e-05, "loss": 0.47395965456962585, "step": 5763, "token_acc": 0.841183339820942 }, { "epoch": 0.31101278799978416, "grad_norm": 0.5085703134536743, "learning_rate": 1.6136427780896114e-05, "loss": 0.4623892903327942, "step": 5764, "token_acc": 0.8457957110609481 }, { "epoch": 0.31106674580478066, "grad_norm": 0.35202085971832275, "learning_rate": 1.613504784040022e-05, "loss": 0.3716202974319458, "step": 5765, "token_acc": 0.8737138830162086 }, { "epoch": 0.31112070360977717, "grad_norm": 0.42225807905197144, "learning_rate": 1.6133667712541568e-05, "loss": 0.4223144054412842, "step": 5766, "token_acc": 0.8594954449894885 }, { "epoch": 0.3111746614147736, "grad_norm": 0.3962111175060272, "learning_rate": 1.613228739736231e-05, "loss": 0.3572768568992615, "step": 5767, "token_acc": 0.8774724312970419 }, { "epoch": 0.31122861921977013, "grad_norm": 0.3361309766769409, "learning_rate": 1.61309068949046e-05, "loss": 0.33567339181900024, "step": 5768, "token_acc": 0.8836960399571887 }, { "epoch": 0.31128257702476664, "grad_norm": 0.4248976409435272, "learning_rate": 1.6129526205210604e-05, "loss": 0.44343602657318115, "step": 5769, "token_acc": 0.8531786074672049 }, { "epoch": 0.31133653482976315, "grad_norm": 0.4139880836009979, "learning_rate": 1.612814532832248e-05, "loss": 0.4436500668525696, "step": 5770, "token_acc": 0.8458664934221212 }, { "epoch": 0.3113904926347596, "grad_norm": 0.5309815406799316, "learning_rate": 1.6126764264282405e-05, "loss": 0.37330248951911926, "step": 5771, "token_acc": 0.8668896321070234 }, { "epoch": 0.3114444504397561, "grad_norm": 0.3656003773212433, "learning_rate": 1.612538301313255e-05, "loss": 0.35410457849502563, "step": 5772, "token_acc": 0.8719889883000688 }, { "epoch": 0.3114984082447526, "grad_norm": 0.42752185463905334, "learning_rate": 1.6124001574915108e-05, "loss": 0.3962706923484802, "step": 5773, "token_acc": 0.864822663395048 }, { "epoch": 0.3115523660497491, "grad_norm": 0.5515018701553345, "learning_rate": 1.612261994967226e-05, "loss": 0.3905870318412781, "step": 5774, "token_acc": 0.8643859649122807 }, { "epoch": 0.31160632385474557, "grad_norm": 0.37870052456855774, "learning_rate": 1.61212381374462e-05, "loss": 0.4123014807701111, "step": 5775, "token_acc": 0.8608185114833362 }, { "epoch": 0.3116602816597421, "grad_norm": 0.3950919806957245, "learning_rate": 1.6119856138279136e-05, "loss": 0.41210123896598816, "step": 5776, "token_acc": 0.8589523683863306 }, { "epoch": 0.3117142394647386, "grad_norm": 0.3148537278175354, "learning_rate": 1.6118473952213265e-05, "loss": 0.3529626429080963, "step": 5777, "token_acc": 0.8781198973641241 }, { "epoch": 0.3117681972697351, "grad_norm": 0.43567800521850586, "learning_rate": 1.6117091579290804e-05, "loss": 0.41923201084136963, "step": 5778, "token_acc": 0.859107905982906 }, { "epoch": 0.31182215507473154, "grad_norm": 0.45302319526672363, "learning_rate": 1.611570901955397e-05, "loss": 0.3646901547908783, "step": 5779, "token_acc": 0.8744709154674084 }, { "epoch": 0.31187611287972805, "grad_norm": 0.3027569055557251, "learning_rate": 1.611432627304498e-05, "loss": 0.37609943747520447, "step": 5780, "token_acc": 0.8739719400096758 }, { "epoch": 0.31193007068472456, "grad_norm": 0.3029499351978302, "learning_rate": 1.611294333980607e-05, "loss": 0.2894560694694519, "step": 5781, "token_acc": 0.8910679611650485 }, { "epoch": 0.31198402848972107, "grad_norm": 0.3639487624168396, "learning_rate": 1.6111560219879474e-05, "loss": 0.4257235527038574, "step": 5782, "token_acc": 0.8537960954446855 }, { "epoch": 0.3120379862947175, "grad_norm": 0.44905707240104675, "learning_rate": 1.6110176913307425e-05, "loss": 0.31514137983322144, "step": 5783, "token_acc": 0.8870030225528946 }, { "epoch": 0.312091944099714, "grad_norm": 0.4101018011569977, "learning_rate": 1.610879342013218e-05, "loss": 0.4016043543815613, "step": 5784, "token_acc": 0.8592403017241379 }, { "epoch": 0.31214590190471053, "grad_norm": 0.43102067708969116, "learning_rate": 1.6107409740395977e-05, "loss": 0.41484493017196655, "step": 5785, "token_acc": 0.8567193675889329 }, { "epoch": 0.312199859709707, "grad_norm": 0.42278817296028137, "learning_rate": 1.6106025874141088e-05, "loss": 0.3792768120765686, "step": 5786, "token_acc": 0.8688068287251809 }, { "epoch": 0.3122538175147035, "grad_norm": 0.41388174891471863, "learning_rate": 1.610464182140976e-05, "loss": 0.39453837275505066, "step": 5787, "token_acc": 0.8586879011171856 }, { "epoch": 0.3123077753197, "grad_norm": 0.5557829737663269, "learning_rate": 1.610325758224428e-05, "loss": 0.4101579189300537, "step": 5788, "token_acc": 0.8562568405691353 }, { "epoch": 0.3123617331246965, "grad_norm": 0.4531363248825073, "learning_rate": 1.6101873156686904e-05, "loss": 0.40338024497032166, "step": 5789, "token_acc": 0.8621241202815099 }, { "epoch": 0.31241569092969296, "grad_norm": 0.3394613265991211, "learning_rate": 1.6100488544779923e-05, "loss": 0.38540971279144287, "step": 5790, "token_acc": 0.8656253589066268 }, { "epoch": 0.31246964873468946, "grad_norm": 0.3944678008556366, "learning_rate": 1.6099103746565622e-05, "loss": 0.42682671546936035, "step": 5791, "token_acc": 0.8524483133841132 }, { "epoch": 0.31252360653968597, "grad_norm": 0.41579294204711914, "learning_rate": 1.6097718762086286e-05, "loss": 0.3978992700576782, "step": 5792, "token_acc": 0.8650125535371437 }, { "epoch": 0.3125775643446825, "grad_norm": 0.3215988278388977, "learning_rate": 1.609633359138422e-05, "loss": 0.33904126286506653, "step": 5793, "token_acc": 0.8811048839071257 }, { "epoch": 0.31263152214967893, "grad_norm": 0.24938619136810303, "learning_rate": 1.609494823450172e-05, "loss": 0.42766621708869934, "step": 5794, "token_acc": 0.8522449975597852 }, { "epoch": 0.31268547995467544, "grad_norm": 0.47054240107536316, "learning_rate": 1.60935626914811e-05, "loss": 0.3480905592441559, "step": 5795, "token_acc": 0.8768304588350146 }, { "epoch": 0.31273943775967195, "grad_norm": 0.3888029158115387, "learning_rate": 1.6092176962364668e-05, "loss": 0.3769826591014862, "step": 5796, "token_acc": 0.86852038248616 }, { "epoch": 0.31279339556466845, "grad_norm": 0.3695676326751709, "learning_rate": 1.6090791047194755e-05, "loss": 0.4199976623058319, "step": 5797, "token_acc": 0.8528497409326425 }, { "epoch": 0.3128473533696649, "grad_norm": 0.4228682219982147, "learning_rate": 1.6089404946013673e-05, "loss": 0.3698374032974243, "step": 5798, "token_acc": 0.8738201689021361 }, { "epoch": 0.3129013111746614, "grad_norm": 0.3957814574241638, "learning_rate": 1.608801865886376e-05, "loss": 0.30156028270721436, "step": 5799, "token_acc": 0.889285204736767 }, { "epoch": 0.3129552689796579, "grad_norm": 0.3764095902442932, "learning_rate": 1.6086632185787353e-05, "loss": 0.37941306829452515, "step": 5800, "token_acc": 0.8655812701829925 }, { "epoch": 0.3130092267846544, "grad_norm": 0.5898087620735168, "learning_rate": 1.6085245526826795e-05, "loss": 0.4231380224227905, "step": 5801, "token_acc": 0.8581376518218623 }, { "epoch": 0.3130631845896509, "grad_norm": 0.40409907698631287, "learning_rate": 1.608385868202443e-05, "loss": 0.41574496030807495, "step": 5802, "token_acc": 0.8602471678681771 }, { "epoch": 0.3131171423946474, "grad_norm": 0.4038547873497009, "learning_rate": 1.6082471651422616e-05, "loss": 0.35756197571754456, "step": 5803, "token_acc": 0.8747675139491631 }, { "epoch": 0.3131711001996439, "grad_norm": 0.30586233735084534, "learning_rate": 1.608108443506371e-05, "loss": 0.37320035696029663, "step": 5804, "token_acc": 0.8723311546840958 }, { "epoch": 0.31322505800464034, "grad_norm": 0.3609771132469177, "learning_rate": 1.607969703299008e-05, "loss": 0.40699338912963867, "step": 5805, "token_acc": 0.8616512591648071 }, { "epoch": 0.31327901580963685, "grad_norm": 0.33361804485321045, "learning_rate": 1.6078309445244095e-05, "loss": 0.3744177222251892, "step": 5806, "token_acc": 0.8683347503947528 }, { "epoch": 0.31333297361463336, "grad_norm": 0.3531334698200226, "learning_rate": 1.6076921671868132e-05, "loss": 0.40304437279701233, "step": 5807, "token_acc": 0.8622902826258304 }, { "epoch": 0.31338693141962987, "grad_norm": 0.44767069816589355, "learning_rate": 1.607553371290457e-05, "loss": 0.38352930545806885, "step": 5808, "token_acc": 0.8672946620393324 }, { "epoch": 0.3134408892246263, "grad_norm": 0.5193749070167542, "learning_rate": 1.6074145568395803e-05, "loss": 0.3041475713253021, "step": 5809, "token_acc": 0.8920431827269092 }, { "epoch": 0.3134948470296228, "grad_norm": 0.4512692987918854, "learning_rate": 1.607275723838422e-05, "loss": 0.4252850115299225, "step": 5810, "token_acc": 0.8564644607843137 }, { "epoch": 0.31354880483461933, "grad_norm": 0.3538789451122284, "learning_rate": 1.6071368722912225e-05, "loss": 0.3673746883869171, "step": 5811, "token_acc": 0.8672100526405162 }, { "epoch": 0.31360276263961584, "grad_norm": 0.3561851680278778, "learning_rate": 1.6069980022022215e-05, "loss": 0.4307020306587219, "step": 5812, "token_acc": 0.8546323416062895 }, { "epoch": 0.3136567204446123, "grad_norm": 0.40001028776168823, "learning_rate": 1.606859113575661e-05, "loss": 0.3970818519592285, "step": 5813, "token_acc": 0.8639561057962859 }, { "epoch": 0.3137106782496088, "grad_norm": 0.4738491177558899, "learning_rate": 1.606720206415782e-05, "loss": 0.44969773292541504, "step": 5814, "token_acc": 0.8451635973538352 }, { "epoch": 0.3137646360546053, "grad_norm": 0.30701470375061035, "learning_rate": 1.6065812807268265e-05, "loss": 0.4472273290157318, "step": 5815, "token_acc": 0.8479070460300908 }, { "epoch": 0.3138185938596018, "grad_norm": 0.5208674073219299, "learning_rate": 1.606442336513038e-05, "loss": 0.41536372900009155, "step": 5816, "token_acc": 0.855618539513171 }, { "epoch": 0.31387255166459826, "grad_norm": 0.502558171749115, "learning_rate": 1.606303373778659e-05, "loss": 0.41368550062179565, "step": 5817, "token_acc": 0.8571200510855683 }, { "epoch": 0.31392650946959477, "grad_norm": 0.4465451240539551, "learning_rate": 1.6061643925279344e-05, "loss": 0.3629568815231323, "step": 5818, "token_acc": 0.871859296482412 }, { "epoch": 0.3139804672745913, "grad_norm": 0.45935043692588806, "learning_rate": 1.6060253927651074e-05, "loss": 0.4479113817214966, "step": 5819, "token_acc": 0.8480725623582767 }, { "epoch": 0.3140344250795878, "grad_norm": 0.4499063491821289, "learning_rate": 1.605886374494424e-05, "loss": 0.38330358266830444, "step": 5820, "token_acc": 0.8633558624742874 }, { "epoch": 0.31408838288458424, "grad_norm": 0.34157025814056396, "learning_rate": 1.6057473377201294e-05, "loss": 0.4372043311595917, "step": 5821, "token_acc": 0.8484567492383097 }, { "epoch": 0.31414234068958075, "grad_norm": 0.34832626581192017, "learning_rate": 1.6056082824464697e-05, "loss": 0.45313024520874023, "step": 5822, "token_acc": 0.8466763706938379 }, { "epoch": 0.31419629849457725, "grad_norm": 0.3994792699813843, "learning_rate": 1.6054692086776918e-05, "loss": 0.43416517972946167, "step": 5823, "token_acc": 0.8539227166276346 }, { "epoch": 0.31425025629957376, "grad_norm": 0.3619048297405243, "learning_rate": 1.6053301164180427e-05, "loss": 0.3904513716697693, "step": 5824, "token_acc": 0.8628358545947167 }, { "epoch": 0.3143042141045702, "grad_norm": 0.3888627886772156, "learning_rate": 1.6051910056717708e-05, "loss": 0.4007619023323059, "step": 5825, "token_acc": 0.8624111737319284 }, { "epoch": 0.3143581719095667, "grad_norm": 0.49371206760406494, "learning_rate": 1.6050518764431237e-05, "loss": 0.4188248813152313, "step": 5826, "token_acc": 0.8542646071188718 }, { "epoch": 0.3144121297145632, "grad_norm": 0.3615005910396576, "learning_rate": 1.604912728736351e-05, "loss": 0.3718852400779724, "step": 5827, "token_acc": 0.8707295569759753 }, { "epoch": 0.3144660875195597, "grad_norm": 0.472872793674469, "learning_rate": 1.6047735625557018e-05, "loss": 0.4484422504901886, "step": 5828, "token_acc": 0.8529319041614124 }, { "epoch": 0.3145200453245562, "grad_norm": 0.376611590385437, "learning_rate": 1.604634377905427e-05, "loss": 0.3495699167251587, "step": 5829, "token_acc": 0.8732095490716181 }, { "epoch": 0.3145740031295527, "grad_norm": 0.4000048339366913, "learning_rate": 1.6044951747897762e-05, "loss": 0.36139875650405884, "step": 5830, "token_acc": 0.8793548387096775 }, { "epoch": 0.3146279609345492, "grad_norm": 0.3983518183231354, "learning_rate": 1.6043559532130012e-05, "loss": 0.40681660175323486, "step": 5831, "token_acc": 0.8587479935794543 }, { "epoch": 0.31468191873954565, "grad_norm": 0.36976346373558044, "learning_rate": 1.604216713179354e-05, "loss": 0.3685269355773926, "step": 5832, "token_acc": 0.8686008737537807 }, { "epoch": 0.31473587654454216, "grad_norm": 0.36607298254966736, "learning_rate": 1.6040774546930864e-05, "loss": 0.42237764596939087, "step": 5833, "token_acc": 0.8554673616870039 }, { "epoch": 0.31478983434953867, "grad_norm": 0.4035722017288208, "learning_rate": 1.6039381777584515e-05, "loss": 0.3763777017593384, "step": 5834, "token_acc": 0.868175044031974 }, { "epoch": 0.3148437921545352, "grad_norm": 0.39541441202163696, "learning_rate": 1.603798882379703e-05, "loss": 0.4542333483695984, "step": 5835, "token_acc": 0.845775926519278 }, { "epoch": 0.3148977499595316, "grad_norm": 0.3229479193687439, "learning_rate": 1.6036595685610946e-05, "loss": 0.4057126045227051, "step": 5836, "token_acc": 0.8590314500417479 }, { "epoch": 0.31495170776452813, "grad_norm": 0.30578088760375977, "learning_rate": 1.6035202363068815e-05, "loss": 0.4643992781639099, "step": 5837, "token_acc": 0.8440535842110646 }, { "epoch": 0.31500566556952464, "grad_norm": 0.5191370248794556, "learning_rate": 1.6033808856213183e-05, "loss": 0.4395635426044464, "step": 5838, "token_acc": 0.8570476507830723 }, { "epoch": 0.31505962337452115, "grad_norm": 0.5213677287101746, "learning_rate": 1.603241516508661e-05, "loss": 0.41386836767196655, "step": 5839, "token_acc": 0.8535043688417921 }, { "epoch": 0.3151135811795176, "grad_norm": 0.4069787561893463, "learning_rate": 1.603102128973166e-05, "loss": 0.357589989900589, "step": 5840, "token_acc": 0.8707978311386522 }, { "epoch": 0.3151675389845141, "grad_norm": 0.5242999196052551, "learning_rate": 1.6029627230190894e-05, "loss": 0.4422752261161804, "step": 5841, "token_acc": 0.8528428093645485 }, { "epoch": 0.3152214967895106, "grad_norm": 0.45197364687919617, "learning_rate": 1.6028232986506894e-05, "loss": 0.3617788553237915, "step": 5842, "token_acc": 0.8726585864945695 }, { "epoch": 0.3152754545945071, "grad_norm": 0.3502274453639984, "learning_rate": 1.602683855872224e-05, "loss": 0.3946765065193176, "step": 5843, "token_acc": 0.862115937641895 }, { "epoch": 0.3153294123995036, "grad_norm": 0.3743913471698761, "learning_rate": 1.6025443946879512e-05, "loss": 0.35527342557907104, "step": 5844, "token_acc": 0.8753359211704986 }, { "epoch": 0.3153833702045001, "grad_norm": 0.391317754983902, "learning_rate": 1.6024049151021307e-05, "loss": 0.4233633875846863, "step": 5845, "token_acc": 0.8494158372998701 }, { "epoch": 0.3154373280094966, "grad_norm": 0.4214402437210083, "learning_rate": 1.6022654171190216e-05, "loss": 0.3733494281768799, "step": 5846, "token_acc": 0.8627816364560393 }, { "epoch": 0.31549128581449304, "grad_norm": 0.3441604971885681, "learning_rate": 1.6021259007428848e-05, "loss": 0.4102210998535156, "step": 5847, "token_acc": 0.8629955416315219 }, { "epoch": 0.31554524361948955, "grad_norm": 0.43926167488098145, "learning_rate": 1.6019863659779805e-05, "loss": 0.45742523670196533, "step": 5848, "token_acc": 0.8487785963551764 }, { "epoch": 0.31559920142448605, "grad_norm": 0.5032079815864563, "learning_rate": 1.6018468128285703e-05, "loss": 0.3663918375968933, "step": 5849, "token_acc": 0.8674467676961443 }, { "epoch": 0.31565315922948256, "grad_norm": 0.4331127405166626, "learning_rate": 1.601707241298916e-05, "loss": 0.4495803415775299, "step": 5850, "token_acc": 0.8504150431983737 }, { "epoch": 0.315707117034479, "grad_norm": 0.2948472499847412, "learning_rate": 1.6015676513932807e-05, "loss": 0.4043665826320648, "step": 5851, "token_acc": 0.8569906790945406 }, { "epoch": 0.3157610748394755, "grad_norm": 0.36745601892471313, "learning_rate": 1.6014280431159264e-05, "loss": 0.37844300270080566, "step": 5852, "token_acc": 0.8638997439698154 }, { "epoch": 0.315815032644472, "grad_norm": 0.37385672330856323, "learning_rate": 1.6012884164711174e-05, "loss": 0.4564768671989441, "step": 5853, "token_acc": 0.848124428179323 }, { "epoch": 0.31586899044946853, "grad_norm": 0.3464254140853882, "learning_rate": 1.6011487714631176e-05, "loss": 0.34718140959739685, "step": 5854, "token_acc": 0.8797718297498903 }, { "epoch": 0.315922948254465, "grad_norm": 0.4026961028575897, "learning_rate": 1.601009108096192e-05, "loss": 0.3846569359302521, "step": 5855, "token_acc": 0.8674684994272623 }, { "epoch": 0.3159769060594615, "grad_norm": 0.36392122507095337, "learning_rate": 1.6008694263746055e-05, "loss": 0.4014098048210144, "step": 5856, "token_acc": 0.8592434616679762 }, { "epoch": 0.316030863864458, "grad_norm": 0.4194643497467041, "learning_rate": 1.600729726302624e-05, "loss": 0.3743833303451538, "step": 5857, "token_acc": 0.8722519310754605 }, { "epoch": 0.3160848216694545, "grad_norm": 0.52492755651474, "learning_rate": 1.6005900078845142e-05, "loss": 0.45796799659729004, "step": 5858, "token_acc": 0.8430971512052593 }, { "epoch": 0.31613877947445096, "grad_norm": 0.3980092406272888, "learning_rate": 1.600450271124543e-05, "loss": 0.4359585642814636, "step": 5859, "token_acc": 0.8499762808349146 }, { "epoch": 0.31619273727944747, "grad_norm": 0.46960213780403137, "learning_rate": 1.6003105160269777e-05, "loss": 0.34537655115127563, "step": 5860, "token_acc": 0.8716937015928686 }, { "epoch": 0.316246695084444, "grad_norm": 0.31456539034843445, "learning_rate": 1.6001707425960864e-05, "loss": 0.3995591998100281, "step": 5861, "token_acc": 0.8650326797385621 }, { "epoch": 0.3163006528894405, "grad_norm": 0.446200430393219, "learning_rate": 1.6000309508361377e-05, "loss": 0.42939212918281555, "step": 5862, "token_acc": 0.858862598588626 }, { "epoch": 0.31635461069443693, "grad_norm": 0.3469056189060211, "learning_rate": 1.5998911407514012e-05, "loss": 0.412543922662735, "step": 5863, "token_acc": 0.8611066301016292 }, { "epoch": 0.31640856849943344, "grad_norm": 0.4061356484889984, "learning_rate": 1.5997513123461465e-05, "loss": 0.39503413438796997, "step": 5864, "token_acc": 0.8659647125782584 }, { "epoch": 0.31646252630442995, "grad_norm": 0.3866533935070038, "learning_rate": 1.599611465624644e-05, "loss": 0.38455045223236084, "step": 5865, "token_acc": 0.8698276946296063 }, { "epoch": 0.31651648410942645, "grad_norm": 0.46282434463500977, "learning_rate": 1.5994716005911638e-05, "loss": 0.3888470232486725, "step": 5866, "token_acc": 0.8688178103423982 }, { "epoch": 0.3165704419144229, "grad_norm": 0.41535061597824097, "learning_rate": 1.5993317172499786e-05, "loss": 0.38829556107521057, "step": 5867, "token_acc": 0.8629992065591113 }, { "epoch": 0.3166243997194194, "grad_norm": 0.36060869693756104, "learning_rate": 1.599191815605359e-05, "loss": 0.34161245822906494, "step": 5868, "token_acc": 0.8784046692607004 }, { "epoch": 0.3166783575244159, "grad_norm": 0.33916565775871277, "learning_rate": 1.599051895661579e-05, "loss": 0.37315669655799866, "step": 5869, "token_acc": 0.8684674104521433 }, { "epoch": 0.3167323153294124, "grad_norm": 0.424622118473053, "learning_rate": 1.5989119574229108e-05, "loss": 0.3196163773536682, "step": 5870, "token_acc": 0.8831383519837233 }, { "epoch": 0.3167862731344089, "grad_norm": 0.39892351627349854, "learning_rate": 1.5987720008936283e-05, "loss": 0.37766963243484497, "step": 5871, "token_acc": 0.8691852994088923 }, { "epoch": 0.3168402309394054, "grad_norm": 0.4338267147541046, "learning_rate": 1.598632026078006e-05, "loss": 0.3556716740131378, "step": 5872, "token_acc": 0.8781602193116053 }, { "epoch": 0.3168941887444019, "grad_norm": 0.4933326542377472, "learning_rate": 1.598492032980318e-05, "loss": 0.44292253255844116, "step": 5873, "token_acc": 0.8526394796097073 }, { "epoch": 0.31694814654939835, "grad_norm": 0.48099249601364136, "learning_rate": 1.5983520216048403e-05, "loss": 0.36085760593414307, "step": 5874, "token_acc": 0.8769524470669906 }, { "epoch": 0.31700210435439485, "grad_norm": 0.3952922224998474, "learning_rate": 1.5982119919558486e-05, "loss": 0.39148378372192383, "step": 5875, "token_acc": 0.8665764879488441 }, { "epoch": 0.31705606215939136, "grad_norm": 0.39933204650878906, "learning_rate": 1.5980719440376197e-05, "loss": 0.4180450439453125, "step": 5876, "token_acc": 0.859763902716541 }, { "epoch": 0.31711001996438787, "grad_norm": 0.43385541439056396, "learning_rate": 1.5979318778544296e-05, "loss": 0.38567522168159485, "step": 5877, "token_acc": 0.8683030446070332 }, { "epoch": 0.3171639777693843, "grad_norm": 0.39638400077819824, "learning_rate": 1.597791793410557e-05, "loss": 0.407523512840271, "step": 5878, "token_acc": 0.8622813500862281 }, { "epoch": 0.3172179355743808, "grad_norm": 0.4286743998527527, "learning_rate": 1.59765169071028e-05, "loss": 0.41815072298049927, "step": 5879, "token_acc": 0.8579500657030223 }, { "epoch": 0.31727189337937733, "grad_norm": 0.3504234850406647, "learning_rate": 1.597511569757876e-05, "loss": 0.3916122615337372, "step": 5880, "token_acc": 0.8675179569034318 }, { "epoch": 0.31732585118437384, "grad_norm": 0.3811517059803009, "learning_rate": 1.597371430557626e-05, "loss": 0.40139317512512207, "step": 5881, "token_acc": 0.8625861860408854 }, { "epoch": 0.3173798089893703, "grad_norm": 0.38734105229377747, "learning_rate": 1.597231273113809e-05, "loss": 0.2943299412727356, "step": 5882, "token_acc": 0.8922220226333752 }, { "epoch": 0.3174337667943668, "grad_norm": 0.3893871009349823, "learning_rate": 1.5970910974307048e-05, "loss": 0.42938947677612305, "step": 5883, "token_acc": 0.8550548112058465 }, { "epoch": 0.3174877245993633, "grad_norm": 0.4034571349620819, "learning_rate": 1.5969509035125953e-05, "loss": 0.3734506070613861, "step": 5884, "token_acc": 0.8687425506555423 }, { "epoch": 0.3175416824043598, "grad_norm": 0.3775988221168518, "learning_rate": 1.5968106913637615e-05, "loss": 0.39590272307395935, "step": 5885, "token_acc": 0.8652621264086232 }, { "epoch": 0.31759564020935627, "grad_norm": 0.35120609402656555, "learning_rate": 1.5966704609884852e-05, "loss": 0.35781770944595337, "step": 5886, "token_acc": 0.8759338313767343 }, { "epoch": 0.3176495980143528, "grad_norm": 0.4863419532775879, "learning_rate": 1.5965302123910498e-05, "loss": 0.3641029894351959, "step": 5887, "token_acc": 0.8691393705844573 }, { "epoch": 0.3177035558193493, "grad_norm": 0.3063676059246063, "learning_rate": 1.5963899455757382e-05, "loss": 0.3859218955039978, "step": 5888, "token_acc": 0.8644520809469263 }, { "epoch": 0.3177575136243458, "grad_norm": 0.44460147619247437, "learning_rate": 1.5962496605468333e-05, "loss": 0.4015091359615326, "step": 5889, "token_acc": 0.8588807785888077 }, { "epoch": 0.31781147142934224, "grad_norm": 0.4878385365009308, "learning_rate": 1.59610935730862e-05, "loss": 0.41193637251853943, "step": 5890, "token_acc": 0.858734466504785 }, { "epoch": 0.31786542923433875, "grad_norm": 0.4277150332927704, "learning_rate": 1.5959690358653833e-05, "loss": 0.42599302530288696, "step": 5891, "token_acc": 0.8565633802816901 }, { "epoch": 0.31791938703933526, "grad_norm": 0.4432153105735779, "learning_rate": 1.5958286962214088e-05, "loss": 0.42103803157806396, "step": 5892, "token_acc": 0.8570649952309579 }, { "epoch": 0.3179733448443317, "grad_norm": 0.37114134430885315, "learning_rate": 1.5956883383809812e-05, "loss": 0.4033260941505432, "step": 5893, "token_acc": 0.8589502693223099 }, { "epoch": 0.3180273026493282, "grad_norm": 0.47509104013442993, "learning_rate": 1.595547962348388e-05, "loss": 0.4051991105079651, "step": 5894, "token_acc": 0.8598078010175241 }, { "epoch": 0.3180812604543247, "grad_norm": 0.3824809789657593, "learning_rate": 1.5954075681279166e-05, "loss": 0.3884756565093994, "step": 5895, "token_acc": 0.8629917375755334 }, { "epoch": 0.31813521825932123, "grad_norm": 0.37390878796577454, "learning_rate": 1.5952671557238532e-05, "loss": 0.34244757890701294, "step": 5896, "token_acc": 0.8811345180651995 }, { "epoch": 0.3181891760643177, "grad_norm": 0.34852927923202515, "learning_rate": 1.5951267251404875e-05, "loss": 0.3771968483924866, "step": 5897, "token_acc": 0.8704530950861519 }, { "epoch": 0.3182431338693142, "grad_norm": 0.4348762333393097, "learning_rate": 1.594986276382107e-05, "loss": 0.45011526346206665, "step": 5898, "token_acc": 0.8493494973388528 }, { "epoch": 0.3182970916743107, "grad_norm": 0.409920334815979, "learning_rate": 1.594845809453002e-05, "loss": 0.394874632358551, "step": 5899, "token_acc": 0.8634103539046505 }, { "epoch": 0.3183510494793072, "grad_norm": 0.40730899572372437, "learning_rate": 1.5947053243574614e-05, "loss": 0.3028712272644043, "step": 5900, "token_acc": 0.8941513568177988 }, { "epoch": 0.31840500728430365, "grad_norm": 0.442193865776062, "learning_rate": 1.5945648210997764e-05, "loss": 0.4116310179233551, "step": 5901, "token_acc": 0.8556808068822308 }, { "epoch": 0.31845896508930016, "grad_norm": 0.40653303265571594, "learning_rate": 1.594424299684237e-05, "loss": 0.4278098940849304, "step": 5902, "token_acc": 0.8524323525062842 }, { "epoch": 0.31851292289429667, "grad_norm": 0.42780473828315735, "learning_rate": 1.594283760115135e-05, "loss": 0.42822909355163574, "step": 5903, "token_acc": 0.8552494802494802 }, { "epoch": 0.3185668806992932, "grad_norm": 0.495345801115036, "learning_rate": 1.5941432023967635e-05, "loss": 0.37751805782318115, "step": 5904, "token_acc": 0.872134670487106 }, { "epoch": 0.3186208385042896, "grad_norm": 0.509857714176178, "learning_rate": 1.594002626533414e-05, "loss": 0.38466811180114746, "step": 5905, "token_acc": 0.8672800780705423 }, { "epoch": 0.31867479630928613, "grad_norm": 0.5058160424232483, "learning_rate": 1.5938620325293796e-05, "loss": 0.4116227626800537, "step": 5906, "token_acc": 0.8587729697688203 }, { "epoch": 0.31872875411428264, "grad_norm": 0.4551837146282196, "learning_rate": 1.5937214203889543e-05, "loss": 0.40293779969215393, "step": 5907, "token_acc": 0.8582494190549961 }, { "epoch": 0.31878271191927915, "grad_norm": 0.4762759506702423, "learning_rate": 1.5935807901164326e-05, "loss": 0.3755532205104828, "step": 5908, "token_acc": 0.8706021650879567 }, { "epoch": 0.3188366697242756, "grad_norm": 0.514335572719574, "learning_rate": 1.593440141716109e-05, "loss": 0.39208486676216125, "step": 5909, "token_acc": 0.8659328762179719 }, { "epoch": 0.3188906275292721, "grad_norm": 0.42186596989631653, "learning_rate": 1.593299475192279e-05, "loss": 0.4345017075538635, "step": 5910, "token_acc": 0.8502431118314424 }, { "epoch": 0.3189445853342686, "grad_norm": 0.39477869868278503, "learning_rate": 1.5931587905492383e-05, "loss": 0.3733974099159241, "step": 5911, "token_acc": 0.8740575258307736 }, { "epoch": 0.31899854313926507, "grad_norm": 0.40570396184921265, "learning_rate": 1.5930180877912835e-05, "loss": 0.3461453318595886, "step": 5912, "token_acc": 0.8747719572582747 }, { "epoch": 0.3190525009442616, "grad_norm": 0.42349642515182495, "learning_rate": 1.5928773669227117e-05, "loss": 0.3959338068962097, "step": 5913, "token_acc": 0.870268588613502 }, { "epoch": 0.3191064587492581, "grad_norm": 0.43592900037765503, "learning_rate": 1.5927366279478206e-05, "loss": 0.3619834780693054, "step": 5914, "token_acc": 0.8739706408879341 }, { "epoch": 0.3191604165542546, "grad_norm": 0.268449604511261, "learning_rate": 1.5925958708709077e-05, "loss": 0.33451831340789795, "step": 5915, "token_acc": 0.8828325180526438 }, { "epoch": 0.31921437435925104, "grad_norm": 0.356970876455307, "learning_rate": 1.5924550956962726e-05, "loss": 0.4171781539916992, "step": 5916, "token_acc": 0.8541825717090196 }, { "epoch": 0.31926833216424755, "grad_norm": 0.34267526865005493, "learning_rate": 1.592314302428214e-05, "loss": 0.42759329080581665, "step": 5917, "token_acc": 0.8566934638238378 }, { "epoch": 0.31932228996924406, "grad_norm": 0.4165896475315094, "learning_rate": 1.5921734910710313e-05, "loss": 0.4393094778060913, "step": 5918, "token_acc": 0.8525166767738023 }, { "epoch": 0.31937624777424056, "grad_norm": 0.44925257563591003, "learning_rate": 1.592032661629026e-05, "loss": 0.4341118335723877, "step": 5919, "token_acc": 0.8469032707028532 }, { "epoch": 0.319430205579237, "grad_norm": 0.3422918915748596, "learning_rate": 1.591891814106498e-05, "loss": 0.3245576322078705, "step": 5920, "token_acc": 0.8811648661343354 }, { "epoch": 0.3194841633842335, "grad_norm": 0.4789324104785919, "learning_rate": 1.5917509485077493e-05, "loss": 0.3385501801967621, "step": 5921, "token_acc": 0.8816141995981246 }, { "epoch": 0.31953812118923003, "grad_norm": 0.4034443497657776, "learning_rate": 1.591610064837081e-05, "loss": 0.40663355588912964, "step": 5922, "token_acc": 0.864138362158783 }, { "epoch": 0.31959207899422654, "grad_norm": 0.36899733543395996, "learning_rate": 1.591469163098797e-05, "loss": 0.38559871912002563, "step": 5923, "token_acc": 0.8660726715856885 }, { "epoch": 0.319646036799223, "grad_norm": 0.33066362142562866, "learning_rate": 1.5913282432971994e-05, "loss": 0.3652002215385437, "step": 5924, "token_acc": 0.8745762711864407 }, { "epoch": 0.3196999946042195, "grad_norm": 0.3913484215736389, "learning_rate": 1.5911873054365917e-05, "loss": 0.340809166431427, "step": 5925, "token_acc": 0.8784307875894988 }, { "epoch": 0.319753952409216, "grad_norm": 0.4330183267593384, "learning_rate": 1.5910463495212793e-05, "loss": 0.44587865471839905, "step": 5926, "token_acc": 0.8431504092311821 }, { "epoch": 0.3198079102142125, "grad_norm": 0.47916725277900696, "learning_rate": 1.590905375555566e-05, "loss": 0.4125378131866455, "step": 5927, "token_acc": 0.861396437394357 }, { "epoch": 0.31986186801920896, "grad_norm": 0.5653599500656128, "learning_rate": 1.5907643835437572e-05, "loss": 0.3821568489074707, "step": 5928, "token_acc": 0.8679657983694572 }, { "epoch": 0.31991582582420547, "grad_norm": 0.4264964759349823, "learning_rate": 1.590623373490159e-05, "loss": 0.5172315835952759, "step": 5929, "token_acc": 0.8274008600095557 }, { "epoch": 0.319969783629202, "grad_norm": 0.4387648105621338, "learning_rate": 1.5904823453990778e-05, "loss": 0.3886736333370209, "step": 5930, "token_acc": 0.8662735097226649 }, { "epoch": 0.3200237414341985, "grad_norm": 0.3563358783721924, "learning_rate": 1.5903412992748202e-05, "loss": 0.41901007294654846, "step": 5931, "token_acc": 0.8593830070718459 }, { "epoch": 0.32007769923919493, "grad_norm": 0.38934409618377686, "learning_rate": 1.590200235121694e-05, "loss": 0.37436380982398987, "step": 5932, "token_acc": 0.8685416337494075 }, { "epoch": 0.32013165704419144, "grad_norm": 0.32070496678352356, "learning_rate": 1.5900591529440075e-05, "loss": 0.41630738973617554, "step": 5933, "token_acc": 0.8556574239713775 }, { "epoch": 0.32018561484918795, "grad_norm": 0.2910192906856537, "learning_rate": 1.5899180527460685e-05, "loss": 0.3618183732032776, "step": 5934, "token_acc": 0.8782345987127282 }, { "epoch": 0.3202395726541844, "grad_norm": 0.40402188897132874, "learning_rate": 1.5897769345321872e-05, "loss": 0.35504385828971863, "step": 5935, "token_acc": 0.8766995467875234 }, { "epoch": 0.3202935304591809, "grad_norm": 0.3537117540836334, "learning_rate": 1.5896357983066728e-05, "loss": 0.3704510033130646, "step": 5936, "token_acc": 0.8652369221851905 }, { "epoch": 0.3203474882641774, "grad_norm": 0.40519413352012634, "learning_rate": 1.5894946440738356e-05, "loss": 0.3953627943992615, "step": 5937, "token_acc": 0.8615502686108979 }, { "epoch": 0.3204014460691739, "grad_norm": 0.37221893668174744, "learning_rate": 1.5893534718379863e-05, "loss": 0.39649927616119385, "step": 5938, "token_acc": 0.8608881083793276 }, { "epoch": 0.3204554038741704, "grad_norm": 0.4149508476257324, "learning_rate": 1.589212281603436e-05, "loss": 0.3831794261932373, "step": 5939, "token_acc": 0.8649148778682457 }, { "epoch": 0.3205093616791669, "grad_norm": 0.3512827157974243, "learning_rate": 1.5890710733744975e-05, "loss": 0.38001471757888794, "step": 5940, "token_acc": 0.8681858802502234 }, { "epoch": 0.3205633194841634, "grad_norm": 0.39683207869529724, "learning_rate": 1.5889298471554824e-05, "loss": 0.3684995174407959, "step": 5941, "token_acc": 0.87662424648359 }, { "epoch": 0.3206172772891599, "grad_norm": 0.3277168869972229, "learning_rate": 1.5887886029507042e-05, "loss": 0.3395671248435974, "step": 5942, "token_acc": 0.8783375619185695 }, { "epoch": 0.32067123509415635, "grad_norm": 0.41986918449401855, "learning_rate": 1.5886473407644764e-05, "loss": 0.39143243432044983, "step": 5943, "token_acc": 0.8639621559633027 }, { "epoch": 0.32072519289915286, "grad_norm": 0.36250799894332886, "learning_rate": 1.5885060606011127e-05, "loss": 0.38422316312789917, "step": 5944, "token_acc": 0.8692625368731564 }, { "epoch": 0.32077915070414936, "grad_norm": 0.354805588722229, "learning_rate": 1.5883647624649282e-05, "loss": 0.36794212460517883, "step": 5945, "token_acc": 0.8698869188887338 }, { "epoch": 0.32083310850914587, "grad_norm": 0.4246165454387665, "learning_rate": 1.5882234463602384e-05, "loss": 0.4402647018432617, "step": 5946, "token_acc": 0.8509819021948402 }, { "epoch": 0.3208870663141423, "grad_norm": 0.37465259432792664, "learning_rate": 1.588082112291358e-05, "loss": 0.4608752727508545, "step": 5947, "token_acc": 0.8459893048128342 }, { "epoch": 0.32094102411913883, "grad_norm": 0.27590540051460266, "learning_rate": 1.5879407602626044e-05, "loss": 0.4317491054534912, "step": 5948, "token_acc": 0.8537812398571892 }, { "epoch": 0.32099498192413534, "grad_norm": 0.33603814244270325, "learning_rate": 1.587799390278294e-05, "loss": 0.41255101561546326, "step": 5949, "token_acc": 0.8558708414872799 }, { "epoch": 0.32104893972913184, "grad_norm": 0.384237140417099, "learning_rate": 1.5876580023427436e-05, "loss": 0.40569937229156494, "step": 5950, "token_acc": 0.8590761223162003 }, { "epoch": 0.3211028975341283, "grad_norm": 0.35519689321517944, "learning_rate": 1.5875165964602724e-05, "loss": 0.4492208659648895, "step": 5951, "token_acc": 0.8513790196314585 }, { "epoch": 0.3211568553391248, "grad_norm": 0.32561859488487244, "learning_rate": 1.587375172635198e-05, "loss": 0.36847802996635437, "step": 5952, "token_acc": 0.8697135892748324 }, { "epoch": 0.3212108131441213, "grad_norm": 0.5206959843635559, "learning_rate": 1.5872337308718398e-05, "loss": 0.4214116334915161, "step": 5953, "token_acc": 0.8547219770520741 }, { "epoch": 0.3212647709491178, "grad_norm": 0.35106080770492554, "learning_rate": 1.5870922711745175e-05, "loss": 0.3466103971004486, "step": 5954, "token_acc": 0.8823745860419477 }, { "epoch": 0.32131872875411427, "grad_norm": 0.32648250460624695, "learning_rate": 1.5869507935475506e-05, "loss": 0.3835262060165405, "step": 5955, "token_acc": 0.8627176814541757 }, { "epoch": 0.3213726865591108, "grad_norm": 0.4132806956768036, "learning_rate": 1.5868092979952603e-05, "loss": 0.42940860986709595, "step": 5956, "token_acc": 0.8503551696921863 }, { "epoch": 0.3214266443641073, "grad_norm": 0.43460822105407715, "learning_rate": 1.586667784521968e-05, "loss": 0.4028126299381256, "step": 5957, "token_acc": 0.8656989853438557 }, { "epoch": 0.32148060216910374, "grad_norm": 0.3378336429595947, "learning_rate": 1.586526253131995e-05, "loss": 0.38545477390289307, "step": 5958, "token_acc": 0.8646806362029791 }, { "epoch": 0.32153455997410024, "grad_norm": 0.4502687454223633, "learning_rate": 1.5863847038296644e-05, "loss": 0.3821508586406708, "step": 5959, "token_acc": 0.8623130127070935 }, { "epoch": 0.32158851777909675, "grad_norm": 0.4653799533843994, "learning_rate": 1.5862431366192977e-05, "loss": 0.40280550718307495, "step": 5960, "token_acc": 0.8660229330114665 }, { "epoch": 0.32164247558409326, "grad_norm": 0.38953542709350586, "learning_rate": 1.5861015515052197e-05, "loss": 0.42029452323913574, "step": 5961, "token_acc": 0.8580697485806975 }, { "epoch": 0.3216964333890897, "grad_norm": 0.4617539048194885, "learning_rate": 1.585959948491754e-05, "loss": 0.41775089502334595, "step": 5962, "token_acc": 0.8590547179400305 }, { "epoch": 0.3217503911940862, "grad_norm": 0.46823734045028687, "learning_rate": 1.5858183275832244e-05, "loss": 0.38330337405204773, "step": 5963, "token_acc": 0.8716426350014136 }, { "epoch": 0.3218043489990827, "grad_norm": 0.4722985625267029, "learning_rate": 1.585676688783957e-05, "loss": 0.39282679557800293, "step": 5964, "token_acc": 0.8624338624338624 }, { "epoch": 0.32185830680407923, "grad_norm": 0.3639506995677948, "learning_rate": 1.585535032098277e-05, "loss": 0.38833850622177124, "step": 5965, "token_acc": 0.8654319771451089 }, { "epoch": 0.3219122646090757, "grad_norm": 0.36691588163375854, "learning_rate": 1.5853933575305104e-05, "loss": 0.40429770946502686, "step": 5966, "token_acc": 0.8664475649835673 }, { "epoch": 0.3219662224140722, "grad_norm": 0.44778895378112793, "learning_rate": 1.5852516650849842e-05, "loss": 0.38819119334220886, "step": 5967, "token_acc": 0.8688242875554919 }, { "epoch": 0.3220201802190687, "grad_norm": 0.4506605267524719, "learning_rate": 1.585109954766025e-05, "loss": 0.39288103580474854, "step": 5968, "token_acc": 0.8615823449932443 }, { "epoch": 0.3220741380240652, "grad_norm": 0.511873185634613, "learning_rate": 1.5849682265779612e-05, "loss": 0.3756871521472931, "step": 5969, "token_acc": 0.861176047473476 }, { "epoch": 0.32212809582906166, "grad_norm": 0.38535454869270325, "learning_rate": 1.584826480525121e-05, "loss": 0.38578012585639954, "step": 5970, "token_acc": 0.8725761772853186 }, { "epoch": 0.32218205363405816, "grad_norm": 0.4319418668746948, "learning_rate": 1.5846847166118337e-05, "loss": 0.3754861354827881, "step": 5971, "token_acc": 0.8724039460020768 }, { "epoch": 0.32223601143905467, "grad_norm": 0.400479257106781, "learning_rate": 1.584542934842428e-05, "loss": 0.34706422686576843, "step": 5972, "token_acc": 0.875575221238938 }, { "epoch": 0.3222899692440512, "grad_norm": 0.38021859526634216, "learning_rate": 1.5844011352212342e-05, "loss": 0.42552095651626587, "step": 5973, "token_acc": 0.8546412300683371 }, { "epoch": 0.32234392704904763, "grad_norm": 0.22801846265792847, "learning_rate": 1.5842593177525828e-05, "loss": 0.404805451631546, "step": 5974, "token_acc": 0.8608912952936276 }, { "epoch": 0.32239788485404414, "grad_norm": 0.41486015915870667, "learning_rate": 1.584117482440805e-05, "loss": 0.3476405441761017, "step": 5975, "token_acc": 0.8757931472081218 }, { "epoch": 0.32245184265904064, "grad_norm": 0.4267740249633789, "learning_rate": 1.583975629290232e-05, "loss": 0.41452300548553467, "step": 5976, "token_acc": 0.8565036259836445 }, { "epoch": 0.3225058004640371, "grad_norm": 0.3618970811367035, "learning_rate": 1.5838337583051966e-05, "loss": 0.3858674466609955, "step": 5977, "token_acc": 0.8632105599318715 }, { "epoch": 0.3225597582690336, "grad_norm": 0.3738276958465576, "learning_rate": 1.583691869490031e-05, "loss": 0.3861120045185089, "step": 5978, "token_acc": 0.8641666666666666 }, { "epoch": 0.3226137160740301, "grad_norm": 0.45609715580940247, "learning_rate": 1.5835499628490686e-05, "loss": 0.3554677963256836, "step": 5979, "token_acc": 0.8752849977200182 }, { "epoch": 0.3226676738790266, "grad_norm": 0.44331714510917664, "learning_rate": 1.583408038386643e-05, "loss": 0.3574189841747284, "step": 5980, "token_acc": 0.8726456099681252 }, { "epoch": 0.32272163168402307, "grad_norm": 0.3740033805370331, "learning_rate": 1.5832660961070893e-05, "loss": 0.35400840640068054, "step": 5981, "token_acc": 0.8735273735273735 }, { "epoch": 0.3227755894890196, "grad_norm": 0.41710972785949707, "learning_rate": 1.5831241360147415e-05, "loss": 0.4243120849132538, "step": 5982, "token_acc": 0.852759835584263 }, { "epoch": 0.3228295472940161, "grad_norm": 0.42463210225105286, "learning_rate": 1.5829821581139354e-05, "loss": 0.428408145904541, "step": 5983, "token_acc": 0.8513437849944009 }, { "epoch": 0.3228835050990126, "grad_norm": 0.33903688192367554, "learning_rate": 1.582840162409007e-05, "loss": 0.3487148880958557, "step": 5984, "token_acc": 0.8778040141676505 }, { "epoch": 0.32293746290400904, "grad_norm": 0.40380144119262695, "learning_rate": 1.5826981489042926e-05, "loss": 0.4370298683643341, "step": 5985, "token_acc": 0.8484950963814677 }, { "epoch": 0.32299142070900555, "grad_norm": 0.4360674321651459, "learning_rate": 1.5825561176041293e-05, "loss": 0.4680197238922119, "step": 5986, "token_acc": 0.8430583501006036 }, { "epoch": 0.32304537851400206, "grad_norm": 0.424159973859787, "learning_rate": 1.582414068512855e-05, "loss": 0.4302065670490265, "step": 5987, "token_acc": 0.8547297297297297 }, { "epoch": 0.32309933631899856, "grad_norm": 0.40589597821235657, "learning_rate": 1.5822720016348076e-05, "loss": 0.36530303955078125, "step": 5988, "token_acc": 0.8683936281077862 }, { "epoch": 0.323153294123995, "grad_norm": 0.37764817476272583, "learning_rate": 1.5821299169743256e-05, "loss": 0.369202584028244, "step": 5989, "token_acc": 0.8686624203821656 }, { "epoch": 0.3232072519289915, "grad_norm": 0.3100883960723877, "learning_rate": 1.5819878145357485e-05, "loss": 0.40924930572509766, "step": 5990, "token_acc": 0.8620464630644741 }, { "epoch": 0.32326120973398803, "grad_norm": 0.4593884348869324, "learning_rate": 1.581845694323416e-05, "loss": 0.4307205379009247, "step": 5991, "token_acc": 0.8596607914865314 }, { "epoch": 0.32331516753898454, "grad_norm": 0.39656955003738403, "learning_rate": 1.5817035563416684e-05, "loss": 0.399291455745697, "step": 5992, "token_acc": 0.8616824464033083 }, { "epoch": 0.323369125343981, "grad_norm": 0.47779127955436707, "learning_rate": 1.5815614005948465e-05, "loss": 0.4199582040309906, "step": 5993, "token_acc": 0.854542144547606 }, { "epoch": 0.3234230831489775, "grad_norm": 0.2840268611907959, "learning_rate": 1.581419227087292e-05, "loss": 0.37916046380996704, "step": 5994, "token_acc": 0.8691011958338691 }, { "epoch": 0.323477040953974, "grad_norm": 0.4277138113975525, "learning_rate": 1.5812770358233466e-05, "loss": 0.42542773485183716, "step": 5995, "token_acc": 0.8519122896481387 }, { "epoch": 0.3235309987589705, "grad_norm": 0.4228675067424774, "learning_rate": 1.5811348268073524e-05, "loss": 0.43696892261505127, "step": 5996, "token_acc": 0.854160363086233 }, { "epoch": 0.32358495656396696, "grad_norm": 0.45172634720802307, "learning_rate": 1.5809926000436534e-05, "loss": 0.3858407139778137, "step": 5997, "token_acc": 0.8632447518505035 }, { "epoch": 0.32363891436896347, "grad_norm": 0.4951530992984772, "learning_rate": 1.5808503555365925e-05, "loss": 0.3802943825721741, "step": 5998, "token_acc": 0.8691974822974036 }, { "epoch": 0.32369287217396, "grad_norm": 0.270114928483963, "learning_rate": 1.5807080932905136e-05, "loss": 0.3673822283744812, "step": 5999, "token_acc": 0.8761778563015312 }, { "epoch": 0.32374682997895643, "grad_norm": 0.4562700390815735, "learning_rate": 1.580565813309762e-05, "loss": 0.42791613936424255, "step": 6000, "token_acc": 0.858997861289337 }, { "epoch": 0.32380078778395294, "grad_norm": 0.495440274477005, "learning_rate": 1.5804235155986822e-05, "loss": 0.4392363727092743, "step": 6001, "token_acc": 0.8481049142527741 }, { "epoch": 0.32385474558894944, "grad_norm": 0.509390652179718, "learning_rate": 1.58028120016162e-05, "loss": 0.47920095920562744, "step": 6002, "token_acc": 0.8421327076000521 }, { "epoch": 0.32390870339394595, "grad_norm": 0.36189112067222595, "learning_rate": 1.5801388670029226e-05, "loss": 0.3577880859375, "step": 6003, "token_acc": 0.8762400390307367 }, { "epoch": 0.3239626611989424, "grad_norm": 0.3849181830883026, "learning_rate": 1.5799965161269363e-05, "loss": 0.3551555573940277, "step": 6004, "token_acc": 0.8740401454937357 }, { "epoch": 0.3240166190039389, "grad_norm": 0.4039961099624634, "learning_rate": 1.5798541475380077e-05, "loss": 0.4030780494213104, "step": 6005, "token_acc": 0.8588455246552933 }, { "epoch": 0.3240705768089354, "grad_norm": 0.3958704173564911, "learning_rate": 1.5797117612404856e-05, "loss": 0.4032195210456848, "step": 6006, "token_acc": 0.8618996286980477 }, { "epoch": 0.3241245346139319, "grad_norm": 0.48631906509399414, "learning_rate": 1.5795693572387182e-05, "loss": 0.3420588970184326, "step": 6007, "token_acc": 0.8772063010058835 }, { "epoch": 0.3241784924189284, "grad_norm": 0.29886186122894287, "learning_rate": 1.5794269355370546e-05, "loss": 0.3893965482711792, "step": 6008, "token_acc": 0.8679805615550756 }, { "epoch": 0.3242324502239249, "grad_norm": 0.39779648184776306, "learning_rate": 1.579284496139844e-05, "loss": 0.4194830060005188, "step": 6009, "token_acc": 0.8587547012118679 }, { "epoch": 0.3242864080289214, "grad_norm": 0.3615383207798004, "learning_rate": 1.5791420390514366e-05, "loss": 0.46268606185913086, "step": 6010, "token_acc": 0.8394361633182112 }, { "epoch": 0.3243403658339179, "grad_norm": 0.34994274377822876, "learning_rate": 1.5789995642761834e-05, "loss": 0.3731498718261719, "step": 6011, "token_acc": 0.8703590992087644 }, { "epoch": 0.32439432363891435, "grad_norm": 0.37665796279907227, "learning_rate": 1.5788570718184346e-05, "loss": 0.35972315073013306, "step": 6012, "token_acc": 0.8720990873533246 }, { "epoch": 0.32444828144391086, "grad_norm": 0.4165859520435333, "learning_rate": 1.578714561682543e-05, "loss": 0.41506367921829224, "step": 6013, "token_acc": 0.8566390885663909 }, { "epoch": 0.32450223924890736, "grad_norm": 0.44820672273635864, "learning_rate": 1.57857203387286e-05, "loss": 0.3447751998901367, "step": 6014, "token_acc": 0.8791268266281796 }, { "epoch": 0.32455619705390387, "grad_norm": 0.26208582520484924, "learning_rate": 1.5784294883937388e-05, "loss": 0.3259890079498291, "step": 6015, "token_acc": 0.8836065573770492 }, { "epoch": 0.3246101548589003, "grad_norm": 0.34694162011146545, "learning_rate": 1.5782869252495324e-05, "loss": 0.3529561758041382, "step": 6016, "token_acc": 0.8737260092653871 }, { "epoch": 0.32466411266389683, "grad_norm": 0.5498223900794983, "learning_rate": 1.5781443444445948e-05, "loss": 0.4670464098453522, "step": 6017, "token_acc": 0.8450605369363046 }, { "epoch": 0.32471807046889334, "grad_norm": 0.3377950191497803, "learning_rate": 1.5780017459832802e-05, "loss": 0.39083147048950195, "step": 6018, "token_acc": 0.8689053537284895 }, { "epoch": 0.32477202827388985, "grad_norm": 0.33254244923591614, "learning_rate": 1.577859129869944e-05, "loss": 0.3606938123703003, "step": 6019, "token_acc": 0.8773203359950223 }, { "epoch": 0.3248259860788863, "grad_norm": 0.5357593297958374, "learning_rate": 1.5777164961089417e-05, "loss": 0.39400410652160645, "step": 6020, "token_acc": 0.8656868748724229 }, { "epoch": 0.3248799438838828, "grad_norm": 0.34619659185409546, "learning_rate": 1.5775738447046283e-05, "loss": 0.3319009244441986, "step": 6021, "token_acc": 0.8759393166028641 }, { "epoch": 0.3249339016888793, "grad_norm": 0.3686433434486389, "learning_rate": 1.5774311756613612e-05, "loss": 0.40181320905685425, "step": 6022, "token_acc": 0.8626325088339223 }, { "epoch": 0.32498785949387576, "grad_norm": 0.4081401228904724, "learning_rate": 1.577288488983497e-05, "loss": 0.43177568912506104, "step": 6023, "token_acc": 0.8549993532531367 }, { "epoch": 0.32504181729887227, "grad_norm": 0.3653571307659149, "learning_rate": 1.5771457846753943e-05, "loss": 0.3854018449783325, "step": 6024, "token_acc": 0.869088669950739 }, { "epoch": 0.3250957751038688, "grad_norm": 0.5255946516990662, "learning_rate": 1.5770030627414097e-05, "loss": 0.36258184909820557, "step": 6025, "token_acc": 0.8755938764736935 }, { "epoch": 0.3251497329088653, "grad_norm": 0.4548529088497162, "learning_rate": 1.5768603231859035e-05, "loss": 0.47867828607559204, "step": 6026, "token_acc": 0.8383213509446581 }, { "epoch": 0.32520369071386174, "grad_norm": 0.31767138838768005, "learning_rate": 1.5767175660132338e-05, "loss": 0.37301188707351685, "step": 6027, "token_acc": 0.8659147869674185 }, { "epoch": 0.32525764851885824, "grad_norm": 0.4479522407054901, "learning_rate": 1.5765747912277604e-05, "loss": 0.43681788444519043, "step": 6028, "token_acc": 0.8530573335149122 }, { "epoch": 0.32531160632385475, "grad_norm": 0.34851783514022827, "learning_rate": 1.5764319988338442e-05, "loss": 0.37287798523902893, "step": 6029, "token_acc": 0.8673317450863609 }, { "epoch": 0.32536556412885126, "grad_norm": 0.4045201241970062, "learning_rate": 1.5762891888358458e-05, "loss": 0.3404644727706909, "step": 6030, "token_acc": 0.8771587743732591 }, { "epoch": 0.3254195219338477, "grad_norm": 0.32190123200416565, "learning_rate": 1.5761463612381263e-05, "loss": 0.35942813754081726, "step": 6031, "token_acc": 0.8821557849577691 }, { "epoch": 0.3254734797388442, "grad_norm": 0.44873738288879395, "learning_rate": 1.576003516045048e-05, "loss": 0.42825087904930115, "step": 6032, "token_acc": 0.8539670371789958 }, { "epoch": 0.3255274375438407, "grad_norm": 0.40045300126075745, "learning_rate": 1.5758606532609735e-05, "loss": 0.4109609127044678, "step": 6033, "token_acc": 0.8587257617728532 }, { "epoch": 0.32558139534883723, "grad_norm": 0.39094340801239014, "learning_rate": 1.5757177728902652e-05, "loss": 0.33604562282562256, "step": 6034, "token_acc": 0.8820480928689884 }, { "epoch": 0.3256353531538337, "grad_norm": 0.3837950527667999, "learning_rate": 1.575574874937287e-05, "loss": 0.2839737832546234, "step": 6035, "token_acc": 0.8917130395548203 }, { "epoch": 0.3256893109588302, "grad_norm": 0.3960666358470917, "learning_rate": 1.5754319594064028e-05, "loss": 0.42123353481292725, "step": 6036, "token_acc": 0.8541767262192178 }, { "epoch": 0.3257432687638267, "grad_norm": 0.46976742148399353, "learning_rate": 1.5752890263019772e-05, "loss": 0.43212759494781494, "step": 6037, "token_acc": 0.8548528125910814 }, { "epoch": 0.3257972265688232, "grad_norm": 0.45712780952453613, "learning_rate": 1.575146075628376e-05, "loss": 0.3086888790130615, "step": 6038, "token_acc": 0.8865153538050734 }, { "epoch": 0.32585118437381966, "grad_norm": 0.4024363160133362, "learning_rate": 1.5750031073899638e-05, "loss": 0.3914533257484436, "step": 6039, "token_acc": 0.8636129696345857 }, { "epoch": 0.32590514217881617, "grad_norm": 0.4790157973766327, "learning_rate": 1.5748601215911076e-05, "loss": 0.36999571323394775, "step": 6040, "token_acc": 0.8697253337425196 }, { "epoch": 0.3259590999838127, "grad_norm": 0.35671231150627136, "learning_rate": 1.5747171182361736e-05, "loss": 0.34400051832199097, "step": 6041, "token_acc": 0.8741453885865774 }, { "epoch": 0.3260130577888091, "grad_norm": 0.5033445954322815, "learning_rate": 1.5745740973295297e-05, "loss": 0.40041565895080566, "step": 6042, "token_acc": 0.8620152624201838 }, { "epoch": 0.32606701559380563, "grad_norm": 0.4869259297847748, "learning_rate": 1.5744310588755432e-05, "loss": 0.4015086889266968, "step": 6043, "token_acc": 0.8603925885158686 }, { "epoch": 0.32612097339880214, "grad_norm": 0.3962436318397522, "learning_rate": 1.5742880028785825e-05, "loss": 0.38894781470298767, "step": 6044, "token_acc": 0.8660082304526749 }, { "epoch": 0.32617493120379865, "grad_norm": 0.47384342551231384, "learning_rate": 1.574144929343017e-05, "loss": 0.4029982089996338, "step": 6045, "token_acc": 0.8619357105216933 }, { "epoch": 0.3262288890087951, "grad_norm": 0.38270777463912964, "learning_rate": 1.5740018382732153e-05, "loss": 0.36979424953460693, "step": 6046, "token_acc": 0.8720618394068465 }, { "epoch": 0.3262828468137916, "grad_norm": 0.4212479591369629, "learning_rate": 1.5738587296735482e-05, "loss": 0.34623637795448303, "step": 6047, "token_acc": 0.8791062197765549 }, { "epoch": 0.3263368046187881, "grad_norm": 0.34482860565185547, "learning_rate": 1.5737156035483855e-05, "loss": 0.35665807127952576, "step": 6048, "token_acc": 0.8726917880198168 }, { "epoch": 0.3263907624237846, "grad_norm": 0.454784631729126, "learning_rate": 1.573572459902099e-05, "loss": 0.40317708253860474, "step": 6049, "token_acc": 0.8643176951528129 }, { "epoch": 0.32644472022878107, "grad_norm": 0.3841801881790161, "learning_rate": 1.5734292987390594e-05, "loss": 0.408439576625824, "step": 6050, "token_acc": 0.8654390934844193 }, { "epoch": 0.3264986780337776, "grad_norm": 0.3686581552028656, "learning_rate": 1.5732861200636396e-05, "loss": 0.3457527160644531, "step": 6051, "token_acc": 0.8789877300613497 }, { "epoch": 0.3265526358387741, "grad_norm": 0.4558446407318115, "learning_rate": 1.5731429238802118e-05, "loss": 0.35189288854599, "step": 6052, "token_acc": 0.8724884080370943 }, { "epoch": 0.3266065936437706, "grad_norm": 0.40830355882644653, "learning_rate": 1.5729997101931493e-05, "loss": 0.3292919099330902, "step": 6053, "token_acc": 0.8860582306830908 }, { "epoch": 0.32666055144876704, "grad_norm": 0.5309012532234192, "learning_rate": 1.5728564790068252e-05, "loss": 0.39930975437164307, "step": 6054, "token_acc": 0.8624247635425624 }, { "epoch": 0.32671450925376355, "grad_norm": 0.48918285965919495, "learning_rate": 1.572713230325615e-05, "loss": 0.3946782350540161, "step": 6055, "token_acc": 0.8627289226200296 }, { "epoch": 0.32676846705876006, "grad_norm": 0.5658465623855591, "learning_rate": 1.5725699641538924e-05, "loss": 0.3637270927429199, "step": 6056, "token_acc": 0.8724465558194774 }, { "epoch": 0.32682242486375657, "grad_norm": 0.342357337474823, "learning_rate": 1.572426680496033e-05, "loss": 0.4009523391723633, "step": 6057, "token_acc": 0.8632053328325979 }, { "epoch": 0.326876382668753, "grad_norm": 0.2896624207496643, "learning_rate": 1.5722833793564133e-05, "loss": 0.4076293706893921, "step": 6058, "token_acc": 0.8642474717430101 }, { "epoch": 0.3269303404737495, "grad_norm": 0.42106893658638, "learning_rate": 1.5721400607394084e-05, "loss": 0.38848426938056946, "step": 6059, "token_acc": 0.86553412035704 }, { "epoch": 0.32698429827874603, "grad_norm": 0.3736353814601898, "learning_rate": 1.5719967246493962e-05, "loss": 0.4749751091003418, "step": 6060, "token_acc": 0.8382442841423157 }, { "epoch": 0.32703825608374254, "grad_norm": 0.3925894796848297, "learning_rate": 1.5718533710907538e-05, "loss": 0.3880513310432434, "step": 6061, "token_acc": 0.8666254061581309 }, { "epoch": 0.327092213888739, "grad_norm": 0.3947172462940216, "learning_rate": 1.5717100000678594e-05, "loss": 0.37422236800193787, "step": 6062, "token_acc": 0.8715737603942101 }, { "epoch": 0.3271461716937355, "grad_norm": 0.3779109716415405, "learning_rate": 1.5715666115850915e-05, "loss": 0.40622249245643616, "step": 6063, "token_acc": 0.8604790419161676 }, { "epoch": 0.327200129498732, "grad_norm": 0.4540883004665375, "learning_rate": 1.571423205646829e-05, "loss": 0.42851537466049194, "step": 6064, "token_acc": 0.847559826644055 }, { "epoch": 0.32725408730372846, "grad_norm": 0.3129526972770691, "learning_rate": 1.571279782257451e-05, "loss": 0.3606293201446533, "step": 6065, "token_acc": 0.871694417238002 }, { "epoch": 0.32730804510872497, "grad_norm": 0.4301411807537079, "learning_rate": 1.5711363414213384e-05, "loss": 0.448399156332016, "step": 6066, "token_acc": 0.8485131558461778 }, { "epoch": 0.3273620029137215, "grad_norm": 0.440178781747818, "learning_rate": 1.5709928831428717e-05, "loss": 0.4286496639251709, "step": 6067, "token_acc": 0.8503926347143244 }, { "epoch": 0.327415960718718, "grad_norm": 0.4355708956718445, "learning_rate": 1.570849407426432e-05, "loss": 0.36189916729927063, "step": 6068, "token_acc": 0.8719475878499107 }, { "epoch": 0.32746991852371443, "grad_norm": 0.4682629108428955, "learning_rate": 1.5707059142764007e-05, "loss": 0.38700616359710693, "step": 6069, "token_acc": 0.8689281732759994 }, { "epoch": 0.32752387632871094, "grad_norm": 0.3750828504562378, "learning_rate": 1.5705624036971602e-05, "loss": 0.363209068775177, "step": 6070, "token_acc": 0.8742601898380793 }, { "epoch": 0.32757783413370745, "grad_norm": 0.3388341963291168, "learning_rate": 1.5704188756930937e-05, "loss": 0.38664424419403076, "step": 6071, "token_acc": 0.8633879781420765 }, { "epoch": 0.32763179193870395, "grad_norm": 0.46147456765174866, "learning_rate": 1.5702753302685838e-05, "loss": 0.4577919840812683, "step": 6072, "token_acc": 0.8512654282508415 }, { "epoch": 0.3276857497437004, "grad_norm": 0.5364399552345276, "learning_rate": 1.570131767428015e-05, "loss": 0.38158079981803894, "step": 6073, "token_acc": 0.8655321988655322 }, { "epoch": 0.3277397075486969, "grad_norm": 0.37158897519111633, "learning_rate": 1.569988187175771e-05, "loss": 0.4079086184501648, "step": 6074, "token_acc": 0.858831615120275 }, { "epoch": 0.3277936653536934, "grad_norm": 0.3503885865211487, "learning_rate": 1.5698445895162374e-05, "loss": 0.43887412548065186, "step": 6075, "token_acc": 0.8544127120463818 }, { "epoch": 0.3278476231586899, "grad_norm": 0.3475077748298645, "learning_rate": 1.5697009744537993e-05, "loss": 0.36968672275543213, "step": 6076, "token_acc": 0.8667625209681284 }, { "epoch": 0.3279015809636864, "grad_norm": 0.42982324957847595, "learning_rate": 1.5695573419928427e-05, "loss": 0.4152023196220398, "step": 6077, "token_acc": 0.8541033434650456 }, { "epoch": 0.3279555387686829, "grad_norm": 0.3747401833534241, "learning_rate": 1.569413692137754e-05, "loss": 0.404205322265625, "step": 6078, "token_acc": 0.864432080336545 }, { "epoch": 0.3280094965736794, "grad_norm": 0.4067721664905548, "learning_rate": 1.5692700248929203e-05, "loss": 0.3525908589363098, "step": 6079, "token_acc": 0.8773966349289162 }, { "epoch": 0.3280634543786759, "grad_norm": 0.47527146339416504, "learning_rate": 1.569126340262729e-05, "loss": 0.4329894781112671, "step": 6080, "token_acc": 0.8492965367965368 }, { "epoch": 0.32811741218367235, "grad_norm": 0.360487699508667, "learning_rate": 1.5689826382515686e-05, "loss": 0.3891109824180603, "step": 6081, "token_acc": 0.8702693187819046 }, { "epoch": 0.32817136998866886, "grad_norm": 0.40034911036491394, "learning_rate": 1.5688389188638276e-05, "loss": 0.3875095844268799, "step": 6082, "token_acc": 0.8665061530230069 }, { "epoch": 0.32822532779366537, "grad_norm": 0.4131510555744171, "learning_rate": 1.5686951821038946e-05, "loss": 0.3897797465324402, "step": 6083, "token_acc": 0.862096015061186 }, { "epoch": 0.3282792855986619, "grad_norm": 0.4815777838230133, "learning_rate": 1.5685514279761602e-05, "loss": 0.41301703453063965, "step": 6084, "token_acc": 0.8591065292096219 }, { "epoch": 0.3283332434036583, "grad_norm": 0.4335471987724304, "learning_rate": 1.568407656485014e-05, "loss": 0.39356252551078796, "step": 6085, "token_acc": 0.8628631642330272 }, { "epoch": 0.32838720120865483, "grad_norm": 0.34750232100486755, "learning_rate": 1.5682638676348465e-05, "loss": 0.3895513117313385, "step": 6086, "token_acc": 0.8625515615792575 }, { "epoch": 0.32844115901365134, "grad_norm": 0.42539483308792114, "learning_rate": 1.56812006143005e-05, "loss": 0.4645387828350067, "step": 6087, "token_acc": 0.8454227113556778 }, { "epoch": 0.3284951168186478, "grad_norm": 0.48071280121803284, "learning_rate": 1.5679762378750153e-05, "loss": 0.4168333411216736, "step": 6088, "token_acc": 0.8554061784897025 }, { "epoch": 0.3285490746236443, "grad_norm": 0.4293610155582428, "learning_rate": 1.5678323969741348e-05, "loss": 0.391286700963974, "step": 6089, "token_acc": 0.8639100762745885 }, { "epoch": 0.3286030324286408, "grad_norm": 0.3310994505882263, "learning_rate": 1.567688538731802e-05, "loss": 0.3888915181159973, "step": 6090, "token_acc": 0.8661166764879199 }, { "epoch": 0.3286569902336373, "grad_norm": 0.5155880451202393, "learning_rate": 1.5675446631524098e-05, "loss": 0.34055382013320923, "step": 6091, "token_acc": 0.8813921835504569 }, { "epoch": 0.32871094803863377, "grad_norm": 0.40721604228019714, "learning_rate": 1.5674007702403524e-05, "loss": 0.386488139629364, "step": 6092, "token_acc": 0.8654752233956133 }, { "epoch": 0.3287649058436303, "grad_norm": 0.3284183144569397, "learning_rate": 1.567256860000024e-05, "loss": 0.3938030004501343, "step": 6093, "token_acc": 0.8672279792746114 }, { "epoch": 0.3288188636486268, "grad_norm": 0.4509640038013458, "learning_rate": 1.5671129324358195e-05, "loss": 0.41731196641921997, "step": 6094, "token_acc": 0.860718492343934 }, { "epoch": 0.3288728214536233, "grad_norm": 0.30853599309921265, "learning_rate": 1.566968987552135e-05, "loss": 0.40582728385925293, "step": 6095, "token_acc": 0.8597512267488303 }, { "epoch": 0.32892677925861974, "grad_norm": 0.4695379137992859, "learning_rate": 1.5668250253533658e-05, "loss": 0.4242173433303833, "step": 6096, "token_acc": 0.8523282318463041 }, { "epoch": 0.32898073706361625, "grad_norm": 0.37325751781463623, "learning_rate": 1.566681045843909e-05, "loss": 0.47242891788482666, "step": 6097, "token_acc": 0.8416996247845047 }, { "epoch": 0.32903469486861275, "grad_norm": 0.35524848103523254, "learning_rate": 1.5665370490281615e-05, "loss": 0.40609121322631836, "step": 6098, "token_acc": 0.8583416142210445 }, { "epoch": 0.32908865267360926, "grad_norm": 0.37198373675346375, "learning_rate": 1.5663930349105208e-05, "loss": 0.3845192790031433, "step": 6099, "token_acc": 0.8674831598285364 }, { "epoch": 0.3291426104786057, "grad_norm": 0.410907506942749, "learning_rate": 1.5662490034953855e-05, "loss": 0.4007505178451538, "step": 6100, "token_acc": 0.8613068545803971 }, { "epoch": 0.3291965682836022, "grad_norm": 0.4050576388835907, "learning_rate": 1.5661049547871538e-05, "loss": 0.42059600353240967, "step": 6101, "token_acc": 0.8556690177913464 }, { "epoch": 0.3292505260885987, "grad_norm": 0.3895135819911957, "learning_rate": 1.565960888790225e-05, "loss": 0.4026400148868561, "step": 6102, "token_acc": 0.8663071564992697 }, { "epoch": 0.32930448389359523, "grad_norm": 0.49425172805786133, "learning_rate": 1.565816805508999e-05, "loss": 0.397987425327301, "step": 6103, "token_acc": 0.8605584082156611 }, { "epoch": 0.3293584416985917, "grad_norm": 0.465758740901947, "learning_rate": 1.5656727049478758e-05, "loss": 0.40658387541770935, "step": 6104, "token_acc": 0.8631073341954554 }, { "epoch": 0.3294123995035882, "grad_norm": 0.3646135926246643, "learning_rate": 1.5655285871112567e-05, "loss": 0.42513173818588257, "step": 6105, "token_acc": 0.8554661743373335 }, { "epoch": 0.3294663573085847, "grad_norm": 0.4018796682357788, "learning_rate": 1.5653844520035427e-05, "loss": 0.3452290892601013, "step": 6106, "token_acc": 0.8769480959479604 }, { "epoch": 0.32952031511358115, "grad_norm": 0.41371092200279236, "learning_rate": 1.5652402996291355e-05, "loss": 0.4068920612335205, "step": 6107, "token_acc": 0.8603663003663004 }, { "epoch": 0.32957427291857766, "grad_norm": 0.4168853759765625, "learning_rate": 1.5650961299924375e-05, "loss": 0.40215200185775757, "step": 6108, "token_acc": 0.8637211303973605 }, { "epoch": 0.32962823072357417, "grad_norm": 0.5022510886192322, "learning_rate": 1.5649519430978517e-05, "loss": 0.3651827573776245, "step": 6109, "token_acc": 0.8717504332755632 }, { "epoch": 0.3296821885285707, "grad_norm": 0.27942049503326416, "learning_rate": 1.564807738949782e-05, "loss": 0.3236820101737976, "step": 6110, "token_acc": 0.8813162297824875 }, { "epoch": 0.3297361463335671, "grad_norm": 0.4331541359424591, "learning_rate": 1.5646635175526314e-05, "loss": 0.3729592561721802, "step": 6111, "token_acc": 0.8695976154992549 }, { "epoch": 0.32979010413856363, "grad_norm": 0.42140617966651917, "learning_rate": 1.564519278910805e-05, "loss": 0.37985822558403015, "step": 6112, "token_acc": 0.8640920295809368 }, { "epoch": 0.32984406194356014, "grad_norm": 0.35450661182403564, "learning_rate": 1.564375023028708e-05, "loss": 0.3852540850639343, "step": 6113, "token_acc": 0.8665882660394616 }, { "epoch": 0.32989801974855665, "grad_norm": 0.4033765494823456, "learning_rate": 1.5642307499107456e-05, "loss": 0.36737921833992004, "step": 6114, "token_acc": 0.8730025231286795 }, { "epoch": 0.3299519775535531, "grad_norm": 0.39024049043655396, "learning_rate": 1.5640864595613236e-05, "loss": 0.38333672285079956, "step": 6115, "token_acc": 0.8649056603773585 }, { "epoch": 0.3300059353585496, "grad_norm": 0.3515487313270569, "learning_rate": 1.5639421519848493e-05, "loss": 0.4408828914165497, "step": 6116, "token_acc": 0.8504702694085764 }, { "epoch": 0.3300598931635461, "grad_norm": 0.4219837784767151, "learning_rate": 1.563797827185729e-05, "loss": 0.4059346318244934, "step": 6117, "token_acc": 0.8606275902901125 }, { "epoch": 0.3301138509685426, "grad_norm": 0.3413779139518738, "learning_rate": 1.563653485168371e-05, "loss": 0.37318894267082214, "step": 6118, "token_acc": 0.8699639698657059 }, { "epoch": 0.3301678087735391, "grad_norm": 0.44829803705215454, "learning_rate": 1.563509125937183e-05, "loss": 0.4317174255847931, "step": 6119, "token_acc": 0.8555854452782103 }, { "epoch": 0.3302217665785356, "grad_norm": 0.39902856945991516, "learning_rate": 1.563364749496574e-05, "loss": 0.33324337005615234, "step": 6120, "token_acc": 0.8833459582296288 }, { "epoch": 0.3302757243835321, "grad_norm": 0.45090726017951965, "learning_rate": 1.563220355850953e-05, "loss": 0.397827684879303, "step": 6121, "token_acc": 0.8627165526277478 }, { "epoch": 0.3303296821885286, "grad_norm": 0.4375077188014984, "learning_rate": 1.56307594500473e-05, "loss": 0.3943265378475189, "step": 6122, "token_acc": 0.8680245073687697 }, { "epoch": 0.33038363999352505, "grad_norm": 0.3850187063217163, "learning_rate": 1.5629315169623156e-05, "loss": 0.3863564133644104, "step": 6123, "token_acc": 0.8651772957526952 }, { "epoch": 0.33043759779852155, "grad_norm": 0.558436930179596, "learning_rate": 1.5627870717281196e-05, "loss": 0.4218282103538513, "step": 6124, "token_acc": 0.8506972928630024 }, { "epoch": 0.33049155560351806, "grad_norm": 0.37181681394577026, "learning_rate": 1.562642609306554e-05, "loss": 0.40707942843437195, "step": 6125, "token_acc": 0.8644732541671206 }, { "epoch": 0.33054551340851457, "grad_norm": 0.37502968311309814, "learning_rate": 1.5624981297020304e-05, "loss": 0.3849424123764038, "step": 6126, "token_acc": 0.8670009811403031 }, { "epoch": 0.330599471213511, "grad_norm": 0.4541234076023102, "learning_rate": 1.5623536329189616e-05, "loss": 0.3614421486854553, "step": 6127, "token_acc": 0.868880573744933 }, { "epoch": 0.3306534290185075, "grad_norm": 0.38215547800064087, "learning_rate": 1.5622091189617598e-05, "loss": 0.4029240012168884, "step": 6128, "token_acc": 0.8599334394443641 }, { "epoch": 0.33070738682350403, "grad_norm": 0.4161466062068939, "learning_rate": 1.562064587834839e-05, "loss": 0.3414255976676941, "step": 6129, "token_acc": 0.8792850473522742 }, { "epoch": 0.3307613446285005, "grad_norm": 0.3777807354927063, "learning_rate": 1.561920039542613e-05, "loss": 0.36191627383232117, "step": 6130, "token_acc": 0.872356495468278 }, { "epoch": 0.330815302433497, "grad_norm": 0.40279945731163025, "learning_rate": 1.5617754740894958e-05, "loss": 0.4299907088279724, "step": 6131, "token_acc": 0.8549014838238871 }, { "epoch": 0.3308692602384935, "grad_norm": 0.4417631924152374, "learning_rate": 1.561630891479903e-05, "loss": 0.34634923934936523, "step": 6132, "token_acc": 0.8758451137062078 }, { "epoch": 0.33092321804349, "grad_norm": 0.30409446358680725, "learning_rate": 1.56148629171825e-05, "loss": 0.38280290365219116, "step": 6133, "token_acc": 0.8615580691413095 }, { "epoch": 0.33097717584848646, "grad_norm": 0.4023337960243225, "learning_rate": 1.5613416748089527e-05, "loss": 0.37938833236694336, "step": 6134, "token_acc": 0.8697019242862533 }, { "epoch": 0.33103113365348297, "grad_norm": 0.46211597323417664, "learning_rate": 1.5611970407564275e-05, "loss": 0.39028283953666687, "step": 6135, "token_acc": 0.866362807657247 }, { "epoch": 0.3310850914584795, "grad_norm": 0.4387299120426178, "learning_rate": 1.5610523895650923e-05, "loss": 0.3857547640800476, "step": 6136, "token_acc": 0.8645512999720436 }, { "epoch": 0.331139049263476, "grad_norm": 0.44737088680267334, "learning_rate": 1.5609077212393637e-05, "loss": 0.4254114627838135, "step": 6137, "token_acc": 0.8529683174407875 }, { "epoch": 0.33119300706847243, "grad_norm": 0.4190841019153595, "learning_rate": 1.5607630357836602e-05, "loss": 0.3668035864830017, "step": 6138, "token_acc": 0.8691523530233207 }, { "epoch": 0.33124696487346894, "grad_norm": 0.3756243586540222, "learning_rate": 1.5606183332024003e-05, "loss": 0.37938249111175537, "step": 6139, "token_acc": 0.8659294046356613 }, { "epoch": 0.33130092267846545, "grad_norm": 0.3822888135910034, "learning_rate": 1.5604736135000038e-05, "loss": 0.40152856707572937, "step": 6140, "token_acc": 0.8606323788845163 }, { "epoch": 0.33135488048346196, "grad_norm": 0.4559667706489563, "learning_rate": 1.5603288766808896e-05, "loss": 0.3987728953361511, "step": 6141, "token_acc": 0.8617749825296995 }, { "epoch": 0.3314088382884584, "grad_norm": 0.4878002107143402, "learning_rate": 1.5601841227494783e-05, "loss": 0.3601800799369812, "step": 6142, "token_acc": 0.8760228500849159 }, { "epoch": 0.3314627960934549, "grad_norm": 0.5056569576263428, "learning_rate": 1.560039351710191e-05, "loss": 0.3877386450767517, "step": 6143, "token_acc": 0.867198404785643 }, { "epoch": 0.3315167538984514, "grad_norm": 0.36294373869895935, "learning_rate": 1.5598945635674483e-05, "loss": 0.3862905502319336, "step": 6144, "token_acc": 0.8633559066967644 }, { "epoch": 0.33157071170344793, "grad_norm": 0.331657737493515, "learning_rate": 1.5597497583256723e-05, "loss": 0.3794271945953369, "step": 6145, "token_acc": 0.868047523919672 }, { "epoch": 0.3316246695084444, "grad_norm": 0.4652271866798401, "learning_rate": 1.5596049359892856e-05, "loss": 0.40339455008506775, "step": 6146, "token_acc": 0.8621425522833973 }, { "epoch": 0.3316786273134409, "grad_norm": 0.39490821957588196, "learning_rate": 1.5594600965627102e-05, "loss": 0.4227294325828552, "step": 6147, "token_acc": 0.854714475431607 }, { "epoch": 0.3317325851184374, "grad_norm": 0.39128991961479187, "learning_rate": 1.5593152400503704e-05, "loss": 0.386224627494812, "step": 6148, "token_acc": 0.8636478304742684 }, { "epoch": 0.33178654292343385, "grad_norm": 0.3250878155231476, "learning_rate": 1.55917036645669e-05, "loss": 0.3832477927207947, "step": 6149, "token_acc": 0.8682023771957506 }, { "epoch": 0.33184050072843035, "grad_norm": 0.3801826238632202, "learning_rate": 1.5590254757860927e-05, "loss": 0.3827892541885376, "step": 6150, "token_acc": 0.8614817638835939 }, { "epoch": 0.33189445853342686, "grad_norm": 0.2988666296005249, "learning_rate": 1.5588805680430036e-05, "loss": 0.40181106328964233, "step": 6151, "token_acc": 0.8620779559379481 }, { "epoch": 0.33194841633842337, "grad_norm": 0.43232491612434387, "learning_rate": 1.5587356432318486e-05, "loss": 0.39801695942878723, "step": 6152, "token_acc": 0.8611481975967957 }, { "epoch": 0.3320023741434198, "grad_norm": 0.4404945969581604, "learning_rate": 1.5585907013570534e-05, "loss": 0.37390267848968506, "step": 6153, "token_acc": 0.8740524005851842 }, { "epoch": 0.33205633194841633, "grad_norm": 0.40065285563468933, "learning_rate": 1.5584457424230446e-05, "loss": 0.4137987494468689, "step": 6154, "token_acc": 0.8571428571428571 }, { "epoch": 0.33211028975341284, "grad_norm": 0.36400899291038513, "learning_rate": 1.558300766434249e-05, "loss": 0.3323422372341156, "step": 6155, "token_acc": 0.8807759028544394 }, { "epoch": 0.33216424755840934, "grad_norm": 0.45277243852615356, "learning_rate": 1.5581557733950944e-05, "loss": 0.36298590898513794, "step": 6156, "token_acc": 0.8674033149171271 }, { "epoch": 0.3322182053634058, "grad_norm": 0.30102136731147766, "learning_rate": 1.5580107633100085e-05, "loss": 0.35230135917663574, "step": 6157, "token_acc": 0.8799035032515209 }, { "epoch": 0.3322721631684023, "grad_norm": 0.41408535838127136, "learning_rate": 1.5578657361834202e-05, "loss": 0.32783323526382446, "step": 6158, "token_acc": 0.8826882688268827 }, { "epoch": 0.3323261209733988, "grad_norm": 0.4490729570388794, "learning_rate": 1.5577206920197583e-05, "loss": 0.3989594578742981, "step": 6159, "token_acc": 0.8612784717119765 }, { "epoch": 0.3323800787783953, "grad_norm": 0.38474956154823303, "learning_rate": 1.5575756308234528e-05, "loss": 0.34675079584121704, "step": 6160, "token_acc": 0.8750898375736668 }, { "epoch": 0.33243403658339177, "grad_norm": 0.30003753304481506, "learning_rate": 1.5574305525989335e-05, "loss": 0.35534995794296265, "step": 6161, "token_acc": 0.8774733637747336 }, { "epoch": 0.3324879943883883, "grad_norm": 0.452779084444046, "learning_rate": 1.5572854573506312e-05, "loss": 0.31738191843032837, "step": 6162, "token_acc": 0.8882671480144404 }, { "epoch": 0.3325419521933848, "grad_norm": 0.3527013659477234, "learning_rate": 1.557140345082977e-05, "loss": 0.4043341875076294, "step": 6163, "token_acc": 0.857944580848825 }, { "epoch": 0.3325959099983813, "grad_norm": 0.4710487425327301, "learning_rate": 1.5569952158004025e-05, "loss": 0.39297282695770264, "step": 6164, "token_acc": 0.8610536367977081 }, { "epoch": 0.33264986780337774, "grad_norm": 0.49245503544807434, "learning_rate": 1.5568500695073402e-05, "loss": 0.3977418839931488, "step": 6165, "token_acc": 0.862561517224823 }, { "epoch": 0.33270382560837425, "grad_norm": 0.4248639643192291, "learning_rate": 1.5567049062082225e-05, "loss": 0.37621212005615234, "step": 6166, "token_acc": 0.869248291571754 }, { "epoch": 0.33275778341337076, "grad_norm": 0.4754016697406769, "learning_rate": 1.5565597259074826e-05, "loss": 0.37272751331329346, "step": 6167, "token_acc": 0.8682275541795665 }, { "epoch": 0.33281174121836726, "grad_norm": 0.3390653431415558, "learning_rate": 1.556414528609555e-05, "loss": 0.41679465770721436, "step": 6168, "token_acc": 0.858804780876494 }, { "epoch": 0.3328656990233637, "grad_norm": 0.41633111238479614, "learning_rate": 1.556269314318873e-05, "loss": 0.3836643695831299, "step": 6169, "token_acc": 0.868362004487659 }, { "epoch": 0.3329196568283602, "grad_norm": 0.4561915993690491, "learning_rate": 1.556124083039872e-05, "loss": 0.3880757689476013, "step": 6170, "token_acc": 0.8629782045419906 }, { "epoch": 0.33297361463335673, "grad_norm": 0.514758288860321, "learning_rate": 1.555978834776987e-05, "loss": 0.4300236701965332, "step": 6171, "token_acc": 0.849481822668202 }, { "epoch": 0.3330275724383532, "grad_norm": 0.4937157928943634, "learning_rate": 1.5558335695346547e-05, "loss": 0.36017531156539917, "step": 6172, "token_acc": 0.8751709318226216 }, { "epoch": 0.3330815302433497, "grad_norm": 0.5265722274780273, "learning_rate": 1.5556882873173103e-05, "loss": 0.42486804723739624, "step": 6173, "token_acc": 0.8619246861924686 }, { "epoch": 0.3331354880483462, "grad_norm": 0.44538360834121704, "learning_rate": 1.555542988129391e-05, "loss": 0.3700132369995117, "step": 6174, "token_acc": 0.870534992726685 }, { "epoch": 0.3331894458533427, "grad_norm": 0.4020426869392395, "learning_rate": 1.555397671975335e-05, "loss": 0.36892613768577576, "step": 6175, "token_acc": 0.8748553479914035 }, { "epoch": 0.33324340365833915, "grad_norm": 0.40602874755859375, "learning_rate": 1.555252338859579e-05, "loss": 0.37742775678634644, "step": 6176, "token_acc": 0.8656143974037469 }, { "epoch": 0.33329736146333566, "grad_norm": 0.4764662981033325, "learning_rate": 1.5551069887865623e-05, "loss": 0.3615034222602844, "step": 6177, "token_acc": 0.8724568542163603 }, { "epoch": 0.33335131926833217, "grad_norm": 0.4820774793624878, "learning_rate": 1.5549616217607235e-05, "loss": 0.38577061891555786, "step": 6178, "token_acc": 0.8682251340083383 }, { "epoch": 0.3334052770733287, "grad_norm": 0.3454960882663727, "learning_rate": 1.554816237786502e-05, "loss": 0.3961203992366791, "step": 6179, "token_acc": 0.8617511520737328 }, { "epoch": 0.33345923487832513, "grad_norm": 0.42174044251441956, "learning_rate": 1.5546708368683382e-05, "loss": 0.4004821479320526, "step": 6180, "token_acc": 0.8664060393637099 }, { "epoch": 0.33351319268332164, "grad_norm": 0.4262867271900177, "learning_rate": 1.5545254190106725e-05, "loss": 0.36998167634010315, "step": 6181, "token_acc": 0.8723605420737472 }, { "epoch": 0.33356715048831814, "grad_norm": 0.33645787835121155, "learning_rate": 1.5543799842179454e-05, "loss": 0.40043550729751587, "step": 6182, "token_acc": 0.8637041773231031 }, { "epoch": 0.33362110829331465, "grad_norm": 0.3444063663482666, "learning_rate": 1.5542345324945987e-05, "loss": 0.38816821575164795, "step": 6183, "token_acc": 0.867441103028987 }, { "epoch": 0.3336750660983111, "grad_norm": 0.425921231508255, "learning_rate": 1.554089063845075e-05, "loss": 0.43041449785232544, "step": 6184, "token_acc": 0.8503629257398102 }, { "epoch": 0.3337290239033076, "grad_norm": 0.4197206199169159, "learning_rate": 1.5539435782738164e-05, "loss": 0.40069153904914856, "step": 6185, "token_acc": 0.8615520282186949 }, { "epoch": 0.3337829817083041, "grad_norm": 0.3494225740432739, "learning_rate": 1.5537980757852658e-05, "loss": 0.34460940957069397, "step": 6186, "token_acc": 0.8816491511721908 }, { "epoch": 0.3338369395133006, "grad_norm": 0.40169620513916016, "learning_rate": 1.553652556383867e-05, "loss": 0.3874099850654602, "step": 6187, "token_acc": 0.8595859585958596 }, { "epoch": 0.3338908973182971, "grad_norm": 0.4588896632194519, "learning_rate": 1.5535070200740646e-05, "loss": 0.43568333983421326, "step": 6188, "token_acc": 0.8513485260296885 }, { "epoch": 0.3339448551232936, "grad_norm": 0.27935221791267395, "learning_rate": 1.5533614668603022e-05, "loss": 0.36611515283584595, "step": 6189, "token_acc": 0.8688890798178538 }, { "epoch": 0.3339988129282901, "grad_norm": 0.40533795952796936, "learning_rate": 1.553215896747026e-05, "loss": 0.34893274307250977, "step": 6190, "token_acc": 0.8799736495388669 }, { "epoch": 0.3340527707332866, "grad_norm": 0.34457042813301086, "learning_rate": 1.553070309738681e-05, "loss": 0.40450140833854675, "step": 6191, "token_acc": 0.8589892535579436 }, { "epoch": 0.33410672853828305, "grad_norm": 0.4437344968318939, "learning_rate": 1.5529247058397134e-05, "loss": 0.4528297483921051, "step": 6192, "token_acc": 0.8446347189614694 }, { "epoch": 0.33416068634327956, "grad_norm": 0.42920154333114624, "learning_rate": 1.5527790850545708e-05, "loss": 0.40464839339256287, "step": 6193, "token_acc": 0.8590550127158345 }, { "epoch": 0.33421464414827606, "grad_norm": 0.3195975422859192, "learning_rate": 1.552633447387699e-05, "loss": 0.34295690059661865, "step": 6194, "token_acc": 0.8751699755235246 }, { "epoch": 0.3342686019532725, "grad_norm": 0.39364203810691833, "learning_rate": 1.552487792843547e-05, "loss": 0.3321272134780884, "step": 6195, "token_acc": 0.8813217499224325 }, { "epoch": 0.334322559758269, "grad_norm": 0.45160290598869324, "learning_rate": 1.5523421214265626e-05, "loss": 0.4296042323112488, "step": 6196, "token_acc": 0.8517205148410822 }, { "epoch": 0.33437651756326553, "grad_norm": 0.47192078828811646, "learning_rate": 1.5521964331411944e-05, "loss": 0.4113449454307556, "step": 6197, "token_acc": 0.8621918198660397 }, { "epoch": 0.33443047536826204, "grad_norm": 0.45892831683158875, "learning_rate": 1.5520507279918918e-05, "loss": 0.3980873227119446, "step": 6198, "token_acc": 0.865328585302687 }, { "epoch": 0.3344844331732585, "grad_norm": 0.331978440284729, "learning_rate": 1.5519050059831045e-05, "loss": 0.4941730201244354, "step": 6199, "token_acc": 0.8366081871345029 }, { "epoch": 0.334538390978255, "grad_norm": 0.3321033716201782, "learning_rate": 1.5517592671192828e-05, "loss": 0.3522866368293762, "step": 6200, "token_acc": 0.8735774647887324 }, { "epoch": 0.3345923487832515, "grad_norm": 0.4301827847957611, "learning_rate": 1.551613511404878e-05, "loss": 0.37603843212127686, "step": 6201, "token_acc": 0.8732621103468462 }, { "epoch": 0.334646306588248, "grad_norm": 0.4296092092990875, "learning_rate": 1.5514677388443404e-05, "loss": 0.44871917366981506, "step": 6202, "token_acc": 0.8532469746037157 }, { "epoch": 0.33470026439324446, "grad_norm": 0.34099316596984863, "learning_rate": 1.551321949442123e-05, "loss": 0.33981162309646606, "step": 6203, "token_acc": 0.8842449572960204 }, { "epoch": 0.33475422219824097, "grad_norm": 0.3494813144207001, "learning_rate": 1.5511761432026776e-05, "loss": 0.401727557182312, "step": 6204, "token_acc": 0.8620779900549594 }, { "epoch": 0.3348081800032375, "grad_norm": 0.5073039531707764, "learning_rate": 1.5510303201304567e-05, "loss": 0.4042070508003235, "step": 6205, "token_acc": 0.8594509803921568 }, { "epoch": 0.334862137808234, "grad_norm": 0.32990846037864685, "learning_rate": 1.550884480229915e-05, "loss": 0.42224520444869995, "step": 6206, "token_acc": 0.8517839514878014 }, { "epoch": 0.33491609561323044, "grad_norm": 0.43569010496139526, "learning_rate": 1.550738623505505e-05, "loss": 0.35921600461006165, "step": 6207, "token_acc": 0.8746675531914894 }, { "epoch": 0.33497005341822694, "grad_norm": 0.43963518738746643, "learning_rate": 1.550592749961682e-05, "loss": 0.3951259255409241, "step": 6208, "token_acc": 0.859139912332662 }, { "epoch": 0.33502401122322345, "grad_norm": 0.36235401034355164, "learning_rate": 1.5504468596029007e-05, "loss": 0.366527259349823, "step": 6209, "token_acc": 0.8687196110210696 }, { "epoch": 0.33507796902821996, "grad_norm": 0.36353635787963867, "learning_rate": 1.5503009524336164e-05, "loss": 0.3383348286151886, "step": 6210, "token_acc": 0.8791637136782424 }, { "epoch": 0.3351319268332164, "grad_norm": 0.3964933454990387, "learning_rate": 1.550155028458285e-05, "loss": 0.37509822845458984, "step": 6211, "token_acc": 0.8736347750109218 }, { "epoch": 0.3351858846382129, "grad_norm": 0.3624950647354126, "learning_rate": 1.5500090876813633e-05, "loss": 0.39975255727767944, "step": 6212, "token_acc": 0.860408263048527 }, { "epoch": 0.3352398424432094, "grad_norm": 0.44944262504577637, "learning_rate": 1.5498631301073083e-05, "loss": 0.4024120569229126, "step": 6213, "token_acc": 0.8653601019757807 }, { "epoch": 0.3352938002482059, "grad_norm": 0.3176948130130768, "learning_rate": 1.549717155740577e-05, "loss": 0.35391461849212646, "step": 6214, "token_acc": 0.8759467223818229 }, { "epoch": 0.3353477580532024, "grad_norm": 0.39873242378234863, "learning_rate": 1.549571164585628e-05, "loss": 0.3587620258331299, "step": 6215, "token_acc": 0.8680332261521972 }, { "epoch": 0.3354017158581989, "grad_norm": 0.3756742477416992, "learning_rate": 1.54942515664692e-05, "loss": 0.36521852016448975, "step": 6216, "token_acc": 0.8693232131562302 }, { "epoch": 0.3354556736631954, "grad_norm": 0.4520569145679474, "learning_rate": 1.5492791319289114e-05, "loss": 0.45315757393836975, "step": 6217, "token_acc": 0.8506078055022392 }, { "epoch": 0.33550963146819185, "grad_norm": 0.41611453890800476, "learning_rate": 1.5491330904360612e-05, "loss": 0.3968042731285095, "step": 6218, "token_acc": 0.8567639257294429 }, { "epoch": 0.33556358927318836, "grad_norm": 0.40474772453308105, "learning_rate": 1.548987032172831e-05, "loss": 0.3038158416748047, "step": 6219, "token_acc": 0.8890922959572846 }, { "epoch": 0.33561754707818486, "grad_norm": 0.43554094433784485, "learning_rate": 1.5488409571436805e-05, "loss": 0.4114740490913391, "step": 6220, "token_acc": 0.857912457912458 }, { "epoch": 0.33567150488318137, "grad_norm": 0.4667649567127228, "learning_rate": 1.5486948653530714e-05, "loss": 0.44264957308769226, "step": 6221, "token_acc": 0.8530035335689046 }, { "epoch": 0.3357254626881778, "grad_norm": 0.43348750472068787, "learning_rate": 1.5485487568054644e-05, "loss": 0.4180372655391693, "step": 6222, "token_acc": 0.8574426229508196 }, { "epoch": 0.33577942049317433, "grad_norm": 0.4152258336544037, "learning_rate": 1.548402631505322e-05, "loss": 0.4004962146282196, "step": 6223, "token_acc": 0.8628682842287695 }, { "epoch": 0.33583337829817084, "grad_norm": 0.4261181354522705, "learning_rate": 1.548256489457107e-05, "loss": 0.38921403884887695, "step": 6224, "token_acc": 0.8630839068677698 }, { "epoch": 0.33588733610316734, "grad_norm": 0.45589742064476013, "learning_rate": 1.5481103306652823e-05, "loss": 0.39211398363113403, "step": 6225, "token_acc": 0.8673148840688107 }, { "epoch": 0.3359412939081638, "grad_norm": 0.4548551142215729, "learning_rate": 1.5479641551343123e-05, "loss": 0.3194378614425659, "step": 6226, "token_acc": 0.8847093307278945 }, { "epoch": 0.3359952517131603, "grad_norm": 0.32678452134132385, "learning_rate": 1.54781796286866e-05, "loss": 0.3443904519081116, "step": 6227, "token_acc": 0.8794920037629351 }, { "epoch": 0.3360492095181568, "grad_norm": 0.3985944390296936, "learning_rate": 1.5476717538727907e-05, "loss": 0.3766050338745117, "step": 6228, "token_acc": 0.8757188498402556 }, { "epoch": 0.3361031673231533, "grad_norm": 0.4245660901069641, "learning_rate": 1.5475255281511696e-05, "loss": 0.4353192150592804, "step": 6229, "token_acc": 0.8572701807995926 }, { "epoch": 0.33615712512814977, "grad_norm": 0.47819221019744873, "learning_rate": 1.5473792857082623e-05, "loss": 0.4570065438747406, "step": 6230, "token_acc": 0.8458227040816326 }, { "epoch": 0.3362110829331463, "grad_norm": 0.5196739435195923, "learning_rate": 1.5472330265485343e-05, "loss": 0.43753576278686523, "step": 6231, "token_acc": 0.853377008267041 }, { "epoch": 0.3362650407381428, "grad_norm": 0.35509535670280457, "learning_rate": 1.547086750676454e-05, "loss": 0.35842663049697876, "step": 6232, "token_acc": 0.8729561805101373 }, { "epoch": 0.3363189985431393, "grad_norm": 0.4340546429157257, "learning_rate": 1.5469404580964874e-05, "loss": 0.345412015914917, "step": 6233, "token_acc": 0.8782941343156702 }, { "epoch": 0.33637295634813574, "grad_norm": 0.5153405070304871, "learning_rate": 1.5467941488131024e-05, "loss": 0.4279574453830719, "step": 6234, "token_acc": 0.8550300415960561 }, { "epoch": 0.33642691415313225, "grad_norm": 0.3754373788833618, "learning_rate": 1.5466478228307673e-05, "loss": 0.3274763822555542, "step": 6235, "token_acc": 0.8816257147272446 }, { "epoch": 0.33648087195812876, "grad_norm": 0.3147079646587372, "learning_rate": 1.546501480153951e-05, "loss": 0.34122395515441895, "step": 6236, "token_acc": 0.8787748058671269 }, { "epoch": 0.3365348297631252, "grad_norm": 0.41856497526168823, "learning_rate": 1.5463551207871224e-05, "loss": 0.4527820944786072, "step": 6237, "token_acc": 0.8404189772027111 }, { "epoch": 0.3365887875681217, "grad_norm": 0.464118629693985, "learning_rate": 1.546208744734752e-05, "loss": 0.46187156438827515, "step": 6238, "token_acc": 0.8467543732832152 }, { "epoch": 0.3366427453731182, "grad_norm": 0.3498471677303314, "learning_rate": 1.546062352001309e-05, "loss": 0.39208483695983887, "step": 6239, "token_acc": 0.8688923281008181 }, { "epoch": 0.33669670317811473, "grad_norm": 0.4509376287460327, "learning_rate": 1.5459159425912652e-05, "loss": 0.39351773262023926, "step": 6240, "token_acc": 0.8598613364803068 }, { "epoch": 0.3367506609831112, "grad_norm": 0.37914228439331055, "learning_rate": 1.5457695165090915e-05, "loss": 0.3621895909309387, "step": 6241, "token_acc": 0.8744881889763779 }, { "epoch": 0.3368046187881077, "grad_norm": 0.3133385479450226, "learning_rate": 1.5456230737592596e-05, "loss": 0.3719863295555115, "step": 6242, "token_acc": 0.8686373467916366 }, { "epoch": 0.3368585765931042, "grad_norm": 0.45154568552970886, "learning_rate": 1.5454766143462424e-05, "loss": 0.40247422456741333, "step": 6243, "token_acc": 0.8562393162393163 }, { "epoch": 0.3369125343981007, "grad_norm": 0.3674169182777405, "learning_rate": 1.5453301382745118e-05, "loss": 0.34727898240089417, "step": 6244, "token_acc": 0.8769657724329325 }, { "epoch": 0.33696649220309716, "grad_norm": 0.404427170753479, "learning_rate": 1.545183645548542e-05, "loss": 0.3610995411872864, "step": 6245, "token_acc": 0.8769549651403806 }, { "epoch": 0.33702045000809366, "grad_norm": 0.5480382442474365, "learning_rate": 1.5450371361728064e-05, "loss": 0.42602264881134033, "step": 6246, "token_acc": 0.8576067543432375 }, { "epoch": 0.33707440781309017, "grad_norm": 0.356402188539505, "learning_rate": 1.544890610151779e-05, "loss": 0.37689265608787537, "step": 6247, "token_acc": 0.8708863951582398 }, { "epoch": 0.3371283656180867, "grad_norm": 0.40946993231773376, "learning_rate": 1.5447440674899356e-05, "loss": 0.4032633900642395, "step": 6248, "token_acc": 0.8606050748210801 }, { "epoch": 0.33718232342308313, "grad_norm": 0.4554026126861572, "learning_rate": 1.544597508191751e-05, "loss": 0.35605520009994507, "step": 6249, "token_acc": 0.8728943338437979 }, { "epoch": 0.33723628122807964, "grad_norm": 0.40822556614875793, "learning_rate": 1.544450932261701e-05, "loss": 0.41991710662841797, "step": 6250, "token_acc": 0.8530120481927711 }, { "epoch": 0.33729023903307614, "grad_norm": 0.3854626715183258, "learning_rate": 1.544304339704262e-05, "loss": 0.4247952103614807, "step": 6251, "token_acc": 0.8544839255499154 }, { "epoch": 0.33734419683807265, "grad_norm": 0.37717434763908386, "learning_rate": 1.5441577305239116e-05, "loss": 0.4424220323562622, "step": 6252, "token_acc": 0.8489875579409613 }, { "epoch": 0.3373981546430691, "grad_norm": 0.34294313192367554, "learning_rate": 1.5440111047251264e-05, "loss": 0.40549010038375854, "step": 6253, "token_acc": 0.8653647180548371 }, { "epoch": 0.3374521124480656, "grad_norm": 0.3190036714076996, "learning_rate": 1.5438644623123845e-05, "loss": 0.3944559097290039, "step": 6254, "token_acc": 0.8644253608624155 }, { "epoch": 0.3375060702530621, "grad_norm": 0.4005756378173828, "learning_rate": 1.543717803290164e-05, "loss": 0.3963204622268677, "step": 6255, "token_acc": 0.8634696755994358 }, { "epoch": 0.3375600280580586, "grad_norm": 0.5012432336807251, "learning_rate": 1.543571127662945e-05, "loss": 0.3745114207267761, "step": 6256, "token_acc": 0.8672389698542623 }, { "epoch": 0.3376139858630551, "grad_norm": 0.37545332312583923, "learning_rate": 1.543424435435206e-05, "loss": 0.36516863107681274, "step": 6257, "token_acc": 0.868140589569161 }, { "epoch": 0.3376679436680516, "grad_norm": 0.37317919731140137, "learning_rate": 1.543277726611427e-05, "loss": 0.4175514280796051, "step": 6258, "token_acc": 0.8615601605728546 }, { "epoch": 0.3377219014730481, "grad_norm": 0.46082842350006104, "learning_rate": 1.5431310011960884e-05, "loss": 0.4087848365306854, "step": 6259, "token_acc": 0.8632031970488779 }, { "epoch": 0.33777585927804454, "grad_norm": 0.3153008818626404, "learning_rate": 1.542984259193671e-05, "loss": 0.43061792850494385, "step": 6260, "token_acc": 0.8548364840623707 }, { "epoch": 0.33782981708304105, "grad_norm": 0.33765727281570435, "learning_rate": 1.542837500608657e-05, "loss": 0.43440666794776917, "step": 6261, "token_acc": 0.8546301864101022 }, { "epoch": 0.33788377488803756, "grad_norm": 0.27464547753334045, "learning_rate": 1.5426907254455283e-05, "loss": 0.38227275013923645, "step": 6262, "token_acc": 0.8667655157805549 }, { "epoch": 0.33793773269303407, "grad_norm": 0.2917994260787964, "learning_rate": 1.542543933708766e-05, "loss": 0.3438653349876404, "step": 6263, "token_acc": 0.878505832712832 }, { "epoch": 0.3379916904980305, "grad_norm": 0.3824000656604767, "learning_rate": 1.5423971254028546e-05, "loss": 0.42352068424224854, "step": 6264, "token_acc": 0.8539809084233715 }, { "epoch": 0.338045648303027, "grad_norm": 0.29727864265441895, "learning_rate": 1.5422503005322774e-05, "loss": 0.3543710708618164, "step": 6265, "token_acc": 0.8748500119990401 }, { "epoch": 0.33809960610802353, "grad_norm": 0.3739388585090637, "learning_rate": 1.542103459101518e-05, "loss": 0.3664476275444031, "step": 6266, "token_acc": 0.8709940566367557 }, { "epoch": 0.33815356391302004, "grad_norm": 0.37835025787353516, "learning_rate": 1.5419566011150607e-05, "loss": 0.330841600894928, "step": 6267, "token_acc": 0.8827346099066308 }, { "epoch": 0.3382075217180165, "grad_norm": 0.3168562054634094, "learning_rate": 1.5418097265773906e-05, "loss": 0.3823480010032654, "step": 6268, "token_acc": 0.8678929765886287 }, { "epoch": 0.338261479523013, "grad_norm": 0.4613708555698395, "learning_rate": 1.5416628354929936e-05, "loss": 0.4480710029602051, "step": 6269, "token_acc": 0.843362136684996 }, { "epoch": 0.3383154373280095, "grad_norm": 0.3780740201473236, "learning_rate": 1.541515927866356e-05, "loss": 0.4166322946548462, "step": 6270, "token_acc": 0.8523104134424054 }, { "epoch": 0.338369395133006, "grad_norm": 0.36641767621040344, "learning_rate": 1.5413690037019633e-05, "loss": 0.34140539169311523, "step": 6271, "token_acc": 0.8790143964562569 }, { "epoch": 0.33842335293800246, "grad_norm": 0.4125726819038391, "learning_rate": 1.541222063004303e-05, "loss": 0.3916298449039459, "step": 6272, "token_acc": 0.8663321754792229 }, { "epoch": 0.33847731074299897, "grad_norm": 0.37221747636795044, "learning_rate": 1.541075105777863e-05, "loss": 0.3737332224845886, "step": 6273, "token_acc": 0.8728988880268942 }, { "epoch": 0.3385312685479955, "grad_norm": 0.37317174673080444, "learning_rate": 1.540928132027131e-05, "loss": 0.43469053506851196, "step": 6274, "token_acc": 0.8529859841560025 }, { "epoch": 0.338585226352992, "grad_norm": 0.348651260137558, "learning_rate": 1.540781141756595e-05, "loss": 0.32908597588539124, "step": 6275, "token_acc": 0.8816592674805771 }, { "epoch": 0.33863918415798844, "grad_norm": 0.46204522252082825, "learning_rate": 1.5406341349707446e-05, "loss": 0.409881055355072, "step": 6276, "token_acc": 0.8590489223400616 }, { "epoch": 0.33869314196298494, "grad_norm": 0.38763853907585144, "learning_rate": 1.54048711167407e-05, "loss": 0.4215511381626129, "step": 6277, "token_acc": 0.8562021712048408 }, { "epoch": 0.33874709976798145, "grad_norm": 0.3446166515350342, "learning_rate": 1.5403400718710602e-05, "loss": 0.3874273896217346, "step": 6278, "token_acc": 0.8694946974422957 }, { "epoch": 0.3388010575729779, "grad_norm": 0.4045826494693756, "learning_rate": 1.540193015566206e-05, "loss": 0.38005203008651733, "step": 6279, "token_acc": 0.8636441111300495 }, { "epoch": 0.3388550153779744, "grad_norm": 0.5020855069160461, "learning_rate": 1.540045942763999e-05, "loss": 0.43930914998054504, "step": 6280, "token_acc": 0.8550580431177446 }, { "epoch": 0.3389089731829709, "grad_norm": 0.362154096364975, "learning_rate": 1.5398988534689304e-05, "loss": 0.4083075225353241, "step": 6281, "token_acc": 0.857690384935844 }, { "epoch": 0.3389629309879674, "grad_norm": 0.45728176832199097, "learning_rate": 1.539751747685492e-05, "loss": 0.4584786891937256, "step": 6282, "token_acc": 0.8421959952260973 }, { "epoch": 0.3390168887929639, "grad_norm": 0.40837132930755615, "learning_rate": 1.5396046254181766e-05, "loss": 0.3641161024570465, "step": 6283, "token_acc": 0.8685987619583568 }, { "epoch": 0.3390708465979604, "grad_norm": 0.3963327705860138, "learning_rate": 1.5394574866714776e-05, "loss": 0.4077181816101074, "step": 6284, "token_acc": 0.8631277292576419 }, { "epoch": 0.3391248044029569, "grad_norm": 0.30621933937072754, "learning_rate": 1.539310331449888e-05, "loss": 0.3501664996147156, "step": 6285, "token_acc": 0.8788359788359789 }, { "epoch": 0.3391787622079534, "grad_norm": 0.5365172028541565, "learning_rate": 1.5391631597579025e-05, "loss": 0.39683637022972107, "step": 6286, "token_acc": 0.8561583318607527 }, { "epoch": 0.33923272001294985, "grad_norm": 0.4720117449760437, "learning_rate": 1.539015971600015e-05, "loss": 0.40328657627105713, "step": 6287, "token_acc": 0.8611368755797005 }, { "epoch": 0.33928667781794636, "grad_norm": 0.4418947398662567, "learning_rate": 1.5388687669807212e-05, "loss": 0.47451701760292053, "step": 6288, "token_acc": 0.8396136795440152 }, { "epoch": 0.33934063562294287, "grad_norm": 0.408115953207016, "learning_rate": 1.538721545904516e-05, "loss": 0.3525606095790863, "step": 6289, "token_acc": 0.8735183014198254 }, { "epoch": 0.3393945934279394, "grad_norm": 0.3666847348213196, "learning_rate": 1.5385743083758963e-05, "loss": 0.4056462347507477, "step": 6290, "token_acc": 0.8612860457408021 }, { "epoch": 0.3394485512329358, "grad_norm": 0.4087758958339691, "learning_rate": 1.538427054399358e-05, "loss": 0.3467448353767395, "step": 6291, "token_acc": 0.8789738554068982 }, { "epoch": 0.33950250903793233, "grad_norm": 0.41251668334007263, "learning_rate": 1.5382797839793986e-05, "loss": 0.4089330732822418, "step": 6292, "token_acc": 0.8606366459627329 }, { "epoch": 0.33955646684292884, "grad_norm": 0.4396165609359741, "learning_rate": 1.5381324971205162e-05, "loss": 0.35392940044403076, "step": 6293, "token_acc": 0.8769433851569375 }, { "epoch": 0.33961042464792535, "grad_norm": 0.4047572612762451, "learning_rate": 1.5379851938272078e-05, "loss": 0.4018881916999817, "step": 6294, "token_acc": 0.8618175927425887 }, { "epoch": 0.3396643824529218, "grad_norm": 0.48318037390708923, "learning_rate": 1.537837874103972e-05, "loss": 0.43514955043792725, "step": 6295, "token_acc": 0.8478955780500799 }, { "epoch": 0.3397183402579183, "grad_norm": 0.4218173921108246, "learning_rate": 1.5376905379553087e-05, "loss": 0.40751656889915466, "step": 6296, "token_acc": 0.8621394503747445 }, { "epoch": 0.3397722980629148, "grad_norm": 0.3979201018810272, "learning_rate": 1.537543185385718e-05, "loss": 0.4366067051887512, "step": 6297, "token_acc": 0.848842995770092 }, { "epoch": 0.3398262558679113, "grad_norm": 0.3858036398887634, "learning_rate": 1.5373958163996984e-05, "loss": 0.35275721549987793, "step": 6298, "token_acc": 0.8738712065136935 }, { "epoch": 0.33988021367290777, "grad_norm": 0.42784732580184937, "learning_rate": 1.5372484310017516e-05, "loss": 0.35044193267822266, "step": 6299, "token_acc": 0.8752074344507136 }, { "epoch": 0.3399341714779043, "grad_norm": 0.40934300422668457, "learning_rate": 1.5371010291963786e-05, "loss": 0.40444254875183105, "step": 6300, "token_acc": 0.8594028785535863 }, { "epoch": 0.3399881292829008, "grad_norm": 0.3602435886859894, "learning_rate": 1.5369536109880808e-05, "loss": 0.3494490683078766, "step": 6301, "token_acc": 0.8750355416548194 }, { "epoch": 0.34004208708789724, "grad_norm": 0.40991440415382385, "learning_rate": 1.5368061763813603e-05, "loss": 0.3805444836616516, "step": 6302, "token_acc": 0.8672474896633195 }, { "epoch": 0.34009604489289375, "grad_norm": 0.4352111220359802, "learning_rate": 1.53665872538072e-05, "loss": 0.3964160680770874, "step": 6303, "token_acc": 0.8629021123843711 }, { "epoch": 0.34015000269789025, "grad_norm": 0.4336777627468109, "learning_rate": 1.5365112579906626e-05, "loss": 0.42833632230758667, "step": 6304, "token_acc": 0.8519104084321476 }, { "epoch": 0.34020396050288676, "grad_norm": 0.3834024965763092, "learning_rate": 1.5363637742156917e-05, "loss": 0.3051493167877197, "step": 6305, "token_acc": 0.8862926232848798 }, { "epoch": 0.3402579183078832, "grad_norm": 0.34098246693611145, "learning_rate": 1.5362162740603118e-05, "loss": 0.4094076156616211, "step": 6306, "token_acc": 0.8618194348725017 }, { "epoch": 0.3403118761128797, "grad_norm": 0.38373294472694397, "learning_rate": 1.5360687575290278e-05, "loss": 0.3787483870983124, "step": 6307, "token_acc": 0.8717506326201978 }, { "epoch": 0.3403658339178762, "grad_norm": 0.3270212411880493, "learning_rate": 1.5359212246263438e-05, "loss": 0.40899842977523804, "step": 6308, "token_acc": 0.8604600219058051 }, { "epoch": 0.34041979172287273, "grad_norm": 0.4309585988521576, "learning_rate": 1.5357736753567665e-05, "loss": 0.40222710371017456, "step": 6309, "token_acc": 0.8649148289977552 }, { "epoch": 0.3404737495278692, "grad_norm": 0.3755984306335449, "learning_rate": 1.5356261097248013e-05, "loss": 0.36932796239852905, "step": 6310, "token_acc": 0.8691289806601716 }, { "epoch": 0.3405277073328657, "grad_norm": 0.3956622779369354, "learning_rate": 1.5354785277349547e-05, "loss": 0.3523107171058655, "step": 6311, "token_acc": 0.8777902481606186 }, { "epoch": 0.3405816651378622, "grad_norm": 0.3690803050994873, "learning_rate": 1.5353309293917348e-05, "loss": 0.3892536163330078, "step": 6312, "token_acc": 0.8636730924621956 }, { "epoch": 0.3406356229428587, "grad_norm": 0.4134519100189209, "learning_rate": 1.535183314699648e-05, "loss": 0.3044072687625885, "step": 6313, "token_acc": 0.8874829667120888 }, { "epoch": 0.34068958074785516, "grad_norm": 0.4153746962547302, "learning_rate": 1.535035683663203e-05, "loss": 0.35960090160369873, "step": 6314, "token_acc": 0.8700564971751412 }, { "epoch": 0.34074353855285167, "grad_norm": 0.40575072169303894, "learning_rate": 1.5348880362869087e-05, "loss": 0.33874309062957764, "step": 6315, "token_acc": 0.8797610156833457 }, { "epoch": 0.3407974963578482, "grad_norm": 0.414823442697525, "learning_rate": 1.5347403725752737e-05, "loss": 0.370561420917511, "step": 6316, "token_acc": 0.8717497801783696 }, { "epoch": 0.3408514541628447, "grad_norm": 0.3646456003189087, "learning_rate": 1.5345926925328078e-05, "loss": 0.4077954590320587, "step": 6317, "token_acc": 0.8666490625825192 }, { "epoch": 0.34090541196784113, "grad_norm": 0.41908594965934753, "learning_rate": 1.5344449961640214e-05, "loss": 0.34550607204437256, "step": 6318, "token_acc": 0.8790005134348794 }, { "epoch": 0.34095936977283764, "grad_norm": 0.48956558108329773, "learning_rate": 1.5342972834734244e-05, "loss": 0.3826972246170044, "step": 6319, "token_acc": 0.8698390482855144 }, { "epoch": 0.34101332757783415, "grad_norm": 0.28057077527046204, "learning_rate": 1.5341495544655285e-05, "loss": 0.33853262662887573, "step": 6320, "token_acc": 0.8838482596793117 }, { "epoch": 0.34106728538283065, "grad_norm": 0.46350663900375366, "learning_rate": 1.534001809144845e-05, "loss": 0.36479321122169495, "step": 6321, "token_acc": 0.8672428694900605 }, { "epoch": 0.3411212431878271, "grad_norm": 0.4985750913619995, "learning_rate": 1.5338540475158865e-05, "loss": 0.35087254643440247, "step": 6322, "token_acc": 0.871983039791259 }, { "epoch": 0.3411752009928236, "grad_norm": 0.4199382960796356, "learning_rate": 1.533706269583165e-05, "loss": 0.3964293599128723, "step": 6323, "token_acc": 0.8652454780361757 }, { "epoch": 0.3412291587978201, "grad_norm": 0.42973002791404724, "learning_rate": 1.5335584753511933e-05, "loss": 0.37357017397880554, "step": 6324, "token_acc": 0.8695023148148148 }, { "epoch": 0.34128311660281657, "grad_norm": 0.31690263748168945, "learning_rate": 1.533410664824486e-05, "loss": 0.35795271396636963, "step": 6325, "token_acc": 0.8733325463345532 }, { "epoch": 0.3413370744078131, "grad_norm": 0.41034093499183655, "learning_rate": 1.5332628380075575e-05, "loss": 0.3728964030742645, "step": 6326, "token_acc": 0.8638286620835537 }, { "epoch": 0.3413910322128096, "grad_norm": 0.5174494981765747, "learning_rate": 1.5331149949049206e-05, "loss": 0.4456818699836731, "step": 6327, "token_acc": 0.8479802651865557 }, { "epoch": 0.3414449900178061, "grad_norm": 0.4339738190174103, "learning_rate": 1.5329671355210917e-05, "loss": 0.4480385184288025, "step": 6328, "token_acc": 0.8473326586670522 }, { "epoch": 0.34149894782280255, "grad_norm": 0.4802868366241455, "learning_rate": 1.5328192598605863e-05, "loss": 0.39581525325775146, "step": 6329, "token_acc": 0.8627598674299488 }, { "epoch": 0.34155290562779905, "grad_norm": 0.33573117852211, "learning_rate": 1.53267136792792e-05, "loss": 0.3970872759819031, "step": 6330, "token_acc": 0.857728414650271 }, { "epoch": 0.34160686343279556, "grad_norm": 0.39827489852905273, "learning_rate": 1.5325234597276097e-05, "loss": 0.37534135580062866, "step": 6331, "token_acc": 0.8719385304946374 }, { "epoch": 0.34166082123779207, "grad_norm": 0.38489848375320435, "learning_rate": 1.5323755352641728e-05, "loss": 0.38420116901397705, "step": 6332, "token_acc": 0.8654768247202983 }, { "epoch": 0.3417147790427885, "grad_norm": 0.38082176446914673, "learning_rate": 1.532227594542126e-05, "loss": 0.3940444588661194, "step": 6333, "token_acc": 0.8686547382372433 }, { "epoch": 0.341768736847785, "grad_norm": 0.40489333868026733, "learning_rate": 1.5320796375659884e-05, "loss": 0.3931485712528229, "step": 6334, "token_acc": 0.860789183222958 }, { "epoch": 0.34182269465278153, "grad_norm": 0.33677369356155396, "learning_rate": 1.531931664340278e-05, "loss": 0.34004729986190796, "step": 6335, "token_acc": 0.8806044092147635 }, { "epoch": 0.34187665245777804, "grad_norm": 0.37709781527519226, "learning_rate": 1.5317836748695135e-05, "loss": 0.42226266860961914, "step": 6336, "token_acc": 0.8535317059078434 }, { "epoch": 0.3419306102627745, "grad_norm": 0.3805553913116455, "learning_rate": 1.5316356691582148e-05, "loss": 0.3478813171386719, "step": 6337, "token_acc": 0.8753173062565328 }, { "epoch": 0.341984568067771, "grad_norm": 0.5020240545272827, "learning_rate": 1.5314876472109025e-05, "loss": 0.4594189524650574, "step": 6338, "token_acc": 0.8456748569505217 }, { "epoch": 0.3420385258727675, "grad_norm": 0.451116144657135, "learning_rate": 1.5313396090320963e-05, "loss": 0.3515702784061432, "step": 6339, "token_acc": 0.8770034843205575 }, { "epoch": 0.342092483677764, "grad_norm": 0.3744661808013916, "learning_rate": 1.531191554626318e-05, "loss": 0.40301334857940674, "step": 6340, "token_acc": 0.8603559870550161 }, { "epoch": 0.34214644148276047, "grad_norm": 0.42902871966362, "learning_rate": 1.5310434839980884e-05, "loss": 0.3791109025478363, "step": 6341, "token_acc": 0.8701986754966887 }, { "epoch": 0.342200399287757, "grad_norm": 0.36393827199935913, "learning_rate": 1.5308953971519303e-05, "loss": 0.42332834005355835, "step": 6342, "token_acc": 0.8556408544726302 }, { "epoch": 0.3422543570927535, "grad_norm": 0.45685040950775146, "learning_rate": 1.5307472940923655e-05, "loss": 0.3599148988723755, "step": 6343, "token_acc": 0.8710994459025955 }, { "epoch": 0.34230831489774993, "grad_norm": 0.2711457908153534, "learning_rate": 1.5305991748239176e-05, "loss": 0.3280923664569855, "step": 6344, "token_acc": 0.8847338786784235 }, { "epoch": 0.34236227270274644, "grad_norm": 0.4326046109199524, "learning_rate": 1.53045103935111e-05, "loss": 0.36768782138824463, "step": 6345, "token_acc": 0.8708024036762106 }, { "epoch": 0.34241623050774295, "grad_norm": 0.3981100022792816, "learning_rate": 1.5303028876784667e-05, "loss": 0.46478909254074097, "step": 6346, "token_acc": 0.8400812285823074 }, { "epoch": 0.34247018831273945, "grad_norm": 0.42043522000312805, "learning_rate": 1.5301547198105117e-05, "loss": 0.4209328889846802, "step": 6347, "token_acc": 0.8581989247311828 }, { "epoch": 0.3425241461177359, "grad_norm": 0.35653313994407654, "learning_rate": 1.5300065357517706e-05, "loss": 0.33832424879074097, "step": 6348, "token_acc": 0.879560750974141 }, { "epoch": 0.3425781039227324, "grad_norm": 0.45481833815574646, "learning_rate": 1.5298583355067687e-05, "loss": 0.38337525725364685, "step": 6349, "token_acc": 0.8623238226194568 }, { "epoch": 0.3426320617277289, "grad_norm": 0.3890405297279358, "learning_rate": 1.5297101190800323e-05, "loss": 0.41077250242233276, "step": 6350, "token_acc": 0.8571896495250573 }, { "epoch": 0.3426860195327254, "grad_norm": 0.3306938111782074, "learning_rate": 1.5295618864760875e-05, "loss": 0.37923604249954224, "step": 6351, "token_acc": 0.8694148936170213 }, { "epoch": 0.3427399773377219, "grad_norm": 0.34424304962158203, "learning_rate": 1.5294136376994614e-05, "loss": 0.40321505069732666, "step": 6352, "token_acc": 0.863986718842642 }, { "epoch": 0.3427939351427184, "grad_norm": 0.5100805759429932, "learning_rate": 1.5292653727546812e-05, "loss": 0.40771108865737915, "step": 6353, "token_acc": 0.8604216706698692 }, { "epoch": 0.3428478929477149, "grad_norm": 0.4279897212982178, "learning_rate": 1.5291170916462755e-05, "loss": 0.34098953008651733, "step": 6354, "token_acc": 0.8820160366552119 }, { "epoch": 0.3429018507527114, "grad_norm": 0.38256126642227173, "learning_rate": 1.5289687943787723e-05, "loss": 0.3854488730430603, "step": 6355, "token_acc": 0.8679775280898876 }, { "epoch": 0.34295580855770785, "grad_norm": 0.3105677664279938, "learning_rate": 1.5288204809567004e-05, "loss": 0.3928266167640686, "step": 6356, "token_acc": 0.8644130301167794 }, { "epoch": 0.34300976636270436, "grad_norm": 0.4145994186401367, "learning_rate": 1.52867215138459e-05, "loss": 0.38645845651626587, "step": 6357, "token_acc": 0.8660278430663533 }, { "epoch": 0.34306372416770087, "grad_norm": 0.4511116147041321, "learning_rate": 1.5285238056669704e-05, "loss": 0.42006587982177734, "step": 6358, "token_acc": 0.856690698083348 }, { "epoch": 0.3431176819726974, "grad_norm": 0.47913581132888794, "learning_rate": 1.5283754438083724e-05, "loss": 0.46245989203453064, "step": 6359, "token_acc": 0.839667668375464 }, { "epoch": 0.3431716397776938, "grad_norm": 0.48773393034935, "learning_rate": 1.5282270658133266e-05, "loss": 0.42994987964630127, "step": 6360, "token_acc": 0.8505178543824393 }, { "epoch": 0.34322559758269033, "grad_norm": 0.4915430247783661, "learning_rate": 1.528078671686365e-05, "loss": 0.3657512664794922, "step": 6361, "token_acc": 0.8728440987487318 }, { "epoch": 0.34327955538768684, "grad_norm": 0.43044960498809814, "learning_rate": 1.527930261432019e-05, "loss": 0.3057701587677002, "step": 6362, "token_acc": 0.8882658359293873 }, { "epoch": 0.34333351319268335, "grad_norm": 0.4249102473258972, "learning_rate": 1.5277818350548204e-05, "loss": 0.3677850365638733, "step": 6363, "token_acc": 0.8664509486689219 }, { "epoch": 0.3433874709976798, "grad_norm": 0.3940223455429077, "learning_rate": 1.5276333925593032e-05, "loss": 0.3335592448711395, "step": 6364, "token_acc": 0.8812424688713348 }, { "epoch": 0.3434414288026763, "grad_norm": 0.44404110312461853, "learning_rate": 1.5274849339500006e-05, "loss": 0.4169737696647644, "step": 6365, "token_acc": 0.857916769268653 }, { "epoch": 0.3434953866076728, "grad_norm": 0.29796117544174194, "learning_rate": 1.527336459231446e-05, "loss": 0.3183920383453369, "step": 6366, "token_acc": 0.8851642880849652 }, { "epoch": 0.34354934441266927, "grad_norm": 0.4264286756515503, "learning_rate": 1.5271879684081746e-05, "loss": 0.38488417863845825, "step": 6367, "token_acc": 0.8662357036300348 }, { "epoch": 0.3436033022176658, "grad_norm": 0.455439031124115, "learning_rate": 1.5270394614847205e-05, "loss": 0.3413606286048889, "step": 6368, "token_acc": 0.8788685524126456 }, { "epoch": 0.3436572600226623, "grad_norm": 0.34726637601852417, "learning_rate": 1.526890938465619e-05, "loss": 0.39744144678115845, "step": 6369, "token_acc": 0.870137788074625 }, { "epoch": 0.3437112178276588, "grad_norm": 0.43318450450897217, "learning_rate": 1.5267423993554067e-05, "loss": 0.34745532274246216, "step": 6370, "token_acc": 0.8788445199660153 }, { "epoch": 0.34376517563265524, "grad_norm": 0.3297565281391144, "learning_rate": 1.5265938441586193e-05, "loss": 0.3604471683502197, "step": 6371, "token_acc": 0.8724917048506873 }, { "epoch": 0.34381913343765175, "grad_norm": 0.40226227045059204, "learning_rate": 1.5264452728797937e-05, "loss": 0.3851940631866455, "step": 6372, "token_acc": 0.8659473299195318 }, { "epoch": 0.34387309124264825, "grad_norm": 0.3925260305404663, "learning_rate": 1.5262966855234672e-05, "loss": 0.44510358572006226, "step": 6373, "token_acc": 0.8527630582891749 }, { "epoch": 0.34392704904764476, "grad_norm": 0.32475876808166504, "learning_rate": 1.5261480820941783e-05, "loss": 0.37953951954841614, "step": 6374, "token_acc": 0.862817728956444 }, { "epoch": 0.3439810068526412, "grad_norm": 0.36466720700263977, "learning_rate": 1.525999462596465e-05, "loss": 0.4078137278556824, "step": 6375, "token_acc": 0.8594850236468734 }, { "epoch": 0.3440349646576377, "grad_norm": 0.48702573776245117, "learning_rate": 1.5258508270348653e-05, "loss": 0.40226662158966064, "step": 6376, "token_acc": 0.8657628128724673 }, { "epoch": 0.34408892246263423, "grad_norm": 0.4989556074142456, "learning_rate": 1.5257021754139193e-05, "loss": 0.3857176899909973, "step": 6377, "token_acc": 0.8660498793242156 }, { "epoch": 0.34414288026763074, "grad_norm": 0.42021965980529785, "learning_rate": 1.5255535077381666e-05, "loss": 0.48360443115234375, "step": 6378, "token_acc": 0.8380475189914337 }, { "epoch": 0.3441968380726272, "grad_norm": 0.38550108671188354, "learning_rate": 1.5254048240121475e-05, "loss": 0.37815096974372864, "step": 6379, "token_acc": 0.866787221217601 }, { "epoch": 0.3442507958776237, "grad_norm": 0.3393312990665436, "learning_rate": 1.5252561242404026e-05, "loss": 0.33581483364105225, "step": 6380, "token_acc": 0.8767342582710779 }, { "epoch": 0.3443047536826202, "grad_norm": 0.4812958538532257, "learning_rate": 1.5251074084274736e-05, "loss": 0.33590924739837646, "step": 6381, "token_acc": 0.8768994365716237 }, { "epoch": 0.3443587114876167, "grad_norm": 0.3383432924747467, "learning_rate": 1.5249586765779013e-05, "loss": 0.3777981102466583, "step": 6382, "token_acc": 0.8713860544217688 }, { "epoch": 0.34441266929261316, "grad_norm": 0.3914584517478943, "learning_rate": 1.524809928696229e-05, "loss": 0.3251935839653015, "step": 6383, "token_acc": 0.88616612052999 }, { "epoch": 0.34446662709760967, "grad_norm": 0.4791739583015442, "learning_rate": 1.524661164786999e-05, "loss": 0.39780524373054504, "step": 6384, "token_acc": 0.8602810531416573 }, { "epoch": 0.3445205849026062, "grad_norm": 0.4536605775356293, "learning_rate": 1.5245123848547542e-05, "loss": 0.3691094219684601, "step": 6385, "token_acc": 0.8672631270167205 }, { "epoch": 0.3445745427076027, "grad_norm": 0.42184412479400635, "learning_rate": 1.5243635889040387e-05, "loss": 0.368330717086792, "step": 6386, "token_acc": 0.8674920528693324 }, { "epoch": 0.34462850051259913, "grad_norm": 0.38044247031211853, "learning_rate": 1.5242147769393966e-05, "loss": 0.2980215847492218, "step": 6387, "token_acc": 0.8910415631727107 }, { "epoch": 0.34468245831759564, "grad_norm": 0.42865756154060364, "learning_rate": 1.5240659489653723e-05, "loss": 0.4036794900894165, "step": 6388, "token_acc": 0.8614352783366868 }, { "epoch": 0.34473641612259215, "grad_norm": 0.3757311701774597, "learning_rate": 1.5239171049865116e-05, "loss": 0.34492027759552, "step": 6389, "token_acc": 0.8762526571515336 }, { "epoch": 0.3447903739275886, "grad_norm": 0.46490806341171265, "learning_rate": 1.5237682450073595e-05, "loss": 0.4240858554840088, "step": 6390, "token_acc": 0.8537686174213931 }, { "epoch": 0.3448443317325851, "grad_norm": 0.3949664533138275, "learning_rate": 1.5236193690324626e-05, "loss": 0.36671215295791626, "step": 6391, "token_acc": 0.8770380434782609 }, { "epoch": 0.3448982895375816, "grad_norm": 0.5586932897567749, "learning_rate": 1.5234704770663672e-05, "loss": 0.4279004633426666, "step": 6392, "token_acc": 0.8542461353141939 }, { "epoch": 0.3449522473425781, "grad_norm": 0.31012821197509766, "learning_rate": 1.5233215691136206e-05, "loss": 0.3723834156990051, "step": 6393, "token_acc": 0.8692874692874692 }, { "epoch": 0.3450062051475746, "grad_norm": 0.38522541522979736, "learning_rate": 1.5231726451787702e-05, "loss": 0.3325374126434326, "step": 6394, "token_acc": 0.878095238095238 }, { "epoch": 0.3450601629525711, "grad_norm": 0.4189956784248352, "learning_rate": 1.5230237052663643e-05, "loss": 0.3308473825454712, "step": 6395, "token_acc": 0.8855367085329535 }, { "epoch": 0.3451141207575676, "grad_norm": 0.3449324369430542, "learning_rate": 1.5228747493809514e-05, "loss": 0.39398178458213806, "step": 6396, "token_acc": 0.8633963149865039 }, { "epoch": 0.3451680785625641, "grad_norm": 0.33560824394226074, "learning_rate": 1.522725777527081e-05, "loss": 0.39035001397132874, "step": 6397, "token_acc": 0.8613026819923372 }, { "epoch": 0.34522203636756055, "grad_norm": 0.43235066533088684, "learning_rate": 1.522576789709302e-05, "loss": 0.3982788324356079, "step": 6398, "token_acc": 0.8597958605046782 }, { "epoch": 0.34527599417255705, "grad_norm": 0.43890681862831116, "learning_rate": 1.5224277859321648e-05, "loss": 0.4169398844242096, "step": 6399, "token_acc": 0.8550614947965941 }, { "epoch": 0.34532995197755356, "grad_norm": 0.44601595401763916, "learning_rate": 1.5222787662002199e-05, "loss": 0.38470929861068726, "step": 6400, "token_acc": 0.8659957627118644 }, { "epoch": 0.34538390978255007, "grad_norm": 0.4783211350440979, "learning_rate": 1.5221297305180182e-05, "loss": 0.4055957496166229, "step": 6401, "token_acc": 0.8611732537907034 }, { "epoch": 0.3454378675875465, "grad_norm": 0.4017810523509979, "learning_rate": 1.5219806788901114e-05, "loss": 0.4729485511779785, "step": 6402, "token_acc": 0.8438615551761601 }, { "epoch": 0.34549182539254303, "grad_norm": 0.36497625708580017, "learning_rate": 1.5218316113210514e-05, "loss": 0.34490931034088135, "step": 6403, "token_acc": 0.8786610878661087 }, { "epoch": 0.34554578319753954, "grad_norm": 0.4266195297241211, "learning_rate": 1.5216825278153907e-05, "loss": 0.37703937292099, "step": 6404, "token_acc": 0.8706203549218868 }, { "epoch": 0.34559974100253604, "grad_norm": 0.38851457834243774, "learning_rate": 1.5215334283776821e-05, "loss": 0.3753551244735718, "step": 6405, "token_acc": 0.8699759807846277 }, { "epoch": 0.3456536988075325, "grad_norm": 0.27500391006469727, "learning_rate": 1.5213843130124795e-05, "loss": 0.3329585790634155, "step": 6406, "token_acc": 0.884326280623608 }, { "epoch": 0.345707656612529, "grad_norm": 0.4746479094028473, "learning_rate": 1.5212351817243366e-05, "loss": 0.41741156578063965, "step": 6407, "token_acc": 0.854318418314256 }, { "epoch": 0.3457616144175255, "grad_norm": 0.3960250914096832, "learning_rate": 1.5210860345178073e-05, "loss": 0.3713175058364868, "step": 6408, "token_acc": 0.8723861357777141 }, { "epoch": 0.34581557222252196, "grad_norm": 0.40858638286590576, "learning_rate": 1.5209368713974476e-05, "loss": 0.45202910900115967, "step": 6409, "token_acc": 0.8455467869222097 }, { "epoch": 0.34586953002751847, "grad_norm": 0.3295181095600128, "learning_rate": 1.520787692367812e-05, "loss": 0.3912150263786316, "step": 6410, "token_acc": 0.8617773464577846 }, { "epoch": 0.345923487832515, "grad_norm": 0.48572567105293274, "learning_rate": 1.5206384974334568e-05, "loss": 0.3769087791442871, "step": 6411, "token_acc": 0.8676928398478035 }, { "epoch": 0.3459774456375115, "grad_norm": 0.42585280537605286, "learning_rate": 1.5204892865989383e-05, "loss": 0.3565111756324768, "step": 6412, "token_acc": 0.8766195867865063 }, { "epoch": 0.34603140344250793, "grad_norm": 0.33171743154525757, "learning_rate": 1.5203400598688131e-05, "loss": 0.3814363479614258, "step": 6413, "token_acc": 0.8635683883584493 }, { "epoch": 0.34608536124750444, "grad_norm": 0.3964349031448364, "learning_rate": 1.5201908172476389e-05, "loss": 0.3933347165584564, "step": 6414, "token_acc": 0.8604568797591642 }, { "epoch": 0.34613931905250095, "grad_norm": 0.3086518943309784, "learning_rate": 1.5200415587399734e-05, "loss": 0.35521847009658813, "step": 6415, "token_acc": 0.8731765730459394 }, { "epoch": 0.34619327685749746, "grad_norm": 0.4847484827041626, "learning_rate": 1.5198922843503752e-05, "loss": 0.3694944381713867, "step": 6416, "token_acc": 0.8647087939283946 }, { "epoch": 0.3462472346624939, "grad_norm": 0.3632986545562744, "learning_rate": 1.5197429940834027e-05, "loss": 0.34535884857177734, "step": 6417, "token_acc": 0.8769816106531388 }, { "epoch": 0.3463011924674904, "grad_norm": 0.42341524362564087, "learning_rate": 1.519593687943615e-05, "loss": 0.4171578884124756, "step": 6418, "token_acc": 0.8530420424234726 }, { "epoch": 0.3463551502724869, "grad_norm": 0.371135950088501, "learning_rate": 1.5194443659355726e-05, "loss": 0.3873903155326843, "step": 6419, "token_acc": 0.8640039206076942 }, { "epoch": 0.34640910807748343, "grad_norm": 0.39941802620887756, "learning_rate": 1.5192950280638353e-05, "loss": 0.44459161162376404, "step": 6420, "token_acc": 0.8489198202116862 }, { "epoch": 0.3464630658824799, "grad_norm": 0.40195468068122864, "learning_rate": 1.5191456743329636e-05, "loss": 0.4002751111984253, "step": 6421, "token_acc": 0.85893470790378 }, { "epoch": 0.3465170236874764, "grad_norm": 0.39159107208251953, "learning_rate": 1.5189963047475191e-05, "loss": 0.366934597492218, "step": 6422, "token_acc": 0.8696801480306635 }, { "epoch": 0.3465709814924729, "grad_norm": 0.5075599551200867, "learning_rate": 1.5188469193120634e-05, "loss": 0.40733766555786133, "step": 6423, "token_acc": 0.8596491228070176 }, { "epoch": 0.3466249392974694, "grad_norm": 0.33939477801322937, "learning_rate": 1.5186975180311587e-05, "loss": 0.3697815537452698, "step": 6424, "token_acc": 0.8677165354330708 }, { "epoch": 0.34667889710246585, "grad_norm": 0.49492451548576355, "learning_rate": 1.5185481009093677e-05, "loss": 0.4007255434989929, "step": 6425, "token_acc": 0.8617108950057689 }, { "epoch": 0.34673285490746236, "grad_norm": 0.3842164874076843, "learning_rate": 1.5183986679512538e-05, "loss": 0.36961305141448975, "step": 6426, "token_acc": 0.8711455993465387 }, { "epoch": 0.34678681271245887, "grad_norm": 0.4004574716091156, "learning_rate": 1.5182492191613802e-05, "loss": 0.3455166816711426, "step": 6427, "token_acc": 0.8860055607043559 }, { "epoch": 0.3468407705174554, "grad_norm": 0.3499525487422943, "learning_rate": 1.518099754544311e-05, "loss": 0.36170321702957153, "step": 6428, "token_acc": 0.8694225721784777 }, { "epoch": 0.34689472832245183, "grad_norm": 0.43583062291145325, "learning_rate": 1.5179502741046115e-05, "loss": 0.4235563576221466, "step": 6429, "token_acc": 0.8568501170960188 }, { "epoch": 0.34694868612744834, "grad_norm": 0.4560825526714325, "learning_rate": 1.5178007778468458e-05, "loss": 0.40286123752593994, "step": 6430, "token_acc": 0.8611386138613861 }, { "epoch": 0.34700264393244484, "grad_norm": 0.37985774874687195, "learning_rate": 1.5176512657755802e-05, "loss": 0.3228517770767212, "step": 6431, "token_acc": 0.8824343015214384 }, { "epoch": 0.3470566017374413, "grad_norm": 0.3529168367385864, "learning_rate": 1.5175017378953807e-05, "loss": 0.36376017332077026, "step": 6432, "token_acc": 0.8752147239263803 }, { "epoch": 0.3471105595424378, "grad_norm": 0.4770529270172119, "learning_rate": 1.5173521942108134e-05, "loss": 0.38130033016204834, "step": 6433, "token_acc": 0.8660122048288671 }, { "epoch": 0.3471645173474343, "grad_norm": 0.4355423152446747, "learning_rate": 1.517202634726446e-05, "loss": 0.3204329311847687, "step": 6434, "token_acc": 0.8862962962962962 }, { "epoch": 0.3472184751524308, "grad_norm": 0.3284112513065338, "learning_rate": 1.5170530594468451e-05, "loss": 0.41799837350845337, "step": 6435, "token_acc": 0.8601882613510521 }, { "epoch": 0.34727243295742727, "grad_norm": 0.3544856309890747, "learning_rate": 1.5169034683765795e-05, "loss": 0.34151574969291687, "step": 6436, "token_acc": 0.8764175257731959 }, { "epoch": 0.3473263907624238, "grad_norm": 0.40983495116233826, "learning_rate": 1.5167538615202175e-05, "loss": 0.35884636640548706, "step": 6437, "token_acc": 0.8748794267603693 }, { "epoch": 0.3473803485674203, "grad_norm": 0.37212345004081726, "learning_rate": 1.5166042388823278e-05, "loss": 0.36885035037994385, "step": 6438, "token_acc": 0.8716941212029621 }, { "epoch": 0.3474343063724168, "grad_norm": 0.46057990193367004, "learning_rate": 1.5164546004674798e-05, "loss": 0.39892861247062683, "step": 6439, "token_acc": 0.8637116056648887 }, { "epoch": 0.34748826417741324, "grad_norm": 0.45575201511383057, "learning_rate": 1.5163049462802439e-05, "loss": 0.44865334033966064, "step": 6440, "token_acc": 0.8468330786211581 }, { "epoch": 0.34754222198240975, "grad_norm": 0.48849916458129883, "learning_rate": 1.51615527632519e-05, "loss": 0.4239625334739685, "step": 6441, "token_acc": 0.8532356095816946 }, { "epoch": 0.34759617978740626, "grad_norm": 0.4009515941143036, "learning_rate": 1.5160055906068893e-05, "loss": 0.37866899371147156, "step": 6442, "token_acc": 0.8690904578731853 }, { "epoch": 0.34765013759240276, "grad_norm": 0.38276973366737366, "learning_rate": 1.515855889129913e-05, "loss": 0.3529753088951111, "step": 6443, "token_acc": 0.8777665268335021 }, { "epoch": 0.3477040953973992, "grad_norm": 0.5019548535346985, "learning_rate": 1.5157061718988331e-05, "loss": 0.423888623714447, "step": 6444, "token_acc": 0.8511206768591361 }, { "epoch": 0.3477580532023957, "grad_norm": 0.4075140357017517, "learning_rate": 1.5155564389182216e-05, "loss": 0.3474944233894348, "step": 6445, "token_acc": 0.8764916467780429 }, { "epoch": 0.34781201100739223, "grad_norm": 0.31100112199783325, "learning_rate": 1.5154066901926516e-05, "loss": 0.36659860610961914, "step": 6446, "token_acc": 0.8708395429706905 }, { "epoch": 0.34786596881238874, "grad_norm": 0.27109605073928833, "learning_rate": 1.5152569257266962e-05, "loss": 0.364315927028656, "step": 6447, "token_acc": 0.8748866727107888 }, { "epoch": 0.3479199266173852, "grad_norm": 0.4233139455318451, "learning_rate": 1.5151071455249296e-05, "loss": 0.35076308250427246, "step": 6448, "token_acc": 0.8772385154425123 }, { "epoch": 0.3479738844223817, "grad_norm": 0.3112397789955139, "learning_rate": 1.5149573495919253e-05, "loss": 0.3452075123786926, "step": 6449, "token_acc": 0.8763213530655392 }, { "epoch": 0.3480278422273782, "grad_norm": 0.3315301835536957, "learning_rate": 1.5148075379322587e-05, "loss": 0.34524136781692505, "step": 6450, "token_acc": 0.8785355961148469 }, { "epoch": 0.34808180003237466, "grad_norm": 0.5237424969673157, "learning_rate": 1.5146577105505049e-05, "loss": 0.4868104159832001, "step": 6451, "token_acc": 0.8363171355498721 }, { "epoch": 0.34813575783737116, "grad_norm": 0.4292786121368408, "learning_rate": 1.514507867451239e-05, "loss": 0.3222757577896118, "step": 6452, "token_acc": 0.8868153542709756 }, { "epoch": 0.34818971564236767, "grad_norm": 0.44771504402160645, "learning_rate": 1.5143580086390376e-05, "loss": 0.37170201539993286, "step": 6453, "token_acc": 0.8733584905660378 }, { "epoch": 0.3482436734473642, "grad_norm": 0.42296159267425537, "learning_rate": 1.5142081341184778e-05, "loss": 0.34863394498825073, "step": 6454, "token_acc": 0.8753335874952345 }, { "epoch": 0.34829763125236063, "grad_norm": 0.43059593439102173, "learning_rate": 1.514058243894136e-05, "loss": 0.4179249703884125, "step": 6455, "token_acc": 0.8583982424605552 }, { "epoch": 0.34835158905735714, "grad_norm": 0.45198357105255127, "learning_rate": 1.51390833797059e-05, "loss": 0.38890302181243896, "step": 6456, "token_acc": 0.8654089642947582 }, { "epoch": 0.34840554686235364, "grad_norm": 0.3322555124759674, "learning_rate": 1.5137584163524185e-05, "loss": 0.3663117289543152, "step": 6457, "token_acc": 0.8695124418369411 }, { "epoch": 0.34845950466735015, "grad_norm": 0.44780412316322327, "learning_rate": 1.5136084790441992e-05, "loss": 0.3612916171550751, "step": 6458, "token_acc": 0.8718520879821485 }, { "epoch": 0.3485134624723466, "grad_norm": 0.34427350759506226, "learning_rate": 1.5134585260505115e-05, "loss": 0.3509213328361511, "step": 6459, "token_acc": 0.8754248366013072 }, { "epoch": 0.3485674202773431, "grad_norm": 0.44087764620780945, "learning_rate": 1.5133085573759349e-05, "loss": 0.46754488348960876, "step": 6460, "token_acc": 0.846031112883925 }, { "epoch": 0.3486213780823396, "grad_norm": 0.3763948082923889, "learning_rate": 1.5131585730250495e-05, "loss": 0.38569584488868713, "step": 6461, "token_acc": 0.873015873015873 }, { "epoch": 0.3486753358873361, "grad_norm": 0.4245639443397522, "learning_rate": 1.5130085730024355e-05, "loss": 0.44050782918930054, "step": 6462, "token_acc": 0.8496356502242153 }, { "epoch": 0.3487292936923326, "grad_norm": 0.3479986786842346, "learning_rate": 1.5128585573126744e-05, "loss": 0.3853849172592163, "step": 6463, "token_acc": 0.8639018691588785 }, { "epoch": 0.3487832514973291, "grad_norm": 0.48161789774894714, "learning_rate": 1.512708525960347e-05, "loss": 0.41812291741371155, "step": 6464, "token_acc": 0.857313989085585 }, { "epoch": 0.3488372093023256, "grad_norm": 0.42236757278442383, "learning_rate": 1.5125584789500358e-05, "loss": 0.37846270203590393, "step": 6465, "token_acc": 0.864697536437508 }, { "epoch": 0.3488911671073221, "grad_norm": 0.361502468585968, "learning_rate": 1.5124084162863227e-05, "loss": 0.44997337460517883, "step": 6466, "token_acc": 0.8494803888702648 }, { "epoch": 0.34894512491231855, "grad_norm": 0.41256803274154663, "learning_rate": 1.512258337973791e-05, "loss": 0.40627047419548035, "step": 6467, "token_acc": 0.8590916652803194 }, { "epoch": 0.34899908271731506, "grad_norm": 0.3966885209083557, "learning_rate": 1.5121082440170238e-05, "loss": 0.3032766580581665, "step": 6468, "token_acc": 0.8882429879377998 }, { "epoch": 0.34905304052231156, "grad_norm": 0.41524216532707214, "learning_rate": 1.511958134420605e-05, "loss": 0.3818979859352112, "step": 6469, "token_acc": 0.8687247115240522 }, { "epoch": 0.34910699832730807, "grad_norm": 0.36273741722106934, "learning_rate": 1.5118080091891188e-05, "loss": 0.41619735956192017, "step": 6470, "token_acc": 0.8588026243849098 }, { "epoch": 0.3491609561323045, "grad_norm": 0.34424474835395813, "learning_rate": 1.5116578683271499e-05, "loss": 0.3893480896949768, "step": 6471, "token_acc": 0.8688673531193216 }, { "epoch": 0.34921491393730103, "grad_norm": 0.32326582074165344, "learning_rate": 1.511507711839284e-05, "loss": 0.3492678105831146, "step": 6472, "token_acc": 0.8798449612403101 }, { "epoch": 0.34926887174229754, "grad_norm": 0.3663184642791748, "learning_rate": 1.5113575397301063e-05, "loss": 0.3989616632461548, "step": 6473, "token_acc": 0.856808803301238 }, { "epoch": 0.349322829547294, "grad_norm": 0.3036171495914459, "learning_rate": 1.511207352004204e-05, "loss": 0.36957302689552307, "step": 6474, "token_acc": 0.8724956334121031 }, { "epoch": 0.3493767873522905, "grad_norm": 0.5299179553985596, "learning_rate": 1.5110571486661622e-05, "loss": 0.4628069996833801, "step": 6475, "token_acc": 0.8460890324392641 }, { "epoch": 0.349430745157287, "grad_norm": 0.4032175838947296, "learning_rate": 1.5109069297205693e-05, "loss": 0.3514097332954407, "step": 6476, "token_acc": 0.876097057305111 }, { "epoch": 0.3494847029622835, "grad_norm": 0.41529467701911926, "learning_rate": 1.5107566951720129e-05, "loss": 0.45480528473854065, "step": 6477, "token_acc": 0.8456448345712356 }, { "epoch": 0.34953866076727996, "grad_norm": 0.4264794886112213, "learning_rate": 1.5106064450250806e-05, "loss": 0.41730475425720215, "step": 6478, "token_acc": 0.8588395397748361 }, { "epoch": 0.34959261857227647, "grad_norm": 0.45086669921875, "learning_rate": 1.5104561792843612e-05, "loss": 0.33829647302627563, "step": 6479, "token_acc": 0.8758845944474687 }, { "epoch": 0.349646576377273, "grad_norm": 0.41333121061325073, "learning_rate": 1.5103058979544438e-05, "loss": 0.3395281434059143, "step": 6480, "token_acc": 0.8781158951116866 }, { "epoch": 0.3497005341822695, "grad_norm": 0.4604162275791168, "learning_rate": 1.510155601039918e-05, "loss": 0.3669254779815674, "step": 6481, "token_acc": 0.8685057471264368 }, { "epoch": 0.34975449198726594, "grad_norm": 0.38634470105171204, "learning_rate": 1.5100052885453735e-05, "loss": 0.37540021538734436, "step": 6482, "token_acc": 0.8626277372262774 }, { "epoch": 0.34980844979226244, "grad_norm": 0.4521251618862152, "learning_rate": 1.5098549604754013e-05, "loss": 0.413520872592926, "step": 6483, "token_acc": 0.8566048667439166 }, { "epoch": 0.34986240759725895, "grad_norm": 0.3687642812728882, "learning_rate": 1.5097046168345925e-05, "loss": 0.3661394715309143, "step": 6484, "token_acc": 0.8708196721311475 }, { "epoch": 0.34991636540225546, "grad_norm": 0.3634476363658905, "learning_rate": 1.5095542576275379e-05, "loss": 0.4213709235191345, "step": 6485, "token_acc": 0.8528908655490934 }, { "epoch": 0.3499703232072519, "grad_norm": 0.4069423973560333, "learning_rate": 1.50940388285883e-05, "loss": 0.37393003702163696, "step": 6486, "token_acc": 0.8685772426657623 }, { "epoch": 0.3500242810122484, "grad_norm": 0.3679460287094116, "learning_rate": 1.5092534925330609e-05, "loss": 0.34914630651474, "step": 6487, "token_acc": 0.8798135037126575 }, { "epoch": 0.3500782388172449, "grad_norm": 0.37175965309143066, "learning_rate": 1.5091030866548231e-05, "loss": 0.44589167833328247, "step": 6488, "token_acc": 0.8481097356975578 }, { "epoch": 0.35013219662224143, "grad_norm": 0.40450426936149597, "learning_rate": 1.5089526652287108e-05, "loss": 0.3663129210472107, "step": 6489, "token_acc": 0.8668069753457607 }, { "epoch": 0.3501861544272379, "grad_norm": 0.3468526005744934, "learning_rate": 1.5088022282593173e-05, "loss": 0.36784034967422485, "step": 6490, "token_acc": 0.871760075994029 }, { "epoch": 0.3502401122322344, "grad_norm": 0.5051108598709106, "learning_rate": 1.5086517757512371e-05, "loss": 0.3744378685951233, "step": 6491, "token_acc": 0.8686822415635975 }, { "epoch": 0.3502940700372309, "grad_norm": 0.43478453159332275, "learning_rate": 1.5085013077090648e-05, "loss": 0.43924832344055176, "step": 6492, "token_acc": 0.8516949152542372 }, { "epoch": 0.3503480278422274, "grad_norm": 0.41662782430648804, "learning_rate": 1.508350824137396e-05, "loss": 0.3961249589920044, "step": 6493, "token_acc": 0.8648783530879601 }, { "epoch": 0.35040198564722386, "grad_norm": 0.2830028533935547, "learning_rate": 1.5082003250408264e-05, "loss": 0.39894530177116394, "step": 6494, "token_acc": 0.8684676182611774 }, { "epoch": 0.35045594345222036, "grad_norm": 0.36239349842071533, "learning_rate": 1.5080498104239514e-05, "loss": 0.35218310356140137, "step": 6495, "token_acc": 0.8763791938752533 }, { "epoch": 0.35050990125721687, "grad_norm": 0.4200246334075928, "learning_rate": 1.5078992802913685e-05, "loss": 0.4032323360443115, "step": 6496, "token_acc": 0.8596258653704522 }, { "epoch": 0.3505638590622133, "grad_norm": 0.3925268352031708, "learning_rate": 1.5077487346476747e-05, "loss": 0.3491041660308838, "step": 6497, "token_acc": 0.882775426446069 }, { "epoch": 0.35061781686720983, "grad_norm": 0.41022560000419617, "learning_rate": 1.5075981734974673e-05, "loss": 0.35921066999435425, "step": 6498, "token_acc": 0.8730628591328574 }, { "epoch": 0.35067177467220634, "grad_norm": 0.39135509729385376, "learning_rate": 1.5074475968453453e-05, "loss": 0.41570836305618286, "step": 6499, "token_acc": 0.8535091851154027 }, { "epoch": 0.35072573247720285, "grad_norm": 0.3970085680484772, "learning_rate": 1.5072970046959062e-05, "loss": 0.36064600944519043, "step": 6500, "token_acc": 0.8749459108610991 }, { "epoch": 0.3507796902821993, "grad_norm": 0.3060755431652069, "learning_rate": 1.5071463970537495e-05, "loss": 0.3409709930419922, "step": 6501, "token_acc": 0.8775925172834486 }, { "epoch": 0.3508336480871958, "grad_norm": 0.4014628231525421, "learning_rate": 1.5069957739234746e-05, "loss": 0.4018259048461914, "step": 6502, "token_acc": 0.8577850429096854 }, { "epoch": 0.3508876058921923, "grad_norm": 0.4090292453765869, "learning_rate": 1.5068451353096818e-05, "loss": 0.3640952408313751, "step": 6503, "token_acc": 0.8742889647326507 }, { "epoch": 0.3509415636971888, "grad_norm": 0.4390312135219574, "learning_rate": 1.506694481216971e-05, "loss": 0.4148077666759491, "step": 6504, "token_acc": 0.8517355371900827 }, { "epoch": 0.35099552150218527, "grad_norm": 0.4398069381713867, "learning_rate": 1.5065438116499438e-05, "loss": 0.44365549087524414, "step": 6505, "token_acc": 0.8534393363290702 }, { "epoch": 0.3510494793071818, "grad_norm": 0.46795353293418884, "learning_rate": 1.5063931266132012e-05, "loss": 0.41566959023475647, "step": 6506, "token_acc": 0.856635596981802 }, { "epoch": 0.3511034371121783, "grad_norm": 0.40178483724594116, "learning_rate": 1.5062424261113453e-05, "loss": 0.3407993018627167, "step": 6507, "token_acc": 0.8771349862258954 }, { "epoch": 0.3511573949171748, "grad_norm": 0.4546658992767334, "learning_rate": 1.506091710148978e-05, "loss": 0.41135579347610474, "step": 6508, "token_acc": 0.8543329532497149 }, { "epoch": 0.35121135272217124, "grad_norm": 0.32929572463035583, "learning_rate": 1.5059409787307026e-05, "loss": 0.3821128010749817, "step": 6509, "token_acc": 0.86873795761079 }, { "epoch": 0.35126531052716775, "grad_norm": 0.39275115728378296, "learning_rate": 1.5057902318611225e-05, "loss": 0.37590184807777405, "step": 6510, "token_acc": 0.8663648313129838 }, { "epoch": 0.35131926833216426, "grad_norm": 0.3956722021102905, "learning_rate": 1.505639469544841e-05, "loss": 0.35840100049972534, "step": 6511, "token_acc": 0.8772695285010556 }, { "epoch": 0.35137322613716077, "grad_norm": 0.3862549364566803, "learning_rate": 1.5054886917864624e-05, "loss": 0.41552844643592834, "step": 6512, "token_acc": 0.8615384615384616 }, { "epoch": 0.3514271839421572, "grad_norm": 0.4656660556793213, "learning_rate": 1.5053378985905917e-05, "loss": 0.44293180108070374, "step": 6513, "token_acc": 0.8491943605236657 }, { "epoch": 0.3514811417471537, "grad_norm": 0.4293939173221588, "learning_rate": 1.5051870899618339e-05, "loss": 0.3936257064342499, "step": 6514, "token_acc": 0.8617921146953404 }, { "epoch": 0.35153509955215023, "grad_norm": 0.3196048140525818, "learning_rate": 1.5050362659047947e-05, "loss": 0.36315208673477173, "step": 6515, "token_acc": 0.8727853616032529 }, { "epoch": 0.3515890573571467, "grad_norm": 0.47340789437294006, "learning_rate": 1.5048854264240805e-05, "loss": 0.4495493173599243, "step": 6516, "token_acc": 0.8494864612511671 }, { "epoch": 0.3516430151621432, "grad_norm": 0.4149550795555115, "learning_rate": 1.5047345715242973e-05, "loss": 0.3773133456707001, "step": 6517, "token_acc": 0.8658058188950637 }, { "epoch": 0.3516969729671397, "grad_norm": 0.3883100152015686, "learning_rate": 1.504583701210053e-05, "loss": 0.378218412399292, "step": 6518, "token_acc": 0.8687803749132146 }, { "epoch": 0.3517509307721362, "grad_norm": 0.5166175961494446, "learning_rate": 1.5044328154859545e-05, "loss": 0.3855178952217102, "step": 6519, "token_acc": 0.8630353266232484 }, { "epoch": 0.35180488857713266, "grad_norm": 0.4145638644695282, "learning_rate": 1.5042819143566094e-05, "loss": 0.3612615168094635, "step": 6520, "token_acc": 0.8749530134068413 }, { "epoch": 0.35185884638212916, "grad_norm": 0.41260093450546265, "learning_rate": 1.5041309978266271e-05, "loss": 0.40684670209884644, "step": 6521, "token_acc": 0.8627319171820382 }, { "epoch": 0.35191280418712567, "grad_norm": 0.3470267653465271, "learning_rate": 1.5039800659006163e-05, "loss": 0.41487887501716614, "step": 6522, "token_acc": 0.8561531679052777 }, { "epoch": 0.3519667619921222, "grad_norm": 0.3156275153160095, "learning_rate": 1.5038291185831861e-05, "loss": 0.4021266996860504, "step": 6523, "token_acc": 0.8619738577767119 }, { "epoch": 0.35202071979711863, "grad_norm": 0.3937634229660034, "learning_rate": 1.5036781558789466e-05, "loss": 0.4241083562374115, "step": 6524, "token_acc": 0.8589962931280296 }, { "epoch": 0.35207467760211514, "grad_norm": 0.35665008425712585, "learning_rate": 1.5035271777925086e-05, "loss": 0.3803611099720001, "step": 6525, "token_acc": 0.8623591692683589 }, { "epoch": 0.35212863540711165, "grad_norm": 0.36864879727363586, "learning_rate": 1.5033761843284821e-05, "loss": 0.349892795085907, "step": 6526, "token_acc": 0.8777365713602105 }, { "epoch": 0.35218259321210815, "grad_norm": 0.396003782749176, "learning_rate": 1.5032251754914788e-05, "loss": 0.38976815342903137, "step": 6527, "token_acc": 0.8590134529147982 }, { "epoch": 0.3522365510171046, "grad_norm": 0.41881200671195984, "learning_rate": 1.5030741512861106e-05, "loss": 0.4031009078025818, "step": 6528, "token_acc": 0.8646833013435701 }, { "epoch": 0.3522905088221011, "grad_norm": 0.4118974804878235, "learning_rate": 1.5029231117169898e-05, "loss": 0.4000081717967987, "step": 6529, "token_acc": 0.8660079527653934 }, { "epoch": 0.3523444666270976, "grad_norm": 0.30026984214782715, "learning_rate": 1.5027720567887285e-05, "loss": 0.3568766117095947, "step": 6530, "token_acc": 0.8739580339177925 }, { "epoch": 0.3523984244320941, "grad_norm": 0.4639032483100891, "learning_rate": 1.5026209865059404e-05, "loss": 0.37668490409851074, "step": 6531, "token_acc": 0.8707992338499042 }, { "epoch": 0.3524523822370906, "grad_norm": 0.3647666275501251, "learning_rate": 1.5024699008732392e-05, "loss": 0.38258659839630127, "step": 6532, "token_acc": 0.8660309520087541 }, { "epoch": 0.3525063400420871, "grad_norm": 0.35444316267967224, "learning_rate": 1.5023187998952389e-05, "loss": 0.37426525354385376, "step": 6533, "token_acc": 0.8679152291769344 }, { "epoch": 0.3525602978470836, "grad_norm": 0.5145921111106873, "learning_rate": 1.502167683576554e-05, "loss": 0.3706696033477783, "step": 6534, "token_acc": 0.8714285714285714 }, { "epoch": 0.3526142556520801, "grad_norm": 0.33175644278526306, "learning_rate": 1.5020165519217995e-05, "loss": 0.33296382427215576, "step": 6535, "token_acc": 0.8851617524173332 }, { "epoch": 0.35266821345707655, "grad_norm": 0.4223674535751343, "learning_rate": 1.501865404935591e-05, "loss": 0.405165433883667, "step": 6536, "token_acc": 0.8583092576309509 }, { "epoch": 0.35272217126207306, "grad_norm": 0.45467105507850647, "learning_rate": 1.5017142426225445e-05, "loss": 0.3812800347805023, "step": 6537, "token_acc": 0.8682425886456113 }, { "epoch": 0.35277612906706957, "grad_norm": 0.33823299407958984, "learning_rate": 1.5015630649872765e-05, "loss": 0.3984365463256836, "step": 6538, "token_acc": 0.8617789650737113 }, { "epoch": 0.352830086872066, "grad_norm": 0.44969409704208374, "learning_rate": 1.501411872034404e-05, "loss": 0.43326491117477417, "step": 6539, "token_acc": 0.8500549920567029 }, { "epoch": 0.3528840446770625, "grad_norm": 0.4533594846725464, "learning_rate": 1.5012606637685438e-05, "loss": 0.4067860245704651, "step": 6540, "token_acc": 0.8598681928546653 }, { "epoch": 0.35293800248205903, "grad_norm": 0.44240063428878784, "learning_rate": 1.5011094401943147e-05, "loss": 0.3929697871208191, "step": 6541, "token_acc": 0.8655291170945523 }, { "epoch": 0.35299196028705554, "grad_norm": 0.4137722849845886, "learning_rate": 1.5009582013163346e-05, "loss": 0.39590054750442505, "step": 6542, "token_acc": 0.8637706855791962 }, { "epoch": 0.353045918092052, "grad_norm": 0.3361181914806366, "learning_rate": 1.5008069471392222e-05, "loss": 0.3764615058898926, "step": 6543, "token_acc": 0.8694078471116259 }, { "epoch": 0.3530998758970485, "grad_norm": 0.36354491114616394, "learning_rate": 1.5006556776675967e-05, "loss": 0.396907776594162, "step": 6544, "token_acc": 0.8618713450292398 }, { "epoch": 0.353153833702045, "grad_norm": 0.3862597644329071, "learning_rate": 1.500504392906078e-05, "loss": 0.33467045426368713, "step": 6545, "token_acc": 0.8808122424955857 }, { "epoch": 0.3532077915070415, "grad_norm": 0.37081781029701233, "learning_rate": 1.5003530928592862e-05, "loss": 0.3399043083190918, "step": 6546, "token_acc": 0.8764028131078857 }, { "epoch": 0.35326174931203796, "grad_norm": 0.4739809036254883, "learning_rate": 1.5002017775318421e-05, "loss": 0.3920043110847473, "step": 6547, "token_acc": 0.8601371951219512 }, { "epoch": 0.35331570711703447, "grad_norm": 0.4255666136741638, "learning_rate": 1.5000504469283667e-05, "loss": 0.3988254964351654, "step": 6548, "token_acc": 0.8644601057135666 }, { "epoch": 0.353369664922031, "grad_norm": 0.4350721538066864, "learning_rate": 1.4998991010534818e-05, "loss": 0.3664974570274353, "step": 6549, "token_acc": 0.8752978554408261 }, { "epoch": 0.3534236227270275, "grad_norm": 0.33912184834480286, "learning_rate": 1.4997477399118091e-05, "loss": 0.37332096695899963, "step": 6550, "token_acc": 0.8675074586384595 }, { "epoch": 0.35347758053202394, "grad_norm": 0.45235514640808105, "learning_rate": 1.4995963635079716e-05, "loss": 0.40750032663345337, "step": 6551, "token_acc": 0.8630258302583026 }, { "epoch": 0.35353153833702045, "grad_norm": 0.3817461133003235, "learning_rate": 1.4994449718465918e-05, "loss": 0.3350214660167694, "step": 6552, "token_acc": 0.8773082166315529 }, { "epoch": 0.35358549614201695, "grad_norm": 0.3500032424926758, "learning_rate": 1.4992935649322936e-05, "loss": 0.40523993968963623, "step": 6553, "token_acc": 0.8589983489268024 }, { "epoch": 0.35363945394701346, "grad_norm": 0.42467403411865234, "learning_rate": 1.4991421427697004e-05, "loss": 0.4656621813774109, "step": 6554, "token_acc": 0.8421390924283327 }, { "epoch": 0.3536934117520099, "grad_norm": 0.4298091232776642, "learning_rate": 1.4989907053634371e-05, "loss": 0.40645861625671387, "step": 6555, "token_acc": 0.8635167726375795 }, { "epoch": 0.3537473695570064, "grad_norm": 0.35377034544944763, "learning_rate": 1.4988392527181284e-05, "loss": 0.36333855986595154, "step": 6556, "token_acc": 0.8741691842900302 }, { "epoch": 0.3538013273620029, "grad_norm": 0.3483310341835022, "learning_rate": 1.4986877848383996e-05, "loss": 0.3766087293624878, "step": 6557, "token_acc": 0.8703316073592596 }, { "epoch": 0.35385528516699943, "grad_norm": 0.3615747392177582, "learning_rate": 1.4985363017288763e-05, "loss": 0.37743014097213745, "step": 6558, "token_acc": 0.872692264129573 }, { "epoch": 0.3539092429719959, "grad_norm": 0.3522215485572815, "learning_rate": 1.4983848033941852e-05, "loss": 0.36705946922302246, "step": 6559, "token_acc": 0.8713009167923872 }, { "epoch": 0.3539632007769924, "grad_norm": 0.5617651343345642, "learning_rate": 1.4982332898389529e-05, "loss": 0.44566190242767334, "step": 6560, "token_acc": 0.8481684354673057 }, { "epoch": 0.3540171585819889, "grad_norm": 0.30130133032798767, "learning_rate": 1.4980817610678062e-05, "loss": 0.34831124544143677, "step": 6561, "token_acc": 0.8788332833882729 }, { "epoch": 0.35407111638698535, "grad_norm": 0.511402428150177, "learning_rate": 1.497930217085373e-05, "loss": 0.41930073499679565, "step": 6562, "token_acc": 0.8574021782977007 }, { "epoch": 0.35412507419198186, "grad_norm": 0.2597263753414154, "learning_rate": 1.4977786578962814e-05, "loss": 0.34628912806510925, "step": 6563, "token_acc": 0.8768661034602476 }, { "epoch": 0.35417903199697837, "grad_norm": 0.32972583174705505, "learning_rate": 1.49762708350516e-05, "loss": 0.39015066623687744, "step": 6564, "token_acc": 0.8639298331100013 }, { "epoch": 0.3542329898019749, "grad_norm": 0.4737629294395447, "learning_rate": 1.4974754939166376e-05, "loss": 0.4131101965904236, "step": 6565, "token_acc": 0.861720356408499 }, { "epoch": 0.3542869476069713, "grad_norm": 0.4009908437728882, "learning_rate": 1.4973238891353443e-05, "loss": 0.443887859582901, "step": 6566, "token_acc": 0.8526008039956146 }, { "epoch": 0.35434090541196783, "grad_norm": 0.37546753883361816, "learning_rate": 1.4971722691659098e-05, "loss": 0.40651261806488037, "step": 6567, "token_acc": 0.8610780479510287 }, { "epoch": 0.35439486321696434, "grad_norm": 0.47990846633911133, "learning_rate": 1.4970206340129643e-05, "loss": 0.376613974571228, "step": 6568, "token_acc": 0.8721413721413721 }, { "epoch": 0.35444882102196085, "grad_norm": 0.4023476839065552, "learning_rate": 1.4968689836811385e-05, "loss": 0.38518720865249634, "step": 6569, "token_acc": 0.8684527796383121 }, { "epoch": 0.3545027788269573, "grad_norm": 0.3533056676387787, "learning_rate": 1.4967173181750645e-05, "loss": 0.4021962881088257, "step": 6570, "token_acc": 0.8617204831156027 }, { "epoch": 0.3545567366319538, "grad_norm": 0.491566002368927, "learning_rate": 1.4965656374993738e-05, "loss": 0.439650297164917, "step": 6571, "token_acc": 0.8527118350372552 }, { "epoch": 0.3546106944369503, "grad_norm": 0.506378710269928, "learning_rate": 1.4964139416586983e-05, "loss": 0.363118052482605, "step": 6572, "token_acc": 0.8729802513464991 }, { "epoch": 0.3546646522419468, "grad_norm": 0.3613158166408539, "learning_rate": 1.496262230657671e-05, "loss": 0.4129961133003235, "step": 6573, "token_acc": 0.8580068035781782 }, { "epoch": 0.3547186100469433, "grad_norm": 0.3079000413417816, "learning_rate": 1.4961105045009254e-05, "loss": 0.3131621778011322, "step": 6574, "token_acc": 0.8842048127374706 }, { "epoch": 0.3547725678519398, "grad_norm": 0.40425169467926025, "learning_rate": 1.4959587631930949e-05, "loss": 0.47762152552604675, "step": 6575, "token_acc": 0.840528186896902 }, { "epoch": 0.3548265256569363, "grad_norm": 0.40952956676483154, "learning_rate": 1.495807006738814e-05, "loss": 0.39833498001098633, "step": 6576, "token_acc": 0.8596227674845601 }, { "epoch": 0.3548804834619328, "grad_norm": 0.4696275591850281, "learning_rate": 1.4956552351427167e-05, "loss": 0.4140539765357971, "step": 6577, "token_acc": 0.8577863577863578 }, { "epoch": 0.35493444126692925, "grad_norm": 0.3550833463668823, "learning_rate": 1.4955034484094382e-05, "loss": 0.43894997239112854, "step": 6578, "token_acc": 0.8513229735405292 }, { "epoch": 0.35498839907192575, "grad_norm": 0.43011388182640076, "learning_rate": 1.4953516465436145e-05, "loss": 0.4334756135940552, "step": 6579, "token_acc": 0.848210170533108 }, { "epoch": 0.35504235687692226, "grad_norm": 0.4666544497013092, "learning_rate": 1.495199829549881e-05, "loss": 0.3023782968521118, "step": 6580, "token_acc": 0.8914799853103195 }, { "epoch": 0.3550963146819187, "grad_norm": 0.4482751488685608, "learning_rate": 1.4950479974328746e-05, "loss": 0.4792826175689697, "step": 6581, "token_acc": 0.8401960784313726 }, { "epoch": 0.3551502724869152, "grad_norm": 0.4163176715373993, "learning_rate": 1.494896150197232e-05, "loss": 0.26918601989746094, "step": 6582, "token_acc": 0.8980245595301655 }, { "epoch": 0.3552042302919117, "grad_norm": 0.4852403402328491, "learning_rate": 1.4947442878475906e-05, "loss": 0.4704146385192871, "step": 6583, "token_acc": 0.8412771864877372 }, { "epoch": 0.35525818809690823, "grad_norm": 0.4270865321159363, "learning_rate": 1.4945924103885885e-05, "loss": 0.4192010462284088, "step": 6584, "token_acc": 0.8618361251785482 }, { "epoch": 0.3553121459019047, "grad_norm": 0.4575890600681305, "learning_rate": 1.4944405178248633e-05, "loss": 0.39019882678985596, "step": 6585, "token_acc": 0.8660020277120649 }, { "epoch": 0.3553661037069012, "grad_norm": 0.37767288088798523, "learning_rate": 1.4942886101610546e-05, "loss": 0.37171441316604614, "step": 6586, "token_acc": 0.8689084696996943 }, { "epoch": 0.3554200615118977, "grad_norm": 0.40894824266433716, "learning_rate": 1.494136687401801e-05, "loss": 0.33620843291282654, "step": 6587, "token_acc": 0.8827049670855776 }, { "epoch": 0.3554740193168942, "grad_norm": 0.33495277166366577, "learning_rate": 1.4939847495517422e-05, "loss": 0.40031325817108154, "step": 6588, "token_acc": 0.8610978520286396 }, { "epoch": 0.35552797712189066, "grad_norm": 0.36917099356651306, "learning_rate": 1.4938327966155193e-05, "loss": 0.3696357011795044, "step": 6589, "token_acc": 0.8705862187178608 }, { "epoch": 0.35558193492688717, "grad_norm": 0.4797642230987549, "learning_rate": 1.4936808285977716e-05, "loss": 0.3926507830619812, "step": 6590, "token_acc": 0.8654376542313834 }, { "epoch": 0.3556358927318837, "grad_norm": 0.4813539683818817, "learning_rate": 1.493528845503141e-05, "loss": 0.4437981843948364, "step": 6591, "token_acc": 0.8474436503573392 }, { "epoch": 0.3556898505368802, "grad_norm": 0.3270023763179779, "learning_rate": 1.4933768473362684e-05, "loss": 0.3303898870944977, "step": 6592, "token_acc": 0.8843203822456934 }, { "epoch": 0.35574380834187663, "grad_norm": 0.38784345984458923, "learning_rate": 1.4932248341017964e-05, "loss": 0.3842201828956604, "step": 6593, "token_acc": 0.8666666666666667 }, { "epoch": 0.35579776614687314, "grad_norm": 0.44706887006759644, "learning_rate": 1.4930728058043669e-05, "loss": 0.4096773862838745, "step": 6594, "token_acc": 0.8567737768448248 }, { "epoch": 0.35585172395186965, "grad_norm": 0.3897138237953186, "learning_rate": 1.4929207624486233e-05, "loss": 0.43132907152175903, "step": 6595, "token_acc": 0.8551078551078551 }, { "epoch": 0.35590568175686615, "grad_norm": 0.3578304052352905, "learning_rate": 1.4927687040392086e-05, "loss": 0.3631949722766876, "step": 6596, "token_acc": 0.8673508171797796 }, { "epoch": 0.3559596395618626, "grad_norm": 0.3970905840396881, "learning_rate": 1.492616630580767e-05, "loss": 0.37158697843551636, "step": 6597, "token_acc": 0.8714285714285714 }, { "epoch": 0.3560135973668591, "grad_norm": 0.39761731028556824, "learning_rate": 1.4924645420779423e-05, "loss": 0.3473872244358063, "step": 6598, "token_acc": 0.8763385653513276 }, { "epoch": 0.3560675551718556, "grad_norm": 0.4303607940673828, "learning_rate": 1.4923124385353797e-05, "loss": 0.41583478450775146, "step": 6599, "token_acc": 0.856249068970654 }, { "epoch": 0.35612151297685213, "grad_norm": 0.393264502286911, "learning_rate": 1.4921603199577242e-05, "loss": 0.33541339635849, "step": 6600, "token_acc": 0.882704519985058 }, { "epoch": 0.3561754707818486, "grad_norm": 0.3743237555027008, "learning_rate": 1.4920081863496214e-05, "loss": 0.3724853992462158, "step": 6601, "token_acc": 0.8724225089796461 }, { "epoch": 0.3562294285868451, "grad_norm": 0.38131183385849, "learning_rate": 1.4918560377157173e-05, "loss": 0.3386343717575073, "step": 6602, "token_acc": 0.8819767441860465 }, { "epoch": 0.3562833863918416, "grad_norm": 0.3650827705860138, "learning_rate": 1.4917038740606588e-05, "loss": 0.33847576379776, "step": 6603, "token_acc": 0.8820392768890623 }, { "epoch": 0.35633734419683805, "grad_norm": 0.35865819454193115, "learning_rate": 1.4915516953890928e-05, "loss": 0.37123337388038635, "step": 6604, "token_acc": 0.8672294704528012 }, { "epoch": 0.35639130200183455, "grad_norm": 0.31369346380233765, "learning_rate": 1.4913995017056668e-05, "loss": 0.3426674008369446, "step": 6605, "token_acc": 0.8798174442190669 }, { "epoch": 0.35644525980683106, "grad_norm": 0.4095573127269745, "learning_rate": 1.4912472930150288e-05, "loss": 0.424711138010025, "step": 6606, "token_acc": 0.854630715123095 }, { "epoch": 0.35649921761182757, "grad_norm": 0.3730679154396057, "learning_rate": 1.491095069321827e-05, "loss": 0.36898672580718994, "step": 6607, "token_acc": 0.8722282314764738 }, { "epoch": 0.356553175416824, "grad_norm": 0.3312043249607086, "learning_rate": 1.4909428306307105e-05, "loss": 0.36257338523864746, "step": 6608, "token_acc": 0.8691435596230023 }, { "epoch": 0.3566071332218205, "grad_norm": 0.479139506816864, "learning_rate": 1.4907905769463286e-05, "loss": 0.4382322430610657, "step": 6609, "token_acc": 0.8457646718704828 }, { "epoch": 0.35666109102681703, "grad_norm": 0.4731367826461792, "learning_rate": 1.4906383082733312e-05, "loss": 0.3853064775466919, "step": 6610, "token_acc": 0.8626486264862648 }, { "epoch": 0.35671504883181354, "grad_norm": 0.46289342641830444, "learning_rate": 1.4904860246163681e-05, "loss": 0.3924265205860138, "step": 6611, "token_acc": 0.8618009798952526 }, { "epoch": 0.35676900663681, "grad_norm": 0.44935059547424316, "learning_rate": 1.4903337259800905e-05, "loss": 0.3874199390411377, "step": 6612, "token_acc": 0.8682065217391305 }, { "epoch": 0.3568229644418065, "grad_norm": 0.36422207951545715, "learning_rate": 1.4901814123691493e-05, "loss": 0.374440461397171, "step": 6613, "token_acc": 0.8707471919257692 }, { "epoch": 0.356876922246803, "grad_norm": 0.38837045431137085, "learning_rate": 1.4900290837881959e-05, "loss": 0.3335314691066742, "step": 6614, "token_acc": 0.879119255209103 }, { "epoch": 0.3569308800517995, "grad_norm": 0.29346099495887756, "learning_rate": 1.489876740241883e-05, "loss": 0.3472341001033783, "step": 6615, "token_acc": 0.8715548621944877 }, { "epoch": 0.35698483785679597, "grad_norm": 0.3947954475879669, "learning_rate": 1.4897243817348628e-05, "loss": 0.33138054609298706, "step": 6616, "token_acc": 0.8767530811729707 }, { "epoch": 0.3570387956617925, "grad_norm": 0.48213374614715576, "learning_rate": 1.4895720082717882e-05, "loss": 0.4314205050468445, "step": 6617, "token_acc": 0.8532745591939547 }, { "epoch": 0.357092753466789, "grad_norm": 0.37565669417381287, "learning_rate": 1.4894196198573128e-05, "loss": 0.3961711525917053, "step": 6618, "token_acc": 0.8614782183064121 }, { "epoch": 0.3571467112717855, "grad_norm": 0.4851180613040924, "learning_rate": 1.4892672164960904e-05, "loss": 0.3563278615474701, "step": 6619, "token_acc": 0.8705599036724865 }, { "epoch": 0.35720066907678194, "grad_norm": 0.39859235286712646, "learning_rate": 1.489114798192775e-05, "loss": 0.3973248302936554, "step": 6620, "token_acc": 0.854173594945128 }, { "epoch": 0.35725462688177845, "grad_norm": 0.42859628796577454, "learning_rate": 1.4889623649520222e-05, "loss": 0.32201194763183594, "step": 6621, "token_acc": 0.8854851643945469 }, { "epoch": 0.35730858468677495, "grad_norm": 0.47776705026626587, "learning_rate": 1.4888099167784868e-05, "loss": 0.43886154890060425, "step": 6622, "token_acc": 0.8506952915345206 }, { "epoch": 0.35736254249177146, "grad_norm": 0.41785767674446106, "learning_rate": 1.4886574536768243e-05, "loss": 0.38907188177108765, "step": 6623, "token_acc": 0.8624382207578254 }, { "epoch": 0.3574165002967679, "grad_norm": 0.36591067910194397, "learning_rate": 1.4885049756516914e-05, "loss": 0.3540611267089844, "step": 6624, "token_acc": 0.8789565770637824 }, { "epoch": 0.3574704581017644, "grad_norm": 0.4133557081222534, "learning_rate": 1.4883524827077449e-05, "loss": 0.3312824070453644, "step": 6625, "token_acc": 0.8789422944498003 }, { "epoch": 0.35752441590676093, "grad_norm": 0.43486863374710083, "learning_rate": 1.488199974849641e-05, "loss": 0.39317718148231506, "step": 6626, "token_acc": 0.8679867986798679 }, { "epoch": 0.3575783737117574, "grad_norm": 0.4439401626586914, "learning_rate": 1.4880474520820378e-05, "loss": 0.4076439142227173, "step": 6627, "token_acc": 0.8585436425012056 }, { "epoch": 0.3576323315167539, "grad_norm": 0.42732155323028564, "learning_rate": 1.4878949144095934e-05, "loss": 0.3873193562030792, "step": 6628, "token_acc": 0.8674123413346738 }, { "epoch": 0.3576862893217504, "grad_norm": 0.47870558500289917, "learning_rate": 1.4877423618369661e-05, "loss": 0.43659263849258423, "step": 6629, "token_acc": 0.8549253731343284 }, { "epoch": 0.3577402471267469, "grad_norm": 0.3331679105758667, "learning_rate": 1.4875897943688147e-05, "loss": 0.42305588722229004, "step": 6630, "token_acc": 0.8526601520086862 }, { "epoch": 0.35779420493174335, "grad_norm": 0.4101790189743042, "learning_rate": 1.4874372120097989e-05, "loss": 0.38984325528144836, "step": 6631, "token_acc": 0.8646523816864499 }, { "epoch": 0.35784816273673986, "grad_norm": 0.40929126739501953, "learning_rate": 1.4872846147645783e-05, "loss": 0.35269057750701904, "step": 6632, "token_acc": 0.8759361311290095 }, { "epoch": 0.35790212054173637, "grad_norm": 0.47840914130210876, "learning_rate": 1.4871320026378131e-05, "loss": 0.41849464178085327, "step": 6633, "token_acc": 0.853042876901798 }, { "epoch": 0.3579560783467329, "grad_norm": 0.4612616300582886, "learning_rate": 1.4869793756341643e-05, "loss": 0.39156991243362427, "step": 6634, "token_acc": 0.8661164603392939 }, { "epoch": 0.3580100361517293, "grad_norm": 0.3922993242740631, "learning_rate": 1.486826733758293e-05, "loss": 0.395569771528244, "step": 6635, "token_acc": 0.8601532567049809 }, { "epoch": 0.35806399395672583, "grad_norm": 0.32274144887924194, "learning_rate": 1.4866740770148606e-05, "loss": 0.36094507575035095, "step": 6636, "token_acc": 0.873200041437895 }, { "epoch": 0.35811795176172234, "grad_norm": 0.44509971141815186, "learning_rate": 1.4865214054085293e-05, "loss": 0.43361538648605347, "step": 6637, "token_acc": 0.8530057042562528 }, { "epoch": 0.35817190956671885, "grad_norm": 0.2704598307609558, "learning_rate": 1.4863687189439619e-05, "loss": 0.35768744349479675, "step": 6638, "token_acc": 0.872190392243279 }, { "epoch": 0.3582258673717153, "grad_norm": 0.4191255271434784, "learning_rate": 1.4862160176258214e-05, "loss": 0.381894588470459, "step": 6639, "token_acc": 0.8670526428648011 }, { "epoch": 0.3582798251767118, "grad_norm": 0.416415810585022, "learning_rate": 1.4860633014587706e-05, "loss": 0.38232123851776123, "step": 6640, "token_acc": 0.8654048370136698 }, { "epoch": 0.3583337829817083, "grad_norm": 0.4918486177921295, "learning_rate": 1.4859105704474743e-05, "loss": 0.4278327226638794, "step": 6641, "token_acc": 0.854629241726016 }, { "epoch": 0.3583877407867048, "grad_norm": 0.407146155834198, "learning_rate": 1.4857578245965967e-05, "loss": 0.3983888030052185, "step": 6642, "token_acc": 0.861327065630085 }, { "epoch": 0.3584416985917013, "grad_norm": 0.4511188864707947, "learning_rate": 1.4856050639108019e-05, "loss": 0.396129846572876, "step": 6643, "token_acc": 0.8660688869911757 }, { "epoch": 0.3584956563966978, "grad_norm": 0.3613016903400421, "learning_rate": 1.4854522883947558e-05, "loss": 0.41486698389053345, "step": 6644, "token_acc": 0.8626353570994039 }, { "epoch": 0.3585496142016943, "grad_norm": 0.35402679443359375, "learning_rate": 1.4852994980531242e-05, "loss": 0.3736909031867981, "step": 6645, "token_acc": 0.8734611742424242 }, { "epoch": 0.35860357200669074, "grad_norm": 0.43561914563179016, "learning_rate": 1.4851466928905724e-05, "loss": 0.3558945059776306, "step": 6646, "token_acc": 0.8742129364625072 }, { "epoch": 0.35865752981168725, "grad_norm": 0.3181195557117462, "learning_rate": 1.4849938729117683e-05, "loss": 0.3650292158126831, "step": 6647, "token_acc": 0.8716807595312268 }, { "epoch": 0.35871148761668376, "grad_norm": 0.3838740289211273, "learning_rate": 1.4848410381213785e-05, "loss": 0.32483240962028503, "step": 6648, "token_acc": 0.8835492408413428 }, { "epoch": 0.35876544542168026, "grad_norm": 0.35290616750717163, "learning_rate": 1.4846881885240701e-05, "loss": 0.3832578659057617, "step": 6649, "token_acc": 0.8604333731445146 }, { "epoch": 0.3588194032266767, "grad_norm": 0.43743443489074707, "learning_rate": 1.4845353241245116e-05, "loss": 0.3644683361053467, "step": 6650, "token_acc": 0.8676056338028169 }, { "epoch": 0.3588733610316732, "grad_norm": 0.4825551211833954, "learning_rate": 1.484382444927371e-05, "loss": 0.38871920108795166, "step": 6651, "token_acc": 0.8631928586257006 }, { "epoch": 0.35892731883666973, "grad_norm": 0.4137478768825531, "learning_rate": 1.4842295509373173e-05, "loss": 0.43206772208213806, "step": 6652, "token_acc": 0.854015748031496 }, { "epoch": 0.35898127664166624, "grad_norm": 0.35901597142219543, "learning_rate": 1.4840766421590201e-05, "loss": 0.38228723406791687, "step": 6653, "token_acc": 0.8673127542474276 }, { "epoch": 0.3590352344466627, "grad_norm": 0.36356401443481445, "learning_rate": 1.483923718597149e-05, "loss": 0.35967379808425903, "step": 6654, "token_acc": 0.8690520103282922 }, { "epoch": 0.3590891922516592, "grad_norm": 0.4240027964115143, "learning_rate": 1.4837707802563746e-05, "loss": 0.4471116364002228, "step": 6655, "token_acc": 0.8517635370094386 }, { "epoch": 0.3591431500566557, "grad_norm": 0.49708855152130127, "learning_rate": 1.4836178271413668e-05, "loss": 0.39522993564605713, "step": 6656, "token_acc": 0.8634093037777084 }, { "epoch": 0.3591971078616522, "grad_norm": 0.3916020095348358, "learning_rate": 1.4834648592567977e-05, "loss": 0.34767645597457886, "step": 6657, "token_acc": 0.874129768866611 }, { "epoch": 0.35925106566664866, "grad_norm": 0.48990598320961, "learning_rate": 1.4833118766073385e-05, "loss": 0.4201095700263977, "step": 6658, "token_acc": 0.8495575221238938 }, { "epoch": 0.35930502347164517, "grad_norm": 0.5415908694267273, "learning_rate": 1.483158879197661e-05, "loss": 0.4055787920951843, "step": 6659, "token_acc": 0.8583232914443105 }, { "epoch": 0.3593589812766417, "grad_norm": 0.4789915382862091, "learning_rate": 1.4830058670324379e-05, "loss": 0.4048672318458557, "step": 6660, "token_acc": 0.8612005037078494 }, { "epoch": 0.3594129390816382, "grad_norm": 0.42923444509506226, "learning_rate": 1.4828528401163422e-05, "loss": 0.44333404302597046, "step": 6661, "token_acc": 0.8506202164159409 }, { "epoch": 0.35946689688663463, "grad_norm": 0.3289561867713928, "learning_rate": 1.482699798454047e-05, "loss": 0.4491404891014099, "step": 6662, "token_acc": 0.8456924280622446 }, { "epoch": 0.35952085469163114, "grad_norm": 0.41297686100006104, "learning_rate": 1.4825467420502268e-05, "loss": 0.3512656092643738, "step": 6663, "token_acc": 0.8815165876777251 }, { "epoch": 0.35957481249662765, "grad_norm": 0.31767505407333374, "learning_rate": 1.4823936709095552e-05, "loss": 0.3683443069458008, "step": 6664, "token_acc": 0.8673598289767758 }, { "epoch": 0.35962877030162416, "grad_norm": 0.35245460271835327, "learning_rate": 1.4822405850367071e-05, "loss": 0.36765146255493164, "step": 6665, "token_acc": 0.8721691531608887 }, { "epoch": 0.3596827281066206, "grad_norm": 0.4288948178291321, "learning_rate": 1.4820874844363587e-05, "loss": 0.4013575315475464, "step": 6666, "token_acc": 0.8728107259330837 }, { "epoch": 0.3597366859116171, "grad_norm": 0.32539090514183044, "learning_rate": 1.4819343691131842e-05, "loss": 0.44293713569641113, "step": 6667, "token_acc": 0.8542759407069556 }, { "epoch": 0.3597906437166136, "grad_norm": 0.474840372800827, "learning_rate": 1.4817812390718603e-05, "loss": 0.37977755069732666, "step": 6668, "token_acc": 0.8632462686567164 }, { "epoch": 0.3598446015216101, "grad_norm": 0.38268333673477173, "learning_rate": 1.4816280943170636e-05, "loss": 0.3441215753555298, "step": 6669, "token_acc": 0.8782188077962726 }, { "epoch": 0.3598985593266066, "grad_norm": 0.46445050835609436, "learning_rate": 1.4814749348534711e-05, "loss": 0.4119757413864136, "step": 6670, "token_acc": 0.858489238992826 }, { "epoch": 0.3599525171316031, "grad_norm": 0.4818188548088074, "learning_rate": 1.4813217606857604e-05, "loss": 0.3993999660015106, "step": 6671, "token_acc": 0.8553504212224413 }, { "epoch": 0.3600064749365996, "grad_norm": 0.367809534072876, "learning_rate": 1.4811685718186088e-05, "loss": 0.40425384044647217, "step": 6672, "token_acc": 0.8630503144654088 }, { "epoch": 0.36006043274159605, "grad_norm": 0.389008104801178, "learning_rate": 1.4810153682566954e-05, "loss": 0.37431299686431885, "step": 6673, "token_acc": 0.867451183240657 }, { "epoch": 0.36011439054659256, "grad_norm": 0.3906804323196411, "learning_rate": 1.4808621500046986e-05, "loss": 0.3874373137950897, "step": 6674, "token_acc": 0.8667008021846732 }, { "epoch": 0.36016834835158906, "grad_norm": 0.42608368396759033, "learning_rate": 1.4807089170672977e-05, "loss": 0.3940117359161377, "step": 6675, "token_acc": 0.8638941398865785 }, { "epoch": 0.36022230615658557, "grad_norm": 0.4136304259300232, "learning_rate": 1.4805556694491725e-05, "loss": 0.32004889845848083, "step": 6676, "token_acc": 0.8819812989638615 }, { "epoch": 0.360276263961582, "grad_norm": 0.35779523849487305, "learning_rate": 1.480402407155003e-05, "loss": 0.43599188327789307, "step": 6677, "token_acc": 0.8505530417295123 }, { "epoch": 0.36033022176657853, "grad_norm": 0.437648743391037, "learning_rate": 1.4802491301894697e-05, "loss": 0.35196271538734436, "step": 6678, "token_acc": 0.8730685920577618 }, { "epoch": 0.36038417957157504, "grad_norm": 0.4188537001609802, "learning_rate": 1.480095838557254e-05, "loss": 0.4266938269138336, "step": 6679, "token_acc": 0.8493350632500811 }, { "epoch": 0.36043813737657154, "grad_norm": 0.5030517578125, "learning_rate": 1.4799425322630368e-05, "loss": 0.3955264091491699, "step": 6680, "token_acc": 0.8653031140593329 }, { "epoch": 0.360492095181568, "grad_norm": 0.46481743454933167, "learning_rate": 1.4797892113115005e-05, "loss": 0.45565396547317505, "step": 6681, "token_acc": 0.8454480286738352 }, { "epoch": 0.3605460529865645, "grad_norm": 0.330437034368515, "learning_rate": 1.4796358757073274e-05, "loss": 0.34758293628692627, "step": 6682, "token_acc": 0.8752213814200612 }, { "epoch": 0.360600010791561, "grad_norm": 0.3753681182861328, "learning_rate": 1.4794825254552008e-05, "loss": 0.39401566982269287, "step": 6683, "token_acc": 0.8669654110068618 }, { "epoch": 0.3606539685965575, "grad_norm": 0.4048936367034912, "learning_rate": 1.479329160559803e-05, "loss": 0.38211947679519653, "step": 6684, "token_acc": 0.8638412984670875 }, { "epoch": 0.36070792640155397, "grad_norm": 0.4247778654098511, "learning_rate": 1.479175781025818e-05, "loss": 0.3292786478996277, "step": 6685, "token_acc": 0.8786958606360876 }, { "epoch": 0.3607618842065505, "grad_norm": 0.39986053109169006, "learning_rate": 1.4790223868579306e-05, "loss": 0.4101681113243103, "step": 6686, "token_acc": 0.8546576879910214 }, { "epoch": 0.360815842011547, "grad_norm": 0.37191587686538696, "learning_rate": 1.4788689780608247e-05, "loss": 0.34697848558425903, "step": 6687, "token_acc": 0.8770073923018098 }, { "epoch": 0.3608697998165435, "grad_norm": 0.4689766764640808, "learning_rate": 1.4787155546391859e-05, "loss": 0.42860841751098633, "step": 6688, "token_acc": 0.8554475264512439 }, { "epoch": 0.36092375762153994, "grad_norm": 0.37575677037239075, "learning_rate": 1.4785621165976994e-05, "loss": 0.4183245897293091, "step": 6689, "token_acc": 0.8583008737761869 }, { "epoch": 0.36097771542653645, "grad_norm": 0.45780491828918457, "learning_rate": 1.4784086639410511e-05, "loss": 0.4222320318222046, "step": 6690, "token_acc": 0.8555590750712702 }, { "epoch": 0.36103167323153296, "grad_norm": 0.4411601424217224, "learning_rate": 1.4782551966739276e-05, "loss": 0.3362470269203186, "step": 6691, "token_acc": 0.8808569667077681 }, { "epoch": 0.3610856310365294, "grad_norm": 0.33817949891090393, "learning_rate": 1.4781017148010157e-05, "loss": 0.3492687940597534, "step": 6692, "token_acc": 0.8789370078740157 }, { "epoch": 0.3611395888415259, "grad_norm": 0.4058437943458557, "learning_rate": 1.4779482183270026e-05, "loss": 0.36428529024124146, "step": 6693, "token_acc": 0.8725155279503105 }, { "epoch": 0.3611935466465224, "grad_norm": 0.35695570707321167, "learning_rate": 1.4777947072565761e-05, "loss": 0.4350506663322449, "step": 6694, "token_acc": 0.8517076093469144 }, { "epoch": 0.36124750445151893, "grad_norm": 0.3640756607055664, "learning_rate": 1.4776411815944244e-05, "loss": 0.38827407360076904, "step": 6695, "token_acc": 0.8632769901853872 }, { "epoch": 0.3613014622565154, "grad_norm": 0.48125988245010376, "learning_rate": 1.4774876413452364e-05, "loss": 0.394644170999527, "step": 6696, "token_acc": 0.8665523646235176 }, { "epoch": 0.3613554200615119, "grad_norm": 0.3125368058681488, "learning_rate": 1.4773340865137007e-05, "loss": 0.3249947726726532, "step": 6697, "token_acc": 0.8784350256171402 }, { "epoch": 0.3614093778665084, "grad_norm": 0.3638608753681183, "learning_rate": 1.477180517104507e-05, "loss": 0.3517535328865051, "step": 6698, "token_acc": 0.877986248688964 }, { "epoch": 0.3614633356715049, "grad_norm": 0.41771405935287476, "learning_rate": 1.4770269331223455e-05, "loss": 0.3881000280380249, "step": 6699, "token_acc": 0.8629379225568531 }, { "epoch": 0.36151729347650136, "grad_norm": 0.43943870067596436, "learning_rate": 1.4768733345719062e-05, "loss": 0.3425326347351074, "step": 6700, "token_acc": 0.8750672163470156 }, { "epoch": 0.36157125128149786, "grad_norm": 0.2984566390514374, "learning_rate": 1.4767197214578804e-05, "loss": 0.3777462840080261, "step": 6701, "token_acc": 0.8666010337189269 }, { "epoch": 0.36162520908649437, "grad_norm": 0.44248729944229126, "learning_rate": 1.4765660937849591e-05, "loss": 0.32968413829803467, "step": 6702, "token_acc": 0.8824769938650306 }, { "epoch": 0.3616791668914909, "grad_norm": 0.4517180621623993, "learning_rate": 1.4764124515578342e-05, "loss": 0.3800947666168213, "step": 6703, "token_acc": 0.8649091206811855 }, { "epoch": 0.36173312469648733, "grad_norm": 0.33658140897750854, "learning_rate": 1.4762587947811976e-05, "loss": 0.3168630003929138, "step": 6704, "token_acc": 0.8845871110022053 }, { "epoch": 0.36178708250148384, "grad_norm": 0.35932785272598267, "learning_rate": 1.4761051234597426e-05, "loss": 0.3837953805923462, "step": 6705, "token_acc": 0.8662663755458515 }, { "epoch": 0.36184104030648034, "grad_norm": 0.3998226523399353, "learning_rate": 1.4759514375981615e-05, "loss": 0.3779188096523285, "step": 6706, "token_acc": 0.8684997011356844 }, { "epoch": 0.36189499811147685, "grad_norm": 0.31026893854141235, "learning_rate": 1.4757977372011482e-05, "loss": 0.41100531816482544, "step": 6707, "token_acc": 0.8570009930486594 }, { "epoch": 0.3619489559164733, "grad_norm": 0.437198281288147, "learning_rate": 1.4756440222733967e-05, "loss": 0.3884231448173523, "step": 6708, "token_acc": 0.8618744313011829 }, { "epoch": 0.3620029137214698, "grad_norm": 0.32181107997894287, "learning_rate": 1.4754902928196016e-05, "loss": 0.4099429249763489, "step": 6709, "token_acc": 0.8592067727983804 }, { "epoch": 0.3620568715264663, "grad_norm": 0.3676808476448059, "learning_rate": 1.475336548844457e-05, "loss": 0.3218953013420105, "step": 6710, "token_acc": 0.8848204763597582 }, { "epoch": 0.36211082933146277, "grad_norm": 0.36017951369285583, "learning_rate": 1.4751827903526593e-05, "loss": 0.3959615230560303, "step": 6711, "token_acc": 0.8613287538907374 }, { "epoch": 0.3621647871364593, "grad_norm": 0.45494264364242554, "learning_rate": 1.4750290173489032e-05, "loss": 0.3942403793334961, "step": 6712, "token_acc": 0.86703146374829 }, { "epoch": 0.3622187449414558, "grad_norm": 0.35975125432014465, "learning_rate": 1.4748752298378857e-05, "loss": 0.3626774847507477, "step": 6713, "token_acc": 0.872013183191431 }, { "epoch": 0.3622727027464523, "grad_norm": 0.41317638754844666, "learning_rate": 1.4747214278243027e-05, "loss": 0.37217825651168823, "step": 6714, "token_acc": 0.870645385149202 }, { "epoch": 0.36232666055144874, "grad_norm": 0.40543729066848755, "learning_rate": 1.474567611312852e-05, "loss": 0.32616302371025085, "step": 6715, "token_acc": 0.8837604198826798 }, { "epoch": 0.36238061835644525, "grad_norm": 0.3404856026172638, "learning_rate": 1.4744137803082306e-05, "loss": 0.3871845602989197, "step": 6716, "token_acc": 0.8622716441620334 }, { "epoch": 0.36243457616144176, "grad_norm": 0.3926245868206024, "learning_rate": 1.4742599348151365e-05, "loss": 0.3473958969116211, "step": 6717, "token_acc": 0.8723127035830619 }, { "epoch": 0.36248853396643826, "grad_norm": 0.4606937766075134, "learning_rate": 1.4741060748382685e-05, "loss": 0.3842755854129791, "step": 6718, "token_acc": 0.8635006784260516 }, { "epoch": 0.3625424917714347, "grad_norm": 0.40760958194732666, "learning_rate": 1.4739522003823253e-05, "loss": 0.36342281103134155, "step": 6719, "token_acc": 0.8729605575795977 }, { "epoch": 0.3625964495764312, "grad_norm": 0.4272005259990692, "learning_rate": 1.4737983114520056e-05, "loss": 0.33503228425979614, "step": 6720, "token_acc": 0.8864884009208429 }, { "epoch": 0.36265040738142773, "grad_norm": 0.4022773504257202, "learning_rate": 1.4736444080520098e-05, "loss": 0.3916265368461609, "step": 6721, "token_acc": 0.8615019997037476 }, { "epoch": 0.36270436518642424, "grad_norm": 0.46495187282562256, "learning_rate": 1.473490490187038e-05, "loss": 0.3610572814941406, "step": 6722, "token_acc": 0.8710084791629081 }, { "epoch": 0.3627583229914207, "grad_norm": 0.4023004472255707, "learning_rate": 1.4733365578617904e-05, "loss": 0.4040669798851013, "step": 6723, "token_acc": 0.8585716298070151 }, { "epoch": 0.3628122807964172, "grad_norm": 0.4372021555900574, "learning_rate": 1.4731826110809687e-05, "loss": 0.39094603061676025, "step": 6724, "token_acc": 0.860655737704918 }, { "epoch": 0.3628662386014137, "grad_norm": 0.39949896931648254, "learning_rate": 1.4730286498492737e-05, "loss": 0.3830389082431793, "step": 6725, "token_acc": 0.8698650234741784 }, { "epoch": 0.3629201964064102, "grad_norm": 0.46432870626449585, "learning_rate": 1.4728746741714074e-05, "loss": 0.3993901014328003, "step": 6726, "token_acc": 0.8604752879428488 }, { "epoch": 0.36297415421140666, "grad_norm": 0.31701019406318665, "learning_rate": 1.472720684052073e-05, "loss": 0.34286242723464966, "step": 6727, "token_acc": 0.8763301696865113 }, { "epoch": 0.36302811201640317, "grad_norm": 0.34392520785331726, "learning_rate": 1.4725666794959726e-05, "loss": 0.40786048769950867, "step": 6728, "token_acc": 0.8595955742083174 }, { "epoch": 0.3630820698213997, "grad_norm": 0.36911362409591675, "learning_rate": 1.4724126605078092e-05, "loss": 0.40766555070877075, "step": 6729, "token_acc": 0.8608815426997245 }, { "epoch": 0.3631360276263962, "grad_norm": 0.3930630087852478, "learning_rate": 1.4722586270922872e-05, "loss": 0.43472394347190857, "step": 6730, "token_acc": 0.8525393142562516 }, { "epoch": 0.36318998543139264, "grad_norm": 0.43050551414489746, "learning_rate": 1.4721045792541104e-05, "loss": 0.3676025867462158, "step": 6731, "token_acc": 0.8749344061570754 }, { "epoch": 0.36324394323638914, "grad_norm": 0.4711582660675049, "learning_rate": 1.4719505169979838e-05, "loss": 0.3801073729991913, "step": 6732, "token_acc": 0.8648694472702612 }, { "epoch": 0.36329790104138565, "grad_norm": 0.43372201919555664, "learning_rate": 1.4717964403286115e-05, "loss": 0.3995007276535034, "step": 6733, "token_acc": 0.8617797794688616 }, { "epoch": 0.3633518588463821, "grad_norm": 0.30059024691581726, "learning_rate": 1.4716423492506997e-05, "loss": 0.367012619972229, "step": 6734, "token_acc": 0.868382710053424 }, { "epoch": 0.3634058166513786, "grad_norm": 0.5316348075866699, "learning_rate": 1.4714882437689543e-05, "loss": 0.3780720829963684, "step": 6735, "token_acc": 0.8666948793906052 }, { "epoch": 0.3634597744563751, "grad_norm": 0.39979037642478943, "learning_rate": 1.4713341238880812e-05, "loss": 0.38164177536964417, "step": 6736, "token_acc": 0.866522561469873 }, { "epoch": 0.3635137322613716, "grad_norm": 0.41457226872444153, "learning_rate": 1.4711799896127874e-05, "loss": 0.3514515161514282, "step": 6737, "token_acc": 0.8747272493903222 }, { "epoch": 0.3635676900663681, "grad_norm": 0.45370742678642273, "learning_rate": 1.4710258409477804e-05, "loss": 0.3636830449104309, "step": 6738, "token_acc": 0.8751888217522659 }, { "epoch": 0.3636216478713646, "grad_norm": 0.26310470700263977, "learning_rate": 1.4708716778977673e-05, "loss": 0.3880966901779175, "step": 6739, "token_acc": 0.8662733529990168 }, { "epoch": 0.3636756056763611, "grad_norm": 0.33966317772865295, "learning_rate": 1.4707175004674567e-05, "loss": 0.433218777179718, "step": 6740, "token_acc": 0.8550472040668119 }, { "epoch": 0.3637295634813576, "grad_norm": 0.41224151849746704, "learning_rate": 1.4705633086615569e-05, "loss": 0.32387664914131165, "step": 6741, "token_acc": 0.886770140428677 }, { "epoch": 0.36378352128635405, "grad_norm": 0.35620689392089844, "learning_rate": 1.4704091024847773e-05, "loss": 0.3570501208305359, "step": 6742, "token_acc": 0.870184013801035 }, { "epoch": 0.36383747909135056, "grad_norm": 0.4439307749271393, "learning_rate": 1.4702548819418264e-05, "loss": 0.39486125111579895, "step": 6743, "token_acc": 0.8618852459016394 }, { "epoch": 0.36389143689634706, "grad_norm": 0.2892597019672394, "learning_rate": 1.4701006470374148e-05, "loss": 0.3374255895614624, "step": 6744, "token_acc": 0.8800638884855525 }, { "epoch": 0.36394539470134357, "grad_norm": 0.4112904965877533, "learning_rate": 1.4699463977762528e-05, "loss": 0.3818144202232361, "step": 6745, "token_acc": 0.8623723487824038 }, { "epoch": 0.36399935250634, "grad_norm": 0.4282805323600769, "learning_rate": 1.4697921341630503e-05, "loss": 0.4268814027309418, "step": 6746, "token_acc": 0.8549289209974364 }, { "epoch": 0.36405331031133653, "grad_norm": 0.32017454504966736, "learning_rate": 1.4696378562025195e-05, "loss": 0.3862168788909912, "step": 6747, "token_acc": 0.866187893888319 }, { "epoch": 0.36410726811633304, "grad_norm": 0.39476439356803894, "learning_rate": 1.4694835638993716e-05, "loss": 0.36676186323165894, "step": 6748, "token_acc": 0.8709392914890489 }, { "epoch": 0.36416122592132955, "grad_norm": 0.37534862756729126, "learning_rate": 1.4693292572583187e-05, "loss": 0.36448830366134644, "step": 6749, "token_acc": 0.8739090491502067 }, { "epoch": 0.364215183726326, "grad_norm": 0.3745032548904419, "learning_rate": 1.4691749362840733e-05, "loss": 0.4039113521575928, "step": 6750, "token_acc": 0.8624765271180824 }, { "epoch": 0.3642691415313225, "grad_norm": 0.44114407896995544, "learning_rate": 1.4690206009813482e-05, "loss": 0.4059791564941406, "step": 6751, "token_acc": 0.8634496919917864 }, { "epoch": 0.364323099336319, "grad_norm": 0.36703360080718994, "learning_rate": 1.4688662513548564e-05, "loss": 0.48997366428375244, "step": 6752, "token_acc": 0.8371082132235187 }, { "epoch": 0.3643770571413155, "grad_norm": 0.36562013626098633, "learning_rate": 1.4687118874093125e-05, "loss": 0.2706737816333771, "step": 6753, "token_acc": 0.9009665931829743 }, { "epoch": 0.36443101494631197, "grad_norm": 0.3484581410884857, "learning_rate": 1.4685575091494304e-05, "loss": 0.4470401406288147, "step": 6754, "token_acc": 0.8454166196865487 }, { "epoch": 0.3644849727513085, "grad_norm": 0.3759167790412903, "learning_rate": 1.4684031165799244e-05, "loss": 0.3695545792579651, "step": 6755, "token_acc": 0.8719877420253517 }, { "epoch": 0.364538930556305, "grad_norm": 0.36224639415740967, "learning_rate": 1.46824870970551e-05, "loss": 0.3648717701435089, "step": 6756, "token_acc": 0.872904339250493 }, { "epoch": 0.36459288836130144, "grad_norm": 0.34133821725845337, "learning_rate": 1.4680942885309028e-05, "loss": 0.373523473739624, "step": 6757, "token_acc": 0.8735329025451668 }, { "epoch": 0.36464684616629794, "grad_norm": 0.4694764018058777, "learning_rate": 1.4679398530608189e-05, "loss": 0.48695677518844604, "step": 6758, "token_acc": 0.8393589743589743 }, { "epoch": 0.36470080397129445, "grad_norm": 0.400745153427124, "learning_rate": 1.4677854032999741e-05, "loss": 0.36636269092559814, "step": 6759, "token_acc": 0.8693853427895981 }, { "epoch": 0.36475476177629096, "grad_norm": 0.44909408688545227, "learning_rate": 1.4676309392530858e-05, "loss": 0.42394351959228516, "step": 6760, "token_acc": 0.8555386108415362 }, { "epoch": 0.3648087195812874, "grad_norm": 0.34521934390068054, "learning_rate": 1.467476460924871e-05, "loss": 0.3924916982650757, "step": 6761, "token_acc": 0.8625764842070448 }, { "epoch": 0.3648626773862839, "grad_norm": 0.4478911757469177, "learning_rate": 1.4673219683200474e-05, "loss": 0.39028239250183105, "step": 6762, "token_acc": 0.8671830719818628 }, { "epoch": 0.3649166351912804, "grad_norm": 0.32808786630630493, "learning_rate": 1.4671674614433337e-05, "loss": 0.3655311167240143, "step": 6763, "token_acc": 0.8702928870292888 }, { "epoch": 0.36497059299627693, "grad_norm": 0.44247451424598694, "learning_rate": 1.467012940299448e-05, "loss": 0.3776036202907562, "step": 6764, "token_acc": 0.8713675802231098 }, { "epoch": 0.3650245508012734, "grad_norm": 0.38203197717666626, "learning_rate": 1.4668584048931091e-05, "loss": 0.37592625617980957, "step": 6765, "token_acc": 0.8691898285038439 }, { "epoch": 0.3650785086062699, "grad_norm": 0.39076104760169983, "learning_rate": 1.4667038552290373e-05, "loss": 0.4627956748008728, "step": 6766, "token_acc": 0.8429103484969407 }, { "epoch": 0.3651324664112664, "grad_norm": 0.3712807595729828, "learning_rate": 1.4665492913119519e-05, "loss": 0.338327556848526, "step": 6767, "token_acc": 0.8816251933080276 }, { "epoch": 0.3651864242162629, "grad_norm": 0.3751855492591858, "learning_rate": 1.4663947131465733e-05, "loss": 0.4396752119064331, "step": 6768, "token_acc": 0.8562734688592516 }, { "epoch": 0.36524038202125936, "grad_norm": 0.3980846703052521, "learning_rate": 1.4662401207376222e-05, "loss": 0.35682493448257446, "step": 6769, "token_acc": 0.8756861478850501 }, { "epoch": 0.36529433982625586, "grad_norm": 0.45487233996391296, "learning_rate": 1.46608551408982e-05, "loss": 0.37935417890548706, "step": 6770, "token_acc": 0.8658064516129033 }, { "epoch": 0.36534829763125237, "grad_norm": 0.41280117630958557, "learning_rate": 1.4659308932078884e-05, "loss": 0.4071853458881378, "step": 6771, "token_acc": 0.860900354071826 }, { "epoch": 0.3654022554362489, "grad_norm": 0.4215109944343567, "learning_rate": 1.465776258096549e-05, "loss": 0.42439597845077515, "step": 6772, "token_acc": 0.8541728541728542 }, { "epoch": 0.36545621324124533, "grad_norm": 0.352304607629776, "learning_rate": 1.4656216087605252e-05, "loss": 0.4253445267677307, "step": 6773, "token_acc": 0.8587381351200447 }, { "epoch": 0.36551017104624184, "grad_norm": 0.3815668523311615, "learning_rate": 1.4654669452045394e-05, "loss": 0.3872967064380646, "step": 6774, "token_acc": 0.8629269046515222 }, { "epoch": 0.36556412885123835, "grad_norm": 0.3427886962890625, "learning_rate": 1.4653122674333148e-05, "loss": 0.38537678122520447, "step": 6775, "token_acc": 0.8716959064327485 }, { "epoch": 0.3656180866562348, "grad_norm": 0.364812970161438, "learning_rate": 1.4651575754515757e-05, "loss": 0.4223136007785797, "step": 6776, "token_acc": 0.8593299532756735 }, { "epoch": 0.3656720444612313, "grad_norm": 0.44870349764823914, "learning_rate": 1.465002869264046e-05, "loss": 0.39253899455070496, "step": 6777, "token_acc": 0.8632268632268633 }, { "epoch": 0.3657260022662278, "grad_norm": 0.3606625497341156, "learning_rate": 1.4648481488754502e-05, "loss": 0.4101758599281311, "step": 6778, "token_acc": 0.8589014051794217 }, { "epoch": 0.3657799600712243, "grad_norm": 0.4163565933704376, "learning_rate": 1.4646934142905141e-05, "loss": 0.4011201858520508, "step": 6779, "token_acc": 0.86189995675364 }, { "epoch": 0.36583391787622077, "grad_norm": 0.4202534854412079, "learning_rate": 1.4645386655139629e-05, "loss": 0.3764941096305847, "step": 6780, "token_acc": 0.8717149823895963 }, { "epoch": 0.3658878756812173, "grad_norm": 0.4230588674545288, "learning_rate": 1.464383902550522e-05, "loss": 0.4226614236831665, "step": 6781, "token_acc": 0.8532313764183522 }, { "epoch": 0.3659418334862138, "grad_norm": 0.4404590129852295, "learning_rate": 1.4642291254049188e-05, "loss": 0.3889417350292206, "step": 6782, "token_acc": 0.8662465988830016 }, { "epoch": 0.3659957912912103, "grad_norm": 0.37209561467170715, "learning_rate": 1.4640743340818797e-05, "loss": 0.35972464084625244, "step": 6783, "token_acc": 0.8736685991640825 }, { "epoch": 0.36604974909620674, "grad_norm": 0.3599618077278137, "learning_rate": 1.4639195285861319e-05, "loss": 0.4057931900024414, "step": 6784, "token_acc": 0.8603647774073347 }, { "epoch": 0.36610370690120325, "grad_norm": 0.45001643896102905, "learning_rate": 1.4637647089224034e-05, "loss": 0.29496145248413086, "step": 6785, "token_acc": 0.8899803536345776 }, { "epoch": 0.36615766470619976, "grad_norm": 0.5459789633750916, "learning_rate": 1.463609875095422e-05, "loss": 0.4823439121246338, "step": 6786, "token_acc": 0.8385329478009436 }, { "epoch": 0.36621162251119627, "grad_norm": 0.4333777129650116, "learning_rate": 1.4634550271099167e-05, "loss": 0.39704084396362305, "step": 6787, "token_acc": 0.8616279069767442 }, { "epoch": 0.3662655803161927, "grad_norm": 0.4002443253993988, "learning_rate": 1.4633001649706158e-05, "loss": 0.37787121534347534, "step": 6788, "token_acc": 0.8653472142775426 }, { "epoch": 0.3663195381211892, "grad_norm": 0.36116263270378113, "learning_rate": 1.4631452886822497e-05, "loss": 0.48557549715042114, "step": 6789, "token_acc": 0.8362169745148469 }, { "epoch": 0.36637349592618573, "grad_norm": 0.4575364589691162, "learning_rate": 1.4629903982495478e-05, "loss": 0.41447678208351135, "step": 6790, "token_acc": 0.8563226438874108 }, { "epoch": 0.36642745373118224, "grad_norm": 0.4478965103626251, "learning_rate": 1.46283549367724e-05, "loss": 0.3964853882789612, "step": 6791, "token_acc": 0.8635983263598327 }, { "epoch": 0.3664814115361787, "grad_norm": 0.41489890217781067, "learning_rate": 1.4626805749700578e-05, "loss": 0.3842719793319702, "step": 6792, "token_acc": 0.8663087248322148 }, { "epoch": 0.3665353693411752, "grad_norm": 0.34578633308410645, "learning_rate": 1.462525642132732e-05, "loss": 0.3495405316352844, "step": 6793, "token_acc": 0.8771402823670772 }, { "epoch": 0.3665893271461717, "grad_norm": 0.4384556710720062, "learning_rate": 1.4623706951699941e-05, "loss": 0.396138072013855, "step": 6794, "token_acc": 0.8631526438310608 }, { "epoch": 0.3666432849511682, "grad_norm": 0.3505966067314148, "learning_rate": 1.4622157340865766e-05, "loss": 0.37109363079071045, "step": 6795, "token_acc": 0.8703876382043426 }, { "epoch": 0.36669724275616467, "grad_norm": 0.35545215010643005, "learning_rate": 1.4620607588872116e-05, "loss": 0.3758907914161682, "step": 6796, "token_acc": 0.8679281345565749 }, { "epoch": 0.3667512005611612, "grad_norm": 0.34259921312332153, "learning_rate": 1.4619057695766317e-05, "loss": 0.42101889848709106, "step": 6797, "token_acc": 0.8489566081483935 }, { "epoch": 0.3668051583661577, "grad_norm": 0.32303425669670105, "learning_rate": 1.461750766159571e-05, "loss": 0.36951756477355957, "step": 6798, "token_acc": 0.871244635193133 }, { "epoch": 0.36685911617115413, "grad_norm": 0.4857378900051117, "learning_rate": 1.4615957486407626e-05, "loss": 0.3765750527381897, "step": 6799, "token_acc": 0.8701768488745981 }, { "epoch": 0.36691307397615064, "grad_norm": 0.3225483298301697, "learning_rate": 1.4614407170249412e-05, "loss": 0.344879686832428, "step": 6800, "token_acc": 0.8778801843317973 }, { "epoch": 0.36696703178114715, "grad_norm": 0.4599120318889618, "learning_rate": 1.461285671316841e-05, "loss": 0.4017407298088074, "step": 6801, "token_acc": 0.8625919117647058 }, { "epoch": 0.36702098958614365, "grad_norm": 0.4727461636066437, "learning_rate": 1.4611306115211975e-05, "loss": 0.37032145261764526, "step": 6802, "token_acc": 0.8732603451475227 }, { "epoch": 0.3670749473911401, "grad_norm": 0.39048779010772705, "learning_rate": 1.4609755376427456e-05, "loss": 0.35510051250457764, "step": 6803, "token_acc": 0.8724779319041615 }, { "epoch": 0.3671289051961366, "grad_norm": 0.34161144495010376, "learning_rate": 1.4608204496862215e-05, "loss": 0.34870272874832153, "step": 6804, "token_acc": 0.8787298547949577 }, { "epoch": 0.3671828630011331, "grad_norm": 0.3521907925605774, "learning_rate": 1.4606653476563616e-05, "loss": 0.38228747248649597, "step": 6805, "token_acc": 0.8630252100840337 }, { "epoch": 0.3672368208061296, "grad_norm": 0.38424545526504517, "learning_rate": 1.460510231557903e-05, "loss": 0.33540984988212585, "step": 6806, "token_acc": 0.8733390854184642 }, { "epoch": 0.3672907786111261, "grad_norm": 0.36186763644218445, "learning_rate": 1.4603551013955826e-05, "loss": 0.4159029722213745, "step": 6807, "token_acc": 0.8556178275954096 }, { "epoch": 0.3673447364161226, "grad_norm": 0.44447675347328186, "learning_rate": 1.4601999571741377e-05, "loss": 0.3631511926651001, "step": 6808, "token_acc": 0.8747642991829039 }, { "epoch": 0.3673986942211191, "grad_norm": 0.4176214933395386, "learning_rate": 1.4600447988983069e-05, "loss": 0.3577331304550171, "step": 6809, "token_acc": 0.8750316215532506 }, { "epoch": 0.3674526520261156, "grad_norm": 0.4610535800457001, "learning_rate": 1.459889626572828e-05, "loss": 0.30980029702186584, "step": 6810, "token_acc": 0.8885885306043638 }, { "epoch": 0.36750660983111205, "grad_norm": 0.39279526472091675, "learning_rate": 1.4597344402024407e-05, "loss": 0.4121624827384949, "step": 6811, "token_acc": 0.8618364170845582 }, { "epoch": 0.36756056763610856, "grad_norm": 0.44277989864349365, "learning_rate": 1.459579239791884e-05, "loss": 0.3724847435951233, "step": 6812, "token_acc": 0.8680436784633601 }, { "epoch": 0.36761452544110507, "grad_norm": 0.4507998526096344, "learning_rate": 1.4594240253458978e-05, "loss": 0.3700661063194275, "step": 6813, "token_acc": 0.8706060606060606 }, { "epoch": 0.3676684832461016, "grad_norm": 0.36544138193130493, "learning_rate": 1.4592687968692221e-05, "loss": 0.391422301530838, "step": 6814, "token_acc": 0.8636738541298457 }, { "epoch": 0.367722441051098, "grad_norm": 0.3838885426521301, "learning_rate": 1.4591135543665975e-05, "loss": 0.41196346282958984, "step": 6815, "token_acc": 0.8614552185675637 }, { "epoch": 0.36777639885609453, "grad_norm": 0.3685183525085449, "learning_rate": 1.4589582978427657e-05, "loss": 0.47812598943710327, "step": 6816, "token_acc": 0.8364178362936281 }, { "epoch": 0.36783035666109104, "grad_norm": 0.4383734464645386, "learning_rate": 1.4588030273024674e-05, "loss": 0.3727485239505768, "step": 6817, "token_acc": 0.86789506558401 }, { "epoch": 0.3678843144660875, "grad_norm": 0.45367106795310974, "learning_rate": 1.458647742750445e-05, "loss": 0.39695125818252563, "step": 6818, "token_acc": 0.8650613357564744 }, { "epoch": 0.367938272271084, "grad_norm": 0.3683871626853943, "learning_rate": 1.4584924441914406e-05, "loss": 0.40398430824279785, "step": 6819, "token_acc": 0.8579945799457994 }, { "epoch": 0.3679922300760805, "grad_norm": 0.40151894092559814, "learning_rate": 1.4583371316301966e-05, "loss": 0.38915932178497314, "step": 6820, "token_acc": 0.864491219329929 }, { "epoch": 0.368046187881077, "grad_norm": 0.34422487020492554, "learning_rate": 1.4581818050714574e-05, "loss": 0.3621461093425751, "step": 6821, "token_acc": 0.8754994551398474 }, { "epoch": 0.36810014568607347, "grad_norm": 0.4977518916130066, "learning_rate": 1.4580264645199656e-05, "loss": 0.3391963541507721, "step": 6822, "token_acc": 0.8803918470532469 }, { "epoch": 0.36815410349107, "grad_norm": 0.448880136013031, "learning_rate": 1.4578711099804652e-05, "loss": 0.39942532777786255, "step": 6823, "token_acc": 0.8675856307435255 }, { "epoch": 0.3682080612960665, "grad_norm": 0.39973217248916626, "learning_rate": 1.4577157414577017e-05, "loss": 0.38997238874435425, "step": 6824, "token_acc": 0.8656859609849696 }, { "epoch": 0.368262019101063, "grad_norm": 0.37854915857315063, "learning_rate": 1.4575603589564191e-05, "loss": 0.32239222526550293, "step": 6825, "token_acc": 0.8850989667621063 }, { "epoch": 0.36831597690605944, "grad_norm": 0.440664142370224, "learning_rate": 1.4574049624813627e-05, "loss": 0.38272809982299805, "step": 6826, "token_acc": 0.863513298295907 }, { "epoch": 0.36836993471105595, "grad_norm": 0.40020787715911865, "learning_rate": 1.457249552037279e-05, "loss": 0.40806686878204346, "step": 6827, "token_acc": 0.8634735305824599 }, { "epoch": 0.36842389251605245, "grad_norm": 0.4200681149959564, "learning_rate": 1.4570941276289134e-05, "loss": 0.3786824345588684, "step": 6828, "token_acc": 0.8693009118541033 }, { "epoch": 0.36847785032104896, "grad_norm": 0.3875525891780853, "learning_rate": 1.4569386892610131e-05, "loss": 0.4004330039024353, "step": 6829, "token_acc": 0.8634000663643402 }, { "epoch": 0.3685318081260454, "grad_norm": 0.5435097217559814, "learning_rate": 1.4567832369383247e-05, "loss": 0.3918813467025757, "step": 6830, "token_acc": 0.858094469115866 }, { "epoch": 0.3685857659310419, "grad_norm": 0.4613654315471649, "learning_rate": 1.4566277706655963e-05, "loss": 0.37300270795822144, "step": 6831, "token_acc": 0.8665291223615299 }, { "epoch": 0.3686397237360384, "grad_norm": 0.4139605462551117, "learning_rate": 1.4564722904475752e-05, "loss": 0.38657450675964355, "step": 6832, "token_acc": 0.8649425287356322 }, { "epoch": 0.36869368154103493, "grad_norm": 0.39713001251220703, "learning_rate": 1.4563167962890101e-05, "loss": 0.4251619577407837, "step": 6833, "token_acc": 0.8539941036080303 }, { "epoch": 0.3687476393460314, "grad_norm": 0.3461464047431946, "learning_rate": 1.4561612881946494e-05, "loss": 0.4024996757507324, "step": 6834, "token_acc": 0.8633705475810739 }, { "epoch": 0.3688015971510279, "grad_norm": 0.24243523180484772, "learning_rate": 1.4560057661692427e-05, "loss": 0.33523958921432495, "step": 6835, "token_acc": 0.8795405982905983 }, { "epoch": 0.3688555549560244, "grad_norm": 0.3334743082523346, "learning_rate": 1.4558502302175387e-05, "loss": 0.35763266682624817, "step": 6836, "token_acc": 0.871290127195639 }, { "epoch": 0.3689095127610209, "grad_norm": 0.3989622890949249, "learning_rate": 1.4556946803442886e-05, "loss": 0.3611214756965637, "step": 6837, "token_acc": 0.8668730650154799 }, { "epoch": 0.36896347056601736, "grad_norm": 0.3822506070137024, "learning_rate": 1.4555391165542423e-05, "loss": 0.3988019824028015, "step": 6838, "token_acc": 0.8606228800493371 }, { "epoch": 0.36901742837101387, "grad_norm": 0.46720707416534424, "learning_rate": 1.4553835388521504e-05, "loss": 0.415126770734787, "step": 6839, "token_acc": 0.8596721311475409 }, { "epoch": 0.3690713861760104, "grad_norm": 0.34555584192276, "learning_rate": 1.4552279472427651e-05, "loss": 0.3786763846874237, "step": 6840, "token_acc": 0.8696324951644101 }, { "epoch": 0.3691253439810068, "grad_norm": 0.3765125870704651, "learning_rate": 1.4550723417308375e-05, "loss": 0.3721356987953186, "step": 6841, "token_acc": 0.8699418228829994 }, { "epoch": 0.36917930178600333, "grad_norm": 0.37086641788482666, "learning_rate": 1.4549167223211194e-05, "loss": 0.35774481296539307, "step": 6842, "token_acc": 0.8718096611391493 }, { "epoch": 0.36923325959099984, "grad_norm": 0.39027687907218933, "learning_rate": 1.4547610890183638e-05, "loss": 0.37724488973617554, "step": 6843, "token_acc": 0.8717660292463442 }, { "epoch": 0.36928721739599635, "grad_norm": 0.33424896001815796, "learning_rate": 1.4546054418273238e-05, "loss": 0.3329102694988251, "step": 6844, "token_acc": 0.880863309352518 }, { "epoch": 0.3693411752009928, "grad_norm": 0.31526678800582886, "learning_rate": 1.4544497807527527e-05, "loss": 0.3551519215106964, "step": 6845, "token_acc": 0.8748987385719246 }, { "epoch": 0.3693951330059893, "grad_norm": 0.4261366128921509, "learning_rate": 1.4542941057994042e-05, "loss": 0.37297195196151733, "step": 6846, "token_acc": 0.8667153284671533 }, { "epoch": 0.3694490908109858, "grad_norm": 0.31965428590774536, "learning_rate": 1.454138416972033e-05, "loss": 0.3574652075767517, "step": 6847, "token_acc": 0.8726800296956199 }, { "epoch": 0.3695030486159823, "grad_norm": 0.3978779911994934, "learning_rate": 1.4539827142753934e-05, "loss": 0.3954133987426758, "step": 6848, "token_acc": 0.8635631875503086 }, { "epoch": 0.3695570064209788, "grad_norm": 0.33333611488342285, "learning_rate": 1.4538269977142405e-05, "loss": 0.30576291680336, "step": 6849, "token_acc": 0.8924111431316042 }, { "epoch": 0.3696109642259753, "grad_norm": 0.4426977336406708, "learning_rate": 1.4536712672933303e-05, "loss": 0.4870694875717163, "step": 6850, "token_acc": 0.8390822297505389 }, { "epoch": 0.3696649220309718, "grad_norm": 0.4411888122558594, "learning_rate": 1.4535155230174181e-05, "loss": 0.39291760325431824, "step": 6851, "token_acc": 0.8654979598005138 }, { "epoch": 0.3697188798359683, "grad_norm": 0.3919227123260498, "learning_rate": 1.4533597648912604e-05, "loss": 0.4429783821105957, "step": 6852, "token_acc": 0.8491860621552536 }, { "epoch": 0.36977283764096475, "grad_norm": 0.4436333477497101, "learning_rate": 1.4532039929196147e-05, "loss": 0.4080874025821686, "step": 6853, "token_acc": 0.862037037037037 }, { "epoch": 0.36982679544596125, "grad_norm": 0.40568482875823975, "learning_rate": 1.4530482071072376e-05, "loss": 0.4485909640789032, "step": 6854, "token_acc": 0.8492139878087905 }, { "epoch": 0.36988075325095776, "grad_norm": 0.4202239513397217, "learning_rate": 1.452892407458887e-05, "loss": 0.3202267587184906, "step": 6855, "token_acc": 0.8893630472577841 }, { "epoch": 0.36993471105595427, "grad_norm": 0.3349190652370453, "learning_rate": 1.452736593979321e-05, "loss": 0.35255271196365356, "step": 6856, "token_acc": 0.8775830499607638 }, { "epoch": 0.3699886688609507, "grad_norm": 0.32677629590034485, "learning_rate": 1.4525807666732979e-05, "loss": 0.35409992933273315, "step": 6857, "token_acc": 0.8719445953286258 }, { "epoch": 0.3700426266659472, "grad_norm": 0.44033554196357727, "learning_rate": 1.4524249255455768e-05, "loss": 0.38189512491226196, "step": 6858, "token_acc": 0.8650998824911869 }, { "epoch": 0.37009658447094373, "grad_norm": 0.5270500779151917, "learning_rate": 1.452269070600917e-05, "loss": 0.3659321069717407, "step": 6859, "token_acc": 0.8720324261725536 }, { "epoch": 0.37015054227594024, "grad_norm": 0.42047587037086487, "learning_rate": 1.4521132018440783e-05, "loss": 0.4010361433029175, "step": 6860, "token_acc": 0.8592316067817701 }, { "epoch": 0.3702045000809367, "grad_norm": 0.3884733021259308, "learning_rate": 1.4519573192798209e-05, "loss": 0.343997985124588, "step": 6861, "token_acc": 0.8765814613994319 }, { "epoch": 0.3702584578859332, "grad_norm": 0.3841801881790161, "learning_rate": 1.451801422912905e-05, "loss": 0.38378405570983887, "step": 6862, "token_acc": 0.8588628762541806 }, { "epoch": 0.3703124156909297, "grad_norm": 0.3565032184123993, "learning_rate": 1.4516455127480921e-05, "loss": 0.45760101079940796, "step": 6863, "token_acc": 0.8467927421415794 }, { "epoch": 0.37036637349592616, "grad_norm": 0.2979395389556885, "learning_rate": 1.4514895887901438e-05, "loss": 0.36956697702407837, "step": 6864, "token_acc": 0.8730855526290118 }, { "epoch": 0.37042033130092267, "grad_norm": 0.36354950070381165, "learning_rate": 1.4513336510438218e-05, "loss": 0.3847765028476715, "step": 6865, "token_acc": 0.8669443480273054 }, { "epoch": 0.3704742891059192, "grad_norm": 0.5028098225593567, "learning_rate": 1.4511776995138882e-05, "loss": 0.45873749256134033, "step": 6866, "token_acc": 0.846002621231979 }, { "epoch": 0.3705282469109157, "grad_norm": 0.4997715353965759, "learning_rate": 1.451021734205106e-05, "loss": 0.44960740208625793, "step": 6867, "token_acc": 0.8446529080675422 }, { "epoch": 0.37058220471591213, "grad_norm": 0.33311185240745544, "learning_rate": 1.4508657551222379e-05, "loss": 0.309762179851532, "step": 6868, "token_acc": 0.8825498861657962 }, { "epoch": 0.37063616252090864, "grad_norm": 0.3641577363014221, "learning_rate": 1.450709762270048e-05, "loss": 0.3784666657447815, "step": 6869, "token_acc": 0.8704933586337761 }, { "epoch": 0.37069012032590515, "grad_norm": 0.4805319011211395, "learning_rate": 1.4505537556532998e-05, "loss": 0.3863430619239807, "step": 6870, "token_acc": 0.8632531043998161 }, { "epoch": 0.37074407813090166, "grad_norm": 0.35569536685943604, "learning_rate": 1.4503977352767578e-05, "loss": 0.4421997666358948, "step": 6871, "token_acc": 0.8512534818941504 }, { "epoch": 0.3707980359358981, "grad_norm": 0.42188581824302673, "learning_rate": 1.4502417011451872e-05, "loss": 0.32809045910835266, "step": 6872, "token_acc": 0.8824300699300699 }, { "epoch": 0.3708519937408946, "grad_norm": 0.42731204628944397, "learning_rate": 1.450085653263353e-05, "loss": 0.385694295167923, "step": 6873, "token_acc": 0.8649119748997735 }, { "epoch": 0.3709059515458911, "grad_norm": 0.36300915479660034, "learning_rate": 1.4499295916360207e-05, "loss": 0.406890869140625, "step": 6874, "token_acc": 0.864501679731243 }, { "epoch": 0.37095990935088763, "grad_norm": 0.43370237946510315, "learning_rate": 1.449773516267957e-05, "loss": 0.37607842683792114, "step": 6875, "token_acc": 0.8657718120805369 }, { "epoch": 0.3710138671558841, "grad_norm": 0.30884429812431335, "learning_rate": 1.4496174271639274e-05, "loss": 0.3578573763370514, "step": 6876, "token_acc": 0.8682191412033922 }, { "epoch": 0.3710678249608806, "grad_norm": 0.3978109061717987, "learning_rate": 1.4494613243286998e-05, "loss": 0.4016914963722229, "step": 6877, "token_acc": 0.8659995649336524 }, { "epoch": 0.3711217827658771, "grad_norm": 0.37664544582366943, "learning_rate": 1.4493052077670406e-05, "loss": 0.32935404777526855, "step": 6878, "token_acc": 0.8829523547232168 }, { "epoch": 0.3711757405708736, "grad_norm": 0.4053122401237488, "learning_rate": 1.4491490774837184e-05, "loss": 0.3880721926689148, "step": 6879, "token_acc": 0.8698127065736321 }, { "epoch": 0.37122969837587005, "grad_norm": 0.4740428626537323, "learning_rate": 1.4489929334835008e-05, "loss": 0.34476128220558167, "step": 6880, "token_acc": 0.8803914461761507 }, { "epoch": 0.37128365618086656, "grad_norm": 0.4633410573005676, "learning_rate": 1.4488367757711568e-05, "loss": 0.42526376247406006, "step": 6881, "token_acc": 0.856673161227472 }, { "epoch": 0.37133761398586307, "grad_norm": 0.36113789677619934, "learning_rate": 1.4486806043514554e-05, "loss": 0.39262282848358154, "step": 6882, "token_acc": 0.8670053795576809 }, { "epoch": 0.3713915717908595, "grad_norm": 0.4593918025493622, "learning_rate": 1.4485244192291656e-05, "loss": 0.3353000283241272, "step": 6883, "token_acc": 0.8858765341637662 }, { "epoch": 0.371445529595856, "grad_norm": 0.4511711299419403, "learning_rate": 1.4483682204090577e-05, "loss": 0.39931854605674744, "step": 6884, "token_acc": 0.862496222423693 }, { "epoch": 0.37149948740085253, "grad_norm": 0.31681936979293823, "learning_rate": 1.4482120078959017e-05, "loss": 0.3837442994117737, "step": 6885, "token_acc": 0.8695296044933302 }, { "epoch": 0.37155344520584904, "grad_norm": 0.4844920337200165, "learning_rate": 1.4480557816944684e-05, "loss": 0.43072789907455444, "step": 6886, "token_acc": 0.8615331491712708 }, { "epoch": 0.3716074030108455, "grad_norm": 0.5373637080192566, "learning_rate": 1.447899541809529e-05, "loss": 0.4159034490585327, "step": 6887, "token_acc": 0.8581406177246624 }, { "epoch": 0.371661360815842, "grad_norm": 0.4020138382911682, "learning_rate": 1.4477432882458549e-05, "loss": 0.4427911937236786, "step": 6888, "token_acc": 0.8501701125359853 }, { "epoch": 0.3717153186208385, "grad_norm": 0.33811524510383606, "learning_rate": 1.447587021008218e-05, "loss": 0.3900536298751831, "step": 6889, "token_acc": 0.8632238904471209 }, { "epoch": 0.371769276425835, "grad_norm": 0.3428995907306671, "learning_rate": 1.4474307401013907e-05, "loss": 0.3861614167690277, "step": 6890, "token_acc": 0.8615584838132422 }, { "epoch": 0.37182323423083147, "grad_norm": 0.39399367570877075, "learning_rate": 1.447274445530146e-05, "loss": 0.34552302956581116, "step": 6891, "token_acc": 0.8759795570698466 }, { "epoch": 0.371877192035828, "grad_norm": 0.3465690016746521, "learning_rate": 1.4471181372992569e-05, "loss": 0.3804093599319458, "step": 6892, "token_acc": 0.8653585926928281 }, { "epoch": 0.3719311498408245, "grad_norm": 0.397597074508667, "learning_rate": 1.4469618154134969e-05, "loss": 0.43407830595970154, "step": 6893, "token_acc": 0.8522863618184242 }, { "epoch": 0.371985107645821, "grad_norm": 0.45864254236221313, "learning_rate": 1.4468054798776398e-05, "loss": 0.4178106486797333, "step": 6894, "token_acc": 0.8579672375279226 }, { "epoch": 0.37203906545081744, "grad_norm": 0.4746376574039459, "learning_rate": 1.446649130696461e-05, "loss": 0.41946935653686523, "step": 6895, "token_acc": 0.8534217764115839 }, { "epoch": 0.37209302325581395, "grad_norm": 0.3526628315448761, "learning_rate": 1.4464927678747345e-05, "loss": 0.44086164236068726, "step": 6896, "token_acc": 0.8553117532044319 }, { "epoch": 0.37214698106081046, "grad_norm": 0.4211522340774536, "learning_rate": 1.4463363914172356e-05, "loss": 0.42081326246261597, "step": 6897, "token_acc": 0.8567330917874396 }, { "epoch": 0.37220093886580696, "grad_norm": 0.3481432795524597, "learning_rate": 1.4461800013287405e-05, "loss": 0.3728291094303131, "step": 6898, "token_acc": 0.8702830188679245 }, { "epoch": 0.3722548966708034, "grad_norm": 0.4086751341819763, "learning_rate": 1.4460235976140252e-05, "loss": 0.37744438648223877, "step": 6899, "token_acc": 0.8707677043284138 }, { "epoch": 0.3723088544757999, "grad_norm": 0.4479946196079254, "learning_rate": 1.4458671802778659e-05, "loss": 0.38792937994003296, "step": 6900, "token_acc": 0.8638477186864284 }, { "epoch": 0.37236281228079643, "grad_norm": 0.3804624080657959, "learning_rate": 1.4457107493250396e-05, "loss": 0.32583731412887573, "step": 6901, "token_acc": 0.8786630761919848 }, { "epoch": 0.37241677008579294, "grad_norm": 0.4422910213470459, "learning_rate": 1.445554304760324e-05, "loss": 0.36698418855667114, "step": 6902, "token_acc": 0.865835596149099 }, { "epoch": 0.3724707278907894, "grad_norm": 0.4029122591018677, "learning_rate": 1.4453978465884966e-05, "loss": 0.4078795611858368, "step": 6903, "token_acc": 0.856991712220067 }, { "epoch": 0.3725246856957859, "grad_norm": 0.41007572412490845, "learning_rate": 1.4452413748143353e-05, "loss": 0.43631160259246826, "step": 6904, "token_acc": 0.850095916689504 }, { "epoch": 0.3725786435007824, "grad_norm": 0.43028295040130615, "learning_rate": 1.4450848894426193e-05, "loss": 0.38222551345825195, "step": 6905, "token_acc": 0.8723082295425445 }, { "epoch": 0.37263260130577885, "grad_norm": 0.4425804615020752, "learning_rate": 1.4449283904781274e-05, "loss": 0.39100536704063416, "step": 6906, "token_acc": 0.8654714304140333 }, { "epoch": 0.37268655911077536, "grad_norm": 0.2930055558681488, "learning_rate": 1.444771877925639e-05, "loss": 0.37501323223114014, "step": 6907, "token_acc": 0.870390023879013 }, { "epoch": 0.37274051691577187, "grad_norm": 0.4015602469444275, "learning_rate": 1.444615351789934e-05, "loss": 0.3961625099182129, "step": 6908, "token_acc": 0.8615843800522323 }, { "epoch": 0.3727944747207684, "grad_norm": 0.4580482542514801, "learning_rate": 1.4444588120757927e-05, "loss": 0.38013583421707153, "step": 6909, "token_acc": 0.8695185731527424 }, { "epoch": 0.37284843252576483, "grad_norm": 0.4728623330593109, "learning_rate": 1.4443022587879953e-05, "loss": 0.410273939371109, "step": 6910, "token_acc": 0.8532491274721622 }, { "epoch": 0.37290239033076134, "grad_norm": 0.3320143520832062, "learning_rate": 1.4441456919313237e-05, "loss": 0.3571055233478546, "step": 6911, "token_acc": 0.8764115432873275 }, { "epoch": 0.37295634813575784, "grad_norm": 0.5134910345077515, "learning_rate": 1.4439891115105588e-05, "loss": 0.4189596176147461, "step": 6912, "token_acc": 0.8562178072111847 }, { "epoch": 0.37301030594075435, "grad_norm": 0.4194762408733368, "learning_rate": 1.4438325175304831e-05, "loss": 0.3673035800457001, "step": 6913, "token_acc": 0.8688691602386843 }, { "epoch": 0.3730642637457508, "grad_norm": 0.44453322887420654, "learning_rate": 1.4436759099958782e-05, "loss": 0.3517518937587738, "step": 6914, "token_acc": 0.877593670697119 }, { "epoch": 0.3731182215507473, "grad_norm": 0.3872769773006439, "learning_rate": 1.4435192889115275e-05, "loss": 0.40298590064048767, "step": 6915, "token_acc": 0.8620114063152038 }, { "epoch": 0.3731721793557438, "grad_norm": 0.3367692828178406, "learning_rate": 1.4433626542822136e-05, "loss": 0.4114273190498352, "step": 6916, "token_acc": 0.8575159605339524 }, { "epoch": 0.3732261371607403, "grad_norm": 0.4455413818359375, "learning_rate": 1.4432060061127206e-05, "loss": 0.42159584164619446, "step": 6917, "token_acc": 0.8528791076273742 }, { "epoch": 0.3732800949657368, "grad_norm": 0.3567097783088684, "learning_rate": 1.4430493444078325e-05, "loss": 0.41234585642814636, "step": 6918, "token_acc": 0.864912538585918 }, { "epoch": 0.3733340527707333, "grad_norm": 0.4916706681251526, "learning_rate": 1.4428926691723334e-05, "loss": 0.4050861895084381, "step": 6919, "token_acc": 0.8582245851458031 }, { "epoch": 0.3733880105757298, "grad_norm": 0.4409116506576538, "learning_rate": 1.4427359804110077e-05, "loss": 0.41505855321884155, "step": 6920, "token_acc": 0.8585134962261737 }, { "epoch": 0.3734419683807263, "grad_norm": 0.3837173879146576, "learning_rate": 1.4425792781286414e-05, "loss": 0.41373586654663086, "step": 6921, "token_acc": 0.8559421751554883 }, { "epoch": 0.37349592618572275, "grad_norm": 0.3986181616783142, "learning_rate": 1.44242256233002e-05, "loss": 0.3784310221672058, "step": 6922, "token_acc": 0.8672320514695131 }, { "epoch": 0.37354988399071926, "grad_norm": 0.39217129349708557, "learning_rate": 1.4422658330199293e-05, "loss": 0.3813181519508362, "step": 6923, "token_acc": 0.8645449763282483 }, { "epoch": 0.37360384179571576, "grad_norm": 0.5214122533798218, "learning_rate": 1.4421090902031563e-05, "loss": 0.4520748257637024, "step": 6924, "token_acc": 0.8449465332873405 }, { "epoch": 0.37365779960071227, "grad_norm": 0.46509334444999695, "learning_rate": 1.4419523338844874e-05, "loss": 0.37459704279899597, "step": 6925, "token_acc": 0.8666768947529917 }, { "epoch": 0.3737117574057087, "grad_norm": 0.2868311107158661, "learning_rate": 1.4417955640687095e-05, "loss": 0.3744423985481262, "step": 6926, "token_acc": 0.8669364728967378 }, { "epoch": 0.37376571521070523, "grad_norm": 0.33068060874938965, "learning_rate": 1.4416387807606112e-05, "loss": 0.39064499735832214, "step": 6927, "token_acc": 0.8680293005671077 }, { "epoch": 0.37381967301570174, "grad_norm": 0.3674580752849579, "learning_rate": 1.4414819839649803e-05, "loss": 0.3245031237602234, "step": 6928, "token_acc": 0.8830957230142567 }, { "epoch": 0.3738736308206982, "grad_norm": 0.3360210359096527, "learning_rate": 1.4413251736866049e-05, "loss": 0.33223265409469604, "step": 6929, "token_acc": 0.8866564235905922 }, { "epoch": 0.3739275886256947, "grad_norm": 0.2871580719947815, "learning_rate": 1.4411683499302744e-05, "loss": 0.4085999131202698, "step": 6930, "token_acc": 0.8584849565941254 }, { "epoch": 0.3739815464306912, "grad_norm": 0.3632751405239105, "learning_rate": 1.4410115127007785e-05, "loss": 0.35327768325805664, "step": 6931, "token_acc": 0.8746561886051081 }, { "epoch": 0.3740355042356877, "grad_norm": 0.4613671600818634, "learning_rate": 1.4408546620029063e-05, "loss": 0.44107669591903687, "step": 6932, "token_acc": 0.8531367447239087 }, { "epoch": 0.37408946204068416, "grad_norm": 0.4320457875728607, "learning_rate": 1.4406977978414481e-05, "loss": 0.3617744445800781, "step": 6933, "token_acc": 0.8740595354923127 }, { "epoch": 0.37414341984568067, "grad_norm": 0.40286797285079956, "learning_rate": 1.4405409202211946e-05, "loss": 0.39429378509521484, "step": 6934, "token_acc": 0.8684935701163503 }, { "epoch": 0.3741973776506772, "grad_norm": 0.39572954177856445, "learning_rate": 1.4403840291469369e-05, "loss": 0.33771827816963196, "step": 6935, "token_acc": 0.881572714353387 }, { "epoch": 0.3742513354556737, "grad_norm": 0.3560648560523987, "learning_rate": 1.440227124623466e-05, "loss": 0.3817673325538635, "step": 6936, "token_acc": 0.866110130036924 }, { "epoch": 0.37430529326067014, "grad_norm": 0.36093810200691223, "learning_rate": 1.4400702066555746e-05, "loss": 0.41840171813964844, "step": 6937, "token_acc": 0.8573571525978454 }, { "epoch": 0.37435925106566664, "grad_norm": 0.44493433833122253, "learning_rate": 1.439913275248054e-05, "loss": 0.37344419956207275, "step": 6938, "token_acc": 0.8720882713526767 }, { "epoch": 0.37441320887066315, "grad_norm": 0.48477569222450256, "learning_rate": 1.4397563304056972e-05, "loss": 0.3914812207221985, "step": 6939, "token_acc": 0.8650808062465952 }, { "epoch": 0.37446716667565966, "grad_norm": 0.419130802154541, "learning_rate": 1.4395993721332973e-05, "loss": 0.396196186542511, "step": 6940, "token_acc": 0.8645092838196287 }, { "epoch": 0.3745211244806561, "grad_norm": 0.37882891297340393, "learning_rate": 1.439442400435648e-05, "loss": 0.4339277148246765, "step": 6941, "token_acc": 0.8541577825159915 }, { "epoch": 0.3745750822856526, "grad_norm": 0.3489689528942108, "learning_rate": 1.4392854153175426e-05, "loss": 0.36102649569511414, "step": 6942, "token_acc": 0.8757820383451059 }, { "epoch": 0.3746290400906491, "grad_norm": 0.4186669886112213, "learning_rate": 1.4391284167837758e-05, "loss": 0.3840276002883911, "step": 6943, "token_acc": 0.8673129354740987 }, { "epoch": 0.37468299789564563, "grad_norm": 0.5026746392250061, "learning_rate": 1.4389714048391421e-05, "loss": 0.4212062656879425, "step": 6944, "token_acc": 0.8544176706827309 }, { "epoch": 0.3747369557006421, "grad_norm": 0.5693314075469971, "learning_rate": 1.4388143794884366e-05, "loss": 0.4033265709877014, "step": 6945, "token_acc": 0.8633890791859762 }, { "epoch": 0.3747909135056386, "grad_norm": 0.4551473557949066, "learning_rate": 1.4386573407364553e-05, "loss": 0.39908134937286377, "step": 6946, "token_acc": 0.8644588045234248 }, { "epoch": 0.3748448713106351, "grad_norm": 0.4183832108974457, "learning_rate": 1.4385002885879936e-05, "loss": 0.3853702247142792, "step": 6947, "token_acc": 0.8644924207625172 }, { "epoch": 0.37489882911563155, "grad_norm": 0.3763435184955597, "learning_rate": 1.4383432230478476e-05, "loss": 0.39926302433013916, "step": 6948, "token_acc": 0.858862959285005 }, { "epoch": 0.37495278692062806, "grad_norm": 0.33570894598960876, "learning_rate": 1.438186144120815e-05, "loss": 0.3814791738986969, "step": 6949, "token_acc": 0.8673455714859976 }, { "epoch": 0.37500674472562456, "grad_norm": 0.3645235002040863, "learning_rate": 1.4380290518116918e-05, "loss": 0.35649287700653076, "step": 6950, "token_acc": 0.8741037793170494 }, { "epoch": 0.37506070253062107, "grad_norm": 0.5212129950523376, "learning_rate": 1.4378719461252764e-05, "loss": 0.3789050579071045, "step": 6951, "token_acc": 0.8629093678598629 }, { "epoch": 0.3751146603356175, "grad_norm": 0.349192351102829, "learning_rate": 1.4377148270663664e-05, "loss": 0.38239434361457825, "step": 6952, "token_acc": 0.867711421713963 }, { "epoch": 0.37516861814061403, "grad_norm": 0.32912665605545044, "learning_rate": 1.43755769463976e-05, "loss": 0.3583528697490692, "step": 6953, "token_acc": 0.8704598597038192 }, { "epoch": 0.37522257594561054, "grad_norm": 0.3858325779438019, "learning_rate": 1.4374005488502568e-05, "loss": 0.3853130340576172, "step": 6954, "token_acc": 0.8657670454545454 }, { "epoch": 0.37527653375060704, "grad_norm": 0.4120437502861023, "learning_rate": 1.4372433897026548e-05, "loss": 0.39375171065330505, "step": 6955, "token_acc": 0.8625700015135462 }, { "epoch": 0.3753304915556035, "grad_norm": 0.3232837915420532, "learning_rate": 1.4370862172017544e-05, "loss": 0.3277946412563324, "step": 6956, "token_acc": 0.8849822840010902 }, { "epoch": 0.3753844493606, "grad_norm": 0.496925950050354, "learning_rate": 1.4369290313523556e-05, "loss": 0.40890470147132874, "step": 6957, "token_acc": 0.8594466134781871 }, { "epoch": 0.3754384071655965, "grad_norm": 0.4910525381565094, "learning_rate": 1.4367718321592585e-05, "loss": 0.41672253608703613, "step": 6958, "token_acc": 0.8564962279966472 }, { "epoch": 0.375492364970593, "grad_norm": 0.48245763778686523, "learning_rate": 1.4366146196272644e-05, "loss": 0.3994371294975281, "step": 6959, "token_acc": 0.8630680690613217 }, { "epoch": 0.37554632277558947, "grad_norm": 0.44253769516944885, "learning_rate": 1.436457393761174e-05, "loss": 0.38751739263534546, "step": 6960, "token_acc": 0.8645150155333103 }, { "epoch": 0.375600280580586, "grad_norm": 0.4660751223564148, "learning_rate": 1.4363001545657888e-05, "loss": 0.44385334849357605, "step": 6961, "token_acc": 0.8514990630855716 }, { "epoch": 0.3756542383855825, "grad_norm": 0.34752941131591797, "learning_rate": 1.4361429020459114e-05, "loss": 0.4011533260345459, "step": 6962, "token_acc": 0.860657209552882 }, { "epoch": 0.375708196190579, "grad_norm": 0.43272140622138977, "learning_rate": 1.4359856362063443e-05, "loss": 0.40183278918266296, "step": 6963, "token_acc": 0.8575904300769012 }, { "epoch": 0.37576215399557544, "grad_norm": 0.4797908365726471, "learning_rate": 1.4358283570518895e-05, "loss": 0.29314395785331726, "step": 6964, "token_acc": 0.8919765984120351 }, { "epoch": 0.37581611180057195, "grad_norm": 0.40314579010009766, "learning_rate": 1.4356710645873513e-05, "loss": 0.3502523899078369, "step": 6965, "token_acc": 0.8741247860588144 }, { "epoch": 0.37587006960556846, "grad_norm": 0.4422987699508667, "learning_rate": 1.435513758817533e-05, "loss": 0.33608365058898926, "step": 6966, "token_acc": 0.8830763900918067 }, { "epoch": 0.37592402741056496, "grad_norm": 0.39124777913093567, "learning_rate": 1.4353564397472383e-05, "loss": 0.3798335790634155, "step": 6967, "token_acc": 0.870906949352179 }, { "epoch": 0.3759779852155614, "grad_norm": 0.39784178137779236, "learning_rate": 1.435199107381272e-05, "loss": 0.369159996509552, "step": 6968, "token_acc": 0.8732524399894487 }, { "epoch": 0.3760319430205579, "grad_norm": 0.5329310297966003, "learning_rate": 1.4350417617244392e-05, "loss": 0.36684268712997437, "step": 6969, "token_acc": 0.8669982351997433 }, { "epoch": 0.37608590082555443, "grad_norm": 0.42660489678382874, "learning_rate": 1.4348844027815447e-05, "loss": 0.3982723355293274, "step": 6970, "token_acc": 0.8587927317523868 }, { "epoch": 0.3761398586305509, "grad_norm": 0.38748055696487427, "learning_rate": 1.4347270305573946e-05, "loss": 0.37057051062583923, "step": 6971, "token_acc": 0.8733119866351107 }, { "epoch": 0.3761938164355474, "grad_norm": 0.4305363595485687, "learning_rate": 1.4345696450567948e-05, "loss": 0.44716331362724304, "step": 6972, "token_acc": 0.8551659065890863 }, { "epoch": 0.3762477742405439, "grad_norm": 0.287987619638443, "learning_rate": 1.434412246284552e-05, "loss": 0.3800109624862671, "step": 6973, "token_acc": 0.8665389527458492 }, { "epoch": 0.3763017320455404, "grad_norm": 0.44097065925598145, "learning_rate": 1.434254834245473e-05, "loss": 0.40180671215057373, "step": 6974, "token_acc": 0.8633147674243564 }, { "epoch": 0.37635568985053686, "grad_norm": 0.5496434569358826, "learning_rate": 1.434097408944365e-05, "loss": 0.4186614155769348, "step": 6975, "token_acc": 0.8602537398220034 }, { "epoch": 0.37640964765553336, "grad_norm": 0.40604379773139954, "learning_rate": 1.433939970386036e-05, "loss": 0.3548336625099182, "step": 6976, "token_acc": 0.873653984206748 }, { "epoch": 0.37646360546052987, "grad_norm": 0.44146496057510376, "learning_rate": 1.4337825185752938e-05, "loss": 0.36680617928504944, "step": 6977, "token_acc": 0.865366407877296 }, { "epoch": 0.3765175632655264, "grad_norm": 0.46318989992141724, "learning_rate": 1.4336250535169472e-05, "loss": 0.3898775577545166, "step": 6978, "token_acc": 0.8662739691690688 }, { "epoch": 0.37657152107052283, "grad_norm": 0.3921443819999695, "learning_rate": 1.4334675752158049e-05, "loss": 0.37328851222991943, "step": 6979, "token_acc": 0.8694847708511244 }, { "epoch": 0.37662547887551934, "grad_norm": 0.45439037680625916, "learning_rate": 1.4333100836766767e-05, "loss": 0.39692312479019165, "step": 6980, "token_acc": 0.8605051664753157 }, { "epoch": 0.37667943668051584, "grad_norm": 0.3805469572544098, "learning_rate": 1.4331525789043716e-05, "loss": 0.41182082891464233, "step": 6981, "token_acc": 0.8560329792294277 }, { "epoch": 0.37673339448551235, "grad_norm": 0.4121883511543274, "learning_rate": 1.432995060903701e-05, "loss": 0.37807852029800415, "step": 6982, "token_acc": 0.8669316676130986 }, { "epoch": 0.3767873522905088, "grad_norm": 0.43035492300987244, "learning_rate": 1.432837529679474e-05, "loss": 0.3907433748245239, "step": 6983, "token_acc": 0.8657372329140779 }, { "epoch": 0.3768413100955053, "grad_norm": 0.3587186336517334, "learning_rate": 1.4326799852365023e-05, "loss": 0.3820796012878418, "step": 6984, "token_acc": 0.8658745572946418 }, { "epoch": 0.3768952679005018, "grad_norm": 0.2810901999473572, "learning_rate": 1.4325224275795974e-05, "loss": 0.41448402404785156, "step": 6985, "token_acc": 0.8537987679671458 }, { "epoch": 0.3769492257054983, "grad_norm": 0.39290887117385864, "learning_rate": 1.4323648567135708e-05, "loss": 0.35530930757522583, "step": 6986, "token_acc": 0.8745051464766429 }, { "epoch": 0.3770031835104948, "grad_norm": 0.462260365486145, "learning_rate": 1.4322072726432345e-05, "loss": 0.44910508394241333, "step": 6987, "token_acc": 0.8508914100486223 }, { "epoch": 0.3770571413154913, "grad_norm": 0.42990556359291077, "learning_rate": 1.4320496753734017e-05, "loss": 0.3789689540863037, "step": 6988, "token_acc": 0.8668997042215649 }, { "epoch": 0.3771110991204878, "grad_norm": 0.4266756474971771, "learning_rate": 1.431892064908885e-05, "loss": 0.39164695143699646, "step": 6989, "token_acc": 0.8641991570073762 }, { "epoch": 0.3771650569254843, "grad_norm": 0.2935260832309723, "learning_rate": 1.4317344412544976e-05, "loss": 0.38956746459007263, "step": 6990, "token_acc": 0.8640346013086392 }, { "epoch": 0.37721901473048075, "grad_norm": 0.42357146739959717, "learning_rate": 1.4315768044150535e-05, "loss": 0.4124433696269989, "step": 6991, "token_acc": 0.8596227674845601 }, { "epoch": 0.37727297253547726, "grad_norm": 0.3683992326259613, "learning_rate": 1.4314191543953669e-05, "loss": 0.361834853887558, "step": 6992, "token_acc": 0.8680825605743344 }, { "epoch": 0.37732693034047377, "grad_norm": 0.3624913990497589, "learning_rate": 1.4312614912002523e-05, "loss": 0.3567922115325928, "step": 6993, "token_acc": 0.8730277053050068 }, { "epoch": 0.3773808881454702, "grad_norm": 0.3661712408065796, "learning_rate": 1.4311038148345244e-05, "loss": 0.34577059745788574, "step": 6994, "token_acc": 0.8795947219604147 }, { "epoch": 0.3774348459504667, "grad_norm": 0.3970104455947876, "learning_rate": 1.4309461253029992e-05, "loss": 0.40223753452301025, "step": 6995, "token_acc": 0.8599571160546771 }, { "epoch": 0.37748880375546323, "grad_norm": 0.43496131896972656, "learning_rate": 1.4307884226104924e-05, "loss": 0.35035622119903564, "step": 6996, "token_acc": 0.8742169469172436 }, { "epoch": 0.37754276156045974, "grad_norm": 0.412275493144989, "learning_rate": 1.4306307067618198e-05, "loss": 0.371620237827301, "step": 6997, "token_acc": 0.8726970494528328 }, { "epoch": 0.3775967193654562, "grad_norm": 0.41495752334594727, "learning_rate": 1.4304729777617982e-05, "loss": 0.39884862303733826, "step": 6998, "token_acc": 0.857192254495159 }, { "epoch": 0.3776506771704527, "grad_norm": 0.3592222034931183, "learning_rate": 1.4303152356152452e-05, "loss": 0.4104625880718231, "step": 6999, "token_acc": 0.8624804146076895 }, { "epoch": 0.3777046349754492, "grad_norm": 0.3580928146839142, "learning_rate": 1.430157480326977e-05, "loss": 0.4045817255973816, "step": 7000, "token_acc": 0.8647946761889304 }, { "epoch": 0.3777585927804457, "grad_norm": 0.4040168523788452, "learning_rate": 1.4299997119018123e-05, "loss": 0.3751997947692871, "step": 7001, "token_acc": 0.8688122384182422 }, { "epoch": 0.37781255058544216, "grad_norm": 0.44376569986343384, "learning_rate": 1.429841930344569e-05, "loss": 0.42000114917755127, "step": 7002, "token_acc": 0.8584487930295672 }, { "epoch": 0.37786650839043867, "grad_norm": 0.6018074750900269, "learning_rate": 1.4296841356600655e-05, "loss": 0.3755989074707031, "step": 7003, "token_acc": 0.867310864954042 }, { "epoch": 0.3779204661954352, "grad_norm": 0.3640458583831787, "learning_rate": 1.4295263278531213e-05, "loss": 0.3705906271934509, "step": 7004, "token_acc": 0.8700835959983555 }, { "epoch": 0.3779744240004317, "grad_norm": 0.4048405885696411, "learning_rate": 1.4293685069285554e-05, "loss": 0.3650800585746765, "step": 7005, "token_acc": 0.8727013727013727 }, { "epoch": 0.37802838180542814, "grad_norm": 0.40904781222343445, "learning_rate": 1.4292106728911882e-05, "loss": 0.42374563217163086, "step": 7006, "token_acc": 0.8548099355015781 }, { "epoch": 0.37808233961042464, "grad_norm": 0.32903245091438293, "learning_rate": 1.4290528257458391e-05, "loss": 0.32830727100372314, "step": 7007, "token_acc": 0.8823074942055112 }, { "epoch": 0.37813629741542115, "grad_norm": 0.30586546659469604, "learning_rate": 1.4288949654973292e-05, "loss": 0.4064887762069702, "step": 7008, "token_acc": 0.8576845298281092 }, { "epoch": 0.37819025522041766, "grad_norm": 0.3451162874698639, "learning_rate": 1.4287370921504795e-05, "loss": 0.4151705503463745, "step": 7009, "token_acc": 0.8558165548098434 }, { "epoch": 0.3782442130254141, "grad_norm": 0.34626123309135437, "learning_rate": 1.4285792057101111e-05, "loss": 0.38405439257621765, "step": 7010, "token_acc": 0.86260046240229 }, { "epoch": 0.3782981708304106, "grad_norm": 0.39783668518066406, "learning_rate": 1.428421306181046e-05, "loss": 0.38698142766952515, "step": 7011, "token_acc": 0.8655362189498735 }, { "epoch": 0.3783521286354071, "grad_norm": 0.37919119000434875, "learning_rate": 1.4282633935681067e-05, "loss": 0.3430713415145874, "step": 7012, "token_acc": 0.8798474253019708 }, { "epoch": 0.3784060864404036, "grad_norm": 0.34573566913604736, "learning_rate": 1.4281054678761152e-05, "loss": 0.37158071994781494, "step": 7013, "token_acc": 0.8721080615030895 }, { "epoch": 0.3784600442454001, "grad_norm": 0.47345656156539917, "learning_rate": 1.4279475291098952e-05, "loss": 0.35785040259361267, "step": 7014, "token_acc": 0.8726643598615917 }, { "epoch": 0.3785140020503966, "grad_norm": 0.42824360728263855, "learning_rate": 1.4277895772742698e-05, "loss": 0.35341012477874756, "step": 7015, "token_acc": 0.8765222316624186 }, { "epoch": 0.3785679598553931, "grad_norm": 0.29814931750297546, "learning_rate": 1.4276316123740627e-05, "loss": 0.4298597276210785, "step": 7016, "token_acc": 0.8557964184731386 }, { "epoch": 0.37862191766038955, "grad_norm": 0.4103899598121643, "learning_rate": 1.4274736344140983e-05, "loss": 0.4254765510559082, "step": 7017, "token_acc": 0.856314145470772 }, { "epoch": 0.37867587546538606, "grad_norm": 0.4937400221824646, "learning_rate": 1.4273156433992008e-05, "loss": 0.4267309606075287, "step": 7018, "token_acc": 0.8498381877022654 }, { "epoch": 0.37872983327038257, "grad_norm": 0.3091387152671814, "learning_rate": 1.4271576393341955e-05, "loss": 0.35216277837753296, "step": 7019, "token_acc": 0.8717123670956911 }, { "epoch": 0.3787837910753791, "grad_norm": 0.43901264667510986, "learning_rate": 1.426999622223908e-05, "loss": 0.34461313486099243, "step": 7020, "token_acc": 0.8783584836216415 }, { "epoch": 0.3788377488803755, "grad_norm": 0.42430824041366577, "learning_rate": 1.4268415920731637e-05, "loss": 0.41403675079345703, "step": 7021, "token_acc": 0.8526829268292683 }, { "epoch": 0.37889170668537203, "grad_norm": 0.43173426389694214, "learning_rate": 1.4266835488867892e-05, "loss": 0.33261287212371826, "step": 7022, "token_acc": 0.8742976066597294 }, { "epoch": 0.37894566449036854, "grad_norm": 0.37697476148605347, "learning_rate": 1.4265254926696108e-05, "loss": 0.3845668435096741, "step": 7023, "token_acc": 0.8632520524445534 }, { "epoch": 0.37899962229536505, "grad_norm": 0.3578835725784302, "learning_rate": 1.4263674234264557e-05, "loss": 0.3377186954021454, "step": 7024, "token_acc": 0.8791170287316048 }, { "epoch": 0.3790535801003615, "grad_norm": 0.2998051941394806, "learning_rate": 1.4262093411621511e-05, "loss": 0.43383538722991943, "step": 7025, "token_acc": 0.8551655956500247 }, { "epoch": 0.379107537905358, "grad_norm": 0.46842193603515625, "learning_rate": 1.4260512458815245e-05, "loss": 0.4061100482940674, "step": 7026, "token_acc": 0.8625260367354668 }, { "epoch": 0.3791614957103545, "grad_norm": 0.42072415351867676, "learning_rate": 1.425893137589405e-05, "loss": 0.3866737484931946, "step": 7027, "token_acc": 0.8684495843874233 }, { "epoch": 0.379215453515351, "grad_norm": 0.34695982933044434, "learning_rate": 1.4257350162906204e-05, "loss": 0.35237714648246765, "step": 7028, "token_acc": 0.8754291319274154 }, { "epoch": 0.37926941132034747, "grad_norm": 0.46236172318458557, "learning_rate": 1.4255768819899998e-05, "loss": 0.38777288794517517, "step": 7029, "token_acc": 0.8622376518857503 }, { "epoch": 0.379323369125344, "grad_norm": 0.5560778379440308, "learning_rate": 1.4254187346923729e-05, "loss": 0.40345579385757446, "step": 7030, "token_acc": 0.8632402234636871 }, { "epoch": 0.3793773269303405, "grad_norm": 0.5443564653396606, "learning_rate": 1.4252605744025694e-05, "loss": 0.37387919425964355, "step": 7031, "token_acc": 0.8731753256945535 }, { "epoch": 0.379431284735337, "grad_norm": 0.488709419965744, "learning_rate": 1.4251024011254193e-05, "loss": 0.4274969696998596, "step": 7032, "token_acc": 0.8562705495537811 }, { "epoch": 0.37948524254033345, "grad_norm": 0.39635270833969116, "learning_rate": 1.424944214865753e-05, "loss": 0.39645981788635254, "step": 7033, "token_acc": 0.8621551038946292 }, { "epoch": 0.37953920034532995, "grad_norm": 0.4674387276172638, "learning_rate": 1.424786015628402e-05, "loss": 0.423168420791626, "step": 7034, "token_acc": 0.8543956043956044 }, { "epoch": 0.37959315815032646, "grad_norm": 0.40712082386016846, "learning_rate": 1.424627803418197e-05, "loss": 0.44133326411247253, "step": 7035, "token_acc": 0.8464824120603015 }, { "epoch": 0.3796471159553229, "grad_norm": 0.44100573658943176, "learning_rate": 1.4244695782399704e-05, "loss": 0.4263906180858612, "step": 7036, "token_acc": 0.8585566382460414 }, { "epoch": 0.3797010737603194, "grad_norm": 0.3927657902240753, "learning_rate": 1.4243113400985543e-05, "loss": 0.3536936640739441, "step": 7037, "token_acc": 0.8770343580470162 }, { "epoch": 0.3797550315653159, "grad_norm": 0.4183662533760071, "learning_rate": 1.4241530889987805e-05, "loss": 0.40460121631622314, "step": 7038, "token_acc": 0.8600869025450031 }, { "epoch": 0.37980898937031243, "grad_norm": 0.4167940318584442, "learning_rate": 1.4239948249454827e-05, "loss": 0.45653021335601807, "step": 7039, "token_acc": 0.8418464900300464 }, { "epoch": 0.3798629471753089, "grad_norm": 0.49865633249282837, "learning_rate": 1.4238365479434943e-05, "loss": 0.35728007555007935, "step": 7040, "token_acc": 0.8708487084870848 }, { "epoch": 0.3799169049803054, "grad_norm": 0.4758484959602356, "learning_rate": 1.4236782579976484e-05, "loss": 0.4419659376144409, "step": 7041, "token_acc": 0.8513451191391237 }, { "epoch": 0.3799708627853019, "grad_norm": 0.4375050961971283, "learning_rate": 1.4235199551127794e-05, "loss": 0.32619035243988037, "step": 7042, "token_acc": 0.8813096862210096 }, { "epoch": 0.3800248205902984, "grad_norm": 0.2432095855474472, "learning_rate": 1.4233616392937221e-05, "loss": 0.3413736820220947, "step": 7043, "token_acc": 0.8785150297825184 }, { "epoch": 0.38007877839529486, "grad_norm": 0.46733248233795166, "learning_rate": 1.4232033105453114e-05, "loss": 0.3853608965873718, "step": 7044, "token_acc": 0.8646838990968546 }, { "epoch": 0.38013273620029137, "grad_norm": 0.3543132543563843, "learning_rate": 1.4230449688723822e-05, "loss": 0.3412374258041382, "step": 7045, "token_acc": 0.8803111047097796 }, { "epoch": 0.3801866940052879, "grad_norm": 0.35201963782310486, "learning_rate": 1.4228866142797706e-05, "loss": 0.41117963194847107, "step": 7046, "token_acc": 0.8572783809423421 }, { "epoch": 0.3802406518102844, "grad_norm": 0.3696347177028656, "learning_rate": 1.4227282467723127e-05, "loss": 0.38855451345443726, "step": 7047, "token_acc": 0.8680233428101152 }, { "epoch": 0.38029460961528083, "grad_norm": 0.5076371431350708, "learning_rate": 1.4225698663548446e-05, "loss": 0.42289984226226807, "step": 7048, "token_acc": 0.8589263420724095 }, { "epoch": 0.38034856742027734, "grad_norm": 0.2840486168861389, "learning_rate": 1.4224114730322036e-05, "loss": 0.3866086006164551, "step": 7049, "token_acc": 0.8636266324127596 }, { "epoch": 0.38040252522527385, "grad_norm": 0.4993155896663666, "learning_rate": 1.4222530668092272e-05, "loss": 0.3315502107143402, "step": 7050, "token_acc": 0.8861111111111111 }, { "epoch": 0.38045648303027035, "grad_norm": 0.4186616539955139, "learning_rate": 1.4220946476907522e-05, "loss": 0.3960493206977844, "step": 7051, "token_acc": 0.8685491723466408 }, { "epoch": 0.3805104408352668, "grad_norm": 0.4586513340473175, "learning_rate": 1.4219362156816174e-05, "loss": 0.37456434965133667, "step": 7052, "token_acc": 0.868075117370892 }, { "epoch": 0.3805643986402633, "grad_norm": 0.39138007164001465, "learning_rate": 1.4217777707866613e-05, "loss": 0.37288662791252136, "step": 7053, "token_acc": 0.871485373473445 }, { "epoch": 0.3806183564452598, "grad_norm": 0.29515212774276733, "learning_rate": 1.4216193130107226e-05, "loss": 0.3966565728187561, "step": 7054, "token_acc": 0.8625785431696532 }, { "epoch": 0.3806723142502563, "grad_norm": 0.5983590483665466, "learning_rate": 1.4214608423586401e-05, "loss": 0.40250855684280396, "step": 7055, "token_acc": 0.8639763437119499 }, { "epoch": 0.3807262720552528, "grad_norm": 0.37397709488868713, "learning_rate": 1.4213023588352542e-05, "loss": 0.33453837037086487, "step": 7056, "token_acc": 0.8833195020746888 }, { "epoch": 0.3807802298602493, "grad_norm": 0.44224077463150024, "learning_rate": 1.4211438624454046e-05, "loss": 0.3908095359802246, "step": 7057, "token_acc": 0.8643662453023574 }, { "epoch": 0.3808341876652458, "grad_norm": 0.42012929916381836, "learning_rate": 1.4209853531939317e-05, "loss": 0.4266076683998108, "step": 7058, "token_acc": 0.8487089344162929 }, { "epoch": 0.38088814547024225, "grad_norm": 0.42435422539711, "learning_rate": 1.4208268310856765e-05, "loss": 0.3706111013889313, "step": 7059, "token_acc": 0.8729523809523809 }, { "epoch": 0.38094210327523875, "grad_norm": 0.33005231618881226, "learning_rate": 1.4206682961254803e-05, "loss": 0.3370039463043213, "step": 7060, "token_acc": 0.8794030653401452 }, { "epoch": 0.38099606108023526, "grad_norm": 0.3651301860809326, "learning_rate": 1.420509748318184e-05, "loss": 0.3508245348930359, "step": 7061, "token_acc": 0.8750992326012172 }, { "epoch": 0.38105001888523177, "grad_norm": 0.36190375685691833, "learning_rate": 1.4203511876686308e-05, "loss": 0.3858015537261963, "step": 7062, "token_acc": 0.8657559437803757 }, { "epoch": 0.3811039766902282, "grad_norm": 0.38571518659591675, "learning_rate": 1.4201926141816617e-05, "loss": 0.4033418893814087, "step": 7063, "token_acc": 0.8539024825861761 }, { "epoch": 0.3811579344952247, "grad_norm": 0.43163642287254333, "learning_rate": 1.420034027862121e-05, "loss": 0.42689746618270874, "step": 7064, "token_acc": 0.8598329321817925 }, { "epoch": 0.38121189230022123, "grad_norm": 0.4680667519569397, "learning_rate": 1.4198754287148505e-05, "loss": 0.41841867566108704, "step": 7065, "token_acc": 0.8529657477025898 }, { "epoch": 0.38126585010521774, "grad_norm": 0.4169442057609558, "learning_rate": 1.419716816744695e-05, "loss": 0.3980804681777954, "step": 7066, "token_acc": 0.8647899649941657 }, { "epoch": 0.3813198079102142, "grad_norm": 0.412178099155426, "learning_rate": 1.4195581919564973e-05, "loss": 0.39005714654922485, "step": 7067, "token_acc": 0.8629355534998459 }, { "epoch": 0.3813737657152107, "grad_norm": 0.3679927587509155, "learning_rate": 1.4193995543551027e-05, "loss": 0.3361302316188812, "step": 7068, "token_acc": 0.8834905660377359 }, { "epoch": 0.3814277235202072, "grad_norm": 0.347267210483551, "learning_rate": 1.4192409039453554e-05, "loss": 0.37365904450416565, "step": 7069, "token_acc": 0.8684846045481989 }, { "epoch": 0.3814816813252037, "grad_norm": 0.37828102707862854, "learning_rate": 1.4190822407321007e-05, "loss": 0.3718162775039673, "step": 7070, "token_acc": 0.8713442447008317 }, { "epoch": 0.38153563913020017, "grad_norm": 0.3657225966453552, "learning_rate": 1.418923564720184e-05, "loss": 0.43507876992225647, "step": 7071, "token_acc": 0.8505017879801592 }, { "epoch": 0.3815895969351967, "grad_norm": 0.4346040189266205, "learning_rate": 1.4187648759144516e-05, "loss": 0.3824044466018677, "step": 7072, "token_acc": 0.8658465143526655 }, { "epoch": 0.3816435547401932, "grad_norm": 0.43823936581611633, "learning_rate": 1.4186061743197497e-05, "loss": 0.3559524118900299, "step": 7073, "token_acc": 0.8752154886619812 }, { "epoch": 0.3816975125451897, "grad_norm": 0.3451631963253021, "learning_rate": 1.4184474599409248e-05, "loss": 0.34494534134864807, "step": 7074, "token_acc": 0.8803864437969636 }, { "epoch": 0.38175147035018614, "grad_norm": 0.30851230025291443, "learning_rate": 1.4182887327828241e-05, "loss": 0.37647730112075806, "step": 7075, "token_acc": 0.8678213309024613 }, { "epoch": 0.38180542815518265, "grad_norm": 0.385930597782135, "learning_rate": 1.4181299928502951e-05, "loss": 0.35430341958999634, "step": 7076, "token_acc": 0.873660320262262 }, { "epoch": 0.38185938596017915, "grad_norm": 0.3816101849079132, "learning_rate": 1.4179712401481855e-05, "loss": 0.33508142828941345, "step": 7077, "token_acc": 0.8795798104022546 }, { "epoch": 0.3819133437651756, "grad_norm": 0.45800065994262695, "learning_rate": 1.4178124746813438e-05, "loss": 0.4099041819572449, "step": 7078, "token_acc": 0.8589862977251563 }, { "epoch": 0.3819673015701721, "grad_norm": 0.45396333932876587, "learning_rate": 1.4176536964546186e-05, "loss": 0.3768642544746399, "step": 7079, "token_acc": 0.8679558666508482 }, { "epoch": 0.3820212593751686, "grad_norm": 0.3613174557685852, "learning_rate": 1.417494905472859e-05, "loss": 0.3758641481399536, "step": 7080, "token_acc": 0.8676431776194247 }, { "epoch": 0.3820752171801651, "grad_norm": 0.37312087416648865, "learning_rate": 1.4173361017409142e-05, "loss": 0.3967202305793762, "step": 7081, "token_acc": 0.8604489701457996 }, { "epoch": 0.3821291749851616, "grad_norm": 0.40196821093559265, "learning_rate": 1.4171772852636343e-05, "loss": 0.3488101363182068, "step": 7082, "token_acc": 0.8783275916066395 }, { "epoch": 0.3821831327901581, "grad_norm": 0.4382971525192261, "learning_rate": 1.4170184560458692e-05, "loss": 0.4247184991836548, "step": 7083, "token_acc": 0.8552278820375335 }, { "epoch": 0.3822370905951546, "grad_norm": 0.3715674877166748, "learning_rate": 1.41685961409247e-05, "loss": 0.38713276386260986, "step": 7084, "token_acc": 0.8677739034685263 }, { "epoch": 0.3822910484001511, "grad_norm": 0.4315732717514038, "learning_rate": 1.416700759408287e-05, "loss": 0.3838443160057068, "step": 7085, "token_acc": 0.8713627546071775 }, { "epoch": 0.38234500620514755, "grad_norm": 0.4078189730644226, "learning_rate": 1.4165418919981724e-05, "loss": 0.3217528760433197, "step": 7086, "token_acc": 0.8811451706188548 }, { "epoch": 0.38239896401014406, "grad_norm": 0.4015892744064331, "learning_rate": 1.4163830118669772e-05, "loss": 0.37346935272216797, "step": 7087, "token_acc": 0.8700156985871271 }, { "epoch": 0.38245292181514057, "grad_norm": 0.381995290517807, "learning_rate": 1.416224119019554e-05, "loss": 0.4101400375366211, "step": 7088, "token_acc": 0.8619724877593845 }, { "epoch": 0.3825068796201371, "grad_norm": 0.34130313992500305, "learning_rate": 1.4160652134607554e-05, "loss": 0.39145949482917786, "step": 7089, "token_acc": 0.864740944520862 }, { "epoch": 0.3825608374251335, "grad_norm": 0.3330959677696228, "learning_rate": 1.4159062951954341e-05, "loss": 0.3952297270298004, "step": 7090, "token_acc": 0.8658243080625753 }, { "epoch": 0.38261479523013003, "grad_norm": 0.43521955609321594, "learning_rate": 1.4157473642284434e-05, "loss": 0.3900330364704132, "step": 7091, "token_acc": 0.8610678374167965 }, { "epoch": 0.38266875303512654, "grad_norm": 0.38603687286376953, "learning_rate": 1.4155884205646371e-05, "loss": 0.3820359408855438, "step": 7092, "token_acc": 0.8664310954063604 }, { "epoch": 0.38272271084012305, "grad_norm": 0.3861533999443054, "learning_rate": 1.4154294642088694e-05, "loss": 0.3632826805114746, "step": 7093, "token_acc": 0.8741770299926848 }, { "epoch": 0.3827766686451195, "grad_norm": 0.3299746811389923, "learning_rate": 1.4152704951659946e-05, "loss": 0.3216022253036499, "step": 7094, "token_acc": 0.8850561540224616 }, { "epoch": 0.382830626450116, "grad_norm": 0.4139416813850403, "learning_rate": 1.4151115134408677e-05, "loss": 0.3748966455459595, "step": 7095, "token_acc": 0.8704604409857328 }, { "epoch": 0.3828845842551125, "grad_norm": 0.5024723410606384, "learning_rate": 1.4149525190383439e-05, "loss": 0.4101116955280304, "step": 7096, "token_acc": 0.8566171266808209 }, { "epoch": 0.382938542060109, "grad_norm": 0.3513432443141937, "learning_rate": 1.4147935119632787e-05, "loss": 0.3756192922592163, "step": 7097, "token_acc": 0.8683250846574628 }, { "epoch": 0.3829924998651055, "grad_norm": 0.4826512038707733, "learning_rate": 1.4146344922205286e-05, "loss": 0.46637529134750366, "step": 7098, "token_acc": 0.8462574850299401 }, { "epoch": 0.383046457670102, "grad_norm": 0.3969770073890686, "learning_rate": 1.41447545981495e-05, "loss": 0.36001116037368774, "step": 7099, "token_acc": 0.875603476021886 }, { "epoch": 0.3831004154750985, "grad_norm": 0.4404621422290802, "learning_rate": 1.4143164147513988e-05, "loss": 0.45714071393013, "step": 7100, "token_acc": 0.8509154315605929 }, { "epoch": 0.38315437328009494, "grad_norm": 0.44988909363746643, "learning_rate": 1.414157357034733e-05, "loss": 0.4000520706176758, "step": 7101, "token_acc": 0.8589245004693576 }, { "epoch": 0.38320833108509145, "grad_norm": 0.4093745946884155, "learning_rate": 1.4139982866698104e-05, "loss": 0.4083940386772156, "step": 7102, "token_acc": 0.8629853596435392 }, { "epoch": 0.38326228889008795, "grad_norm": 0.33286118507385254, "learning_rate": 1.4138392036614881e-05, "loss": 0.3686198890209198, "step": 7103, "token_acc": 0.8678102926337034 }, { "epoch": 0.38331624669508446, "grad_norm": 0.41597893834114075, "learning_rate": 1.4136801080146252e-05, "loss": 0.30131399631500244, "step": 7104, "token_acc": 0.8918074684083487 }, { "epoch": 0.3833702045000809, "grad_norm": 0.48319900035858154, "learning_rate": 1.41352099973408e-05, "loss": 0.3462192118167877, "step": 7105, "token_acc": 0.8702370500438982 }, { "epoch": 0.3834241623050774, "grad_norm": 0.3350692093372345, "learning_rate": 1.4133618788247118e-05, "loss": 0.4127473831176758, "step": 7106, "token_acc": 0.8601205424409845 }, { "epoch": 0.38347812011007393, "grad_norm": 0.4022861421108246, "learning_rate": 1.4132027452913804e-05, "loss": 0.4003954231739044, "step": 7107, "token_acc": 0.8626398210290828 }, { "epoch": 0.38353207791507044, "grad_norm": 0.40322554111480713, "learning_rate": 1.4130435991389452e-05, "loss": 0.36127203702926636, "step": 7108, "token_acc": 0.8726366700864723 }, { "epoch": 0.3835860357200669, "grad_norm": 0.2808765172958374, "learning_rate": 1.4128844403722669e-05, "loss": 0.37554311752319336, "step": 7109, "token_acc": 0.8661990828162935 }, { "epoch": 0.3836399935250634, "grad_norm": 0.42503970861434937, "learning_rate": 1.4127252689962054e-05, "loss": 0.3548283278942108, "step": 7110, "token_acc": 0.8773378047207533 }, { "epoch": 0.3836939513300599, "grad_norm": 0.5370287299156189, "learning_rate": 1.4125660850156227e-05, "loss": 0.3946566581726074, "step": 7111, "token_acc": 0.8609015639374425 }, { "epoch": 0.3837479091350564, "grad_norm": 0.3497389256954193, "learning_rate": 1.4124068884353798e-05, "loss": 0.3488750457763672, "step": 7112, "token_acc": 0.8754618226600985 }, { "epoch": 0.38380186694005286, "grad_norm": 0.3568305969238281, "learning_rate": 1.4122476792603383e-05, "loss": 0.42737895250320435, "step": 7113, "token_acc": 0.856044115512988 }, { "epoch": 0.38385582474504937, "grad_norm": 0.36267316341400146, "learning_rate": 1.412088457495361e-05, "loss": 0.3445584774017334, "step": 7114, "token_acc": 0.874437131970904 }, { "epoch": 0.3839097825500459, "grad_norm": 0.42391082644462585, "learning_rate": 1.4119292231453102e-05, "loss": 0.419708251953125, "step": 7115, "token_acc": 0.8586766876929272 }, { "epoch": 0.3839637403550424, "grad_norm": 0.38322868943214417, "learning_rate": 1.4117699762150487e-05, "loss": 0.372564435005188, "step": 7116, "token_acc": 0.8671220518108004 }, { "epoch": 0.38401769816003883, "grad_norm": 0.42161229252815247, "learning_rate": 1.4116107167094399e-05, "loss": 0.39272820949554443, "step": 7117, "token_acc": 0.8662376779846659 }, { "epoch": 0.38407165596503534, "grad_norm": 0.3145093619823456, "learning_rate": 1.4114514446333478e-05, "loss": 0.32192736864089966, "step": 7118, "token_acc": 0.8807069219440353 }, { "epoch": 0.38412561377003185, "grad_norm": 0.48638883233070374, "learning_rate": 1.4112921599916363e-05, "loss": 0.4018312096595764, "step": 7119, "token_acc": 0.8552945859872612 }, { "epoch": 0.3841795715750283, "grad_norm": 0.3534385561943054, "learning_rate": 1.4111328627891698e-05, "loss": 0.33318817615509033, "step": 7120, "token_acc": 0.8843139693991791 }, { "epoch": 0.3842335293800248, "grad_norm": 0.43642744421958923, "learning_rate": 1.4109735530308137e-05, "loss": 0.35906335711479187, "step": 7121, "token_acc": 0.875309902289631 }, { "epoch": 0.3842874871850213, "grad_norm": 0.42775392532348633, "learning_rate": 1.4108142307214328e-05, "loss": 0.33832091093063354, "step": 7122, "token_acc": 0.8763062486670932 }, { "epoch": 0.3843414449900178, "grad_norm": 0.5507577657699585, "learning_rate": 1.410654895865893e-05, "loss": 0.43503838777542114, "step": 7123, "token_acc": 0.8505966587112171 }, { "epoch": 0.3843954027950143, "grad_norm": 0.39266595244407654, "learning_rate": 1.4104955484690604e-05, "loss": 0.3977185785770416, "step": 7124, "token_acc": 0.8601333177406151 }, { "epoch": 0.3844493606000108, "grad_norm": 0.5333537459373474, "learning_rate": 1.410336188535801e-05, "loss": 0.35335808992385864, "step": 7125, "token_acc": 0.8795007201152184 }, { "epoch": 0.3845033184050073, "grad_norm": 0.48871901631355286, "learning_rate": 1.410176816070982e-05, "loss": 0.399564653635025, "step": 7126, "token_acc": 0.8578611487139818 }, { "epoch": 0.3845572762100038, "grad_norm": 0.3286350667476654, "learning_rate": 1.4100174310794703e-05, "loss": 0.3777901530265808, "step": 7127, "token_acc": 0.8706192582511059 }, { "epoch": 0.38461123401500025, "grad_norm": 0.39976415038108826, "learning_rate": 1.4098580335661338e-05, "loss": 0.3832014501094818, "step": 7128, "token_acc": 0.8635964361902372 }, { "epoch": 0.38466519181999675, "grad_norm": 0.4986989200115204, "learning_rate": 1.40969862353584e-05, "loss": 0.4395119547843933, "step": 7129, "token_acc": 0.849346305868045 }, { "epoch": 0.38471914962499326, "grad_norm": 0.38335496187210083, "learning_rate": 1.409539200993458e-05, "loss": 0.3762780427932739, "step": 7130, "token_acc": 0.8694258639910813 }, { "epoch": 0.38477310742998977, "grad_norm": 0.4185316562652588, "learning_rate": 1.4093797659438562e-05, "loss": 0.42887240648269653, "step": 7131, "token_acc": 0.8507462686567164 }, { "epoch": 0.3848270652349862, "grad_norm": 0.49452438950538635, "learning_rate": 1.4092203183919034e-05, "loss": 0.38781028985977173, "step": 7132, "token_acc": 0.8682156471228898 }, { "epoch": 0.38488102303998273, "grad_norm": 0.39319416880607605, "learning_rate": 1.4090608583424692e-05, "loss": 0.41418853402137756, "step": 7133, "token_acc": 0.8547297297297297 }, { "epoch": 0.38493498084497924, "grad_norm": 0.5055912733078003, "learning_rate": 1.4089013858004237e-05, "loss": 0.36224672198295593, "step": 7134, "token_acc": 0.872377074851237 }, { "epoch": 0.38498893864997574, "grad_norm": 0.4205065667629242, "learning_rate": 1.4087419007706367e-05, "loss": 0.3615902066230774, "step": 7135, "token_acc": 0.8729365439951537 }, { "epoch": 0.3850428964549722, "grad_norm": 0.325637549161911, "learning_rate": 1.4085824032579793e-05, "loss": 0.35322096943855286, "step": 7136, "token_acc": 0.8759368308351178 }, { "epoch": 0.3850968542599687, "grad_norm": 0.38457369804382324, "learning_rate": 1.4084228932673224e-05, "loss": 0.45173972845077515, "step": 7137, "token_acc": 0.8461034708578913 }, { "epoch": 0.3851508120649652, "grad_norm": 0.3519468903541565, "learning_rate": 1.4082633708035374e-05, "loss": 0.3483600616455078, "step": 7138, "token_acc": 0.8768217488789237 }, { "epoch": 0.3852047698699617, "grad_norm": 0.36312028765678406, "learning_rate": 1.4081038358714958e-05, "loss": 0.43282756209373474, "step": 7139, "token_acc": 0.8548715339608243 }, { "epoch": 0.38525872767495817, "grad_norm": 0.5230083465576172, "learning_rate": 1.4079442884760704e-05, "loss": 0.40453898906707764, "step": 7140, "token_acc": 0.86788796899771 }, { "epoch": 0.3853126854799547, "grad_norm": 0.44434818625450134, "learning_rate": 1.407784728622133e-05, "loss": 0.39568275213241577, "step": 7141, "token_acc": 0.8597595878649112 }, { "epoch": 0.3853666432849512, "grad_norm": 0.35172033309936523, "learning_rate": 1.4076251563145564e-05, "loss": 0.46719813346862793, "step": 7142, "token_acc": 0.8376365441906654 }, { "epoch": 0.38542060108994763, "grad_norm": 0.43085968494415283, "learning_rate": 1.4074655715582148e-05, "loss": 0.3900865316390991, "step": 7143, "token_acc": 0.8652567975830816 }, { "epoch": 0.38547455889494414, "grad_norm": 0.3257971405982971, "learning_rate": 1.4073059743579813e-05, "loss": 0.3267582058906555, "step": 7144, "token_acc": 0.8785060311001308 }, { "epoch": 0.38552851669994065, "grad_norm": 0.3530965745449066, "learning_rate": 1.4071463647187298e-05, "loss": 0.37085777521133423, "step": 7145, "token_acc": 0.8704617330803289 }, { "epoch": 0.38558247450493716, "grad_norm": 0.45703116059303284, "learning_rate": 1.4069867426453352e-05, "loss": 0.37986457347869873, "step": 7146, "token_acc": 0.8706436420722135 }, { "epoch": 0.3856364323099336, "grad_norm": 0.406319260597229, "learning_rate": 1.4068271081426722e-05, "loss": 0.395645409822464, "step": 7147, "token_acc": 0.8647119341563786 }, { "epoch": 0.3856903901149301, "grad_norm": 0.4013853073120117, "learning_rate": 1.4066674612156157e-05, "loss": 0.4406507611274719, "step": 7148, "token_acc": 0.8504295984569524 }, { "epoch": 0.3857443479199266, "grad_norm": 0.39113277196884155, "learning_rate": 1.4065078018690417e-05, "loss": 0.3678962290287018, "step": 7149, "token_acc": 0.8709058341862845 }, { "epoch": 0.38579830572492313, "grad_norm": 0.3927532732486725, "learning_rate": 1.4063481301078259e-05, "loss": 0.4218901991844177, "step": 7150, "token_acc": 0.8588745551601423 }, { "epoch": 0.3858522635299196, "grad_norm": 0.4382777214050293, "learning_rate": 1.4061884459368442e-05, "loss": 0.4308559000492096, "step": 7151, "token_acc": 0.8524116178210015 }, { "epoch": 0.3859062213349161, "grad_norm": 0.33391568064689636, "learning_rate": 1.4060287493609742e-05, "loss": 0.35819166898727417, "step": 7152, "token_acc": 0.8741344195519348 }, { "epoch": 0.3859601791399126, "grad_norm": 0.4071352779865265, "learning_rate": 1.4058690403850922e-05, "loss": 0.3985758125782013, "step": 7153, "token_acc": 0.8617223580548764 }, { "epoch": 0.3860141369449091, "grad_norm": 0.35642877221107483, "learning_rate": 1.4057093190140766e-05, "loss": 0.39198821783065796, "step": 7154, "token_acc": 0.8681318681318682 }, { "epoch": 0.38606809474990555, "grad_norm": 0.3130093812942505, "learning_rate": 1.4055495852528042e-05, "loss": 0.34395816922187805, "step": 7155, "token_acc": 0.8740828092243187 }, { "epoch": 0.38612205255490206, "grad_norm": 0.4351039528846741, "learning_rate": 1.4053898391061538e-05, "loss": 0.38668981194496155, "step": 7156, "token_acc": 0.8655204175621738 }, { "epoch": 0.38617601035989857, "grad_norm": 0.4096180200576782, "learning_rate": 1.4052300805790042e-05, "loss": 0.44275131821632385, "step": 7157, "token_acc": 0.849281682914315 }, { "epoch": 0.3862299681648951, "grad_norm": 0.4172576069831848, "learning_rate": 1.4050703096762337e-05, "loss": 0.3559578061103821, "step": 7158, "token_acc": 0.8736187845303868 }, { "epoch": 0.38628392596989153, "grad_norm": 0.4351003170013428, "learning_rate": 1.4049105264027223e-05, "loss": 0.3713756799697876, "step": 7159, "token_acc": 0.8700522778192681 }, { "epoch": 0.38633788377488804, "grad_norm": 0.5152490139007568, "learning_rate": 1.4047507307633494e-05, "loss": 0.3475889265537262, "step": 7160, "token_acc": 0.8708109046709498 }, { "epoch": 0.38639184157988454, "grad_norm": 0.438456654548645, "learning_rate": 1.4045909227629952e-05, "loss": 0.387046217918396, "step": 7161, "token_acc": 0.8661971830985915 }, { "epoch": 0.38644579938488105, "grad_norm": 0.31590700149536133, "learning_rate": 1.4044311024065402e-05, "loss": 0.31822383403778076, "step": 7162, "token_acc": 0.8871522798967594 }, { "epoch": 0.3864997571898775, "grad_norm": 0.51267009973526, "learning_rate": 1.4042712696988653e-05, "loss": 0.366031289100647, "step": 7163, "token_acc": 0.8681208053691275 }, { "epoch": 0.386553714994874, "grad_norm": 0.4276934266090393, "learning_rate": 1.4041114246448517e-05, "loss": 0.34665119647979736, "step": 7164, "token_acc": 0.8849897189856066 }, { "epoch": 0.3866076727998705, "grad_norm": 0.39443325996398926, "learning_rate": 1.4039515672493814e-05, "loss": 0.3232548236846924, "step": 7165, "token_acc": 0.8844368811881188 }, { "epoch": 0.38666163060486697, "grad_norm": 0.4355718791484833, "learning_rate": 1.4037916975173356e-05, "loss": 0.3847185969352722, "step": 7166, "token_acc": 0.8620288892578449 }, { "epoch": 0.3867155884098635, "grad_norm": 0.32893839478492737, "learning_rate": 1.4036318154535973e-05, "loss": 0.38921672105789185, "step": 7167, "token_acc": 0.8680670285579419 }, { "epoch": 0.38676954621486, "grad_norm": 0.27217116951942444, "learning_rate": 1.403471921063049e-05, "loss": 0.390494704246521, "step": 7168, "token_acc": 0.8620453768099419 }, { "epoch": 0.3868235040198565, "grad_norm": 0.4147220849990845, "learning_rate": 1.403312014350574e-05, "loss": 0.39431658387184143, "step": 7169, "token_acc": 0.8657049630774515 }, { "epoch": 0.38687746182485294, "grad_norm": 0.4357982575893402, "learning_rate": 1.4031520953210561e-05, "loss": 0.38173288106918335, "step": 7170, "token_acc": 0.864262114537445 }, { "epoch": 0.38693141962984945, "grad_norm": 0.4475655257701874, "learning_rate": 1.402992163979378e-05, "loss": 0.4005741775035858, "step": 7171, "token_acc": 0.8593562334854672 }, { "epoch": 0.38698537743484596, "grad_norm": 0.4634174108505249, "learning_rate": 1.4028322203304254e-05, "loss": 0.41339850425720215, "step": 7172, "token_acc": 0.860594188703885 }, { "epoch": 0.38703933523984246, "grad_norm": 0.4424059987068176, "learning_rate": 1.4026722643790823e-05, "loss": 0.3414868116378784, "step": 7173, "token_acc": 0.8747677443329617 }, { "epoch": 0.3870932930448389, "grad_norm": 0.371982216835022, "learning_rate": 1.4025122961302336e-05, "loss": 0.3804798424243927, "step": 7174, "token_acc": 0.8676573020664036 }, { "epoch": 0.3871472508498354, "grad_norm": 0.38549983501434326, "learning_rate": 1.4023523155887649e-05, "loss": 0.3981979489326477, "step": 7175, "token_acc": 0.8648321218414676 }, { "epoch": 0.38720120865483193, "grad_norm": 0.38576698303222656, "learning_rate": 1.4021923227595617e-05, "loss": 0.3538817763328552, "step": 7176, "token_acc": 0.875774647887324 }, { "epoch": 0.38725516645982844, "grad_norm": 0.46868088841438293, "learning_rate": 1.4020323176475103e-05, "loss": 0.416824609041214, "step": 7177, "token_acc": 0.8503480278422274 }, { "epoch": 0.3873091242648249, "grad_norm": 0.4430256187915802, "learning_rate": 1.4018723002574975e-05, "loss": 0.35510194301605225, "step": 7178, "token_acc": 0.8796742562738906 }, { "epoch": 0.3873630820698214, "grad_norm": 0.41804981231689453, "learning_rate": 1.4017122705944098e-05, "loss": 0.3854745030403137, "step": 7179, "token_acc": 0.8665213178294574 }, { "epoch": 0.3874170398748179, "grad_norm": 0.3572639524936676, "learning_rate": 1.4015522286631345e-05, "loss": 0.3542085289955139, "step": 7180, "token_acc": 0.8740792704314275 }, { "epoch": 0.3874709976798144, "grad_norm": 0.39126819372177124, "learning_rate": 1.4013921744685594e-05, "loss": 0.36819154024124146, "step": 7181, "token_acc": 0.8727542442722928 }, { "epoch": 0.38752495548481086, "grad_norm": 0.46032318472862244, "learning_rate": 1.4012321080155726e-05, "loss": 0.33677437901496887, "step": 7182, "token_acc": 0.8785568160684396 }, { "epoch": 0.38757891328980737, "grad_norm": 0.49794596433639526, "learning_rate": 1.4010720293090623e-05, "loss": 0.3941619098186493, "step": 7183, "token_acc": 0.8661145617667356 }, { "epoch": 0.3876328710948039, "grad_norm": 0.50859534740448, "learning_rate": 1.400911938353917e-05, "loss": 0.34701642394065857, "step": 7184, "token_acc": 0.8766973280770916 }, { "epoch": 0.38768682889980033, "grad_norm": 0.34625500440597534, "learning_rate": 1.4007518351550263e-05, "loss": 0.3133428692817688, "step": 7185, "token_acc": 0.8854611612086695 }, { "epoch": 0.38774078670479684, "grad_norm": 0.3486287295818329, "learning_rate": 1.4005917197172796e-05, "loss": 0.4319424629211426, "step": 7186, "token_acc": 0.8546528803545052 }, { "epoch": 0.38779474450979334, "grad_norm": 0.4341345727443695, "learning_rate": 1.4004315920455664e-05, "loss": 0.4001990258693695, "step": 7187, "token_acc": 0.8612580016699137 }, { "epoch": 0.38784870231478985, "grad_norm": 0.48511531949043274, "learning_rate": 1.4002714521447776e-05, "loss": 0.40120744705200195, "step": 7188, "token_acc": 0.8627640703112401 }, { "epoch": 0.3879026601197863, "grad_norm": 0.28858861327171326, "learning_rate": 1.4001113000198034e-05, "loss": 0.3506315350532532, "step": 7189, "token_acc": 0.8778670279686441 }, { "epoch": 0.3879566179247828, "grad_norm": 0.40913623571395874, "learning_rate": 1.3999511356755351e-05, "loss": 0.34959203004837036, "step": 7190, "token_acc": 0.8726934630238878 }, { "epoch": 0.3880105757297793, "grad_norm": 0.37834829092025757, "learning_rate": 1.3997909591168637e-05, "loss": 0.36983275413513184, "step": 7191, "token_acc": 0.8654661016949152 }, { "epoch": 0.3880645335347758, "grad_norm": 0.3360591232776642, "learning_rate": 1.3996307703486813e-05, "loss": 0.36558088660240173, "step": 7192, "token_acc": 0.8702214698829172 }, { "epoch": 0.3881184913397723, "grad_norm": 0.3635498285293579, "learning_rate": 1.3994705693758795e-05, "loss": 0.35228997468948364, "step": 7193, "token_acc": 0.8755785461475633 }, { "epoch": 0.3881724491447688, "grad_norm": 0.39126238226890564, "learning_rate": 1.3993103562033516e-05, "loss": 0.3407479226589203, "step": 7194, "token_acc": 0.8782039289055191 }, { "epoch": 0.3882264069497653, "grad_norm": 0.34460192918777466, "learning_rate": 1.3991501308359899e-05, "loss": 0.3941397964954376, "step": 7195, "token_acc": 0.8652368390870513 }, { "epoch": 0.3882803647547618, "grad_norm": 0.357799768447876, "learning_rate": 1.3989898932786874e-05, "loss": 0.34521588683128357, "step": 7196, "token_acc": 0.8763773584905661 }, { "epoch": 0.38833432255975825, "grad_norm": 0.41060465574264526, "learning_rate": 1.3988296435363385e-05, "loss": 0.3543936610221863, "step": 7197, "token_acc": 0.8746458923512748 }, { "epoch": 0.38838828036475476, "grad_norm": 0.31491175293922424, "learning_rate": 1.3986693816138368e-05, "loss": 0.37665989995002747, "step": 7198, "token_acc": 0.8722696068233826 }, { "epoch": 0.38844223816975126, "grad_norm": 0.4053308963775635, "learning_rate": 1.3985091075160765e-05, "loss": 0.4108521044254303, "step": 7199, "token_acc": 0.8615068291966514 }, { "epoch": 0.38849619597474777, "grad_norm": 0.36640000343322754, "learning_rate": 1.3983488212479524e-05, "loss": 0.41926026344299316, "step": 7200, "token_acc": 0.8559230143670371 }, { "epoch": 0.3885501537797442, "grad_norm": 0.5078972578048706, "learning_rate": 1.3981885228143598e-05, "loss": 0.364761620759964, "step": 7201, "token_acc": 0.8697700716170373 }, { "epoch": 0.38860411158474073, "grad_norm": 0.4808880388736725, "learning_rate": 1.3980282122201938e-05, "loss": 0.4221979081630707, "step": 7202, "token_acc": 0.8580133201076945 }, { "epoch": 0.38865806938973724, "grad_norm": 0.3911135196685791, "learning_rate": 1.3978678894703503e-05, "loss": 0.4172004163265228, "step": 7203, "token_acc": 0.8564796392094443 }, { "epoch": 0.38871202719473374, "grad_norm": 0.4686562716960907, "learning_rate": 1.397707554569726e-05, "loss": 0.4230946898460388, "step": 7204, "token_acc": 0.8600817246723967 }, { "epoch": 0.3887659849997302, "grad_norm": 0.3821510076522827, "learning_rate": 1.3975472075232172e-05, "loss": 0.3116737902164459, "step": 7205, "token_acc": 0.8872967316052166 }, { "epoch": 0.3888199428047267, "grad_norm": 0.3604089021682739, "learning_rate": 1.397386848335721e-05, "loss": 0.3386002480983734, "step": 7206, "token_acc": 0.8814149560117303 }, { "epoch": 0.3888739006097232, "grad_norm": 0.3270818293094635, "learning_rate": 1.3972264770121342e-05, "loss": 0.37798064947128296, "step": 7207, "token_acc": 0.8732436030173051 }, { "epoch": 0.38892785841471966, "grad_norm": 0.41811230778694153, "learning_rate": 1.397066093557355e-05, "loss": 0.44400084018707275, "step": 7208, "token_acc": 0.8470919324577861 }, { "epoch": 0.38898181621971617, "grad_norm": 0.41634199023246765, "learning_rate": 1.396905697976281e-05, "loss": 0.36059969663619995, "step": 7209, "token_acc": 0.8721391872956562 }, { "epoch": 0.3890357740247127, "grad_norm": 0.373294860124588, "learning_rate": 1.3967452902738111e-05, "loss": 0.3922123908996582, "step": 7210, "token_acc": 0.8620380068831364 }, { "epoch": 0.3890897318297092, "grad_norm": 0.40508848428726196, "learning_rate": 1.3965848704548443e-05, "loss": 0.3770834505558014, "step": 7211, "token_acc": 0.8686481303930969 }, { "epoch": 0.38914368963470564, "grad_norm": 0.5099495053291321, "learning_rate": 1.3964244385242793e-05, "loss": 0.38158610463142395, "step": 7212, "token_acc": 0.8698279018674479 }, { "epoch": 0.38919764743970214, "grad_norm": 0.3508886396884918, "learning_rate": 1.3962639944870156e-05, "loss": 0.37661322951316833, "step": 7213, "token_acc": 0.8649052841475573 }, { "epoch": 0.38925160524469865, "grad_norm": 0.33930060267448425, "learning_rate": 1.3961035383479535e-05, "loss": 0.351577490568161, "step": 7214, "token_acc": 0.8764319875430987 }, { "epoch": 0.38930556304969516, "grad_norm": 0.48831140995025635, "learning_rate": 1.3959430701119932e-05, "loss": 0.43192440271377563, "step": 7215, "token_acc": 0.8469996858309771 }, { "epoch": 0.3893595208546916, "grad_norm": 0.39034345746040344, "learning_rate": 1.395782589784035e-05, "loss": 0.38453641533851624, "step": 7216, "token_acc": 0.8711249756287776 }, { "epoch": 0.3894134786596881, "grad_norm": 0.3960413336753845, "learning_rate": 1.3956220973689804e-05, "loss": 0.36871829628944397, "step": 7217, "token_acc": 0.8744252873563219 }, { "epoch": 0.3894674364646846, "grad_norm": 0.28744545578956604, "learning_rate": 1.3954615928717306e-05, "loss": 0.3196864724159241, "step": 7218, "token_acc": 0.8871143735740502 }, { "epoch": 0.38952139426968113, "grad_norm": 0.4274550676345825, "learning_rate": 1.395301076297187e-05, "loss": 0.3992212414741516, "step": 7219, "token_acc": 0.8646680942184154 }, { "epoch": 0.3895753520746776, "grad_norm": 0.35350698232650757, "learning_rate": 1.3951405476502523e-05, "loss": 0.34669870138168335, "step": 7220, "token_acc": 0.875520705709385 }, { "epoch": 0.3896293098796741, "grad_norm": 0.530401885509491, "learning_rate": 1.3949800069358288e-05, "loss": 0.348947137594223, "step": 7221, "token_acc": 0.8767711464147703 }, { "epoch": 0.3896832676846706, "grad_norm": 0.3654678761959076, "learning_rate": 1.3948194541588196e-05, "loss": 0.42395395040512085, "step": 7222, "token_acc": 0.8538969980721565 }, { "epoch": 0.3897372254896671, "grad_norm": 0.4202796220779419, "learning_rate": 1.3946588893241275e-05, "loss": 0.3589182198047638, "step": 7223, "token_acc": 0.8785581683168316 }, { "epoch": 0.38979118329466356, "grad_norm": 0.41809651255607605, "learning_rate": 1.3944983124366562e-05, "loss": 0.36469733715057373, "step": 7224, "token_acc": 0.8750219413726523 }, { "epoch": 0.38984514109966006, "grad_norm": 0.3848607540130615, "learning_rate": 1.3943377235013098e-05, "loss": 0.3643660545349121, "step": 7225, "token_acc": 0.8723645111079666 }, { "epoch": 0.38989909890465657, "grad_norm": 0.26526084542274475, "learning_rate": 1.3941771225229928e-05, "loss": 0.3691155016422272, "step": 7226, "token_acc": 0.8659900818821359 }, { "epoch": 0.3899530567096531, "grad_norm": 0.35679373145103455, "learning_rate": 1.3940165095066096e-05, "loss": 0.3732287287712097, "step": 7227, "token_acc": 0.8765983860955928 }, { "epoch": 0.39000701451464953, "grad_norm": 0.37489065527915955, "learning_rate": 1.3938558844570654e-05, "loss": 0.3578745126724243, "step": 7228, "token_acc": 0.8728414442700158 }, { "epoch": 0.39006097231964604, "grad_norm": 0.3803423345088959, "learning_rate": 1.3936952473792656e-05, "loss": 0.40168967843055725, "step": 7229, "token_acc": 0.8601941747572815 }, { "epoch": 0.39011493012464254, "grad_norm": 0.38080281019210815, "learning_rate": 1.3935345982781161e-05, "loss": 0.31109943985939026, "step": 7230, "token_acc": 0.8849199935348311 }, { "epoch": 0.390168887929639, "grad_norm": 0.4027899503707886, "learning_rate": 1.3933739371585233e-05, "loss": 0.3429197669029236, "step": 7231, "token_acc": 0.8774549586106152 }, { "epoch": 0.3902228457346355, "grad_norm": 0.3914068341255188, "learning_rate": 1.3932132640253935e-05, "loss": 0.3879786729812622, "step": 7232, "token_acc": 0.8628919045390735 }, { "epoch": 0.390276803539632, "grad_norm": 0.373826801776886, "learning_rate": 1.3930525788836334e-05, "loss": 0.35704344511032104, "step": 7233, "token_acc": 0.8756390769655935 }, { "epoch": 0.3903307613446285, "grad_norm": 0.31516343355178833, "learning_rate": 1.3928918817381508e-05, "loss": 0.33007609844207764, "step": 7234, "token_acc": 0.8806959403479702 }, { "epoch": 0.39038471914962497, "grad_norm": 0.34470003843307495, "learning_rate": 1.3927311725938527e-05, "loss": 0.4114243984222412, "step": 7235, "token_acc": 0.8608791208791209 }, { "epoch": 0.3904386769546215, "grad_norm": 0.3952138423919678, "learning_rate": 1.3925704514556475e-05, "loss": 0.3530305325984955, "step": 7236, "token_acc": 0.8784448085803664 }, { "epoch": 0.390492634759618, "grad_norm": 0.3675759434700012, "learning_rate": 1.3924097183284438e-05, "loss": 0.351751446723938, "step": 7237, "token_acc": 0.882038330114396 }, { "epoch": 0.3905465925646145, "grad_norm": 0.3713139593601227, "learning_rate": 1.3922489732171496e-05, "loss": 0.3690741956233978, "step": 7238, "token_acc": 0.8744287020109689 }, { "epoch": 0.39060055036961094, "grad_norm": 0.3769816756248474, "learning_rate": 1.3920882161266752e-05, "loss": 0.3404071629047394, "step": 7239, "token_acc": 0.8781897491821156 }, { "epoch": 0.39065450817460745, "grad_norm": 0.29431548714637756, "learning_rate": 1.3919274470619291e-05, "loss": 0.2968904376029968, "step": 7240, "token_acc": 0.8916768665850673 }, { "epoch": 0.39070846597960396, "grad_norm": 0.3691249191761017, "learning_rate": 1.3917666660278216e-05, "loss": 0.381830096244812, "step": 7241, "token_acc": 0.8683865325754263 }, { "epoch": 0.39076242378460047, "grad_norm": 0.4112388789653778, "learning_rate": 1.3916058730292622e-05, "loss": 0.33629512786865234, "step": 7242, "token_acc": 0.8813765798690422 }, { "epoch": 0.3908163815895969, "grad_norm": 0.4665224850177765, "learning_rate": 1.3914450680711625e-05, "loss": 0.3735966682434082, "step": 7243, "token_acc": 0.8654439803885964 }, { "epoch": 0.3908703393945934, "grad_norm": 0.42348456382751465, "learning_rate": 1.3912842511584329e-05, "loss": 0.32898756861686707, "step": 7244, "token_acc": 0.884155581747769 }, { "epoch": 0.39092429719958993, "grad_norm": 0.4177721440792084, "learning_rate": 1.3911234222959844e-05, "loss": 0.41158151626586914, "step": 7245, "token_acc": 0.8559728017307989 }, { "epoch": 0.39097825500458644, "grad_norm": 0.36786848306655884, "learning_rate": 1.3909625814887295e-05, "loss": 0.37885239720344543, "step": 7246, "token_acc": 0.8677809654484221 }, { "epoch": 0.3910322128095829, "grad_norm": 0.4554729759693146, "learning_rate": 1.3908017287415799e-05, "loss": 0.3864293098449707, "step": 7247, "token_acc": 0.865608619685498 }, { "epoch": 0.3910861706145794, "grad_norm": 0.3923685848712921, "learning_rate": 1.3906408640594475e-05, "loss": 0.35159170627593994, "step": 7248, "token_acc": 0.8728879646477775 }, { "epoch": 0.3911401284195759, "grad_norm": 0.4089318811893463, "learning_rate": 1.3904799874472456e-05, "loss": 0.36533868312835693, "step": 7249, "token_acc": 0.8767480120647108 }, { "epoch": 0.39119408622457236, "grad_norm": 0.3519311845302582, "learning_rate": 1.3903190989098872e-05, "loss": 0.4125176668167114, "step": 7250, "token_acc": 0.8605366814377154 }, { "epoch": 0.39124804402956886, "grad_norm": 0.36626696586608887, "learning_rate": 1.3901581984522856e-05, "loss": 0.4055148959159851, "step": 7251, "token_acc": 0.8603219034289713 }, { "epoch": 0.39130200183456537, "grad_norm": 0.4767605662345886, "learning_rate": 1.3899972860793549e-05, "loss": 0.424299418926239, "step": 7252, "token_acc": 0.854502688172043 }, { "epoch": 0.3913559596395619, "grad_norm": 0.3478839695453644, "learning_rate": 1.3898363617960093e-05, "loss": 0.3279874920845032, "step": 7253, "token_acc": 0.8847708894878706 }, { "epoch": 0.39140991744455833, "grad_norm": 0.3227674663066864, "learning_rate": 1.3896754256071632e-05, "loss": 0.39139294624328613, "step": 7254, "token_acc": 0.8629233916799273 }, { "epoch": 0.39146387524955484, "grad_norm": 0.3852519690990448, "learning_rate": 1.3895144775177318e-05, "loss": 0.350394070148468, "step": 7255, "token_acc": 0.8806554524361949 }, { "epoch": 0.39151783305455135, "grad_norm": 0.37503400444984436, "learning_rate": 1.3893535175326305e-05, "loss": 0.37568604946136475, "step": 7256, "token_acc": 0.8682316465507696 }, { "epoch": 0.39157179085954785, "grad_norm": 0.4851950705051422, "learning_rate": 1.3891925456567747e-05, "loss": 0.3544803857803345, "step": 7257, "token_acc": 0.8725459151361621 }, { "epoch": 0.3916257486645443, "grad_norm": 0.3925403356552124, "learning_rate": 1.3890315618950802e-05, "loss": 0.37836745381355286, "step": 7258, "token_acc": 0.8694915254237288 }, { "epoch": 0.3916797064695408, "grad_norm": 0.38912010192871094, "learning_rate": 1.388870566252464e-05, "loss": 0.39643406867980957, "step": 7259, "token_acc": 0.8644636015325671 }, { "epoch": 0.3917336642745373, "grad_norm": 0.3581917881965637, "learning_rate": 1.3887095587338423e-05, "loss": 0.42911937832832336, "step": 7260, "token_acc": 0.8531210191082803 }, { "epoch": 0.3917876220795338, "grad_norm": 0.44424235820770264, "learning_rate": 1.3885485393441327e-05, "loss": 0.4046729803085327, "step": 7261, "token_acc": 0.8601936596365566 }, { "epoch": 0.3918415798845303, "grad_norm": 0.4241786003112793, "learning_rate": 1.3883875080882525e-05, "loss": 0.4132939875125885, "step": 7262, "token_acc": 0.8531187122736419 }, { "epoch": 0.3918955376895268, "grad_norm": 0.40187910199165344, "learning_rate": 1.3882264649711195e-05, "loss": 0.31833526492118835, "step": 7263, "token_acc": 0.8844323589394969 }, { "epoch": 0.3919494954945233, "grad_norm": 0.38117414712905884, "learning_rate": 1.388065409997652e-05, "loss": 0.3948822617530823, "step": 7264, "token_acc": 0.8643572151613992 }, { "epoch": 0.3920034532995198, "grad_norm": 0.2702045738697052, "learning_rate": 1.3879043431727689e-05, "loss": 0.3750542402267456, "step": 7265, "token_acc": 0.8700787401574803 }, { "epoch": 0.39205741110451625, "grad_norm": 0.46817511320114136, "learning_rate": 1.3877432645013881e-05, "loss": 0.40925419330596924, "step": 7266, "token_acc": 0.862877234104893 }, { "epoch": 0.39211136890951276, "grad_norm": 0.3625454902648926, "learning_rate": 1.3875821739884299e-05, "loss": 0.38541728258132935, "step": 7267, "token_acc": 0.8653050327059137 }, { "epoch": 0.39216532671450927, "grad_norm": 0.4588523209095001, "learning_rate": 1.3874210716388135e-05, "loss": 0.36905068159103394, "step": 7268, "token_acc": 0.8702115494568324 }, { "epoch": 0.3922192845195058, "grad_norm": 0.4096839725971222, "learning_rate": 1.3872599574574592e-05, "loss": 0.3718929886817932, "step": 7269, "token_acc": 0.8702266849231706 }, { "epoch": 0.3922732423245022, "grad_norm": 0.40177613496780396, "learning_rate": 1.3870988314492873e-05, "loss": 0.38327592611312866, "step": 7270, "token_acc": 0.8632958801498127 }, { "epoch": 0.39232720012949873, "grad_norm": 0.47467467188835144, "learning_rate": 1.3869376936192183e-05, "loss": 0.3522418737411499, "step": 7271, "token_acc": 0.8745030250648228 }, { "epoch": 0.39238115793449524, "grad_norm": 0.4392988681793213, "learning_rate": 1.3867765439721736e-05, "loss": 0.39563649892807007, "step": 7272, "token_acc": 0.8617064294379296 }, { "epoch": 0.3924351157394917, "grad_norm": 0.4023967683315277, "learning_rate": 1.3866153825130746e-05, "loss": 0.3672875463962555, "step": 7273, "token_acc": 0.8756001745962462 }, { "epoch": 0.3924890735444882, "grad_norm": 0.27985531091690063, "learning_rate": 1.3864542092468431e-05, "loss": 0.3952215313911438, "step": 7274, "token_acc": 0.8615403333738898 }, { "epoch": 0.3925430313494847, "grad_norm": 0.35429373383522034, "learning_rate": 1.3862930241784014e-05, "loss": 0.3682475686073303, "step": 7275, "token_acc": 0.8713980118283629 }, { "epoch": 0.3925969891544812, "grad_norm": 0.33335646986961365, "learning_rate": 1.3861318273126719e-05, "loss": 0.3946095108985901, "step": 7276, "token_acc": 0.8655907780979827 }, { "epoch": 0.39265094695947766, "grad_norm": 0.35917845368385315, "learning_rate": 1.3859706186545771e-05, "loss": 0.3495854139328003, "step": 7277, "token_acc": 0.8793282805766087 }, { "epoch": 0.39270490476447417, "grad_norm": 0.47161248326301575, "learning_rate": 1.3858093982090412e-05, "loss": 0.3984229266643524, "step": 7278, "token_acc": 0.8623320617017748 }, { "epoch": 0.3927588625694707, "grad_norm": 0.33224940299987793, "learning_rate": 1.3856481659809873e-05, "loss": 0.4321315884590149, "step": 7279, "token_acc": 0.8533392996979527 }, { "epoch": 0.3928128203744672, "grad_norm": 0.45000991225242615, "learning_rate": 1.3854869219753391e-05, "loss": 0.44848230481147766, "step": 7280, "token_acc": 0.8460222412318221 }, { "epoch": 0.39286677817946364, "grad_norm": 0.35163769125938416, "learning_rate": 1.3853256661970217e-05, "loss": 0.36740803718566895, "step": 7281, "token_acc": 0.8725981620718463 }, { "epoch": 0.39292073598446015, "grad_norm": 0.4129151403903961, "learning_rate": 1.3851643986509596e-05, "loss": 0.3694611191749573, "step": 7282, "token_acc": 0.8713534822601839 }, { "epoch": 0.39297469378945665, "grad_norm": 0.3657006025314331, "learning_rate": 1.3850031193420769e-05, "loss": 0.3524962365627289, "step": 7283, "token_acc": 0.8746714621662747 }, { "epoch": 0.39302865159445316, "grad_norm": 0.32454559206962585, "learning_rate": 1.3848418282753002e-05, "loss": 0.33397412300109863, "step": 7284, "token_acc": 0.8780921142544759 }, { "epoch": 0.3930826093994496, "grad_norm": 0.4171574115753174, "learning_rate": 1.384680525455555e-05, "loss": 0.38934797048568726, "step": 7285, "token_acc": 0.8659948250034046 }, { "epoch": 0.3931365672044461, "grad_norm": 0.37171313166618347, "learning_rate": 1.3845192108877672e-05, "loss": 0.34586575627326965, "step": 7286, "token_acc": 0.8772052646317559 }, { "epoch": 0.3931905250094426, "grad_norm": 0.3614194691181183, "learning_rate": 1.3843578845768633e-05, "loss": 0.40623724460601807, "step": 7287, "token_acc": 0.8599167822468793 }, { "epoch": 0.39324448281443913, "grad_norm": 0.396877646446228, "learning_rate": 1.3841965465277705e-05, "loss": 0.36113080382347107, "step": 7288, "token_acc": 0.8743101545253863 }, { "epoch": 0.3932984406194356, "grad_norm": 0.3830198049545288, "learning_rate": 1.3840351967454157e-05, "loss": 0.36793550848960876, "step": 7289, "token_acc": 0.8716339869281046 }, { "epoch": 0.3933523984244321, "grad_norm": 0.38157516717910767, "learning_rate": 1.3838738352347267e-05, "loss": 0.34198254346847534, "step": 7290, "token_acc": 0.8773776312452447 }, { "epoch": 0.3934063562294286, "grad_norm": 0.2815839648246765, "learning_rate": 1.3837124620006312e-05, "loss": 0.3376421332359314, "step": 7291, "token_acc": 0.8801078073378543 }, { "epoch": 0.3934603140344251, "grad_norm": 0.4886358082294464, "learning_rate": 1.3835510770480578e-05, "loss": 0.41939544677734375, "step": 7292, "token_acc": 0.8571428571428571 }, { "epoch": 0.39351427183942156, "grad_norm": 0.4382477402687073, "learning_rate": 1.3833896803819346e-05, "loss": 0.29985806345939636, "step": 7293, "token_acc": 0.8950603232415206 }, { "epoch": 0.39356822964441807, "grad_norm": 0.39777106046676636, "learning_rate": 1.3832282720071914e-05, "loss": 0.3717358112335205, "step": 7294, "token_acc": 0.8667509481668774 }, { "epoch": 0.3936221874494146, "grad_norm": 0.40481337904930115, "learning_rate": 1.383066851928757e-05, "loss": 0.4120774567127228, "step": 7295, "token_acc": 0.8589677419354839 }, { "epoch": 0.393676145254411, "grad_norm": 0.43612009286880493, "learning_rate": 1.3829054201515613e-05, "loss": 0.40362149477005005, "step": 7296, "token_acc": 0.8586922351461804 }, { "epoch": 0.39373010305940753, "grad_norm": 0.32341253757476807, "learning_rate": 1.3827439766805343e-05, "loss": 0.34182465076446533, "step": 7297, "token_acc": 0.8803440424993676 }, { "epoch": 0.39378406086440404, "grad_norm": 0.4281093180179596, "learning_rate": 1.382582521520607e-05, "loss": 0.3240659832954407, "step": 7298, "token_acc": 0.8816518381735773 }, { "epoch": 0.39383801866940055, "grad_norm": 0.3490217626094818, "learning_rate": 1.3824210546767092e-05, "loss": 0.3663499653339386, "step": 7299, "token_acc": 0.870477891916034 }, { "epoch": 0.393891976474397, "grad_norm": 0.3659096360206604, "learning_rate": 1.3822595761537728e-05, "loss": 0.36280620098114014, "step": 7300, "token_acc": 0.8742632612966601 }, { "epoch": 0.3939459342793935, "grad_norm": 0.4417649805545807, "learning_rate": 1.382098085956729e-05, "loss": 0.36103272438049316, "step": 7301, "token_acc": 0.874453781512605 }, { "epoch": 0.39399989208439, "grad_norm": 0.4075980484485626, "learning_rate": 1.3819365840905097e-05, "loss": 0.43278151750564575, "step": 7302, "token_acc": 0.8553132132893736 }, { "epoch": 0.3940538498893865, "grad_norm": 0.3304787278175354, "learning_rate": 1.381775070560047e-05, "loss": 0.3525010049343109, "step": 7303, "token_acc": 0.8766123144317353 }, { "epoch": 0.39410780769438297, "grad_norm": 0.4084479808807373, "learning_rate": 1.3816135453702741e-05, "loss": 0.4382938742637634, "step": 7304, "token_acc": 0.8520539639229953 }, { "epoch": 0.3941617654993795, "grad_norm": 0.3680367171764374, "learning_rate": 1.3814520085261231e-05, "loss": 0.35452282428741455, "step": 7305, "token_acc": 0.8727396792903446 }, { "epoch": 0.394215723304376, "grad_norm": 0.3659915328025818, "learning_rate": 1.3812904600325281e-05, "loss": 0.34488293528556824, "step": 7306, "token_acc": 0.8781025113819945 }, { "epoch": 0.3942696811093725, "grad_norm": 0.5271995067596436, "learning_rate": 1.3811288998944222e-05, "loss": 0.367644339799881, "step": 7307, "token_acc": 0.8713898916967509 }, { "epoch": 0.39432363891436895, "grad_norm": 0.33500561118125916, "learning_rate": 1.3809673281167393e-05, "loss": 0.4149182140827179, "step": 7308, "token_acc": 0.8596187175043327 }, { "epoch": 0.39437759671936545, "grad_norm": 0.37436601519584656, "learning_rate": 1.3808057447044138e-05, "loss": 0.4238591492176056, "step": 7309, "token_acc": 0.8518383248112977 }, { "epoch": 0.39443155452436196, "grad_norm": 0.5632918477058411, "learning_rate": 1.3806441496623812e-05, "loss": 0.39949703216552734, "step": 7310, "token_acc": 0.8646362098138748 }, { "epoch": 0.39448551232935847, "grad_norm": 0.3679456114768982, "learning_rate": 1.3804825429955757e-05, "loss": 0.3706621527671814, "step": 7311, "token_acc": 0.8735244519392917 }, { "epoch": 0.3945394701343549, "grad_norm": 0.3079332709312439, "learning_rate": 1.380320924708933e-05, "loss": 0.39395830035209656, "step": 7312, "token_acc": 0.8622019279553527 }, { "epoch": 0.3945934279393514, "grad_norm": 0.3509909510612488, "learning_rate": 1.3801592948073888e-05, "loss": 0.3874056935310364, "step": 7313, "token_acc": 0.864290610006854 }, { "epoch": 0.39464738574434793, "grad_norm": 0.4092544615268707, "learning_rate": 1.3799976532958796e-05, "loss": 0.4000548720359802, "step": 7314, "token_acc": 0.8578282237304002 }, { "epoch": 0.3947013435493444, "grad_norm": 0.33467400074005127, "learning_rate": 1.3798360001793417e-05, "loss": 0.2873801589012146, "step": 7315, "token_acc": 0.89677648731927 }, { "epoch": 0.3947553013543409, "grad_norm": 0.3778545558452606, "learning_rate": 1.3796743354627115e-05, "loss": 0.38374415040016174, "step": 7316, "token_acc": 0.8677184466019418 }, { "epoch": 0.3948092591593374, "grad_norm": 0.3327571451663971, "learning_rate": 1.3795126591509267e-05, "loss": 0.36784929037094116, "step": 7317, "token_acc": 0.871322479291631 }, { "epoch": 0.3948632169643339, "grad_norm": 0.42351558804512024, "learning_rate": 1.3793509712489245e-05, "loss": 0.3805374205112457, "step": 7318, "token_acc": 0.8644595359366157 }, { "epoch": 0.39491717476933036, "grad_norm": 0.45280542969703674, "learning_rate": 1.3791892717616432e-05, "loss": 0.41637352108955383, "step": 7319, "token_acc": 0.8578735275883447 }, { "epoch": 0.39497113257432687, "grad_norm": 0.3853704631328583, "learning_rate": 1.3790275606940209e-05, "loss": 0.34198296070098877, "step": 7320, "token_acc": 0.8782249742002064 }, { "epoch": 0.3950250903793234, "grad_norm": 0.38553640246391296, "learning_rate": 1.3788658380509962e-05, "loss": 0.3374759554862976, "step": 7321, "token_acc": 0.8799822195880871 }, { "epoch": 0.3950790481843199, "grad_norm": 0.3692046105861664, "learning_rate": 1.3787041038375083e-05, "loss": 0.3826340436935425, "step": 7322, "token_acc": 0.8685003274394237 }, { "epoch": 0.39513300598931633, "grad_norm": 0.5317650437355042, "learning_rate": 1.3785423580584957e-05, "loss": 0.3958783447742462, "step": 7323, "token_acc": 0.8600975281654616 }, { "epoch": 0.39518696379431284, "grad_norm": 0.3820960521697998, "learning_rate": 1.3783806007188992e-05, "loss": 0.37409061193466187, "step": 7324, "token_acc": 0.8676400571337883 }, { "epoch": 0.39524092159930935, "grad_norm": 0.31459301710128784, "learning_rate": 1.3782188318236577e-05, "loss": 0.34494200348854065, "step": 7325, "token_acc": 0.8801579735802806 }, { "epoch": 0.39529487940430585, "grad_norm": 0.4344964027404785, "learning_rate": 1.3780570513777126e-05, "loss": 0.40280216932296753, "step": 7326, "token_acc": 0.8588965517241379 }, { "epoch": 0.3953488372093023, "grad_norm": 0.4346897304058075, "learning_rate": 1.3778952593860039e-05, "loss": 0.4009135663509369, "step": 7327, "token_acc": 0.8561161420152364 }, { "epoch": 0.3954027950142988, "grad_norm": 0.31695356965065, "learning_rate": 1.3777334558534732e-05, "loss": 0.432357519865036, "step": 7328, "token_acc": 0.8537908414280307 }, { "epoch": 0.3954567528192953, "grad_norm": 0.4358402490615845, "learning_rate": 1.3775716407850616e-05, "loss": 0.38647618889808655, "step": 7329, "token_acc": 0.8624523990860624 }, { "epoch": 0.39551071062429183, "grad_norm": 0.36107510328292847, "learning_rate": 1.377409814185711e-05, "loss": 0.4511881470680237, "step": 7330, "token_acc": 0.8409580193756727 }, { "epoch": 0.3955646684292883, "grad_norm": 0.2773831784725189, "learning_rate": 1.3772479760603638e-05, "loss": 0.3665732145309448, "step": 7331, "token_acc": 0.8767524076557357 }, { "epoch": 0.3956186262342848, "grad_norm": 0.439449280500412, "learning_rate": 1.377086126413962e-05, "loss": 0.4112931489944458, "step": 7332, "token_acc": 0.8554736402837668 }, { "epoch": 0.3956725840392813, "grad_norm": 0.4184251129627228, "learning_rate": 1.3769242652514487e-05, "loss": 0.36959108710289, "step": 7333, "token_acc": 0.8738049713193117 }, { "epoch": 0.3957265418442778, "grad_norm": 0.41959065198898315, "learning_rate": 1.3767623925777673e-05, "loss": 0.47041890025138855, "step": 7334, "token_acc": 0.8439410284070479 }, { "epoch": 0.39578049964927425, "grad_norm": 0.4568289518356323, "learning_rate": 1.3766005083978606e-05, "loss": 0.3570582866668701, "step": 7335, "token_acc": 0.8740037307105307 }, { "epoch": 0.39583445745427076, "grad_norm": 0.34947019815444946, "learning_rate": 1.3764386127166735e-05, "loss": 0.3569074273109436, "step": 7336, "token_acc": 0.8736569579288026 }, { "epoch": 0.39588841525926727, "grad_norm": 0.46974286437034607, "learning_rate": 1.3762767055391497e-05, "loss": 0.4106070101261139, "step": 7337, "token_acc": 0.8560347502327025 }, { "epoch": 0.3959423730642637, "grad_norm": 0.38399145007133484, "learning_rate": 1.3761147868702337e-05, "loss": 0.37294211983680725, "step": 7338, "token_acc": 0.8650398406374502 }, { "epoch": 0.3959963308692602, "grad_norm": 0.3865572214126587, "learning_rate": 1.375952856714871e-05, "loss": 0.38410240411758423, "step": 7339, "token_acc": 0.8659737417943107 }, { "epoch": 0.39605028867425673, "grad_norm": 0.32125744223594666, "learning_rate": 1.3757909150780063e-05, "loss": 0.30332159996032715, "step": 7340, "token_acc": 0.891607471717969 }, { "epoch": 0.39610424647925324, "grad_norm": 0.36615151166915894, "learning_rate": 1.3756289619645852e-05, "loss": 0.3703917860984802, "step": 7341, "token_acc": 0.8757726336201672 }, { "epoch": 0.3961582042842497, "grad_norm": 0.3453526198863983, "learning_rate": 1.3754669973795545e-05, "loss": 0.3633911609649658, "step": 7342, "token_acc": 0.8693211488250653 }, { "epoch": 0.3962121620892462, "grad_norm": 0.37623217701911926, "learning_rate": 1.3753050213278599e-05, "loss": 0.4185890555381775, "step": 7343, "token_acc": 0.8560688603181521 }, { "epoch": 0.3962661198942427, "grad_norm": 0.4050438404083252, "learning_rate": 1.3751430338144483e-05, "loss": 0.37088823318481445, "step": 7344, "token_acc": 0.8690937257939582 }, { "epoch": 0.3963200776992392, "grad_norm": 0.4713600277900696, "learning_rate": 1.3749810348442662e-05, "loss": 0.4141790270805359, "step": 7345, "token_acc": 0.8532323108884272 }, { "epoch": 0.39637403550423567, "grad_norm": 0.34058240056037903, "learning_rate": 1.3748190244222622e-05, "loss": 0.3635656237602234, "step": 7346, "token_acc": 0.8660889013139502 }, { "epoch": 0.3964279933092322, "grad_norm": 0.31134557723999023, "learning_rate": 1.3746570025533831e-05, "loss": 0.35201045870780945, "step": 7347, "token_acc": 0.8782728525493799 }, { "epoch": 0.3964819511142287, "grad_norm": 0.4110410213470459, "learning_rate": 1.3744949692425775e-05, "loss": 0.3767348825931549, "step": 7348, "token_acc": 0.8710691823899371 }, { "epoch": 0.3965359089192252, "grad_norm": 0.5330836176872253, "learning_rate": 1.3743329244947936e-05, "loss": 0.3884783387184143, "step": 7349, "token_acc": 0.86602787456446 }, { "epoch": 0.39658986672422164, "grad_norm": 0.4305218458175659, "learning_rate": 1.3741708683149802e-05, "loss": 0.372852623462677, "step": 7350, "token_acc": 0.8705412054120542 }, { "epoch": 0.39664382452921815, "grad_norm": 0.35751065611839294, "learning_rate": 1.3740088007080861e-05, "loss": 0.3484509587287903, "step": 7351, "token_acc": 0.8744200433034334 }, { "epoch": 0.39669778233421465, "grad_norm": 0.4662046432495117, "learning_rate": 1.3738467216790616e-05, "loss": 0.32482653856277466, "step": 7352, "token_acc": 0.8840526023337655 }, { "epoch": 0.39675174013921116, "grad_norm": 0.3092995285987854, "learning_rate": 1.3736846312328562e-05, "loss": 0.3686203360557556, "step": 7353, "token_acc": 0.8694871507126024 }, { "epoch": 0.3968056979442076, "grad_norm": 0.5025225877761841, "learning_rate": 1.37352252937442e-05, "loss": 0.47078806161880493, "step": 7354, "token_acc": 0.8420454545454545 }, { "epoch": 0.3968596557492041, "grad_norm": 0.2793317437171936, "learning_rate": 1.3733604161087033e-05, "loss": 0.38166743516921997, "step": 7355, "token_acc": 0.8663659793814433 }, { "epoch": 0.39691361355420063, "grad_norm": 0.4512089490890503, "learning_rate": 1.3731982914406579e-05, "loss": 0.4022921323776245, "step": 7356, "token_acc": 0.8628015350877193 }, { "epoch": 0.39696757135919714, "grad_norm": 0.2509862780570984, "learning_rate": 1.3730361553752338e-05, "loss": 0.3672149181365967, "step": 7357, "token_acc": 0.8715926110833749 }, { "epoch": 0.3970215291641936, "grad_norm": 0.49177801609039307, "learning_rate": 1.3728740079173837e-05, "loss": 0.4041149616241455, "step": 7358, "token_acc": 0.8570361517777114 }, { "epoch": 0.3970754869691901, "grad_norm": 0.4299212396144867, "learning_rate": 1.3727118490720588e-05, "loss": 0.43153274059295654, "step": 7359, "token_acc": 0.8556590257879656 }, { "epoch": 0.3971294447741866, "grad_norm": 0.4279937744140625, "learning_rate": 1.3725496788442118e-05, "loss": 0.3803377151489258, "step": 7360, "token_acc": 0.8686567164179104 }, { "epoch": 0.39718340257918305, "grad_norm": 0.3963170349597931, "learning_rate": 1.3723874972387952e-05, "loss": 0.42330053448677063, "step": 7361, "token_acc": 0.855188679245283 }, { "epoch": 0.39723736038417956, "grad_norm": 0.3946409225463867, "learning_rate": 1.3722253042607618e-05, "loss": 0.3251396417617798, "step": 7362, "token_acc": 0.8882505027291009 }, { "epoch": 0.39729131818917607, "grad_norm": 0.37306785583496094, "learning_rate": 1.3720630999150655e-05, "loss": 0.35335201025009155, "step": 7363, "token_acc": 0.8744270333653129 }, { "epoch": 0.3973452759941726, "grad_norm": 0.40660542249679565, "learning_rate": 1.3719008842066592e-05, "loss": 0.4122610092163086, "step": 7364, "token_acc": 0.8578252318066872 }, { "epoch": 0.397399233799169, "grad_norm": 0.31906160712242126, "learning_rate": 1.3717386571404977e-05, "loss": 0.3303645849227905, "step": 7365, "token_acc": 0.8825968247547781 }, { "epoch": 0.39745319160416553, "grad_norm": 0.5461150407791138, "learning_rate": 1.3715764187215349e-05, "loss": 0.41041237115859985, "step": 7366, "token_acc": 0.8615384615384616 }, { "epoch": 0.39750714940916204, "grad_norm": 0.4101386070251465, "learning_rate": 1.371414168954725e-05, "loss": 0.3339563012123108, "step": 7367, "token_acc": 0.8794636556104446 }, { "epoch": 0.39756110721415855, "grad_norm": 0.2556149661540985, "learning_rate": 1.3712519078450247e-05, "loss": 0.3514274060726166, "step": 7368, "token_acc": 0.8759377449333782 }, { "epoch": 0.397615065019155, "grad_norm": 0.41971641778945923, "learning_rate": 1.3710896353973878e-05, "loss": 0.40691468119621277, "step": 7369, "token_acc": 0.8585726004922067 }, { "epoch": 0.3976690228241515, "grad_norm": 0.3073502480983734, "learning_rate": 1.3709273516167704e-05, "loss": 0.3487764596939087, "step": 7370, "token_acc": 0.8765176191886289 }, { "epoch": 0.397722980629148, "grad_norm": 0.3259519338607788, "learning_rate": 1.3707650565081292e-05, "loss": 0.32539427280426025, "step": 7371, "token_acc": 0.8874972042048759 }, { "epoch": 0.3977769384341445, "grad_norm": 0.3858603239059448, "learning_rate": 1.3706027500764202e-05, "loss": 0.41909050941467285, "step": 7372, "token_acc": 0.8544066099148723 }, { "epoch": 0.397830896239141, "grad_norm": 0.4234255850315094, "learning_rate": 1.3704404323266006e-05, "loss": 0.37053948640823364, "step": 7373, "token_acc": 0.8693048128342246 }, { "epoch": 0.3978848540441375, "grad_norm": 0.4464437961578369, "learning_rate": 1.370278103263627e-05, "loss": 0.42810964584350586, "step": 7374, "token_acc": 0.8560559577862314 }, { "epoch": 0.397938811849134, "grad_norm": 0.38502639532089233, "learning_rate": 1.3701157628924571e-05, "loss": 0.3592309355735779, "step": 7375, "token_acc": 0.8737554759060135 }, { "epoch": 0.3979927696541305, "grad_norm": 0.45542454719543457, "learning_rate": 1.3699534112180487e-05, "loss": 0.40045931935310364, "step": 7376, "token_acc": 0.8602116062840655 }, { "epoch": 0.39804672745912695, "grad_norm": 0.30859673023223877, "learning_rate": 1.3697910482453603e-05, "loss": 0.33233508467674255, "step": 7377, "token_acc": 0.8838759862206912 }, { "epoch": 0.39810068526412346, "grad_norm": 0.4122796058654785, "learning_rate": 1.3696286739793499e-05, "loss": 0.3606916666030884, "step": 7378, "token_acc": 0.8768068758952988 }, { "epoch": 0.39815464306911996, "grad_norm": 0.3737839162349701, "learning_rate": 1.3694662884249768e-05, "loss": 0.3363102674484253, "step": 7379, "token_acc": 0.8795115332428766 }, { "epoch": 0.3982086008741164, "grad_norm": 0.2781851887702942, "learning_rate": 1.3693038915871999e-05, "loss": 0.35504117608070374, "step": 7380, "token_acc": 0.8761827245904975 }, { "epoch": 0.3982625586791129, "grad_norm": 0.4016624987125397, "learning_rate": 1.3691414834709791e-05, "loss": 0.4127488136291504, "step": 7381, "token_acc": 0.8594528875379939 }, { "epoch": 0.39831651648410943, "grad_norm": 0.38894355297088623, "learning_rate": 1.3689790640812739e-05, "loss": 0.3824004530906677, "step": 7382, "token_acc": 0.8641897233201581 }, { "epoch": 0.39837047428910594, "grad_norm": 0.49428796768188477, "learning_rate": 1.3688166334230447e-05, "loss": 0.3527107238769531, "step": 7383, "token_acc": 0.8742753023024681 }, { "epoch": 0.3984244320941024, "grad_norm": 0.4183540642261505, "learning_rate": 1.368654191501252e-05, "loss": 0.3326261043548584, "step": 7384, "token_acc": 0.877882797731569 }, { "epoch": 0.3984783898990989, "grad_norm": 0.3720143139362335, "learning_rate": 1.3684917383208574e-05, "loss": 0.3425520062446594, "step": 7385, "token_acc": 0.8777777777777778 }, { "epoch": 0.3985323477040954, "grad_norm": 0.4378797709941864, "learning_rate": 1.368329273886821e-05, "loss": 0.39581677317619324, "step": 7386, "token_acc": 0.8670674682698731 }, { "epoch": 0.3985863055090919, "grad_norm": 0.3407815992832184, "learning_rate": 1.3681667982041056e-05, "loss": 0.41374433040618896, "step": 7387, "token_acc": 0.862061427478413 }, { "epoch": 0.39864026331408836, "grad_norm": 0.3147885799407959, "learning_rate": 1.3680043112776725e-05, "loss": 0.3246171176433563, "step": 7388, "token_acc": 0.8793198025233132 }, { "epoch": 0.39869422111908487, "grad_norm": 0.35892733931541443, "learning_rate": 1.3678418131124839e-05, "loss": 0.4574100077152252, "step": 7389, "token_acc": 0.8486203511833351 }, { "epoch": 0.3987481789240814, "grad_norm": 0.4860004484653473, "learning_rate": 1.367679303713503e-05, "loss": 0.3710092306137085, "step": 7390, "token_acc": 0.8749218261413383 }, { "epoch": 0.3988021367290779, "grad_norm": 0.3568497896194458, "learning_rate": 1.3675167830856924e-05, "loss": 0.35582637786865234, "step": 7391, "token_acc": 0.8705131613932465 }, { "epoch": 0.39885609453407433, "grad_norm": 0.3145293593406677, "learning_rate": 1.3673542512340154e-05, "loss": 0.361721396446228, "step": 7392, "token_acc": 0.8730679797565313 }, { "epoch": 0.39891005233907084, "grad_norm": 0.3808962106704712, "learning_rate": 1.3671917081634356e-05, "loss": 0.4105086624622345, "step": 7393, "token_acc": 0.8612355736591989 }, { "epoch": 0.39896401014406735, "grad_norm": 0.4556223750114441, "learning_rate": 1.3670291538789176e-05, "loss": 0.45001479983329773, "step": 7394, "token_acc": 0.8500851788756388 }, { "epoch": 0.39901796794906386, "grad_norm": 0.4137152433395386, "learning_rate": 1.3668665883854253e-05, "loss": 0.3584795892238617, "step": 7395, "token_acc": 0.8764878048780488 }, { "epoch": 0.3990719257540603, "grad_norm": 0.360495924949646, "learning_rate": 1.3667040116879233e-05, "loss": 0.35575225949287415, "step": 7396, "token_acc": 0.8730999860549435 }, { "epoch": 0.3991258835590568, "grad_norm": 0.4202914834022522, "learning_rate": 1.3665414237913772e-05, "loss": 0.342793345451355, "step": 7397, "token_acc": 0.8833201789002894 }, { "epoch": 0.3991798413640533, "grad_norm": 0.4366600513458252, "learning_rate": 1.366378824700752e-05, "loss": 0.41540223360061646, "step": 7398, "token_acc": 0.8596172649811427 }, { "epoch": 0.39923379916904983, "grad_norm": 0.4678473472595215, "learning_rate": 1.3662162144210131e-05, "loss": 0.3798828721046448, "step": 7399, "token_acc": 0.8676988036593948 }, { "epoch": 0.3992877569740463, "grad_norm": 0.476235955953598, "learning_rate": 1.3660535929571273e-05, "loss": 0.37393248081207275, "step": 7400, "token_acc": 0.8676369863013699 }, { "epoch": 0.3993417147790428, "grad_norm": 0.36841726303100586, "learning_rate": 1.3658909603140605e-05, "loss": 0.29108813405036926, "step": 7401, "token_acc": 0.8940948472412221 }, { "epoch": 0.3993956725840393, "grad_norm": 0.33968019485473633, "learning_rate": 1.3657283164967797e-05, "loss": 0.4051140546798706, "step": 7402, "token_acc": 0.8619102416570771 }, { "epoch": 0.39944963038903575, "grad_norm": 0.4253101944923401, "learning_rate": 1.3655656615102517e-05, "loss": 0.3921283483505249, "step": 7403, "token_acc": 0.8632237871674492 }, { "epoch": 0.39950358819403226, "grad_norm": 0.3077383041381836, "learning_rate": 1.3654029953594443e-05, "loss": 0.37792229652404785, "step": 7404, "token_acc": 0.8703108252947481 }, { "epoch": 0.39955754599902876, "grad_norm": 0.4635264575481415, "learning_rate": 1.3652403180493253e-05, "loss": 0.3477964401245117, "step": 7405, "token_acc": 0.8722741433021807 }, { "epoch": 0.39961150380402527, "grad_norm": 0.42252886295318604, "learning_rate": 1.3650776295848624e-05, "loss": 0.42411288619041443, "step": 7406, "token_acc": 0.8553740107224918 }, { "epoch": 0.3996654616090217, "grad_norm": 0.43649014830589294, "learning_rate": 1.3649149299710247e-05, "loss": 0.3498181402683258, "step": 7407, "token_acc": 0.8753557199772339 }, { "epoch": 0.39971941941401823, "grad_norm": 0.39045026898384094, "learning_rate": 1.3647522192127804e-05, "loss": 0.40804916620254517, "step": 7408, "token_acc": 0.8629066033784727 }, { "epoch": 0.39977337721901474, "grad_norm": 0.3278850018978119, "learning_rate": 1.3645894973150987e-05, "loss": 0.38311856985092163, "step": 7409, "token_acc": 0.8614985403827441 }, { "epoch": 0.39982733502401124, "grad_norm": 0.33908703923225403, "learning_rate": 1.3644267642829496e-05, "loss": 0.3860335946083069, "step": 7410, "token_acc": 0.8618032984230167 }, { "epoch": 0.3998812928290077, "grad_norm": 0.42983758449554443, "learning_rate": 1.3642640201213024e-05, "loss": 0.4337981641292572, "step": 7411, "token_acc": 0.8535463258785942 }, { "epoch": 0.3999352506340042, "grad_norm": 0.3793331980705261, "learning_rate": 1.3641012648351273e-05, "loss": 0.38964179158210754, "step": 7412, "token_acc": 0.8652592802493624 }, { "epoch": 0.3999892084390007, "grad_norm": 0.4423622786998749, "learning_rate": 1.3639384984293953e-05, "loss": 0.3998364210128784, "step": 7413, "token_acc": 0.8606416850355131 }, { "epoch": 0.4000431662439972, "grad_norm": 0.40416744351387024, "learning_rate": 1.3637757209090767e-05, "loss": 0.37535443902015686, "step": 7414, "token_acc": 0.8693453964874933 }, { "epoch": 0.40009712404899367, "grad_norm": 0.3552059531211853, "learning_rate": 1.363612932279143e-05, "loss": 0.4481867849826813, "step": 7415, "token_acc": 0.8483893077450309 }, { "epoch": 0.4001510818539902, "grad_norm": 0.45371925830841064, "learning_rate": 1.3634501325445655e-05, "loss": 0.4007343053817749, "step": 7416, "token_acc": 0.8651994497936726 }, { "epoch": 0.4002050396589867, "grad_norm": 0.4304831027984619, "learning_rate": 1.3632873217103161e-05, "loss": 0.4219281077384949, "step": 7417, "token_acc": 0.8553802904888334 }, { "epoch": 0.4002589974639832, "grad_norm": 0.5155109763145447, "learning_rate": 1.3631244997813673e-05, "loss": 0.36116093397140503, "step": 7418, "token_acc": 0.8745644599303136 }, { "epoch": 0.40031295526897964, "grad_norm": 0.38994237780570984, "learning_rate": 1.362961666762691e-05, "loss": 0.4339863061904907, "step": 7419, "token_acc": 0.8537604456824512 }, { "epoch": 0.40036691307397615, "grad_norm": 0.36740878224372864, "learning_rate": 1.362798822659261e-05, "loss": 0.37516337633132935, "step": 7420, "token_acc": 0.8629213483146068 }, { "epoch": 0.40042087087897266, "grad_norm": 0.36599645018577576, "learning_rate": 1.3626359674760496e-05, "loss": 0.33565157651901245, "step": 7421, "token_acc": 0.88171109733416 }, { "epoch": 0.4004748286839691, "grad_norm": 0.4659813940525055, "learning_rate": 1.362473101218031e-05, "loss": 0.41266030073165894, "step": 7422, "token_acc": 0.8585803644275224 }, { "epoch": 0.4005287864889656, "grad_norm": 0.3792521357536316, "learning_rate": 1.3623102238901786e-05, "loss": 0.3430880904197693, "step": 7423, "token_acc": 0.8786219081272085 }, { "epoch": 0.4005827442939621, "grad_norm": 0.44356730580329895, "learning_rate": 1.362147335497467e-05, "loss": 0.4542575180530548, "step": 7424, "token_acc": 0.8459257167066798 }, { "epoch": 0.40063670209895863, "grad_norm": 0.3487355709075928, "learning_rate": 1.3619844360448706e-05, "loss": 0.41288918256759644, "step": 7425, "token_acc": 0.8597823622761278 }, { "epoch": 0.4006906599039551, "grad_norm": 0.4080153703689575, "learning_rate": 1.3618215255373644e-05, "loss": 0.3235529363155365, "step": 7426, "token_acc": 0.8800128328521014 }, { "epoch": 0.4007446177089516, "grad_norm": 0.3323703408241272, "learning_rate": 1.3616586039799238e-05, "loss": 0.3783019781112671, "step": 7427, "token_acc": 0.8693251533742331 }, { "epoch": 0.4007985755139481, "grad_norm": 0.4443647265434265, "learning_rate": 1.3614956713775236e-05, "loss": 0.3614516854286194, "step": 7428, "token_acc": 0.8760535448686168 }, { "epoch": 0.4008525333189446, "grad_norm": 0.4729519188404083, "learning_rate": 1.3613327277351407e-05, "loss": 0.36570507287979126, "step": 7429, "token_acc": 0.866834657879434 }, { "epoch": 0.40090649112394106, "grad_norm": 0.4148566424846649, "learning_rate": 1.3611697730577512e-05, "loss": 0.35678404569625854, "step": 7430, "token_acc": 0.8745013807916538 }, { "epoch": 0.40096044892893756, "grad_norm": 0.342830091714859, "learning_rate": 1.3610068073503308e-05, "loss": 0.38469386100769043, "step": 7431, "token_acc": 0.8664080494605406 }, { "epoch": 0.40101440673393407, "grad_norm": 0.3743458688259125, "learning_rate": 1.3608438306178577e-05, "loss": 0.3812323808670044, "step": 7432, "token_acc": 0.8715307892454467 }, { "epoch": 0.4010683645389306, "grad_norm": 0.3871855139732361, "learning_rate": 1.3606808428653081e-05, "loss": 0.37319618463516235, "step": 7433, "token_acc": 0.8701139566229629 }, { "epoch": 0.40112232234392703, "grad_norm": 0.4435862600803375, "learning_rate": 1.3605178440976602e-05, "loss": 0.3412647545337677, "step": 7434, "token_acc": 0.8778612303290415 }, { "epoch": 0.40117628014892354, "grad_norm": 0.3211545944213867, "learning_rate": 1.3603548343198918e-05, "loss": 0.3767118453979492, "step": 7435, "token_acc": 0.8666146645865834 }, { "epoch": 0.40123023795392004, "grad_norm": 0.32565295696258545, "learning_rate": 1.360191813536981e-05, "loss": 0.3488691747188568, "step": 7436, "token_acc": 0.8790983606557377 }, { "epoch": 0.40128419575891655, "grad_norm": 0.4292061924934387, "learning_rate": 1.3600287817539069e-05, "loss": 0.37321943044662476, "step": 7437, "token_acc": 0.8720185361864522 }, { "epoch": 0.401338153563913, "grad_norm": 0.3805229961872101, "learning_rate": 1.3598657389756478e-05, "loss": 0.4128519892692566, "step": 7438, "token_acc": 0.8593521421107628 }, { "epoch": 0.4013921113689095, "grad_norm": 0.4113259017467499, "learning_rate": 1.3597026852071838e-05, "loss": 0.4155823588371277, "step": 7439, "token_acc": 0.8539392151077014 }, { "epoch": 0.401446069173906, "grad_norm": 0.345062255859375, "learning_rate": 1.3595396204534936e-05, "loss": 0.37592265009880066, "step": 7440, "token_acc": 0.8667962326207206 }, { "epoch": 0.4015000269789025, "grad_norm": 0.3587968051433563, "learning_rate": 1.3593765447195576e-05, "loss": 0.37811434268951416, "step": 7441, "token_acc": 0.8658912888677902 }, { "epoch": 0.401553984783899, "grad_norm": 0.3152958154678345, "learning_rate": 1.359213458010356e-05, "loss": 0.3779929578304291, "step": 7442, "token_acc": 0.8679813749657628 }, { "epoch": 0.4016079425888955, "grad_norm": 0.42174091935157776, "learning_rate": 1.3590503603308698e-05, "loss": 0.34645989537239075, "step": 7443, "token_acc": 0.8750614854894245 }, { "epoch": 0.401661900393892, "grad_norm": 0.43394768238067627, "learning_rate": 1.3588872516860792e-05, "loss": 0.38401320576667786, "step": 7444, "token_acc": 0.8655667583256951 }, { "epoch": 0.40171585819888844, "grad_norm": 0.35783326625823975, "learning_rate": 1.3587241320809663e-05, "loss": 0.4081210494041443, "step": 7445, "token_acc": 0.8575296544610624 }, { "epoch": 0.40176981600388495, "grad_norm": 0.45273274183273315, "learning_rate": 1.3585610015205122e-05, "loss": 0.42649078369140625, "step": 7446, "token_acc": 0.8543434102043255 }, { "epoch": 0.40182377380888146, "grad_norm": 0.3728827238082886, "learning_rate": 1.3583978600096992e-05, "loss": 0.37791529297828674, "step": 7447, "token_acc": 0.8705285021924763 }, { "epoch": 0.40187773161387796, "grad_norm": 0.3279775083065033, "learning_rate": 1.3582347075535095e-05, "loss": 0.3890169560909271, "step": 7448, "token_acc": 0.8633865480240774 }, { "epoch": 0.4019316894188744, "grad_norm": 0.4524098336696625, "learning_rate": 1.3580715441569256e-05, "loss": 0.4036738872528076, "step": 7449, "token_acc": 0.8624818577648766 }, { "epoch": 0.4019856472238709, "grad_norm": 0.3931187093257904, "learning_rate": 1.3579083698249305e-05, "loss": 0.4039659798145294, "step": 7450, "token_acc": 0.8621513944223107 }, { "epoch": 0.40203960502886743, "grad_norm": 0.33425086736679077, "learning_rate": 1.3577451845625074e-05, "loss": 0.39025869965553284, "step": 7451, "token_acc": 0.8652382294897096 }, { "epoch": 0.40209356283386394, "grad_norm": 0.5006304979324341, "learning_rate": 1.3575819883746401e-05, "loss": 0.41524040699005127, "step": 7452, "token_acc": 0.8619708884055546 }, { "epoch": 0.4021475206388604, "grad_norm": 0.34009838104248047, "learning_rate": 1.357418781266313e-05, "loss": 0.3802453279495239, "step": 7453, "token_acc": 0.8621260343730108 }, { "epoch": 0.4022014784438569, "grad_norm": 0.48299625515937805, "learning_rate": 1.3572555632425093e-05, "loss": 0.38422906398773193, "step": 7454, "token_acc": 0.8657323055360897 }, { "epoch": 0.4022554362488534, "grad_norm": 0.3741207718849182, "learning_rate": 1.3570923343082147e-05, "loss": 0.34889620542526245, "step": 7455, "token_acc": 0.8802423112767941 }, { "epoch": 0.4023093940538499, "grad_norm": 0.4215230643749237, "learning_rate": 1.356929094468414e-05, "loss": 0.349750816822052, "step": 7456, "token_acc": 0.8762043795620438 }, { "epoch": 0.40236335185884636, "grad_norm": 0.40782544016838074, "learning_rate": 1.3567658437280917e-05, "loss": 0.4528690576553345, "step": 7457, "token_acc": 0.8441883071795584 }, { "epoch": 0.40241730966384287, "grad_norm": 0.3616045415401459, "learning_rate": 1.3566025820922342e-05, "loss": 0.4011108875274658, "step": 7458, "token_acc": 0.8576874205844981 }, { "epoch": 0.4024712674688394, "grad_norm": 0.48650461435317993, "learning_rate": 1.3564393095658273e-05, "loss": 0.34903085231781006, "step": 7459, "token_acc": 0.8768227168073676 }, { "epoch": 0.4025252252738359, "grad_norm": 0.3264980912208557, "learning_rate": 1.3562760261538572e-05, "loss": 0.3247782289981842, "step": 7460, "token_acc": 0.8856 }, { "epoch": 0.40257918307883234, "grad_norm": 0.4854796230792999, "learning_rate": 1.3561127318613105e-05, "loss": 0.4011848568916321, "step": 7461, "token_acc": 0.8582822085889571 }, { "epoch": 0.40263314088382884, "grad_norm": 0.47162070870399475, "learning_rate": 1.3559494266931745e-05, "loss": 0.4178388714790344, "step": 7462, "token_acc": 0.859507701165971 }, { "epoch": 0.40268709868882535, "grad_norm": 0.42042601108551025, "learning_rate": 1.355786110654436e-05, "loss": 0.3196398615837097, "step": 7463, "token_acc": 0.8859128420443029 }, { "epoch": 0.40274105649382186, "grad_norm": 0.4412132203578949, "learning_rate": 1.3556227837500833e-05, "loss": 0.4919614791870117, "step": 7464, "token_acc": 0.8437679494543366 }, { "epoch": 0.4027950142988183, "grad_norm": 0.3066573441028595, "learning_rate": 1.3554594459851035e-05, "loss": 0.3455367684364319, "step": 7465, "token_acc": 0.8789508196721312 }, { "epoch": 0.4028489721038148, "grad_norm": 0.43225279450416565, "learning_rate": 1.3552960973644858e-05, "loss": 0.36406534910202026, "step": 7466, "token_acc": 0.8711630526612222 }, { "epoch": 0.4029029299088113, "grad_norm": 0.49135705828666687, "learning_rate": 1.3551327378932177e-05, "loss": 0.45398253202438354, "step": 7467, "token_acc": 0.8404916847433116 }, { "epoch": 0.4029568877138078, "grad_norm": 0.33531567454338074, "learning_rate": 1.3549693675762893e-05, "loss": 0.3256984353065491, "step": 7468, "token_acc": 0.8846296985485672 }, { "epoch": 0.4030108455188043, "grad_norm": 0.28575611114501953, "learning_rate": 1.3548059864186894e-05, "loss": 0.3578743040561676, "step": 7469, "token_acc": 0.8706930956498603 }, { "epoch": 0.4030648033238008, "grad_norm": 0.46515074372291565, "learning_rate": 1.3546425944254074e-05, "loss": 0.36121511459350586, "step": 7470, "token_acc": 0.8705701078582434 }, { "epoch": 0.4031187611287973, "grad_norm": 0.3787094056606293, "learning_rate": 1.3544791916014335e-05, "loss": 0.38689616322517395, "step": 7471, "token_acc": 0.8694042799305958 }, { "epoch": 0.40317271893379375, "grad_norm": 0.3614422082901001, "learning_rate": 1.3543157779517579e-05, "loss": 0.36867257952690125, "step": 7472, "token_acc": 0.8695372750642674 }, { "epoch": 0.40322667673879026, "grad_norm": 0.37536513805389404, "learning_rate": 1.3541523534813716e-05, "loss": 0.4200912117958069, "step": 7473, "token_acc": 0.852294154619736 }, { "epoch": 0.40328063454378676, "grad_norm": 0.42814716696739197, "learning_rate": 1.353988918195265e-05, "loss": 0.40286576747894287, "step": 7474, "token_acc": 0.8568906672943392 }, { "epoch": 0.40333459234878327, "grad_norm": 0.3094799816608429, "learning_rate": 1.3538254720984297e-05, "loss": 0.3042924404144287, "step": 7475, "token_acc": 0.8860632183908046 }, { "epoch": 0.4033885501537797, "grad_norm": 0.4193194806575775, "learning_rate": 1.3536620151958571e-05, "loss": 0.41212207078933716, "step": 7476, "token_acc": 0.8558731265249215 }, { "epoch": 0.40344250795877623, "grad_norm": 0.40306058526039124, "learning_rate": 1.3534985474925389e-05, "loss": 0.4320217967033386, "step": 7477, "token_acc": 0.8540609137055838 }, { "epoch": 0.40349646576377274, "grad_norm": 0.40270501375198364, "learning_rate": 1.353335068993468e-05, "loss": 0.36401280760765076, "step": 7478, "token_acc": 0.8687577383408998 }, { "epoch": 0.40355042356876925, "grad_norm": 0.3484819531440735, "learning_rate": 1.3531715797036364e-05, "loss": 0.39450156688690186, "step": 7479, "token_acc": 0.8662585354367789 }, { "epoch": 0.4036043813737657, "grad_norm": 0.45603182911872864, "learning_rate": 1.3530080796280376e-05, "loss": 0.4336080253124237, "step": 7480, "token_acc": 0.854745673833246 }, { "epoch": 0.4036583391787622, "grad_norm": 0.3421420753002167, "learning_rate": 1.3528445687716643e-05, "loss": 0.364157497882843, "step": 7481, "token_acc": 0.8694826074247296 }, { "epoch": 0.4037122969837587, "grad_norm": 0.4974059760570526, "learning_rate": 1.3526810471395104e-05, "loss": 0.3455047309398651, "step": 7482, "token_acc": 0.8788047512991833 }, { "epoch": 0.4037662547887552, "grad_norm": 0.46793094277381897, "learning_rate": 1.3525175147365697e-05, "loss": 0.3702397048473358, "step": 7483, "token_acc": 0.8694779116465864 }, { "epoch": 0.40382021259375167, "grad_norm": 0.4374583065509796, "learning_rate": 1.3523539715678365e-05, "loss": 0.313510537147522, "step": 7484, "token_acc": 0.8898753305629014 }, { "epoch": 0.4038741703987482, "grad_norm": 0.42556560039520264, "learning_rate": 1.3521904176383052e-05, "loss": 0.4179815649986267, "step": 7485, "token_acc": 0.8562336984872196 }, { "epoch": 0.4039281282037447, "grad_norm": 0.3518570363521576, "learning_rate": 1.3520268529529705e-05, "loss": 0.3872767686843872, "step": 7486, "token_acc": 0.8678802113916618 }, { "epoch": 0.40398208600874114, "grad_norm": 0.4193960130214691, "learning_rate": 1.3518632775168284e-05, "loss": 0.349841833114624, "step": 7487, "token_acc": 0.8763830599008012 }, { "epoch": 0.40403604381373764, "grad_norm": 0.41451117396354675, "learning_rate": 1.351699691334874e-05, "loss": 0.32332420349121094, "step": 7488, "token_acc": 0.8817204301075269 }, { "epoch": 0.40409000161873415, "grad_norm": 0.45339855551719666, "learning_rate": 1.3515360944121027e-05, "loss": 0.39398762583732605, "step": 7489, "token_acc": 0.8600237247924081 }, { "epoch": 0.40414395942373066, "grad_norm": 0.36756718158721924, "learning_rate": 1.3513724867535117e-05, "loss": 0.36599093675613403, "step": 7490, "token_acc": 0.8689734339127583 }, { "epoch": 0.4041979172287271, "grad_norm": 0.29009753465652466, "learning_rate": 1.3512088683640967e-05, "loss": 0.3622342050075531, "step": 7491, "token_acc": 0.8733684738955824 }, { "epoch": 0.4042518750337236, "grad_norm": 0.39435961842536926, "learning_rate": 1.3510452392488551e-05, "loss": 0.38496342301368713, "step": 7492, "token_acc": 0.8647192285876347 }, { "epoch": 0.4043058328387201, "grad_norm": 0.2674863338470459, "learning_rate": 1.3508815994127835e-05, "loss": 0.39423811435699463, "step": 7493, "token_acc": 0.8658419243986254 }, { "epoch": 0.40435979064371663, "grad_norm": 0.37268906831741333, "learning_rate": 1.35071794886088e-05, "loss": 0.3758460283279419, "step": 7494, "token_acc": 0.8684746825521241 }, { "epoch": 0.4044137484487131, "grad_norm": 0.36642760038375854, "learning_rate": 1.3505542875981421e-05, "loss": 0.33222073316574097, "step": 7495, "token_acc": 0.8853936136579197 }, { "epoch": 0.4044677062537096, "grad_norm": 0.43100905418395996, "learning_rate": 1.3503906156295681e-05, "loss": 0.3849848508834839, "step": 7496, "token_acc": 0.8649091450700029 }, { "epoch": 0.4045216640587061, "grad_norm": 0.41309407353401184, "learning_rate": 1.3502269329601567e-05, "loss": 0.42885708808898926, "step": 7497, "token_acc": 0.8561689025731252 }, { "epoch": 0.4045756218637026, "grad_norm": 0.354584276676178, "learning_rate": 1.3500632395949065e-05, "loss": 0.3312208652496338, "step": 7498, "token_acc": 0.8808193668528864 }, { "epoch": 0.40462957966869906, "grad_norm": 0.29952722787857056, "learning_rate": 1.3498995355388162e-05, "loss": 0.3552356958389282, "step": 7499, "token_acc": 0.8783688443629181 }, { "epoch": 0.40468353747369556, "grad_norm": 0.5119913816452026, "learning_rate": 1.3497358207968863e-05, "loss": 0.35559335350990295, "step": 7500, "token_acc": 0.8739946380697051 }, { "epoch": 0.40473749527869207, "grad_norm": 0.3666030466556549, "learning_rate": 1.3495720953741158e-05, "loss": 0.3870925307273865, "step": 7501, "token_acc": 0.8630734590486913 }, { "epoch": 0.4047914530836886, "grad_norm": 0.41187024116516113, "learning_rate": 1.349408359275505e-05, "loss": 0.4388772249221802, "step": 7502, "token_acc": 0.8522002551020408 }, { "epoch": 0.40484541088868503, "grad_norm": 0.42032065987586975, "learning_rate": 1.3492446125060546e-05, "loss": 0.3184764087200165, "step": 7503, "token_acc": 0.8813491266813893 }, { "epoch": 0.40489936869368154, "grad_norm": 0.4776783883571625, "learning_rate": 1.3490808550707652e-05, "loss": 0.3919178247451782, "step": 7504, "token_acc": 0.8637028370868675 }, { "epoch": 0.40495332649867805, "grad_norm": 0.49396663904190063, "learning_rate": 1.348917086974638e-05, "loss": 0.33928820490837097, "step": 7505, "token_acc": 0.877563704164077 }, { "epoch": 0.40500728430367455, "grad_norm": 0.3613823652267456, "learning_rate": 1.3487533082226743e-05, "loss": 0.3939506411552429, "step": 7506, "token_acc": 0.8632218844984803 }, { "epoch": 0.405061242108671, "grad_norm": 0.5189364552497864, "learning_rate": 1.348589518819876e-05, "loss": 0.3890805244445801, "step": 7507, "token_acc": 0.8655375043207743 }, { "epoch": 0.4051151999136675, "grad_norm": 0.4073578417301178, "learning_rate": 1.348425718771245e-05, "loss": 0.43783849477767944, "step": 7508, "token_acc": 0.8453362944162437 }, { "epoch": 0.405169157718664, "grad_norm": 0.2541223168373108, "learning_rate": 1.3482619080817837e-05, "loss": 0.39683443307876587, "step": 7509, "token_acc": 0.8666666666666667 }, { "epoch": 0.40522311552366047, "grad_norm": 0.38018935918807983, "learning_rate": 1.348098086756495e-05, "loss": 0.3669222593307495, "step": 7510, "token_acc": 0.8711321873147599 }, { "epoch": 0.405277073328657, "grad_norm": 0.30898675322532654, "learning_rate": 1.3479342548003821e-05, "loss": 0.3229546546936035, "step": 7511, "token_acc": 0.8812509442513975 }, { "epoch": 0.4053310311336535, "grad_norm": 0.4443933367729187, "learning_rate": 1.3477704122184481e-05, "loss": 0.3613876700401306, "step": 7512, "token_acc": 0.872123151958543 }, { "epoch": 0.40538498893865, "grad_norm": 0.43011754751205444, "learning_rate": 1.3476065590156967e-05, "loss": 0.37918972969055176, "step": 7513, "token_acc": 0.8705758848230354 }, { "epoch": 0.40543894674364644, "grad_norm": 0.2911064028739929, "learning_rate": 1.3474426951971324e-05, "loss": 0.41378775238990784, "step": 7514, "token_acc": 0.8560780593576366 }, { "epoch": 0.40549290454864295, "grad_norm": 0.3851039707660675, "learning_rate": 1.347278820767759e-05, "loss": 0.39505481719970703, "step": 7515, "token_acc": 0.8616751269035533 }, { "epoch": 0.40554686235363946, "grad_norm": 0.3821662366390228, "learning_rate": 1.3471149357325812e-05, "loss": 0.35726410150527954, "step": 7516, "token_acc": 0.8758986000756716 }, { "epoch": 0.40560082015863597, "grad_norm": 0.3836406469345093, "learning_rate": 1.3469510400966045e-05, "loss": 0.402032732963562, "step": 7517, "token_acc": 0.8599748269351793 }, { "epoch": 0.4056547779636324, "grad_norm": 0.28952348232269287, "learning_rate": 1.3467871338648337e-05, "loss": 0.34256353974342346, "step": 7518, "token_acc": 0.8798247691214777 }, { "epoch": 0.4057087357686289, "grad_norm": 0.5107372999191284, "learning_rate": 1.3466232170422749e-05, "loss": 0.4484798312187195, "step": 7519, "token_acc": 0.8543927913168134 }, { "epoch": 0.40576269357362543, "grad_norm": 0.3616211712360382, "learning_rate": 1.3464592896339336e-05, "loss": 0.36361831426620483, "step": 7520, "token_acc": 0.8730682868841633 }, { "epoch": 0.40581665137862194, "grad_norm": 0.30711400508880615, "learning_rate": 1.3462953516448168e-05, "loss": 0.4010915756225586, "step": 7521, "token_acc": 0.8651526815499102 }, { "epoch": 0.4058706091836184, "grad_norm": 0.415790319442749, "learning_rate": 1.3461314030799301e-05, "loss": 0.3668699860572815, "step": 7522, "token_acc": 0.8674716536592549 }, { "epoch": 0.4059245669886149, "grad_norm": 0.3927161991596222, "learning_rate": 1.3459674439442814e-05, "loss": 0.3975100517272949, "step": 7523, "token_acc": 0.8623646209386282 }, { "epoch": 0.4059785247936114, "grad_norm": 0.39862963557243347, "learning_rate": 1.3458034742428776e-05, "loss": 0.3800978660583496, "step": 7524, "token_acc": 0.8697988254404598 }, { "epoch": 0.4060324825986079, "grad_norm": 0.3879140019416809, "learning_rate": 1.345639493980726e-05, "loss": 0.34918615221977234, "step": 7525, "token_acc": 0.8784546375343655 }, { "epoch": 0.40608644040360437, "grad_norm": 0.4068702459335327, "learning_rate": 1.345475503162835e-05, "loss": 0.4058408737182617, "step": 7526, "token_acc": 0.8614818223653935 }, { "epoch": 0.4061403982086009, "grad_norm": 0.3484480082988739, "learning_rate": 1.3453115017942127e-05, "loss": 0.36656755208969116, "step": 7527, "token_acc": 0.8713464302827024 }, { "epoch": 0.4061943560135974, "grad_norm": 0.34075474739074707, "learning_rate": 1.3451474898798673e-05, "loss": 0.40108522772789, "step": 7528, "token_acc": 0.8570503480654039 }, { "epoch": 0.4062483138185939, "grad_norm": 0.38691526651382446, "learning_rate": 1.3449834674248083e-05, "loss": 0.362210750579834, "step": 7529, "token_acc": 0.8727130162049137 }, { "epoch": 0.40630227162359034, "grad_norm": 0.419968843460083, "learning_rate": 1.3448194344340445e-05, "loss": 0.38948142528533936, "step": 7530, "token_acc": 0.8617707417179713 }, { "epoch": 0.40635622942858685, "grad_norm": 0.41258230805397034, "learning_rate": 1.3446553909125855e-05, "loss": 0.3299903869628906, "step": 7531, "token_acc": 0.8796875 }, { "epoch": 0.40641018723358335, "grad_norm": 0.3750467896461487, "learning_rate": 1.344491336865441e-05, "loss": 0.34713393449783325, "step": 7532, "token_acc": 0.8794912952216323 }, { "epoch": 0.4064641450385798, "grad_norm": 0.4242367446422577, "learning_rate": 1.3443272722976214e-05, "loss": 0.4008815586566925, "step": 7533, "token_acc": 0.8616405477904843 }, { "epoch": 0.4065181028435763, "grad_norm": 0.4044777452945709, "learning_rate": 1.3441631972141372e-05, "loss": 0.37351906299591064, "step": 7534, "token_acc": 0.8637970979148885 }, { "epoch": 0.4065720606485728, "grad_norm": 0.29155415296554565, "learning_rate": 1.3439991116199988e-05, "loss": 0.36567920446395874, "step": 7535, "token_acc": 0.8747784473590925 }, { "epoch": 0.4066260184535693, "grad_norm": 0.382258802652359, "learning_rate": 1.343835015520218e-05, "loss": 0.3569384813308716, "step": 7536, "token_acc": 0.8696846977147816 }, { "epoch": 0.4066799762585658, "grad_norm": 0.4551389813423157, "learning_rate": 1.3436709089198059e-05, "loss": 0.424845814704895, "step": 7537, "token_acc": 0.851335656213705 }, { "epoch": 0.4067339340635623, "grad_norm": 0.3452994227409363, "learning_rate": 1.3435067918237743e-05, "loss": 0.4266926646232605, "step": 7538, "token_acc": 0.8528162258462382 }, { "epoch": 0.4067878918685588, "grad_norm": 0.47270894050598145, "learning_rate": 1.343342664237135e-05, "loss": 0.3849448263645172, "step": 7539, "token_acc": 0.8676804754413564 }, { "epoch": 0.4068418496735553, "grad_norm": 0.5139350295066833, "learning_rate": 1.3431785261649009e-05, "loss": 0.4374244511127472, "step": 7540, "token_acc": 0.8528080469404862 }, { "epoch": 0.40689580747855175, "grad_norm": 0.38923999667167664, "learning_rate": 1.3430143776120843e-05, "loss": 0.38827115297317505, "step": 7541, "token_acc": 0.861244019138756 }, { "epoch": 0.40694976528354826, "grad_norm": 0.5182308554649353, "learning_rate": 1.3428502185836986e-05, "loss": 0.38561737537384033, "step": 7542, "token_acc": 0.8715684700593996 }, { "epoch": 0.40700372308854477, "grad_norm": 0.3397808372974396, "learning_rate": 1.342686049084757e-05, "loss": 0.3401244878768921, "step": 7543, "token_acc": 0.8841230631035257 }, { "epoch": 0.4070576808935413, "grad_norm": 0.4069146513938904, "learning_rate": 1.342521869120273e-05, "loss": 0.35871753096580505, "step": 7544, "token_acc": 0.8678313040409388 }, { "epoch": 0.4071116386985377, "grad_norm": 0.38800373673439026, "learning_rate": 1.3423576786952613e-05, "loss": 0.3660139739513397, "step": 7545, "token_acc": 0.87284260215371 }, { "epoch": 0.40716559650353423, "grad_norm": 0.3899752199649811, "learning_rate": 1.3421934778147357e-05, "loss": 0.4221906065940857, "step": 7546, "token_acc": 0.8604271083024624 }, { "epoch": 0.40721955430853074, "grad_norm": 0.29995429515838623, "learning_rate": 1.3420292664837106e-05, "loss": 0.3350147604942322, "step": 7547, "token_acc": 0.8824197671116161 }, { "epoch": 0.40727351211352725, "grad_norm": 0.4504256546497345, "learning_rate": 1.3418650447072017e-05, "loss": 0.4107974171638489, "step": 7548, "token_acc": 0.8634631317315659 }, { "epoch": 0.4073274699185237, "grad_norm": 0.436350017786026, "learning_rate": 1.3417008124902236e-05, "loss": 0.48449021577835083, "step": 7549, "token_acc": 0.8400108651364933 }, { "epoch": 0.4073814277235202, "grad_norm": 0.2755270004272461, "learning_rate": 1.3415365698377924e-05, "loss": 0.3257509469985962, "step": 7550, "token_acc": 0.8863895680521597 }, { "epoch": 0.4074353855285167, "grad_norm": 0.39116108417510986, "learning_rate": 1.3413723167549236e-05, "loss": 0.44703295826911926, "step": 7551, "token_acc": 0.850640634533252 }, { "epoch": 0.40748934333351317, "grad_norm": 0.3130013644695282, "learning_rate": 1.3412080532466339e-05, "loss": 0.39073991775512695, "step": 7552, "token_acc": 0.862534026231131 }, { "epoch": 0.4075433011385097, "grad_norm": 0.4223988652229309, "learning_rate": 1.3410437793179396e-05, "loss": 0.40240752696990967, "step": 7553, "token_acc": 0.8635831381733021 }, { "epoch": 0.4075972589435062, "grad_norm": 0.4706045687198639, "learning_rate": 1.3408794949738573e-05, "loss": 0.408780962228775, "step": 7554, "token_acc": 0.8633130962705985 }, { "epoch": 0.4076512167485027, "grad_norm": 0.39989009499549866, "learning_rate": 1.340715200219405e-05, "loss": 0.3953644037246704, "step": 7555, "token_acc": 0.8572202166064982 }, { "epoch": 0.40770517455349914, "grad_norm": 0.4474799931049347, "learning_rate": 1.3405508950595996e-05, "loss": 0.4054531753063202, "step": 7556, "token_acc": 0.8611971104231166 }, { "epoch": 0.40775913235849565, "grad_norm": 0.3326452970504761, "learning_rate": 1.3403865794994589e-05, "loss": 0.3172106146812439, "step": 7557, "token_acc": 0.8875587858059 }, { "epoch": 0.40781309016349215, "grad_norm": 0.47292783856391907, "learning_rate": 1.3402222535440013e-05, "loss": 0.36109721660614014, "step": 7558, "token_acc": 0.8709628961994893 }, { "epoch": 0.40786704796848866, "grad_norm": 0.3985278904438019, "learning_rate": 1.3400579171982453e-05, "loss": 0.35789424180984497, "step": 7559, "token_acc": 0.8719346049046321 }, { "epoch": 0.4079210057734851, "grad_norm": 0.410627543926239, "learning_rate": 1.3398935704672091e-05, "loss": 0.38659703731536865, "step": 7560, "token_acc": 0.8635030658491069 }, { "epoch": 0.4079749635784816, "grad_norm": 0.3245519995689392, "learning_rate": 1.339729213355913e-05, "loss": 0.30121418833732605, "step": 7561, "token_acc": 0.8922573933719721 }, { "epoch": 0.4080289213834781, "grad_norm": 0.38089099526405334, "learning_rate": 1.3395648458693753e-05, "loss": 0.3612477779388428, "step": 7562, "token_acc": 0.8748323186461665 }, { "epoch": 0.40808287918847463, "grad_norm": 0.35123011469841003, "learning_rate": 1.3394004680126164e-05, "loss": 0.3416343927383423, "step": 7563, "token_acc": 0.8814693111137191 }, { "epoch": 0.4081368369934711, "grad_norm": 0.29157668352127075, "learning_rate": 1.339236079790656e-05, "loss": 0.37800997495651245, "step": 7564, "token_acc": 0.8703049049933717 }, { "epoch": 0.4081907947984676, "grad_norm": 0.3766239583492279, "learning_rate": 1.3390716812085148e-05, "loss": 0.40102264285087585, "step": 7565, "token_acc": 0.8627425944841676 }, { "epoch": 0.4082447526034641, "grad_norm": 0.45500990748405457, "learning_rate": 1.3389072722712131e-05, "loss": 0.36112770438194275, "step": 7566, "token_acc": 0.8705366464815202 }, { "epoch": 0.4082987104084606, "grad_norm": 0.43020331859588623, "learning_rate": 1.3387428529837718e-05, "loss": 0.44038301706314087, "step": 7567, "token_acc": 0.8479672019132217 }, { "epoch": 0.40835266821345706, "grad_norm": 0.3381916582584381, "learning_rate": 1.338578423351213e-05, "loss": 0.31563347578048706, "step": 7568, "token_acc": 0.888641425389755 }, { "epoch": 0.40840662601845357, "grad_norm": 0.35881131887435913, "learning_rate": 1.3384139833785577e-05, "loss": 0.39981427788734436, "step": 7569, "token_acc": 0.8612639041681946 }, { "epoch": 0.4084605838234501, "grad_norm": 0.4985105097293854, "learning_rate": 1.3382495330708275e-05, "loss": 0.44115781784057617, "step": 7570, "token_acc": 0.8512713897329282 }, { "epoch": 0.4085145416284466, "grad_norm": 0.3448461592197418, "learning_rate": 1.338085072433046e-05, "loss": 0.34617966413497925, "step": 7571, "token_acc": 0.8761882408168056 }, { "epoch": 0.40856849943344303, "grad_norm": 0.3903297483921051, "learning_rate": 1.3379206014702344e-05, "loss": 0.38041505217552185, "step": 7572, "token_acc": 0.866361609761821 }, { "epoch": 0.40862245723843954, "grad_norm": 0.41285550594329834, "learning_rate": 1.3377561201874162e-05, "loss": 0.3646293878555298, "step": 7573, "token_acc": 0.8659702549575071 }, { "epoch": 0.40867641504343605, "grad_norm": 0.40161800384521484, "learning_rate": 1.337591628589615e-05, "loss": 0.33893144130706787, "step": 7574, "token_acc": 0.8795398520953164 }, { "epoch": 0.4087303728484325, "grad_norm": 0.46130043268203735, "learning_rate": 1.3374271266818535e-05, "loss": 0.389096736907959, "step": 7575, "token_acc": 0.8643091028159791 }, { "epoch": 0.408784330653429, "grad_norm": 0.39996179938316345, "learning_rate": 1.3372626144691558e-05, "loss": 0.37732744216918945, "step": 7576, "token_acc": 0.8696629213483146 }, { "epoch": 0.4088382884584255, "grad_norm": 0.41382479667663574, "learning_rate": 1.3370980919565466e-05, "loss": 0.41228771209716797, "step": 7577, "token_acc": 0.8542001070090958 }, { "epoch": 0.408892246263422, "grad_norm": 0.4995872676372528, "learning_rate": 1.3369335591490498e-05, "loss": 0.39794307947158813, "step": 7578, "token_acc": 0.8715083798882681 }, { "epoch": 0.4089462040684185, "grad_norm": 0.39199045300483704, "learning_rate": 1.3367690160516904e-05, "loss": 0.3928336799144745, "step": 7579, "token_acc": 0.864956568946797 }, { "epoch": 0.409000161873415, "grad_norm": 0.43151915073394775, "learning_rate": 1.3366044626694933e-05, "loss": 0.40442848205566406, "step": 7580, "token_acc": 0.8655484209146901 }, { "epoch": 0.4090541196784115, "grad_norm": 0.34743955731391907, "learning_rate": 1.3364398990074845e-05, "loss": 0.39062803983688354, "step": 7581, "token_acc": 0.8627611634575993 }, { "epoch": 0.409108077483408, "grad_norm": 0.41361865401268005, "learning_rate": 1.336275325070689e-05, "loss": 0.34637248516082764, "step": 7582, "token_acc": 0.8789611238157465 }, { "epoch": 0.40916203528840445, "grad_norm": 0.3231585919857025, "learning_rate": 1.3361107408641329e-05, "loss": 0.3454672694206238, "step": 7583, "token_acc": 0.8770267808343961 }, { "epoch": 0.40921599309340095, "grad_norm": 0.4034496545791626, "learning_rate": 1.3359461463928434e-05, "loss": 0.41409552097320557, "step": 7584, "token_acc": 0.8568248225963545 }, { "epoch": 0.40926995089839746, "grad_norm": 0.5400397181510925, "learning_rate": 1.3357815416618463e-05, "loss": 0.3561951518058777, "step": 7585, "token_acc": 0.873006379585327 }, { "epoch": 0.40932390870339397, "grad_norm": 0.43928560614585876, "learning_rate": 1.3356169266761688e-05, "loss": 0.41890013217926025, "step": 7586, "token_acc": 0.8597156398104265 }, { "epoch": 0.4093778665083904, "grad_norm": 0.46824994683265686, "learning_rate": 1.3354523014408385e-05, "loss": 0.4659477770328522, "step": 7587, "token_acc": 0.84181631019101 }, { "epoch": 0.4094318243133869, "grad_norm": 0.45938751101493835, "learning_rate": 1.3352876659608827e-05, "loss": 0.34688615798950195, "step": 7588, "token_acc": 0.8780633592349073 }, { "epoch": 0.40948578211838343, "grad_norm": 0.436162531375885, "learning_rate": 1.3351230202413294e-05, "loss": 0.4030471742153168, "step": 7589, "token_acc": 0.8736013261500207 }, { "epoch": 0.40953973992337994, "grad_norm": 0.47663918137550354, "learning_rate": 1.3349583642872068e-05, "loss": 0.3514270782470703, "step": 7590, "token_acc": 0.8770004572473709 }, { "epoch": 0.4095936977283764, "grad_norm": 0.41280442476272583, "learning_rate": 1.3347936981035436e-05, "loss": 0.35046255588531494, "step": 7591, "token_acc": 0.8776879670581058 }, { "epoch": 0.4096476555333729, "grad_norm": 0.3616144359111786, "learning_rate": 1.3346290216953683e-05, "loss": 0.38204091787338257, "step": 7592, "token_acc": 0.8629154078549849 }, { "epoch": 0.4097016133383694, "grad_norm": 0.4048956334590912, "learning_rate": 1.3344643350677107e-05, "loss": 0.394487202167511, "step": 7593, "token_acc": 0.8668831168831169 }, { "epoch": 0.4097555711433659, "grad_norm": 0.38144609332084656, "learning_rate": 1.3342996382255998e-05, "loss": 0.330875039100647, "step": 7594, "token_acc": 0.8823347745521927 }, { "epoch": 0.40980952894836237, "grad_norm": 0.4397096037864685, "learning_rate": 1.3341349311740657e-05, "loss": 0.34259849786758423, "step": 7595, "token_acc": 0.8773109243697479 }, { "epoch": 0.4098634867533589, "grad_norm": 0.33549103140830994, "learning_rate": 1.3339702139181378e-05, "loss": 0.34929290413856506, "step": 7596, "token_acc": 0.8770458678520914 }, { "epoch": 0.4099174445583554, "grad_norm": 0.27373209595680237, "learning_rate": 1.3338054864628477e-05, "loss": 0.3484726548194885, "step": 7597, "token_acc": 0.8804488134246847 }, { "epoch": 0.40997140236335183, "grad_norm": 0.42563921213150024, "learning_rate": 1.3336407488132252e-05, "loss": 0.35912269353866577, "step": 7598, "token_acc": 0.88035360463344 }, { "epoch": 0.41002536016834834, "grad_norm": 0.36066997051239014, "learning_rate": 1.3334760009743015e-05, "loss": 0.35523152351379395, "step": 7599, "token_acc": 0.8738847724179646 }, { "epoch": 0.41007931797334485, "grad_norm": 0.30178356170654297, "learning_rate": 1.333311242951108e-05, "loss": 0.38348376750946045, "step": 7600, "token_acc": 0.8675766283524904 }, { "epoch": 0.41013327577834136, "grad_norm": 0.430528461933136, "learning_rate": 1.3331464747486768e-05, "loss": 0.41575902700424194, "step": 7601, "token_acc": 0.8550791455355771 }, { "epoch": 0.4101872335833378, "grad_norm": 0.41736042499542236, "learning_rate": 1.3329816963720395e-05, "loss": 0.400808185338974, "step": 7602, "token_acc": 0.8623037639493096 }, { "epoch": 0.4102411913883343, "grad_norm": 0.34485355019569397, "learning_rate": 1.3328169078262282e-05, "loss": 0.32353633642196655, "step": 7603, "token_acc": 0.8852297861882527 }, { "epoch": 0.4102951491933308, "grad_norm": 0.37752920389175415, "learning_rate": 1.3326521091162759e-05, "loss": 0.3683839738368988, "step": 7604, "token_acc": 0.8713189804503836 }, { "epoch": 0.41034910699832733, "grad_norm": 0.3189120888710022, "learning_rate": 1.3324873002472152e-05, "loss": 0.358971506357193, "step": 7605, "token_acc": 0.8769200277474978 }, { "epoch": 0.4104030648033238, "grad_norm": 0.416004478931427, "learning_rate": 1.3323224812240797e-05, "loss": 0.35303711891174316, "step": 7606, "token_acc": 0.8807339449541285 }, { "epoch": 0.4104570226083203, "grad_norm": 0.396062433719635, "learning_rate": 1.3321576520519025e-05, "loss": 0.388555645942688, "step": 7607, "token_acc": 0.8656990068754775 }, { "epoch": 0.4105109804133168, "grad_norm": 0.4244312345981598, "learning_rate": 1.3319928127357176e-05, "loss": 0.3754514753818512, "step": 7608, "token_acc": 0.8665350744002429 }, { "epoch": 0.4105649382183133, "grad_norm": 0.3928195536136627, "learning_rate": 1.3318279632805591e-05, "loss": 0.4178228974342346, "step": 7609, "token_acc": 0.855392486872223 }, { "epoch": 0.41061889602330975, "grad_norm": 0.4190812408924103, "learning_rate": 1.3316631036914616e-05, "loss": 0.3309524357318878, "step": 7610, "token_acc": 0.8840722495894909 }, { "epoch": 0.41067285382830626, "grad_norm": 0.32830721139907837, "learning_rate": 1.33149823397346e-05, "loss": 0.369411826133728, "step": 7611, "token_acc": 0.8695170229612035 }, { "epoch": 0.41072681163330277, "grad_norm": 0.3662072718143463, "learning_rate": 1.3313333541315887e-05, "loss": 0.3809877634048462, "step": 7612, "token_acc": 0.8705838601710673 }, { "epoch": 0.4107807694382993, "grad_norm": 0.2929287850856781, "learning_rate": 1.331168464170884e-05, "loss": 0.42239099740982056, "step": 7613, "token_acc": 0.857371581054036 }, { "epoch": 0.4108347272432957, "grad_norm": 0.322123646736145, "learning_rate": 1.3310035640963813e-05, "loss": 0.347578763961792, "step": 7614, "token_acc": 0.8766532917223956 }, { "epoch": 0.41088868504829223, "grad_norm": 0.4522111713886261, "learning_rate": 1.330838653913116e-05, "loss": 0.38561543822288513, "step": 7615, "token_acc": 0.8684654300168634 }, { "epoch": 0.41094264285328874, "grad_norm": 0.4284585118293762, "learning_rate": 1.3306737336261249e-05, "loss": 0.41688328981399536, "step": 7616, "token_acc": 0.8587845042056629 }, { "epoch": 0.4109966006582852, "grad_norm": 0.34606435894966125, "learning_rate": 1.330508803240445e-05, "loss": 0.43950891494750977, "step": 7617, "token_acc": 0.8530653043091959 }, { "epoch": 0.4110505584632817, "grad_norm": 0.3328540027141571, "learning_rate": 1.3303438627611123e-05, "loss": 0.3715175986289978, "step": 7618, "token_acc": 0.8758062624604198 }, { "epoch": 0.4111045162682782, "grad_norm": 0.4206486642360687, "learning_rate": 1.3301789121931648e-05, "loss": 0.3605574667453766, "step": 7619, "token_acc": 0.8645680819912153 }, { "epoch": 0.4111584740732747, "grad_norm": 0.43294692039489746, "learning_rate": 1.3300139515416399e-05, "loss": 0.38594716787338257, "step": 7620, "token_acc": 0.8653182042427232 }, { "epoch": 0.41121243187827117, "grad_norm": 0.4168526530265808, "learning_rate": 1.3298489808115754e-05, "loss": 0.3716249465942383, "step": 7621, "token_acc": 0.8762844780962683 }, { "epoch": 0.4112663896832677, "grad_norm": 0.4942280650138855, "learning_rate": 1.3296840000080096e-05, "loss": 0.3292350172996521, "step": 7622, "token_acc": 0.8820727757668128 }, { "epoch": 0.4113203474882642, "grad_norm": 0.3173290193080902, "learning_rate": 1.3295190091359803e-05, "loss": 0.3250221014022827, "step": 7623, "token_acc": 0.886109191430546 }, { "epoch": 0.4113743052932607, "grad_norm": 0.43941670656204224, "learning_rate": 1.3293540082005275e-05, "loss": 0.3667753338813782, "step": 7624, "token_acc": 0.8738980452280567 }, { "epoch": 0.41142826309825714, "grad_norm": 0.4150192141532898, "learning_rate": 1.3291889972066888e-05, "loss": 0.3418677747249603, "step": 7625, "token_acc": 0.874585635359116 }, { "epoch": 0.41148222090325365, "grad_norm": 0.3679444491863251, "learning_rate": 1.3290239761595047e-05, "loss": 0.3996083438396454, "step": 7626, "token_acc": 0.8649218556189531 }, { "epoch": 0.41153617870825016, "grad_norm": 0.4435380697250366, "learning_rate": 1.3288589450640146e-05, "loss": 0.3634573817253113, "step": 7627, "token_acc": 0.8739399869536856 }, { "epoch": 0.41159013651324666, "grad_norm": 0.33155128359794617, "learning_rate": 1.3286939039252582e-05, "loss": 0.3479686379432678, "step": 7628, "token_acc": 0.8743443291758947 }, { "epoch": 0.4116440943182431, "grad_norm": 0.451752632856369, "learning_rate": 1.3285288527482765e-05, "loss": 0.3665112257003784, "step": 7629, "token_acc": 0.8697625698324022 }, { "epoch": 0.4116980521232396, "grad_norm": 0.4313414990901947, "learning_rate": 1.3283637915381093e-05, "loss": 0.40946972370147705, "step": 7630, "token_acc": 0.8578600114744693 }, { "epoch": 0.41175200992823613, "grad_norm": 0.3324686288833618, "learning_rate": 1.3281987202997984e-05, "loss": 0.35863828659057617, "step": 7631, "token_acc": 0.8751899030033891 }, { "epoch": 0.41180596773323264, "grad_norm": 0.4036158621311188, "learning_rate": 1.328033639038384e-05, "loss": 0.32693952322006226, "step": 7632, "token_acc": 0.8847801578354002 }, { "epoch": 0.4118599255382291, "grad_norm": 0.31198254227638245, "learning_rate": 1.3278685477589086e-05, "loss": 0.37278303503990173, "step": 7633, "token_acc": 0.8700426301511432 }, { "epoch": 0.4119138833432256, "grad_norm": 0.375458687543869, "learning_rate": 1.3277034464664133e-05, "loss": 0.37097495794296265, "step": 7634, "token_acc": 0.8700519792083167 }, { "epoch": 0.4119678411482221, "grad_norm": 0.37891685962677, "learning_rate": 1.327538335165941e-05, "loss": 0.3766171634197235, "step": 7635, "token_acc": 0.8658485566866546 }, { "epoch": 0.4120217989532186, "grad_norm": 0.41444894671440125, "learning_rate": 1.3273732138625336e-05, "loss": 0.434221088886261, "step": 7636, "token_acc": 0.8506844953836358 }, { "epoch": 0.41207575675821506, "grad_norm": 0.4952709674835205, "learning_rate": 1.3272080825612341e-05, "loss": 0.37293335795402527, "step": 7637, "token_acc": 0.8649258542875564 }, { "epoch": 0.41212971456321157, "grad_norm": 0.430345356464386, "learning_rate": 1.3270429412670854e-05, "loss": 0.31487876176834106, "step": 7638, "token_acc": 0.8844694094428419 }, { "epoch": 0.4121836723682081, "grad_norm": 0.37537914514541626, "learning_rate": 1.3268777899851309e-05, "loss": 0.3421787619590759, "step": 7639, "token_acc": 0.8773474178403756 }, { "epoch": 0.41223763017320453, "grad_norm": 0.28544971346855164, "learning_rate": 1.3267126287204144e-05, "loss": 0.3752036392688751, "step": 7640, "token_acc": 0.8679971146910315 }, { "epoch": 0.41229158797820104, "grad_norm": 0.4714369475841522, "learning_rate": 1.3265474574779796e-05, "loss": 0.3835121989250183, "step": 7641, "token_acc": 0.8656563587958223 }, { "epoch": 0.41234554578319754, "grad_norm": 0.3363199830055237, "learning_rate": 1.3263822762628711e-05, "loss": 0.3934910297393799, "step": 7642, "token_acc": 0.8638963019443385 }, { "epoch": 0.41239950358819405, "grad_norm": 0.5079508423805237, "learning_rate": 1.3262170850801335e-05, "loss": 0.3642483949661255, "step": 7643, "token_acc": 0.8681436483830623 }, { "epoch": 0.4124534613931905, "grad_norm": 0.293226420879364, "learning_rate": 1.3260518839348113e-05, "loss": 0.35645341873168945, "step": 7644, "token_acc": 0.87772347790674 }, { "epoch": 0.412507419198187, "grad_norm": 0.3588810861110687, "learning_rate": 1.3258866728319502e-05, "loss": 0.35822707414627075, "step": 7645, "token_acc": 0.8711256117455138 }, { "epoch": 0.4125613770031835, "grad_norm": 0.3356541693210602, "learning_rate": 1.3257214517765956e-05, "loss": 0.3705308139324188, "step": 7646, "token_acc": 0.8761310054798012 }, { "epoch": 0.41261533480818, "grad_norm": 0.39873677492141724, "learning_rate": 1.3255562207737929e-05, "loss": 0.3305296003818512, "step": 7647, "token_acc": 0.8761168384879725 }, { "epoch": 0.4126692926131765, "grad_norm": 0.34518224000930786, "learning_rate": 1.3253909798285886e-05, "loss": 0.39259424805641174, "step": 7648, "token_acc": 0.8604398588107521 }, { "epoch": 0.412723250418173, "grad_norm": 0.45875808596611023, "learning_rate": 1.325225728946029e-05, "loss": 0.4267657399177551, "step": 7649, "token_acc": 0.8550938337801609 }, { "epoch": 0.4127772082231695, "grad_norm": 0.5211137533187866, "learning_rate": 1.3250604681311608e-05, "loss": 0.40449440479278564, "step": 7650, "token_acc": 0.8616649537512847 }, { "epoch": 0.412831166028166, "grad_norm": 0.43146395683288574, "learning_rate": 1.3248951973890311e-05, "loss": 0.37764203548431396, "step": 7651, "token_acc": 0.8667696195423277 }, { "epoch": 0.41288512383316245, "grad_norm": 0.28690534830093384, "learning_rate": 1.3247299167246874e-05, "loss": 0.39352044463157654, "step": 7652, "token_acc": 0.8704427904629747 }, { "epoch": 0.41293908163815896, "grad_norm": 0.43051108717918396, "learning_rate": 1.324564626143177e-05, "loss": 0.3540692627429962, "step": 7653, "token_acc": 0.8723843206601827 }, { "epoch": 0.41299303944315546, "grad_norm": 0.361070454120636, "learning_rate": 1.3243993256495476e-05, "loss": 0.37368160486221313, "step": 7654, "token_acc": 0.8745085190039319 }, { "epoch": 0.41304699724815197, "grad_norm": 0.36656466126441956, "learning_rate": 1.3242340152488485e-05, "loss": 0.41881510615348816, "step": 7655, "token_acc": 0.8549573747518393 }, { "epoch": 0.4131009550531484, "grad_norm": 0.3465616703033447, "learning_rate": 1.324068694946127e-05, "loss": 0.34847545623779297, "step": 7656, "token_acc": 0.8777423920736023 }, { "epoch": 0.41315491285814493, "grad_norm": 0.5004872679710388, "learning_rate": 1.3239033647464323e-05, "loss": 0.39054402709007263, "step": 7657, "token_acc": 0.8617021276595744 }, { "epoch": 0.41320887066314144, "grad_norm": 0.5212293863296509, "learning_rate": 1.323738024654814e-05, "loss": 0.3839351534843445, "step": 7658, "token_acc": 0.8667870036101083 }, { "epoch": 0.41326282846813794, "grad_norm": 0.36158114671707153, "learning_rate": 1.3235726746763212e-05, "loss": 0.35283997654914856, "step": 7659, "token_acc": 0.885321724935383 }, { "epoch": 0.4133167862731344, "grad_norm": 0.33402180671691895, "learning_rate": 1.3234073148160034e-05, "loss": 0.3858160674571991, "step": 7660, "token_acc": 0.868179312878927 }, { "epoch": 0.4133707440781309, "grad_norm": 0.3353082239627838, "learning_rate": 1.3232419450789111e-05, "loss": 0.4439278841018677, "step": 7661, "token_acc": 0.8447180689924754 }, { "epoch": 0.4134247018831274, "grad_norm": 0.45363810658454895, "learning_rate": 1.3230765654700948e-05, "loss": 0.39481639862060547, "step": 7662, "token_acc": 0.8619210977701544 }, { "epoch": 0.41347865968812386, "grad_norm": 0.3993529677391052, "learning_rate": 1.3229111759946044e-05, "loss": 0.4087640941143036, "step": 7663, "token_acc": 0.8599629858112277 }, { "epoch": 0.41353261749312037, "grad_norm": 0.35088855028152466, "learning_rate": 1.3227457766574916e-05, "loss": 0.39391833543777466, "step": 7664, "token_acc": 0.8687676966637942 }, { "epoch": 0.4135865752981169, "grad_norm": 0.4158759117126465, "learning_rate": 1.322580367463807e-05, "loss": 0.40252119302749634, "step": 7665, "token_acc": 0.8567684242984891 }, { "epoch": 0.4136405331031134, "grad_norm": 0.41174206137657166, "learning_rate": 1.3224149484186027e-05, "loss": 0.4378564953804016, "step": 7666, "token_acc": 0.8500842223469961 }, { "epoch": 0.41369449090810984, "grad_norm": 0.4836721420288086, "learning_rate": 1.3222495195269303e-05, "loss": 0.4204029142856598, "step": 7667, "token_acc": 0.8637610976594028 }, { "epoch": 0.41374844871310634, "grad_norm": 0.3223855197429657, "learning_rate": 1.3220840807938418e-05, "loss": 0.33202141523361206, "step": 7668, "token_acc": 0.8803900655188176 }, { "epoch": 0.41380240651810285, "grad_norm": 0.3226166069507599, "learning_rate": 1.3219186322243902e-05, "loss": 0.33996063470840454, "step": 7669, "token_acc": 0.8807672369103162 }, { "epoch": 0.41385636432309936, "grad_norm": 0.4126635491847992, "learning_rate": 1.3217531738236276e-05, "loss": 0.39830172061920166, "step": 7670, "token_acc": 0.8648423392137797 }, { "epoch": 0.4139103221280958, "grad_norm": 0.31606778502464294, "learning_rate": 1.3215877055966076e-05, "loss": 0.40102577209472656, "step": 7671, "token_acc": 0.8653530377668309 }, { "epoch": 0.4139642799330923, "grad_norm": 0.4332435429096222, "learning_rate": 1.3214222275483832e-05, "loss": 0.4254363477230072, "step": 7672, "token_acc": 0.852039456145028 }, { "epoch": 0.4140182377380888, "grad_norm": 0.3799898326396942, "learning_rate": 1.321256739684008e-05, "loss": 0.3712809085845947, "step": 7673, "token_acc": 0.8654581504036255 }, { "epoch": 0.41407219554308533, "grad_norm": 0.4263361990451813, "learning_rate": 1.3210912420085364e-05, "loss": 0.3695482611656189, "step": 7674, "token_acc": 0.8650772509323389 }, { "epoch": 0.4141261533480818, "grad_norm": 0.47164902091026306, "learning_rate": 1.3209257345270223e-05, "loss": 0.379241943359375, "step": 7675, "token_acc": 0.8702895034772764 }, { "epoch": 0.4141801111530783, "grad_norm": 0.34871843457221985, "learning_rate": 1.3207602172445202e-05, "loss": 0.3918711543083191, "step": 7676, "token_acc": 0.8642699926632429 }, { "epoch": 0.4142340689580748, "grad_norm": 0.43133896589279175, "learning_rate": 1.320594690166085e-05, "loss": 0.40200546383857727, "step": 7677, "token_acc": 0.8580654630852781 }, { "epoch": 0.4142880267630713, "grad_norm": 0.38794854283332825, "learning_rate": 1.3204291532967723e-05, "loss": 0.325533390045166, "step": 7678, "token_acc": 0.8841323191225898 }, { "epoch": 0.41434198456806776, "grad_norm": 0.31588733196258545, "learning_rate": 1.3202636066416372e-05, "loss": 0.33623987436294556, "step": 7679, "token_acc": 0.8806595863610955 }, { "epoch": 0.41439594237306426, "grad_norm": 0.4736155867576599, "learning_rate": 1.3200980502057353e-05, "loss": 0.3476085066795349, "step": 7680, "token_acc": 0.8720779220779221 }, { "epoch": 0.41444990017806077, "grad_norm": 0.3512064218521118, "learning_rate": 1.3199324839941228e-05, "loss": 0.34736719727516174, "step": 7681, "token_acc": 0.8782346175963197 }, { "epoch": 0.4145038579830572, "grad_norm": 0.48730412125587463, "learning_rate": 1.319766908011856e-05, "loss": 0.3836165964603424, "step": 7682, "token_acc": 0.8639716730598996 }, { "epoch": 0.41455781578805373, "grad_norm": 0.4474477171897888, "learning_rate": 1.3196013222639916e-05, "loss": 0.3628990352153778, "step": 7683, "token_acc": 0.8727850217318622 }, { "epoch": 0.41461177359305024, "grad_norm": 0.4242042303085327, "learning_rate": 1.3194357267555866e-05, "loss": 0.407789409160614, "step": 7684, "token_acc": 0.8595325389550871 }, { "epoch": 0.41466573139804674, "grad_norm": 0.34434327483177185, "learning_rate": 1.3192701214916983e-05, "loss": 0.3956593871116638, "step": 7685, "token_acc": 0.8607977671822304 }, { "epoch": 0.4147196892030432, "grad_norm": 0.401017963886261, "learning_rate": 1.3191045064773837e-05, "loss": 0.4088956117630005, "step": 7686, "token_acc": 0.8595347458882797 }, { "epoch": 0.4147736470080397, "grad_norm": 0.3683088719844818, "learning_rate": 1.3189388817177016e-05, "loss": 0.3563992977142334, "step": 7687, "token_acc": 0.8728586171310629 }, { "epoch": 0.4148276048130362, "grad_norm": 0.3405455946922302, "learning_rate": 1.3187732472177093e-05, "loss": 0.3101763129234314, "step": 7688, "token_acc": 0.8902862618277638 }, { "epoch": 0.4148815626180327, "grad_norm": 0.543700098991394, "learning_rate": 1.3186076029824657e-05, "loss": 0.4337177276611328, "step": 7689, "token_acc": 0.8559385537654552 }, { "epoch": 0.41493552042302917, "grad_norm": 0.4148791432380676, "learning_rate": 1.3184419490170293e-05, "loss": 0.3483065962791443, "step": 7690, "token_acc": 0.8750521775427856 }, { "epoch": 0.4149894782280257, "grad_norm": 0.4277019500732422, "learning_rate": 1.3182762853264592e-05, "loss": 0.3854186534881592, "step": 7691, "token_acc": 0.867566365119383 }, { "epoch": 0.4150434360330222, "grad_norm": 0.44066524505615234, "learning_rate": 1.3181106119158145e-05, "loss": 0.37378358840942383, "step": 7692, "token_acc": 0.8656601448158989 }, { "epoch": 0.4150973938380187, "grad_norm": 0.46930158138275146, "learning_rate": 1.3179449287901554e-05, "loss": 0.4064811170101166, "step": 7693, "token_acc": 0.8577574243685817 }, { "epoch": 0.41515135164301514, "grad_norm": 0.3253172039985657, "learning_rate": 1.3177792359545412e-05, "loss": 0.3803098201751709, "step": 7694, "token_acc": 0.86973779167669 }, { "epoch": 0.41520530944801165, "grad_norm": 0.40635937452316284, "learning_rate": 1.3176135334140323e-05, "loss": 0.3763771057128906, "step": 7695, "token_acc": 0.8613695571269073 }, { "epoch": 0.41525926725300816, "grad_norm": 0.4393317699432373, "learning_rate": 1.3174478211736899e-05, "loss": 0.4319799244403839, "step": 7696, "token_acc": 0.8526992672349335 }, { "epoch": 0.41531322505800466, "grad_norm": 0.4321020543575287, "learning_rate": 1.3172820992385737e-05, "loss": 0.38195210695266724, "step": 7697, "token_acc": 0.8680968256269025 }, { "epoch": 0.4153671828630011, "grad_norm": 0.41461795568466187, "learning_rate": 1.3171163676137456e-05, "loss": 0.3790876269340515, "step": 7698, "token_acc": 0.8636706379128336 }, { "epoch": 0.4154211406679976, "grad_norm": 0.45730653405189514, "learning_rate": 1.3169506263042661e-05, "loss": 0.3049885928630829, "step": 7699, "token_acc": 0.8986849416558622 }, { "epoch": 0.41547509847299413, "grad_norm": 0.5435136556625366, "learning_rate": 1.316784875315198e-05, "loss": 0.3919234573841095, "step": 7700, "token_acc": 0.8584326471650415 }, { "epoch": 0.41552905627799064, "grad_norm": 0.3499218225479126, "learning_rate": 1.3166191146516027e-05, "loss": 0.3443092703819275, "step": 7701, "token_acc": 0.8813229571984436 }, { "epoch": 0.4155830140829871, "grad_norm": 0.4042758643627167, "learning_rate": 1.3164533443185425e-05, "loss": 0.37118616700172424, "step": 7702, "token_acc": 0.8675271739130435 }, { "epoch": 0.4156369718879836, "grad_norm": 0.30249738693237305, "learning_rate": 1.3162875643210802e-05, "loss": 0.3477526903152466, "step": 7703, "token_acc": 0.8797640653357531 }, { "epoch": 0.4156909296929801, "grad_norm": 0.37379035353660583, "learning_rate": 1.3161217746642785e-05, "loss": 0.3812486529350281, "step": 7704, "token_acc": 0.8677352097993224 }, { "epoch": 0.41574488749797656, "grad_norm": 0.5148494839668274, "learning_rate": 1.3159559753532007e-05, "loss": 0.3695783019065857, "step": 7705, "token_acc": 0.8708568622636877 }, { "epoch": 0.41579884530297306, "grad_norm": 0.3253045380115509, "learning_rate": 1.31579016639291e-05, "loss": 0.41562163829803467, "step": 7706, "token_acc": 0.8582420448617631 }, { "epoch": 0.41585280310796957, "grad_norm": 0.37033945322036743, "learning_rate": 1.3156243477884705e-05, "loss": 0.34734028577804565, "step": 7707, "token_acc": 0.8760939659143252 }, { "epoch": 0.4159067609129661, "grad_norm": 0.409553200006485, "learning_rate": 1.315458519544946e-05, "loss": 0.39049383997917175, "step": 7708, "token_acc": 0.8637205987170349 }, { "epoch": 0.41596071871796253, "grad_norm": 0.3881126046180725, "learning_rate": 1.315292681667401e-05, "loss": 0.3104501962661743, "step": 7709, "token_acc": 0.8875438808860913 }, { "epoch": 0.41601467652295904, "grad_norm": 0.3354957699775696, "learning_rate": 1.3151268341609e-05, "loss": 0.3969978988170624, "step": 7710, "token_acc": 0.863029315960912 }, { "epoch": 0.41606863432795554, "grad_norm": 0.4018626809120178, "learning_rate": 1.3149609770305083e-05, "loss": 0.3593297004699707, "step": 7711, "token_acc": 0.8754142629188658 }, { "epoch": 0.41612259213295205, "grad_norm": 0.3995495140552521, "learning_rate": 1.3147951102812904e-05, "loss": 0.4314614534378052, "step": 7712, "token_acc": 0.8495499878375091 }, { "epoch": 0.4161765499379485, "grad_norm": 0.29827407002449036, "learning_rate": 1.3146292339183129e-05, "loss": 0.3850187659263611, "step": 7713, "token_acc": 0.8661283467913302 }, { "epoch": 0.416230507742945, "grad_norm": 0.3701536953449249, "learning_rate": 1.3144633479466406e-05, "loss": 0.3906499743461609, "step": 7714, "token_acc": 0.8676185439802574 }, { "epoch": 0.4162844655479415, "grad_norm": 0.3475353419780731, "learning_rate": 1.3142974523713398e-05, "loss": 0.32587191462516785, "step": 7715, "token_acc": 0.8837016574585635 }, { "epoch": 0.416338423352938, "grad_norm": 0.3792470395565033, "learning_rate": 1.3141315471974778e-05, "loss": 0.38527607917785645, "step": 7716, "token_acc": 0.870832897899467 }, { "epoch": 0.4163923811579345, "grad_norm": 0.3430788815021515, "learning_rate": 1.3139656324301203e-05, "loss": 0.2835344076156616, "step": 7717, "token_acc": 0.8995952677459527 }, { "epoch": 0.416446338962931, "grad_norm": 0.3705673813819885, "learning_rate": 1.3137997080743343e-05, "loss": 0.3516806364059448, "step": 7718, "token_acc": 0.8761198208286675 }, { "epoch": 0.4165002967679275, "grad_norm": 0.4761599898338318, "learning_rate": 1.3136337741351878e-05, "loss": 0.4353192150592804, "step": 7719, "token_acc": 0.8498357578601595 }, { "epoch": 0.416554254572924, "grad_norm": 0.3860294818878174, "learning_rate": 1.3134678306177482e-05, "loss": 0.3577668070793152, "step": 7720, "token_acc": 0.8714018054845852 }, { "epoch": 0.41660821237792045, "grad_norm": 0.4268248379230499, "learning_rate": 1.3133018775270829e-05, "loss": 0.36713889241218567, "step": 7721, "token_acc": 0.8698441796516957 }, { "epoch": 0.41666217018291696, "grad_norm": 0.34203630685806274, "learning_rate": 1.3131359148682602e-05, "loss": 0.32256367802619934, "step": 7722, "token_acc": 0.8869283928833995 }, { "epoch": 0.41671612798791346, "grad_norm": 0.24835513532161713, "learning_rate": 1.312969942646349e-05, "loss": 0.3209055960178375, "step": 7723, "token_acc": 0.8902259660382404 }, { "epoch": 0.4167700857929099, "grad_norm": 0.38537850975990295, "learning_rate": 1.3128039608664173e-05, "loss": 0.35648563504219055, "step": 7724, "token_acc": 0.8715114171801377 }, { "epoch": 0.4168240435979064, "grad_norm": 0.5557768940925598, "learning_rate": 1.3126379695335351e-05, "loss": 0.4215373992919922, "step": 7725, "token_acc": 0.8553261767134599 }, { "epoch": 0.41687800140290293, "grad_norm": 0.3619753420352936, "learning_rate": 1.3124719686527709e-05, "loss": 0.3111213147640228, "step": 7726, "token_acc": 0.8895903117753157 }, { "epoch": 0.41693195920789944, "grad_norm": 0.37637773156166077, "learning_rate": 1.3123059582291948e-05, "loss": 0.34299612045288086, "step": 7727, "token_acc": 0.8742765273311897 }, { "epoch": 0.4169859170128959, "grad_norm": 0.3876894414424896, "learning_rate": 1.3121399382678764e-05, "loss": 0.3529885709285736, "step": 7728, "token_acc": 0.8808219178082192 }, { "epoch": 0.4170398748178924, "grad_norm": 0.34204623103141785, "learning_rate": 1.311973908773886e-05, "loss": 0.3059946596622467, "step": 7729, "token_acc": 0.8883758826724606 }, { "epoch": 0.4170938326228889, "grad_norm": 0.453104168176651, "learning_rate": 1.3118078697522945e-05, "loss": 0.4049864709377289, "step": 7730, "token_acc": 0.861558784676354 }, { "epoch": 0.4171477904278854, "grad_norm": 0.3343226909637451, "learning_rate": 1.311641821208172e-05, "loss": 0.33777761459350586, "step": 7731, "token_acc": 0.8817891373801917 }, { "epoch": 0.41720174823288186, "grad_norm": 0.413540244102478, "learning_rate": 1.3114757631465902e-05, "loss": 0.423360139131546, "step": 7732, "token_acc": 0.8595951417004049 }, { "epoch": 0.41725570603787837, "grad_norm": 0.4199320077896118, "learning_rate": 1.3113096955726201e-05, "loss": 0.3583078384399414, "step": 7733, "token_acc": 0.8741915614413305 }, { "epoch": 0.4173096638428749, "grad_norm": 0.41596361994743347, "learning_rate": 1.3111436184913331e-05, "loss": 0.3759942054748535, "step": 7734, "token_acc": 0.8651636363636364 }, { "epoch": 0.4173636216478714, "grad_norm": 0.38037675619125366, "learning_rate": 1.3109775319078017e-05, "loss": 0.3190307021141052, "step": 7735, "token_acc": 0.8853303471444569 }, { "epoch": 0.41741757945286784, "grad_norm": 0.4991533160209656, "learning_rate": 1.3108114358270982e-05, "loss": 0.41911906003952026, "step": 7736, "token_acc": 0.8586043112897332 }, { "epoch": 0.41747153725786434, "grad_norm": 0.4090191125869751, "learning_rate": 1.3106453302542947e-05, "loss": 0.4088401794433594, "step": 7737, "token_acc": 0.8604618015963512 }, { "epoch": 0.41752549506286085, "grad_norm": 0.41699403524398804, "learning_rate": 1.3104792151944641e-05, "loss": 0.3484342694282532, "step": 7738, "token_acc": 0.8743493089211991 }, { "epoch": 0.41757945286785736, "grad_norm": 0.3537808358669281, "learning_rate": 1.3103130906526797e-05, "loss": 0.3887905478477478, "step": 7739, "token_acc": 0.8661567877629063 }, { "epoch": 0.4176334106728538, "grad_norm": 0.3726661503314972, "learning_rate": 1.3101469566340149e-05, "loss": 0.3480495512485504, "step": 7740, "token_acc": 0.8743693239152371 }, { "epoch": 0.4176873684778503, "grad_norm": 0.3976292610168457, "learning_rate": 1.309980813143543e-05, "loss": 0.3821359872817993, "step": 7741, "token_acc": 0.8686517322944205 }, { "epoch": 0.4177413262828468, "grad_norm": 0.2911889851093292, "learning_rate": 1.3098146601863384e-05, "loss": 0.3646475672721863, "step": 7742, "token_acc": 0.867068216837092 }, { "epoch": 0.41779528408784333, "grad_norm": 0.3628210723400116, "learning_rate": 1.3096484977674756e-05, "loss": 0.3777408003807068, "step": 7743, "token_acc": 0.8682278481012659 }, { "epoch": 0.4178492418928398, "grad_norm": 0.48579666018486023, "learning_rate": 1.3094823258920283e-05, "loss": 0.38124924898147583, "step": 7744, "token_acc": 0.8649592549476135 }, { "epoch": 0.4179031996978363, "grad_norm": 0.4319723844528198, "learning_rate": 1.3093161445650719e-05, "loss": 0.3650226593017578, "step": 7745, "token_acc": 0.8748541423570595 }, { "epoch": 0.4179571575028328, "grad_norm": 0.3643532395362854, "learning_rate": 1.3091499537916817e-05, "loss": 0.4000534415245056, "step": 7746, "token_acc": 0.8640563640563641 }, { "epoch": 0.41801111530782925, "grad_norm": 0.3843685984611511, "learning_rate": 1.3089837535769328e-05, "loss": 0.33336588740348816, "step": 7747, "token_acc": 0.8847902981303689 }, { "epoch": 0.41806507311282576, "grad_norm": 0.39206621050834656, "learning_rate": 1.308817543925901e-05, "loss": 0.3298514783382416, "step": 7748, "token_acc": 0.8832588941937471 }, { "epoch": 0.41811903091782227, "grad_norm": 0.37604033946990967, "learning_rate": 1.3086513248436623e-05, "loss": 0.3583422303199768, "step": 7749, "token_acc": 0.8728655282817502 }, { "epoch": 0.4181729887228188, "grad_norm": 0.41568735241889954, "learning_rate": 1.3084850963352928e-05, "loss": 0.36313626170158386, "step": 7750, "token_acc": 0.8723627309657974 }, { "epoch": 0.4182269465278152, "grad_norm": 0.3090169131755829, "learning_rate": 1.3083188584058695e-05, "loss": 0.31469839811325073, "step": 7751, "token_acc": 0.8862282582797236 }, { "epoch": 0.41828090433281173, "grad_norm": 0.4081467092037201, "learning_rate": 1.3081526110604693e-05, "loss": 0.36270543932914734, "step": 7752, "token_acc": 0.875280143433438 }, { "epoch": 0.41833486213780824, "grad_norm": 0.47581011056900024, "learning_rate": 1.3079863543041685e-05, "loss": 0.4023570418357849, "step": 7753, "token_acc": 0.8620391705069125 }, { "epoch": 0.41838881994280475, "grad_norm": 0.3843471109867096, "learning_rate": 1.3078200881420455e-05, "loss": 0.43302905559539795, "step": 7754, "token_acc": 0.8534495494164573 }, { "epoch": 0.4184427777478012, "grad_norm": 0.32551130652427673, "learning_rate": 1.3076538125791775e-05, "loss": 0.36659669876098633, "step": 7755, "token_acc": 0.8703925681750075 }, { "epoch": 0.4184967355527977, "grad_norm": 0.3885513246059418, "learning_rate": 1.3074875276206428e-05, "loss": 0.39788633584976196, "step": 7756, "token_acc": 0.8656805617809652 }, { "epoch": 0.4185506933577942, "grad_norm": 0.44446876645088196, "learning_rate": 1.3073212332715196e-05, "loss": 0.42266613245010376, "step": 7757, "token_acc": 0.8550855085508551 }, { "epoch": 0.4186046511627907, "grad_norm": 0.48390793800354004, "learning_rate": 1.3071549295368862e-05, "loss": 0.3836355209350586, "step": 7758, "token_acc": 0.8691502463054187 }, { "epoch": 0.41865860896778717, "grad_norm": 0.34531930088996887, "learning_rate": 1.3069886164218218e-05, "loss": 0.3722517788410187, "step": 7759, "token_acc": 0.8732655959794603 }, { "epoch": 0.4187125667727837, "grad_norm": 0.33371809124946594, "learning_rate": 1.3068222939314053e-05, "loss": 0.34054267406463623, "step": 7760, "token_acc": 0.8792298051726102 }, { "epoch": 0.4187665245777802, "grad_norm": 0.42207637429237366, "learning_rate": 1.3066559620707166e-05, "loss": 0.3638879656791687, "step": 7761, "token_acc": 0.865450204638472 }, { "epoch": 0.4188204823827767, "grad_norm": 0.4089109003543854, "learning_rate": 1.3064896208448352e-05, "loss": 0.32818686962127686, "step": 7762, "token_acc": 0.880835781913157 }, { "epoch": 0.41887444018777314, "grad_norm": 0.44509413838386536, "learning_rate": 1.3063232702588408e-05, "loss": 0.3866974711418152, "step": 7763, "token_acc": 0.865669014084507 }, { "epoch": 0.41892839799276965, "grad_norm": 0.4481261074542999, "learning_rate": 1.3061569103178143e-05, "loss": 0.39740362763404846, "step": 7764, "token_acc": 0.8639662224567884 }, { "epoch": 0.41898235579776616, "grad_norm": 0.2948492765426636, "learning_rate": 1.3059905410268359e-05, "loss": 0.32906562089920044, "step": 7765, "token_acc": 0.8874468085106383 }, { "epoch": 0.41903631360276267, "grad_norm": 0.4459044337272644, "learning_rate": 1.305824162390986e-05, "loss": 0.4070526659488678, "step": 7766, "token_acc": 0.8607594936708861 }, { "epoch": 0.4190902714077591, "grad_norm": 0.37590357661247253, "learning_rate": 1.3056577744153467e-05, "loss": 0.36370062828063965, "step": 7767, "token_acc": 0.8734494979326639 }, { "epoch": 0.4191442292127556, "grad_norm": 0.4400542974472046, "learning_rate": 1.3054913771049992e-05, "loss": 0.3899100422859192, "step": 7768, "token_acc": 0.868326892800916 }, { "epoch": 0.41919818701775213, "grad_norm": 0.47329020500183105, "learning_rate": 1.305324970465025e-05, "loss": 0.4047422409057617, "step": 7769, "token_acc": 0.860060405580325 }, { "epoch": 0.4192521448227486, "grad_norm": 0.3602716028690338, "learning_rate": 1.3051585545005059e-05, "loss": 0.40637603402137756, "step": 7770, "token_acc": 0.8605529332434255 }, { "epoch": 0.4193061026277451, "grad_norm": 0.3646676540374756, "learning_rate": 1.3049921292165246e-05, "loss": 0.31244567036628723, "step": 7771, "token_acc": 0.8840881485337497 }, { "epoch": 0.4193600604327416, "grad_norm": 0.418575644493103, "learning_rate": 1.3048256946181642e-05, "loss": 0.3721466362476349, "step": 7772, "token_acc": 0.8663528740550563 }, { "epoch": 0.4194140182377381, "grad_norm": 0.36455145478248596, "learning_rate": 1.304659250710506e-05, "loss": 0.4168360233306885, "step": 7773, "token_acc": 0.8531905872376511 }, { "epoch": 0.41946797604273456, "grad_norm": 0.40569964051246643, "learning_rate": 1.3044927974986346e-05, "loss": 0.3557087779045105, "step": 7774, "token_acc": 0.8763169288497619 }, { "epoch": 0.41952193384773107, "grad_norm": 0.44894251227378845, "learning_rate": 1.304326334987633e-05, "loss": 0.44443491101264954, "step": 7775, "token_acc": 0.8491035204816123 }, { "epoch": 0.4195758916527276, "grad_norm": 0.41243934631347656, "learning_rate": 1.3041598631825845e-05, "loss": 0.348451167345047, "step": 7776, "token_acc": 0.8757980266976204 }, { "epoch": 0.4196298494577241, "grad_norm": 0.3774137496948242, "learning_rate": 1.3039933820885735e-05, "loss": 0.3685413599014282, "step": 7777, "token_acc": 0.8698069498069498 }, { "epoch": 0.41968380726272053, "grad_norm": 0.3388616740703583, "learning_rate": 1.3038268917106844e-05, "loss": 0.3827994167804718, "step": 7778, "token_acc": 0.8664896939412867 }, { "epoch": 0.41973776506771704, "grad_norm": 0.39737892150878906, "learning_rate": 1.3036603920540016e-05, "loss": 0.3735301196575165, "step": 7779, "token_acc": 0.8695242581252944 }, { "epoch": 0.41979172287271355, "grad_norm": 0.38272786140441895, "learning_rate": 1.3034938831236101e-05, "loss": 0.4223582148551941, "step": 7780, "token_acc": 0.8558394160583942 }, { "epoch": 0.41984568067771005, "grad_norm": 0.4022790491580963, "learning_rate": 1.3033273649245949e-05, "loss": 0.3868817687034607, "step": 7781, "token_acc": 0.87059765208111 }, { "epoch": 0.4198996384827065, "grad_norm": 0.4011508822441101, "learning_rate": 1.303160837462041e-05, "loss": 0.4159237742424011, "step": 7782, "token_acc": 0.8581675463526864 }, { "epoch": 0.419953596287703, "grad_norm": 0.35659581422805786, "learning_rate": 1.3029943007410349e-05, "loss": 0.3635980784893036, "step": 7783, "token_acc": 0.8689935064935065 }, { "epoch": 0.4200075540926995, "grad_norm": 0.3291989862918854, "learning_rate": 1.3028277547666624e-05, "loss": 0.365611732006073, "step": 7784, "token_acc": 0.8752390057361377 }, { "epoch": 0.420061511897696, "grad_norm": 0.2975606322288513, "learning_rate": 1.3026611995440092e-05, "loss": 0.32641756534576416, "step": 7785, "token_acc": 0.881562302340291 }, { "epoch": 0.4201154697026925, "grad_norm": 0.3396089971065521, "learning_rate": 1.3024946350781624e-05, "loss": 0.3282656669616699, "step": 7786, "token_acc": 0.8815015829941203 }, { "epoch": 0.420169427507689, "grad_norm": 0.40557926893234253, "learning_rate": 1.3023280613742086e-05, "loss": 0.3178909420967102, "step": 7787, "token_acc": 0.8797709923664122 }, { "epoch": 0.4202233853126855, "grad_norm": 0.42603567242622375, "learning_rate": 1.3021614784372354e-05, "loss": 0.4169682562351227, "step": 7788, "token_acc": 0.8580237397872669 }, { "epoch": 0.42027734311768195, "grad_norm": 0.33244383335113525, "learning_rate": 1.3019948862723296e-05, "loss": 0.35947397351264954, "step": 7789, "token_acc": 0.869095442582075 }, { "epoch": 0.42033130092267845, "grad_norm": 0.4031151235103607, "learning_rate": 1.3018282848845789e-05, "loss": 0.38642024993896484, "step": 7790, "token_acc": 0.8632818248712288 }, { "epoch": 0.42038525872767496, "grad_norm": 0.40425652265548706, "learning_rate": 1.3016616742790717e-05, "loss": 0.3177248239517212, "step": 7791, "token_acc": 0.8842767295597485 }, { "epoch": 0.42043921653267147, "grad_norm": 0.36196959018707275, "learning_rate": 1.3014950544608957e-05, "loss": 0.42441558837890625, "step": 7792, "token_acc": 0.8590010405827263 }, { "epoch": 0.4204931743376679, "grad_norm": 0.3099421560764313, "learning_rate": 1.30132842543514e-05, "loss": 0.36032038927078247, "step": 7793, "token_acc": 0.8776514707605766 }, { "epoch": 0.4205471321426644, "grad_norm": 0.5355786085128784, "learning_rate": 1.301161787206893e-05, "loss": 0.392574667930603, "step": 7794, "token_acc": 0.867297581493165 }, { "epoch": 0.42060108994766093, "grad_norm": 0.34908682107925415, "learning_rate": 1.3009951397812437e-05, "loss": 0.3650532364845276, "step": 7795, "token_acc": 0.8739346139168045 }, { "epoch": 0.42065504775265744, "grad_norm": 0.2574402391910553, "learning_rate": 1.3008284831632821e-05, "loss": 0.33475613594055176, "step": 7796, "token_acc": 0.8836096194256362 }, { "epoch": 0.4207090055576539, "grad_norm": 0.4588717818260193, "learning_rate": 1.3006618173580974e-05, "loss": 0.3960421085357666, "step": 7797, "token_acc": 0.8641625932595832 }, { "epoch": 0.4207629633626504, "grad_norm": 0.3957550823688507, "learning_rate": 1.3004951423707792e-05, "loss": 0.3831605315208435, "step": 7798, "token_acc": 0.8669762360010926 }, { "epoch": 0.4208169211676469, "grad_norm": 0.39633235335350037, "learning_rate": 1.3003284582064181e-05, "loss": 0.3752399981021881, "step": 7799, "token_acc": 0.8654171147641727 }, { "epoch": 0.4208708789726434, "grad_norm": 0.3702443540096283, "learning_rate": 1.3001617648701047e-05, "loss": 0.36759185791015625, "step": 7800, "token_acc": 0.8718197237702932 }, { "epoch": 0.42092483677763987, "grad_norm": 0.4024631083011627, "learning_rate": 1.29999506236693e-05, "loss": 0.3582608103752136, "step": 7801, "token_acc": 0.8718584656084656 }, { "epoch": 0.4209787945826364, "grad_norm": 0.3625473082065582, "learning_rate": 1.2998283507019842e-05, "loss": 0.38024553656578064, "step": 7802, "token_acc": 0.8700703148884134 }, { "epoch": 0.4210327523876329, "grad_norm": 0.43070176243782043, "learning_rate": 1.2996616298803592e-05, "loss": 0.3234976530075073, "step": 7803, "token_acc": 0.8863535666814355 }, { "epoch": 0.4210867101926294, "grad_norm": 0.4192532002925873, "learning_rate": 1.2994948999071466e-05, "loss": 0.38709351420402527, "step": 7804, "token_acc": 0.8660762696547984 }, { "epoch": 0.42114066799762584, "grad_norm": 0.35174891352653503, "learning_rate": 1.2993281607874382e-05, "loss": 0.35420095920562744, "step": 7805, "token_acc": 0.8678699366216589 }, { "epoch": 0.42119462580262235, "grad_norm": 0.3049324154853821, "learning_rate": 1.2991614125263263e-05, "loss": 0.3774300515651703, "step": 7806, "token_acc": 0.8693574751666038 }, { "epoch": 0.42124858360761885, "grad_norm": 0.29076340794563293, "learning_rate": 1.2989946551289032e-05, "loss": 0.3420426547527313, "step": 7807, "token_acc": 0.8826606026151222 }, { "epoch": 0.42130254141261536, "grad_norm": 0.30283769965171814, "learning_rate": 1.2988278886002613e-05, "loss": 0.38316893577575684, "step": 7808, "token_acc": 0.8604331385355792 }, { "epoch": 0.4213564992176118, "grad_norm": 0.3646112382411957, "learning_rate": 1.2986611129454943e-05, "loss": 0.36653465032577515, "step": 7809, "token_acc": 0.8720548336427245 }, { "epoch": 0.4214104570226083, "grad_norm": 0.36207568645477295, "learning_rate": 1.2984943281696952e-05, "loss": 0.4320226311683655, "step": 7810, "token_acc": 0.8541593746353985 }, { "epoch": 0.4214644148276048, "grad_norm": 0.4079325497150421, "learning_rate": 1.2983275342779571e-05, "loss": 0.3442176580429077, "step": 7811, "token_acc": 0.8748619220451318 }, { "epoch": 0.4215183726326013, "grad_norm": 0.41889119148254395, "learning_rate": 1.2981607312753746e-05, "loss": 0.36693644523620605, "step": 7812, "token_acc": 0.870011911852293 }, { "epoch": 0.4215723304375978, "grad_norm": 0.3405568301677704, "learning_rate": 1.2979939191670416e-05, "loss": 0.34709036350250244, "step": 7813, "token_acc": 0.8778268433025559 }, { "epoch": 0.4216262882425943, "grad_norm": 0.31156453490257263, "learning_rate": 1.2978270979580522e-05, "loss": 0.3532372713088989, "step": 7814, "token_acc": 0.8778215223097113 }, { "epoch": 0.4216802460475908, "grad_norm": 0.4550286829471588, "learning_rate": 1.2976602676535013e-05, "loss": 0.43580085039138794, "step": 7815, "token_acc": 0.851692103516921 }, { "epoch": 0.42173420385258725, "grad_norm": 0.4375947117805481, "learning_rate": 1.2974934282584836e-05, "loss": 0.38955357670783997, "step": 7816, "token_acc": 0.8716617210682492 }, { "epoch": 0.42178816165758376, "grad_norm": 0.4278637766838074, "learning_rate": 1.2973265797780947e-05, "loss": 0.3570215702056885, "step": 7817, "token_acc": 0.8750530635347389 }, { "epoch": 0.42184211946258027, "grad_norm": 0.36096006631851196, "learning_rate": 1.2971597222174297e-05, "loss": 0.32843875885009766, "step": 7818, "token_acc": 0.8812908182865924 }, { "epoch": 0.4218960772675768, "grad_norm": 0.47388479113578796, "learning_rate": 1.296992855581585e-05, "loss": 0.3956759572029114, "step": 7819, "token_acc": 0.863579023247432 }, { "epoch": 0.4219500350725732, "grad_norm": 0.3754573464393616, "learning_rate": 1.2968259798756562e-05, "loss": 0.32645416259765625, "step": 7820, "token_acc": 0.8823188405797101 }, { "epoch": 0.42200399287756973, "grad_norm": 0.3638451099395752, "learning_rate": 1.2966590951047397e-05, "loss": 0.3674909472465515, "step": 7821, "token_acc": 0.8743934715483017 }, { "epoch": 0.42205795068256624, "grad_norm": 0.40218037366867065, "learning_rate": 1.296492201273932e-05, "loss": 0.394608736038208, "step": 7822, "token_acc": 0.8646696661141369 }, { "epoch": 0.42211190848756275, "grad_norm": 0.3170340061187744, "learning_rate": 1.2963252983883306e-05, "loss": 0.3793843984603882, "step": 7823, "token_acc": 0.8612134610660771 }, { "epoch": 0.4221658662925592, "grad_norm": 0.39749810099601746, "learning_rate": 1.2961583864530317e-05, "loss": 0.43203580379486084, "step": 7824, "token_acc": 0.8563899868247694 }, { "epoch": 0.4222198240975557, "grad_norm": 0.3797364830970764, "learning_rate": 1.2959914654731334e-05, "loss": 0.37121570110321045, "step": 7825, "token_acc": 0.8680126356269743 }, { "epoch": 0.4222737819025522, "grad_norm": 0.38699185848236084, "learning_rate": 1.2958245354537333e-05, "loss": 0.34296363592147827, "step": 7826, "token_acc": 0.8806491603741688 }, { "epoch": 0.4223277397075487, "grad_norm": 0.3834724724292755, "learning_rate": 1.2956575963999294e-05, "loss": 0.3641825318336487, "step": 7827, "token_acc": 0.8717980447964361 }, { "epoch": 0.4223816975125452, "grad_norm": 0.38949054479599, "learning_rate": 1.29549064831682e-05, "loss": 0.36169418692588806, "step": 7828, "token_acc": 0.8732080723729994 }, { "epoch": 0.4224356553175417, "grad_norm": 0.33785420656204224, "learning_rate": 1.2953236912095035e-05, "loss": 0.3824545741081238, "step": 7829, "token_acc": 0.864694014794889 }, { "epoch": 0.4224896131225382, "grad_norm": 0.46062740683555603, "learning_rate": 1.295156725083079e-05, "loss": 0.36441922187805176, "step": 7830, "token_acc": 0.869498366327119 }, { "epoch": 0.4225435709275347, "grad_norm": 0.41032373905181885, "learning_rate": 1.2949897499426453e-05, "loss": 0.3535921275615692, "step": 7831, "token_acc": 0.8778430486519054 }, { "epoch": 0.42259752873253115, "grad_norm": 0.38672760128974915, "learning_rate": 1.2948227657933021e-05, "loss": 0.39627140760421753, "step": 7832, "token_acc": 0.864304519526108 }, { "epoch": 0.42265148653752765, "grad_norm": 0.3700767159461975, "learning_rate": 1.2946557726401486e-05, "loss": 0.42127084732055664, "step": 7833, "token_acc": 0.8551162485222645 }, { "epoch": 0.42270544434252416, "grad_norm": 0.4517193138599396, "learning_rate": 1.2944887704882851e-05, "loss": 0.36666709184646606, "step": 7834, "token_acc": 0.871919810664068 }, { "epoch": 0.4227594021475206, "grad_norm": 0.3640618622303009, "learning_rate": 1.2943217593428116e-05, "loss": 0.4094918370246887, "step": 7835, "token_acc": 0.8594298553573936 }, { "epoch": 0.4228133599525171, "grad_norm": 0.425191730260849, "learning_rate": 1.294154739208829e-05, "loss": 0.35731154680252075, "step": 7836, "token_acc": 0.8738845925044616 }, { "epoch": 0.4228673177575136, "grad_norm": 0.4224002957344055, "learning_rate": 1.2939877100914374e-05, "loss": 0.42686086893081665, "step": 7837, "token_acc": 0.8531031403482661 }, { "epoch": 0.42292127556251013, "grad_norm": 0.39300450682640076, "learning_rate": 1.2938206719957382e-05, "loss": 0.39879095554351807, "step": 7838, "token_acc": 0.8590706556333545 }, { "epoch": 0.4229752333675066, "grad_norm": 0.3039762079715729, "learning_rate": 1.2936536249268328e-05, "loss": 0.31758156418800354, "step": 7839, "token_acc": 0.8892483554405263 }, { "epoch": 0.4230291911725031, "grad_norm": 0.3707265555858612, "learning_rate": 1.2934865688898223e-05, "loss": 0.3692198693752289, "step": 7840, "token_acc": 0.873085031623331 }, { "epoch": 0.4230831489774996, "grad_norm": 0.40101906657218933, "learning_rate": 1.2933195038898092e-05, "loss": 0.3464616537094116, "step": 7841, "token_acc": 0.8776135544340303 }, { "epoch": 0.4231371067824961, "grad_norm": 0.4431155025959015, "learning_rate": 1.2931524299318952e-05, "loss": 0.3834015429019928, "step": 7842, "token_acc": 0.8632855567805954 }, { "epoch": 0.42319106458749256, "grad_norm": 0.3461475670337677, "learning_rate": 1.2929853470211829e-05, "loss": 0.30233266949653625, "step": 7843, "token_acc": 0.8905524779158557 }, { "epoch": 0.42324502239248907, "grad_norm": 0.412699431180954, "learning_rate": 1.2928182551627746e-05, "loss": 0.3328477740287781, "step": 7844, "token_acc": 0.8793588301462317 }, { "epoch": 0.4232989801974856, "grad_norm": 0.4005691111087799, "learning_rate": 1.2926511543617735e-05, "loss": 0.41209647059440613, "step": 7845, "token_acc": 0.8576266770874039 }, { "epoch": 0.4233529380024821, "grad_norm": 0.3677263557910919, "learning_rate": 1.292484044623283e-05, "loss": 0.38976529240608215, "step": 7846, "token_acc": 0.8679132086791321 }, { "epoch": 0.42340689580747853, "grad_norm": 0.48625802993774414, "learning_rate": 1.2923169259524065e-05, "loss": 0.3866838216781616, "step": 7847, "token_acc": 0.8655027481409635 }, { "epoch": 0.42346085361247504, "grad_norm": 0.35542750358581543, "learning_rate": 1.2921497983542474e-05, "loss": 0.37436971068382263, "step": 7848, "token_acc": 0.868840579710145 }, { "epoch": 0.42351481141747155, "grad_norm": 0.3489699363708496, "learning_rate": 1.2919826618339102e-05, "loss": 0.40589576959609985, "step": 7849, "token_acc": 0.8588891841615732 }, { "epoch": 0.42356876922246806, "grad_norm": 0.39252185821533203, "learning_rate": 1.2918155163964987e-05, "loss": 0.34825319051742554, "step": 7850, "token_acc": 0.8764298372804897 }, { "epoch": 0.4236227270274645, "grad_norm": 0.34287676215171814, "learning_rate": 1.291648362047118e-05, "loss": 0.3209485113620758, "step": 7851, "token_acc": 0.8852346694073313 }, { "epoch": 0.423676684832461, "grad_norm": 0.2905100882053375, "learning_rate": 1.2914811987908727e-05, "loss": 0.29720866680145264, "step": 7852, "token_acc": 0.8926053697315134 }, { "epoch": 0.4237306426374575, "grad_norm": 0.38268759846687317, "learning_rate": 1.2913140266328676e-05, "loss": 0.3932405710220337, "step": 7853, "token_acc": 0.8641995981245814 }, { "epoch": 0.423784600442454, "grad_norm": 0.3934105932712555, "learning_rate": 1.291146845578209e-05, "loss": 0.3574950397014618, "step": 7854, "token_acc": 0.8759511844938981 }, { "epoch": 0.4238385582474505, "grad_norm": 0.3809189200401306, "learning_rate": 1.2909796556320019e-05, "loss": 0.3925507068634033, "step": 7855, "token_acc": 0.8698508522727273 }, { "epoch": 0.423892516052447, "grad_norm": 0.3297101855278015, "learning_rate": 1.2908124567993521e-05, "loss": 0.3935331106185913, "step": 7856, "token_acc": 0.8635118541607631 }, { "epoch": 0.4239464738574435, "grad_norm": 0.40065526962280273, "learning_rate": 1.290645249085366e-05, "loss": 0.3760925233364105, "step": 7857, "token_acc": 0.8705898566703418 }, { "epoch": 0.42400043166243995, "grad_norm": 0.433621883392334, "learning_rate": 1.2904780324951504e-05, "loss": 0.3918322026729584, "step": 7858, "token_acc": 0.8611625323954091 }, { "epoch": 0.42405438946743645, "grad_norm": 0.3618287742137909, "learning_rate": 1.2903108070338118e-05, "loss": 0.4251025915145874, "step": 7859, "token_acc": 0.849936305732484 }, { "epoch": 0.42410834727243296, "grad_norm": 0.28267139196395874, "learning_rate": 1.2901435727064568e-05, "loss": 0.315746009349823, "step": 7860, "token_acc": 0.8881614839061648 }, { "epoch": 0.42416230507742947, "grad_norm": 0.5105139017105103, "learning_rate": 1.2899763295181936e-05, "loss": 0.41290077567100525, "step": 7861, "token_acc": 0.8599377501111605 }, { "epoch": 0.4242162628824259, "grad_norm": 0.30183687806129456, "learning_rate": 1.289809077474129e-05, "loss": 0.39464500546455383, "step": 7862, "token_acc": 0.8657476139978791 }, { "epoch": 0.42427022068742243, "grad_norm": 0.3302993178367615, "learning_rate": 1.2896418165793711e-05, "loss": 0.4051913022994995, "step": 7863, "token_acc": 0.8645314353499407 }, { "epoch": 0.42432417849241894, "grad_norm": 0.3608742952346802, "learning_rate": 1.2894745468390282e-05, "loss": 0.40989208221435547, "step": 7864, "token_acc": 0.8583171594930928 }, { "epoch": 0.42437813629741544, "grad_norm": 0.39420509338378906, "learning_rate": 1.2893072682582082e-05, "loss": 0.3724495768547058, "step": 7865, "token_acc": 0.8673417721518988 }, { "epoch": 0.4244320941024119, "grad_norm": 0.3474689722061157, "learning_rate": 1.28913998084202e-05, "loss": 0.3602408766746521, "step": 7866, "token_acc": 0.871658465221439 }, { "epoch": 0.4244860519074084, "grad_norm": 0.36932262778282166, "learning_rate": 1.2889726845955725e-05, "loss": 0.3348526954650879, "step": 7867, "token_acc": 0.8813231920801642 }, { "epoch": 0.4245400097124049, "grad_norm": 0.3725346028804779, "learning_rate": 1.2888053795239752e-05, "loss": 0.3580177426338196, "step": 7868, "token_acc": 0.8713595402899308 }, { "epoch": 0.4245939675174014, "grad_norm": 0.48925891518592834, "learning_rate": 1.288638065632337e-05, "loss": 0.3936465382575989, "step": 7869, "token_acc": 0.8616525423728814 }, { "epoch": 0.42464792532239787, "grad_norm": 0.37472572922706604, "learning_rate": 1.288470742925768e-05, "loss": 0.31033217906951904, "step": 7870, "token_acc": 0.8876101165103722 }, { "epoch": 0.4247018831273944, "grad_norm": 0.3649803400039673, "learning_rate": 1.2883034114093784e-05, "loss": 0.30901169776916504, "step": 7871, "token_acc": 0.8819924575738529 }, { "epoch": 0.4247558409323909, "grad_norm": 0.44961073994636536, "learning_rate": 1.2881360710882778e-05, "loss": 0.36253753304481506, "step": 7872, "token_acc": 0.8726854980402757 }, { "epoch": 0.4248097987373874, "grad_norm": 0.4219925105571747, "learning_rate": 1.287968721967577e-05, "loss": 0.40122318267822266, "step": 7873, "token_acc": 0.8585048426150121 }, { "epoch": 0.42486375654238384, "grad_norm": 0.47774818539619446, "learning_rate": 1.287801364052387e-05, "loss": 0.3613976240158081, "step": 7874, "token_acc": 0.8743210621605311 }, { "epoch": 0.42491771434738035, "grad_norm": 0.34322983026504517, "learning_rate": 1.2876339973478187e-05, "loss": 0.3315860629081726, "step": 7875, "token_acc": 0.8858199468392465 }, { "epoch": 0.42497167215237686, "grad_norm": 0.30737027525901794, "learning_rate": 1.2874666218589833e-05, "loss": 0.3599165678024292, "step": 7876, "token_acc": 0.876103714085494 }, { "epoch": 0.4250256299573733, "grad_norm": 0.44829410314559937, "learning_rate": 1.2872992375909927e-05, "loss": 0.40784916281700134, "step": 7877, "token_acc": 0.8598145780051151 }, { "epoch": 0.4250795877623698, "grad_norm": 0.36547085642814636, "learning_rate": 1.2871318445489588e-05, "loss": 0.3609381914138794, "step": 7878, "token_acc": 0.8703531598513011 }, { "epoch": 0.4251335455673663, "grad_norm": 0.46846479177474976, "learning_rate": 1.2869644427379935e-05, "loss": 0.45029619336128235, "step": 7879, "token_acc": 0.845867658782886 }, { "epoch": 0.42518750337236283, "grad_norm": 0.3101542294025421, "learning_rate": 1.2867970321632095e-05, "loss": 0.3651021420955658, "step": 7880, "token_acc": 0.8713217559093102 }, { "epoch": 0.4252414611773593, "grad_norm": 0.5203626155853271, "learning_rate": 1.2866296128297189e-05, "loss": 0.40033236145973206, "step": 7881, "token_acc": 0.862101609340486 }, { "epoch": 0.4252954189823558, "grad_norm": 0.2755293846130371, "learning_rate": 1.2864621847426351e-05, "loss": 0.32567858695983887, "step": 7882, "token_acc": 0.8862121988723731 }, { "epoch": 0.4253493767873523, "grad_norm": 0.3532285988330841, "learning_rate": 1.2862947479070714e-05, "loss": 0.4458813965320587, "step": 7883, "token_acc": 0.8512769860307605 }, { "epoch": 0.4254033345923488, "grad_norm": 0.46844181418418884, "learning_rate": 1.2861273023281408e-05, "loss": 0.34139132499694824, "step": 7884, "token_acc": 0.8762078395624431 }, { "epoch": 0.42545729239734525, "grad_norm": 0.5127772688865662, "learning_rate": 1.2859598480109575e-05, "loss": 0.39590221643447876, "step": 7885, "token_acc": 0.8634651600753296 }, { "epoch": 0.42551125020234176, "grad_norm": 0.29132112860679626, "learning_rate": 1.2857923849606353e-05, "loss": 0.3358769416809082, "step": 7886, "token_acc": 0.8791874075070631 }, { "epoch": 0.42556520800733827, "grad_norm": 0.38302290439605713, "learning_rate": 1.2856249131822887e-05, "loss": 0.38481196761131287, "step": 7887, "token_acc": 0.8631578947368421 }, { "epoch": 0.4256191658123348, "grad_norm": 0.34583181142807007, "learning_rate": 1.285457432681032e-05, "loss": 0.38235437870025635, "step": 7888, "token_acc": 0.8693550614556 }, { "epoch": 0.42567312361733123, "grad_norm": 0.4974357783794403, "learning_rate": 1.28528994346198e-05, "loss": 0.4096348285675049, "step": 7889, "token_acc": 0.8585961342828077 }, { "epoch": 0.42572708142232774, "grad_norm": 0.304203063249588, "learning_rate": 1.285122445530248e-05, "loss": 0.3711355924606323, "step": 7890, "token_acc": 0.8693662777396477 }, { "epoch": 0.42578103922732424, "grad_norm": 0.428071528673172, "learning_rate": 1.2849549388909513e-05, "loss": 0.3889850378036499, "step": 7891, "token_acc": 0.8700482385616138 }, { "epoch": 0.42583499703232075, "grad_norm": 0.338191419839859, "learning_rate": 1.2847874235492047e-05, "loss": 0.3651043176651001, "step": 7892, "token_acc": 0.8739025321287696 }, { "epoch": 0.4258889548373172, "grad_norm": 0.434182733297348, "learning_rate": 1.2846198995101256e-05, "loss": 0.39294910430908203, "step": 7893, "token_acc": 0.8655632494511062 }, { "epoch": 0.4259429126423137, "grad_norm": 0.40047067403793335, "learning_rate": 1.284452366778829e-05, "loss": 0.3801133930683136, "step": 7894, "token_acc": 0.8635769015137358 }, { "epoch": 0.4259968704473102, "grad_norm": 0.5259035229682922, "learning_rate": 1.2842848253604319e-05, "loss": 0.3812277019023895, "step": 7895, "token_acc": 0.86295025728988 }, { "epoch": 0.4260508282523067, "grad_norm": 0.3183748126029968, "learning_rate": 1.2841172752600505e-05, "loss": 0.37476032972335815, "step": 7896, "token_acc": 0.8709084644538877 }, { "epoch": 0.4261047860573032, "grad_norm": 0.4482334852218628, "learning_rate": 1.2839497164828019e-05, "loss": 0.3651006817817688, "step": 7897, "token_acc": 0.871661463753035 }, { "epoch": 0.4261587438622997, "grad_norm": 0.3529897630214691, "learning_rate": 1.2837821490338032e-05, "loss": 0.3437930941581726, "step": 7898, "token_acc": 0.8801385681293302 }, { "epoch": 0.4262127016672962, "grad_norm": 0.4007163941860199, "learning_rate": 1.2836145729181723e-05, "loss": 0.36621353030204773, "step": 7899, "token_acc": 0.8685933061627763 }, { "epoch": 0.42626665947229264, "grad_norm": 0.411546915769577, "learning_rate": 1.2834469881410265e-05, "loss": 0.40698447823524475, "step": 7900, "token_acc": 0.8593728973220294 }, { "epoch": 0.42632061727728915, "grad_norm": 0.35061773657798767, "learning_rate": 1.2832793947074839e-05, "loss": 0.33998769521713257, "step": 7901, "token_acc": 0.8774888063256169 }, { "epoch": 0.42637457508228566, "grad_norm": 0.3365749716758728, "learning_rate": 1.2831117926226628e-05, "loss": 0.38161054253578186, "step": 7902, "token_acc": 0.8710627921154237 }, { "epoch": 0.42642853288728216, "grad_norm": 0.43768373131752014, "learning_rate": 1.2829441818916819e-05, "loss": 0.4294816851615906, "step": 7903, "token_acc": 0.8539229671897289 }, { "epoch": 0.4264824906922786, "grad_norm": 0.36906060576438904, "learning_rate": 1.2827765625196598e-05, "loss": 0.4431483745574951, "step": 7904, "token_acc": 0.8478927638305124 }, { "epoch": 0.4265364484972751, "grad_norm": 0.4043881297111511, "learning_rate": 1.2826089345117155e-05, "loss": 0.3730815649032593, "step": 7905, "token_acc": 0.8679656628968524 }, { "epoch": 0.42659040630227163, "grad_norm": 0.4496191740036011, "learning_rate": 1.2824412978729684e-05, "loss": 0.44677844643592834, "step": 7906, "token_acc": 0.8464858199753391 }, { "epoch": 0.42664436410726814, "grad_norm": 0.3575296401977539, "learning_rate": 1.2822736526085379e-05, "loss": 0.34023258090019226, "step": 7907, "token_acc": 0.8793270160510539 }, { "epoch": 0.4266983219122646, "grad_norm": 0.3983736038208008, "learning_rate": 1.2821059987235441e-05, "loss": 0.36076703667640686, "step": 7908, "token_acc": 0.8706449221645663 }, { "epoch": 0.4267522797172611, "grad_norm": 0.41573092341423035, "learning_rate": 1.2819383362231068e-05, "loss": 0.3814961910247803, "step": 7909, "token_acc": 0.8616004026170105 }, { "epoch": 0.4268062375222576, "grad_norm": 0.3643968403339386, "learning_rate": 1.2817706651123467e-05, "loss": 0.37193942070007324, "step": 7910, "token_acc": 0.8707732634338139 }, { "epoch": 0.4268601953272541, "grad_norm": 0.47792476415634155, "learning_rate": 1.2816029853963843e-05, "loss": 0.40205812454223633, "step": 7911, "token_acc": 0.8542189611894188 }, { "epoch": 0.42691415313225056, "grad_norm": 0.4054529070854187, "learning_rate": 1.2814352970803408e-05, "loss": 0.3755355477333069, "step": 7912, "token_acc": 0.8689674523007856 }, { "epoch": 0.42696811093724707, "grad_norm": 0.4229845106601715, "learning_rate": 1.2812676001693367e-05, "loss": 0.3590734899044037, "step": 7913, "token_acc": 0.872181551976574 }, { "epoch": 0.4270220687422436, "grad_norm": 0.3761841356754303, "learning_rate": 1.2810998946684938e-05, "loss": 0.3594765067100525, "step": 7914, "token_acc": 0.8777323367649132 }, { "epoch": 0.4270760265472401, "grad_norm": 0.6922245621681213, "learning_rate": 1.2809321805829338e-05, "loss": 0.36253559589385986, "step": 7915, "token_acc": 0.8682974242669691 }, { "epoch": 0.42712998435223654, "grad_norm": 0.32187819480895996, "learning_rate": 1.2807644579177786e-05, "loss": 0.35727906227111816, "step": 7916, "token_acc": 0.8726712064756521 }, { "epoch": 0.42718394215723304, "grad_norm": 0.33515429496765137, "learning_rate": 1.2805967266781504e-05, "loss": 0.37780606746673584, "step": 7917, "token_acc": 0.865546218487395 }, { "epoch": 0.42723789996222955, "grad_norm": 0.36015060544013977, "learning_rate": 1.2804289868691717e-05, "loss": 0.41492998600006104, "step": 7918, "token_acc": 0.8565586186004241 }, { "epoch": 0.427291857767226, "grad_norm": 0.3644542098045349, "learning_rate": 1.2802612384959652e-05, "loss": 0.33068424463272095, "step": 7919, "token_acc": 0.8773156899810964 }, { "epoch": 0.4273458155722225, "grad_norm": 0.4268358051776886, "learning_rate": 1.2800934815636539e-05, "loss": 0.43103814125061035, "step": 7920, "token_acc": 0.849754022071533 }, { "epoch": 0.427399773377219, "grad_norm": 0.43953707814216614, "learning_rate": 1.279925716077361e-05, "loss": 0.35616323351860046, "step": 7921, "token_acc": 0.8745829244357213 }, { "epoch": 0.4274537311822155, "grad_norm": 0.4716208577156067, "learning_rate": 1.2797579420422101e-05, "loss": 0.42713475227355957, "step": 7922, "token_acc": 0.8552127076866898 }, { "epoch": 0.427507688987212, "grad_norm": 0.39376547932624817, "learning_rate": 1.2795901594633251e-05, "loss": 0.39155328273773193, "step": 7923, "token_acc": 0.8667912439935932 }, { "epoch": 0.4275616467922085, "grad_norm": 0.39088472723960876, "learning_rate": 1.2794223683458296e-05, "loss": 0.34976935386657715, "step": 7924, "token_acc": 0.8771361988606939 }, { "epoch": 0.427615604597205, "grad_norm": 0.41808420419692993, "learning_rate": 1.2792545686948484e-05, "loss": 0.33643415570259094, "step": 7925, "token_acc": 0.8811881188118812 }, { "epoch": 0.4276695624022015, "grad_norm": 0.333509236574173, "learning_rate": 1.2790867605155057e-05, "loss": 0.3871793746948242, "step": 7926, "token_acc": 0.8645971735716874 }, { "epoch": 0.42772352020719795, "grad_norm": 0.36476272344589233, "learning_rate": 1.2789189438129262e-05, "loss": 0.367766410112381, "step": 7927, "token_acc": 0.8700718797647571 }, { "epoch": 0.42777747801219446, "grad_norm": 0.3787074089050293, "learning_rate": 1.2787511185922354e-05, "loss": 0.4040071368217468, "step": 7928, "token_acc": 0.8664269543214809 }, { "epoch": 0.42783143581719096, "grad_norm": 0.4010229706764221, "learning_rate": 1.2785832848585587e-05, "loss": 0.4251467287540436, "step": 7929, "token_acc": 0.8554537885095753 }, { "epoch": 0.42788539362218747, "grad_norm": 0.37094128131866455, "learning_rate": 1.2784154426170212e-05, "loss": 0.4148930311203003, "step": 7930, "token_acc": 0.8602464332036317 }, { "epoch": 0.4279393514271839, "grad_norm": 0.4144544303417206, "learning_rate": 1.2782475918727493e-05, "loss": 0.4159952998161316, "step": 7931, "token_acc": 0.8560124529770399 }, { "epoch": 0.42799330923218043, "grad_norm": 0.37685978412628174, "learning_rate": 1.2780797326308687e-05, "loss": 0.35523465275764465, "step": 7932, "token_acc": 0.8752407932011331 }, { "epoch": 0.42804726703717694, "grad_norm": 0.3968847095966339, "learning_rate": 1.2779118648965059e-05, "loss": 0.3904026746749878, "step": 7933, "token_acc": 0.8616192142516303 }, { "epoch": 0.42810122484217344, "grad_norm": 0.3520664572715759, "learning_rate": 1.2777439886747873e-05, "loss": 0.3522563576698303, "step": 7934, "token_acc": 0.8741496598639455 }, { "epoch": 0.4281551826471699, "grad_norm": 0.43562862277030945, "learning_rate": 1.2775761039708404e-05, "loss": 0.42275121808052063, "step": 7935, "token_acc": 0.852811214383666 }, { "epoch": 0.4282091404521664, "grad_norm": 0.42625582218170166, "learning_rate": 1.2774082107897917e-05, "loss": 0.4397391676902771, "step": 7936, "token_acc": 0.8495412844036697 }, { "epoch": 0.4282630982571629, "grad_norm": 0.4144342839717865, "learning_rate": 1.277240309136769e-05, "loss": 0.3327772319316864, "step": 7937, "token_acc": 0.8793597304128054 }, { "epoch": 0.4283170560621594, "grad_norm": 0.29757964611053467, "learning_rate": 1.2770723990169002e-05, "loss": 0.3875617980957031, "step": 7938, "token_acc": 0.8621422322209724 }, { "epoch": 0.42837101386715587, "grad_norm": 0.367949903011322, "learning_rate": 1.2769044804353128e-05, "loss": 0.33210229873657227, "step": 7939, "token_acc": 0.8771991555242786 }, { "epoch": 0.4284249716721524, "grad_norm": 0.41859865188598633, "learning_rate": 1.2767365533971347e-05, "loss": 0.41305989027023315, "step": 7940, "token_acc": 0.8574709710348347 }, { "epoch": 0.4284789294771489, "grad_norm": 0.41429203748703003, "learning_rate": 1.2765686179074953e-05, "loss": 0.37927335500717163, "step": 7941, "token_acc": 0.870811149389289 }, { "epoch": 0.42853288728214534, "grad_norm": 0.35499054193496704, "learning_rate": 1.2764006739715225e-05, "loss": 0.35709255933761597, "step": 7942, "token_acc": 0.8763506625891947 }, { "epoch": 0.42858684508714184, "grad_norm": 0.391450971364975, "learning_rate": 1.276232721594345e-05, "loss": 0.3937658667564392, "step": 7943, "token_acc": 0.8635846911708981 }, { "epoch": 0.42864080289213835, "grad_norm": 0.42233240604400635, "learning_rate": 1.2760647607810932e-05, "loss": 0.3987031877040863, "step": 7944, "token_acc": 0.8598726114649682 }, { "epoch": 0.42869476069713486, "grad_norm": 0.35024920105934143, "learning_rate": 1.275896791536896e-05, "loss": 0.4071695804595947, "step": 7945, "token_acc": 0.8626098948457163 }, { "epoch": 0.4287487185021313, "grad_norm": 0.4434964656829834, "learning_rate": 1.2757288138668829e-05, "loss": 0.40694165229797363, "step": 7946, "token_acc": 0.8614690721649485 }, { "epoch": 0.4288026763071278, "grad_norm": 0.40252265334129333, "learning_rate": 1.275560827776184e-05, "loss": 0.365733802318573, "step": 7947, "token_acc": 0.8697682773486618 }, { "epoch": 0.4288566341121243, "grad_norm": 0.3960677981376648, "learning_rate": 1.2753928332699297e-05, "loss": 0.36569738388061523, "step": 7948, "token_acc": 0.8703296703296703 }, { "epoch": 0.42891059191712083, "grad_norm": 0.23502960801124573, "learning_rate": 1.2752248303532504e-05, "loss": 0.3449615240097046, "step": 7949, "token_acc": 0.8806012748549139 }, { "epoch": 0.4289645497221173, "grad_norm": 0.475328266620636, "learning_rate": 1.2750568190312764e-05, "loss": 0.400860995054245, "step": 7950, "token_acc": 0.8630268199233716 }, { "epoch": 0.4290185075271138, "grad_norm": 0.3651987910270691, "learning_rate": 1.2748887993091397e-05, "loss": 0.407174289226532, "step": 7951, "token_acc": 0.8603409933283914 }, { "epoch": 0.4290724653321103, "grad_norm": 0.3679228723049164, "learning_rate": 1.2747207711919711e-05, "loss": 0.3817483186721802, "step": 7952, "token_acc": 0.8687688383955483 }, { "epoch": 0.4291264231371068, "grad_norm": 0.4415832459926605, "learning_rate": 1.2745527346849018e-05, "loss": 0.3990892469882965, "step": 7953, "token_acc": 0.858321479374111 }, { "epoch": 0.42918038094210326, "grad_norm": 0.3697435259819031, "learning_rate": 1.2743846897930643e-05, "loss": 0.3692670464515686, "step": 7954, "token_acc": 0.8706604019838162 }, { "epoch": 0.42923433874709976, "grad_norm": 0.3955978751182556, "learning_rate": 1.2742166365215902e-05, "loss": 0.3907926380634308, "step": 7955, "token_acc": 0.8617524339360223 }, { "epoch": 0.42928829655209627, "grad_norm": 0.43029263615608215, "learning_rate": 1.2740485748756116e-05, "loss": 0.4454236626625061, "step": 7956, "token_acc": 0.8494231936854888 }, { "epoch": 0.4293422543570928, "grad_norm": 0.39943015575408936, "learning_rate": 1.2738805048602614e-05, "loss": 0.4309694170951843, "step": 7957, "token_acc": 0.8543160690571049 }, { "epoch": 0.42939621216208923, "grad_norm": 0.4030379354953766, "learning_rate": 1.2737124264806724e-05, "loss": 0.3893958330154419, "step": 7958, "token_acc": 0.8665409101626975 }, { "epoch": 0.42945016996708574, "grad_norm": 0.2761673033237457, "learning_rate": 1.2735443397419774e-05, "loss": 0.3774074912071228, "step": 7959, "token_acc": 0.8726243429033562 }, { "epoch": 0.42950412777208224, "grad_norm": 0.36237359046936035, "learning_rate": 1.27337624464931e-05, "loss": 0.3131134510040283, "step": 7960, "token_acc": 0.8868987433193702 }, { "epoch": 0.42955808557707875, "grad_norm": 0.3303651511669159, "learning_rate": 1.2732081412078038e-05, "loss": 0.38240206241607666, "step": 7961, "token_acc": 0.8700774517290281 }, { "epoch": 0.4296120433820752, "grad_norm": 0.511029064655304, "learning_rate": 1.2730400294225926e-05, "loss": 0.3719848096370697, "step": 7962, "token_acc": 0.8728667606074839 }, { "epoch": 0.4296660011870717, "grad_norm": 0.40776628255844116, "learning_rate": 1.2728719092988105e-05, "loss": 0.3710525631904602, "step": 7963, "token_acc": 0.8685548293391431 }, { "epoch": 0.4297199589920682, "grad_norm": 0.39692339301109314, "learning_rate": 1.2727037808415917e-05, "loss": 0.3713577389717102, "step": 7964, "token_acc": 0.8703057378193494 }, { "epoch": 0.42977391679706467, "grad_norm": 0.380966454744339, "learning_rate": 1.2725356440560708e-05, "loss": 0.34092432260513306, "step": 7965, "token_acc": 0.8775034932463903 }, { "epoch": 0.4298278746020612, "grad_norm": 0.44645529985427856, "learning_rate": 1.2723674989473826e-05, "loss": 0.4110526442527771, "step": 7966, "token_acc": 0.8586956521739131 }, { "epoch": 0.4298818324070577, "grad_norm": 0.44414690136909485, "learning_rate": 1.2721993455206624e-05, "loss": 0.35385286808013916, "step": 7967, "token_acc": 0.8736641780120041 }, { "epoch": 0.4299357902120542, "grad_norm": 0.36370816826820374, "learning_rate": 1.2720311837810457e-05, "loss": 0.34044381976127625, "step": 7968, "token_acc": 0.8796884016156953 }, { "epoch": 0.42998974801705064, "grad_norm": 0.5039041638374329, "learning_rate": 1.2718630137336674e-05, "loss": 0.35872796177864075, "step": 7969, "token_acc": 0.8747704124227751 }, { "epoch": 0.43004370582204715, "grad_norm": 0.404675155878067, "learning_rate": 1.2716948353836643e-05, "loss": 0.4085758626461029, "step": 7970, "token_acc": 0.8604407135362014 }, { "epoch": 0.43009766362704366, "grad_norm": 0.45943760871887207, "learning_rate": 1.2715266487361723e-05, "loss": 0.3980101943016052, "step": 7971, "token_acc": 0.8620352250489237 }, { "epoch": 0.43015162143204017, "grad_norm": 0.40414029359817505, "learning_rate": 1.2713584537963272e-05, "loss": 0.4186995029449463, "step": 7972, "token_acc": 0.8548069644208932 }, { "epoch": 0.4302055792370366, "grad_norm": 0.3562511205673218, "learning_rate": 1.271190250569266e-05, "loss": 0.3928139805793762, "step": 7973, "token_acc": 0.8670571756016809 }, { "epoch": 0.4302595370420331, "grad_norm": 0.41014623641967773, "learning_rate": 1.2710220390601258e-05, "loss": 0.38168227672576904, "step": 7974, "token_acc": 0.8710538532961931 }, { "epoch": 0.43031349484702963, "grad_norm": 0.26278409361839294, "learning_rate": 1.2708538192740436e-05, "loss": 0.29585859179496765, "step": 7975, "token_acc": 0.8933188484519283 }, { "epoch": 0.43036745265202614, "grad_norm": 0.5090049505233765, "learning_rate": 1.2706855912161563e-05, "loss": 0.4058239459991455, "step": 7976, "token_acc": 0.8604014598540146 }, { "epoch": 0.4304214104570226, "grad_norm": 0.44385218620300293, "learning_rate": 1.2705173548916023e-05, "loss": 0.32051873207092285, "step": 7977, "token_acc": 0.8803045027534824 }, { "epoch": 0.4304753682620191, "grad_norm": 0.37185317277908325, "learning_rate": 1.270349110305519e-05, "loss": 0.40570253133773804, "step": 7978, "token_acc": 0.8647262647262647 }, { "epoch": 0.4305293260670156, "grad_norm": 0.36653006076812744, "learning_rate": 1.270180857463045e-05, "loss": 0.3837408423423767, "step": 7979, "token_acc": 0.8618730950401773 }, { "epoch": 0.4305832838720121, "grad_norm": 0.4517727792263031, "learning_rate": 1.270012596369318e-05, "loss": 0.40177619457244873, "step": 7980, "token_acc": 0.85847470579245 }, { "epoch": 0.43063724167700856, "grad_norm": 0.3983243703842163, "learning_rate": 1.2698443270294774e-05, "loss": 0.3799760341644287, "step": 7981, "token_acc": 0.8635322645830527 }, { "epoch": 0.43069119948200507, "grad_norm": 0.3168291747570038, "learning_rate": 1.2696760494486612e-05, "loss": 0.3400455713272095, "step": 7982, "token_acc": 0.8789609226290449 }, { "epoch": 0.4307451572870016, "grad_norm": 0.39104822278022766, "learning_rate": 1.2695077636320095e-05, "loss": 0.31412965059280396, "step": 7983, "token_acc": 0.89189910583211 }, { "epoch": 0.43079911509199803, "grad_norm": 0.47323617339134216, "learning_rate": 1.2693394695846612e-05, "loss": 0.46066245436668396, "step": 7984, "token_acc": 0.8454301075268817 }, { "epoch": 0.43085307289699454, "grad_norm": 0.4936395287513733, "learning_rate": 1.269171167311756e-05, "loss": 0.41391706466674805, "step": 7985, "token_acc": 0.8563690085357847 }, { "epoch": 0.43090703070199105, "grad_norm": 0.48231402039527893, "learning_rate": 1.269002856818434e-05, "loss": 0.3814643621444702, "step": 7986, "token_acc": 0.864381198792583 }, { "epoch": 0.43096098850698755, "grad_norm": 0.4780493378639221, "learning_rate": 1.2688345381098351e-05, "loss": 0.38277989625930786, "step": 7987, "token_acc": 0.8646408839779005 }, { "epoch": 0.431014946311984, "grad_norm": 0.4460866451263428, "learning_rate": 1.2686662111910999e-05, "loss": 0.37589848041534424, "step": 7988, "token_acc": 0.8709618208516887 }, { "epoch": 0.4310689041169805, "grad_norm": 0.33617013692855835, "learning_rate": 1.2684978760673689e-05, "loss": 0.3531990051269531, "step": 7989, "token_acc": 0.8759028233749179 }, { "epoch": 0.431122861921977, "grad_norm": 0.42607155442237854, "learning_rate": 1.2683295327437834e-05, "loss": 0.3971964716911316, "step": 7990, "token_acc": 0.8649809160305344 }, { "epoch": 0.4311768197269735, "grad_norm": 0.34387147426605225, "learning_rate": 1.268161181225484e-05, "loss": 0.3150063753128052, "step": 7991, "token_acc": 0.8878705427466704 }, { "epoch": 0.43123077753197, "grad_norm": 0.4663907587528229, "learning_rate": 1.2679928215176123e-05, "loss": 0.3752484917640686, "step": 7992, "token_acc": 0.8664244795305296 }, { "epoch": 0.4312847353369665, "grad_norm": 0.39468106627464294, "learning_rate": 1.2678244536253102e-05, "loss": 0.3541857600212097, "step": 7993, "token_acc": 0.875906432748538 }, { "epoch": 0.431338693141963, "grad_norm": 0.4214860796928406, "learning_rate": 1.2676560775537194e-05, "loss": 0.31332436203956604, "step": 7994, "token_acc": 0.885310431204151 }, { "epoch": 0.4313926509469595, "grad_norm": 0.32690879702568054, "learning_rate": 1.2674876933079823e-05, "loss": 0.35808122158050537, "step": 7995, "token_acc": 0.8741302972802024 }, { "epoch": 0.43144660875195595, "grad_norm": 0.3985236883163452, "learning_rate": 1.267319300893241e-05, "loss": 0.4036230444908142, "step": 7996, "token_acc": 0.8607682706975455 }, { "epoch": 0.43150056655695246, "grad_norm": 0.47337329387664795, "learning_rate": 1.267150900314638e-05, "loss": 0.40682733058929443, "step": 7997, "token_acc": 0.855402694181714 }, { "epoch": 0.43155452436194897, "grad_norm": 0.3372645676136017, "learning_rate": 1.2669824915773168e-05, "loss": 0.36192500591278076, "step": 7998, "token_acc": 0.8746721072573593 }, { "epoch": 0.4316084821669455, "grad_norm": 0.2784948945045471, "learning_rate": 1.2668140746864199e-05, "loss": 0.3733910322189331, "step": 7999, "token_acc": 0.8753309265944645 }, { "epoch": 0.4316624399719419, "grad_norm": 0.47890588641166687, "learning_rate": 1.2666456496470914e-05, "loss": 0.38031625747680664, "step": 8000, "token_acc": 0.8661167512690355 }, { "epoch": 0.43171639777693843, "grad_norm": 0.370306134223938, "learning_rate": 1.2664772164644746e-05, "loss": 0.3784918785095215, "step": 8001, "token_acc": 0.8697256735145774 }, { "epoch": 0.43177035558193494, "grad_norm": 0.39856213331222534, "learning_rate": 1.2663087751437131e-05, "loss": 0.38638728857040405, "step": 8002, "token_acc": 0.8646715154405628 }, { "epoch": 0.43182431338693145, "grad_norm": 0.3533206284046173, "learning_rate": 1.2661403256899516e-05, "loss": 0.3166123330593109, "step": 8003, "token_acc": 0.8887268180871015 }, { "epoch": 0.4318782711919279, "grad_norm": 0.4374808073043823, "learning_rate": 1.2659718681083344e-05, "loss": 0.36677438020706177, "step": 8004, "token_acc": 0.8721363115693013 }, { "epoch": 0.4319322289969244, "grad_norm": 0.4126166105270386, "learning_rate": 1.265803402404006e-05, "loss": 0.3626813590526581, "step": 8005, "token_acc": 0.8729593009887331 }, { "epoch": 0.4319861868019209, "grad_norm": 0.4563133716583252, "learning_rate": 1.2656349285821112e-05, "loss": 0.4411413073539734, "step": 8006, "token_acc": 0.8527873453246587 }, { "epoch": 0.43204014460691736, "grad_norm": 0.2684879004955292, "learning_rate": 1.2654664466477953e-05, "loss": 0.35006776452064514, "step": 8007, "token_acc": 0.8752213214052744 }, { "epoch": 0.43209410241191387, "grad_norm": 0.31210920214653015, "learning_rate": 1.2652979566062035e-05, "loss": 0.34859979152679443, "step": 8008, "token_acc": 0.8788200339558574 }, { "epoch": 0.4321480602169104, "grad_norm": 0.35066846013069153, "learning_rate": 1.2651294584624817e-05, "loss": 0.38590821623802185, "step": 8009, "token_acc": 0.8653409704960553 }, { "epoch": 0.4322020180219069, "grad_norm": 0.3370359539985657, "learning_rate": 1.2649609522217758e-05, "loss": 0.32208549976348877, "step": 8010, "token_acc": 0.8886118038237739 }, { "epoch": 0.43225597582690334, "grad_norm": 0.353695809841156, "learning_rate": 1.2647924378892315e-05, "loss": 0.397238552570343, "step": 8011, "token_acc": 0.8620241191771104 }, { "epoch": 0.43230993363189985, "grad_norm": 0.3844413757324219, "learning_rate": 1.2646239154699962e-05, "loss": 0.3484314978122711, "step": 8012, "token_acc": 0.8757370604935576 }, { "epoch": 0.43236389143689635, "grad_norm": 0.3308895528316498, "learning_rate": 1.2644553849692154e-05, "loss": 0.30013149976730347, "step": 8013, "token_acc": 0.8918632388020143 }, { "epoch": 0.43241784924189286, "grad_norm": 0.3702770173549652, "learning_rate": 1.2642868463920363e-05, "loss": 0.3253406286239624, "step": 8014, "token_acc": 0.8815386663117264 }, { "epoch": 0.4324718070468893, "grad_norm": 0.46930131316185, "learning_rate": 1.2641182997436064e-05, "loss": 0.4305307865142822, "step": 8015, "token_acc": 0.855194901437676 }, { "epoch": 0.4325257648518858, "grad_norm": 0.39281463623046875, "learning_rate": 1.2639497450290728e-05, "loss": 0.3407314419746399, "step": 8016, "token_acc": 0.8773928361714621 }, { "epoch": 0.4325797226568823, "grad_norm": 0.4377923011779785, "learning_rate": 1.2637811822535827e-05, "loss": 0.3779066205024719, "step": 8017, "token_acc": 0.8648863035430989 }, { "epoch": 0.43263368046187883, "grad_norm": 0.33010154962539673, "learning_rate": 1.2636126114222849e-05, "loss": 0.38313212990760803, "step": 8018, "token_acc": 0.8650760429747454 }, { "epoch": 0.4326876382668753, "grad_norm": 0.40883538126945496, "learning_rate": 1.2634440325403268e-05, "loss": 0.33251431584358215, "step": 8019, "token_acc": 0.8819822400458321 }, { "epoch": 0.4327415960718718, "grad_norm": 0.4016408920288086, "learning_rate": 1.2632754456128572e-05, "loss": 0.30939981341362, "step": 8020, "token_acc": 0.8877481177275839 }, { "epoch": 0.4327955538768683, "grad_norm": 0.36067995429039, "learning_rate": 1.2631068506450241e-05, "loss": 0.35148704051971436, "step": 8021, "token_acc": 0.876597234377735 }, { "epoch": 0.4328495116818648, "grad_norm": 0.41436949372291565, "learning_rate": 1.2629382476419768e-05, "loss": 0.3933568000793457, "step": 8022, "token_acc": 0.862578977599081 }, { "epoch": 0.43290346948686126, "grad_norm": 0.2831234335899353, "learning_rate": 1.2627696366088645e-05, "loss": 0.3262227177619934, "step": 8023, "token_acc": 0.8846153846153846 }, { "epoch": 0.43295742729185777, "grad_norm": 0.3411237597465515, "learning_rate": 1.262601017550836e-05, "loss": 0.3906921148300171, "step": 8024, "token_acc": 0.865369562786537 }, { "epoch": 0.4330113850968543, "grad_norm": 0.5049949288368225, "learning_rate": 1.2624323904730416e-05, "loss": 0.336897611618042, "step": 8025, "token_acc": 0.8859352344127598 }, { "epoch": 0.4330653429018507, "grad_norm": 0.4906199276447296, "learning_rate": 1.2622637553806305e-05, "loss": 0.33965402841567993, "step": 8026, "token_acc": 0.8779360088773812 }, { "epoch": 0.43311930070684723, "grad_norm": 0.32259175181388855, "learning_rate": 1.262095112278753e-05, "loss": 0.35109132528305054, "step": 8027, "token_acc": 0.8732880937692782 }, { "epoch": 0.43317325851184374, "grad_norm": 0.4842456579208374, "learning_rate": 1.2619264611725595e-05, "loss": 0.3753634989261627, "step": 8028, "token_acc": 0.8665901592311003 }, { "epoch": 0.43322721631684025, "grad_norm": 0.38907358050346375, "learning_rate": 1.2617578020672008e-05, "loss": 0.4261232614517212, "step": 8029, "token_acc": 0.8513999687157829 }, { "epoch": 0.4332811741218367, "grad_norm": 0.4021233022212982, "learning_rate": 1.2615891349678268e-05, "loss": 0.3409215807914734, "step": 8030, "token_acc": 0.8777004760161113 }, { "epoch": 0.4333351319268332, "grad_norm": 0.44382956624031067, "learning_rate": 1.2614204598795894e-05, "loss": 0.3558597266674042, "step": 8031, "token_acc": 0.8702276707530648 }, { "epoch": 0.4333890897318297, "grad_norm": 0.31181156635284424, "learning_rate": 1.2612517768076398e-05, "loss": 0.3783520758152008, "step": 8032, "token_acc": 0.8691548691548692 }, { "epoch": 0.4334430475368262, "grad_norm": 0.37843117117881775, "learning_rate": 1.2610830857571292e-05, "loss": 0.35638558864593506, "step": 8033, "token_acc": 0.876001526135063 }, { "epoch": 0.43349700534182267, "grad_norm": 0.46025440096855164, "learning_rate": 1.2609143867332097e-05, "loss": 0.3987724184989929, "step": 8034, "token_acc": 0.8641975308641975 }, { "epoch": 0.4335509631468192, "grad_norm": 0.326541543006897, "learning_rate": 1.2607456797410329e-05, "loss": 0.37959909439086914, "step": 8035, "token_acc": 0.8673521850899742 }, { "epoch": 0.4336049209518157, "grad_norm": 0.38472220301628113, "learning_rate": 1.2605769647857516e-05, "loss": 0.38524478673934937, "step": 8036, "token_acc": 0.8670782640276012 }, { "epoch": 0.4336588787568122, "grad_norm": 0.3478255569934845, "learning_rate": 1.260408241872518e-05, "loss": 0.37620851397514343, "step": 8037, "token_acc": 0.866871704745167 }, { "epoch": 0.43371283656180865, "grad_norm": 0.34819215536117554, "learning_rate": 1.260239511006485e-05, "loss": 0.33919557929039, "step": 8038, "token_acc": 0.881103515625 }, { "epoch": 0.43376679436680515, "grad_norm": 0.3907468616962433, "learning_rate": 1.2600707721928053e-05, "loss": 0.38390055298805237, "step": 8039, "token_acc": 0.8648456057007126 }, { "epoch": 0.43382075217180166, "grad_norm": 0.4088260531425476, "learning_rate": 1.2599020254366324e-05, "loss": 0.369057834148407, "step": 8040, "token_acc": 0.8683234930660442 }, { "epoch": 0.43387470997679817, "grad_norm": 0.36679431796073914, "learning_rate": 1.2597332707431197e-05, "loss": 0.3806656002998352, "step": 8041, "token_acc": 0.8684536721413078 }, { "epoch": 0.4339286677817946, "grad_norm": 0.3661295175552368, "learning_rate": 1.2595645081174213e-05, "loss": 0.37420207262039185, "step": 8042, "token_acc": 0.8717427683480755 }, { "epoch": 0.4339826255867911, "grad_norm": 0.36922112107276917, "learning_rate": 1.2593957375646904e-05, "loss": 0.43567150831222534, "step": 8043, "token_acc": 0.8511731061048208 }, { "epoch": 0.43403658339178763, "grad_norm": 0.39066073298454285, "learning_rate": 1.2592269590900816e-05, "loss": 0.36900967359542847, "step": 8044, "token_acc": 0.8705472379969025 }, { "epoch": 0.43409054119678414, "grad_norm": 0.37446656823158264, "learning_rate": 1.2590581726987497e-05, "loss": 0.3715009391307831, "step": 8045, "token_acc": 0.8693372177713037 }, { "epoch": 0.4341444990017806, "grad_norm": 0.28119900822639465, "learning_rate": 1.258889378395849e-05, "loss": 0.3589060306549072, "step": 8046, "token_acc": 0.8735015772870662 }, { "epoch": 0.4341984568067771, "grad_norm": 0.3532141149044037, "learning_rate": 1.2587205761865344e-05, "loss": 0.35811787843704224, "step": 8047, "token_acc": 0.8763702113233134 }, { "epoch": 0.4342524146117736, "grad_norm": 0.40192943811416626, "learning_rate": 1.2585517660759613e-05, "loss": 0.3971922993659973, "step": 8048, "token_acc": 0.8651548402828578 }, { "epoch": 0.43430637241677006, "grad_norm": 0.43783625960350037, "learning_rate": 1.2583829480692848e-05, "loss": 0.372262179851532, "step": 8049, "token_acc": 0.8701281885277742 }, { "epoch": 0.43436033022176657, "grad_norm": 0.41270682215690613, "learning_rate": 1.2582141221716607e-05, "loss": 0.3768349289894104, "step": 8050, "token_acc": 0.8710626514364832 }, { "epoch": 0.4344142880267631, "grad_norm": 0.37778159976005554, "learning_rate": 1.2580452883882453e-05, "loss": 0.3901650905609131, "step": 8051, "token_acc": 0.8572905894519132 }, { "epoch": 0.4344682458317596, "grad_norm": 0.3894261419773102, "learning_rate": 1.2578764467241941e-05, "loss": 0.36063405871391296, "step": 8052, "token_acc": 0.8725099601593626 }, { "epoch": 0.43452220363675603, "grad_norm": 0.5504685640335083, "learning_rate": 1.257707597184664e-05, "loss": 0.3826714754104614, "step": 8053, "token_acc": 0.8662420382165605 }, { "epoch": 0.43457616144175254, "grad_norm": 0.4138990342617035, "learning_rate": 1.2575387397748111e-05, "loss": 0.4308648407459259, "step": 8054, "token_acc": 0.8522987627921186 }, { "epoch": 0.43463011924674905, "grad_norm": 0.3449980914592743, "learning_rate": 1.257369874499793e-05, "loss": 0.379465788602829, "step": 8055, "token_acc": 0.862947755911086 }, { "epoch": 0.43468407705174555, "grad_norm": 0.3052994906902313, "learning_rate": 1.2572010013647657e-05, "loss": 0.34642672538757324, "step": 8056, "token_acc": 0.8807301389753164 }, { "epoch": 0.434738034856742, "grad_norm": 0.3833934962749481, "learning_rate": 1.2570321203748876e-05, "loss": 0.3553852438926697, "step": 8057, "token_acc": 0.8781798245614035 }, { "epoch": 0.4347919926617385, "grad_norm": 0.30726420879364014, "learning_rate": 1.256863231535316e-05, "loss": 0.4333285689353943, "step": 8058, "token_acc": 0.8516895398454619 }, { "epoch": 0.434845950466735, "grad_norm": 0.3872777819633484, "learning_rate": 1.2566943348512081e-05, "loss": 0.38943350315093994, "step": 8059, "token_acc": 0.8586529340018972 }, { "epoch": 0.43489990827173153, "grad_norm": 0.3275674283504486, "learning_rate": 1.256525430327723e-05, "loss": 0.39889049530029297, "step": 8060, "token_acc": 0.864488414475397 }, { "epoch": 0.434953866076728, "grad_norm": 0.4836820662021637, "learning_rate": 1.2563565179700183e-05, "loss": 0.33620139956474304, "step": 8061, "token_acc": 0.8785814116002795 }, { "epoch": 0.4350078238817245, "grad_norm": 0.3573499023914337, "learning_rate": 1.2561875977832528e-05, "loss": 0.37801995873451233, "step": 8062, "token_acc": 0.8704656199242015 }, { "epoch": 0.435061781686721, "grad_norm": 0.4596136510372162, "learning_rate": 1.2560186697725849e-05, "loss": 0.4139431118965149, "step": 8063, "token_acc": 0.8588680343372618 }, { "epoch": 0.4351157394917175, "grad_norm": 0.2633382976055145, "learning_rate": 1.2558497339431742e-05, "loss": 0.3044678568840027, "step": 8064, "token_acc": 0.8878900052882073 }, { "epoch": 0.43516969729671395, "grad_norm": 0.40989693999290466, "learning_rate": 1.2556807903001795e-05, "loss": 0.39791297912597656, "step": 8065, "token_acc": 0.8657727211160775 }, { "epoch": 0.43522365510171046, "grad_norm": 0.3915960192680359, "learning_rate": 1.2555118388487601e-05, "loss": 0.33674901723861694, "step": 8066, "token_acc": 0.8780002212144674 }, { "epoch": 0.43527761290670697, "grad_norm": 0.4085235893726349, "learning_rate": 1.2553428795940766e-05, "loss": 0.36825722455978394, "step": 8067, "token_acc": 0.8728214195503915 }, { "epoch": 0.4353315707117035, "grad_norm": 0.4447847604751587, "learning_rate": 1.2551739125412881e-05, "loss": 0.41348937153816223, "step": 8068, "token_acc": 0.8630780894617134 }, { "epoch": 0.4353855285166999, "grad_norm": 0.3027699887752533, "learning_rate": 1.2550049376955551e-05, "loss": 0.3412923812866211, "step": 8069, "token_acc": 0.8819634374550166 }, { "epoch": 0.43543948632169643, "grad_norm": 0.31695500016212463, "learning_rate": 1.2548359550620386e-05, "loss": 0.3401227295398712, "step": 8070, "token_acc": 0.8764071572461192 }, { "epoch": 0.43549344412669294, "grad_norm": 0.34952643513679504, "learning_rate": 1.2546669646458986e-05, "loss": 0.33045998215675354, "step": 8071, "token_acc": 0.8796653088186722 }, { "epoch": 0.4355474019316894, "grad_norm": 0.5021848082542419, "learning_rate": 1.2544979664522958e-05, "loss": 0.3978497385978699, "step": 8072, "token_acc": 0.8595188595188595 }, { "epoch": 0.4356013597366859, "grad_norm": 0.3958246409893036, "learning_rate": 1.2543289604863923e-05, "loss": 0.4095771908760071, "step": 8073, "token_acc": 0.8564724919093851 }, { "epoch": 0.4356553175416824, "grad_norm": 0.3239987790584564, "learning_rate": 1.2541599467533487e-05, "loss": 0.3579806387424469, "step": 8074, "token_acc": 0.8754019292604501 }, { "epoch": 0.4357092753466789, "grad_norm": 0.3589240610599518, "learning_rate": 1.253990925258327e-05, "loss": 0.38564151525497437, "step": 8075, "token_acc": 0.8641871423763042 }, { "epoch": 0.43576323315167537, "grad_norm": 0.5621614456176758, "learning_rate": 1.2538218960064887e-05, "loss": 0.34142717719078064, "step": 8076, "token_acc": 0.8747814685314685 }, { "epoch": 0.4358171909566719, "grad_norm": 0.3597109317779541, "learning_rate": 1.2536528590029965e-05, "loss": 0.36123010516166687, "step": 8077, "token_acc": 0.8756476683937824 }, { "epoch": 0.4358711487616684, "grad_norm": 0.43743130564689636, "learning_rate": 1.2534838142530124e-05, "loss": 0.33863356709480286, "step": 8078, "token_acc": 0.8816770812336222 }, { "epoch": 0.4359251065666649, "grad_norm": 0.5178209543228149, "learning_rate": 1.253314761761699e-05, "loss": 0.4229116439819336, "step": 8079, "token_acc": 0.8584566748095991 }, { "epoch": 0.43597906437166134, "grad_norm": 0.34109798073768616, "learning_rate": 1.2531457015342192e-05, "loss": 0.41796427965164185, "step": 8080, "token_acc": 0.8584988135381542 }, { "epoch": 0.43603302217665785, "grad_norm": 0.45905444025993347, "learning_rate": 1.2529766335757359e-05, "loss": 0.3988921046257019, "step": 8081, "token_acc": 0.8591240875912409 }, { "epoch": 0.43608697998165435, "grad_norm": 0.4624443054199219, "learning_rate": 1.2528075578914124e-05, "loss": 0.4254024624824524, "step": 8082, "token_acc": 0.8509122502172024 }, { "epoch": 0.43614093778665086, "grad_norm": 0.41816446185112, "learning_rate": 1.2526384744864123e-05, "loss": 0.34179845452308655, "step": 8083, "token_acc": 0.878649037832177 }, { "epoch": 0.4361948955916473, "grad_norm": 0.39351484179496765, "learning_rate": 1.2524693833658996e-05, "loss": 0.3539285957813263, "step": 8084, "token_acc": 0.875192406362237 }, { "epoch": 0.4362488533966438, "grad_norm": 0.32596421241760254, "learning_rate": 1.2523002845350378e-05, "loss": 0.3080470561981201, "step": 8085, "token_acc": 0.8844297338298166 }, { "epoch": 0.43630281120164033, "grad_norm": 0.45060622692108154, "learning_rate": 1.2521311779989915e-05, "loss": 0.38038700819015503, "step": 8086, "token_acc": 0.869192371688212 }, { "epoch": 0.43635676900663684, "grad_norm": 0.4441591203212738, "learning_rate": 1.2519620637629251e-05, "loss": 0.3797049820423126, "step": 8087, "token_acc": 0.8647519582245431 }, { "epoch": 0.4364107268116333, "grad_norm": 0.3434359133243561, "learning_rate": 1.2517929418320033e-05, "loss": 0.28345170617103577, "step": 8088, "token_acc": 0.8956882255389718 }, { "epoch": 0.4364646846166298, "grad_norm": 0.3537549674510956, "learning_rate": 1.251623812211391e-05, "loss": 0.3728055953979492, "step": 8089, "token_acc": 0.8722769322590271 }, { "epoch": 0.4365186424216263, "grad_norm": 0.44570526480674744, "learning_rate": 1.2514546749062534e-05, "loss": 0.4157792031764984, "step": 8090, "token_acc": 0.8586043112897332 }, { "epoch": 0.43657260022662275, "grad_norm": 0.3690299689769745, "learning_rate": 1.2512855299217559e-05, "loss": 0.3456135392189026, "step": 8091, "token_acc": 0.8755794825781367 }, { "epoch": 0.43662655803161926, "grad_norm": 0.40642043948173523, "learning_rate": 1.251116377263064e-05, "loss": 0.326002836227417, "step": 8092, "token_acc": 0.8805544759683896 }, { "epoch": 0.43668051583661577, "grad_norm": 0.39824172854423523, "learning_rate": 1.2509472169353441e-05, "loss": 0.38703453540802, "step": 8093, "token_acc": 0.8651129943502824 }, { "epoch": 0.4367344736416123, "grad_norm": 0.46712827682495117, "learning_rate": 1.2507780489437617e-05, "loss": 0.3701478838920593, "step": 8094, "token_acc": 0.875696236924331 }, { "epoch": 0.4367884314466087, "grad_norm": 0.3696604371070862, "learning_rate": 1.2506088732934834e-05, "loss": 0.3745214343070984, "step": 8095, "token_acc": 0.8712149301693239 }, { "epoch": 0.43684238925160523, "grad_norm": 0.31096377968788147, "learning_rate": 1.2504396899896759e-05, "loss": 0.3283191919326782, "step": 8096, "token_acc": 0.8832710485821059 }, { "epoch": 0.43689634705660174, "grad_norm": 0.375835120677948, "learning_rate": 1.2502704990375056e-05, "loss": 0.39586758613586426, "step": 8097, "token_acc": 0.8608147429679922 }, { "epoch": 0.43695030486159825, "grad_norm": 0.4771028757095337, "learning_rate": 1.2501013004421398e-05, "loss": 0.39634495973587036, "step": 8098, "token_acc": 0.8619787659639945 }, { "epoch": 0.4370042626665947, "grad_norm": 0.49796628952026367, "learning_rate": 1.2499320942087461e-05, "loss": 0.38339030742645264, "step": 8099, "token_acc": 0.8630182421227197 }, { "epoch": 0.4370582204715912, "grad_norm": 0.34788358211517334, "learning_rate": 1.2497628803424915e-05, "loss": 0.39888858795166016, "step": 8100, "token_acc": 0.8630731102850062 }, { "epoch": 0.4371121782765877, "grad_norm": 0.40232253074645996, "learning_rate": 1.2495936588485437e-05, "loss": 0.3909495770931244, "step": 8101, "token_acc": 0.8668315542827388 }, { "epoch": 0.4371661360815842, "grad_norm": 0.4353882670402527, "learning_rate": 1.2494244297320714e-05, "loss": 0.3691949248313904, "step": 8102, "token_acc": 0.8716952949962659 }, { "epoch": 0.4372200938865807, "grad_norm": 0.3971119821071625, "learning_rate": 1.2492551929982422e-05, "loss": 0.38634395599365234, "step": 8103, "token_acc": 0.8680441120547598 }, { "epoch": 0.4372740516915772, "grad_norm": 0.4091881513595581, "learning_rate": 1.2490859486522249e-05, "loss": 0.3775925040245056, "step": 8104, "token_acc": 0.8649569224604288 }, { "epoch": 0.4373280094965737, "grad_norm": 0.39252400398254395, "learning_rate": 1.2489166966991877e-05, "loss": 0.3887827396392822, "step": 8105, "token_acc": 0.8639545634658565 }, { "epoch": 0.4373819673015702, "grad_norm": 0.4046095609664917, "learning_rate": 1.2487474371443e-05, "loss": 0.3732125759124756, "step": 8106, "token_acc": 0.8661077549007521 }, { "epoch": 0.43743592510656665, "grad_norm": 0.3539055585861206, "learning_rate": 1.2485781699927307e-05, "loss": 0.3062281012535095, "step": 8107, "token_acc": 0.8919662824703252 }, { "epoch": 0.43748988291156315, "grad_norm": 0.3273376226425171, "learning_rate": 1.248408895249649e-05, "loss": 0.331143856048584, "step": 8108, "token_acc": 0.8840133119684457 }, { "epoch": 0.43754384071655966, "grad_norm": 0.3817353844642639, "learning_rate": 1.2482396129202249e-05, "loss": 0.3553796410560608, "step": 8109, "token_acc": 0.8802304426925409 }, { "epoch": 0.43759779852155617, "grad_norm": 0.4296378195285797, "learning_rate": 1.2480703230096279e-05, "loss": 0.3876720666885376, "step": 8110, "token_acc": 0.8686338487728756 }, { "epoch": 0.4376517563265526, "grad_norm": 0.39260315895080566, "learning_rate": 1.2479010255230281e-05, "loss": 0.36652523279190063, "step": 8111, "token_acc": 0.8715442613795029 }, { "epoch": 0.43770571413154913, "grad_norm": 0.36953115463256836, "learning_rate": 1.2477317204655965e-05, "loss": 0.37319836020469666, "step": 8112, "token_acc": 0.8732224309095787 }, { "epoch": 0.43775967193654564, "grad_norm": 0.3326677978038788, "learning_rate": 1.2475624078425026e-05, "loss": 0.3586418628692627, "step": 8113, "token_acc": 0.8801388888888889 }, { "epoch": 0.4378136297415421, "grad_norm": 0.44312697649002075, "learning_rate": 1.2473930876589176e-05, "loss": 0.369292289018631, "step": 8114, "token_acc": 0.8712953712953713 }, { "epoch": 0.4378675875465386, "grad_norm": 0.27127158641815186, "learning_rate": 1.2472237599200128e-05, "loss": 0.3638167977333069, "step": 8115, "token_acc": 0.8755322309139518 }, { "epoch": 0.4379215453515351, "grad_norm": 0.3375126123428345, "learning_rate": 1.2470544246309589e-05, "loss": 0.3673071563243866, "step": 8116, "token_acc": 0.872375 }, { "epoch": 0.4379755031565316, "grad_norm": 0.34929120540618896, "learning_rate": 1.2468850817969273e-05, "loss": 0.392736554145813, "step": 8117, "token_acc": 0.8610567514677103 }, { "epoch": 0.43802946096152806, "grad_norm": 0.3088383078575134, "learning_rate": 1.2467157314230904e-05, "loss": 0.3282007575035095, "step": 8118, "token_acc": 0.8790378006872852 }, { "epoch": 0.43808341876652457, "grad_norm": 0.38080671429634094, "learning_rate": 1.2465463735146197e-05, "loss": 0.3596932291984558, "step": 8119, "token_acc": 0.875882012724118 }, { "epoch": 0.4381373765715211, "grad_norm": 0.37480056285858154, "learning_rate": 1.2463770080766872e-05, "loss": 0.34562915563583374, "step": 8120, "token_acc": 0.8750616067028093 }, { "epoch": 0.4381913343765176, "grad_norm": 0.464140921831131, "learning_rate": 1.2462076351144657e-05, "loss": 0.40819329023361206, "step": 8121, "token_acc": 0.8573728699852409 }, { "epoch": 0.43824529218151403, "grad_norm": 0.462976336479187, "learning_rate": 1.2460382546331273e-05, "loss": 0.4490707218647003, "step": 8122, "token_acc": 0.8472745971888928 }, { "epoch": 0.43829924998651054, "grad_norm": 0.29118457436561584, "learning_rate": 1.2458688666378452e-05, "loss": 0.3339431881904602, "step": 8123, "token_acc": 0.8821329472795271 }, { "epoch": 0.43835320779150705, "grad_norm": 0.42061230540275574, "learning_rate": 1.2456994711337918e-05, "loss": 0.34237247705459595, "step": 8124, "token_acc": 0.8814476577033654 }, { "epoch": 0.43840716559650356, "grad_norm": 0.39372414350509644, "learning_rate": 1.2455300681261412e-05, "loss": 0.36210304498672485, "step": 8125, "token_acc": 0.8722971254133808 }, { "epoch": 0.4384611234015, "grad_norm": 0.45836108922958374, "learning_rate": 1.245360657620067e-05, "loss": 0.3733852505683899, "step": 8126, "token_acc": 0.8692017702016063 }, { "epoch": 0.4385150812064965, "grad_norm": 0.36493486166000366, "learning_rate": 1.245191239620742e-05, "loss": 0.3402901589870453, "step": 8127, "token_acc": 0.8801601971657425 }, { "epoch": 0.438569039011493, "grad_norm": 0.36236661672592163, "learning_rate": 1.2450218141333413e-05, "loss": 0.3535076379776001, "step": 8128, "token_acc": 0.8755641521598968 }, { "epoch": 0.43862299681648953, "grad_norm": 0.40218019485473633, "learning_rate": 1.2448523811630384e-05, "loss": 0.3654172718524933, "step": 8129, "token_acc": 0.8662323260327142 }, { "epoch": 0.438676954621486, "grad_norm": 0.440677285194397, "learning_rate": 1.2446829407150078e-05, "loss": 0.4056282043457031, "step": 8130, "token_acc": 0.8544717980679339 }, { "epoch": 0.4387309124264825, "grad_norm": 0.38811802864074707, "learning_rate": 1.2445134927944245e-05, "loss": 0.3327772617340088, "step": 8131, "token_acc": 0.8846411056620598 }, { "epoch": 0.438784870231479, "grad_norm": 0.31355753540992737, "learning_rate": 1.244344037406463e-05, "loss": 0.405558705329895, "step": 8132, "token_acc": 0.8620726610223984 }, { "epoch": 0.4388388280364755, "grad_norm": 0.47638604044914246, "learning_rate": 1.2441745745562986e-05, "loss": 0.3444637060165405, "step": 8133, "token_acc": 0.8796762589928058 }, { "epoch": 0.43889278584147196, "grad_norm": 0.45799338817596436, "learning_rate": 1.2440051042491068e-05, "loss": 0.3486329913139343, "step": 8134, "token_acc": 0.8820004133085348 }, { "epoch": 0.43894674364646846, "grad_norm": 0.40799155831336975, "learning_rate": 1.2438356264900629e-05, "loss": 0.3672860264778137, "step": 8135, "token_acc": 0.8669488962806169 }, { "epoch": 0.43900070145146497, "grad_norm": 0.3735690116882324, "learning_rate": 1.2436661412843429e-05, "loss": 0.3816075026988983, "step": 8136, "token_acc": 0.864655475994835 }, { "epoch": 0.4390546592564614, "grad_norm": 0.5311614274978638, "learning_rate": 1.243496648637123e-05, "loss": 0.37827035784721375, "step": 8137, "token_acc": 0.8725490196078431 }, { "epoch": 0.43910861706145793, "grad_norm": 0.3244081437587738, "learning_rate": 1.243327148553579e-05, "loss": 0.39038559794425964, "step": 8138, "token_acc": 0.8685196752029981 }, { "epoch": 0.43916257486645444, "grad_norm": 0.45821690559387207, "learning_rate": 1.2431576410388877e-05, "loss": 0.3749920129776001, "step": 8139, "token_acc": 0.8664525011473153 }, { "epoch": 0.43921653267145094, "grad_norm": 0.31276461482048035, "learning_rate": 1.2429881260982255e-05, "loss": 0.36552801728248596, "step": 8140, "token_acc": 0.8775800711743772 }, { "epoch": 0.4392704904764474, "grad_norm": 0.3243686556816101, "learning_rate": 1.2428186037367698e-05, "loss": 0.3739360570907593, "step": 8141, "token_acc": 0.8746961325966851 }, { "epoch": 0.4393244482814439, "grad_norm": 0.4853862524032593, "learning_rate": 1.2426490739596975e-05, "loss": 0.3387645483016968, "step": 8142, "token_acc": 0.8752312435765673 }, { "epoch": 0.4393784060864404, "grad_norm": 0.43304774165153503, "learning_rate": 1.242479536772186e-05, "loss": 0.44082945585250854, "step": 8143, "token_acc": 0.8441891891891892 }, { "epoch": 0.4394323638914369, "grad_norm": 0.3835044801235199, "learning_rate": 1.242309992179413e-05, "loss": 0.3367573022842407, "step": 8144, "token_acc": 0.878585416426679 }, { "epoch": 0.43948632169643337, "grad_norm": 0.43950262665748596, "learning_rate": 1.2421404401865565e-05, "loss": 0.39181917905807495, "step": 8145, "token_acc": 0.8630794492863458 }, { "epoch": 0.4395402795014299, "grad_norm": 0.4207136034965515, "learning_rate": 1.2419708807987943e-05, "loss": 0.3281993567943573, "step": 8146, "token_acc": 0.886237607124853 }, { "epoch": 0.4395942373064264, "grad_norm": 0.33023515343666077, "learning_rate": 1.2418013140213048e-05, "loss": 0.38212257623672485, "step": 8147, "token_acc": 0.8681066176470589 }, { "epoch": 0.4396481951114229, "grad_norm": 0.2872057557106018, "learning_rate": 1.2416317398592663e-05, "loss": 0.35438209772109985, "step": 8148, "token_acc": 0.873578302712161 }, { "epoch": 0.43970215291641934, "grad_norm": 0.34936830401420593, "learning_rate": 1.2414621583178578e-05, "loss": 0.346583753824234, "step": 8149, "token_acc": 0.8781289506953224 }, { "epoch": 0.43975611072141585, "grad_norm": 0.3702546954154968, "learning_rate": 1.2412925694022584e-05, "loss": 0.4507521092891693, "step": 8150, "token_acc": 0.8454136465883529 }, { "epoch": 0.43981006852641236, "grad_norm": 0.37612342834472656, "learning_rate": 1.241122973117647e-05, "loss": 0.3549109101295471, "step": 8151, "token_acc": 0.8737365084803838 }, { "epoch": 0.43986402633140886, "grad_norm": 0.4044942557811737, "learning_rate": 1.2409533694692036e-05, "loss": 0.40720629692077637, "step": 8152, "token_acc": 0.8604097818902842 }, { "epoch": 0.4399179841364053, "grad_norm": 0.34758928418159485, "learning_rate": 1.2407837584621069e-05, "loss": 0.3708752691745758, "step": 8153, "token_acc": 0.8740963033692538 }, { "epoch": 0.4399719419414018, "grad_norm": 0.3548547625541687, "learning_rate": 1.2406141401015374e-05, "loss": 0.3704727590084076, "step": 8154, "token_acc": 0.870515329419439 }, { "epoch": 0.44002589974639833, "grad_norm": 0.3570558428764343, "learning_rate": 1.2404445143926753e-05, "loss": 0.35230889916419983, "step": 8155, "token_acc": 0.8797044884304432 }, { "epoch": 0.4400798575513948, "grad_norm": 0.36046501994132996, "learning_rate": 1.2402748813407003e-05, "loss": 0.37045425176620483, "step": 8156, "token_acc": 0.8712938005390836 }, { "epoch": 0.4401338153563913, "grad_norm": 0.47130510210990906, "learning_rate": 1.2401052409507939e-05, "loss": 0.3719034790992737, "step": 8157, "token_acc": 0.8702679091438555 }, { "epoch": 0.4401877731613878, "grad_norm": 0.3582206964492798, "learning_rate": 1.239935593228136e-05, "loss": 0.4091809093952179, "step": 8158, "token_acc": 0.8603943171933894 }, { "epoch": 0.4402417309663843, "grad_norm": 0.3774843215942383, "learning_rate": 1.2397659381779079e-05, "loss": 0.37087124586105347, "step": 8159, "token_acc": 0.8689006466784245 }, { "epoch": 0.44029568877138076, "grad_norm": 0.3873686194419861, "learning_rate": 1.2395962758052911e-05, "loss": 0.3356204032897949, "step": 8160, "token_acc": 0.8792295019642631 }, { "epoch": 0.44034964657637726, "grad_norm": 0.3022780120372772, "learning_rate": 1.2394266061154667e-05, "loss": 0.37366557121276855, "step": 8161, "token_acc": 0.8672550432276657 }, { "epoch": 0.44040360438137377, "grad_norm": 0.47324976325035095, "learning_rate": 1.2392569291136165e-05, "loss": 0.415956974029541, "step": 8162, "token_acc": 0.8536742864522795 }, { "epoch": 0.4404575621863703, "grad_norm": 0.3665274381637573, "learning_rate": 1.2390872448049223e-05, "loss": 0.31403884291648865, "step": 8163, "token_acc": 0.8860279170619325 }, { "epoch": 0.44051151999136673, "grad_norm": 0.33138367533683777, "learning_rate": 1.2389175531945662e-05, "loss": 0.37950706481933594, "step": 8164, "token_acc": 0.867392380009896 }, { "epoch": 0.44056547779636324, "grad_norm": 0.33338600397109985, "learning_rate": 1.2387478542877307e-05, "loss": 0.33619898557662964, "step": 8165, "token_acc": 0.878429774517794 }, { "epoch": 0.44061943560135974, "grad_norm": 0.44219669699668884, "learning_rate": 1.2385781480895984e-05, "loss": 0.42986536026000977, "step": 8166, "token_acc": 0.8515797207935342 }, { "epoch": 0.44067339340635625, "grad_norm": 0.3751773238182068, "learning_rate": 1.2384084346053518e-05, "loss": 0.3397531509399414, "step": 8167, "token_acc": 0.8811867461527797 }, { "epoch": 0.4407273512113527, "grad_norm": 0.29996559023857117, "learning_rate": 1.2382387138401741e-05, "loss": 0.3929729759693146, "step": 8168, "token_acc": 0.8622559652928417 }, { "epoch": 0.4407813090163492, "grad_norm": 0.33406302332878113, "learning_rate": 1.2380689857992483e-05, "loss": 0.33223435282707214, "step": 8169, "token_acc": 0.8831769525571561 }, { "epoch": 0.4408352668213457, "grad_norm": 0.362080454826355, "learning_rate": 1.2378992504877586e-05, "loss": 0.36523550748825073, "step": 8170, "token_acc": 0.869090472762179 }, { "epoch": 0.4408892246263422, "grad_norm": 0.46280646324157715, "learning_rate": 1.2377295079108878e-05, "loss": 0.3785620927810669, "step": 8171, "token_acc": 0.8692298564139077 }, { "epoch": 0.4409431824313387, "grad_norm": 0.45351144671440125, "learning_rate": 1.2375597580738198e-05, "loss": 0.4361877739429474, "step": 8172, "token_acc": 0.8537443457865639 }, { "epoch": 0.4409971402363352, "grad_norm": 0.5119465589523315, "learning_rate": 1.2373900009817392e-05, "loss": 0.3959270417690277, "step": 8173, "token_acc": 0.8641732283464567 }, { "epoch": 0.4410510980413317, "grad_norm": 0.40352174639701843, "learning_rate": 1.2372202366398303e-05, "loss": 0.3640516698360443, "step": 8174, "token_acc": 0.86934835076428 }, { "epoch": 0.4411050558463282, "grad_norm": 0.3541419208049774, "learning_rate": 1.2370504650532773e-05, "loss": 0.4129599928855896, "step": 8175, "token_acc": 0.8562477406916496 }, { "epoch": 0.44115901365132465, "grad_norm": 0.35955098271369934, "learning_rate": 1.2368806862272655e-05, "loss": 0.35622966289520264, "step": 8176, "token_acc": 0.8722865894260221 }, { "epoch": 0.44121297145632116, "grad_norm": 0.3171727955341339, "learning_rate": 1.2367109001669795e-05, "loss": 0.34232139587402344, "step": 8177, "token_acc": 0.8775202304210671 }, { "epoch": 0.44126692926131766, "grad_norm": 0.3787786066532135, "learning_rate": 1.2365411068776045e-05, "loss": 0.35117483139038086, "step": 8178, "token_acc": 0.8773946360153256 }, { "epoch": 0.4413208870663141, "grad_norm": 0.2753162384033203, "learning_rate": 1.2363713063643261e-05, "loss": 0.28059127926826477, "step": 8179, "token_acc": 0.898870636550308 }, { "epoch": 0.4413748448713106, "grad_norm": 0.31427672505378723, "learning_rate": 1.23620149863233e-05, "loss": 0.3586341142654419, "step": 8180, "token_acc": 0.87151841868823 }, { "epoch": 0.44142880267630713, "grad_norm": 0.42130595445632935, "learning_rate": 1.236031683686802e-05, "loss": 0.39575856924057007, "step": 8181, "token_acc": 0.8635872825434913 }, { "epoch": 0.44148276048130364, "grad_norm": 0.41355592012405396, "learning_rate": 1.2358618615329282e-05, "loss": 0.3998531699180603, "step": 8182, "token_acc": 0.8601779312176338 }, { "epoch": 0.4415367182863001, "grad_norm": 0.5231595635414124, "learning_rate": 1.2356920321758948e-05, "loss": 0.3699648976325989, "step": 8183, "token_acc": 0.8682634730538922 }, { "epoch": 0.4415906760912966, "grad_norm": 0.449984610080719, "learning_rate": 1.2355221956208885e-05, "loss": 0.3698645234107971, "step": 8184, "token_acc": 0.8697568795084157 }, { "epoch": 0.4416446338962931, "grad_norm": 0.39201000332832336, "learning_rate": 1.2353523518730958e-05, "loss": 0.3635631203651428, "step": 8185, "token_acc": 0.871087467524434 }, { "epoch": 0.4416985917012896, "grad_norm": 0.4116602838039398, "learning_rate": 1.2351825009377045e-05, "loss": 0.4299108386039734, "step": 8186, "token_acc": 0.853592046183451 }, { "epoch": 0.44175254950628606, "grad_norm": 0.3457552194595337, "learning_rate": 1.2350126428199012e-05, "loss": 0.3394584655761719, "step": 8187, "token_acc": 0.8840066146940704 }, { "epoch": 0.44180650731128257, "grad_norm": 0.41277241706848145, "learning_rate": 1.234842777524873e-05, "loss": 0.3569317162036896, "step": 8188, "token_acc": 0.8773374267373709 }, { "epoch": 0.4418604651162791, "grad_norm": 0.46075183153152466, "learning_rate": 1.2346729050578081e-05, "loss": 0.4064142107963562, "step": 8189, "token_acc": 0.8595939751146038 }, { "epoch": 0.4419144229212756, "grad_norm": 0.39517897367477417, "learning_rate": 1.2345030254238941e-05, "loss": 0.36707738041877747, "step": 8190, "token_acc": 0.8674640570254923 }, { "epoch": 0.44196838072627204, "grad_norm": 0.40113645792007446, "learning_rate": 1.2343331386283188e-05, "loss": 0.3759620785713196, "step": 8191, "token_acc": 0.874572962420693 }, { "epoch": 0.44202233853126854, "grad_norm": 0.406781405210495, "learning_rate": 1.2341632446762713e-05, "loss": 0.37119024991989136, "step": 8192, "token_acc": 0.8742304309586632 }, { "epoch": 0.44207629633626505, "grad_norm": 0.399801641702652, "learning_rate": 1.2339933435729395e-05, "loss": 0.3819238841533661, "step": 8193, "token_acc": 0.8634401569653368 }, { "epoch": 0.44213025414126156, "grad_norm": 0.4045865833759308, "learning_rate": 1.2338234353235124e-05, "loss": 0.3709665834903717, "step": 8194, "token_acc": 0.8692957746478873 }, { "epoch": 0.442184211946258, "grad_norm": 0.3781452178955078, "learning_rate": 1.2336535199331788e-05, "loss": 0.390006422996521, "step": 8195, "token_acc": 0.8679139905054454 }, { "epoch": 0.4422381697512545, "grad_norm": 0.3292511999607086, "learning_rate": 1.2334835974071277e-05, "loss": 0.3737490773200989, "step": 8196, "token_acc": 0.8723233940364219 }, { "epoch": 0.442292127556251, "grad_norm": 0.5156688094139099, "learning_rate": 1.2333136677505489e-05, "loss": 0.3593441843986511, "step": 8197, "token_acc": 0.8720124696917215 }, { "epoch": 0.44234608536124753, "grad_norm": 0.36136868596076965, "learning_rate": 1.2331437309686316e-05, "loss": 0.3641652464866638, "step": 8198, "token_acc": 0.8696715912298861 }, { "epoch": 0.442400043166244, "grad_norm": 0.3503282368183136, "learning_rate": 1.232973787066566e-05, "loss": 0.2808741331100464, "step": 8199, "token_acc": 0.8955873213175886 }, { "epoch": 0.4424540009712405, "grad_norm": 0.332902729511261, "learning_rate": 1.232803836049542e-05, "loss": 0.32086074352264404, "step": 8200, "token_acc": 0.883184011026878 }, { "epoch": 0.442507958776237, "grad_norm": 0.4573851227760315, "learning_rate": 1.2326338779227495e-05, "loss": 0.4220542311668396, "step": 8201, "token_acc": 0.8577537058152793 }, { "epoch": 0.44256191658123345, "grad_norm": 0.38659700751304626, "learning_rate": 1.2324639126913798e-05, "loss": 0.3778601288795471, "step": 8202, "token_acc": 0.8620567375886525 }, { "epoch": 0.44261587438622996, "grad_norm": 0.4026467800140381, "learning_rate": 1.232293940360623e-05, "loss": 0.3538147807121277, "step": 8203, "token_acc": 0.875935628742515 }, { "epoch": 0.44266983219122646, "grad_norm": 0.41801130771636963, "learning_rate": 1.2321239609356698e-05, "loss": 0.32131004333496094, "step": 8204, "token_acc": 0.8786341463414634 }, { "epoch": 0.44272378999622297, "grad_norm": 0.46905237436294556, "learning_rate": 1.231953974421712e-05, "loss": 0.37948015332221985, "step": 8205, "token_acc": 0.8736825546641498 }, { "epoch": 0.4427777478012194, "grad_norm": 0.4466531574726105, "learning_rate": 1.2317839808239405e-05, "loss": 0.42165371775627136, "step": 8206, "token_acc": 0.8528904227782571 }, { "epoch": 0.44283170560621593, "grad_norm": 0.37554991245269775, "learning_rate": 1.2316139801475467e-05, "loss": 0.3281257152557373, "step": 8207, "token_acc": 0.8836813611755607 }, { "epoch": 0.44288566341121244, "grad_norm": 0.4047641456127167, "learning_rate": 1.2314439723977227e-05, "loss": 0.35763832926750183, "step": 8208, "token_acc": 0.8726776887676837 }, { "epoch": 0.44293962121620895, "grad_norm": 0.4919179081916809, "learning_rate": 1.2312739575796606e-05, "loss": 0.41897648572921753, "step": 8209, "token_acc": 0.8562531560343376 }, { "epoch": 0.4429935790212054, "grad_norm": 0.4295870065689087, "learning_rate": 1.2311039356985523e-05, "loss": 0.39301598072052, "step": 8210, "token_acc": 0.8653871262566915 }, { "epoch": 0.4430475368262019, "grad_norm": 0.4104096293449402, "learning_rate": 1.2309339067595905e-05, "loss": 0.3719218075275421, "step": 8211, "token_acc": 0.8719782577599771 }, { "epoch": 0.4431014946311984, "grad_norm": 0.4252706468105316, "learning_rate": 1.2307638707679676e-05, "loss": 0.3042161464691162, "step": 8212, "token_acc": 0.8867025365103767 }, { "epoch": 0.4431554524361949, "grad_norm": 0.47869113087654114, "learning_rate": 1.2305938277288765e-05, "loss": 0.32735583186149597, "step": 8213, "token_acc": 0.8832224685883222 }, { "epoch": 0.44320941024119137, "grad_norm": 0.37090516090393066, "learning_rate": 1.23042377764751e-05, "loss": 0.42278093099594116, "step": 8214, "token_acc": 0.8531652583460313 }, { "epoch": 0.4432633680461879, "grad_norm": 0.42369887232780457, "learning_rate": 1.2302537205290623e-05, "loss": 0.35883063077926636, "step": 8215, "token_acc": 0.8740933351580676 }, { "epoch": 0.4433173258511844, "grad_norm": 0.4206761419773102, "learning_rate": 1.230083656378726e-05, "loss": 0.3643069267272949, "step": 8216, "token_acc": 0.8686938493434693 }, { "epoch": 0.4433712836561809, "grad_norm": 0.3794553875923157, "learning_rate": 1.229913585201695e-05, "loss": 0.36464762687683105, "step": 8217, "token_acc": 0.8718765721952038 }, { "epoch": 0.44342524146117734, "grad_norm": 0.3490515649318695, "learning_rate": 1.2297435070031633e-05, "loss": 0.3483392298221588, "step": 8218, "token_acc": 0.8802743229106126 }, { "epoch": 0.44347919926617385, "grad_norm": 0.3980613648891449, "learning_rate": 1.2295734217883254e-05, "loss": 0.4329909086227417, "step": 8219, "token_acc": 0.8487462928012941 }, { "epoch": 0.44353315707117036, "grad_norm": 0.4172763228416443, "learning_rate": 1.2294033295623751e-05, "loss": 0.3905075490474701, "step": 8220, "token_acc": 0.8636517424156986 }, { "epoch": 0.4435871148761668, "grad_norm": 0.3720622956752777, "learning_rate": 1.2292332303305075e-05, "loss": 0.4088582992553711, "step": 8221, "token_acc": 0.8608432383942588 }, { "epoch": 0.4436410726811633, "grad_norm": 0.34131747484207153, "learning_rate": 1.2290631240979168e-05, "loss": 0.3359440565109253, "step": 8222, "token_acc": 0.8798736551179548 }, { "epoch": 0.4436950304861598, "grad_norm": 0.3938451111316681, "learning_rate": 1.228893010869798e-05, "loss": 0.38230377435684204, "step": 8223, "token_acc": 0.8621760259179265 }, { "epoch": 0.44374898829115633, "grad_norm": 0.3913465142250061, "learning_rate": 1.228722890651347e-05, "loss": 0.38799649477005005, "step": 8224, "token_acc": 0.8647540983606558 }, { "epoch": 0.4438029460961528, "grad_norm": 0.3595507740974426, "learning_rate": 1.228552763447759e-05, "loss": 0.34724318981170654, "step": 8225, "token_acc": 0.8762923779754749 }, { "epoch": 0.4438569039011493, "grad_norm": 0.37062564492225647, "learning_rate": 1.2283826292642294e-05, "loss": 0.36851876974105835, "step": 8226, "token_acc": 0.8711882229232387 }, { "epoch": 0.4439108617061458, "grad_norm": 0.3461938202381134, "learning_rate": 1.2282124881059538e-05, "loss": 0.37466564774513245, "step": 8227, "token_acc": 0.8716784765279008 }, { "epoch": 0.4439648195111423, "grad_norm": 0.38981592655181885, "learning_rate": 1.2280423399781289e-05, "loss": 0.3723266124725342, "step": 8228, "token_acc": 0.8682011234415673 }, { "epoch": 0.44401877731613876, "grad_norm": 0.5015191435813904, "learning_rate": 1.2278721848859507e-05, "loss": 0.39195501804351807, "step": 8229, "token_acc": 0.8684506408814932 }, { "epoch": 0.44407273512113526, "grad_norm": 0.435687780380249, "learning_rate": 1.2277020228346155e-05, "loss": 0.34287112951278687, "step": 8230, "token_acc": 0.8750241452578713 }, { "epoch": 0.44412669292613177, "grad_norm": 0.5228608846664429, "learning_rate": 1.2275318538293202e-05, "loss": 0.39413753151893616, "step": 8231, "token_acc": 0.8616849580374435 }, { "epoch": 0.4441806507311283, "grad_norm": 0.37751469016075134, "learning_rate": 1.2273616778752618e-05, "loss": 0.4086008667945862, "step": 8232, "token_acc": 0.8553673859601202 }, { "epoch": 0.44423460853612473, "grad_norm": 0.4123045802116394, "learning_rate": 1.2271914949776372e-05, "loss": 0.4052164554595947, "step": 8233, "token_acc": 0.8588216611084644 }, { "epoch": 0.44428856634112124, "grad_norm": 0.4290477931499481, "learning_rate": 1.2270213051416437e-05, "loss": 0.3690868020057678, "step": 8234, "token_acc": 0.8695084819486734 }, { "epoch": 0.44434252414611775, "grad_norm": 0.4319865107536316, "learning_rate": 1.2268511083724792e-05, "loss": 0.3891177475452423, "step": 8235, "token_acc": 0.8659675334003735 }, { "epoch": 0.44439648195111425, "grad_norm": 0.4337214231491089, "learning_rate": 1.2266809046753414e-05, "loss": 0.40102696418762207, "step": 8236, "token_acc": 0.8637145986543577 }, { "epoch": 0.4444504397561107, "grad_norm": 0.3885129690170288, "learning_rate": 1.226510694055428e-05, "loss": 0.3443836271762848, "step": 8237, "token_acc": 0.88 }, { "epoch": 0.4445043975611072, "grad_norm": 0.4495203197002411, "learning_rate": 1.2263404765179376e-05, "loss": 0.3959345817565918, "step": 8238, "token_acc": 0.8637957637817366 }, { "epoch": 0.4445583553661037, "grad_norm": 0.42954352498054504, "learning_rate": 1.226170252068068e-05, "loss": 0.3675130009651184, "step": 8239, "token_acc": 0.8702591062711228 }, { "epoch": 0.4446123131711002, "grad_norm": 0.4006570875644684, "learning_rate": 1.2260000207110184e-05, "loss": 0.35507452487945557, "step": 8240, "token_acc": 0.8769572185920355 }, { "epoch": 0.4446662709760967, "grad_norm": 0.4839516878128052, "learning_rate": 1.2258297824519871e-05, "loss": 0.40232133865356445, "step": 8241, "token_acc": 0.8602941176470589 }, { "epoch": 0.4447202287810932, "grad_norm": 0.37701117992401123, "learning_rate": 1.2256595372961734e-05, "loss": 0.374336302280426, "step": 8242, "token_acc": 0.8682526432943796 }, { "epoch": 0.4447741865860897, "grad_norm": 0.3825969994068146, "learning_rate": 1.2254892852487768e-05, "loss": 0.39821523427963257, "step": 8243, "token_acc": 0.8633906633906634 }, { "epoch": 0.44482814439108614, "grad_norm": 0.4999568462371826, "learning_rate": 1.2253190263149963e-05, "loss": 0.3745901584625244, "step": 8244, "token_acc": 0.8710531291359426 }, { "epoch": 0.44488210219608265, "grad_norm": 0.3382711410522461, "learning_rate": 1.2251487605000319e-05, "loss": 0.3575935959815979, "step": 8245, "token_acc": 0.8733195449844882 }, { "epoch": 0.44493606000107916, "grad_norm": 0.1970025599002838, "learning_rate": 1.2249784878090834e-05, "loss": 0.28425848484039307, "step": 8246, "token_acc": 0.8943962728096285 }, { "epoch": 0.44499001780607567, "grad_norm": 0.33173301815986633, "learning_rate": 1.2248082082473507e-05, "loss": 0.3640691041946411, "step": 8247, "token_acc": 0.869793242561775 }, { "epoch": 0.4450439756110721, "grad_norm": 0.4222557246685028, "learning_rate": 1.2246379218200342e-05, "loss": 0.4098418354988098, "step": 8248, "token_acc": 0.8560130262851826 }, { "epoch": 0.4450979334160686, "grad_norm": 0.4095853567123413, "learning_rate": 1.2244676285323342e-05, "loss": 0.34895092248916626, "step": 8249, "token_acc": 0.8840371081253998 }, { "epoch": 0.44515189122106513, "grad_norm": 0.4545498788356781, "learning_rate": 1.224297328389452e-05, "loss": 0.4093977212905884, "step": 8250, "token_acc": 0.8612725844461901 }, { "epoch": 0.44520584902606164, "grad_norm": 0.41391950845718384, "learning_rate": 1.2241270213965881e-05, "loss": 0.38391321897506714, "step": 8251, "token_acc": 0.8665300371461508 }, { "epoch": 0.4452598068310581, "grad_norm": 0.2729511559009552, "learning_rate": 1.2239567075589436e-05, "loss": 0.3890168368816376, "step": 8252, "token_acc": 0.8680326998619811 }, { "epoch": 0.4453137646360546, "grad_norm": 0.4036422073841095, "learning_rate": 1.2237863868817199e-05, "loss": 0.3262088894844055, "step": 8253, "token_acc": 0.8797337981302488 }, { "epoch": 0.4453677224410511, "grad_norm": 0.5128771066665649, "learning_rate": 1.2236160593701187e-05, "loss": 0.40602603554725647, "step": 8254, "token_acc": 0.8602811950790861 }, { "epoch": 0.4454216802460476, "grad_norm": 0.46170884370803833, "learning_rate": 1.2234457250293415e-05, "loss": 0.4334868788719177, "step": 8255, "token_acc": 0.8550564893910169 }, { "epoch": 0.44547563805104406, "grad_norm": 0.40651601552963257, "learning_rate": 1.22327538386459e-05, "loss": 0.36257368326187134, "step": 8256, "token_acc": 0.8734193716594968 }, { "epoch": 0.44552959585604057, "grad_norm": 0.3606221377849579, "learning_rate": 1.2231050358810675e-05, "loss": 0.3471563756465912, "step": 8257, "token_acc": 0.8744095129499918 }, { "epoch": 0.4455835536610371, "grad_norm": 0.4245343804359436, "learning_rate": 1.2229346810839752e-05, "loss": 0.40241414308547974, "step": 8258, "token_acc": 0.8640611724723875 }, { "epoch": 0.4456375114660336, "grad_norm": 0.39561113715171814, "learning_rate": 1.222764319478516e-05, "loss": 0.3115352690219879, "step": 8259, "token_acc": 0.886842515998127 }, { "epoch": 0.44569146927103004, "grad_norm": 0.3477323055267334, "learning_rate": 1.2225939510698931e-05, "loss": 0.3322259187698364, "step": 8260, "token_acc": 0.8816187097621863 }, { "epoch": 0.44574542707602655, "grad_norm": 0.43344277143478394, "learning_rate": 1.2224235758633094e-05, "loss": 0.3710057735443115, "step": 8261, "token_acc": 0.8699673558215452 }, { "epoch": 0.44579938488102305, "grad_norm": 0.48285242915153503, "learning_rate": 1.222253193863968e-05, "loss": 0.41232597827911377, "step": 8262, "token_acc": 0.8573244808588784 }, { "epoch": 0.44585334268601956, "grad_norm": 0.3923604488372803, "learning_rate": 1.2220828050770721e-05, "loss": 0.34174633026123047, "step": 8263, "token_acc": 0.8777340035321288 }, { "epoch": 0.445907300491016, "grad_norm": 0.4123738408088684, "learning_rate": 1.2219124095078254e-05, "loss": 0.45976752042770386, "step": 8264, "token_acc": 0.8388911209321013 }, { "epoch": 0.4459612582960125, "grad_norm": 0.3696421980857849, "learning_rate": 1.2217420071614319e-05, "loss": 0.36254796385765076, "step": 8265, "token_acc": 0.8702061048786851 }, { "epoch": 0.446015216101009, "grad_norm": 0.4069390594959259, "learning_rate": 1.2215715980430955e-05, "loss": 0.38245150446891785, "step": 8266, "token_acc": 0.868259982512387 }, { "epoch": 0.4460691739060055, "grad_norm": 0.44680318236351013, "learning_rate": 1.2214011821580207e-05, "loss": 0.36858701705932617, "step": 8267, "token_acc": 0.8678199529727914 }, { "epoch": 0.446123131711002, "grad_norm": 0.4002804756164551, "learning_rate": 1.221230759511412e-05, "loss": 0.4598425626754761, "step": 8268, "token_acc": 0.8453998954521693 }, { "epoch": 0.4461770895159985, "grad_norm": 0.5206528902053833, "learning_rate": 1.2210603301084735e-05, "loss": 0.3964742422103882, "step": 8269, "token_acc": 0.8592206859400251 }, { "epoch": 0.446231047320995, "grad_norm": 0.41597670316696167, "learning_rate": 1.2208898939544107e-05, "loss": 0.38021552562713623, "step": 8270, "token_acc": 0.8623751387347391 }, { "epoch": 0.44628500512599145, "grad_norm": 0.4246120750904083, "learning_rate": 1.2207194510544282e-05, "loss": 0.33109986782073975, "step": 8271, "token_acc": 0.8841187321817877 }, { "epoch": 0.44633896293098796, "grad_norm": 0.337838351726532, "learning_rate": 1.2205490014137313e-05, "loss": 0.41566717624664307, "step": 8272, "token_acc": 0.8612395929694727 }, { "epoch": 0.44639292073598447, "grad_norm": 0.4061387777328491, "learning_rate": 1.220378545037526e-05, "loss": 0.3259962201118469, "step": 8273, "token_acc": 0.8816371681415929 }, { "epoch": 0.446446878540981, "grad_norm": 0.42719095945358276, "learning_rate": 1.2202080819310175e-05, "loss": 0.38122689723968506, "step": 8274, "token_acc": 0.8675174718930416 }, { "epoch": 0.4465008363459774, "grad_norm": 0.45243921875953674, "learning_rate": 1.220037612099412e-05, "loss": 0.2680150866508484, "step": 8275, "token_acc": 0.8989740914623544 }, { "epoch": 0.44655479415097393, "grad_norm": 0.4458816349506378, "learning_rate": 1.2198671355479155e-05, "loss": 0.4497729539871216, "step": 8276, "token_acc": 0.8504579278763594 }, { "epoch": 0.44660875195597044, "grad_norm": 0.38831931352615356, "learning_rate": 1.2196966522817342e-05, "loss": 0.3318363428115845, "step": 8277, "token_acc": 0.8806262230919765 }, { "epoch": 0.44666270976096695, "grad_norm": 0.42498499155044556, "learning_rate": 1.2195261623060746e-05, "loss": 0.41516441106796265, "step": 8278, "token_acc": 0.8546000924641701 }, { "epoch": 0.4467166675659634, "grad_norm": 0.4677489697933197, "learning_rate": 1.2193556656261437e-05, "loss": 0.2863805294036865, "step": 8279, "token_acc": 0.899070385126162 }, { "epoch": 0.4467706253709599, "grad_norm": 0.4434506893157959, "learning_rate": 1.2191851622471482e-05, "loss": 0.3128817081451416, "step": 8280, "token_acc": 0.8875559820857326 }, { "epoch": 0.4468245831759564, "grad_norm": 0.3371787667274475, "learning_rate": 1.2190146521742949e-05, "loss": 0.3338826298713684, "step": 8281, "token_acc": 0.8789367880276971 }, { "epoch": 0.4468785409809529, "grad_norm": 0.4002407491207123, "learning_rate": 1.218844135412792e-05, "loss": 0.3914150595664978, "step": 8282, "token_acc": 0.8655521963364752 }, { "epoch": 0.4469324987859494, "grad_norm": 0.34523651003837585, "learning_rate": 1.2186736119678462e-05, "loss": 0.3353975713253021, "step": 8283, "token_acc": 0.8830981220135571 }, { "epoch": 0.4469864565909459, "grad_norm": 0.344303160905838, "learning_rate": 1.2185030818446659e-05, "loss": 0.37120383977890015, "step": 8284, "token_acc": 0.8715948489351164 }, { "epoch": 0.4470404143959424, "grad_norm": 0.3581590950489044, "learning_rate": 1.2183325450484582e-05, "loss": 0.3009093999862671, "step": 8285, "token_acc": 0.8920368587539541 }, { "epoch": 0.44709437220093884, "grad_norm": 0.37408629059791565, "learning_rate": 1.2181620015844322e-05, "loss": 0.3524143695831299, "step": 8286, "token_acc": 0.8796204503611387 }, { "epoch": 0.44714833000593535, "grad_norm": 0.40555089712142944, "learning_rate": 1.217991451457796e-05, "loss": 0.43665510416030884, "step": 8287, "token_acc": 0.8497725980670836 }, { "epoch": 0.44720228781093185, "grad_norm": 0.4111395478248596, "learning_rate": 1.2178208946737575e-05, "loss": 0.3675435185432434, "step": 8288, "token_acc": 0.8737878561837983 }, { "epoch": 0.44725624561592836, "grad_norm": 0.3561992943286896, "learning_rate": 1.217650331237526e-05, "loss": 0.37909212708473206, "step": 8289, "token_acc": 0.8723464881037185 }, { "epoch": 0.4473102034209248, "grad_norm": 0.36046990752220154, "learning_rate": 1.2174797611543108e-05, "loss": 0.3512113094329834, "step": 8290, "token_acc": 0.8809330078654732 }, { "epoch": 0.4473641612259213, "grad_norm": 0.37937843799591064, "learning_rate": 1.2173091844293201e-05, "loss": 0.366748183965683, "step": 8291, "token_acc": 0.8712121212121212 }, { "epoch": 0.4474181190309178, "grad_norm": 0.4614518880844116, "learning_rate": 1.2171386010677642e-05, "loss": 0.4188869595527649, "step": 8292, "token_acc": 0.8567662565905096 }, { "epoch": 0.44747207683591433, "grad_norm": 0.4427312910556793, "learning_rate": 1.2169680110748523e-05, "loss": 0.375040203332901, "step": 8293, "token_acc": 0.8682552185899961 }, { "epoch": 0.4475260346409108, "grad_norm": 0.3763023018836975, "learning_rate": 1.2167974144557941e-05, "loss": 0.36990809440612793, "step": 8294, "token_acc": 0.8679658671586716 }, { "epoch": 0.4475799924459073, "grad_norm": 0.3852117955684662, "learning_rate": 1.2166268112157997e-05, "loss": 0.3136378228664398, "step": 8295, "token_acc": 0.8897614013893084 }, { "epoch": 0.4476339502509038, "grad_norm": 0.33134767413139343, "learning_rate": 1.2164562013600794e-05, "loss": 0.399303138256073, "step": 8296, "token_acc": 0.8629931972789116 }, { "epoch": 0.4476879080559003, "grad_norm": 0.4438377320766449, "learning_rate": 1.2162855848938428e-05, "loss": 0.35961875319480896, "step": 8297, "token_acc": 0.8712018140589569 }, { "epoch": 0.44774186586089676, "grad_norm": 0.3642232418060303, "learning_rate": 1.2161149618223016e-05, "loss": 0.33538150787353516, "step": 8298, "token_acc": 0.8811482289743159 }, { "epoch": 0.44779582366589327, "grad_norm": 0.30697497725486755, "learning_rate": 1.2159443321506661e-05, "loss": 0.32008251547813416, "step": 8299, "token_acc": 0.8856700232378002 }, { "epoch": 0.4478497814708898, "grad_norm": 0.35730668902397156, "learning_rate": 1.2157736958841473e-05, "loss": 0.3902248442173004, "step": 8300, "token_acc": 0.8651439679261329 }, { "epoch": 0.4479037392758863, "grad_norm": 0.3783804178237915, "learning_rate": 1.215603053027956e-05, "loss": 0.37976133823394775, "step": 8301, "token_acc": 0.8653801441525226 }, { "epoch": 0.44795769708088273, "grad_norm": 0.3599943220615387, "learning_rate": 1.2154324035873042e-05, "loss": 0.3275832533836365, "step": 8302, "token_acc": 0.8857657158681805 }, { "epoch": 0.44801165488587924, "grad_norm": 0.38087743520736694, "learning_rate": 1.2152617475674031e-05, "loss": 0.3997288644313812, "step": 8303, "token_acc": 0.8654850233580654 }, { "epoch": 0.44806561269087575, "grad_norm": 0.37338265776634216, "learning_rate": 1.2150910849734647e-05, "loss": 0.37716352939605713, "step": 8304, "token_acc": 0.8674550760251881 }, { "epoch": 0.44811957049587225, "grad_norm": 0.3867999017238617, "learning_rate": 1.214920415810701e-05, "loss": 0.3640006482601166, "step": 8305, "token_acc": 0.8738143834794093 }, { "epoch": 0.4481735283008687, "grad_norm": 0.35707756876945496, "learning_rate": 1.214749740084324e-05, "loss": 0.39628392457962036, "step": 8306, "token_acc": 0.8593161402492031 }, { "epoch": 0.4482274861058652, "grad_norm": 0.32645460963249207, "learning_rate": 1.2145790577995462e-05, "loss": 0.3319931924343109, "step": 8307, "token_acc": 0.8776227741862311 }, { "epoch": 0.4482814439108617, "grad_norm": 0.4381502866744995, "learning_rate": 1.2144083689615802e-05, "loss": 0.38837742805480957, "step": 8308, "token_acc": 0.8675280420275451 }, { "epoch": 0.4483354017158582, "grad_norm": 0.3511645793914795, "learning_rate": 1.2142376735756388e-05, "loss": 0.394448846578598, "step": 8309, "token_acc": 0.8638812054156355 }, { "epoch": 0.4483893595208547, "grad_norm": 0.3428378999233246, "learning_rate": 1.2140669716469348e-05, "loss": 0.41548043489456177, "step": 8310, "token_acc": 0.8599412340842312 }, { "epoch": 0.4484433173258512, "grad_norm": 0.43409261107444763, "learning_rate": 1.2138962631806818e-05, "loss": 0.3969040513038635, "step": 8311, "token_acc": 0.8655256723716381 }, { "epoch": 0.4484972751308477, "grad_norm": 0.2864575982093811, "learning_rate": 1.2137255481820931e-05, "loss": 0.3861933946609497, "step": 8312, "token_acc": 0.8681329423264907 }, { "epoch": 0.44855123293584415, "grad_norm": 0.4156210422515869, "learning_rate": 1.2135548266563819e-05, "loss": 0.37679365277290344, "step": 8313, "token_acc": 0.8668749070355496 }, { "epoch": 0.44860519074084065, "grad_norm": 0.35592544078826904, "learning_rate": 1.2133840986087622e-05, "loss": 0.3390626311302185, "step": 8314, "token_acc": 0.877916440586001 }, { "epoch": 0.44865914854583716, "grad_norm": 0.47673770785331726, "learning_rate": 1.2132133640444482e-05, "loss": 0.4289172291755676, "step": 8315, "token_acc": 0.854287458040891 }, { "epoch": 0.44871310635083367, "grad_norm": 0.44112429022789, "learning_rate": 1.2130426229686536e-05, "loss": 0.41909241676330566, "step": 8316, "token_acc": 0.8510948905109489 }, { "epoch": 0.4487670641558301, "grad_norm": 0.3600989282131195, "learning_rate": 1.212871875386593e-05, "loss": 0.40636497735977173, "step": 8317, "token_acc": 0.8627365356622999 }, { "epoch": 0.4488210219608266, "grad_norm": 0.38209450244903564, "learning_rate": 1.2127011213034816e-05, "loss": 0.31378668546676636, "step": 8318, "token_acc": 0.8839253020871476 }, { "epoch": 0.44887497976582313, "grad_norm": 0.42146536707878113, "learning_rate": 1.2125303607245335e-05, "loss": 0.3799072504043579, "step": 8319, "token_acc": 0.8660005124263387 }, { "epoch": 0.44892893757081964, "grad_norm": 0.32015758752822876, "learning_rate": 1.2123595936549639e-05, "loss": 0.35061126947402954, "step": 8320, "token_acc": 0.8754499640028798 }, { "epoch": 0.4489828953758161, "grad_norm": 0.3582799434661865, "learning_rate": 1.2121888200999879e-05, "loss": 0.413082480430603, "step": 8321, "token_acc": 0.8585069444444444 }, { "epoch": 0.4490368531808126, "grad_norm": 0.4379570186138153, "learning_rate": 1.2120180400648209e-05, "loss": 0.37670618295669556, "step": 8322, "token_acc": 0.8644782688428388 }, { "epoch": 0.4490908109858091, "grad_norm": 0.41258662939071655, "learning_rate": 1.2118472535546784e-05, "loss": 0.4123394191265106, "step": 8323, "token_acc": 0.8567131095630146 }, { "epoch": 0.4491447687908056, "grad_norm": 0.35326600074768066, "learning_rate": 1.2116764605747764e-05, "loss": 0.40510714054107666, "step": 8324, "token_acc": 0.8589218417171882 }, { "epoch": 0.44919872659580207, "grad_norm": 0.4083879292011261, "learning_rate": 1.2115056611303307e-05, "loss": 0.3945207893848419, "step": 8325, "token_acc": 0.8586036108004473 }, { "epoch": 0.4492526844007986, "grad_norm": 0.4154914915561676, "learning_rate": 1.2113348552265574e-05, "loss": 0.38695043325424194, "step": 8326, "token_acc": 0.8639162561576355 }, { "epoch": 0.4493066422057951, "grad_norm": 0.3975192606449127, "learning_rate": 1.2111640428686731e-05, "loss": 0.3750128448009491, "step": 8327, "token_acc": 0.8718768502072232 }, { "epoch": 0.44936060001079153, "grad_norm": 0.4444561004638672, "learning_rate": 1.2109932240618945e-05, "loss": 0.3921002149581909, "step": 8328, "token_acc": 0.8632421614409607 }, { "epoch": 0.44941455781578804, "grad_norm": 0.4854384660720825, "learning_rate": 1.2108223988114379e-05, "loss": 0.37927186489105225, "step": 8329, "token_acc": 0.8733831560793331 }, { "epoch": 0.44946851562078455, "grad_norm": 0.423097163438797, "learning_rate": 1.2106515671225202e-05, "loss": 0.2929162383079529, "step": 8330, "token_acc": 0.8987105794624826 }, { "epoch": 0.44952247342578106, "grad_norm": 0.33863919973373413, "learning_rate": 1.210480729000359e-05, "loss": 0.3542560338973999, "step": 8331, "token_acc": 0.8758013691187656 }, { "epoch": 0.4495764312307775, "grad_norm": 0.375853568315506, "learning_rate": 1.2103098844501717e-05, "loss": 0.3652917742729187, "step": 8332, "token_acc": 0.8754953342707401 }, { "epoch": 0.449630389035774, "grad_norm": 0.3912492096424103, "learning_rate": 1.2101390334771753e-05, "loss": 0.4115843176841736, "step": 8333, "token_acc": 0.859504132231405 }, { "epoch": 0.4496843468407705, "grad_norm": 0.4162255823612213, "learning_rate": 1.209968176086588e-05, "loss": 0.32848799228668213, "step": 8334, "token_acc": 0.880421398276098 }, { "epoch": 0.44973830464576703, "grad_norm": 0.43050453066825867, "learning_rate": 1.2097973122836277e-05, "loss": 0.39042502641677856, "step": 8335, "token_acc": 0.8653710247349823 }, { "epoch": 0.4497922624507635, "grad_norm": 0.3292248547077179, "learning_rate": 1.2096264420735124e-05, "loss": 0.47863277792930603, "step": 8336, "token_acc": 0.838118344790403 }, { "epoch": 0.44984622025576, "grad_norm": 0.4151972532272339, "learning_rate": 1.2094555654614605e-05, "loss": 0.4171442985534668, "step": 8337, "token_acc": 0.8621976503109883 }, { "epoch": 0.4499001780607565, "grad_norm": 0.31242600083351135, "learning_rate": 1.2092846824526905e-05, "loss": 0.3966456651687622, "step": 8338, "token_acc": 0.86458208057727 }, { "epoch": 0.449954135865753, "grad_norm": 0.43745332956314087, "learning_rate": 1.2091137930524209e-05, "loss": 0.4180747866630554, "step": 8339, "token_acc": 0.8551493162154619 }, { "epoch": 0.45000809367074945, "grad_norm": 0.33882758021354675, "learning_rate": 1.208942897265871e-05, "loss": 0.3491560220718384, "step": 8340, "token_acc": 0.8751446424438787 }, { "epoch": 0.45006205147574596, "grad_norm": 0.42322230339050293, "learning_rate": 1.2087719950982599e-05, "loss": 0.36819368600845337, "step": 8341, "token_acc": 0.8685625108375239 }, { "epoch": 0.45011600928074247, "grad_norm": 0.38644614815711975, "learning_rate": 1.2086010865548066e-05, "loss": 0.3987053632736206, "step": 8342, "token_acc": 0.8579434015130288 }, { "epoch": 0.450169967085739, "grad_norm": 0.331240713596344, "learning_rate": 1.2084301716407305e-05, "loss": 0.3117378056049347, "step": 8343, "token_acc": 0.8875432525951558 }, { "epoch": 0.4502239248907354, "grad_norm": 0.4410611689090729, "learning_rate": 1.208259250361252e-05, "loss": 0.30473899841308594, "step": 8344, "token_acc": 0.8876456161863887 }, { "epoch": 0.45027788269573193, "grad_norm": 0.29556459188461304, "learning_rate": 1.2080883227215905e-05, "loss": 0.3663504123687744, "step": 8345, "token_acc": 0.8688568735694089 }, { "epoch": 0.45033184050072844, "grad_norm": 0.37574490904808044, "learning_rate": 1.2079173887269659e-05, "loss": 0.37581899762153625, "step": 8346, "token_acc": 0.8742138364779874 }, { "epoch": 0.45038579830572495, "grad_norm": 0.4279667139053345, "learning_rate": 1.207746448382599e-05, "loss": 0.36102643609046936, "step": 8347, "token_acc": 0.875844467797628 }, { "epoch": 0.4504397561107214, "grad_norm": 0.34994789958000183, "learning_rate": 1.2075755016937099e-05, "loss": 0.3586450219154358, "step": 8348, "token_acc": 0.8767155152361015 }, { "epoch": 0.4504937139157179, "grad_norm": 0.361261248588562, "learning_rate": 1.207404548665519e-05, "loss": 0.35628387331962585, "step": 8349, "token_acc": 0.8734981356166276 }, { "epoch": 0.4505476717207144, "grad_norm": 0.32612910866737366, "learning_rate": 1.2072335893032478e-05, "loss": 0.36895322799682617, "step": 8350, "token_acc": 0.8714572360170554 }, { "epoch": 0.45060162952571087, "grad_norm": 0.3126155138015747, "learning_rate": 1.2070626236121173e-05, "loss": 0.3573939800262451, "step": 8351, "token_acc": 0.8795961610370185 }, { "epoch": 0.4506555873307074, "grad_norm": 0.3868550658226013, "learning_rate": 1.2068916515973482e-05, "loss": 0.38514620065689087, "step": 8352, "token_acc": 0.8594249201277955 }, { "epoch": 0.4507095451357039, "grad_norm": 0.42466622591018677, "learning_rate": 1.2067206732641624e-05, "loss": 0.3776070773601532, "step": 8353, "token_acc": 0.8697859495060373 }, { "epoch": 0.4507635029407004, "grad_norm": 0.476002037525177, "learning_rate": 1.2065496886177815e-05, "loss": 0.3671991229057312, "step": 8354, "token_acc": 0.8732595501606569 }, { "epoch": 0.45081746074569684, "grad_norm": 0.3903146982192993, "learning_rate": 1.2063786976634267e-05, "loss": 0.3914870321750641, "step": 8355, "token_acc": 0.8643619625893644 }, { "epoch": 0.45087141855069335, "grad_norm": 0.384975790977478, "learning_rate": 1.206207700406321e-05, "loss": 0.33426329493522644, "step": 8356, "token_acc": 0.8830409356725146 }, { "epoch": 0.45092537635568986, "grad_norm": 0.4050632119178772, "learning_rate": 1.2060366968516861e-05, "loss": 0.3654409348964691, "step": 8357, "token_acc": 0.8746499533271103 }, { "epoch": 0.45097933416068636, "grad_norm": 0.37634357810020447, "learning_rate": 1.2058656870047446e-05, "loss": 0.39794376492500305, "step": 8358, "token_acc": 0.8621597892888498 }, { "epoch": 0.4510332919656828, "grad_norm": 0.42806875705718994, "learning_rate": 1.2056946708707184e-05, "loss": 0.41809573769569397, "step": 8359, "token_acc": 0.8579234972677595 }, { "epoch": 0.4510872497706793, "grad_norm": 0.4352591931819916, "learning_rate": 1.2055236484548312e-05, "loss": 0.3483065366744995, "step": 8360, "token_acc": 0.873825979089137 }, { "epoch": 0.45114120757567583, "grad_norm": 0.5068300366401672, "learning_rate": 1.2053526197623056e-05, "loss": 0.35449954867362976, "step": 8361, "token_acc": 0.8755055389484789 }, { "epoch": 0.45119516538067234, "grad_norm": 0.3884439468383789, "learning_rate": 1.2051815847983648e-05, "loss": 0.37026646733283997, "step": 8362, "token_acc": 0.8709638842382206 }, { "epoch": 0.4512491231856688, "grad_norm": 0.3292132019996643, "learning_rate": 1.2050105435682322e-05, "loss": 0.4002138376235962, "step": 8363, "token_acc": 0.858139534883721 }, { "epoch": 0.4513030809906653, "grad_norm": 0.4611673653125763, "learning_rate": 1.2048394960771312e-05, "loss": 0.3748592138290405, "step": 8364, "token_acc": 0.8746360678198322 }, { "epoch": 0.4513570387956618, "grad_norm": 0.44895192980766296, "learning_rate": 1.2046684423302858e-05, "loss": 0.3750231862068176, "step": 8365, "token_acc": 0.8687545257060101 }, { "epoch": 0.4514109966006583, "grad_norm": 0.35415297746658325, "learning_rate": 1.2044973823329196e-05, "loss": 0.35157039761543274, "step": 8366, "token_acc": 0.8835758835758836 }, { "epoch": 0.45146495440565476, "grad_norm": 0.4253879487514496, "learning_rate": 1.204326316090257e-05, "loss": 0.3712023198604584, "step": 8367, "token_acc": 0.8648770924445784 }, { "epoch": 0.45151891221065127, "grad_norm": 0.4065883755683899, "learning_rate": 1.2041552436075221e-05, "loss": 0.3243149518966675, "step": 8368, "token_acc": 0.8864289821736631 }, { "epoch": 0.4515728700156478, "grad_norm": 0.31362226605415344, "learning_rate": 1.20398416488994e-05, "loss": 0.3669206202030182, "step": 8369, "token_acc": 0.8695175438596491 }, { "epoch": 0.4516268278206443, "grad_norm": 0.38192301988601685, "learning_rate": 1.2038130799427345e-05, "loss": 0.36583688855171204, "step": 8370, "token_acc": 0.8734357674639632 }, { "epoch": 0.45168078562564073, "grad_norm": 0.37514355778694153, "learning_rate": 1.203641988771131e-05, "loss": 0.3882979154586792, "step": 8371, "token_acc": 0.8685815463309069 }, { "epoch": 0.45173474343063724, "grad_norm": 0.3584350049495697, "learning_rate": 1.2034708913803545e-05, "loss": 0.4187609553337097, "step": 8372, "token_acc": 0.8559268600252207 }, { "epoch": 0.45178870123563375, "grad_norm": 0.3015931248664856, "learning_rate": 1.2032997877756303e-05, "loss": 0.34596920013427734, "step": 8373, "token_acc": 0.879083624409287 }, { "epoch": 0.4518426590406302, "grad_norm": 0.3549371063709259, "learning_rate": 1.203128677962184e-05, "loss": 0.40246838331222534, "step": 8374, "token_acc": 0.8620689655172413 }, { "epoch": 0.4518966168456267, "grad_norm": 0.4277687072753906, "learning_rate": 1.2029575619452407e-05, "loss": 0.40459293127059937, "step": 8375, "token_acc": 0.8647493837304848 }, { "epoch": 0.4519505746506232, "grad_norm": 0.3424149751663208, "learning_rate": 1.2027864397300269e-05, "loss": 0.400734543800354, "step": 8376, "token_acc": 0.863387210119466 }, { "epoch": 0.4520045324556197, "grad_norm": 0.3447425365447998, "learning_rate": 1.2026153113217685e-05, "loss": 0.3337424397468567, "step": 8377, "token_acc": 0.8817540048873201 }, { "epoch": 0.4520584902606162, "grad_norm": 0.38589364290237427, "learning_rate": 1.2024441767256914e-05, "loss": 0.38955074548721313, "step": 8378, "token_acc": 0.8665184838558727 }, { "epoch": 0.4521124480656127, "grad_norm": 0.4547552466392517, "learning_rate": 1.2022730359470223e-05, "loss": 0.35733562707901, "step": 8379, "token_acc": 0.8724215880192144 }, { "epoch": 0.4521664058706092, "grad_norm": 0.34644922614097595, "learning_rate": 1.2021018889909875e-05, "loss": 0.35028183460235596, "step": 8380, "token_acc": 0.8735083532219571 }, { "epoch": 0.4522203636756057, "grad_norm": 0.4239576458930969, "learning_rate": 1.2019307358628139e-05, "loss": 0.40348607301712036, "step": 8381, "token_acc": 0.8599559528436326 }, { "epoch": 0.45227432148060215, "grad_norm": 0.3936692476272583, "learning_rate": 1.2017595765677286e-05, "loss": 0.3986489176750183, "step": 8382, "token_acc": 0.8653757491931766 }, { "epoch": 0.45232827928559866, "grad_norm": 0.43406838178634644, "learning_rate": 1.2015884111109588e-05, "loss": 0.4162482023239136, "step": 8383, "token_acc": 0.8567120974550718 }, { "epoch": 0.45238223709059516, "grad_norm": 0.419048011302948, "learning_rate": 1.2014172394977316e-05, "loss": 0.31573447585105896, "step": 8384, "token_acc": 0.8846398980242193 }, { "epoch": 0.45243619489559167, "grad_norm": 0.40293940901756287, "learning_rate": 1.2012460617332746e-05, "loss": 0.4147135019302368, "step": 8385, "token_acc": 0.8566008258006919 }, { "epoch": 0.4524901527005881, "grad_norm": 0.4709862768650055, "learning_rate": 1.2010748778228159e-05, "loss": 0.45770660042762756, "step": 8386, "token_acc": 0.8376511226252159 }, { "epoch": 0.45254411050558463, "grad_norm": 0.41911396384239197, "learning_rate": 1.200903687771583e-05, "loss": 0.399189829826355, "step": 8387, "token_acc": 0.8615844544095665 }, { "epoch": 0.45259806831058114, "grad_norm": 0.32969099283218384, "learning_rate": 1.2007324915848038e-05, "loss": 0.3830372989177704, "step": 8388, "token_acc": 0.8651604547416895 }, { "epoch": 0.45265202611557764, "grad_norm": 0.31544262170791626, "learning_rate": 1.2005612892677073e-05, "loss": 0.3257478177547455, "step": 8389, "token_acc": 0.8800874537504204 }, { "epoch": 0.4527059839205741, "grad_norm": 0.3883407711982727, "learning_rate": 1.2003900808255213e-05, "loss": 0.42078831791877747, "step": 8390, "token_acc": 0.8561099060014461 }, { "epoch": 0.4527599417255706, "grad_norm": 0.5225081443786621, "learning_rate": 1.2002188662634746e-05, "loss": 0.43910032510757446, "step": 8391, "token_acc": 0.84669882423877 }, { "epoch": 0.4528138995305671, "grad_norm": 0.3598272204399109, "learning_rate": 1.2000476455867964e-05, "loss": 0.368308961391449, "step": 8392, "token_acc": 0.8617458490876212 }, { "epoch": 0.45286785733556356, "grad_norm": 0.41675063967704773, "learning_rate": 1.1998764188007158e-05, "loss": 0.41096097230911255, "step": 8393, "token_acc": 0.8571055039874493 }, { "epoch": 0.45292181514056007, "grad_norm": 0.44123557209968567, "learning_rate": 1.1997051859104612e-05, "loss": 0.42447084188461304, "step": 8394, "token_acc": 0.85431654676259 }, { "epoch": 0.4529757729455566, "grad_norm": 0.3579249680042267, "learning_rate": 1.1995339469212627e-05, "loss": 0.36550208926200867, "step": 8395, "token_acc": 0.8724044538068011 }, { "epoch": 0.4530297307505531, "grad_norm": 0.4543676972389221, "learning_rate": 1.19936270183835e-05, "loss": 0.35614046454429626, "step": 8396, "token_acc": 0.8687017889381257 }, { "epoch": 0.45308368855554954, "grad_norm": 0.4205949902534485, "learning_rate": 1.1991914506669521e-05, "loss": 0.3494873046875, "step": 8397, "token_acc": 0.873921085080148 }, { "epoch": 0.45313764636054604, "grad_norm": 0.47766292095184326, "learning_rate": 1.1990201934123e-05, "loss": 0.36306965351104736, "step": 8398, "token_acc": 0.8684063373718546 }, { "epoch": 0.45319160416554255, "grad_norm": 0.4863712787628174, "learning_rate": 1.1988489300796232e-05, "loss": 0.42755913734436035, "step": 8399, "token_acc": 0.8532312674812307 }, { "epoch": 0.45324556197053906, "grad_norm": 0.37960219383239746, "learning_rate": 1.1986776606741519e-05, "loss": 0.38094472885131836, "step": 8400, "token_acc": 0.8622862286228623 }, { "epoch": 0.4532995197755355, "grad_norm": 0.38495057821273804, "learning_rate": 1.198506385201117e-05, "loss": 0.37249085307121277, "step": 8401, "token_acc": 0.8691604050963737 }, { "epoch": 0.453353477580532, "grad_norm": 0.41278842091560364, "learning_rate": 1.1983351036657491e-05, "loss": 0.3435237407684326, "step": 8402, "token_acc": 0.8789237668161435 }, { "epoch": 0.4534074353855285, "grad_norm": 0.41834694147109985, "learning_rate": 1.1981638160732794e-05, "loss": 0.33334892988204956, "step": 8403, "token_acc": 0.8821356615177972 }, { "epoch": 0.45346139319052503, "grad_norm": 0.3937675952911377, "learning_rate": 1.1979925224289383e-05, "loss": 0.3672083616256714, "step": 8404, "token_acc": 0.8739942528735632 }, { "epoch": 0.4535153509955215, "grad_norm": 0.3335183560848236, "learning_rate": 1.1978212227379577e-05, "loss": 0.38027775287628174, "step": 8405, "token_acc": 0.8663315287445218 }, { "epoch": 0.453569308800518, "grad_norm": 0.46864086389541626, "learning_rate": 1.1976499170055686e-05, "loss": 0.3655199408531189, "step": 8406, "token_acc": 0.8629657794676806 }, { "epoch": 0.4536232666055145, "grad_norm": 0.3717767298221588, "learning_rate": 1.1974786052370027e-05, "loss": 0.41233029961586, "step": 8407, "token_acc": 0.8573937478047067 }, { "epoch": 0.453677224410511, "grad_norm": 0.40982139110565186, "learning_rate": 1.197307287437492e-05, "loss": 0.35781317949295044, "step": 8408, "token_acc": 0.8767361111111112 }, { "epoch": 0.45373118221550746, "grad_norm": 0.3236372172832489, "learning_rate": 1.1971359636122685e-05, "loss": 0.4085542857646942, "step": 8409, "token_acc": 0.8607708731318126 }, { "epoch": 0.45378514002050396, "grad_norm": 0.398200124502182, "learning_rate": 1.1969646337665646e-05, "loss": 0.38105419278144836, "step": 8410, "token_acc": 0.8675236096537251 }, { "epoch": 0.45383909782550047, "grad_norm": 0.41232627630233765, "learning_rate": 1.196793297905612e-05, "loss": 0.40941452980041504, "step": 8411, "token_acc": 0.8587257617728532 }, { "epoch": 0.453893055630497, "grad_norm": 0.3833736181259155, "learning_rate": 1.196621956034644e-05, "loss": 0.33716604113578796, "step": 8412, "token_acc": 0.8805315796684478 }, { "epoch": 0.45394701343549343, "grad_norm": 0.31246304512023926, "learning_rate": 1.1964506081588922e-05, "loss": 0.3561962842941284, "step": 8413, "token_acc": 0.8732638888888888 }, { "epoch": 0.45400097124048994, "grad_norm": 0.34010612964630127, "learning_rate": 1.1962792542835911e-05, "loss": 0.40681907534599304, "step": 8414, "token_acc": 0.8575559701492538 }, { "epoch": 0.45405492904548644, "grad_norm": 0.3230472803115845, "learning_rate": 1.1961078944139726e-05, "loss": 0.3483748733997345, "step": 8415, "token_acc": 0.8811528035383079 }, { "epoch": 0.4541088868504829, "grad_norm": 0.35347381234169006, "learning_rate": 1.1959365285552705e-05, "loss": 0.3724270164966583, "step": 8416, "token_acc": 0.8747282154206389 }, { "epoch": 0.4541628446554794, "grad_norm": 0.3958403170108795, "learning_rate": 1.195765156712718e-05, "loss": 0.35204267501831055, "step": 8417, "token_acc": 0.8725249313484608 }, { "epoch": 0.4542168024604759, "grad_norm": 0.2830760180950165, "learning_rate": 1.195593778891549e-05, "loss": 0.3549535870552063, "step": 8418, "token_acc": 0.8751138285416937 }, { "epoch": 0.4542707602654724, "grad_norm": 0.40212902426719666, "learning_rate": 1.1954223950969973e-05, "loss": 0.35178065299987793, "step": 8419, "token_acc": 0.8787155306195535 }, { "epoch": 0.45432471807046887, "grad_norm": 0.4356164038181305, "learning_rate": 1.1952510053342969e-05, "loss": 0.35621654987335205, "step": 8420, "token_acc": 0.873690792559012 }, { "epoch": 0.4543786758754654, "grad_norm": 0.35484522581100464, "learning_rate": 1.1950796096086818e-05, "loss": 0.3424503803253174, "step": 8421, "token_acc": 0.878512535756352 }, { "epoch": 0.4544326336804619, "grad_norm": 0.42070844769477844, "learning_rate": 1.1949082079253867e-05, "loss": 0.42178046703338623, "step": 8422, "token_acc": 0.8568894952251023 }, { "epoch": 0.4544865914854584, "grad_norm": 0.4038700759410858, "learning_rate": 1.1947368002896456e-05, "loss": 0.3280336856842041, "step": 8423, "token_acc": 0.8816840811309158 }, { "epoch": 0.45454054929045484, "grad_norm": 0.34652912616729736, "learning_rate": 1.194565386706694e-05, "loss": 0.39081016182899475, "step": 8424, "token_acc": 0.8649078917443712 }, { "epoch": 0.45459450709545135, "grad_norm": 0.428394079208374, "learning_rate": 1.1943939671817664e-05, "loss": 0.398362934589386, "step": 8425, "token_acc": 0.8598209289311696 }, { "epoch": 0.45464846490044786, "grad_norm": 0.3953942358493805, "learning_rate": 1.1942225417200976e-05, "loss": 0.37850460410118103, "step": 8426, "token_acc": 0.8667823070251518 }, { "epoch": 0.45470242270544436, "grad_norm": 0.3890893757343292, "learning_rate": 1.194051110326924e-05, "loss": 0.3333703279495239, "step": 8427, "token_acc": 0.8840319911748483 }, { "epoch": 0.4547563805104408, "grad_norm": 0.41671374440193176, "learning_rate": 1.1938796730074798e-05, "loss": 0.31913697719573975, "step": 8428, "token_acc": 0.8825172768710484 }, { "epoch": 0.4548103383154373, "grad_norm": 0.49800845980644226, "learning_rate": 1.193708229767001e-05, "loss": 0.4137594699859619, "step": 8429, "token_acc": 0.8593726772707002 }, { "epoch": 0.45486429612043383, "grad_norm": 0.3990044891834259, "learning_rate": 1.1935367806107242e-05, "loss": 0.37683916091918945, "step": 8430, "token_acc": 0.8713955469035163 }, { "epoch": 0.45491825392543034, "grad_norm": 0.4453347325325012, "learning_rate": 1.1933653255438844e-05, "loss": 0.33526042103767395, "step": 8431, "token_acc": 0.880466472303207 }, { "epoch": 0.4549722117304268, "grad_norm": 0.37291133403778076, "learning_rate": 1.1931938645717182e-05, "loss": 0.3040372133255005, "step": 8432, "token_acc": 0.8910256410256411 }, { "epoch": 0.4550261695354233, "grad_norm": 0.3369539976119995, "learning_rate": 1.193022397699462e-05, "loss": 0.37903326749801636, "step": 8433, "token_acc": 0.8687313722949332 }, { "epoch": 0.4550801273404198, "grad_norm": 0.37966495752334595, "learning_rate": 1.1928509249323525e-05, "loss": 0.4243285059928894, "step": 8434, "token_acc": 0.8543302472752842 }, { "epoch": 0.4551340851454163, "grad_norm": 0.5186123847961426, "learning_rate": 1.1926794462756262e-05, "loss": 0.3759331703186035, "step": 8435, "token_acc": 0.8655032610611669 }, { "epoch": 0.45518804295041276, "grad_norm": 0.30564427375793457, "learning_rate": 1.1925079617345201e-05, "loss": 0.35322147607803345, "step": 8436, "token_acc": 0.8746814404432133 }, { "epoch": 0.45524200075540927, "grad_norm": 0.35850289463996887, "learning_rate": 1.1923364713142713e-05, "loss": 0.366521954536438, "step": 8437, "token_acc": 0.8678048043511104 }, { "epoch": 0.4552959585604058, "grad_norm": 0.42890509963035583, "learning_rate": 1.1921649750201173e-05, "loss": 0.3360494375228882, "step": 8438, "token_acc": 0.8840115347644986 }, { "epoch": 0.45534991636540223, "grad_norm": 0.3710263669490814, "learning_rate": 1.1919934728572946e-05, "loss": 0.32245469093322754, "step": 8439, "token_acc": 0.8833952075599055 }, { "epoch": 0.45540387417039874, "grad_norm": 0.4256313443183899, "learning_rate": 1.1918219648310421e-05, "loss": 0.35752391815185547, "step": 8440, "token_acc": 0.8756953769422597 }, { "epoch": 0.45545783197539524, "grad_norm": 0.268807053565979, "learning_rate": 1.191650450946597e-05, "loss": 0.4164031147956848, "step": 8441, "token_acc": 0.8578477443609023 }, { "epoch": 0.45551178978039175, "grad_norm": 0.33348655700683594, "learning_rate": 1.191478931209197e-05, "loss": 0.3852781653404236, "step": 8442, "token_acc": 0.8625954198473282 }, { "epoch": 0.4555657475853882, "grad_norm": 0.4145611524581909, "learning_rate": 1.1913074056240807e-05, "loss": 0.3676283657550812, "step": 8443, "token_acc": 0.8701957940536621 }, { "epoch": 0.4556197053903847, "grad_norm": 0.4523269236087799, "learning_rate": 1.1911358741964867e-05, "loss": 0.34059688448905945, "step": 8444, "token_acc": 0.8799838904550946 }, { "epoch": 0.4556736631953812, "grad_norm": 0.420064777135849, "learning_rate": 1.190964336931653e-05, "loss": 0.3479936122894287, "step": 8445, "token_acc": 0.8730289318524612 }, { "epoch": 0.4557276210003777, "grad_norm": 0.4718530476093292, "learning_rate": 1.1907927938348185e-05, "loss": 0.39595651626586914, "step": 8446, "token_acc": 0.8567809491639926 }, { "epoch": 0.4557815788053742, "grad_norm": 0.4019620418548584, "learning_rate": 1.190621244911222e-05, "loss": 0.4361005425453186, "step": 8447, "token_acc": 0.859422164726175 }, { "epoch": 0.4558355366103707, "grad_norm": 0.5127132534980774, "learning_rate": 1.1904496901661026e-05, "loss": 0.36787545680999756, "step": 8448, "token_acc": 0.8694962316541055 }, { "epoch": 0.4558894944153672, "grad_norm": 0.48520633578300476, "learning_rate": 1.1902781296046994e-05, "loss": 0.4414495527744293, "step": 8449, "token_acc": 0.8525877263259093 }, { "epoch": 0.4559434522203637, "grad_norm": 0.44577690958976746, "learning_rate": 1.1901065632322522e-05, "loss": 0.4506109356880188, "step": 8450, "token_acc": 0.8488989121782966 }, { "epoch": 0.45599741002536015, "grad_norm": 0.34721803665161133, "learning_rate": 1.1899349910540004e-05, "loss": 0.3717820942401886, "step": 8451, "token_acc": 0.8678984631324469 }, { "epoch": 0.45605136783035666, "grad_norm": 0.4731959402561188, "learning_rate": 1.1897634130751837e-05, "loss": 0.3852698802947998, "step": 8452, "token_acc": 0.8678087080656673 }, { "epoch": 0.45610532563535316, "grad_norm": 0.47864827513694763, "learning_rate": 1.1895918293010418e-05, "loss": 0.414369136095047, "step": 8453, "token_acc": 0.8580223646186707 }, { "epoch": 0.45615928344034967, "grad_norm": 0.3544929623603821, "learning_rate": 1.1894202397368152e-05, "loss": 0.3390384912490845, "step": 8454, "token_acc": 0.8789068178514319 }, { "epoch": 0.4562132412453461, "grad_norm": 0.36060017347335815, "learning_rate": 1.1892486443877442e-05, "loss": 0.4175039529800415, "step": 8455, "token_acc": 0.8598666666666667 }, { "epoch": 0.45626719905034263, "grad_norm": 0.4407992660999298, "learning_rate": 1.1890770432590693e-05, "loss": 0.41517794132232666, "step": 8456, "token_acc": 0.858800521512386 }, { "epoch": 0.45632115685533914, "grad_norm": 0.44051694869995117, "learning_rate": 1.1889054363560309e-05, "loss": 0.3466351330280304, "step": 8457, "token_acc": 0.8746522184800117 }, { "epoch": 0.4563751146603356, "grad_norm": 0.5017412900924683, "learning_rate": 1.1887338236838697e-05, "loss": 0.38925203680992126, "step": 8458, "token_acc": 0.8625788643533123 }, { "epoch": 0.4564290724653321, "grad_norm": 0.366456001996994, "learning_rate": 1.1885622052478274e-05, "loss": 0.3329448699951172, "step": 8459, "token_acc": 0.879583276958463 }, { "epoch": 0.4564830302703286, "grad_norm": 0.34887099266052246, "learning_rate": 1.1883905810531445e-05, "loss": 0.3487766981124878, "step": 8460, "token_acc": 0.8773606901375612 }, { "epoch": 0.4565369880753251, "grad_norm": 0.36779558658599854, "learning_rate": 1.1882189511050628e-05, "loss": 0.3802765905857086, "step": 8461, "token_acc": 0.8688144090613685 }, { "epoch": 0.45659094588032156, "grad_norm": 0.34559035301208496, "learning_rate": 1.1880473154088234e-05, "loss": 0.3765637278556824, "step": 8462, "token_acc": 0.8644474034620506 }, { "epoch": 0.45664490368531807, "grad_norm": 0.4408178925514221, "learning_rate": 1.1878756739696687e-05, "loss": 0.38163524866104126, "step": 8463, "token_acc": 0.8683812405446294 }, { "epoch": 0.4566988614903146, "grad_norm": 0.46536123752593994, "learning_rate": 1.1877040267928398e-05, "loss": 0.3849230408668518, "step": 8464, "token_acc": 0.8648111332007953 }, { "epoch": 0.4567528192953111, "grad_norm": 0.374691903591156, "learning_rate": 1.1875323738835789e-05, "loss": 0.39463287591934204, "step": 8465, "token_acc": 0.8590440921189677 }, { "epoch": 0.45680677710030754, "grad_norm": 0.49296092987060547, "learning_rate": 1.1873607152471286e-05, "loss": 0.42946749925613403, "step": 8466, "token_acc": 0.8566440349175558 }, { "epoch": 0.45686073490530404, "grad_norm": 0.33299699425697327, "learning_rate": 1.1871890508887312e-05, "loss": 0.29830220341682434, "step": 8467, "token_acc": 0.8943555181128896 }, { "epoch": 0.45691469271030055, "grad_norm": 0.43141379952430725, "learning_rate": 1.187017380813629e-05, "loss": 0.37635964155197144, "step": 8468, "token_acc": 0.866526379077108 }, { "epoch": 0.45696865051529706, "grad_norm": 0.398105651140213, "learning_rate": 1.1868457050270654e-05, "loss": 0.35420912504196167, "step": 8469, "token_acc": 0.8775759219088937 }, { "epoch": 0.4570226083202935, "grad_norm": 0.4400734007358551, "learning_rate": 1.1866740235342826e-05, "loss": 0.3529038727283478, "step": 8470, "token_acc": 0.8726682887266829 }, { "epoch": 0.45707656612529, "grad_norm": 0.3548305630683899, "learning_rate": 1.1865023363405241e-05, "loss": 0.4191195070743561, "step": 8471, "token_acc": 0.8545762711864406 }, { "epoch": 0.4571305239302865, "grad_norm": 0.30501407384872437, "learning_rate": 1.1863306434510331e-05, "loss": 0.35930249094963074, "step": 8472, "token_acc": 0.8725085910652921 }, { "epoch": 0.45718448173528303, "grad_norm": 0.3773164451122284, "learning_rate": 1.1861589448710532e-05, "loss": 0.33488929271698, "step": 8473, "token_acc": 0.8799246813441484 }, { "epoch": 0.4572384395402795, "grad_norm": 0.36170923709869385, "learning_rate": 1.1859872406058277e-05, "loss": 0.3993464410305023, "step": 8474, "token_acc": 0.8640319589099729 }, { "epoch": 0.457292397345276, "grad_norm": 0.41745316982269287, "learning_rate": 1.1858155306606004e-05, "loss": 0.4011233448982239, "step": 8475, "token_acc": 0.8575213423609067 }, { "epoch": 0.4573463551502725, "grad_norm": 0.38795188069343567, "learning_rate": 1.1856438150406158e-05, "loss": 0.3339589834213257, "step": 8476, "token_acc": 0.8716328566677752 }, { "epoch": 0.457400312955269, "grad_norm": 0.360951691865921, "learning_rate": 1.1854720937511177e-05, "loss": 0.36912310123443604, "step": 8477, "token_acc": 0.8759529654994185 }, { "epoch": 0.45745427076026546, "grad_norm": 0.4955836832523346, "learning_rate": 1.1853003667973506e-05, "loss": 0.44212979078292847, "step": 8478, "token_acc": 0.8537066732453436 }, { "epoch": 0.45750822856526197, "grad_norm": 0.3627784550189972, "learning_rate": 1.1851286341845585e-05, "loss": 0.3728705644607544, "step": 8479, "token_acc": 0.873027577053532 }, { "epoch": 0.4575621863702585, "grad_norm": 0.4361112415790558, "learning_rate": 1.1849568959179867e-05, "loss": 0.3969790041446686, "step": 8480, "token_acc": 0.8657249301459174 }, { "epoch": 0.4576161441752549, "grad_norm": 0.4048269987106323, "learning_rate": 1.1847851520028795e-05, "loss": 0.34685876965522766, "step": 8481, "token_acc": 0.8800625081390806 }, { "epoch": 0.45767010198025143, "grad_norm": 0.4867439866065979, "learning_rate": 1.1846134024444822e-05, "loss": 0.3585911989212036, "step": 8482, "token_acc": 0.8711270003404835 }, { "epoch": 0.45772405978524794, "grad_norm": 0.37361592054367065, "learning_rate": 1.18444164724804e-05, "loss": 0.3666239380836487, "step": 8483, "token_acc": 0.8733740071371014 }, { "epoch": 0.45777801759024445, "grad_norm": 0.3427639305591583, "learning_rate": 1.184269886418798e-05, "loss": 0.36317938566207886, "step": 8484, "token_acc": 0.8724787832029097 }, { "epoch": 0.4578319753952409, "grad_norm": 0.4624539017677307, "learning_rate": 1.1840981199620023e-05, "loss": 0.36348649859428406, "step": 8485, "token_acc": 0.8739232576350823 }, { "epoch": 0.4578859332002374, "grad_norm": 0.30490994453430176, "learning_rate": 1.1839263478828985e-05, "loss": 0.309953510761261, "step": 8486, "token_acc": 0.8885875972647961 }, { "epoch": 0.4579398910052339, "grad_norm": 0.40680617094039917, "learning_rate": 1.1837545701867315e-05, "loss": 0.4055282473564148, "step": 8487, "token_acc": 0.8586507572280863 }, { "epoch": 0.4579938488102304, "grad_norm": 0.4211299419403076, "learning_rate": 1.1835827868787484e-05, "loss": 0.37295830249786377, "step": 8488, "token_acc": 0.8705766710353866 }, { "epoch": 0.45804780661522687, "grad_norm": 0.32991984486579895, "learning_rate": 1.1834109979641953e-05, "loss": 0.4111971855163574, "step": 8489, "token_acc": 0.8638062283737025 }, { "epoch": 0.4581017644202234, "grad_norm": 0.38476598262786865, "learning_rate": 1.183239203448318e-05, "loss": 0.34077224135398865, "step": 8490, "token_acc": 0.8803732089303565 }, { "epoch": 0.4581557222252199, "grad_norm": 0.4449487328529358, "learning_rate": 1.1830674033363634e-05, "loss": 0.37478891015052795, "step": 8491, "token_acc": 0.8755537098560354 }, { "epoch": 0.4582096800302164, "grad_norm": 0.43790140748023987, "learning_rate": 1.1828955976335786e-05, "loss": 0.32666316628456116, "step": 8492, "token_acc": 0.8826042378673957 }, { "epoch": 0.45826363783521284, "grad_norm": 0.3702702820301056, "learning_rate": 1.1827237863452099e-05, "loss": 0.39410296082496643, "step": 8493, "token_acc": 0.8611517100263081 }, { "epoch": 0.45831759564020935, "grad_norm": 0.34324735403060913, "learning_rate": 1.1825519694765047e-05, "loss": 0.3302100598812103, "step": 8494, "token_acc": 0.8843416370106761 }, { "epoch": 0.45837155344520586, "grad_norm": 0.4629686176776886, "learning_rate": 1.1823801470327104e-05, "loss": 0.3847847580909729, "step": 8495, "token_acc": 0.873015873015873 }, { "epoch": 0.45842551125020237, "grad_norm": 0.36318153142929077, "learning_rate": 1.1822083190190738e-05, "loss": 0.3381349444389343, "step": 8496, "token_acc": 0.8809785932721712 }, { "epoch": 0.4584794690551988, "grad_norm": 0.41689741611480713, "learning_rate": 1.1820364854408428e-05, "loss": 0.39924052357673645, "step": 8497, "token_acc": 0.8589506611687757 }, { "epoch": 0.4585334268601953, "grad_norm": 0.40695157647132874, "learning_rate": 1.1818646463032658e-05, "loss": 0.3323107063770294, "step": 8498, "token_acc": 0.8767263427109975 }, { "epoch": 0.45858738466519183, "grad_norm": 0.45959314703941345, "learning_rate": 1.1816928016115896e-05, "loss": 0.42232733964920044, "step": 8499, "token_acc": 0.8580044578471221 }, { "epoch": 0.45864134247018834, "grad_norm": 0.33665731549263, "learning_rate": 1.181520951371063e-05, "loss": 0.37033623456954956, "step": 8500, "token_acc": 0.8697981046559539 }, { "epoch": 0.4586953002751848, "grad_norm": 0.4445744454860687, "learning_rate": 1.1813490955869343e-05, "loss": 0.3580881357192993, "step": 8501, "token_acc": 0.8741656851197487 }, { "epoch": 0.4587492580801813, "grad_norm": 0.3560914099216461, "learning_rate": 1.1811772342644513e-05, "loss": 0.30791181325912476, "step": 8502, "token_acc": 0.8896993417541362 }, { "epoch": 0.4588032158851778, "grad_norm": 0.41475552320480347, "learning_rate": 1.1810053674088637e-05, "loss": 0.33923792839050293, "step": 8503, "token_acc": 0.8777312181981443 }, { "epoch": 0.45885717369017426, "grad_norm": 0.413191020488739, "learning_rate": 1.180833495025419e-05, "loss": 0.3876320719718933, "step": 8504, "token_acc": 0.8691685315889137 }, { "epoch": 0.45891113149517077, "grad_norm": 0.516502320766449, "learning_rate": 1.180661617119367e-05, "loss": 0.4114782512187958, "step": 8505, "token_acc": 0.8568612585291888 }, { "epoch": 0.4589650893001673, "grad_norm": 0.3924914300441742, "learning_rate": 1.1804897336959567e-05, "loss": 0.3368784785270691, "step": 8506, "token_acc": 0.8791666666666667 }, { "epoch": 0.4590190471051638, "grad_norm": 0.2925654351711273, "learning_rate": 1.1803178447604367e-05, "loss": 0.3223021626472473, "step": 8507, "token_acc": 0.8836107921928817 }, { "epoch": 0.45907300491016023, "grad_norm": 0.4912921190261841, "learning_rate": 1.1801459503180573e-05, "loss": 0.3820846378803253, "step": 8508, "token_acc": 0.8667046425519795 }, { "epoch": 0.45912696271515674, "grad_norm": 0.39972278475761414, "learning_rate": 1.1799740503740678e-05, "loss": 0.33927685022354126, "step": 8509, "token_acc": 0.8756449030421634 }, { "epoch": 0.45918092052015325, "grad_norm": 0.346582293510437, "learning_rate": 1.1798021449337177e-05, "loss": 0.3835122585296631, "step": 8510, "token_acc": 0.8688813240990727 }, { "epoch": 0.45923487832514975, "grad_norm": 0.35409218072891235, "learning_rate": 1.1796302340022574e-05, "loss": 0.3372732400894165, "step": 8511, "token_acc": 0.8774709302325582 }, { "epoch": 0.4592888361301462, "grad_norm": 0.3303767740726471, "learning_rate": 1.179458317584937e-05, "loss": 0.35874587297439575, "step": 8512, "token_acc": 0.873525307624001 }, { "epoch": 0.4593427939351427, "grad_norm": 0.3873157203197479, "learning_rate": 1.179286395687006e-05, "loss": 0.4038768410682678, "step": 8513, "token_acc": 0.8628909551986476 }, { "epoch": 0.4593967517401392, "grad_norm": 0.45496875047683716, "learning_rate": 1.179114468313716e-05, "loss": 0.35115721821784973, "step": 8514, "token_acc": 0.8811482583581306 }, { "epoch": 0.4594507095451357, "grad_norm": 0.5223584175109863, "learning_rate": 1.1789425354703167e-05, "loss": 0.3474853038787842, "step": 8515, "token_acc": 0.8772265849454127 }, { "epoch": 0.4595046673501322, "grad_norm": 0.3780139684677124, "learning_rate": 1.1787705971620593e-05, "loss": 0.4065897464752197, "step": 8516, "token_acc": 0.8589591254752852 }, { "epoch": 0.4595586251551287, "grad_norm": 0.4333769977092743, "learning_rate": 1.1785986533941949e-05, "loss": 0.4319535493850708, "step": 8517, "token_acc": 0.8513623978201635 }, { "epoch": 0.4596125829601252, "grad_norm": 0.44118642807006836, "learning_rate": 1.1784267041719744e-05, "loss": 0.34199821949005127, "step": 8518, "token_acc": 0.8754674036741993 }, { "epoch": 0.4596665407651217, "grad_norm": 0.3809281587600708, "learning_rate": 1.1782547495006491e-05, "loss": 0.3403348922729492, "step": 8519, "token_acc": 0.8774802275565422 }, { "epoch": 0.45972049857011815, "grad_norm": 0.5158448815345764, "learning_rate": 1.1780827893854702e-05, "loss": 0.3789580166339874, "step": 8520, "token_acc": 0.8684584980237154 }, { "epoch": 0.45977445637511466, "grad_norm": 0.3954957127571106, "learning_rate": 1.1779108238316898e-05, "loss": 0.3637101650238037, "step": 8521, "token_acc": 0.8693960412789714 }, { "epoch": 0.45982841418011117, "grad_norm": 0.42056137323379517, "learning_rate": 1.1777388528445594e-05, "loss": 0.3613693118095398, "step": 8522, "token_acc": 0.8685378590078329 }, { "epoch": 0.4598823719851076, "grad_norm": 0.35771048069000244, "learning_rate": 1.177566876429331e-05, "loss": 0.34909090399742126, "step": 8523, "token_acc": 0.8801473049882825 }, { "epoch": 0.4599363297901041, "grad_norm": 0.3460993766784668, "learning_rate": 1.1773948945912565e-05, "loss": 0.36310508847236633, "step": 8524, "token_acc": 0.8756121449559255 }, { "epoch": 0.45999028759510063, "grad_norm": 0.341641902923584, "learning_rate": 1.1772229073355886e-05, "loss": 0.38300827145576477, "step": 8525, "token_acc": 0.8645677694770544 }, { "epoch": 0.46004424540009714, "grad_norm": 0.3682771325111389, "learning_rate": 1.1770509146675793e-05, "loss": 0.34609657526016235, "step": 8526, "token_acc": 0.8771322303450045 }, { "epoch": 0.4600982032050936, "grad_norm": 0.4795495271682739, "learning_rate": 1.1768789165924818e-05, "loss": 0.3927183747291565, "step": 8527, "token_acc": 0.8619281045751634 }, { "epoch": 0.4601521610100901, "grad_norm": 0.46051979064941406, "learning_rate": 1.1767069131155486e-05, "loss": 0.39892202615737915, "step": 8528, "token_acc": 0.8611629089676216 }, { "epoch": 0.4602061188150866, "grad_norm": 0.3935260474681854, "learning_rate": 1.1765349042420323e-05, "loss": 0.3514925241470337, "step": 8529, "token_acc": 0.8763684913217623 }, { "epoch": 0.4602600766200831, "grad_norm": 0.37872472405433655, "learning_rate": 1.1763628899771862e-05, "loss": 0.371096670627594, "step": 8530, "token_acc": 0.8712457560720814 }, { "epoch": 0.46031403442507957, "grad_norm": 0.44403842091560364, "learning_rate": 1.1761908703262637e-05, "loss": 0.38396328687667847, "step": 8531, "token_acc": 0.8684915550378567 }, { "epoch": 0.4603679922300761, "grad_norm": 0.3707699477672577, "learning_rate": 1.1760188452945183e-05, "loss": 0.36396121978759766, "step": 8532, "token_acc": 0.8726190476190476 }, { "epoch": 0.4604219500350726, "grad_norm": 0.3423306345939636, "learning_rate": 1.175846814887203e-05, "loss": 0.40503016114234924, "step": 8533, "token_acc": 0.8598117306299783 }, { "epoch": 0.4604759078400691, "grad_norm": 0.42218995094299316, "learning_rate": 1.1756747791095726e-05, "loss": 0.42547178268432617, "step": 8534, "token_acc": 0.856457980823463 }, { "epoch": 0.46052986564506554, "grad_norm": 0.4011368751525879, "learning_rate": 1.1755027379668804e-05, "loss": 0.3205544054508209, "step": 8535, "token_acc": 0.8842681663460116 }, { "epoch": 0.46058382345006205, "grad_norm": 0.36045417189598083, "learning_rate": 1.1753306914643804e-05, "loss": 0.39524412155151367, "step": 8536, "token_acc": 0.8645376988701867 }, { "epoch": 0.46063778125505855, "grad_norm": 0.32731547951698303, "learning_rate": 1.1751586396073273e-05, "loss": 0.32214996218681335, "step": 8537, "token_acc": 0.8891963692716405 }, { "epoch": 0.46069173906005506, "grad_norm": 0.37993043661117554, "learning_rate": 1.174986582400975e-05, "loss": 0.27466440200805664, "step": 8538, "token_acc": 0.8965334276618323 }, { "epoch": 0.4607456968650515, "grad_norm": 0.48111388087272644, "learning_rate": 1.1748145198505781e-05, "loss": 0.4283409118652344, "step": 8539, "token_acc": 0.8536332692023088 }, { "epoch": 0.460799654670048, "grad_norm": 0.3589913249015808, "learning_rate": 1.1746424519613919e-05, "loss": 0.37403714656829834, "step": 8540, "token_acc": 0.872639336711193 }, { "epoch": 0.4608536124750445, "grad_norm": 0.47207432985305786, "learning_rate": 1.174470378738671e-05, "loss": 0.38551610708236694, "step": 8541, "token_acc": 0.8638847491306507 }, { "epoch": 0.46090757028004103, "grad_norm": 0.2723828852176666, "learning_rate": 1.1742983001876699e-05, "loss": 0.3260849118232727, "step": 8542, "token_acc": 0.8784092435845761 }, { "epoch": 0.4609615280850375, "grad_norm": 0.30119070410728455, "learning_rate": 1.1741262163136448e-05, "loss": 0.3643864393234253, "step": 8543, "token_acc": 0.8750581125058112 }, { "epoch": 0.461015485890034, "grad_norm": 0.38535186648368835, "learning_rate": 1.1739541271218509e-05, "loss": 0.38691240549087524, "step": 8544, "token_acc": 0.8647911338448423 }, { "epoch": 0.4610694436950305, "grad_norm": 0.37836745381355286, "learning_rate": 1.1737820326175431e-05, "loss": 0.3873867392539978, "step": 8545, "token_acc": 0.8626339780707157 }, { "epoch": 0.46112340150002695, "grad_norm": 0.33671849966049194, "learning_rate": 1.1736099328059777e-05, "loss": 0.3521506190299988, "step": 8546, "token_acc": 0.8754712916083163 }, { "epoch": 0.46117735930502346, "grad_norm": 0.27370020747184753, "learning_rate": 1.1734378276924106e-05, "loss": 0.34899991750717163, "step": 8547, "token_acc": 0.8753358409457281 }, { "epoch": 0.46123131711001997, "grad_norm": 0.4037809669971466, "learning_rate": 1.1732657172820974e-05, "loss": 0.38633522391319275, "step": 8548, "token_acc": 0.8593554443053817 }, { "epoch": 0.4612852749150165, "grad_norm": 0.37009793519973755, "learning_rate": 1.1730936015802945e-05, "loss": 0.3727562129497528, "step": 8549, "token_acc": 0.8688307873090482 }, { "epoch": 0.4613392327200129, "grad_norm": 0.30458587408065796, "learning_rate": 1.1729214805922587e-05, "loss": 0.33820584416389465, "step": 8550, "token_acc": 0.8798449612403101 }, { "epoch": 0.46139319052500943, "grad_norm": 0.34429794549942017, "learning_rate": 1.172749354323246e-05, "loss": 0.3717675805091858, "step": 8551, "token_acc": 0.8681110358835478 }, { "epoch": 0.46144714833000594, "grad_norm": 0.4966811537742615, "learning_rate": 1.1725772227785132e-05, "loss": 0.39944884181022644, "step": 8552, "token_acc": 0.8630164868715652 }, { "epoch": 0.46150110613500245, "grad_norm": 0.42577993869781494, "learning_rate": 1.1724050859633174e-05, "loss": 0.36998218297958374, "step": 8553, "token_acc": 0.8728465955701394 }, { "epoch": 0.4615550639399989, "grad_norm": 0.40364623069763184, "learning_rate": 1.1722329438829154e-05, "loss": 0.39677566289901733, "step": 8554, "token_acc": 0.8589743589743589 }, { "epoch": 0.4616090217449954, "grad_norm": 0.36725419759750366, "learning_rate": 1.1720607965425643e-05, "loss": 0.41519203782081604, "step": 8555, "token_acc": 0.8567973523421588 }, { "epoch": 0.4616629795499919, "grad_norm": 0.3420960009098053, "learning_rate": 1.1718886439475217e-05, "loss": 0.3579118251800537, "step": 8556, "token_acc": 0.8790065214374914 }, { "epoch": 0.4617169373549884, "grad_norm": 0.46967631578445435, "learning_rate": 1.1717164861030448e-05, "loss": 0.40599727630615234, "step": 8557, "token_acc": 0.8589463053358021 }, { "epoch": 0.4617708951599849, "grad_norm": 0.3659186065196991, "learning_rate": 1.1715443230143913e-05, "loss": 0.40888386964797974, "step": 8558, "token_acc": 0.8575721939108846 }, { "epoch": 0.4618248529649814, "grad_norm": 0.3850800395011902, "learning_rate": 1.1713721546868193e-05, "loss": 0.378894180059433, "step": 8559, "token_acc": 0.8706909090909091 }, { "epoch": 0.4618788107699779, "grad_norm": 0.4591948688030243, "learning_rate": 1.1711999811255866e-05, "loss": 0.3586796522140503, "step": 8560, "token_acc": 0.8747280074603668 }, { "epoch": 0.4619327685749744, "grad_norm": 0.3972873091697693, "learning_rate": 1.1710278023359514e-05, "loss": 0.3766501545906067, "step": 8561, "token_acc": 0.8724279835390947 }, { "epoch": 0.46198672637997085, "grad_norm": 0.38818302750587463, "learning_rate": 1.1708556183231719e-05, "loss": 0.3872801959514618, "step": 8562, "token_acc": 0.8684541265186426 }, { "epoch": 0.46204068418496735, "grad_norm": 0.38570636510849, "learning_rate": 1.1706834290925064e-05, "loss": 0.445862352848053, "step": 8563, "token_acc": 0.84592231618174 }, { "epoch": 0.46209464198996386, "grad_norm": 0.40662989020347595, "learning_rate": 1.1705112346492138e-05, "loss": 0.35005420446395874, "step": 8564, "token_acc": 0.8754089778824761 }, { "epoch": 0.46214859979496037, "grad_norm": 0.36674973368644714, "learning_rate": 1.1703390349985526e-05, "loss": 0.3502213954925537, "step": 8565, "token_acc": 0.8781131279020684 }, { "epoch": 0.4622025575999568, "grad_norm": 0.4322316348552704, "learning_rate": 1.170166830145782e-05, "loss": 0.3408089876174927, "step": 8566, "token_acc": 0.8806770571476454 }, { "epoch": 0.4622565154049533, "grad_norm": 0.3643357753753662, "learning_rate": 1.1699946200961611e-05, "loss": 0.3529341220855713, "step": 8567, "token_acc": 0.8780525742728262 }, { "epoch": 0.46231047320994983, "grad_norm": 0.40521952509880066, "learning_rate": 1.1698224048549489e-05, "loss": 0.3880555033683777, "step": 8568, "token_acc": 0.8633660627852302 }, { "epoch": 0.4623644310149463, "grad_norm": 0.3542284369468689, "learning_rate": 1.169650184427405e-05, "loss": 0.2966044843196869, "step": 8569, "token_acc": 0.890933512424446 }, { "epoch": 0.4624183888199428, "grad_norm": 0.3440626263618469, "learning_rate": 1.1694779588187891e-05, "loss": 0.369422048330307, "step": 8570, "token_acc": 0.8700195040401226 }, { "epoch": 0.4624723466249393, "grad_norm": 0.33754345774650574, "learning_rate": 1.1693057280343603e-05, "loss": 0.3737538456916809, "step": 8571, "token_acc": 0.8717119928666964 }, { "epoch": 0.4625263044299358, "grad_norm": 0.3063909411430359, "learning_rate": 1.1691334920793792e-05, "loss": 0.30755525827407837, "step": 8572, "token_acc": 0.8878514702725907 }, { "epoch": 0.46258026223493226, "grad_norm": 0.48876941204071045, "learning_rate": 1.1689612509591056e-05, "loss": 0.37580573558807373, "step": 8573, "token_acc": 0.8668369790835063 }, { "epoch": 0.46263422003992877, "grad_norm": 0.3542802631855011, "learning_rate": 1.1687890046787992e-05, "loss": 0.3099280297756195, "step": 8574, "token_acc": 0.8879498761474574 }, { "epoch": 0.4626881778449253, "grad_norm": 0.44548049569129944, "learning_rate": 1.1686167532437214e-05, "loss": 0.3655422031879425, "step": 8575, "token_acc": 0.8684490353941972 }, { "epoch": 0.4627421356499218, "grad_norm": 0.3915312886238098, "learning_rate": 1.1684444966591319e-05, "loss": 0.3810221552848816, "step": 8576, "token_acc": 0.8710041793742235 }, { "epoch": 0.46279609345491823, "grad_norm": 0.3484460115432739, "learning_rate": 1.1682722349302917e-05, "loss": 0.3352753520011902, "step": 8577, "token_acc": 0.8805697589481373 }, { "epoch": 0.46285005125991474, "grad_norm": 0.3550569415092468, "learning_rate": 1.1680999680624613e-05, "loss": 0.33999747037887573, "step": 8578, "token_acc": 0.8762260625875758 }, { "epoch": 0.46290400906491125, "grad_norm": 0.3538525402545929, "learning_rate": 1.1679276960609023e-05, "loss": 0.3206542432308197, "step": 8579, "token_acc": 0.887648209968919 }, { "epoch": 0.46295796686990776, "grad_norm": 0.43078917264938354, "learning_rate": 1.167755418930875e-05, "loss": 0.34513628482818604, "step": 8580, "token_acc": 0.878570176975644 }, { "epoch": 0.4630119246749042, "grad_norm": 0.3572595715522766, "learning_rate": 1.1675831366776414e-05, "loss": 0.36065995693206787, "step": 8581, "token_acc": 0.8776280873647683 }, { "epoch": 0.4630658824799007, "grad_norm": 0.39759308099746704, "learning_rate": 1.1674108493064627e-05, "loss": 0.37594330310821533, "step": 8582, "token_acc": 0.8718658667897247 }, { "epoch": 0.4631198402848972, "grad_norm": 0.36348387598991394, "learning_rate": 1.1672385568226004e-05, "loss": 0.34189385175704956, "step": 8583, "token_acc": 0.8772681811687384 }, { "epoch": 0.46317379808989373, "grad_norm": 0.45850449800491333, "learning_rate": 1.1670662592313163e-05, "loss": 0.41430434584617615, "step": 8584, "token_acc": 0.8514056224899599 }, { "epoch": 0.4632277558948902, "grad_norm": 0.3837100863456726, "learning_rate": 1.166893956537873e-05, "loss": 0.30921176075935364, "step": 8585, "token_acc": 0.8872721190834588 }, { "epoch": 0.4632817136998867, "grad_norm": 0.34748169779777527, "learning_rate": 1.1667216487475314e-05, "loss": 0.32530477643013, "step": 8586, "token_acc": 0.8835325877362129 }, { "epoch": 0.4633356715048832, "grad_norm": 0.36683189868927, "learning_rate": 1.1665493358655544e-05, "loss": 0.4564260244369507, "step": 8587, "token_acc": 0.8505917159763313 }, { "epoch": 0.46338962930987965, "grad_norm": 0.43860095739364624, "learning_rate": 1.1663770178972044e-05, "loss": 0.3602446913719177, "step": 8588, "token_acc": 0.8708879184861718 }, { "epoch": 0.46344358711487615, "grad_norm": 0.36075010895729065, "learning_rate": 1.1662046948477439e-05, "loss": 0.37748557329177856, "step": 8589, "token_acc": 0.8682660537279127 }, { "epoch": 0.46349754491987266, "grad_norm": 0.5204535126686096, "learning_rate": 1.1660323667224353e-05, "loss": 0.3886103630065918, "step": 8590, "token_acc": 0.860126582278481 }, { "epoch": 0.46355150272486917, "grad_norm": 0.44757741689682007, "learning_rate": 1.165860033526542e-05, "loss": 0.3800494968891144, "step": 8591, "token_acc": 0.8672191173636539 }, { "epoch": 0.4636054605298656, "grad_norm": 0.2991487383842468, "learning_rate": 1.1656876952653266e-05, "loss": 0.32637059688568115, "step": 8592, "token_acc": 0.8853224394514152 }, { "epoch": 0.46365941833486213, "grad_norm": 0.45382705330848694, "learning_rate": 1.1655153519440525e-05, "loss": 0.32299816608428955, "step": 8593, "token_acc": 0.8867307692307692 }, { "epoch": 0.46371337613985864, "grad_norm": 0.3985948860645294, "learning_rate": 1.165343003567983e-05, "loss": 0.3524247109889984, "step": 8594, "token_acc": 0.8745315544895818 }, { "epoch": 0.46376733394485514, "grad_norm": 0.31831246614456177, "learning_rate": 1.1651706501423813e-05, "loss": 0.3447129726409912, "step": 8595, "token_acc": 0.8809675366008911 }, { "epoch": 0.4638212917498516, "grad_norm": 0.36800095438957214, "learning_rate": 1.1649982916725113e-05, "loss": 0.4304782748222351, "step": 8596, "token_acc": 0.8509111780743291 }, { "epoch": 0.4638752495548481, "grad_norm": 0.37472933530807495, "learning_rate": 1.1648259281636366e-05, "loss": 0.3998524248600006, "step": 8597, "token_acc": 0.8621480443368582 }, { "epoch": 0.4639292073598446, "grad_norm": 0.47206512093544006, "learning_rate": 1.1646535596210212e-05, "loss": 0.32527488470077515, "step": 8598, "token_acc": 0.8795280824194085 }, { "epoch": 0.4639831651648411, "grad_norm": 0.403726726770401, "learning_rate": 1.1644811860499291e-05, "loss": 0.37004953622817993, "step": 8599, "token_acc": 0.8712283202660964 }, { "epoch": 0.46403712296983757, "grad_norm": 0.5103225111961365, "learning_rate": 1.1643088074556247e-05, "loss": 0.4163122773170471, "step": 8600, "token_acc": 0.8559795625099792 }, { "epoch": 0.4640910807748341, "grad_norm": 0.41727542877197266, "learning_rate": 1.1641364238433728e-05, "loss": 0.3520016670227051, "step": 8601, "token_acc": 0.8750235626767201 }, { "epoch": 0.4641450385798306, "grad_norm": 0.32135042548179626, "learning_rate": 1.1639640352184372e-05, "loss": 0.35869741439819336, "step": 8602, "token_acc": 0.8764910536779325 }, { "epoch": 0.4641989963848271, "grad_norm": 0.4089958965778351, "learning_rate": 1.163791641586083e-05, "loss": 0.40145236253738403, "step": 8603, "token_acc": 0.857917760279965 }, { "epoch": 0.46425295418982354, "grad_norm": 0.4451148211956024, "learning_rate": 1.163619242951575e-05, "loss": 0.35376784205436707, "step": 8604, "token_acc": 0.8735001714089818 }, { "epoch": 0.46430691199482005, "grad_norm": 0.2787470519542694, "learning_rate": 1.1634468393201784e-05, "loss": 0.35119348764419556, "step": 8605, "token_acc": 0.8774914307477466 }, { "epoch": 0.46436086979981656, "grad_norm": 0.431999534368515, "learning_rate": 1.1632744306971577e-05, "loss": 0.38210833072662354, "step": 8606, "token_acc": 0.8671051139584987 }, { "epoch": 0.46441482760481306, "grad_norm": 0.49898824095726013, "learning_rate": 1.1631020170877786e-05, "loss": 0.38444215059280396, "step": 8607, "token_acc": 0.8694408322496749 }, { "epoch": 0.4644687854098095, "grad_norm": 0.38798630237579346, "learning_rate": 1.1629295984973071e-05, "loss": 0.3104601800441742, "step": 8608, "token_acc": 0.8854197789064834 }, { "epoch": 0.464522743214806, "grad_norm": 0.3842141032218933, "learning_rate": 1.1627571749310081e-05, "loss": 0.3221372663974762, "step": 8609, "token_acc": 0.8827496757457847 }, { "epoch": 0.46457670101980253, "grad_norm": 0.2359185367822647, "learning_rate": 1.1625847463941475e-05, "loss": 0.3378492593765259, "step": 8610, "token_acc": 0.8834806050621536 }, { "epoch": 0.464630658824799, "grad_norm": 0.41959092020988464, "learning_rate": 1.1624123128919914e-05, "loss": 0.3357277512550354, "step": 8611, "token_acc": 0.8830342577487765 }, { "epoch": 0.4646846166297955, "grad_norm": 0.33608388900756836, "learning_rate": 1.162239874429806e-05, "loss": 0.37923216819763184, "step": 8612, "token_acc": 0.869207878701998 }, { "epoch": 0.464738574434792, "grad_norm": 0.3963969051837921, "learning_rate": 1.162067431012857e-05, "loss": 0.39885789155960083, "step": 8613, "token_acc": 0.8622093023255814 }, { "epoch": 0.4647925322397885, "grad_norm": 0.3139587342739105, "learning_rate": 1.1618949826464111e-05, "loss": 0.3696163296699524, "step": 8614, "token_acc": 0.8662233022125592 }, { "epoch": 0.46484649004478495, "grad_norm": 0.4160631000995636, "learning_rate": 1.1617225293357352e-05, "loss": 0.3363475799560547, "step": 8615, "token_acc": 0.8852667231160034 }, { "epoch": 0.46490044784978146, "grad_norm": 0.5284433364868164, "learning_rate": 1.1615500710860953e-05, "loss": 0.40332770347595215, "step": 8616, "token_acc": 0.8543486517280668 }, { "epoch": 0.46495440565477797, "grad_norm": 0.3938907980918884, "learning_rate": 1.1613776079027588e-05, "loss": 0.3619273006916046, "step": 8617, "token_acc": 0.8718627821919536 }, { "epoch": 0.4650083634597745, "grad_norm": 0.4497024118900299, "learning_rate": 1.1612051397909925e-05, "loss": 0.3903615474700928, "step": 8618, "token_acc": 0.8658373461744249 }, { "epoch": 0.46506232126477093, "grad_norm": 0.4086075723171234, "learning_rate": 1.1610326667560632e-05, "loss": 0.34097352623939514, "step": 8619, "token_acc": 0.8739090064995357 }, { "epoch": 0.46511627906976744, "grad_norm": 0.44694986939430237, "learning_rate": 1.1608601888032386e-05, "loss": 0.3625738024711609, "step": 8620, "token_acc": 0.8700159489633174 }, { "epoch": 0.46517023687476394, "grad_norm": 0.5296992063522339, "learning_rate": 1.160687705937786e-05, "loss": 0.3568928837776184, "step": 8621, "token_acc": 0.871067880794702 }, { "epoch": 0.46522419467976045, "grad_norm": 0.4048909842967987, "learning_rate": 1.1605152181649728e-05, "loss": 0.37509453296661377, "step": 8622, "token_acc": 0.867061655249028 }, { "epoch": 0.4652781524847569, "grad_norm": 0.389814555644989, "learning_rate": 1.1603427254900668e-05, "loss": 0.37238234281539917, "step": 8623, "token_acc": 0.8693891716797779 }, { "epoch": 0.4653321102897534, "grad_norm": 0.36235660314559937, "learning_rate": 1.1601702279183362e-05, "loss": 0.3259740471839905, "step": 8624, "token_acc": 0.8824236956848595 }, { "epoch": 0.4653860680947499, "grad_norm": 0.4266258478164673, "learning_rate": 1.1599977254550488e-05, "loss": 0.38469719886779785, "step": 8625, "token_acc": 0.8682870959531688 }, { "epoch": 0.4654400258997464, "grad_norm": 0.3120516240596771, "learning_rate": 1.1598252181054725e-05, "loss": 0.41503041982650757, "step": 8626, "token_acc": 0.863251155624037 }, { "epoch": 0.4654939837047429, "grad_norm": 0.48819923400878906, "learning_rate": 1.1596527058748764e-05, "loss": 0.3724489212036133, "step": 8627, "token_acc": 0.8683807774716865 }, { "epoch": 0.4655479415097394, "grad_norm": 0.4156753420829773, "learning_rate": 1.1594801887685283e-05, "loss": 0.40617749094963074, "step": 8628, "token_acc": 0.8652090611968895 }, { "epoch": 0.4656018993147359, "grad_norm": 0.44361039996147156, "learning_rate": 1.1593076667916967e-05, "loss": 0.4243796467781067, "step": 8629, "token_acc": 0.8515257325034158 }, { "epoch": 0.4656558571197324, "grad_norm": 0.38222023844718933, "learning_rate": 1.159135139949651e-05, "loss": 0.39768505096435547, "step": 8630, "token_acc": 0.8637333034010551 }, { "epoch": 0.46570981492472885, "grad_norm": 0.46347755193710327, "learning_rate": 1.1589626082476597e-05, "loss": 0.42129719257354736, "step": 8631, "token_acc": 0.8586206896551725 }, { "epoch": 0.46576377272972536, "grad_norm": 0.3946443200111389, "learning_rate": 1.1587900716909916e-05, "loss": 0.35648518800735474, "step": 8632, "token_acc": 0.8783282233338713 }, { "epoch": 0.46581773053472186, "grad_norm": 0.4825317859649658, "learning_rate": 1.1586175302849169e-05, "loss": 0.4020105004310608, "step": 8633, "token_acc": 0.8599827139152982 }, { "epoch": 0.4658716883397183, "grad_norm": 0.3471415936946869, "learning_rate": 1.1584449840347043e-05, "loss": 0.28462257981300354, "step": 8634, "token_acc": 0.8952548330404217 }, { "epoch": 0.4659256461447148, "grad_norm": 0.37749025225639343, "learning_rate": 1.1582724329456232e-05, "loss": 0.3681732714176178, "step": 8635, "token_acc": 0.8718105423987776 }, { "epoch": 0.46597960394971133, "grad_norm": 0.47980421781539917, "learning_rate": 1.1580998770229437e-05, "loss": 0.42976364493370056, "step": 8636, "token_acc": 0.8521467603434817 }, { "epoch": 0.46603356175470784, "grad_norm": 0.31543347239494324, "learning_rate": 1.1579273162719351e-05, "loss": 0.35285890102386475, "step": 8637, "token_acc": 0.8747980613893377 }, { "epoch": 0.4660875195597043, "grad_norm": 0.3287140727043152, "learning_rate": 1.157754750697868e-05, "loss": 0.34675490856170654, "step": 8638, "token_acc": 0.8779531235409469 }, { "epoch": 0.4661414773647008, "grad_norm": 0.4090288579463959, "learning_rate": 1.1575821803060117e-05, "loss": 0.3904060125350952, "step": 8639, "token_acc": 0.8673454330824849 }, { "epoch": 0.4661954351696973, "grad_norm": 0.35944971442222595, "learning_rate": 1.157409605101637e-05, "loss": 0.4299962520599365, "step": 8640, "token_acc": 0.85505148005148 }, { "epoch": 0.4662493929746938, "grad_norm": 0.30632343888282776, "learning_rate": 1.1572370250900142e-05, "loss": 0.31156864762306213, "step": 8641, "token_acc": 0.8862303002415737 }, { "epoch": 0.46630335077969026, "grad_norm": 0.3776349127292633, "learning_rate": 1.1570644402764137e-05, "loss": 0.31574854254722595, "step": 8642, "token_acc": 0.8854542601600502 }, { "epoch": 0.46635730858468677, "grad_norm": 0.4022233784198761, "learning_rate": 1.1568918506661066e-05, "loss": 0.35259225964546204, "step": 8643, "token_acc": 0.8762928139691067 }, { "epoch": 0.4664112663896833, "grad_norm": 0.45498326420783997, "learning_rate": 1.1567192562643639e-05, "loss": 0.33110928535461426, "step": 8644, "token_acc": 0.8792599805258033 }, { "epoch": 0.4664652241946798, "grad_norm": 0.38290515542030334, "learning_rate": 1.1565466570764554e-05, "loss": 0.34234002232551575, "step": 8645, "token_acc": 0.8775362318840579 }, { "epoch": 0.46651918199967624, "grad_norm": 0.44034454226493835, "learning_rate": 1.1563740531076534e-05, "loss": 0.38248297572135925, "step": 8646, "token_acc": 0.8708225108225108 }, { "epoch": 0.46657313980467274, "grad_norm": 0.43575942516326904, "learning_rate": 1.1562014443632288e-05, "loss": 0.4273434579372406, "step": 8647, "token_acc": 0.852619518800125 }, { "epoch": 0.46662709760966925, "grad_norm": 0.34657496213912964, "learning_rate": 1.1560288308484526e-05, "loss": 0.3771705627441406, "step": 8648, "token_acc": 0.8649659409643382 }, { "epoch": 0.46668105541466576, "grad_norm": 0.4177165925502777, "learning_rate": 1.1558562125685973e-05, "loss": 0.3732452392578125, "step": 8649, "token_acc": 0.8703332498120772 }, { "epoch": 0.4667350132196622, "grad_norm": 0.3237909972667694, "learning_rate": 1.1556835895289337e-05, "loss": 0.33791857957839966, "step": 8650, "token_acc": 0.8814842392653264 }, { "epoch": 0.4667889710246587, "grad_norm": 0.3735882341861725, "learning_rate": 1.1555109617347345e-05, "loss": 0.39183178544044495, "step": 8651, "token_acc": 0.8615061409179057 }, { "epoch": 0.4668429288296552, "grad_norm": 0.4609566628932953, "learning_rate": 1.155338329191271e-05, "loss": 0.3825893998146057, "step": 8652, "token_acc": 0.8646335770966236 }, { "epoch": 0.4668968866346517, "grad_norm": 0.37919098138809204, "learning_rate": 1.1551656919038154e-05, "loss": 0.3689809739589691, "step": 8653, "token_acc": 0.8722132270519367 }, { "epoch": 0.4669508444396482, "grad_norm": 0.3153962194919586, "learning_rate": 1.1549930498776408e-05, "loss": 0.34885796904563904, "step": 8654, "token_acc": 0.878553270164086 }, { "epoch": 0.4670048022446447, "grad_norm": 0.46571114659309387, "learning_rate": 1.1548204031180184e-05, "loss": 0.39069002866744995, "step": 8655, "token_acc": 0.8609104653116076 }, { "epoch": 0.4670587600496412, "grad_norm": 0.3468049168586731, "learning_rate": 1.1546477516302217e-05, "loss": 0.3749561309814453, "step": 8656, "token_acc": 0.8654190945764231 }, { "epoch": 0.46711271785463765, "grad_norm": 0.3847538232803345, "learning_rate": 1.1544750954195232e-05, "loss": 0.43655240535736084, "step": 8657, "token_acc": 0.8464014419981976 }, { "epoch": 0.46716667565963416, "grad_norm": 0.4152945578098297, "learning_rate": 1.1543024344911957e-05, "loss": 0.4005127251148224, "step": 8658, "token_acc": 0.8621827047181988 }, { "epoch": 0.46722063346463066, "grad_norm": 0.5071538090705872, "learning_rate": 1.1541297688505125e-05, "loss": 0.4187414050102234, "step": 8659, "token_acc": 0.854347483036137 }, { "epoch": 0.46727459126962717, "grad_norm": 0.41091570258140564, "learning_rate": 1.1539570985027462e-05, "loss": 0.3581799566745758, "step": 8660, "token_acc": 0.8666904932094354 }, { "epoch": 0.4673285490746236, "grad_norm": 0.3603880703449249, "learning_rate": 1.1537844234531707e-05, "loss": 0.36595040559768677, "step": 8661, "token_acc": 0.8748685594111462 }, { "epoch": 0.46738250687962013, "grad_norm": 0.3784879148006439, "learning_rate": 1.1536117437070593e-05, "loss": 0.3501690924167633, "step": 8662, "token_acc": 0.8767064846416383 }, { "epoch": 0.46743646468461664, "grad_norm": 0.4020380973815918, "learning_rate": 1.1534390592696853e-05, "loss": 0.37196677923202515, "step": 8663, "token_acc": 0.8686615566037735 }, { "epoch": 0.46749042248961314, "grad_norm": 0.4237383306026459, "learning_rate": 1.1532663701463226e-05, "loss": 0.41057682037353516, "step": 8664, "token_acc": 0.8586239396795476 }, { "epoch": 0.4675443802946096, "grad_norm": 0.3335897624492645, "learning_rate": 1.153093676342245e-05, "loss": 0.33928146958351135, "step": 8665, "token_acc": 0.8752915351900123 }, { "epoch": 0.4675983380996061, "grad_norm": 0.35805660486221313, "learning_rate": 1.1529209778627269e-05, "loss": 0.4322272539138794, "step": 8666, "token_acc": 0.854912358226543 }, { "epoch": 0.4676522959046026, "grad_norm": 0.43970608711242676, "learning_rate": 1.152748274713042e-05, "loss": 0.4042336940765381, "step": 8667, "token_acc": 0.8617531617967727 }, { "epoch": 0.4677062537095991, "grad_norm": 0.3996370732784271, "learning_rate": 1.1525755668984649e-05, "loss": 0.3239728510379791, "step": 8668, "token_acc": 0.8811713910268444 }, { "epoch": 0.46776021151459557, "grad_norm": 0.42077547311782837, "learning_rate": 1.15240285442427e-05, "loss": 0.4114757180213928, "step": 8669, "token_acc": 0.8609406952965235 }, { "epoch": 0.4678141693195921, "grad_norm": 0.3577287495136261, "learning_rate": 1.1522301372957318e-05, "loss": 0.3723175525665283, "step": 8670, "token_acc": 0.8684182073737702 }, { "epoch": 0.4678681271245886, "grad_norm": 0.3602195978164673, "learning_rate": 1.152057415518125e-05, "loss": 0.3742690682411194, "step": 8671, "token_acc": 0.8674493277142283 }, { "epoch": 0.4679220849295851, "grad_norm": 0.5350326895713806, "learning_rate": 1.1518846890967245e-05, "loss": 0.3761520981788635, "step": 8672, "token_acc": 0.8708133971291866 }, { "epoch": 0.46797604273458154, "grad_norm": 0.3922750651836395, "learning_rate": 1.1517119580368057e-05, "loss": 0.39525628089904785, "step": 8673, "token_acc": 0.8681989352032203 }, { "epoch": 0.46803000053957805, "grad_norm": 0.34696412086486816, "learning_rate": 1.151539222343643e-05, "loss": 0.35764825344085693, "step": 8674, "token_acc": 0.8729551451187335 }, { "epoch": 0.46808395834457456, "grad_norm": 0.4091469943523407, "learning_rate": 1.1513664820225123e-05, "loss": 0.3567187786102295, "step": 8675, "token_acc": 0.8761566933991364 }, { "epoch": 0.468137916149571, "grad_norm": 0.43156757950782776, "learning_rate": 1.1511937370786892e-05, "loss": 0.36618372797966003, "step": 8676, "token_acc": 0.8658094296076492 }, { "epoch": 0.4681918739545675, "grad_norm": 0.4987775683403015, "learning_rate": 1.1510209875174487e-05, "loss": 0.3908982276916504, "step": 8677, "token_acc": 0.8627734586873983 }, { "epoch": 0.468245831759564, "grad_norm": 0.36729732155799866, "learning_rate": 1.150848233344067e-05, "loss": 0.3357866108417511, "step": 8678, "token_acc": 0.8807110327603938 }, { "epoch": 0.46829978956456053, "grad_norm": 0.4182374179363251, "learning_rate": 1.1506754745638196e-05, "loss": 0.3995637893676758, "step": 8679, "token_acc": 0.8614388489208633 }, { "epoch": 0.468353747369557, "grad_norm": 0.3398132622241974, "learning_rate": 1.150502711181983e-05, "loss": 0.40198248624801636, "step": 8680, "token_acc": 0.859249525478172 }, { "epoch": 0.4684077051745535, "grad_norm": 0.2994687557220459, "learning_rate": 1.1503299432038324e-05, "loss": 0.35798561573028564, "step": 8681, "token_acc": 0.8761537562799393 }, { "epoch": 0.46846166297955, "grad_norm": 0.3615559935569763, "learning_rate": 1.150157170634645e-05, "loss": 0.3279916048049927, "step": 8682, "token_acc": 0.8859034763784541 }, { "epoch": 0.4685156207845465, "grad_norm": 0.41853946447372437, "learning_rate": 1.149984393479697e-05, "loss": 0.3630007803440094, "step": 8683, "token_acc": 0.8699859254046446 }, { "epoch": 0.46856957858954296, "grad_norm": 0.3958410918712616, "learning_rate": 1.1498116117442648e-05, "loss": 0.3843015730381012, "step": 8684, "token_acc": 0.8645625861804811 }, { "epoch": 0.46862353639453946, "grad_norm": 0.36267784237861633, "learning_rate": 1.1496388254336257e-05, "loss": 0.331022173166275, "step": 8685, "token_acc": 0.8822084116201764 }, { "epoch": 0.46867749419953597, "grad_norm": 0.3680766522884369, "learning_rate": 1.1494660345530558e-05, "loss": 0.3469565808773041, "step": 8686, "token_acc": 0.8804853957951179 }, { "epoch": 0.4687314520045325, "grad_norm": 0.28166767954826355, "learning_rate": 1.149293239107832e-05, "loss": 0.35085344314575195, "step": 8687, "token_acc": 0.8774640911342249 }, { "epoch": 0.46878540980952893, "grad_norm": 0.37304261326789856, "learning_rate": 1.149120439103232e-05, "loss": 0.4134492576122284, "step": 8688, "token_acc": 0.8593433319821646 }, { "epoch": 0.46883936761452544, "grad_norm": 0.34376630187034607, "learning_rate": 1.1489476345445332e-05, "loss": 0.40154513716697693, "step": 8689, "token_acc": 0.8665609199960345 }, { "epoch": 0.46889332541952194, "grad_norm": 0.37873440980911255, "learning_rate": 1.1487748254370121e-05, "loss": 0.42127424478530884, "step": 8690, "token_acc": 0.8595208462974486 }, { "epoch": 0.46894728322451845, "grad_norm": 0.4376349151134491, "learning_rate": 1.148602011785947e-05, "loss": 0.41817301511764526, "step": 8691, "token_acc": 0.8592914438502673 }, { "epoch": 0.4690012410295149, "grad_norm": 0.42963072657585144, "learning_rate": 1.1484291935966157e-05, "loss": 0.2966376543045044, "step": 8692, "token_acc": 0.8869806094182825 }, { "epoch": 0.4690551988345114, "grad_norm": 0.31033483147621155, "learning_rate": 1.1482563708742953e-05, "loss": 0.36612749099731445, "step": 8693, "token_acc": 0.8664047151277013 }, { "epoch": 0.4691091566395079, "grad_norm": 0.2716010808944702, "learning_rate": 1.1480835436242645e-05, "loss": 0.31514203548431396, "step": 8694, "token_acc": 0.882614541317613 }, { "epoch": 0.46916311444450437, "grad_norm": 0.36847472190856934, "learning_rate": 1.147910711851801e-05, "loss": 0.39125746488571167, "step": 8695, "token_acc": 0.8628040057224606 }, { "epoch": 0.4692170722495009, "grad_norm": 0.40694499015808105, "learning_rate": 1.147737875562183e-05, "loss": 0.36277079582214355, "step": 8696, "token_acc": 0.871367635297254 }, { "epoch": 0.4692710300544974, "grad_norm": 0.43519580364227295, "learning_rate": 1.1475650347606888e-05, "loss": 0.3480476438999176, "step": 8697, "token_acc": 0.8768596258653705 }, { "epoch": 0.4693249878594939, "grad_norm": 0.40222081542015076, "learning_rate": 1.1473921894525974e-05, "loss": 0.3342187702655792, "step": 8698, "token_acc": 0.8806787082649151 }, { "epoch": 0.46937894566449034, "grad_norm": 0.34118273854255676, "learning_rate": 1.1472193396431873e-05, "loss": 0.4139019250869751, "step": 8699, "token_acc": 0.8554870892018779 }, { "epoch": 0.46943290346948685, "grad_norm": 0.30912721157073975, "learning_rate": 1.1470464853377366e-05, "loss": 0.34865307807922363, "step": 8700, "token_acc": 0.8762416734836975 }, { "epoch": 0.46948686127448336, "grad_norm": 0.4732460081577301, "learning_rate": 1.146873626541525e-05, "loss": 0.39225834608078003, "step": 8701, "token_acc": 0.8635940409683427 }, { "epoch": 0.46954081907947987, "grad_norm": 0.3302287459373474, "learning_rate": 1.1467007632598316e-05, "loss": 0.36477118730545044, "step": 8702, "token_acc": 0.8678969729155602 }, { "epoch": 0.4695947768844763, "grad_norm": 0.25624513626098633, "learning_rate": 1.1465278954979348e-05, "loss": 0.3003111481666565, "step": 8703, "token_acc": 0.8929242329367564 }, { "epoch": 0.4696487346894728, "grad_norm": 0.45811358094215393, "learning_rate": 1.146355023261115e-05, "loss": 0.3754405975341797, "step": 8704, "token_acc": 0.8710581639803784 }, { "epoch": 0.46970269249446933, "grad_norm": 0.4308820366859436, "learning_rate": 1.1461821465546508e-05, "loss": 0.40744978189468384, "step": 8705, "token_acc": 0.854156378600823 }, { "epoch": 0.46975665029946584, "grad_norm": 0.36465755105018616, "learning_rate": 1.1460092653838219e-05, "loss": 0.3519538342952728, "step": 8706, "token_acc": 0.872848843582155 }, { "epoch": 0.4698106081044623, "grad_norm": 0.4033123254776001, "learning_rate": 1.145836379753909e-05, "loss": 0.3936931788921356, "step": 8707, "token_acc": 0.8616406346082892 }, { "epoch": 0.4698645659094588, "grad_norm": 0.3913263976573944, "learning_rate": 1.1456634896701907e-05, "loss": 0.3490810990333557, "step": 8708, "token_acc": 0.8786428882122662 }, { "epoch": 0.4699185237144553, "grad_norm": 0.4076051414012909, "learning_rate": 1.1454905951379478e-05, "loss": 0.36240899562835693, "step": 8709, "token_acc": 0.8714704159343878 }, { "epoch": 0.4699724815194518, "grad_norm": 0.27945277094841003, "learning_rate": 1.1453176961624601e-05, "loss": 0.43906110525131226, "step": 8710, "token_acc": 0.8541800299337182 }, { "epoch": 0.47002643932444826, "grad_norm": 0.42079901695251465, "learning_rate": 1.1451447927490083e-05, "loss": 0.414115309715271, "step": 8711, "token_acc": 0.857103825136612 }, { "epoch": 0.47008039712944477, "grad_norm": 0.4705442190170288, "learning_rate": 1.1449718849028724e-05, "loss": 0.4106108546257019, "step": 8712, "token_acc": 0.8561810628849735 }, { "epoch": 0.4701343549344413, "grad_norm": 0.444061279296875, "learning_rate": 1.144798972629333e-05, "loss": 0.36552321910858154, "step": 8713, "token_acc": 0.8773482167165805 }, { "epoch": 0.4701883127394378, "grad_norm": 0.4749782383441925, "learning_rate": 1.1446260559336709e-05, "loss": 0.335071861743927, "step": 8714, "token_acc": 0.8826827242524917 }, { "epoch": 0.47024227054443424, "grad_norm": 0.46294480562210083, "learning_rate": 1.1444531348211672e-05, "loss": 0.3441460132598877, "step": 8715, "token_acc": 0.8788263283108644 }, { "epoch": 0.47029622834943074, "grad_norm": 0.3231099247932434, "learning_rate": 1.1442802092971021e-05, "loss": 0.3321162164211273, "step": 8716, "token_acc": 0.8832622601279317 }, { "epoch": 0.47035018615442725, "grad_norm": 0.3559577763080597, "learning_rate": 1.1441072793667576e-05, "loss": 0.44823452830314636, "step": 8717, "token_acc": 0.8463722397476341 }, { "epoch": 0.4704041439594237, "grad_norm": 0.45591914653778076, "learning_rate": 1.1439343450354145e-05, "loss": 0.35494762659072876, "step": 8718, "token_acc": 0.8733846377232467 }, { "epoch": 0.4704581017644202, "grad_norm": 0.3403172492980957, "learning_rate": 1.1437614063083541e-05, "loss": 0.3746906518936157, "step": 8719, "token_acc": 0.8730694980694981 }, { "epoch": 0.4705120595694167, "grad_norm": 0.3222787380218506, "learning_rate": 1.1435884631908582e-05, "loss": 0.36682626605033875, "step": 8720, "token_acc": 0.8750162569905059 }, { "epoch": 0.4705660173744132, "grad_norm": 0.4527839422225952, "learning_rate": 1.1434155156882082e-05, "loss": 0.37741565704345703, "step": 8721, "token_acc": 0.8678650268054242 }, { "epoch": 0.4706199751794097, "grad_norm": 0.468156099319458, "learning_rate": 1.1432425638056857e-05, "loss": 0.3858858048915863, "step": 8722, "token_acc": 0.8634560131344917 }, { "epoch": 0.4706739329844062, "grad_norm": 0.3191421926021576, "learning_rate": 1.143069607548573e-05, "loss": 0.3689599931240082, "step": 8723, "token_acc": 0.8720296742784281 }, { "epoch": 0.4707278907894027, "grad_norm": 0.35748398303985596, "learning_rate": 1.1428966469221522e-05, "loss": 0.3563966751098633, "step": 8724, "token_acc": 0.873092259040286 }, { "epoch": 0.4707818485943992, "grad_norm": 0.3577347993850708, "learning_rate": 1.142723681931705e-05, "loss": 0.3650210499763489, "step": 8725, "token_acc": 0.8726415094339622 }, { "epoch": 0.47083580639939565, "grad_norm": 0.3366183638572693, "learning_rate": 1.142550712582514e-05, "loss": 0.3558521866798401, "step": 8726, "token_acc": 0.8760364842454395 }, { "epoch": 0.47088976420439216, "grad_norm": 0.41249433159828186, "learning_rate": 1.1423777388798614e-05, "loss": 0.3654360771179199, "step": 8727, "token_acc": 0.8717297054866199 }, { "epoch": 0.47094372200938867, "grad_norm": 0.41560083627700806, "learning_rate": 1.1422047608290301e-05, "loss": 0.4139503240585327, "step": 8728, "token_acc": 0.8586756591048437 }, { "epoch": 0.4709976798143852, "grad_norm": 0.43524253368377686, "learning_rate": 1.1420317784353024e-05, "loss": 0.43376708030700684, "step": 8729, "token_acc": 0.8472244440889458 }, { "epoch": 0.4710516376193816, "grad_norm": 0.3847895860671997, "learning_rate": 1.1418587917039616e-05, "loss": 0.3393256366252899, "step": 8730, "token_acc": 0.8771792052772106 }, { "epoch": 0.47110559542437813, "grad_norm": 0.41389936208724976, "learning_rate": 1.1416858006402906e-05, "loss": 0.37560364603996277, "step": 8731, "token_acc": 0.866721273110227 }, { "epoch": 0.47115955322937464, "grad_norm": 0.3652874231338501, "learning_rate": 1.141512805249572e-05, "loss": 0.29680389165878296, "step": 8732, "token_acc": 0.889263048827872 }, { "epoch": 0.47121351103437115, "grad_norm": 0.32556626200675964, "learning_rate": 1.1413398055370897e-05, "loss": 0.3373315930366516, "step": 8733, "token_acc": 0.8776543383490367 }, { "epoch": 0.4712674688393676, "grad_norm": 0.4276330769062042, "learning_rate": 1.1411668015081268e-05, "loss": 0.38353440165519714, "step": 8734, "token_acc": 0.8674441757929171 }, { "epoch": 0.4713214266443641, "grad_norm": 0.450541228055954, "learning_rate": 1.1409937931679667e-05, "loss": 0.33802521228790283, "step": 8735, "token_acc": 0.8808613118005383 }, { "epoch": 0.4713753844493606, "grad_norm": 0.37412339448928833, "learning_rate": 1.1408207805218931e-05, "loss": 0.3137645721435547, "step": 8736, "token_acc": 0.8894173602853745 }, { "epoch": 0.4714293422543571, "grad_norm": 0.4462229013442993, "learning_rate": 1.1406477635751899e-05, "loss": 0.39794883131980896, "step": 8737, "token_acc": 0.8677036561898653 }, { "epoch": 0.47148330005935357, "grad_norm": 0.5458598136901855, "learning_rate": 1.1404747423331407e-05, "loss": 0.3981349468231201, "step": 8738, "token_acc": 0.8636857349746561 }, { "epoch": 0.4715372578643501, "grad_norm": 0.44840008020401, "learning_rate": 1.1403017168010294e-05, "loss": 0.3541995584964752, "step": 8739, "token_acc": 0.8811249137336093 }, { "epoch": 0.4715912156693466, "grad_norm": 0.43698206543922424, "learning_rate": 1.1401286869841405e-05, "loss": 0.39364397525787354, "step": 8740, "token_acc": 0.8598756575801052 }, { "epoch": 0.47164517347434304, "grad_norm": 0.41233983635902405, "learning_rate": 1.1399556528877584e-05, "loss": 0.3545435070991516, "step": 8741, "token_acc": 0.8814664541370816 }, { "epoch": 0.47169913127933955, "grad_norm": 0.3472130000591278, "learning_rate": 1.1397826145171671e-05, "loss": 0.37009111046791077, "step": 8742, "token_acc": 0.8662166308683645 }, { "epoch": 0.47175308908433605, "grad_norm": 0.5460346937179565, "learning_rate": 1.1396095718776518e-05, "loss": 0.40199992060661316, "step": 8743, "token_acc": 0.8625389408099688 }, { "epoch": 0.47180704688933256, "grad_norm": 0.4431576132774353, "learning_rate": 1.1394365249744966e-05, "loss": 0.3376006484031677, "step": 8744, "token_acc": 0.8848419094854308 }, { "epoch": 0.471861004694329, "grad_norm": 0.4051267206668854, "learning_rate": 1.1392634738129862e-05, "loss": 0.3758552372455597, "step": 8745, "token_acc": 0.8653342522398346 }, { "epoch": 0.4719149624993255, "grad_norm": 0.37779825925827026, "learning_rate": 1.139090418398406e-05, "loss": 0.3571130335330963, "step": 8746, "token_acc": 0.8749804167319443 }, { "epoch": 0.471968920304322, "grad_norm": 0.41921189427375793, "learning_rate": 1.1389173587360408e-05, "loss": 0.38480260968208313, "step": 8747, "token_acc": 0.8670991099713381 }, { "epoch": 0.47202287810931853, "grad_norm": 0.38940855860710144, "learning_rate": 1.1387442948311757e-05, "loss": 0.3774973452091217, "step": 8748, "token_acc": 0.8720058511610898 }, { "epoch": 0.472076835914315, "grad_norm": 0.398034930229187, "learning_rate": 1.1385712266890965e-05, "loss": 0.3600752353668213, "step": 8749, "token_acc": 0.868 }, { "epoch": 0.4721307937193115, "grad_norm": 0.39335814118385315, "learning_rate": 1.1383981543150881e-05, "loss": 0.42193832993507385, "step": 8750, "token_acc": 0.8578115857226448 }, { "epoch": 0.472184751524308, "grad_norm": 0.3883860409259796, "learning_rate": 1.1382250777144367e-05, "loss": 0.3504835367202759, "step": 8751, "token_acc": 0.8744413407821229 }, { "epoch": 0.4722387093293045, "grad_norm": 0.37315526604652405, "learning_rate": 1.138051996892427e-05, "loss": 0.3774294853210449, "step": 8752, "token_acc": 0.8706026724843197 }, { "epoch": 0.47229266713430096, "grad_norm": 0.4197571575641632, "learning_rate": 1.137878911854346e-05, "loss": 0.37657737731933594, "step": 8753, "token_acc": 0.8639103013314646 }, { "epoch": 0.47234662493929747, "grad_norm": 0.46903249621391296, "learning_rate": 1.137705822605479e-05, "loss": 0.3877793848514557, "step": 8754, "token_acc": 0.8680121450258975 }, { "epoch": 0.472400582744294, "grad_norm": 0.455965518951416, "learning_rate": 1.137532729151112e-05, "loss": 0.34125202894210815, "step": 8755, "token_acc": 0.8775726392251816 }, { "epoch": 0.4724545405492905, "grad_norm": 0.3891434073448181, "learning_rate": 1.1373596314965318e-05, "loss": 0.3309609889984131, "step": 8756, "token_acc": 0.8759822343696617 }, { "epoch": 0.47250849835428693, "grad_norm": 0.3900918662548065, "learning_rate": 1.1371865296470244e-05, "loss": 0.43007010221481323, "step": 8757, "token_acc": 0.8542418772563177 }, { "epoch": 0.47256245615928344, "grad_norm": 0.5479938387870789, "learning_rate": 1.137013423607876e-05, "loss": 0.3716999888420105, "step": 8758, "token_acc": 0.8693794128576737 }, { "epoch": 0.47261641396427995, "grad_norm": 0.4144256114959717, "learning_rate": 1.136840313384374e-05, "loss": 0.4205302596092224, "step": 8759, "token_acc": 0.856243280602058 }, { "epoch": 0.4726703717692764, "grad_norm": 0.4057427942752838, "learning_rate": 1.1366671989818043e-05, "loss": 0.433967649936676, "step": 8760, "token_acc": 0.8556393339616714 }, { "epoch": 0.4727243295742729, "grad_norm": 0.35025304555892944, "learning_rate": 1.1364940804054544e-05, "loss": 0.34658199548721313, "step": 8761, "token_acc": 0.8783368769194425 }, { "epoch": 0.4727782873792694, "grad_norm": 0.4367945194244385, "learning_rate": 1.1363209576606111e-05, "loss": 0.36809414625167847, "step": 8762, "token_acc": 0.868133772309825 }, { "epoch": 0.4728322451842659, "grad_norm": 0.3596486747264862, "learning_rate": 1.1361478307525612e-05, "loss": 0.3139986991882324, "step": 8763, "token_acc": 0.8904364992322877 }, { "epoch": 0.47288620298926237, "grad_norm": 0.30021414160728455, "learning_rate": 1.1359746996865923e-05, "loss": 0.40688326954841614, "step": 8764, "token_acc": 0.8617148316216862 }, { "epoch": 0.4729401607942589, "grad_norm": 0.3646303117275238, "learning_rate": 1.1358015644679918e-05, "loss": 0.3119988441467285, "step": 8765, "token_acc": 0.8881911094692772 }, { "epoch": 0.4729941185992554, "grad_norm": 0.3584701418876648, "learning_rate": 1.135628425102047e-05, "loss": 0.37032565474510193, "step": 8766, "token_acc": 0.8682634730538922 }, { "epoch": 0.4730480764042519, "grad_norm": 0.34733107686042786, "learning_rate": 1.1354552815940458e-05, "loss": 0.3614678382873535, "step": 8767, "token_acc": 0.8718683972230606 }, { "epoch": 0.47310203420924835, "grad_norm": 0.36252614855766296, "learning_rate": 1.1352821339492757e-05, "loss": 0.40657955408096313, "step": 8768, "token_acc": 0.8636018813533607 }, { "epoch": 0.47315599201424485, "grad_norm": 0.3071306049823761, "learning_rate": 1.1351089821730246e-05, "loss": 0.41444265842437744, "step": 8769, "token_acc": 0.8535217848382198 }, { "epoch": 0.47320994981924136, "grad_norm": 0.3502039611339569, "learning_rate": 1.1349358262705805e-05, "loss": 0.3353954255580902, "step": 8770, "token_acc": 0.8808537350910233 }, { "epoch": 0.47326390762423787, "grad_norm": 0.3225063979625702, "learning_rate": 1.1347626662472316e-05, "loss": 0.3598012626171112, "step": 8771, "token_acc": 0.8733341484289033 }, { "epoch": 0.4733178654292343, "grad_norm": 0.421814888715744, "learning_rate": 1.1345895021082662e-05, "loss": 0.3564566969871521, "step": 8772, "token_acc": 0.8762249359264285 }, { "epoch": 0.4733718232342308, "grad_norm": 0.40152424573898315, "learning_rate": 1.134416333858973e-05, "loss": 0.36885493993759155, "step": 8773, "token_acc": 0.8728662873399715 }, { "epoch": 0.47342578103922733, "grad_norm": 0.4611615836620331, "learning_rate": 1.1342431615046396e-05, "loss": 0.3881874978542328, "step": 8774, "token_acc": 0.8647484228342822 }, { "epoch": 0.47347973884422384, "grad_norm": 0.40360546112060547, "learning_rate": 1.1340699850505557e-05, "loss": 0.37482792139053345, "step": 8775, "token_acc": 0.8739486498450642 }, { "epoch": 0.4735336966492203, "grad_norm": 0.43524351716041565, "learning_rate": 1.1338968045020093e-05, "loss": 0.3780783712863922, "step": 8776, "token_acc": 0.8676848415643734 }, { "epoch": 0.4735876544542168, "grad_norm": 0.4819071292877197, "learning_rate": 1.1337236198642898e-05, "loss": 0.4278334379196167, "step": 8777, "token_acc": 0.8519672131147541 }, { "epoch": 0.4736416122592133, "grad_norm": 0.4099901020526886, "learning_rate": 1.133550431142686e-05, "loss": 0.4097636342048645, "step": 8778, "token_acc": 0.8624544655194071 }, { "epoch": 0.4736955700642098, "grad_norm": 0.37189623713493347, "learning_rate": 1.1333772383424868e-05, "loss": 0.4488832950592041, "step": 8779, "token_acc": 0.8460947109471094 }, { "epoch": 0.47374952786920627, "grad_norm": 0.3974076211452484, "learning_rate": 1.1332040414689815e-05, "loss": 0.33722949028015137, "step": 8780, "token_acc": 0.8794455334640157 }, { "epoch": 0.4738034856742028, "grad_norm": 0.47592946887016296, "learning_rate": 1.13303084052746e-05, "loss": 0.44343101978302, "step": 8781, "token_acc": 0.8501372597953581 }, { "epoch": 0.4738574434791993, "grad_norm": 0.3300011456012726, "learning_rate": 1.1328576355232114e-05, "loss": 0.3894912004470825, "step": 8782, "token_acc": 0.8672942605526875 }, { "epoch": 0.47391140128419573, "grad_norm": 0.23316718637943268, "learning_rate": 1.1326844264615256e-05, "loss": 0.3345870077610016, "step": 8783, "token_acc": 0.8833441348023331 }, { "epoch": 0.47396535908919224, "grad_norm": 0.3556910753250122, "learning_rate": 1.1325112133476921e-05, "loss": 0.33813712000846863, "step": 8784, "token_acc": 0.8778967180331726 }, { "epoch": 0.47401931689418875, "grad_norm": 0.4102634787559509, "learning_rate": 1.1323379961870007e-05, "loss": 0.4109506607055664, "step": 8785, "token_acc": 0.85326278659612 }, { "epoch": 0.47407327469918525, "grad_norm": 0.4679413139820099, "learning_rate": 1.1321647749847418e-05, "loss": 0.41887468099594116, "step": 8786, "token_acc": 0.8568591759060069 }, { "epoch": 0.4741272325041817, "grad_norm": 0.3410608470439911, "learning_rate": 1.1319915497462048e-05, "loss": 0.3755691349506378, "step": 8787, "token_acc": 0.8684939495553288 }, { "epoch": 0.4741811903091782, "grad_norm": 0.3735414445400238, "learning_rate": 1.1318183204766808e-05, "loss": 0.36317408084869385, "step": 8788, "token_acc": 0.875287209599183 }, { "epoch": 0.4742351481141747, "grad_norm": 0.37446972727775574, "learning_rate": 1.1316450871814598e-05, "loss": 0.37781596183776855, "step": 8789, "token_acc": 0.864089622060815 }, { "epoch": 0.4742891059191712, "grad_norm": 0.3343145251274109, "learning_rate": 1.1314718498658321e-05, "loss": 0.36182257533073425, "step": 8790, "token_acc": 0.8741258741258742 }, { "epoch": 0.4743430637241677, "grad_norm": 0.3524681627750397, "learning_rate": 1.1312986085350887e-05, "loss": 0.32886990904808044, "step": 8791, "token_acc": 0.8846153846153846 }, { "epoch": 0.4743970215291642, "grad_norm": 0.30519816279411316, "learning_rate": 1.1311253631945203e-05, "loss": 0.31873244047164917, "step": 8792, "token_acc": 0.8858365543898399 }, { "epoch": 0.4744509793341607, "grad_norm": 0.45535311102867126, "learning_rate": 1.1309521138494178e-05, "loss": 0.3862159550189972, "step": 8793, "token_acc": 0.8670299727520436 }, { "epoch": 0.4745049371391572, "grad_norm": 0.3693971335887909, "learning_rate": 1.1307788605050717e-05, "loss": 0.3111051917076111, "step": 8794, "token_acc": 0.8884114023778829 }, { "epoch": 0.47455889494415365, "grad_norm": 0.3507400453090668, "learning_rate": 1.1306056031667738e-05, "loss": 0.37506943941116333, "step": 8795, "token_acc": 0.8685125563425627 }, { "epoch": 0.47461285274915016, "grad_norm": 0.37516430020332336, "learning_rate": 1.1304323418398146e-05, "loss": 0.39748531579971313, "step": 8796, "token_acc": 0.861671469740634 }, { "epoch": 0.47466681055414667, "grad_norm": 0.3459192216396332, "learning_rate": 1.1302590765294861e-05, "loss": 0.35391584038734436, "step": 8797, "token_acc": 0.8698862797223453 }, { "epoch": 0.4747207683591432, "grad_norm": 0.35329774022102356, "learning_rate": 1.1300858072410794e-05, "loss": 0.33801472187042236, "step": 8798, "token_acc": 0.8803657607315215 }, { "epoch": 0.4747747261641396, "grad_norm": 0.4992658793926239, "learning_rate": 1.1299125339798864e-05, "loss": 0.36582234501838684, "step": 8799, "token_acc": 0.8721665381649961 }, { "epoch": 0.47482868396913613, "grad_norm": 0.39059126377105713, "learning_rate": 1.1297392567511984e-05, "loss": 0.3305198550224304, "step": 8800, "token_acc": 0.8867199053534457 }, { "epoch": 0.47488264177413264, "grad_norm": 0.47388654947280884, "learning_rate": 1.1295659755603076e-05, "loss": 0.34261566400527954, "step": 8801, "token_acc": 0.8800947065787248 }, { "epoch": 0.47493659957912915, "grad_norm": 0.4225001633167267, "learning_rate": 1.1293926904125062e-05, "loss": 0.38782307505607605, "step": 8802, "token_acc": 0.8643860720830788 }, { "epoch": 0.4749905573841256, "grad_norm": 0.4268883764743805, "learning_rate": 1.1292194013130854e-05, "loss": 0.40813201665878296, "step": 8803, "token_acc": 0.8570064128803384 }, { "epoch": 0.4750445151891221, "grad_norm": 0.41931065917015076, "learning_rate": 1.1290461082673383e-05, "loss": 0.3738877475261688, "step": 8804, "token_acc": 0.8684171923133344 }, { "epoch": 0.4750984729941186, "grad_norm": 0.39970314502716064, "learning_rate": 1.1288728112805567e-05, "loss": 0.39617764949798584, "step": 8805, "token_acc": 0.8657296308993258 }, { "epoch": 0.47515243079911507, "grad_norm": 0.3744916319847107, "learning_rate": 1.1286995103580329e-05, "loss": 0.34834957122802734, "step": 8806, "token_acc": 0.8772365156066344 }, { "epoch": 0.4752063886041116, "grad_norm": 0.3180687129497528, "learning_rate": 1.12852620550506e-05, "loss": 0.36842280626296997, "step": 8807, "token_acc": 0.8674682333652138 }, { "epoch": 0.4752603464091081, "grad_norm": 0.3990921080112457, "learning_rate": 1.1283528967269308e-05, "loss": 0.3458852767944336, "step": 8808, "token_acc": 0.8757712189428047 }, { "epoch": 0.4753143042141046, "grad_norm": 0.28870686888694763, "learning_rate": 1.1281795840289376e-05, "loss": 0.33779776096343994, "step": 8809, "token_acc": 0.8829473029690658 }, { "epoch": 0.47536826201910104, "grad_norm": 0.3449453115463257, "learning_rate": 1.1280062674163731e-05, "loss": 0.3326927125453949, "step": 8810, "token_acc": 0.8830204250051579 }, { "epoch": 0.47542221982409755, "grad_norm": 0.35289475321769714, "learning_rate": 1.1278329468945313e-05, "loss": 0.35190722346305847, "step": 8811, "token_acc": 0.8754002134471718 }, { "epoch": 0.47547617762909405, "grad_norm": 0.34190523624420166, "learning_rate": 1.1276596224687045e-05, "loss": 0.38511061668395996, "step": 8812, "token_acc": 0.8674377224199288 }, { "epoch": 0.47553013543409056, "grad_norm": 0.46154582500457764, "learning_rate": 1.127486294144186e-05, "loss": 0.4034479856491089, "step": 8813, "token_acc": 0.8610692140103905 }, { "epoch": 0.475584093239087, "grad_norm": 0.4764596223831177, "learning_rate": 1.1273129619262698e-05, "loss": 0.35942885279655457, "step": 8814, "token_acc": 0.8693173062105101 }, { "epoch": 0.4756380510440835, "grad_norm": 0.3857313394546509, "learning_rate": 1.127139625820249e-05, "loss": 0.34820908308029175, "step": 8815, "token_acc": 0.8763346613545817 }, { "epoch": 0.47569200884908003, "grad_norm": 0.4026854336261749, "learning_rate": 1.1269662858314172e-05, "loss": 0.34988340735435486, "step": 8816, "token_acc": 0.8754331570308226 }, { "epoch": 0.47574596665407654, "grad_norm": 0.43587353825569153, "learning_rate": 1.1267929419650685e-05, "loss": 0.37474632263183594, "step": 8817, "token_acc": 0.8634743530141019 }, { "epoch": 0.475799924459073, "grad_norm": 0.2478039413690567, "learning_rate": 1.1266195942264965e-05, "loss": 0.3739147186279297, "step": 8818, "token_acc": 0.8671925514030777 }, { "epoch": 0.4758538822640695, "grad_norm": 0.4218346178531647, "learning_rate": 1.1264462426209955e-05, "loss": 0.3378138840198517, "step": 8819, "token_acc": 0.8776045461168586 }, { "epoch": 0.475907840069066, "grad_norm": 0.4365471303462982, "learning_rate": 1.1262728871538591e-05, "loss": 0.35384851694107056, "step": 8820, "token_acc": 0.8766458766458767 }, { "epoch": 0.4759617978740625, "grad_norm": 0.33857348561286926, "learning_rate": 1.1260995278303819e-05, "loss": 0.34284669160842896, "step": 8821, "token_acc": 0.8725155827355051 }, { "epoch": 0.47601575567905896, "grad_norm": 0.43026548624038696, "learning_rate": 1.125926164655858e-05, "loss": 0.3755476474761963, "step": 8822, "token_acc": 0.8672712283594394 }, { "epoch": 0.47606971348405547, "grad_norm": 0.4611368775367737, "learning_rate": 1.1257527976355822e-05, "loss": 0.37176814675331116, "step": 8823, "token_acc": 0.8665922856232069 }, { "epoch": 0.476123671289052, "grad_norm": 0.46591708064079285, "learning_rate": 1.1255794267748487e-05, "loss": 0.36066728830337524, "step": 8824, "token_acc": 0.8748931806528798 }, { "epoch": 0.4761776290940484, "grad_norm": 0.3961387276649475, "learning_rate": 1.1254060520789526e-05, "loss": 0.41000494360923767, "step": 8825, "token_acc": 0.8578283091686659 }, { "epoch": 0.47623158689904493, "grad_norm": 0.32707104086875916, "learning_rate": 1.1252326735531887e-05, "loss": 0.3705166280269623, "step": 8826, "token_acc": 0.8711361737677528 }, { "epoch": 0.47628554470404144, "grad_norm": 0.37243860960006714, "learning_rate": 1.1250592912028515e-05, "loss": 0.3384021520614624, "step": 8827, "token_acc": 0.8801045409996733 }, { "epoch": 0.47633950250903795, "grad_norm": 0.2770839035511017, "learning_rate": 1.1248859050332364e-05, "loss": 0.37916800379753113, "step": 8828, "token_acc": 0.8636148007590133 }, { "epoch": 0.4763934603140344, "grad_norm": 0.36506974697113037, "learning_rate": 1.124712515049638e-05, "loss": 0.34783050417900085, "step": 8829, "token_acc": 0.8768882175226587 }, { "epoch": 0.4764474181190309, "grad_norm": 0.45410409569740295, "learning_rate": 1.1245391212573526e-05, "loss": 0.39332565665245056, "step": 8830, "token_acc": 0.8614549771070036 }, { "epoch": 0.4765013759240274, "grad_norm": 0.4589642882347107, "learning_rate": 1.124365723661675e-05, "loss": 0.3772830367088318, "step": 8831, "token_acc": 0.8627703398558187 }, { "epoch": 0.4765553337290239, "grad_norm": 0.4441608190536499, "learning_rate": 1.1241923222679005e-05, "loss": 0.4188553988933563, "step": 8832, "token_acc": 0.858089612447774 }, { "epoch": 0.4766092915340204, "grad_norm": 0.36687418818473816, "learning_rate": 1.1240189170813253e-05, "loss": 0.31122806668281555, "step": 8833, "token_acc": 0.8871043930089749 }, { "epoch": 0.4766632493390169, "grad_norm": 0.385314017534256, "learning_rate": 1.1238455081072448e-05, "loss": 0.3334663212299347, "step": 8834, "token_acc": 0.8812944200816574 }, { "epoch": 0.4767172071440134, "grad_norm": 0.4463388919830322, "learning_rate": 1.1236720953509551e-05, "loss": 0.4646849036216736, "step": 8835, "token_acc": 0.8431600724075511 }, { "epoch": 0.4767711649490099, "grad_norm": 0.4237637221813202, "learning_rate": 1.1234986788177517e-05, "loss": 0.2783808708190918, "step": 8836, "token_acc": 0.8998620145870294 }, { "epoch": 0.47682512275400635, "grad_norm": 0.3688596189022064, "learning_rate": 1.1233252585129311e-05, "loss": 0.33403149247169495, "step": 8837, "token_acc": 0.8775539160045402 }, { "epoch": 0.47687908055900285, "grad_norm": 0.3062759339809418, "learning_rate": 1.1231518344417893e-05, "loss": 0.31940746307373047, "step": 8838, "token_acc": 0.8839005235602094 }, { "epoch": 0.47693303836399936, "grad_norm": 0.4370061755180359, "learning_rate": 1.1229784066096227e-05, "loss": 0.42447030544281006, "step": 8839, "token_acc": 0.8511885379355258 }, { "epoch": 0.47698699616899587, "grad_norm": 0.2905564606189728, "learning_rate": 1.1228049750217282e-05, "loss": 0.33529072999954224, "step": 8840, "token_acc": 0.8797372410308236 }, { "epoch": 0.4770409539739923, "grad_norm": 0.4183816909790039, "learning_rate": 1.1226315396834018e-05, "loss": 0.35341328382492065, "step": 8841, "token_acc": 0.8752605976372481 }, { "epoch": 0.47709491177898883, "grad_norm": 0.3635142743587494, "learning_rate": 1.1224581005999398e-05, "loss": 0.43677785992622375, "step": 8842, "token_acc": 0.8474937343358396 }, { "epoch": 0.47714886958398534, "grad_norm": 0.3335916996002197, "learning_rate": 1.1222846577766402e-05, "loss": 0.30726802349090576, "step": 8843, "token_acc": 0.8912270723730983 }, { "epoch": 0.47720282738898184, "grad_norm": 0.38015463948249817, "learning_rate": 1.1221112112187989e-05, "loss": 0.37017393112182617, "step": 8844, "token_acc": 0.8686622744199369 }, { "epoch": 0.4772567851939783, "grad_norm": 0.3400810956954956, "learning_rate": 1.1219377609317128e-05, "loss": 0.31836503744125366, "step": 8845, "token_acc": 0.8849833147942158 }, { "epoch": 0.4773107429989748, "grad_norm": 0.2959795594215393, "learning_rate": 1.1217643069206798e-05, "loss": 0.33694055676460266, "step": 8846, "token_acc": 0.8841346860755771 }, { "epoch": 0.4773647008039713, "grad_norm": 0.4750606119632721, "learning_rate": 1.1215908491909968e-05, "loss": 0.36821967363357544, "step": 8847, "token_acc": 0.8692897632544181 }, { "epoch": 0.47741865860896776, "grad_norm": 0.39454221725463867, "learning_rate": 1.121417387747961e-05, "loss": 0.3763914406299591, "step": 8848, "token_acc": 0.8659029649595688 }, { "epoch": 0.47747261641396427, "grad_norm": 0.44001540541648865, "learning_rate": 1.1212439225968701e-05, "loss": 0.43615686893463135, "step": 8849, "token_acc": 0.8520938520938521 }, { "epoch": 0.4775265742189608, "grad_norm": 0.30951306223869324, "learning_rate": 1.1210704537430215e-05, "loss": 0.37503451108932495, "step": 8850, "token_acc": 0.8695304467039872 }, { "epoch": 0.4775805320239573, "grad_norm": 0.328815758228302, "learning_rate": 1.120896981191713e-05, "loss": 0.32471585273742676, "step": 8851, "token_acc": 0.8837716779602619 }, { "epoch": 0.47763448982895373, "grad_norm": 0.3642997741699219, "learning_rate": 1.1207235049482424e-05, "loss": 0.32044717669487, "step": 8852, "token_acc": 0.8827880808501771 }, { "epoch": 0.47768844763395024, "grad_norm": 0.44055575132369995, "learning_rate": 1.1205500250179075e-05, "loss": 0.39768147468566895, "step": 8853, "token_acc": 0.8609850395024374 }, { "epoch": 0.47774240543894675, "grad_norm": 0.42640572786331177, "learning_rate": 1.1203765414060061e-05, "loss": 0.41426098346710205, "step": 8854, "token_acc": 0.8577942433741806 }, { "epoch": 0.47779636324394326, "grad_norm": 0.43214163184165955, "learning_rate": 1.1202030541178372e-05, "loss": 0.36465299129486084, "step": 8855, "token_acc": 0.8666441593517893 }, { "epoch": 0.4778503210489397, "grad_norm": 0.3668852746486664, "learning_rate": 1.1200295631586983e-05, "loss": 0.4381088614463806, "step": 8856, "token_acc": 0.8487603305785124 }, { "epoch": 0.4779042788539362, "grad_norm": 0.431962788105011, "learning_rate": 1.119856068533888e-05, "loss": 0.40119051933288574, "step": 8857, "token_acc": 0.8552773083422953 }, { "epoch": 0.4779582366589327, "grad_norm": 0.3003970682621002, "learning_rate": 1.1196825702487047e-05, "loss": 0.33768659830093384, "step": 8858, "token_acc": 0.8828518039384793 }, { "epoch": 0.47801219446392923, "grad_norm": 0.3886561095714569, "learning_rate": 1.119509068308447e-05, "loss": 0.3836071491241455, "step": 8859, "token_acc": 0.8672079262419156 }, { "epoch": 0.4780661522689257, "grad_norm": 0.3920682370662689, "learning_rate": 1.1193355627184142e-05, "loss": 0.3356319069862366, "step": 8860, "token_acc": 0.8799937820612467 }, { "epoch": 0.4781201100739222, "grad_norm": 0.39073100686073303, "learning_rate": 1.1191620534839039e-05, "loss": 0.3441705107688904, "step": 8861, "token_acc": 0.8759033142287566 }, { "epoch": 0.4781740678789187, "grad_norm": 0.3730666935443878, "learning_rate": 1.1189885406102159e-05, "loss": 0.3874204754829407, "step": 8862, "token_acc": 0.8666756903086086 }, { "epoch": 0.4782280256839152, "grad_norm": 0.4730449318885803, "learning_rate": 1.1188150241026492e-05, "loss": 0.36504441499710083, "step": 8863, "token_acc": 0.8710743801652893 }, { "epoch": 0.47828198348891166, "grad_norm": 0.25156691670417786, "learning_rate": 1.1186415039665024e-05, "loss": 0.3048997223377228, "step": 8864, "token_acc": 0.8931754364546454 }, { "epoch": 0.47833594129390816, "grad_norm": 0.3484175205230713, "learning_rate": 1.1184679802070755e-05, "loss": 0.3806130588054657, "step": 8865, "token_acc": 0.8660401002506266 }, { "epoch": 0.47838989909890467, "grad_norm": 0.3932305872440338, "learning_rate": 1.1182944528296675e-05, "loss": 0.3303511142730713, "step": 8866, "token_acc": 0.8808417997097242 }, { "epoch": 0.4784438569039012, "grad_norm": 0.47647446393966675, "learning_rate": 1.1181209218395779e-05, "loss": 0.3960873782634735, "step": 8867, "token_acc": 0.8649411764705882 }, { "epoch": 0.47849781470889763, "grad_norm": 0.36561182141304016, "learning_rate": 1.1179473872421062e-05, "loss": 0.34101614356040955, "step": 8868, "token_acc": 0.876437199825353 }, { "epoch": 0.47855177251389414, "grad_norm": 0.40370506048202515, "learning_rate": 1.1177738490425522e-05, "loss": 0.3728944957256317, "step": 8869, "token_acc": 0.8697857026010142 }, { "epoch": 0.47860573031889064, "grad_norm": 0.3408125936985016, "learning_rate": 1.1176003072462158e-05, "loss": 0.3984453082084656, "step": 8870, "token_acc": 0.8665224779854781 }, { "epoch": 0.4786596881238871, "grad_norm": 0.3733094036579132, "learning_rate": 1.1174267618583967e-05, "loss": 0.35862839221954346, "step": 8871, "token_acc": 0.8744038155802861 }, { "epoch": 0.4787136459288836, "grad_norm": 0.48631590604782104, "learning_rate": 1.1172532128843951e-05, "loss": 0.35956090688705444, "step": 8872, "token_acc": 0.8721835313396149 }, { "epoch": 0.4787676037338801, "grad_norm": 0.4900575876235962, "learning_rate": 1.117079660329511e-05, "loss": 0.33329659700393677, "step": 8873, "token_acc": 0.8862962962962962 }, { "epoch": 0.4788215615388766, "grad_norm": 0.4792681634426117, "learning_rate": 1.1169061041990448e-05, "loss": 0.4136459231376648, "step": 8874, "token_acc": 0.8586436446848346 }, { "epoch": 0.47887551934387307, "grad_norm": 0.4096948504447937, "learning_rate": 1.1167325444982968e-05, "loss": 0.40615952014923096, "step": 8875, "token_acc": 0.8616922051965357 }, { "epoch": 0.4789294771488696, "grad_norm": 0.41988739371299744, "learning_rate": 1.1165589812325678e-05, "loss": 0.4012834131717682, "step": 8876, "token_acc": 0.8617782726260538 }, { "epoch": 0.4789834349538661, "grad_norm": 0.4188288152217865, "learning_rate": 1.1163854144071577e-05, "loss": 0.35865163803100586, "step": 8877, "token_acc": 0.8722288035200542 }, { "epoch": 0.4790373927588626, "grad_norm": 0.4547799825668335, "learning_rate": 1.1162118440273677e-05, "loss": 0.31936854124069214, "step": 8878, "token_acc": 0.8835741909220436 }, { "epoch": 0.47909135056385904, "grad_norm": 0.401468425989151, "learning_rate": 1.1160382700984982e-05, "loss": 0.40648722648620605, "step": 8879, "token_acc": 0.858462004238571 }, { "epoch": 0.47914530836885555, "grad_norm": 0.4090456962585449, "learning_rate": 1.1158646926258503e-05, "loss": 0.3679802417755127, "step": 8880, "token_acc": 0.864516129032258 }, { "epoch": 0.47919926617385206, "grad_norm": 0.41504672169685364, "learning_rate": 1.1156911116147253e-05, "loss": 0.3767605721950531, "step": 8881, "token_acc": 0.872730094676393 }, { "epoch": 0.47925322397884856, "grad_norm": 0.38714903593063354, "learning_rate": 1.115517527070424e-05, "loss": 0.3488612174987793, "step": 8882, "token_acc": 0.8775088886340177 }, { "epoch": 0.479307181783845, "grad_norm": 0.3333089053630829, "learning_rate": 1.1153439389982474e-05, "loss": 0.3337569832801819, "step": 8883, "token_acc": 0.8754971676509582 }, { "epoch": 0.4793611395888415, "grad_norm": 0.2991739809513092, "learning_rate": 1.1151703474034976e-05, "loss": 0.30528974533081055, "step": 8884, "token_acc": 0.8884084084084084 }, { "epoch": 0.47941509739383803, "grad_norm": 0.42889347672462463, "learning_rate": 1.1149967522914755e-05, "loss": 0.3619782328605652, "step": 8885, "token_acc": 0.8722259583053127 }, { "epoch": 0.47946905519883454, "grad_norm": 0.38979434967041016, "learning_rate": 1.1148231536674825e-05, "loss": 0.3302736282348633, "step": 8886, "token_acc": 0.8812961180622393 }, { "epoch": 0.479523013003831, "grad_norm": 0.38690701127052307, "learning_rate": 1.1146495515368203e-05, "loss": 0.3633899688720703, "step": 8887, "token_acc": 0.8712555768005099 }, { "epoch": 0.4795769708088275, "grad_norm": 0.4056381285190582, "learning_rate": 1.1144759459047912e-05, "loss": 0.42749565839767456, "step": 8888, "token_acc": 0.8578287130246245 }, { "epoch": 0.479630928613824, "grad_norm": 0.4720448851585388, "learning_rate": 1.1143023367766965e-05, "loss": 0.38002660870552063, "step": 8889, "token_acc": 0.8676031081888822 }, { "epoch": 0.47968488641882046, "grad_norm": 0.41952139139175415, "learning_rate": 1.1141287241578383e-05, "loss": 0.3339001536369324, "step": 8890, "token_acc": 0.8818037974683545 }, { "epoch": 0.47973884422381696, "grad_norm": 0.35374715924263, "learning_rate": 1.1139551080535186e-05, "loss": 0.3506332039833069, "step": 8891, "token_acc": 0.8816611539875046 }, { "epoch": 0.47979280202881347, "grad_norm": 0.42509347200393677, "learning_rate": 1.1137814884690403e-05, "loss": 0.32502275705337524, "step": 8892, "token_acc": 0.8833305355044485 }, { "epoch": 0.47984675983381, "grad_norm": 0.365267813205719, "learning_rate": 1.113607865409705e-05, "loss": 0.34872791171073914, "step": 8893, "token_acc": 0.8759453170447935 }, { "epoch": 0.47990071763880643, "grad_norm": 0.34451282024383545, "learning_rate": 1.1134342388808151e-05, "loss": 0.36265110969543457, "step": 8894, "token_acc": 0.8760364278918037 }, { "epoch": 0.47995467544380294, "grad_norm": 0.41405749320983887, "learning_rate": 1.1132606088876732e-05, "loss": 0.3532795011997223, "step": 8895, "token_acc": 0.8758969341161122 }, { "epoch": 0.48000863324879944, "grad_norm": 0.4691231846809387, "learning_rate": 1.113086975435582e-05, "loss": 0.3670440912246704, "step": 8896, "token_acc": 0.874251497005988 }, { "epoch": 0.48006259105379595, "grad_norm": 0.36197277903556824, "learning_rate": 1.112913338529844e-05, "loss": 0.3098127245903015, "step": 8897, "token_acc": 0.8863130563798219 }, { "epoch": 0.4801165488587924, "grad_norm": 0.41643795371055603, "learning_rate": 1.1127396981757627e-05, "loss": 0.38206809759140015, "step": 8898, "token_acc": 0.8646225569302493 }, { "epoch": 0.4801705066637889, "grad_norm": 0.325560986995697, "learning_rate": 1.1125660543786402e-05, "loss": 0.4023541212081909, "step": 8899, "token_acc": 0.8650056306306306 }, { "epoch": 0.4802244644687854, "grad_norm": 0.4204997718334198, "learning_rate": 1.11239240714378e-05, "loss": 0.46108946204185486, "step": 8900, "token_acc": 0.8459277403551745 }, { "epoch": 0.4802784222737819, "grad_norm": 0.35850784182548523, "learning_rate": 1.1122187564764855e-05, "loss": 0.3849172592163086, "step": 8901, "token_acc": 0.8697358095736333 }, { "epoch": 0.4803323800787784, "grad_norm": 0.5005583763122559, "learning_rate": 1.1120451023820592e-05, "loss": 0.3927496075630188, "step": 8902, "token_acc": 0.8604746317512275 }, { "epoch": 0.4803863378837749, "grad_norm": 0.36001044511795044, "learning_rate": 1.1118714448658045e-05, "loss": 0.3423619866371155, "step": 8903, "token_acc": 0.8785687095546251 }, { "epoch": 0.4804402956887714, "grad_norm": 0.28454482555389404, "learning_rate": 1.1116977839330256e-05, "loss": 0.35662299394607544, "step": 8904, "token_acc": 0.8716208699521637 }, { "epoch": 0.4804942534937679, "grad_norm": 0.4156618118286133, "learning_rate": 1.1115241195890256e-05, "loss": 0.39215928316116333, "step": 8905, "token_acc": 0.862026188835286 }, { "epoch": 0.48054821129876435, "grad_norm": 0.3886735141277313, "learning_rate": 1.1113504518391081e-05, "loss": 0.3808872103691101, "step": 8906, "token_acc": 0.8642904290429043 }, { "epoch": 0.48060216910376086, "grad_norm": 0.3316175937652588, "learning_rate": 1.111176780688577e-05, "loss": 0.35723042488098145, "step": 8907, "token_acc": 0.8731154684095861 }, { "epoch": 0.48065612690875736, "grad_norm": 0.3739760220050812, "learning_rate": 1.1110031061427361e-05, "loss": 0.37714287638664246, "step": 8908, "token_acc": 0.8691588785046729 }, { "epoch": 0.48071008471375387, "grad_norm": 0.31531020998954773, "learning_rate": 1.1108294282068897e-05, "loss": 0.3635408282279968, "step": 8909, "token_acc": 0.8724589670230387 }, { "epoch": 0.4807640425187503, "grad_norm": 0.42687559127807617, "learning_rate": 1.1106557468863413e-05, "loss": 0.4133407771587372, "step": 8910, "token_acc": 0.8606745252128356 }, { "epoch": 0.48081800032374683, "grad_norm": 0.3166813850402832, "learning_rate": 1.1104820621863955e-05, "loss": 0.3597453236579895, "step": 8911, "token_acc": 0.8715779579367877 }, { "epoch": 0.48087195812874334, "grad_norm": 0.3723002076148987, "learning_rate": 1.1103083741123563e-05, "loss": 0.3596011996269226, "step": 8912, "token_acc": 0.8720491538212785 }, { "epoch": 0.4809259159337398, "grad_norm": 0.4436568021774292, "learning_rate": 1.1101346826695285e-05, "loss": 0.37445372343063354, "step": 8913, "token_acc": 0.8674427337589186 }, { "epoch": 0.4809798737387363, "grad_norm": 0.3760734796524048, "learning_rate": 1.1099609878632163e-05, "loss": 0.3200628161430359, "step": 8914, "token_acc": 0.8863849765258216 }, { "epoch": 0.4810338315437328, "grad_norm": 0.39090266823768616, "learning_rate": 1.1097872896987241e-05, "loss": 0.3643304109573364, "step": 8915, "token_acc": 0.8719300888955853 }, { "epoch": 0.4810877893487293, "grad_norm": 0.43321356177330017, "learning_rate": 1.1096135881813568e-05, "loss": 0.3972422778606415, "step": 8916, "token_acc": 0.8607715294756859 }, { "epoch": 0.48114174715372576, "grad_norm": 0.3333131968975067, "learning_rate": 1.1094398833164195e-05, "loss": 0.37159237265586853, "step": 8917, "token_acc": 0.866262319939348 }, { "epoch": 0.48119570495872227, "grad_norm": 0.4504168927669525, "learning_rate": 1.109266175109217e-05, "loss": 0.3939380347728729, "step": 8918, "token_acc": 0.861605141234494 }, { "epoch": 0.4812496627637188, "grad_norm": 0.48722103238105774, "learning_rate": 1.1090924635650537e-05, "loss": 0.3766186237335205, "step": 8919, "token_acc": 0.872996300863132 }, { "epoch": 0.4813036205687153, "grad_norm": 0.31343793869018555, "learning_rate": 1.1089187486892354e-05, "loss": 0.34642189741134644, "step": 8920, "token_acc": 0.8775081967213115 }, { "epoch": 0.48135757837371174, "grad_norm": 0.31938204169273376, "learning_rate": 1.108745030487067e-05, "loss": 0.35068848729133606, "step": 8921, "token_acc": 0.879158585707892 }, { "epoch": 0.48141153617870824, "grad_norm": 0.30702662467956543, "learning_rate": 1.1085713089638537e-05, "loss": 0.299781858921051, "step": 8922, "token_acc": 0.8919727438923052 }, { "epoch": 0.48146549398370475, "grad_norm": 0.3661940395832062, "learning_rate": 1.1083975841249012e-05, "loss": 0.411396861076355, "step": 8923, "token_acc": 0.8587515640205755 }, { "epoch": 0.48151945178870126, "grad_norm": 0.3449201285839081, "learning_rate": 1.108223855975515e-05, "loss": 0.35257959365844727, "step": 8924, "token_acc": 0.8759469696969697 }, { "epoch": 0.4815734095936977, "grad_norm": 0.3886245787143707, "learning_rate": 1.1080501245210005e-05, "loss": 0.3186812996864319, "step": 8925, "token_acc": 0.8880560928433269 }, { "epoch": 0.4816273673986942, "grad_norm": 0.3519372344017029, "learning_rate": 1.1078763897666636e-05, "loss": 0.33303171396255493, "step": 8926, "token_acc": 0.8777982214044772 }, { "epoch": 0.4816813252036907, "grad_norm": 0.4656018018722534, "learning_rate": 1.1077026517178098e-05, "loss": 0.3815385103225708, "step": 8927, "token_acc": 0.8624672724472509 }, { "epoch": 0.48173528300868723, "grad_norm": 0.3308076858520508, "learning_rate": 1.107528910379745e-05, "loss": 0.35947227478027344, "step": 8928, "token_acc": 0.8764031940747599 }, { "epoch": 0.4817892408136837, "grad_norm": 0.4087957441806793, "learning_rate": 1.1073551657577758e-05, "loss": 0.36236557364463806, "step": 8929, "token_acc": 0.8738425925925926 }, { "epoch": 0.4818431986186802, "grad_norm": 0.38988494873046875, "learning_rate": 1.1071814178572082e-05, "loss": 0.4113619327545166, "step": 8930, "token_acc": 0.8583458646616542 }, { "epoch": 0.4818971564236767, "grad_norm": 0.5178189873695374, "learning_rate": 1.1070076666833478e-05, "loss": 0.3976612985134125, "step": 8931, "token_acc": 0.8583180987202925 }, { "epoch": 0.4819511142286732, "grad_norm": 0.3876258432865143, "learning_rate": 1.1068339122415013e-05, "loss": 0.37204158306121826, "step": 8932, "token_acc": 0.8691083758631042 }, { "epoch": 0.48200507203366966, "grad_norm": 0.3131168484687805, "learning_rate": 1.1066601545369754e-05, "loss": 0.35049575567245483, "step": 8933, "token_acc": 0.8763999185501934 }, { "epoch": 0.48205902983866616, "grad_norm": 0.31078892946243286, "learning_rate": 1.106486393575076e-05, "loss": 0.36790505051612854, "step": 8934, "token_acc": 0.8724496029321931 }, { "epoch": 0.48211298764366267, "grad_norm": 0.4098268449306488, "learning_rate": 1.1063126293611102e-05, "loss": 0.38701844215393066, "step": 8935, "token_acc": 0.863456173836441 }, { "epoch": 0.4821669454486591, "grad_norm": 0.29217180609703064, "learning_rate": 1.1061388619003846e-05, "loss": 0.36896151304244995, "step": 8936, "token_acc": 0.8728743260058067 }, { "epoch": 0.48222090325365563, "grad_norm": 0.3661608099937439, "learning_rate": 1.1059650911982058e-05, "loss": 0.3426305055618286, "step": 8937, "token_acc": 0.8810425160163076 }, { "epoch": 0.48227486105865214, "grad_norm": 0.44351762533187866, "learning_rate": 1.105791317259881e-05, "loss": 0.3728863298892975, "step": 8938, "token_acc": 0.8729550991994431 }, { "epoch": 0.48232881886364865, "grad_norm": 0.45624861121177673, "learning_rate": 1.1056175400907168e-05, "loss": 0.37459221482276917, "step": 8939, "token_acc": 0.8735036406269283 }, { "epoch": 0.4823827766686451, "grad_norm": 0.3566522002220154, "learning_rate": 1.105443759696021e-05, "loss": 0.37373268604278564, "step": 8940, "token_acc": 0.8670402445236882 }, { "epoch": 0.4824367344736416, "grad_norm": 0.42644959688186646, "learning_rate": 1.1052699760811e-05, "loss": 0.3659570813179016, "step": 8941, "token_acc": 0.8715107913669065 }, { "epoch": 0.4824906922786381, "grad_norm": 0.38165372610092163, "learning_rate": 1.105096189251262e-05, "loss": 0.3546587824821472, "step": 8942, "token_acc": 0.8737535913469664 }, { "epoch": 0.4825446500836346, "grad_norm": 0.29332661628723145, "learning_rate": 1.1049223992118136e-05, "loss": 0.33635374903678894, "step": 8943, "token_acc": 0.8787266296109146 }, { "epoch": 0.48259860788863107, "grad_norm": 0.4760946035385132, "learning_rate": 1.1047486059680627e-05, "loss": 0.35520535707473755, "step": 8944, "token_acc": 0.8766609348131937 }, { "epoch": 0.4826525656936276, "grad_norm": 0.4243836998939514, "learning_rate": 1.1045748095253166e-05, "loss": 0.39067840576171875, "step": 8945, "token_acc": 0.8635566940811649 }, { "epoch": 0.4827065234986241, "grad_norm": 0.36127978563308716, "learning_rate": 1.1044010098888834e-05, "loss": 0.38071316480636597, "step": 8946, "token_acc": 0.868486646884273 }, { "epoch": 0.4827604813036206, "grad_norm": 0.34387272596359253, "learning_rate": 1.1042272070640707e-05, "loss": 0.3730148673057556, "step": 8947, "token_acc": 0.8640281655942698 }, { "epoch": 0.48281443910861704, "grad_norm": 0.48223409056663513, "learning_rate": 1.1040534010561862e-05, "loss": 0.36881929636001587, "step": 8948, "token_acc": 0.8718767351471405 }, { "epoch": 0.48286839691361355, "grad_norm": 0.3422224223613739, "learning_rate": 1.1038795918705385e-05, "loss": 0.37656763195991516, "step": 8949, "token_acc": 0.8708720330237358 }, { "epoch": 0.48292235471861006, "grad_norm": 0.30547645688056946, "learning_rate": 1.1037057795124351e-05, "loss": 0.33590632677078247, "step": 8950, "token_acc": 0.8808564006091553 }, { "epoch": 0.48297631252360657, "grad_norm": 0.5107697248458862, "learning_rate": 1.1035319639871842e-05, "loss": 0.39943498373031616, "step": 8951, "token_acc": 0.8669012957926732 }, { "epoch": 0.483030270328603, "grad_norm": 0.338771253824234, "learning_rate": 1.1033581453000944e-05, "loss": 0.39175164699554443, "step": 8952, "token_acc": 0.8617713451068915 }, { "epoch": 0.4830842281335995, "grad_norm": 0.3993791937828064, "learning_rate": 1.1031843234564739e-05, "loss": 0.30699968338012695, "step": 8953, "token_acc": 0.8918437082157183 }, { "epoch": 0.48313818593859603, "grad_norm": 0.3955168128013611, "learning_rate": 1.103010498461631e-05, "loss": 0.37009063363075256, "step": 8954, "token_acc": 0.8714094359279655 }, { "epoch": 0.4831921437435925, "grad_norm": 0.4079422056674957, "learning_rate": 1.1028366703208748e-05, "loss": 0.3154505491256714, "step": 8955, "token_acc": 0.8829009864815491 }, { "epoch": 0.483246101548589, "grad_norm": 0.3719826340675354, "learning_rate": 1.1026628390395133e-05, "loss": 0.3778664469718933, "step": 8956, "token_acc": 0.8695915646588293 }, { "epoch": 0.4833000593535855, "grad_norm": 0.3087109625339508, "learning_rate": 1.1024890046228557e-05, "loss": 0.3108154237270355, "step": 8957, "token_acc": 0.8893463073852296 }, { "epoch": 0.483354017158582, "grad_norm": 0.3650115728378296, "learning_rate": 1.1023151670762109e-05, "loss": 0.37409690022468567, "step": 8958, "token_acc": 0.8734939759036144 }, { "epoch": 0.48340797496357846, "grad_norm": 0.34063658118247986, "learning_rate": 1.1021413264048875e-05, "loss": 0.4138711094856262, "step": 8959, "token_acc": 0.8621978278640663 }, { "epoch": 0.48346193276857496, "grad_norm": 0.4230414927005768, "learning_rate": 1.1019674826141951e-05, "loss": 0.33207181096076965, "step": 8960, "token_acc": 0.8837373916121041 }, { "epoch": 0.48351589057357147, "grad_norm": 0.3412534296512604, "learning_rate": 1.101793635709442e-05, "loss": 0.3530564308166504, "step": 8961, "token_acc": 0.8790111718564297 }, { "epoch": 0.483569848378568, "grad_norm": 0.46799638867378235, "learning_rate": 1.1016197856959384e-05, "loss": 0.4042091369628906, "step": 8962, "token_acc": 0.8622014364456322 }, { "epoch": 0.48362380618356443, "grad_norm": 0.3826320171356201, "learning_rate": 1.1014459325789928e-05, "loss": 0.3588801622390747, "step": 8963, "token_acc": 0.875645883568722 }, { "epoch": 0.48367776398856094, "grad_norm": 0.38910332322120667, "learning_rate": 1.101272076363915e-05, "loss": 0.35637763142585754, "step": 8964, "token_acc": 0.8708179926364655 }, { "epoch": 0.48373172179355745, "grad_norm": 0.3045211136341095, "learning_rate": 1.1010982170560148e-05, "loss": 0.37826967239379883, "step": 8965, "token_acc": 0.868389819332718 }, { "epoch": 0.48378567959855395, "grad_norm": 0.4284789562225342, "learning_rate": 1.1009243546606015e-05, "loss": 0.3800315856933594, "step": 8966, "token_acc": 0.8681568732432071 }, { "epoch": 0.4838396374035504, "grad_norm": 0.3661145567893982, "learning_rate": 1.1007504891829845e-05, "loss": 0.32822665572166443, "step": 8967, "token_acc": 0.8837590045841519 }, { "epoch": 0.4838935952085469, "grad_norm": 0.3894345164299011, "learning_rate": 1.1005766206284743e-05, "loss": 0.3555639982223511, "step": 8968, "token_acc": 0.8727468802957942 }, { "epoch": 0.4839475530135434, "grad_norm": 0.4528527557849884, "learning_rate": 1.1004027490023805e-05, "loss": 0.36745449900627136, "step": 8969, "token_acc": 0.8694768918232605 }, { "epoch": 0.4840015108185399, "grad_norm": 0.5060460567474365, "learning_rate": 1.1002288743100126e-05, "loss": 0.3437850773334503, "step": 8970, "token_acc": 0.8763636363636363 }, { "epoch": 0.4840554686235364, "grad_norm": 0.3634917438030243, "learning_rate": 1.1000549965566816e-05, "loss": 0.34567975997924805, "step": 8971, "token_acc": 0.8784133048657152 }, { "epoch": 0.4841094264285329, "grad_norm": 0.35120970010757446, "learning_rate": 1.099881115747697e-05, "loss": 0.33018001914024353, "step": 8972, "token_acc": 0.8786388924828568 }, { "epoch": 0.4841633842335294, "grad_norm": 0.4325469732284546, "learning_rate": 1.0997072318883693e-05, "loss": 0.4033682346343994, "step": 8973, "token_acc": 0.8597955952209587 }, { "epoch": 0.4842173420385259, "grad_norm": 0.3717867434024811, "learning_rate": 1.0995333449840087e-05, "loss": 0.37656012177467346, "step": 8974, "token_acc": 0.8728518346493265 }, { "epoch": 0.48427129984352235, "grad_norm": 0.5041441321372986, "learning_rate": 1.099359455039926e-05, "loss": 0.39409536123275757, "step": 8975, "token_acc": 0.8662989729935336 }, { "epoch": 0.48432525764851886, "grad_norm": 0.39908385276794434, "learning_rate": 1.0991855620614318e-05, "loss": 0.34706130623817444, "step": 8976, "token_acc": 0.875503355704698 }, { "epoch": 0.48437921545351537, "grad_norm": 0.4817845821380615, "learning_rate": 1.0990116660538361e-05, "loss": 0.40669816732406616, "step": 8977, "token_acc": 0.8598057644110275 }, { "epoch": 0.4844331732585118, "grad_norm": 0.3682728111743927, "learning_rate": 1.0988377670224503e-05, "loss": 0.34173208475112915, "step": 8978, "token_acc": 0.8781809019904258 }, { "epoch": 0.4844871310635083, "grad_norm": 0.41872841119766235, "learning_rate": 1.0986638649725851e-05, "loss": 0.33890795707702637, "step": 8979, "token_acc": 0.8764003673094583 }, { "epoch": 0.48454108886850483, "grad_norm": 0.46239563822746277, "learning_rate": 1.0984899599095508e-05, "loss": 0.3904765844345093, "step": 8980, "token_acc": 0.8636213174445548 }, { "epoch": 0.48459504667350134, "grad_norm": 0.44514501094818115, "learning_rate": 1.0983160518386595e-05, "loss": 0.37100955843925476, "step": 8981, "token_acc": 0.8741721854304636 }, { "epoch": 0.4846490044784978, "grad_norm": 0.40142253041267395, "learning_rate": 1.0981421407652214e-05, "loss": 0.3552490472793579, "step": 8982, "token_acc": 0.8747999507571095 }, { "epoch": 0.4847029622834943, "grad_norm": 0.3042201101779938, "learning_rate": 1.097968226694548e-05, "loss": 0.3434509038925171, "step": 8983, "token_acc": 0.878392305049811 }, { "epoch": 0.4847569200884908, "grad_norm": 0.34409424662590027, "learning_rate": 1.097794309631951e-05, "loss": 0.34274959564208984, "step": 8984, "token_acc": 0.882466281310212 }, { "epoch": 0.4848108778934873, "grad_norm": 0.44062095880508423, "learning_rate": 1.0976203895827413e-05, "loss": 0.34616661071777344, "step": 8985, "token_acc": 0.8753667737879 }, { "epoch": 0.48486483569848376, "grad_norm": 0.3943678140640259, "learning_rate": 1.0974464665522302e-05, "loss": 0.3281592130661011, "step": 8986, "token_acc": 0.8852708460133901 }, { "epoch": 0.48491879350348027, "grad_norm": 0.3707767426967621, "learning_rate": 1.0972725405457297e-05, "loss": 0.3485654294490814, "step": 8987, "token_acc": 0.8746193565470674 }, { "epoch": 0.4849727513084768, "grad_norm": 0.3549264669418335, "learning_rate": 1.097098611568551e-05, "loss": 0.3575457036495209, "step": 8988, "token_acc": 0.8755722694571615 }, { "epoch": 0.4850267091134733, "grad_norm": 0.4090730845928192, "learning_rate": 1.0969246796260067e-05, "loss": 0.37428346276283264, "step": 8989, "token_acc": 0.87331273377785 }, { "epoch": 0.48508066691846974, "grad_norm": 0.44102540612220764, "learning_rate": 1.0967507447234076e-05, "loss": 0.35409897565841675, "step": 8990, "token_acc": 0.874465284633103 }, { "epoch": 0.48513462472346625, "grad_norm": 0.4115648865699768, "learning_rate": 1.0965768068660662e-05, "loss": 0.3655560612678528, "step": 8991, "token_acc": 0.8665501948662814 }, { "epoch": 0.48518858252846275, "grad_norm": 0.4492977559566498, "learning_rate": 1.0964028660592947e-05, "loss": 0.36405929923057556, "step": 8992, "token_acc": 0.8721965112139551 }, { "epoch": 0.48524254033345926, "grad_norm": 0.3678096532821655, "learning_rate": 1.0962289223084046e-05, "loss": 0.34178078174591064, "step": 8993, "token_acc": 0.880955829109341 }, { "epoch": 0.4852964981384557, "grad_norm": 0.4785839915275574, "learning_rate": 1.0960549756187087e-05, "loss": 0.39362832903862, "step": 8994, "token_acc": 0.8593256934064109 }, { "epoch": 0.4853504559434522, "grad_norm": 0.5211049914360046, "learning_rate": 1.0958810259955189e-05, "loss": 0.42324310541152954, "step": 8995, "token_acc": 0.8588899341486359 }, { "epoch": 0.4854044137484487, "grad_norm": 0.5784413814544678, "learning_rate": 1.0957070734441476e-05, "loss": 0.37006592750549316, "step": 8996, "token_acc": 0.8679782552420399 }, { "epoch": 0.4854583715534452, "grad_norm": 0.30717310309410095, "learning_rate": 1.0955331179699073e-05, "loss": 0.3592122793197632, "step": 8997, "token_acc": 0.8750758035172832 }, { "epoch": 0.4855123293584417, "grad_norm": 0.34623709321022034, "learning_rate": 1.0953591595781106e-05, "loss": 0.3867092728614807, "step": 8998, "token_acc": 0.867217197535719 }, { "epoch": 0.4855662871634382, "grad_norm": 0.4486273229122162, "learning_rate": 1.0951851982740702e-05, "loss": 0.34298303723335266, "step": 8999, "token_acc": 0.878125 }, { "epoch": 0.4856202449684347, "grad_norm": 0.3224739134311676, "learning_rate": 1.0950112340630988e-05, "loss": 0.3364541530609131, "step": 9000, "token_acc": 0.8814589665653495 }, { "epoch": 0.48567420277343115, "grad_norm": 0.362720787525177, "learning_rate": 1.0948372669505094e-05, "loss": 0.3393135070800781, "step": 9001, "token_acc": 0.87740225314778 }, { "epoch": 0.48572816057842766, "grad_norm": 0.5136604309082031, "learning_rate": 1.0946632969416147e-05, "loss": 0.43644607067108154, "step": 9002, "token_acc": 0.8563112745098039 }, { "epoch": 0.48578211838342417, "grad_norm": 0.4226994514465332, "learning_rate": 1.0944893240417273e-05, "loss": 0.31851762533187866, "step": 9003, "token_acc": 0.889726865063188 }, { "epoch": 0.4858360761884207, "grad_norm": 0.44944998621940613, "learning_rate": 1.0943153482561611e-05, "loss": 0.3469140827655792, "step": 9004, "token_acc": 0.8730133752950433 }, { "epoch": 0.4858900339934171, "grad_norm": 0.42203783988952637, "learning_rate": 1.0941413695902285e-05, "loss": 0.3696051239967346, "step": 9005, "token_acc": 0.8716744913928013 }, { "epoch": 0.48594399179841363, "grad_norm": 0.40758320689201355, "learning_rate": 1.0939673880492432e-05, "loss": 0.35580724477767944, "step": 9006, "token_acc": 0.8723696878008527 }, { "epoch": 0.48599794960341014, "grad_norm": 0.5445874929428101, "learning_rate": 1.0937934036385187e-05, "loss": 0.36260291934013367, "step": 9007, "token_acc": 0.8751622674167028 }, { "epoch": 0.48605190740840665, "grad_norm": 0.37836670875549316, "learning_rate": 1.0936194163633683e-05, "loss": 0.3609795570373535, "step": 9008, "token_acc": 0.8691782853752595 }, { "epoch": 0.4861058652134031, "grad_norm": 0.46742087602615356, "learning_rate": 1.0934454262291053e-05, "loss": 0.39887186884880066, "step": 9009, "token_acc": 0.861800346220427 }, { "epoch": 0.4861598230183996, "grad_norm": 0.3899938762187958, "learning_rate": 1.0932714332410434e-05, "loss": 0.3508865237236023, "step": 9010, "token_acc": 0.8707256046705588 }, { "epoch": 0.4862137808233961, "grad_norm": 0.34384000301361084, "learning_rate": 1.0930974374044965e-05, "loss": 0.37718579173088074, "step": 9011, "token_acc": 0.8670088660755949 }, { "epoch": 0.4862677386283926, "grad_norm": 0.5202506184577942, "learning_rate": 1.092923438724778e-05, "loss": 0.4016152620315552, "step": 9012, "token_acc": 0.8602997991036934 }, { "epoch": 0.4863216964333891, "grad_norm": 0.29809126257896423, "learning_rate": 1.0927494372072023e-05, "loss": 0.31873294711112976, "step": 9013, "token_acc": 0.8854603801530486 }, { "epoch": 0.4863756542383856, "grad_norm": 0.4128292202949524, "learning_rate": 1.092575432857083e-05, "loss": 0.40221548080444336, "step": 9014, "token_acc": 0.8591214325685507 }, { "epoch": 0.4864296120433821, "grad_norm": 0.38047558069229126, "learning_rate": 1.092401425679734e-05, "loss": 0.31464892625808716, "step": 9015, "token_acc": 0.8881939014855356 }, { "epoch": 0.4864835698483786, "grad_norm": 0.31484854221343994, "learning_rate": 1.09222741568047e-05, "loss": 0.32592588663101196, "step": 9016, "token_acc": 0.8827214452214452 }, { "epoch": 0.48653752765337505, "grad_norm": 0.37544044852256775, "learning_rate": 1.0920534028646047e-05, "loss": 0.38676780462265015, "step": 9017, "token_acc": 0.8673576336322347 }, { "epoch": 0.48659148545837155, "grad_norm": 0.376542866230011, "learning_rate": 1.0918793872374529e-05, "loss": 0.3071190118789673, "step": 9018, "token_acc": 0.8877930476960388 }, { "epoch": 0.48664544326336806, "grad_norm": 0.35580092668533325, "learning_rate": 1.0917053688043281e-05, "loss": 0.34368687868118286, "step": 9019, "token_acc": 0.8765107724645297 }, { "epoch": 0.4866994010683645, "grad_norm": 0.34978002309799194, "learning_rate": 1.0915313475705461e-05, "loss": 0.3666744828224182, "step": 9020, "token_acc": 0.8726618705035971 }, { "epoch": 0.486753358873361, "grad_norm": 0.38191527128219604, "learning_rate": 1.0913573235414201e-05, "loss": 0.30748942494392395, "step": 9021, "token_acc": 0.8863636363636364 }, { "epoch": 0.4868073166783575, "grad_norm": 0.42598193883895874, "learning_rate": 1.0911832967222655e-05, "loss": 0.3188338577747345, "step": 9022, "token_acc": 0.8859922178988326 }, { "epoch": 0.48686127448335403, "grad_norm": 0.3903194069862366, "learning_rate": 1.091009267118397e-05, "loss": 0.36872291564941406, "step": 9023, "token_acc": 0.8678972712680578 }, { "epoch": 0.4869152322883505, "grad_norm": 0.4893418550491333, "learning_rate": 1.0908352347351294e-05, "loss": 0.3832210302352905, "step": 9024, "token_acc": 0.8671505739365294 }, { "epoch": 0.486969190093347, "grad_norm": 0.370040625333786, "learning_rate": 1.0906611995777774e-05, "loss": 0.3798750936985016, "step": 9025, "token_acc": 0.8604072749883414 }, { "epoch": 0.4870231478983435, "grad_norm": 0.46468502283096313, "learning_rate": 1.0904871616516563e-05, "loss": 0.3732905387878418, "step": 9026, "token_acc": 0.869453044375645 }, { "epoch": 0.48707710570334, "grad_norm": 0.38094642758369446, "learning_rate": 1.0903131209620809e-05, "loss": 0.4165515601634979, "step": 9027, "token_acc": 0.8552614074763869 }, { "epoch": 0.48713106350833646, "grad_norm": 0.4245698153972626, "learning_rate": 1.0901390775143661e-05, "loss": 0.3959633708000183, "step": 9028, "token_acc": 0.8631076647778109 }, { "epoch": 0.48718502131333297, "grad_norm": 0.32355594635009766, "learning_rate": 1.0899650313138279e-05, "loss": 0.4174528419971466, "step": 9029, "token_acc": 0.8569870483980914 }, { "epoch": 0.4872389791183295, "grad_norm": 0.39328598976135254, "learning_rate": 1.089790982365781e-05, "loss": 0.36946791410446167, "step": 9030, "token_acc": 0.8662035250225139 }, { "epoch": 0.487292936923326, "grad_norm": 0.41174256801605225, "learning_rate": 1.0896169306755414e-05, "loss": 0.45018255710601807, "step": 9031, "token_acc": 0.850253807106599 }, { "epoch": 0.48734689472832243, "grad_norm": 0.4913678467273712, "learning_rate": 1.0894428762484238e-05, "loss": 0.40518951416015625, "step": 9032, "token_acc": 0.8616898148148148 }, { "epoch": 0.48740085253331894, "grad_norm": 0.38999393582344055, "learning_rate": 1.0892688190897442e-05, "loss": 0.3616548180580139, "step": 9033, "token_acc": 0.8726009265387161 }, { "epoch": 0.48745481033831545, "grad_norm": 0.48613491654396057, "learning_rate": 1.0890947592048184e-05, "loss": 0.3539441227912903, "step": 9034, "token_acc": 0.8716508210890234 }, { "epoch": 0.48750876814331195, "grad_norm": 0.4550894498825073, "learning_rate": 1.0889206965989621e-05, "loss": 0.4027802348136902, "step": 9035, "token_acc": 0.8660177497852849 }, { "epoch": 0.4875627259483084, "grad_norm": 0.3693532645702362, "learning_rate": 1.0887466312774907e-05, "loss": 0.4038041830062866, "step": 9036, "token_acc": 0.8611914401388086 }, { "epoch": 0.4876166837533049, "grad_norm": 0.4776936173439026, "learning_rate": 1.0885725632457208e-05, "loss": 0.43551766872406006, "step": 9037, "token_acc": 0.8528012279355334 }, { "epoch": 0.4876706415583014, "grad_norm": 0.41803163290023804, "learning_rate": 1.0883984925089677e-05, "loss": 0.419523149728775, "step": 9038, "token_acc": 0.8539154863631043 }, { "epoch": 0.48772459936329793, "grad_norm": 0.387408971786499, "learning_rate": 1.088224419072548e-05, "loss": 0.37551069259643555, "step": 9039, "token_acc": 0.8683286516853933 }, { "epoch": 0.4877785571682944, "grad_norm": 0.38905560970306396, "learning_rate": 1.0880503429417774e-05, "loss": 0.34647032618522644, "step": 9040, "token_acc": 0.878207489257213 }, { "epoch": 0.4878325149732909, "grad_norm": 0.4132244288921356, "learning_rate": 1.0878762641219724e-05, "loss": 0.3731471002101898, "step": 9041, "token_acc": 0.8687055915971579 }, { "epoch": 0.4878864727782874, "grad_norm": 0.32681334018707275, "learning_rate": 1.0877021826184497e-05, "loss": 0.2932756841182709, "step": 9042, "token_acc": 0.8926021570712441 }, { "epoch": 0.48794043058328385, "grad_norm": 0.36704081296920776, "learning_rate": 1.0875280984365253e-05, "loss": 0.3734409213066101, "step": 9043, "token_acc": 0.8693887590702251 }, { "epoch": 0.48799438838828035, "grad_norm": 0.37280160188674927, "learning_rate": 1.0873540115815153e-05, "loss": 0.3835054039955139, "step": 9044, "token_acc": 0.8669244401482198 }, { "epoch": 0.48804834619327686, "grad_norm": 0.4340065121650696, "learning_rate": 1.087179922058737e-05, "loss": 0.31649044156074524, "step": 9045, "token_acc": 0.8819111203838932 }, { "epoch": 0.48810230399827337, "grad_norm": 0.33494746685028076, "learning_rate": 1.0870058298735064e-05, "loss": 0.3362866938114166, "step": 9046, "token_acc": 0.8810204530459643 }, { "epoch": 0.4881562618032698, "grad_norm": 0.4193035066127777, "learning_rate": 1.0868317350311411e-05, "loss": 0.3098934292793274, "step": 9047, "token_acc": 0.8895504003284747 }, { "epoch": 0.4882102196082663, "grad_norm": 0.4594590663909912, "learning_rate": 1.0866576375369567e-05, "loss": 0.35251763463020325, "step": 9048, "token_acc": 0.8750244092950595 }, { "epoch": 0.48826417741326283, "grad_norm": 0.36161884665489197, "learning_rate": 1.086483537396271e-05, "loss": 0.31653401255607605, "step": 9049, "token_acc": 0.8855010935288821 }, { "epoch": 0.48831813521825934, "grad_norm": 0.31528744101524353, "learning_rate": 1.086309434614401e-05, "loss": 0.3350004553794861, "step": 9050, "token_acc": 0.8806701618418054 }, { "epoch": 0.4883720930232558, "grad_norm": 0.5472491383552551, "learning_rate": 1.0861353291966631e-05, "loss": 0.4138188362121582, "step": 9051, "token_acc": 0.8562644119907763 }, { "epoch": 0.4884260508282523, "grad_norm": 0.4827525317668915, "learning_rate": 1.085961221148375e-05, "loss": 0.3526076674461365, "step": 9052, "token_acc": 0.8759485318376773 }, { "epoch": 0.4884800086332488, "grad_norm": 0.44127511978149414, "learning_rate": 1.0857871104748538e-05, "loss": 0.3794691562652588, "step": 9053, "token_acc": 0.8685464654487689 }, { "epoch": 0.4885339664382453, "grad_norm": 0.44259485602378845, "learning_rate": 1.0856129971814165e-05, "loss": 0.3347516655921936, "step": 9054, "token_acc": 0.8817067345964018 }, { "epoch": 0.48858792424324177, "grad_norm": 0.3779233694076538, "learning_rate": 1.0854388812733808e-05, "loss": 0.3463485836982727, "step": 9055, "token_acc": 0.8782441049977755 }, { "epoch": 0.4886418820482383, "grad_norm": 0.3535013794898987, "learning_rate": 1.085264762756064e-05, "loss": 0.36928921937942505, "step": 9056, "token_acc": 0.8680769230769231 }, { "epoch": 0.4886958398532348, "grad_norm": 0.4217889606952667, "learning_rate": 1.0850906416347835e-05, "loss": 0.37712326645851135, "step": 9057, "token_acc": 0.8670382165605095 }, { "epoch": 0.4887497976582313, "grad_norm": 0.328167200088501, "learning_rate": 1.0849165179148575e-05, "loss": 0.3227123022079468, "step": 9058, "token_acc": 0.8854812772800368 }, { "epoch": 0.48880375546322774, "grad_norm": 0.5705316662788391, "learning_rate": 1.0847423916016034e-05, "loss": 0.3705734610557556, "step": 9059, "token_acc": 0.8714940421631531 }, { "epoch": 0.48885771326822425, "grad_norm": 0.3605082631111145, "learning_rate": 1.0845682627003384e-05, "loss": 0.3496556878089905, "step": 9060, "token_acc": 0.878514731602314 }, { "epoch": 0.48891167107322075, "grad_norm": 0.4442797601222992, "learning_rate": 1.084394131216381e-05, "loss": 0.3479171097278595, "step": 9061, "token_acc": 0.8742733540624578 }, { "epoch": 0.4889656288782172, "grad_norm": 0.436217725276947, "learning_rate": 1.0842199971550489e-05, "loss": 0.4024890661239624, "step": 9062, "token_acc": 0.865567533291059 }, { "epoch": 0.4890195866832137, "grad_norm": 0.41484180092811584, "learning_rate": 1.0840458605216602e-05, "loss": 0.32774627208709717, "step": 9063, "token_acc": 0.8854103343465045 }, { "epoch": 0.4890735444882102, "grad_norm": 0.3857543468475342, "learning_rate": 1.0838717213215328e-05, "loss": 0.35122770071029663, "step": 9064, "token_acc": 0.8723548562126967 }, { "epoch": 0.48912750229320673, "grad_norm": 0.3872217833995819, "learning_rate": 1.0836975795599851e-05, "loss": 0.3444761335849762, "step": 9065, "token_acc": 0.8813936249073387 }, { "epoch": 0.4891814600982032, "grad_norm": 0.32637283205986023, "learning_rate": 1.0835234352423354e-05, "loss": 0.3115244507789612, "step": 9066, "token_acc": 0.8917875682150939 }, { "epoch": 0.4892354179031997, "grad_norm": 0.46321597695350647, "learning_rate": 1.0833492883739017e-05, "loss": 0.3988918662071228, "step": 9067, "token_acc": 0.858968058968059 }, { "epoch": 0.4892893757081962, "grad_norm": 0.49395355582237244, "learning_rate": 1.0831751389600027e-05, "loss": 0.3168586492538452, "step": 9068, "token_acc": 0.8915183969597512 }, { "epoch": 0.4893433335131927, "grad_norm": 0.37559565901756287, "learning_rate": 1.0830009870059566e-05, "loss": 0.3728271722793579, "step": 9069, "token_acc": 0.8701371711266022 }, { "epoch": 0.48939729131818915, "grad_norm": 0.44917407631874084, "learning_rate": 1.0828268325170819e-05, "loss": 0.34602293372154236, "step": 9070, "token_acc": 0.8809431021044427 }, { "epoch": 0.48945124912318566, "grad_norm": 0.33555489778518677, "learning_rate": 1.082652675498698e-05, "loss": 0.3853898048400879, "step": 9071, "token_acc": 0.8678560315319988 }, { "epoch": 0.48950520692818217, "grad_norm": 0.37869125604629517, "learning_rate": 1.0824785159561226e-05, "loss": 0.4275529384613037, "step": 9072, "token_acc": 0.847569391197669 }, { "epoch": 0.4895591647331787, "grad_norm": 0.290017694234848, "learning_rate": 1.082304353894675e-05, "loss": 0.3208579421043396, "step": 9073, "token_acc": 0.8832428238944918 }, { "epoch": 0.4896131225381751, "grad_norm": 0.4766619801521301, "learning_rate": 1.0821301893196738e-05, "loss": 0.40956205129623413, "step": 9074, "token_acc": 0.8621389539422326 }, { "epoch": 0.48966708034317163, "grad_norm": 0.3187757730484009, "learning_rate": 1.0819560222364385e-05, "loss": 0.3932611346244812, "step": 9075, "token_acc": 0.866580181618601 }, { "epoch": 0.48972103814816814, "grad_norm": 0.5203517079353333, "learning_rate": 1.0817818526502879e-05, "loss": 0.36641937494277954, "step": 9076, "token_acc": 0.8686348325738062 }, { "epoch": 0.48977499595316465, "grad_norm": 0.35571736097335815, "learning_rate": 1.0816076805665405e-05, "loss": 0.3835422992706299, "step": 9077, "token_acc": 0.8653821615414061 }, { "epoch": 0.4898289537581611, "grad_norm": 0.37949901819229126, "learning_rate": 1.0814335059905162e-05, "loss": 0.3412145972251892, "step": 9078, "token_acc": 0.879770725569691 }, { "epoch": 0.4898829115631576, "grad_norm": 0.3760692775249481, "learning_rate": 1.0812593289275338e-05, "loss": 0.3441172242164612, "step": 9079, "token_acc": 0.877321404364918 }, { "epoch": 0.4899368693681541, "grad_norm": 0.4131886065006256, "learning_rate": 1.0810851493829127e-05, "loss": 0.3861164450645447, "step": 9080, "token_acc": 0.8650427913100724 }, { "epoch": 0.4899908271731506, "grad_norm": 0.26564639806747437, "learning_rate": 1.0809109673619725e-05, "loss": 0.2952961027622223, "step": 9081, "token_acc": 0.8944500215115445 }, { "epoch": 0.4900447849781471, "grad_norm": 0.31215304136276245, "learning_rate": 1.0807367828700326e-05, "loss": 0.38593995571136475, "step": 9082, "token_acc": 0.8665480427046264 }, { "epoch": 0.4900987427831436, "grad_norm": 0.43288174271583557, "learning_rate": 1.0805625959124124e-05, "loss": 0.32071125507354736, "step": 9083, "token_acc": 0.8830914723626284 }, { "epoch": 0.4901527005881401, "grad_norm": 0.3345317244529724, "learning_rate": 1.0803884064944316e-05, "loss": 0.3559240698814392, "step": 9084, "token_acc": 0.876889052709178 }, { "epoch": 0.49020665839313654, "grad_norm": 0.37072330713272095, "learning_rate": 1.0802142146214103e-05, "loss": 0.38783299922943115, "step": 9085, "token_acc": 0.864516129032258 }, { "epoch": 0.49026061619813305, "grad_norm": 0.35621580481529236, "learning_rate": 1.0800400202986674e-05, "loss": 0.32604628801345825, "step": 9086, "token_acc": 0.8861913937058445 }, { "epoch": 0.49031457400312956, "grad_norm": 0.34035906195640564, "learning_rate": 1.0798658235315233e-05, "loss": 0.37818068265914917, "step": 9087, "token_acc": 0.867582344312575 }, { "epoch": 0.49036853180812606, "grad_norm": 0.321118026971817, "learning_rate": 1.0796916243252982e-05, "loss": 0.39420366287231445, "step": 9088, "token_acc": 0.8611851287907626 }, { "epoch": 0.4904224896131225, "grad_norm": 0.34451788663864136, "learning_rate": 1.0795174226853113e-05, "loss": 0.3398486375808716, "step": 9089, "token_acc": 0.8855592870059417 }, { "epoch": 0.490476447418119, "grad_norm": 0.44737741351127625, "learning_rate": 1.0793432186168834e-05, "loss": 0.3324895203113556, "step": 9090, "token_acc": 0.8800991661032229 }, { "epoch": 0.49053040522311553, "grad_norm": 0.3494558334350586, "learning_rate": 1.0791690121253347e-05, "loss": 0.35947567224502563, "step": 9091, "token_acc": 0.876008326827999 }, { "epoch": 0.49058436302811204, "grad_norm": 0.48202332854270935, "learning_rate": 1.0789948032159847e-05, "loss": 0.3715900182723999, "step": 9092, "token_acc": 0.8647032441321573 }, { "epoch": 0.4906383208331085, "grad_norm": 0.4110218286514282, "learning_rate": 1.0788205918941543e-05, "loss": 0.33282291889190674, "step": 9093, "token_acc": 0.8781542898341744 }, { "epoch": 0.490692278638105, "grad_norm": 0.41153308749198914, "learning_rate": 1.0786463781651637e-05, "loss": 0.3554806113243103, "step": 9094, "token_acc": 0.8740975300823306 }, { "epoch": 0.4907462364431015, "grad_norm": 0.4360233545303345, "learning_rate": 1.0784721620343333e-05, "loss": 0.36639323830604553, "step": 9095, "token_acc": 0.867712340929269 }, { "epoch": 0.490800194248098, "grad_norm": 0.5289731621742249, "learning_rate": 1.0782979435069833e-05, "loss": 0.3603266477584839, "step": 9096, "token_acc": 0.8740932642487047 }, { "epoch": 0.49085415205309446, "grad_norm": 0.4597420394420624, "learning_rate": 1.0781237225884348e-05, "loss": 0.35429054498672485, "step": 9097, "token_acc": 0.8742839915586373 }, { "epoch": 0.49090810985809097, "grad_norm": 0.417879581451416, "learning_rate": 1.0779494992840085e-05, "loss": 0.38694876432418823, "step": 9098, "token_acc": 0.8643223443223443 }, { "epoch": 0.4909620676630875, "grad_norm": 0.4539049565792084, "learning_rate": 1.0777752735990246e-05, "loss": 0.381320059299469, "step": 9099, "token_acc": 0.8699955680307283 }, { "epoch": 0.491016025468084, "grad_norm": 0.38483577966690063, "learning_rate": 1.0776010455388045e-05, "loss": 0.39096158742904663, "step": 9100, "token_acc": 0.8631341034655952 }, { "epoch": 0.49106998327308043, "grad_norm": 0.2716898024082184, "learning_rate": 1.0774268151086688e-05, "loss": 0.35327988862991333, "step": 9101, "token_acc": 0.869923399335164 }, { "epoch": 0.49112394107807694, "grad_norm": 0.3831576108932495, "learning_rate": 1.0772525823139382e-05, "loss": 0.3305419683456421, "step": 9102, "token_acc": 0.883245928788297 }, { "epoch": 0.49117789888307345, "grad_norm": 0.43754780292510986, "learning_rate": 1.0770783471599341e-05, "loss": 0.4103049337863922, "step": 9103, "token_acc": 0.8554096725943161 }, { "epoch": 0.49123185668806996, "grad_norm": 0.5011740922927856, "learning_rate": 1.0769041096519777e-05, "loss": 0.3643040657043457, "step": 9104, "token_acc": 0.8716577540106952 }, { "epoch": 0.4912858144930664, "grad_norm": 0.4785897135734558, "learning_rate": 1.0767298697953896e-05, "loss": 0.42716747522354126, "step": 9105, "token_acc": 0.8562596599690881 }, { "epoch": 0.4913397722980629, "grad_norm": 0.343360960483551, "learning_rate": 1.0765556275954913e-05, "loss": 0.3386973738670349, "step": 9106, "token_acc": 0.8787061994609164 }, { "epoch": 0.4913937301030594, "grad_norm": 0.43669983744621277, "learning_rate": 1.0763813830576045e-05, "loss": 0.37212443351745605, "step": 9107, "token_acc": 0.8713450292397661 }, { "epoch": 0.4914476879080559, "grad_norm": 0.4298836588859558, "learning_rate": 1.07620713618705e-05, "loss": 0.36139094829559326, "step": 9108, "token_acc": 0.8720594930945516 }, { "epoch": 0.4915016457130524, "grad_norm": 0.3463951349258423, "learning_rate": 1.07603288698915e-05, "loss": 0.3245827555656433, "step": 9109, "token_acc": 0.8813816663503511 }, { "epoch": 0.4915556035180489, "grad_norm": 0.37720176577568054, "learning_rate": 1.075858635469225e-05, "loss": 0.3371467590332031, "step": 9110, "token_acc": 0.8783485660258431 }, { "epoch": 0.4916095613230454, "grad_norm": 0.439710795879364, "learning_rate": 1.0756843816325974e-05, "loss": 0.3369573950767517, "step": 9111, "token_acc": 0.8812926274764534 }, { "epoch": 0.49166351912804185, "grad_norm": 0.43950989842414856, "learning_rate": 1.0755101254845884e-05, "loss": 0.4152749180793762, "step": 9112, "token_acc": 0.8545632015617375 }, { "epoch": 0.49171747693303836, "grad_norm": 0.388888955116272, "learning_rate": 1.0753358670305203e-05, "loss": 0.42197316884994507, "step": 9113, "token_acc": 0.8556502612042892 }, { "epoch": 0.49177143473803486, "grad_norm": 0.515927791595459, "learning_rate": 1.0751616062757141e-05, "loss": 0.36401206254959106, "step": 9114, "token_acc": 0.8688061190555371 }, { "epoch": 0.49182539254303137, "grad_norm": 0.42501455545425415, "learning_rate": 1.0749873432254924e-05, "loss": 0.37140053510665894, "step": 9115, "token_acc": 0.8675370597035224 }, { "epoch": 0.4918793503480278, "grad_norm": 0.29231977462768555, "learning_rate": 1.0748130778851769e-05, "loss": 0.3549937903881073, "step": 9116, "token_acc": 0.8740301232314012 }, { "epoch": 0.49193330815302433, "grad_norm": 0.4322817325592041, "learning_rate": 1.0746388102600897e-05, "loss": 0.3914869427680969, "step": 9117, "token_acc": 0.8631098240142571 }, { "epoch": 0.49198726595802084, "grad_norm": 0.49103909730911255, "learning_rate": 1.0744645403555527e-05, "loss": 0.36443907022476196, "step": 9118, "token_acc": 0.8727858293075684 }, { "epoch": 0.49204122376301734, "grad_norm": 0.37274158000946045, "learning_rate": 1.0742902681768881e-05, "loss": 0.3168787956237793, "step": 9119, "token_acc": 0.8861673327317262 }, { "epoch": 0.4920951815680138, "grad_norm": 0.3867775499820709, "learning_rate": 1.0741159937294183e-05, "loss": 0.35756242275238037, "step": 9120, "token_acc": 0.8764744429882044 }, { "epoch": 0.4921491393730103, "grad_norm": 0.3964098393917084, "learning_rate": 1.0739417170184654e-05, "loss": 0.41655901074409485, "step": 9121, "token_acc": 0.8583743842364532 }, { "epoch": 0.4922030971780068, "grad_norm": 0.4323561489582062, "learning_rate": 1.0737674380493516e-05, "loss": 0.423430860042572, "step": 9122, "token_acc": 0.8557324840764331 }, { "epoch": 0.4922570549830033, "grad_norm": 0.44046181440353394, "learning_rate": 1.0735931568274e-05, "loss": 0.36784881353378296, "step": 9123, "token_acc": 0.8731245064490656 }, { "epoch": 0.49231101278799977, "grad_norm": 0.3746708631515503, "learning_rate": 1.0734188733579323e-05, "loss": 0.3719547688961029, "step": 9124, "token_acc": 0.870846981750117 }, { "epoch": 0.4923649705929963, "grad_norm": 0.38411882519721985, "learning_rate": 1.0732445876462716e-05, "loss": 0.4025556445121765, "step": 9125, "token_acc": 0.86850887731945 }, { "epoch": 0.4924189283979928, "grad_norm": 0.5281585454940796, "learning_rate": 1.0730702996977404e-05, "loss": 0.4097580909729004, "step": 9126, "token_acc": 0.8583858385838584 }, { "epoch": 0.49247288620298924, "grad_norm": 0.37782350182533264, "learning_rate": 1.0728960095176612e-05, "loss": 0.3984041213989258, "step": 9127, "token_acc": 0.8621199892386333 }, { "epoch": 0.49252684400798574, "grad_norm": 0.3768961429595947, "learning_rate": 1.072721717111357e-05, "loss": 0.31865859031677246, "step": 9128, "token_acc": 0.8856363897259291 }, { "epoch": 0.49258080181298225, "grad_norm": 0.360758513212204, "learning_rate": 1.0725474224841507e-05, "loss": 0.358359158039093, "step": 9129, "token_acc": 0.8750468574284643 }, { "epoch": 0.49263475961797876, "grad_norm": 0.436261922121048, "learning_rate": 1.0723731256413651e-05, "loss": 0.40225136280059814, "step": 9130, "token_acc": 0.8632635746606335 }, { "epoch": 0.4926887174229752, "grad_norm": 0.38718149065971375, "learning_rate": 1.0721988265883233e-05, "loss": 0.42108091711997986, "step": 9131, "token_acc": 0.8540317022742936 }, { "epoch": 0.4927426752279717, "grad_norm": 0.46036842465400696, "learning_rate": 1.0720245253303481e-05, "loss": 0.36222314834594727, "step": 9132, "token_acc": 0.8764947044755722 }, { "epoch": 0.4927966330329682, "grad_norm": 0.32332688570022583, "learning_rate": 1.0718502218727629e-05, "loss": 0.3731783926486969, "step": 9133, "token_acc": 0.8686196182867288 }, { "epoch": 0.49285059083796473, "grad_norm": 0.338550329208374, "learning_rate": 1.0716759162208907e-05, "loss": 0.3659931421279907, "step": 9134, "token_acc": 0.8697624190064794 }, { "epoch": 0.4929045486429612, "grad_norm": 0.5060283541679382, "learning_rate": 1.071501608380055e-05, "loss": 0.41081416606903076, "step": 9135, "token_acc": 0.8573880212802472 }, { "epoch": 0.4929585064479577, "grad_norm": 0.4415648579597473, "learning_rate": 1.0713272983555786e-05, "loss": 0.34432104229927063, "step": 9136, "token_acc": 0.8810580836108028 }, { "epoch": 0.4930124642529542, "grad_norm": 0.5660061240196228, "learning_rate": 1.0711529861527854e-05, "loss": 0.3328152298927307, "step": 9137, "token_acc": 0.8828278420248745 }, { "epoch": 0.4930664220579507, "grad_norm": 0.542388916015625, "learning_rate": 1.0709786717769984e-05, "loss": 0.3997098207473755, "step": 9138, "token_acc": 0.8638146167557932 }, { "epoch": 0.49312037986294716, "grad_norm": 0.4968905746936798, "learning_rate": 1.0708043552335413e-05, "loss": 0.39920878410339355, "step": 9139, "token_acc": 0.8632250407951343 }, { "epoch": 0.49317433766794366, "grad_norm": 0.4495362341403961, "learning_rate": 1.0706300365277383e-05, "loss": 0.4308733344078064, "step": 9140, "token_acc": 0.8532088405625813 }, { "epoch": 0.49322829547294017, "grad_norm": 0.37421974539756775, "learning_rate": 1.070455715664912e-05, "loss": 0.36105453968048096, "step": 9141, "token_acc": 0.87330502670867 }, { "epoch": 0.4932822532779367, "grad_norm": 0.3428300619125366, "learning_rate": 1.0702813926503868e-05, "loss": 0.3424321115016937, "step": 9142, "token_acc": 0.8813349814585909 }, { "epoch": 0.49333621108293313, "grad_norm": 0.3835994005203247, "learning_rate": 1.0701070674894862e-05, "loss": 0.4009210169315338, "step": 9143, "token_acc": 0.8600319318786589 }, { "epoch": 0.49339016888792964, "grad_norm": 0.4079909920692444, "learning_rate": 1.0699327401875339e-05, "loss": 0.33642929792404175, "step": 9144, "token_acc": 0.8795106086417841 }, { "epoch": 0.49344412669292614, "grad_norm": 0.35560640692710876, "learning_rate": 1.0697584107498544e-05, "loss": 0.32268887758255005, "step": 9145, "token_acc": 0.8857913669064749 }, { "epoch": 0.49349808449792265, "grad_norm": 0.28325146436691284, "learning_rate": 1.0695840791817712e-05, "loss": 0.3274078369140625, "step": 9146, "token_acc": 0.8812291249164996 }, { "epoch": 0.4935520423029191, "grad_norm": 0.40568864345550537, "learning_rate": 1.0694097454886083e-05, "loss": 0.3307134211063385, "step": 9147, "token_acc": 0.883514560679915 }, { "epoch": 0.4936060001079156, "grad_norm": 0.35738521814346313, "learning_rate": 1.06923540967569e-05, "loss": 0.3482409715652466, "step": 9148, "token_acc": 0.8750218188165474 }, { "epoch": 0.4936599579129121, "grad_norm": 0.3483234643936157, "learning_rate": 1.0690610717483405e-05, "loss": 0.3210543990135193, "step": 9149, "token_acc": 0.8844209288653733 }, { "epoch": 0.49371391571790857, "grad_norm": 0.5295870304107666, "learning_rate": 1.068886731711884e-05, "loss": 0.3916642963886261, "step": 9150, "token_acc": 0.8685069753636094 }, { "epoch": 0.4937678735229051, "grad_norm": 0.36897149682044983, "learning_rate": 1.068712389571645e-05, "loss": 0.32924944162368774, "step": 9151, "token_acc": 0.8796787392029095 }, { "epoch": 0.4938218313279016, "grad_norm": 0.3752669394016266, "learning_rate": 1.0685380453329473e-05, "loss": 0.3976370096206665, "step": 9152, "token_acc": 0.8649885583524027 }, { "epoch": 0.4938757891328981, "grad_norm": 0.5163038969039917, "learning_rate": 1.0683636990011157e-05, "loss": 0.3599480390548706, "step": 9153, "token_acc": 0.8756015399422522 }, { "epoch": 0.49392974693789454, "grad_norm": 0.4327514171600342, "learning_rate": 1.0681893505814745e-05, "loss": 0.35022419691085815, "step": 9154, "token_acc": 0.8785159620362382 }, { "epoch": 0.49398370474289105, "grad_norm": 0.37671777606010437, "learning_rate": 1.0680150000793485e-05, "loss": 0.36235809326171875, "step": 9155, "token_acc": 0.8788913700249573 }, { "epoch": 0.49403766254788756, "grad_norm": 0.4683631658554077, "learning_rate": 1.0678406475000623e-05, "loss": 0.42700159549713135, "step": 9156, "token_acc": 0.8537841084606144 }, { "epoch": 0.49409162035288406, "grad_norm": 0.40840181708335876, "learning_rate": 1.0676662928489404e-05, "loss": 0.353520929813385, "step": 9157, "token_acc": 0.8773247936938047 }, { "epoch": 0.4941455781578805, "grad_norm": 0.4086451232433319, "learning_rate": 1.0674919361313076e-05, "loss": 0.40376171469688416, "step": 9158, "token_acc": 0.8588798820928519 }, { "epoch": 0.494199535962877, "grad_norm": 0.43240758776664734, "learning_rate": 1.067317577352489e-05, "loss": 0.2920874357223511, "step": 9159, "token_acc": 0.8907330567081605 }, { "epoch": 0.49425349376787353, "grad_norm": 0.41798415780067444, "learning_rate": 1.0671432165178091e-05, "loss": 0.4423341155052185, "step": 9160, "token_acc": 0.8529123837493882 }, { "epoch": 0.49430745157287004, "grad_norm": 0.45585545897483826, "learning_rate": 1.0669688536325929e-05, "loss": 0.3619025945663452, "step": 9161, "token_acc": 0.874025974025974 }, { "epoch": 0.4943614093778665, "grad_norm": 0.35047581791877747, "learning_rate": 1.0667944887021656e-05, "loss": 0.4057242274284363, "step": 9162, "token_acc": 0.8654109228664633 }, { "epoch": 0.494415367182863, "grad_norm": 0.35924988985061646, "learning_rate": 1.066620121731852e-05, "loss": 0.2766842842102051, "step": 9163, "token_acc": 0.8961003913102146 }, { "epoch": 0.4944693249878595, "grad_norm": 0.4260634183883667, "learning_rate": 1.066445752726977e-05, "loss": 0.39800742268562317, "step": 9164, "token_acc": 0.8626239402212962 }, { "epoch": 0.494523282792856, "grad_norm": 0.32907623052597046, "learning_rate": 1.0662713816928665e-05, "loss": 0.33554884791374207, "step": 9165, "token_acc": 0.8797233252209555 }, { "epoch": 0.49457724059785246, "grad_norm": 0.32845261693000793, "learning_rate": 1.0660970086348454e-05, "loss": 0.35767221450805664, "step": 9166, "token_acc": 0.8760797342192691 }, { "epoch": 0.49463119840284897, "grad_norm": 0.3630920350551605, "learning_rate": 1.065922633558239e-05, "loss": 0.33338871598243713, "step": 9167, "token_acc": 0.8867209432902864 }, { "epoch": 0.4946851562078455, "grad_norm": 0.42076200246810913, "learning_rate": 1.0657482564683723e-05, "loss": 0.356334924697876, "step": 9168, "token_acc": 0.8693959731543625 }, { "epoch": 0.494739114012842, "grad_norm": 0.4023963212966919, "learning_rate": 1.0655738773705714e-05, "loss": 0.3583923876285553, "step": 9169, "token_acc": 0.8736998514115899 }, { "epoch": 0.49479307181783844, "grad_norm": 0.33473923802375793, "learning_rate": 1.0653994962701612e-05, "loss": 0.36956727504730225, "step": 9170, "token_acc": 0.8739811912225706 }, { "epoch": 0.49484702962283494, "grad_norm": 0.47035184502601624, "learning_rate": 1.0652251131724678e-05, "loss": 0.36752021312713623, "step": 9171, "token_acc": 0.8722883135059483 }, { "epoch": 0.49490098742783145, "grad_norm": 0.35593339800834656, "learning_rate": 1.0650507280828165e-05, "loss": 0.34090864658355713, "step": 9172, "token_acc": 0.8815177786154039 }, { "epoch": 0.4949549452328279, "grad_norm": 0.40334880352020264, "learning_rate": 1.0648763410065325e-05, "loss": 0.32615789771080017, "step": 9173, "token_acc": 0.8839432904714803 }, { "epoch": 0.4950089030378244, "grad_norm": 0.438450425863266, "learning_rate": 1.0647019519489425e-05, "loss": 0.3065521717071533, "step": 9174, "token_acc": 0.8854603387556599 }, { "epoch": 0.4950628608428209, "grad_norm": 0.38359057903289795, "learning_rate": 1.0645275609153718e-05, "loss": 0.33349859714508057, "step": 9175, "token_acc": 0.8837766594775941 }, { "epoch": 0.4951168186478174, "grad_norm": 0.5630736351013184, "learning_rate": 1.0643531679111463e-05, "loss": 0.3711664080619812, "step": 9176, "token_acc": 0.8671739130434782 }, { "epoch": 0.4951707764528139, "grad_norm": 0.35905197262763977, "learning_rate": 1.064178772941592e-05, "loss": 0.34588614106178284, "step": 9177, "token_acc": 0.8775459819775336 }, { "epoch": 0.4952247342578104, "grad_norm": 0.35204407572746277, "learning_rate": 1.0640043760120347e-05, "loss": 0.39147353172302246, "step": 9178, "token_acc": 0.8676719278466741 }, { "epoch": 0.4952786920628069, "grad_norm": 0.4555162787437439, "learning_rate": 1.0638299771278005e-05, "loss": 0.3528481721878052, "step": 9179, "token_acc": 0.8732298242620713 }, { "epoch": 0.4953326498678034, "grad_norm": 0.3181662857532501, "learning_rate": 1.0636555762942153e-05, "loss": 0.3085136413574219, "step": 9180, "token_acc": 0.8931758530183727 }, { "epoch": 0.49538660767279985, "grad_norm": 0.4083646833896637, "learning_rate": 1.0634811735166057e-05, "loss": 0.4293137490749359, "step": 9181, "token_acc": 0.8519061583577713 }, { "epoch": 0.49544056547779636, "grad_norm": 0.516659140586853, "learning_rate": 1.0633067688002979e-05, "loss": 0.43646156787872314, "step": 9182, "token_acc": 0.85381091211995 }, { "epoch": 0.49549452328279286, "grad_norm": 0.3017267882823944, "learning_rate": 1.0631323621506177e-05, "loss": 0.3301767110824585, "step": 9183, "token_acc": 0.8812970469021424 }, { "epoch": 0.49554848108778937, "grad_norm": 0.3347298204898834, "learning_rate": 1.0629579535728918e-05, "loss": 0.34057608246803284, "step": 9184, "token_acc": 0.881888710540396 }, { "epoch": 0.4956024388927858, "grad_norm": 0.3947940170764923, "learning_rate": 1.0627835430724464e-05, "loss": 0.4065852463245392, "step": 9185, "token_acc": 0.8539353565260301 }, { "epoch": 0.49565639669778233, "grad_norm": 0.4140148460865021, "learning_rate": 1.062609130654608e-05, "loss": 0.3403775691986084, "step": 9186, "token_acc": 0.8816183166384806 }, { "epoch": 0.49571035450277884, "grad_norm": 0.4479691684246063, "learning_rate": 1.0624347163247033e-05, "loss": 0.3885064125061035, "step": 9187, "token_acc": 0.8591734667017636 }, { "epoch": 0.49576431230777535, "grad_norm": 0.3436964154243469, "learning_rate": 1.0622603000880588e-05, "loss": 0.3197244107723236, "step": 9188, "token_acc": 0.8822676526025809 }, { "epoch": 0.4958182701127718, "grad_norm": 0.43850111961364746, "learning_rate": 1.0620858819500004e-05, "loss": 0.3535540699958801, "step": 9189, "token_acc": 0.876676344532006 }, { "epoch": 0.4958722279177683, "grad_norm": 0.3389238715171814, "learning_rate": 1.0619114619158561e-05, "loss": 0.34668684005737305, "step": 9190, "token_acc": 0.8796435272045028 }, { "epoch": 0.4959261857227648, "grad_norm": 0.3439169228076935, "learning_rate": 1.0617370399909517e-05, "loss": 0.30031195282936096, "step": 9191, "token_acc": 0.8849363197189284 }, { "epoch": 0.49598014352776126, "grad_norm": 0.3337711691856384, "learning_rate": 1.0615626161806146e-05, "loss": 0.33878791332244873, "step": 9192, "token_acc": 0.8791908806437193 }, { "epoch": 0.49603410133275777, "grad_norm": 0.39879879355430603, "learning_rate": 1.0613881904901711e-05, "loss": 0.38221025466918945, "step": 9193, "token_acc": 0.862012987012987 }, { "epoch": 0.4960880591377543, "grad_norm": 0.3124849200248718, "learning_rate": 1.0612137629249483e-05, "loss": 0.3629176616668701, "step": 9194, "token_acc": 0.8670792808723843 }, { "epoch": 0.4961420169427508, "grad_norm": 0.5121060013771057, "learning_rate": 1.0610393334902733e-05, "loss": 0.42678695917129517, "step": 9195, "token_acc": 0.8574777308954524 }, { "epoch": 0.49619597474774724, "grad_norm": 0.3500332534313202, "learning_rate": 1.0608649021914727e-05, "loss": 0.32763439416885376, "step": 9196, "token_acc": 0.88243103702611 }, { "epoch": 0.49624993255274374, "grad_norm": 0.47650501132011414, "learning_rate": 1.0606904690338744e-05, "loss": 0.3000471889972687, "step": 9197, "token_acc": 0.8928319209039548 }, { "epoch": 0.49630389035774025, "grad_norm": 0.4503544569015503, "learning_rate": 1.060516034022805e-05, "loss": 0.32325518131256104, "step": 9198, "token_acc": 0.8786265148733015 }, { "epoch": 0.49635784816273676, "grad_norm": 0.41724854707717896, "learning_rate": 1.0603415971635914e-05, "loss": 0.31586378812789917, "step": 9199, "token_acc": 0.8827132139029246 }, { "epoch": 0.4964118059677332, "grad_norm": 0.3568626642227173, "learning_rate": 1.0601671584615618e-05, "loss": 0.3654005825519562, "step": 9200, "token_acc": 0.8708869034648071 }, { "epoch": 0.4964657637727297, "grad_norm": 0.3888840973377228, "learning_rate": 1.0599927179220425e-05, "loss": 0.3659743070602417, "step": 9201, "token_acc": 0.8719019396551724 }, { "epoch": 0.4965197215777262, "grad_norm": 0.36420008540153503, "learning_rate": 1.0598182755503614e-05, "loss": 0.32138335704803467, "step": 9202, "token_acc": 0.8833533757086411 }, { "epoch": 0.49657367938272273, "grad_norm": 0.39491090178489685, "learning_rate": 1.059643831351846e-05, "loss": 0.3559555113315582, "step": 9203, "token_acc": 0.871398278657852 }, { "epoch": 0.4966276371877192, "grad_norm": 0.3466635048389435, "learning_rate": 1.0594693853318235e-05, "loss": 0.26854103803634644, "step": 9204, "token_acc": 0.9010439010439011 }, { "epoch": 0.4966815949927157, "grad_norm": 0.4188435673713684, "learning_rate": 1.0592949374956214e-05, "loss": 0.3817285895347595, "step": 9205, "token_acc": 0.8631008801624915 }, { "epoch": 0.4967355527977122, "grad_norm": 0.3806723356246948, "learning_rate": 1.0591204878485676e-05, "loss": 0.35289132595062256, "step": 9206, "token_acc": 0.8775510204081632 }, { "epoch": 0.4967895106027087, "grad_norm": 0.42717254161834717, "learning_rate": 1.0589460363959896e-05, "loss": 0.3640976548194885, "step": 9207, "token_acc": 0.8734247334164243 }, { "epoch": 0.49684346840770516, "grad_norm": 0.36419448256492615, "learning_rate": 1.0587715831432153e-05, "loss": 0.3413165807723999, "step": 9208, "token_acc": 0.8810817336713751 }, { "epoch": 0.49689742621270167, "grad_norm": 0.3023770749568939, "learning_rate": 1.058597128095572e-05, "loss": 0.33608779311180115, "step": 9209, "token_acc": 0.8810493531384763 }, { "epoch": 0.4969513840176982, "grad_norm": 0.37788522243499756, "learning_rate": 1.0584226712583879e-05, "loss": 0.3348793387413025, "step": 9210, "token_acc": 0.8818877551020409 }, { "epoch": 0.4970053418226947, "grad_norm": 0.38387423753738403, "learning_rate": 1.0582482126369906e-05, "loss": 0.4196334481239319, "step": 9211, "token_acc": 0.8596600074450924 }, { "epoch": 0.49705929962769113, "grad_norm": 0.3662417232990265, "learning_rate": 1.058073752236708e-05, "loss": 0.33367010951042175, "step": 9212, "token_acc": 0.8807216341136126 }, { "epoch": 0.49711325743268764, "grad_norm": 0.41604605317115784, "learning_rate": 1.0578992900628684e-05, "loss": 0.37927526235580444, "step": 9213, "token_acc": 0.8680347513481126 }, { "epoch": 0.49716721523768415, "grad_norm": 0.37893053889274597, "learning_rate": 1.0577248261207995e-05, "loss": 0.3670528829097748, "step": 9214, "token_acc": 0.8713720316622692 }, { "epoch": 0.4972211730426806, "grad_norm": 0.38610416650772095, "learning_rate": 1.0575503604158295e-05, "loss": 0.36731186509132385, "step": 9215, "token_acc": 0.867104440275172 }, { "epoch": 0.4972751308476771, "grad_norm": 0.47067928314208984, "learning_rate": 1.0573758929532869e-05, "loss": 0.32635819911956787, "step": 9216, "token_acc": 0.8862961391599491 }, { "epoch": 0.4973290886526736, "grad_norm": 0.3845326900482178, "learning_rate": 1.0572014237384995e-05, "loss": 0.31553155183792114, "step": 9217, "token_acc": 0.8881819412481387 }, { "epoch": 0.4973830464576701, "grad_norm": 0.4619467854499817, "learning_rate": 1.0570269527767953e-05, "loss": 0.3624066412448883, "step": 9218, "token_acc": 0.8784664343082286 }, { "epoch": 0.49743700426266657, "grad_norm": 0.3966755270957947, "learning_rate": 1.0568524800735032e-05, "loss": 0.3760175108909607, "step": 9219, "token_acc": 0.868113706478625 }, { "epoch": 0.4974909620676631, "grad_norm": 0.4485889971256256, "learning_rate": 1.0566780056339511e-05, "loss": 0.36478012800216675, "step": 9220, "token_acc": 0.8737422467263956 }, { "epoch": 0.4975449198726596, "grad_norm": 0.46782681345939636, "learning_rate": 1.0565035294634675e-05, "loss": 0.414291650056839, "step": 9221, "token_acc": 0.861699703804967 }, { "epoch": 0.4975988776776561, "grad_norm": 0.3945498764514923, "learning_rate": 1.056329051567381e-05, "loss": 0.37883931398391724, "step": 9222, "token_acc": 0.866833839250363 }, { "epoch": 0.49765283548265254, "grad_norm": 0.3215138614177704, "learning_rate": 1.0561545719510203e-05, "loss": 0.3337891697883606, "step": 9223, "token_acc": 0.8819460726846424 }, { "epoch": 0.49770679328764905, "grad_norm": 0.3838067650794983, "learning_rate": 1.0559800906197133e-05, "loss": 0.3740846514701843, "step": 9224, "token_acc": 0.8732272069464544 }, { "epoch": 0.49776075109264556, "grad_norm": 0.3801296353340149, "learning_rate": 1.055805607578789e-05, "loss": 0.39826077222824097, "step": 9225, "token_acc": 0.86535404182463 }, { "epoch": 0.49781470889764207, "grad_norm": 0.3316574990749359, "learning_rate": 1.0556311228335764e-05, "loss": 0.36231884360313416, "step": 9226, "token_acc": 0.8717923214640939 }, { "epoch": 0.4978686667026385, "grad_norm": 0.4062434732913971, "learning_rate": 1.0554566363894037e-05, "loss": 0.3616599440574646, "step": 9227, "token_acc": 0.8732496030027429 }, { "epoch": 0.497922624507635, "grad_norm": 0.3221960663795471, "learning_rate": 1.0552821482515995e-05, "loss": 0.38000917434692383, "step": 9228, "token_acc": 0.864900468384075 }, { "epoch": 0.49797658231263153, "grad_norm": 0.37625929713249207, "learning_rate": 1.0551076584254933e-05, "loss": 0.36780810356140137, "step": 9229, "token_acc": 0.870054730258014 }, { "epoch": 0.49803054011762804, "grad_norm": 0.36724328994750977, "learning_rate": 1.0549331669164138e-05, "loss": 0.29630160331726074, "step": 9230, "token_acc": 0.8935660506502395 }, { "epoch": 0.4980844979226245, "grad_norm": 0.4363482594490051, "learning_rate": 1.0547586737296894e-05, "loss": 0.3655807375907898, "step": 9231, "token_acc": 0.8684713375796178 }, { "epoch": 0.498138455727621, "grad_norm": 0.49023112654685974, "learning_rate": 1.0545841788706496e-05, "loss": 0.33237797021865845, "step": 9232, "token_acc": 0.8845346396081175 }, { "epoch": 0.4981924135326175, "grad_norm": 0.371003121137619, "learning_rate": 1.0544096823446234e-05, "loss": 0.36070165038108826, "step": 9233, "token_acc": 0.8739130434782608 }, { "epoch": 0.498246371337614, "grad_norm": 0.4144255220890045, "learning_rate": 1.0542351841569399e-05, "loss": 0.2996911406517029, "step": 9234, "token_acc": 0.8830625307730182 }, { "epoch": 0.49830032914261047, "grad_norm": 0.42790380120277405, "learning_rate": 1.0540606843129277e-05, "loss": 0.38363754749298096, "step": 9235, "token_acc": 0.8657199800697558 }, { "epoch": 0.498354286947607, "grad_norm": 0.4610443413257599, "learning_rate": 1.0538861828179168e-05, "loss": 0.37686699628829956, "step": 9236, "token_acc": 0.8700802948532316 }, { "epoch": 0.4984082447526035, "grad_norm": 0.37241145968437195, "learning_rate": 1.0537116796772355e-05, "loss": 0.3335576057434082, "step": 9237, "token_acc": 0.8820981149748273 }, { "epoch": 0.49846220255759993, "grad_norm": 0.31095245480537415, "learning_rate": 1.0535371748962137e-05, "loss": 0.33662348985671997, "step": 9238, "token_acc": 0.8799311433079903 }, { "epoch": 0.49851616036259644, "grad_norm": 0.33626264333724976, "learning_rate": 1.0533626684801807e-05, "loss": 0.3356638550758362, "step": 9239, "token_acc": 0.8842947273546643 }, { "epoch": 0.49857011816759295, "grad_norm": 0.3593045473098755, "learning_rate": 1.0531881604344658e-05, "loss": 0.35033687949180603, "step": 9240, "token_acc": 0.8786999419616948 }, { "epoch": 0.49862407597258945, "grad_norm": 0.3216971158981323, "learning_rate": 1.0530136507643987e-05, "loss": 0.4011201858520508, "step": 9241, "token_acc": 0.8589923023093072 }, { "epoch": 0.4986780337775859, "grad_norm": 0.3722531795501709, "learning_rate": 1.052839139475308e-05, "loss": 0.34440818428993225, "step": 9242, "token_acc": 0.881543523781035 }, { "epoch": 0.4987319915825824, "grad_norm": 0.3875456750392914, "learning_rate": 1.0526646265725245e-05, "loss": 0.40017133951187134, "step": 9243, "token_acc": 0.8608053504249686 }, { "epoch": 0.4987859493875789, "grad_norm": 0.3805512487888336, "learning_rate": 1.0524901120613764e-05, "loss": 0.4302632212638855, "step": 9244, "token_acc": 0.8543859649122807 }, { "epoch": 0.4988399071925754, "grad_norm": 0.35745519399642944, "learning_rate": 1.0523155959471944e-05, "loss": 0.36446672677993774, "step": 9245, "token_acc": 0.8735951158595809 }, { "epoch": 0.4988938649975719, "grad_norm": 0.3977612257003784, "learning_rate": 1.0521410782353076e-05, "loss": 0.3742232024669647, "step": 9246, "token_acc": 0.8680136447126738 }, { "epoch": 0.4989478228025684, "grad_norm": 0.44209542870521545, "learning_rate": 1.0519665589310459e-05, "loss": 0.3705909252166748, "step": 9247, "token_acc": 0.8646148372175769 }, { "epoch": 0.4990017806075649, "grad_norm": 0.4321199059486389, "learning_rate": 1.0517920380397395e-05, "loss": 0.37034669518470764, "step": 9248, "token_acc": 0.8735604891368871 }, { "epoch": 0.4990557384125614, "grad_norm": 0.46651512384414673, "learning_rate": 1.0516175155667177e-05, "loss": 0.40987247228622437, "step": 9249, "token_acc": 0.8594315965449986 }, { "epoch": 0.49910969621755785, "grad_norm": 0.309457927942276, "learning_rate": 1.0514429915173104e-05, "loss": 0.38880425691604614, "step": 9250, "token_acc": 0.8580873133477716 }, { "epoch": 0.49916365402255436, "grad_norm": 0.36270207166671753, "learning_rate": 1.0512684658968475e-05, "loss": 0.34173133969306946, "step": 9251, "token_acc": 0.8798268398268398 }, { "epoch": 0.49921761182755087, "grad_norm": 0.43528127670288086, "learning_rate": 1.0510939387106594e-05, "loss": 0.3471054136753082, "step": 9252, "token_acc": 0.8743594998975199 }, { "epoch": 0.4992715696325474, "grad_norm": 0.42973804473876953, "learning_rate": 1.0509194099640756e-05, "loss": 0.3504754602909088, "step": 9253, "token_acc": 0.8774714966084572 }, { "epoch": 0.4993255274375438, "grad_norm": 0.40044140815734863, "learning_rate": 1.0507448796624262e-05, "loss": 0.39565330743789673, "step": 9254, "token_acc": 0.8583309016632623 }, { "epoch": 0.49937948524254033, "grad_norm": 0.43456482887268066, "learning_rate": 1.0505703478110417e-05, "loss": 0.3305112421512604, "step": 9255, "token_acc": 0.8808099401748735 }, { "epoch": 0.49943344304753684, "grad_norm": 0.3530067205429077, "learning_rate": 1.0503958144152519e-05, "loss": 0.3586857318878174, "step": 9256, "token_acc": 0.8736373748609566 }, { "epoch": 0.4994874008525333, "grad_norm": 0.36259472370147705, "learning_rate": 1.050221279480387e-05, "loss": 0.38526201248168945, "step": 9257, "token_acc": 0.8655451053119618 }, { "epoch": 0.4995413586575298, "grad_norm": 0.39898961782455444, "learning_rate": 1.0500467430117778e-05, "loss": 0.3808603286743164, "step": 9258, "token_acc": 0.8649411764705882 }, { "epoch": 0.4995953164625263, "grad_norm": 0.36258798837661743, "learning_rate": 1.049872205014754e-05, "loss": 0.3370007574558258, "step": 9259, "token_acc": 0.883068417415342 }, { "epoch": 0.4996492742675228, "grad_norm": 0.3755646049976349, "learning_rate": 1.0496976654946458e-05, "loss": 0.3424173593521118, "step": 9260, "token_acc": 0.8742550655542313 }, { "epoch": 0.49970323207251927, "grad_norm": 0.37010401487350464, "learning_rate": 1.0495231244567843e-05, "loss": 0.33842575550079346, "step": 9261, "token_acc": 0.8816689466484268 }, { "epoch": 0.4997571898775158, "grad_norm": 0.38274484872817993, "learning_rate": 1.0493485819064994e-05, "loss": 0.35452359914779663, "step": 9262, "token_acc": 0.8768055555555555 }, { "epoch": 0.4998111476825123, "grad_norm": 0.3358404338359833, "learning_rate": 1.0491740378491217e-05, "loss": 0.3267221450805664, "step": 9263, "token_acc": 0.8848048503220917 }, { "epoch": 0.4998651054875088, "grad_norm": 0.39064279198646545, "learning_rate": 1.0489994922899818e-05, "loss": 0.4106350541114807, "step": 9264, "token_acc": 0.8619251577998197 }, { "epoch": 0.49991906329250524, "grad_norm": 0.39617982506752014, "learning_rate": 1.0488249452344103e-05, "loss": 0.3662188649177551, "step": 9265, "token_acc": 0.8658045977011494 }, { "epoch": 0.49997302109750175, "grad_norm": 0.40816691517829895, "learning_rate": 1.048650396687738e-05, "loss": 0.38548406958580017, "step": 9266, "token_acc": 0.8683038920005745 }, { "epoch": 0.5000269789024983, "grad_norm": 0.4577849507331848, "learning_rate": 1.0484758466552949e-05, "loss": 0.3844812512397766, "step": 9267, "token_acc": 0.8679387019230769 }, { "epoch": 0.5000809367074948, "grad_norm": 0.37426629662513733, "learning_rate": 1.0483012951424122e-05, "loss": 0.38263970613479614, "step": 9268, "token_acc": 0.8612707535121328 }, { "epoch": 0.5001348945124913, "grad_norm": 0.43166375160217285, "learning_rate": 1.048126742154421e-05, "loss": 0.38483917713165283, "step": 9269, "token_acc": 0.8606622940329547 }, { "epoch": 0.5001888523174878, "grad_norm": 0.3642409145832062, "learning_rate": 1.0479521876966512e-05, "loss": 0.34618645906448364, "step": 9270, "token_acc": 0.877315860822413 }, { "epoch": 0.5002428101224842, "grad_norm": 0.3644884526729584, "learning_rate": 1.0477776317744343e-05, "loss": 0.3602047562599182, "step": 9271, "token_acc": 0.8783398184176394 }, { "epoch": 0.5002967679274807, "grad_norm": 0.378991037607193, "learning_rate": 1.0476030743931012e-05, "loss": 0.2803906798362732, "step": 9272, "token_acc": 0.8987613751263903 }, { "epoch": 0.5003507257324772, "grad_norm": 0.38970237970352173, "learning_rate": 1.0474285155579825e-05, "loss": 0.325050413608551, "step": 9273, "token_acc": 0.8852937598832259 }, { "epoch": 0.5004046835374737, "grad_norm": 0.3361870050430298, "learning_rate": 1.0472539552744094e-05, "loss": 0.36603009700775146, "step": 9274, "token_acc": 0.8744027303754266 }, { "epoch": 0.5004586413424702, "grad_norm": 0.4214963912963867, "learning_rate": 1.047079393547713e-05, "loss": 0.33651259541511536, "step": 9275, "token_acc": 0.8819391008024624 }, { "epoch": 0.5005125991474667, "grad_norm": 0.3012166917324066, "learning_rate": 1.0469048303832242e-05, "loss": 0.32785558700561523, "step": 9276, "token_acc": 0.8815697266993693 }, { "epoch": 0.5005665569524632, "grad_norm": 0.48172491788864136, "learning_rate": 1.046730265786274e-05, "loss": 0.3987252116203308, "step": 9277, "token_acc": 0.8652934444989374 }, { "epoch": 0.5006205147574596, "grad_norm": 0.453237920999527, "learning_rate": 1.0465556997621942e-05, "loss": 0.3757862448692322, "step": 9278, "token_acc": 0.8723805038850954 }, { "epoch": 0.5006744725624561, "grad_norm": 0.3910086154937744, "learning_rate": 1.0463811323163149e-05, "loss": 0.3790385127067566, "step": 9279, "token_acc": 0.8689180737961226 }, { "epoch": 0.5007284303674526, "grad_norm": 0.47019994258880615, "learning_rate": 1.0462065634539683e-05, "loss": 0.3558885157108307, "step": 9280, "token_acc": 0.8769282360831656 }, { "epoch": 0.5007823881724491, "grad_norm": 0.36055222153663635, "learning_rate": 1.0460319931804854e-05, "loss": 0.3261854946613312, "step": 9281, "token_acc": 0.8835136855506047 }, { "epoch": 0.5008363459774456, "grad_norm": 0.3437782824039459, "learning_rate": 1.0458574215011974e-05, "loss": 0.32968348264694214, "step": 9282, "token_acc": 0.8835671883432272 }, { "epoch": 0.5008903037824421, "grad_norm": 0.4419214427471161, "learning_rate": 1.0456828484214358e-05, "loss": 0.37582147121429443, "step": 9283, "token_acc": 0.8694335389792485 }, { "epoch": 0.5009442615874387, "grad_norm": 0.39669761061668396, "learning_rate": 1.0455082739465321e-05, "loss": 0.37521541118621826, "step": 9284, "token_acc": 0.8661662654015012 }, { "epoch": 0.5009982193924352, "grad_norm": 0.3985481262207031, "learning_rate": 1.0453336980818175e-05, "loss": 0.3704112768173218, "step": 9285, "token_acc": 0.8672581767571329 }, { "epoch": 0.5010521771974316, "grad_norm": 0.2773226499557495, "learning_rate": 1.0451591208326233e-05, "loss": 0.33016109466552734, "step": 9286, "token_acc": 0.8819927348209652 }, { "epoch": 0.5011061350024281, "grad_norm": 0.3293072283267975, "learning_rate": 1.0449845422042817e-05, "loss": 0.344390332698822, "step": 9287, "token_acc": 0.8752790593838369 }, { "epoch": 0.5011600928074246, "grad_norm": 0.5304578542709351, "learning_rate": 1.0448099622021242e-05, "loss": 0.3778868317604065, "step": 9288, "token_acc": 0.8714842948059947 }, { "epoch": 0.5012140506124211, "grad_norm": 0.42854052782058716, "learning_rate": 1.0446353808314817e-05, "loss": 0.43726277351379395, "step": 9289, "token_acc": 0.8528618519628409 }, { "epoch": 0.5012680084174176, "grad_norm": 0.3271608054637909, "learning_rate": 1.0444607980976867e-05, "loss": 0.305624783039093, "step": 9290, "token_acc": 0.8910468972051161 }, { "epoch": 0.5013219662224141, "grad_norm": 0.3435986042022705, "learning_rate": 1.0442862140060705e-05, "loss": 0.3639366626739502, "step": 9291, "token_acc": 0.8672222222222222 }, { "epoch": 0.5013759240274106, "grad_norm": 0.4234103858470917, "learning_rate": 1.0441116285619648e-05, "loss": 0.3906483054161072, "step": 9292, "token_acc": 0.8676233183856502 }, { "epoch": 0.5014298818324071, "grad_norm": 0.4097382426261902, "learning_rate": 1.0439370417707017e-05, "loss": 0.3696325123310089, "step": 9293, "token_acc": 0.8711635364177737 }, { "epoch": 0.5014838396374035, "grad_norm": 0.41688770055770874, "learning_rate": 1.043762453637613e-05, "loss": 0.34951096773147583, "step": 9294, "token_acc": 0.8726164426383245 }, { "epoch": 0.5015377974424, "grad_norm": 0.45903080701828003, "learning_rate": 1.04358786416803e-05, "loss": 0.3937174081802368, "step": 9295, "token_acc": 0.863963813034009 }, { "epoch": 0.5015917552473965, "grad_norm": 0.3313889801502228, "learning_rate": 1.0434132733672852e-05, "loss": 0.39631956815719604, "step": 9296, "token_acc": 0.8633561643835617 }, { "epoch": 0.501645713052393, "grad_norm": 0.3533974587917328, "learning_rate": 1.0432386812407105e-05, "loss": 0.3723316192626953, "step": 9297, "token_acc": 0.8700612131329994 }, { "epoch": 0.5016996708573895, "grad_norm": 0.3364550471305847, "learning_rate": 1.043064087793638e-05, "loss": 0.30899330973625183, "step": 9298, "token_acc": 0.8875909937362452 }, { "epoch": 0.501753628662386, "grad_norm": 0.2971934974193573, "learning_rate": 1.0428894930313994e-05, "loss": 0.4000491499900818, "step": 9299, "token_acc": 0.861351228389445 }, { "epoch": 0.5018075864673825, "grad_norm": 0.5156638622283936, "learning_rate": 1.0427148969593268e-05, "loss": 0.33215898275375366, "step": 9300, "token_acc": 0.882120253164557 }, { "epoch": 0.501861544272379, "grad_norm": 0.32034462690353394, "learning_rate": 1.0425402995827524e-05, "loss": 0.3311619162559509, "step": 9301, "token_acc": 0.8830319353597538 }, { "epoch": 0.5019155020773755, "grad_norm": 0.41680625081062317, "learning_rate": 1.0423657009070084e-05, "loss": 0.3464703857898712, "step": 9302, "token_acc": 0.8747146501947093 }, { "epoch": 0.501969459882372, "grad_norm": 0.41293275356292725, "learning_rate": 1.042191100937427e-05, "loss": 0.35818585753440857, "step": 9303, "token_acc": 0.8711572351674173 }, { "epoch": 0.5020234176873685, "grad_norm": 0.3661949038505554, "learning_rate": 1.0420164996793404e-05, "loss": 0.37638232111930847, "step": 9304, "token_acc": 0.8731662591687042 }, { "epoch": 0.502077375492365, "grad_norm": 0.43153485655784607, "learning_rate": 1.041841897138081e-05, "loss": 0.43439629673957825, "step": 9305, "token_acc": 0.84993564993565 }, { "epoch": 0.5021313332973615, "grad_norm": 0.36814555525779724, "learning_rate": 1.0416672933189811e-05, "loss": 0.3636547029018402, "step": 9306, "token_acc": 0.87087839620153 }, { "epoch": 0.502185291102358, "grad_norm": 0.3038780689239502, "learning_rate": 1.0414926882273728e-05, "loss": 0.3332509994506836, "step": 9307, "token_acc": 0.8820884699057288 }, { "epoch": 0.5022392489073545, "grad_norm": 0.39645180106163025, "learning_rate": 1.0413180818685888e-05, "loss": 0.39764025807380676, "step": 9308, "token_acc": 0.8625688952791757 }, { "epoch": 0.5022932067123509, "grad_norm": 0.3732631802558899, "learning_rate": 1.0411434742479613e-05, "loss": 0.3452056050300598, "step": 9309, "token_acc": 0.8803567292100996 }, { "epoch": 0.5023471645173474, "grad_norm": 0.4056815505027771, "learning_rate": 1.0409688653708231e-05, "loss": 0.42687827348709106, "step": 9310, "token_acc": 0.849609375 }, { "epoch": 0.5024011223223439, "grad_norm": 0.3130638897418976, "learning_rate": 1.0407942552425063e-05, "loss": 0.36850544810295105, "step": 9311, "token_acc": 0.8753813658765548 }, { "epoch": 0.5024550801273404, "grad_norm": 0.3968304693698883, "learning_rate": 1.0406196438683437e-05, "loss": 0.32310009002685547, "step": 9312, "token_acc": 0.8836588620647823 }, { "epoch": 0.5025090379323369, "grad_norm": 0.3877292573451996, "learning_rate": 1.0404450312536675e-05, "loss": 0.40087032318115234, "step": 9313, "token_acc": 0.8607932875667429 }, { "epoch": 0.5025629957373334, "grad_norm": 0.39693790674209595, "learning_rate": 1.0402704174038112e-05, "loss": 0.327412486076355, "step": 9314, "token_acc": 0.8849032258064516 }, { "epoch": 0.5026169535423299, "grad_norm": 0.41669178009033203, "learning_rate": 1.0400958023241064e-05, "loss": 0.3515869975090027, "step": 9315, "token_acc": 0.8757239781565448 }, { "epoch": 0.5026709113473264, "grad_norm": 0.3671600818634033, "learning_rate": 1.0399211860198865e-05, "loss": 0.37389838695526123, "step": 9316, "token_acc": 0.8673764610577479 }, { "epoch": 0.5027248691523228, "grad_norm": 0.3564675450325012, "learning_rate": 1.0397465684964842e-05, "loss": 0.36354178190231323, "step": 9317, "token_acc": 0.8758161399843302 }, { "epoch": 0.5027788269573193, "grad_norm": 0.34470993280410767, "learning_rate": 1.0395719497592318e-05, "loss": 0.3881549835205078, "step": 9318, "token_acc": 0.8605299860529986 }, { "epoch": 0.5028327847623159, "grad_norm": 0.3569655418395996, "learning_rate": 1.0393973298134627e-05, "loss": 0.39118319749832153, "step": 9319, "token_acc": 0.8669317428760769 }, { "epoch": 0.5028867425673124, "grad_norm": 0.357611745595932, "learning_rate": 1.0392227086645095e-05, "loss": 0.3740498423576355, "step": 9320, "token_acc": 0.8751112806816737 }, { "epoch": 0.5029407003723089, "grad_norm": 0.36268526315689087, "learning_rate": 1.0390480863177046e-05, "loss": 0.3346760869026184, "step": 9321, "token_acc": 0.8811021640664318 }, { "epoch": 0.5029946581773054, "grad_norm": 0.31428268551826477, "learning_rate": 1.038873462778382e-05, "loss": 0.34670281410217285, "step": 9322, "token_acc": 0.872921320426905 }, { "epoch": 0.5030486159823019, "grad_norm": 0.4776073098182678, "learning_rate": 1.0386988380518734e-05, "loss": 0.40019139647483826, "step": 9323, "token_acc": 0.8631776239907728 }, { "epoch": 0.5031025737872983, "grad_norm": 0.45079880952835083, "learning_rate": 1.038524212143513e-05, "loss": 0.37318599224090576, "step": 9324, "token_acc": 0.8670212765957447 }, { "epoch": 0.5031565315922948, "grad_norm": 0.4305064082145691, "learning_rate": 1.038349585058633e-05, "loss": 0.31222954392433167, "step": 9325, "token_acc": 0.8881313482677056 }, { "epoch": 0.5032104893972913, "grad_norm": 0.41978976130485535, "learning_rate": 1.0381749568025668e-05, "loss": 0.3136562705039978, "step": 9326, "token_acc": 0.888203449578245 }, { "epoch": 0.5032644472022878, "grad_norm": 0.47825494408607483, "learning_rate": 1.0380003273806472e-05, "loss": 0.3298224210739136, "step": 9327, "token_acc": 0.8815901360544217 }, { "epoch": 0.5033184050072843, "grad_norm": 0.37712663412094116, "learning_rate": 1.0378256967982074e-05, "loss": 0.39569568634033203, "step": 9328, "token_acc": 0.8642179116797358 }, { "epoch": 0.5033723628122808, "grad_norm": 0.24459019303321838, "learning_rate": 1.037651065060581e-05, "loss": 0.31674090027809143, "step": 9329, "token_acc": 0.8863152918931695 }, { "epoch": 0.5034263206172773, "grad_norm": 0.3248274028301239, "learning_rate": 1.0374764321731012e-05, "loss": 0.3866032361984253, "step": 9330, "token_acc": 0.8645859238995275 }, { "epoch": 0.5034802784222738, "grad_norm": 0.3711797893047333, "learning_rate": 1.0373017981411004e-05, "loss": 0.34877410531044006, "step": 9331, "token_acc": 0.875900805426028 }, { "epoch": 0.5035342362272702, "grad_norm": 0.36289462447166443, "learning_rate": 1.0371271629699129e-05, "loss": 0.40754881501197815, "step": 9332, "token_acc": 0.8581943081452404 }, { "epoch": 0.5035881940322667, "grad_norm": 0.4646957516670227, "learning_rate": 1.0369525266648714e-05, "loss": 0.35574501752853394, "step": 9333, "token_acc": 0.878935095219588 }, { "epoch": 0.5036421518372632, "grad_norm": 0.4263378083705902, "learning_rate": 1.0367778892313097e-05, "loss": 0.37549057602882385, "step": 9334, "token_acc": 0.8692685589519651 }, { "epoch": 0.5036961096422597, "grad_norm": 0.44558054208755493, "learning_rate": 1.0366032506745607e-05, "loss": 0.43471720814704895, "step": 9335, "token_acc": 0.8533639143730887 }, { "epoch": 0.5037500674472563, "grad_norm": 0.49978458881378174, "learning_rate": 1.036428610999958e-05, "loss": 0.39412546157836914, "step": 9336, "token_acc": 0.8617741360883505 }, { "epoch": 0.5038040252522528, "grad_norm": 0.25704267621040344, "learning_rate": 1.0362539702128347e-05, "loss": 0.3295917809009552, "step": 9337, "token_acc": 0.8820406577754758 }, { "epoch": 0.5038579830572493, "grad_norm": 0.4297104477882385, "learning_rate": 1.036079328318525e-05, "loss": 0.4079049825668335, "step": 9338, "token_acc": 0.8633771057169386 }, { "epoch": 0.5039119408622458, "grad_norm": 0.37994226813316345, "learning_rate": 1.0359046853223622e-05, "loss": 0.3300018310546875, "step": 9339, "token_acc": 0.8805250468791856 }, { "epoch": 0.5039658986672422, "grad_norm": 0.28181320428848267, "learning_rate": 1.0357300412296795e-05, "loss": 0.3145790696144104, "step": 9340, "token_acc": 0.8878150711767011 }, { "epoch": 0.5040198564722387, "grad_norm": 0.3619556427001953, "learning_rate": 1.0355553960458109e-05, "loss": 0.38787806034088135, "step": 9341, "token_acc": 0.871773220747889 }, { "epoch": 0.5040738142772352, "grad_norm": 0.3675873875617981, "learning_rate": 1.0353807497760897e-05, "loss": 0.3506840467453003, "step": 9342, "token_acc": 0.8785079108442083 }, { "epoch": 0.5041277720822317, "grad_norm": 0.5179433226585388, "learning_rate": 1.0352061024258497e-05, "loss": 0.36530882120132446, "step": 9343, "token_acc": 0.8771262757654593 }, { "epoch": 0.5041817298872282, "grad_norm": 0.28474295139312744, "learning_rate": 1.0350314540004244e-05, "loss": 0.42858633399009705, "step": 9344, "token_acc": 0.8519984676286554 }, { "epoch": 0.5042356876922247, "grad_norm": 0.5131003260612488, "learning_rate": 1.0348568045051479e-05, "loss": 0.35084429383277893, "step": 9345, "token_acc": 0.8769324654190399 }, { "epoch": 0.5042896454972212, "grad_norm": 0.35751160979270935, "learning_rate": 1.0346821539453535e-05, "loss": 0.3322914242744446, "step": 9346, "token_acc": 0.882998382998383 }, { "epoch": 0.5043436033022176, "grad_norm": 0.38985002040863037, "learning_rate": 1.0345075023263752e-05, "loss": 0.37699756026268005, "step": 9347, "token_acc": 0.870376771517456 }, { "epoch": 0.5043975611072141, "grad_norm": 0.39020398259162903, "learning_rate": 1.034332849653547e-05, "loss": 0.33043748140335083, "step": 9348, "token_acc": 0.885547201336675 }, { "epoch": 0.5044515189122106, "grad_norm": 0.3179048001766205, "learning_rate": 1.0341581959322024e-05, "loss": 0.3151465654373169, "step": 9349, "token_acc": 0.8908159243200631 }, { "epoch": 0.5045054767172071, "grad_norm": 0.37039265036582947, "learning_rate": 1.0339835411676757e-05, "loss": 0.3875539004802704, "step": 9350, "token_acc": 0.85917799246766 }, { "epoch": 0.5045594345222036, "grad_norm": 0.5064960718154907, "learning_rate": 1.0338088853653003e-05, "loss": 0.4080876410007477, "step": 9351, "token_acc": 0.8589364844903988 }, { "epoch": 0.5046133923272001, "grad_norm": 0.40363171696662903, "learning_rate": 1.0336342285304106e-05, "loss": 0.3179866373538971, "step": 9352, "token_acc": 0.8923144360023378 }, { "epoch": 0.5046673501321967, "grad_norm": 0.3550790846347809, "learning_rate": 1.0334595706683402e-05, "loss": 0.3357298970222473, "step": 9353, "token_acc": 0.8789638932496076 }, { "epoch": 0.5047213079371932, "grad_norm": 0.36157119274139404, "learning_rate": 1.0332849117844232e-05, "loss": 0.3641209602355957, "step": 9354, "token_acc": 0.8720682302771855 }, { "epoch": 0.5047752657421896, "grad_norm": 0.4557740092277527, "learning_rate": 1.0331102518839939e-05, "loss": 0.3381989598274231, "step": 9355, "token_acc": 0.8771006620268206 }, { "epoch": 0.5048292235471861, "grad_norm": 0.4267757534980774, "learning_rate": 1.0329355909723861e-05, "loss": 0.37352025508880615, "step": 9356, "token_acc": 0.8620516258799866 }, { "epoch": 0.5048831813521826, "grad_norm": 0.4405554533004761, "learning_rate": 1.032760929054934e-05, "loss": 0.3508830666542053, "step": 9357, "token_acc": 0.8790637191157347 }, { "epoch": 0.5049371391571791, "grad_norm": 0.45726609230041504, "learning_rate": 1.0325862661369719e-05, "loss": 0.3777647614479065, "step": 9358, "token_acc": 0.867954485313575 }, { "epoch": 0.5049910969621756, "grad_norm": 0.3958742916584015, "learning_rate": 1.0324116022238337e-05, "loss": 0.3591271936893463, "step": 9359, "token_acc": 0.869752113483307 }, { "epoch": 0.5050450547671721, "grad_norm": 0.42007678747177124, "learning_rate": 1.0322369373208534e-05, "loss": 0.3445296585559845, "step": 9360, "token_acc": 0.8787320309620347 }, { "epoch": 0.5050990125721686, "grad_norm": 0.48998889327049255, "learning_rate": 1.0320622714333656e-05, "loss": 0.353434681892395, "step": 9361, "token_acc": 0.8761020151133502 }, { "epoch": 0.5051529703771651, "grad_norm": 0.45845431089401245, "learning_rate": 1.0318876045667046e-05, "loss": 0.3635447323322296, "step": 9362, "token_acc": 0.8811898512685914 }, { "epoch": 0.5052069281821615, "grad_norm": 0.38757646083831787, "learning_rate": 1.0317129367262042e-05, "loss": 0.3895437717437744, "step": 9363, "token_acc": 0.8639091025150006 }, { "epoch": 0.505260885987158, "grad_norm": 0.35839784145355225, "learning_rate": 1.0315382679171992e-05, "loss": 0.3566477298736572, "step": 9364, "token_acc": 0.8771929824561403 }, { "epoch": 0.5053148437921545, "grad_norm": 0.4438108503818512, "learning_rate": 1.0313635981450238e-05, "loss": 0.3760124444961548, "step": 9365, "token_acc": 0.8660461351679482 }, { "epoch": 0.505368801597151, "grad_norm": 0.37478452920913696, "learning_rate": 1.0311889274150122e-05, "loss": 0.35309478640556335, "step": 9366, "token_acc": 0.8742053293656161 }, { "epoch": 0.5054227594021475, "grad_norm": 0.5273544192314148, "learning_rate": 1.0310142557324992e-05, "loss": 0.32442814111709595, "step": 9367, "token_acc": 0.8800827015851137 }, { "epoch": 0.505476717207144, "grad_norm": 0.38279539346694946, "learning_rate": 1.0308395831028186e-05, "loss": 0.3626355826854706, "step": 9368, "token_acc": 0.8698698698698699 }, { "epoch": 0.5055306750121406, "grad_norm": 0.43651869893074036, "learning_rate": 1.0306649095313055e-05, "loss": 0.37853413820266724, "step": 9369, "token_acc": 0.8693770289070953 }, { "epoch": 0.505584632817137, "grad_norm": 0.3173031508922577, "learning_rate": 1.0304902350232937e-05, "loss": 0.3364614248275757, "step": 9370, "token_acc": 0.8825081678813772 }, { "epoch": 0.5056385906221335, "grad_norm": 0.3874518573284149, "learning_rate": 1.0303155595841185e-05, "loss": 0.3501623272895813, "step": 9371, "token_acc": 0.8807622090369694 }, { "epoch": 0.50569254842713, "grad_norm": 0.47028619050979614, "learning_rate": 1.030140883219114e-05, "loss": 0.31403008103370667, "step": 9372, "token_acc": 0.8836267605633803 }, { "epoch": 0.5057465062321265, "grad_norm": 0.3601909875869751, "learning_rate": 1.0299662059336147e-05, "loss": 0.3184441328048706, "step": 9373, "token_acc": 0.8840921710161022 }, { "epoch": 0.505800464037123, "grad_norm": 0.371733158826828, "learning_rate": 1.0297915277329554e-05, "loss": 0.3477112352848053, "step": 9374, "token_acc": 0.8763411529368976 }, { "epoch": 0.5058544218421195, "grad_norm": 0.4044668972492218, "learning_rate": 1.0296168486224707e-05, "loss": 0.39038535952568054, "step": 9375, "token_acc": 0.8630410022779044 }, { "epoch": 0.505908379647116, "grad_norm": 0.3571518063545227, "learning_rate": 1.0294421686074949e-05, "loss": 0.36174601316452026, "step": 9376, "token_acc": 0.8773068050749712 }, { "epoch": 0.5059623374521125, "grad_norm": 0.29038935899734497, "learning_rate": 1.0292674876933633e-05, "loss": 0.3582345247268677, "step": 9377, "token_acc": 0.8684044033911174 }, { "epoch": 0.5060162952571089, "grad_norm": 0.4023993909358978, "learning_rate": 1.0290928058854102e-05, "loss": 0.3676757216453552, "step": 9378, "token_acc": 0.8712824173209612 }, { "epoch": 0.5060702530621054, "grad_norm": 0.3826204538345337, "learning_rate": 1.0289181231889702e-05, "loss": 0.34817588329315186, "step": 9379, "token_acc": 0.8789007918025151 }, { "epoch": 0.5061242108671019, "grad_norm": 0.3142373263835907, "learning_rate": 1.0287434396093788e-05, "loss": 0.3527596592903137, "step": 9380, "token_acc": 0.8764696408439362 }, { "epoch": 0.5061781686720984, "grad_norm": 0.3923608660697937, "learning_rate": 1.0285687551519698e-05, "loss": 0.3575526475906372, "step": 9381, "token_acc": 0.876045074518357 }, { "epoch": 0.5062321264770949, "grad_norm": 0.4640728235244751, "learning_rate": 1.028394069822079e-05, "loss": 0.3369642198085785, "step": 9382, "token_acc": 0.879139351151454 }, { "epoch": 0.5062860842820914, "grad_norm": 0.38582780957221985, "learning_rate": 1.0282193836250403e-05, "loss": 0.3236130475997925, "step": 9383, "token_acc": 0.8771602846492714 }, { "epoch": 0.5063400420870879, "grad_norm": 0.33412429690361023, "learning_rate": 1.0280446965661892e-05, "loss": 0.3774257302284241, "step": 9384, "token_acc": 0.8731439638476436 }, { "epoch": 0.5063939998920844, "grad_norm": 0.4052795171737671, "learning_rate": 1.0278700086508604e-05, "loss": 0.36828479170799255, "step": 9385, "token_acc": 0.8746333375844918 }, { "epoch": 0.5064479576970808, "grad_norm": 0.43790698051452637, "learning_rate": 1.0276953198843886e-05, "loss": 0.36657196283340454, "step": 9386, "token_acc": 0.8691003513059417 }, { "epoch": 0.5065019155020773, "grad_norm": 0.3598085343837738, "learning_rate": 1.0275206302721092e-05, "loss": 0.34625935554504395, "step": 9387, "token_acc": 0.8797493145319232 }, { "epoch": 0.5065558733070739, "grad_norm": 0.38140150904655457, "learning_rate": 1.027345939819357e-05, "loss": 0.3972942531108856, "step": 9388, "token_acc": 0.8678424456202234 }, { "epoch": 0.5066098311120704, "grad_norm": 0.4096618890762329, "learning_rate": 1.0271712485314669e-05, "loss": 0.34191492199897766, "step": 9389, "token_acc": 0.8748798769940419 }, { "epoch": 0.5066637889170669, "grad_norm": 0.30239078402519226, "learning_rate": 1.026996556413774e-05, "loss": 0.3310837149620056, "step": 9390, "token_acc": 0.8819231675701155 }, { "epoch": 0.5067177467220634, "grad_norm": 0.33147260546684265, "learning_rate": 1.0268218634716133e-05, "loss": 0.38773030042648315, "step": 9391, "token_acc": 0.8632102272727272 }, { "epoch": 0.5067717045270599, "grad_norm": 0.4258992373943329, "learning_rate": 1.0266471697103203e-05, "loss": 0.3770292401313782, "step": 9392, "token_acc": 0.8644187358916479 }, { "epoch": 0.5068256623320563, "grad_norm": 0.36801302433013916, "learning_rate": 1.0264724751352293e-05, "loss": 0.3261025846004486, "step": 9393, "token_acc": 0.8813947226709747 }, { "epoch": 0.5068796201370528, "grad_norm": 0.3749217689037323, "learning_rate": 1.026297779751676e-05, "loss": 0.35291191935539246, "step": 9394, "token_acc": 0.8775682521812552 }, { "epoch": 0.5069335779420493, "grad_norm": 0.40480780601501465, "learning_rate": 1.026123083564995e-05, "loss": 0.34745579957962036, "step": 9395, "token_acc": 0.8811448900388098 }, { "epoch": 0.5069875357470458, "grad_norm": 0.45711979269981384, "learning_rate": 1.0259483865805224e-05, "loss": 0.3697672486305237, "step": 9396, "token_acc": 0.8729650386976248 }, { "epoch": 0.5070414935520423, "grad_norm": 0.36729326844215393, "learning_rate": 1.0257736888035927e-05, "loss": 0.3722214698791504, "step": 9397, "token_acc": 0.87085346215781 }, { "epoch": 0.5070954513570388, "grad_norm": 0.362271249294281, "learning_rate": 1.0255989902395413e-05, "loss": 0.41783758997917175, "step": 9398, "token_acc": 0.8631112148442058 }, { "epoch": 0.5071494091620353, "grad_norm": 0.3835180997848511, "learning_rate": 1.0254242908937033e-05, "loss": 0.31868037581443787, "step": 9399, "token_acc": 0.8830902735928648 }, { "epoch": 0.5072033669670318, "grad_norm": 0.33868664503097534, "learning_rate": 1.0252495907714144e-05, "loss": 0.32134002447128296, "step": 9400, "token_acc": 0.8907401946328517 }, { "epoch": 0.5072573247720282, "grad_norm": 0.31763800978660583, "learning_rate": 1.0250748898780094e-05, "loss": 0.3759145736694336, "step": 9401, "token_acc": 0.8701607012417824 }, { "epoch": 0.5073112825770247, "grad_norm": 0.4713592827320099, "learning_rate": 1.0249001882188237e-05, "loss": 0.39096498489379883, "step": 9402, "token_acc": 0.8656114678214126 }, { "epoch": 0.5073652403820212, "grad_norm": 0.3778354525566101, "learning_rate": 1.024725485799193e-05, "loss": 0.35901790857315063, "step": 9403, "token_acc": 0.8687789799072643 }, { "epoch": 0.5074191981870178, "grad_norm": 0.31670916080474854, "learning_rate": 1.0245507826244525e-05, "loss": 0.4007994532585144, "step": 9404, "token_acc": 0.8633720930232558 }, { "epoch": 0.5074731559920143, "grad_norm": 0.4098363220691681, "learning_rate": 1.0243760786999373e-05, "loss": 0.33736708760261536, "step": 9405, "token_acc": 0.8790607129821146 }, { "epoch": 0.5075271137970108, "grad_norm": 0.3674323260784149, "learning_rate": 1.024201374030983e-05, "loss": 0.387752503156662, "step": 9406, "token_acc": 0.8701378884544145 }, { "epoch": 0.5075810716020073, "grad_norm": 0.48002195358276367, "learning_rate": 1.0240266686229254e-05, "loss": 0.3858892321586609, "step": 9407, "token_acc": 0.8680719339622641 }, { "epoch": 0.5076350294070037, "grad_norm": 0.33766254782676697, "learning_rate": 1.0238519624810995e-05, "loss": 0.333686500787735, "step": 9408, "token_acc": 0.8790104367993815 }, { "epoch": 0.5076889872120002, "grad_norm": 0.36808064579963684, "learning_rate": 1.023677255610841e-05, "loss": 0.3519018888473511, "step": 9409, "token_acc": 0.8767285657842749 }, { "epoch": 0.5077429450169967, "grad_norm": 0.3769279718399048, "learning_rate": 1.023502548017485e-05, "loss": 0.3550233244895935, "step": 9410, "token_acc": 0.8750296700688346 }, { "epoch": 0.5077969028219932, "grad_norm": 0.4551638066768646, "learning_rate": 1.0233278397063676e-05, "loss": 0.45782050490379333, "step": 9411, "token_acc": 0.852859703438162 }, { "epoch": 0.5078508606269897, "grad_norm": 0.3402767479419708, "learning_rate": 1.0231531306828239e-05, "loss": 0.3498990535736084, "step": 9412, "token_acc": 0.8768264172998247 }, { "epoch": 0.5079048184319862, "grad_norm": 0.36239394545555115, "learning_rate": 1.0229784209521898e-05, "loss": 0.2791284918785095, "step": 9413, "token_acc": 0.8938495739162653 }, { "epoch": 0.5079587762369827, "grad_norm": 0.4798787236213684, "learning_rate": 1.0228037105198008e-05, "loss": 0.37946730852127075, "step": 9414, "token_acc": 0.8750402576489533 }, { "epoch": 0.5080127340419792, "grad_norm": 0.4536610543727875, "learning_rate": 1.0226289993909921e-05, "loss": 0.37765318155288696, "step": 9415, "token_acc": 0.8682269958319975 }, { "epoch": 0.5080666918469756, "grad_norm": 0.3767011761665344, "learning_rate": 1.0224542875711003e-05, "loss": 0.3575681447982788, "step": 9416, "token_acc": 0.8738781294284365 }, { "epoch": 0.5081206496519721, "grad_norm": 0.47038108110427856, "learning_rate": 1.0222795750654602e-05, "loss": 0.38169610500335693, "step": 9417, "token_acc": 0.8690543869622892 }, { "epoch": 0.5081746074569686, "grad_norm": 0.33583059906959534, "learning_rate": 1.0221048618794074e-05, "loss": 0.361167311668396, "step": 9418, "token_acc": 0.8741496598639455 }, { "epoch": 0.5082285652619651, "grad_norm": 0.36348956823349, "learning_rate": 1.0219301480182781e-05, "loss": 0.3822445869445801, "step": 9419, "token_acc": 0.8691099476439791 }, { "epoch": 0.5082825230669616, "grad_norm": 0.4761369228363037, "learning_rate": 1.021755433487408e-05, "loss": 0.409687876701355, "step": 9420, "token_acc": 0.8604900254384791 }, { "epoch": 0.5083364808719582, "grad_norm": 0.36593636870384216, "learning_rate": 1.0215807182921322e-05, "loss": 0.3859705924987793, "step": 9421, "token_acc": 0.8684355997788834 }, { "epoch": 0.5083904386769547, "grad_norm": 0.41248050332069397, "learning_rate": 1.0214060024377874e-05, "loss": 0.36065930128097534, "step": 9422, "token_acc": 0.8734835355285961 }, { "epoch": 0.5084443964819512, "grad_norm": 0.4207994043827057, "learning_rate": 1.0212312859297086e-05, "loss": 0.3408306837081909, "step": 9423, "token_acc": 0.8813760379596679 }, { "epoch": 0.5084983542869476, "grad_norm": 0.39641082286834717, "learning_rate": 1.021056568773232e-05, "loss": 0.35520628094673157, "step": 9424, "token_acc": 0.8807736353225601 }, { "epoch": 0.5085523120919441, "grad_norm": 0.44926902651786804, "learning_rate": 1.0208818509736935e-05, "loss": 0.3880799114704132, "step": 9425, "token_acc": 0.8653545359749739 }, { "epoch": 0.5086062698969406, "grad_norm": 0.34851473569869995, "learning_rate": 1.0207071325364286e-05, "loss": 0.38533103466033936, "step": 9426, "token_acc": 0.8617640715980159 }, { "epoch": 0.5086602277019371, "grad_norm": 0.4681624174118042, "learning_rate": 1.0205324134667731e-05, "loss": 0.34906619787216187, "step": 9427, "token_acc": 0.8758915834522111 }, { "epoch": 0.5087141855069336, "grad_norm": 0.40726786851882935, "learning_rate": 1.0203576937700632e-05, "loss": 0.37613990902900696, "step": 9428, "token_acc": 0.8686464877213022 }, { "epoch": 0.5087681433119301, "grad_norm": 0.230645090341568, "learning_rate": 1.0201829734516349e-05, "loss": 0.3407846987247467, "step": 9429, "token_acc": 0.8813605442176871 }, { "epoch": 0.5088221011169266, "grad_norm": 0.4176986813545227, "learning_rate": 1.0200082525168235e-05, "loss": 0.31969672441482544, "step": 9430, "token_acc": 0.8849403245272898 }, { "epoch": 0.508876058921923, "grad_norm": 0.3343740999698639, "learning_rate": 1.0198335309709655e-05, "loss": 0.37660086154937744, "step": 9431, "token_acc": 0.8660968660968661 }, { "epoch": 0.5089300167269195, "grad_norm": 0.39232566952705383, "learning_rate": 1.0196588088193965e-05, "loss": 0.3542545437812805, "step": 9432, "token_acc": 0.8704181724315952 }, { "epoch": 0.508983974531916, "grad_norm": 0.4914688766002655, "learning_rate": 1.0194840860674529e-05, "loss": 0.3783263564109802, "step": 9433, "token_acc": 0.8705164458996875 }, { "epoch": 0.5090379323369125, "grad_norm": 0.440613716840744, "learning_rate": 1.0193093627204704e-05, "loss": 0.351571649312973, "step": 9434, "token_acc": 0.867182320441989 }, { "epoch": 0.509091890141909, "grad_norm": 0.41681602597236633, "learning_rate": 1.0191346387837848e-05, "loss": 0.40250375866889954, "step": 9435, "token_acc": 0.8605065546646302 }, { "epoch": 0.5091458479469055, "grad_norm": 0.4105214774608612, "learning_rate": 1.0189599142627326e-05, "loss": 0.4117053151130676, "step": 9436, "token_acc": 0.85625 }, { "epoch": 0.509199805751902, "grad_norm": 0.3789310157299042, "learning_rate": 1.0187851891626493e-05, "loss": 0.33017629384994507, "step": 9437, "token_acc": 0.8877818563188253 }, { "epoch": 0.5092537635568986, "grad_norm": 0.4834668040275574, "learning_rate": 1.0186104634888714e-05, "loss": 0.40754789113998413, "step": 9438, "token_acc": 0.8612365675128485 }, { "epoch": 0.509307721361895, "grad_norm": 0.3977799117565155, "learning_rate": 1.0184357372467349e-05, "loss": 0.3806777596473694, "step": 9439, "token_acc": 0.8690044766574291 }, { "epoch": 0.5093616791668915, "grad_norm": 0.3844354748725891, "learning_rate": 1.0182610104415756e-05, "loss": 0.40150341391563416, "step": 9440, "token_acc": 0.8661429218915244 }, { "epoch": 0.509415636971888, "grad_norm": 0.35501208901405334, "learning_rate": 1.01808628307873e-05, "loss": 0.38215988874435425, "step": 9441, "token_acc": 0.868260960771974 }, { "epoch": 0.5094695947768845, "grad_norm": 0.4398363530635834, "learning_rate": 1.0179115551635341e-05, "loss": 0.3604311943054199, "step": 9442, "token_acc": 0.8741579475419234 }, { "epoch": 0.509523552581881, "grad_norm": 0.396392822265625, "learning_rate": 1.017736826701324e-05, "loss": 0.3567281663417816, "step": 9443, "token_acc": 0.8778929730861871 }, { "epoch": 0.5095775103868775, "grad_norm": 0.36897939443588257, "learning_rate": 1.0175620976974353e-05, "loss": 0.3465591371059418, "step": 9444, "token_acc": 0.8769446172993155 }, { "epoch": 0.509631468191874, "grad_norm": 0.3986753225326538, "learning_rate": 1.0173873681572053e-05, "loss": 0.37436455488204956, "step": 9445, "token_acc": 0.8673728181411309 }, { "epoch": 0.5096854259968705, "grad_norm": 0.37892165780067444, "learning_rate": 1.0172126380859698e-05, "loss": 0.3445662260055542, "step": 9446, "token_acc": 0.8817987689988694 }, { "epoch": 0.5097393838018669, "grad_norm": 0.4991154670715332, "learning_rate": 1.0170379074890641e-05, "loss": 0.3429405689239502, "step": 9447, "token_acc": 0.8812319210481538 }, { "epoch": 0.5097933416068634, "grad_norm": 0.2878975570201874, "learning_rate": 1.0168631763718257e-05, "loss": 0.36075031757354736, "step": 9448, "token_acc": 0.8756590509666081 }, { "epoch": 0.5098472994118599, "grad_norm": 0.3762751519680023, "learning_rate": 1.0166884447395903e-05, "loss": 0.374031662940979, "step": 9449, "token_acc": 0.8744968898646176 }, { "epoch": 0.5099012572168564, "grad_norm": 0.36894670128822327, "learning_rate": 1.0165137125976943e-05, "loss": 0.3173227310180664, "step": 9450, "token_acc": 0.8865656565656566 }, { "epoch": 0.5099552150218529, "grad_norm": 0.42705926299095154, "learning_rate": 1.0163389799514736e-05, "loss": 0.3172866106033325, "step": 9451, "token_acc": 0.8828912962271199 }, { "epoch": 0.5100091728268494, "grad_norm": 0.4048711061477661, "learning_rate": 1.0161642468062649e-05, "loss": 0.35014647245407104, "step": 9452, "token_acc": 0.880814385495616 }, { "epoch": 0.5100631306318459, "grad_norm": 0.397763192653656, "learning_rate": 1.015989513167404e-05, "loss": 0.30530357360839844, "step": 9453, "token_acc": 0.8880597014925373 }, { "epoch": 0.5101170884368423, "grad_norm": 0.3881213963031769, "learning_rate": 1.0158147790402278e-05, "loss": 0.33545053005218506, "step": 9454, "token_acc": 0.8783841856467555 }, { "epoch": 0.5101710462418388, "grad_norm": 0.4079597592353821, "learning_rate": 1.0156400444300724e-05, "loss": 0.3831174969673157, "step": 9455, "token_acc": 0.8651019147621989 }, { "epoch": 0.5102250040468354, "grad_norm": 0.31897565722465515, "learning_rate": 1.015465309342274e-05, "loss": 0.28921079635620117, "step": 9456, "token_acc": 0.8942667819072313 }, { "epoch": 0.5102789618518319, "grad_norm": 0.4009583592414856, "learning_rate": 1.0152905737821694e-05, "loss": 0.37408560514450073, "step": 9457, "token_acc": 0.8654382161659957 }, { "epoch": 0.5103329196568284, "grad_norm": 0.3661171495914459, "learning_rate": 1.0151158377550945e-05, "loss": 0.30626749992370605, "step": 9458, "token_acc": 0.8857486470234516 }, { "epoch": 0.5103868774618249, "grad_norm": 0.33543798327445984, "learning_rate": 1.0149411012663859e-05, "loss": 0.2703782320022583, "step": 9459, "token_acc": 0.896339759709416 }, { "epoch": 0.5104408352668214, "grad_norm": 0.32397207617759705, "learning_rate": 1.01476636432138e-05, "loss": 0.3505253195762634, "step": 9460, "token_acc": 0.8776589882067674 }, { "epoch": 0.5104947930718179, "grad_norm": 0.3523974120616913, "learning_rate": 1.014591626925413e-05, "loss": 0.3855665624141693, "step": 9461, "token_acc": 0.8671681780708986 }, { "epoch": 0.5105487508768143, "grad_norm": 0.42083480954170227, "learning_rate": 1.0144168890838218e-05, "loss": 0.4173023998737335, "step": 9462, "token_acc": 0.8555508534348991 }, { "epoch": 0.5106027086818108, "grad_norm": 0.4142151176929474, "learning_rate": 1.0142421508019422e-05, "loss": 0.35252153873443604, "step": 9463, "token_acc": 0.8692084942084942 }, { "epoch": 0.5106566664868073, "grad_norm": 0.35330531001091003, "learning_rate": 1.0140674120851113e-05, "loss": 0.30673637986183167, "step": 9464, "token_acc": 0.8906556054845028 }, { "epoch": 0.5107106242918038, "grad_norm": 0.2965170741081238, "learning_rate": 1.0138926729386654e-05, "loss": 0.3372463583946228, "step": 9465, "token_acc": 0.8816046966731899 }, { "epoch": 0.5107645820968003, "grad_norm": 0.43173491954803467, "learning_rate": 1.013717933367941e-05, "loss": 0.35329270362854004, "step": 9466, "token_acc": 0.8804984236601111 }, { "epoch": 0.5108185399017968, "grad_norm": 0.36992931365966797, "learning_rate": 1.0135431933782742e-05, "loss": 0.37978270649909973, "step": 9467, "token_acc": 0.8662404412686474 }, { "epoch": 0.5108724977067933, "grad_norm": 0.3714071214199066, "learning_rate": 1.0133684529750022e-05, "loss": 0.3191920518875122, "step": 9468, "token_acc": 0.8883919062832801 }, { "epoch": 0.5109264555117898, "grad_norm": 0.3919534683227539, "learning_rate": 1.0131937121634607e-05, "loss": 0.31645581126213074, "step": 9469, "token_acc": 0.8850154303348954 }, { "epoch": 0.5109804133167862, "grad_norm": 0.29297009110450745, "learning_rate": 1.0130189709489869e-05, "loss": 0.34970298409461975, "step": 9470, "token_acc": 0.8750548005260851 }, { "epoch": 0.5110343711217827, "grad_norm": 0.16744089126586914, "learning_rate": 1.0128442293369171e-05, "loss": 0.29001325368881226, "step": 9471, "token_acc": 0.895709127550592 }, { "epoch": 0.5110883289267792, "grad_norm": 0.3094228208065033, "learning_rate": 1.0126694873325883e-05, "loss": 0.39724835753440857, "step": 9472, "token_acc": 0.8640642650935108 }, { "epoch": 0.5111422867317758, "grad_norm": 0.381340891122818, "learning_rate": 1.0124947449413358e-05, "loss": 0.3586011528968811, "step": 9473, "token_acc": 0.8757069577798238 }, { "epoch": 0.5111962445367723, "grad_norm": 0.40006160736083984, "learning_rate": 1.0123200021684978e-05, "loss": 0.3612597584724426, "step": 9474, "token_acc": 0.871479522813922 }, { "epoch": 0.5112502023417688, "grad_norm": 0.37142491340637207, "learning_rate": 1.01214525901941e-05, "loss": 0.34353798627853394, "step": 9475, "token_acc": 0.8762040107374073 }, { "epoch": 0.5113041601467653, "grad_norm": 0.3241707384586334, "learning_rate": 1.0119705154994091e-05, "loss": 0.33874064683914185, "step": 9476, "token_acc": 0.8801778136997374 }, { "epoch": 0.5113581179517617, "grad_norm": 0.38719040155410767, "learning_rate": 1.0117957716138318e-05, "loss": 0.35123762488365173, "step": 9477, "token_acc": 0.8750306899091579 }, { "epoch": 0.5114120757567582, "grad_norm": 0.35845327377319336, "learning_rate": 1.0116210273680149e-05, "loss": 0.31142812967300415, "step": 9478, "token_acc": 0.8883038736367055 }, { "epoch": 0.5114660335617547, "grad_norm": 0.3531959354877472, "learning_rate": 1.0114462827672944e-05, "loss": 0.3917074203491211, "step": 9479, "token_acc": 0.8647331221057762 }, { "epoch": 0.5115199913667512, "grad_norm": 0.285710871219635, "learning_rate": 1.0112715378170079e-05, "loss": 0.3436095416545868, "step": 9480, "token_acc": 0.879109818832476 }, { "epoch": 0.5115739491717477, "grad_norm": 0.4011315107345581, "learning_rate": 1.0110967925224915e-05, "loss": 0.34149691462516785, "step": 9481, "token_acc": 0.8833629366489046 }, { "epoch": 0.5116279069767442, "grad_norm": 0.39802059531211853, "learning_rate": 1.0109220468890818e-05, "loss": 0.38259732723236084, "step": 9482, "token_acc": 0.8684536082474227 }, { "epoch": 0.5116818647817407, "grad_norm": 0.48856136202812195, "learning_rate": 1.0107473009221159e-05, "loss": 0.31607797741889954, "step": 9483, "token_acc": 0.8834151128557409 }, { "epoch": 0.5117358225867372, "grad_norm": 0.3726690411567688, "learning_rate": 1.01057255462693e-05, "loss": 0.3169424533843994, "step": 9484, "token_acc": 0.8898388687931601 }, { "epoch": 0.5117897803917336, "grad_norm": 0.5710633993148804, "learning_rate": 1.010397808008861e-05, "loss": 0.3697682023048401, "step": 9485, "token_acc": 0.8700875585420484 }, { "epoch": 0.5118437381967301, "grad_norm": 0.473915159702301, "learning_rate": 1.0102230610732458e-05, "loss": 0.37978595495224, "step": 9486, "token_acc": 0.8653109342764516 }, { "epoch": 0.5118976960017266, "grad_norm": 0.375961035490036, "learning_rate": 1.010048313825421e-05, "loss": 0.41199439764022827, "step": 9487, "token_acc": 0.8595238095238096 }, { "epoch": 0.5119516538067231, "grad_norm": 0.26861411333084106, "learning_rate": 1.0098735662707232e-05, "loss": 0.2943153381347656, "step": 9488, "token_acc": 0.8964367576578454 }, { "epoch": 0.5120056116117196, "grad_norm": 0.31806454062461853, "learning_rate": 1.0096988184144894e-05, "loss": 0.30483782291412354, "step": 9489, "token_acc": 0.889839970227019 }, { "epoch": 0.5120595694167162, "grad_norm": 0.34162530303001404, "learning_rate": 1.0095240702620561e-05, "loss": 0.37806838750839233, "step": 9490, "token_acc": 0.8667603325137572 }, { "epoch": 0.5121135272217127, "grad_norm": 0.4385417401790619, "learning_rate": 1.0093493218187605e-05, "loss": 0.34511420130729675, "step": 9491, "token_acc": 0.8777614138438881 }, { "epoch": 0.5121674850267092, "grad_norm": 0.4086315333843231, "learning_rate": 1.0091745730899389e-05, "loss": 0.34402137994766235, "step": 9492, "token_acc": 0.8766797478891664 }, { "epoch": 0.5122214428317056, "grad_norm": 0.38417449593544006, "learning_rate": 1.0089998240809283e-05, "loss": 0.3244031071662903, "step": 9493, "token_acc": 0.8814638027048528 }, { "epoch": 0.5122754006367021, "grad_norm": 0.3975718319416046, "learning_rate": 1.0088250747970653e-05, "loss": 0.37036609649658203, "step": 9494, "token_acc": 0.8711340206185567 }, { "epoch": 0.5123293584416986, "grad_norm": 0.4687989056110382, "learning_rate": 1.0086503252436866e-05, "loss": 0.4127587676048279, "step": 9495, "token_acc": 0.8559811122770199 }, { "epoch": 0.5123833162466951, "grad_norm": 0.44642969965934753, "learning_rate": 1.0084755754261295e-05, "loss": 0.3103598952293396, "step": 9496, "token_acc": 0.8904059712361186 }, { "epoch": 0.5124372740516916, "grad_norm": 0.40348994731903076, "learning_rate": 1.0083008253497307e-05, "loss": 0.31722092628479004, "step": 9497, "token_acc": 0.8866380989481886 }, { "epoch": 0.5124912318566881, "grad_norm": 0.26356279850006104, "learning_rate": 1.0081260750198265e-05, "loss": 0.34095299243927, "step": 9498, "token_acc": 0.8767106697347705 }, { "epoch": 0.5125451896616846, "grad_norm": 0.30535534024238586, "learning_rate": 1.0079513244417545e-05, "loss": 0.3697197437286377, "step": 9499, "token_acc": 0.8744470465781942 }, { "epoch": 0.512599147466681, "grad_norm": 0.44950053095817566, "learning_rate": 1.0077765736208511e-05, "loss": 0.38163048028945923, "step": 9500, "token_acc": 0.8681667387855185 }, { "epoch": 0.5126531052716775, "grad_norm": 0.4656934440135956, "learning_rate": 1.0076018225624531e-05, "loss": 0.3438911437988281, "step": 9501, "token_acc": 0.8813157002373686 }, { "epoch": 0.512707063076674, "grad_norm": 0.3826844096183777, "learning_rate": 1.0074270712718969e-05, "loss": 0.3631127178668976, "step": 9502, "token_acc": 0.8711094837056024 }, { "epoch": 0.5127610208816705, "grad_norm": 0.3916129171848297, "learning_rate": 1.0072523197545205e-05, "loss": 0.3356226086616516, "step": 9503, "token_acc": 0.8803180914512923 }, { "epoch": 0.512814978686667, "grad_norm": 0.4518144428730011, "learning_rate": 1.0070775680156601e-05, "loss": 0.37660086154937744, "step": 9504, "token_acc": 0.870972002113048 }, { "epoch": 0.5128689364916635, "grad_norm": 0.35988956689834595, "learning_rate": 1.0069028160606525e-05, "loss": 0.3615598678588867, "step": 9505, "token_acc": 0.8747576997630843 }, { "epoch": 0.51292289429666, "grad_norm": 0.5097105503082275, "learning_rate": 1.0067280638948346e-05, "loss": 0.35956907272338867, "step": 9506, "token_acc": 0.8731750219876869 }, { "epoch": 0.5129768521016566, "grad_norm": 0.41136208176612854, "learning_rate": 1.0065533115235435e-05, "loss": 0.37225091457366943, "step": 9507, "token_acc": 0.871071716357776 }, { "epoch": 0.513030809906653, "grad_norm": 0.4600568413734436, "learning_rate": 1.006378558952116e-05, "loss": 0.3412407636642456, "step": 9508, "token_acc": 0.8770351992556985 }, { "epoch": 0.5130847677116495, "grad_norm": 0.35412898659706116, "learning_rate": 1.0062038061858893e-05, "loss": 0.38021591305732727, "step": 9509, "token_acc": 0.8652220685415706 }, { "epoch": 0.513138725516646, "grad_norm": 0.36726900935173035, "learning_rate": 1.0060290532301994e-05, "loss": 0.3894871175289154, "step": 9510, "token_acc": 0.868253770410071 }, { "epoch": 0.5131926833216425, "grad_norm": 0.42582932114601135, "learning_rate": 1.005854300090384e-05, "loss": 0.3484739661216736, "step": 9511, "token_acc": 0.8806416530723219 }, { "epoch": 0.513246641126639, "grad_norm": 0.40391606092453003, "learning_rate": 1.00567954677178e-05, "loss": 0.37018802762031555, "step": 9512, "token_acc": 0.8692689850958126 }, { "epoch": 0.5133005989316355, "grad_norm": 0.4711930751800537, "learning_rate": 1.005504793279724e-05, "loss": 0.3813324272632599, "step": 9513, "token_acc": 0.8651384340577553 }, { "epoch": 0.513354556736632, "grad_norm": 0.35669857263565063, "learning_rate": 1.0053300396195528e-05, "loss": 0.3293085992336273, "step": 9514, "token_acc": 0.88266220575935 }, { "epoch": 0.5134085145416284, "grad_norm": 0.42593735456466675, "learning_rate": 1.0051552857966038e-05, "loss": 0.38439929485321045, "step": 9515, "token_acc": 0.8695599758890898 }, { "epoch": 0.5134624723466249, "grad_norm": 0.43861815333366394, "learning_rate": 1.004980531816214e-05, "loss": 0.4134555160999298, "step": 9516, "token_acc": 0.854310887350535 }, { "epoch": 0.5135164301516214, "grad_norm": 0.39059188961982727, "learning_rate": 1.00480577768372e-05, "loss": 0.42523807287216187, "step": 9517, "token_acc": 0.8543029370270617 }, { "epoch": 0.5135703879566179, "grad_norm": 0.34052497148513794, "learning_rate": 1.0046310234044585e-05, "loss": 0.37730473279953003, "step": 9518, "token_acc": 0.8717357910906298 }, { "epoch": 0.5136243457616144, "grad_norm": 0.5186291337013245, "learning_rate": 1.0044562689837669e-05, "loss": 0.3670029640197754, "step": 9519, "token_acc": 0.8652444444444445 }, { "epoch": 0.5136783035666109, "grad_norm": 0.25372564792633057, "learning_rate": 1.004281514426982e-05, "loss": 0.32785719633102417, "step": 9520, "token_acc": 0.8803278688524591 }, { "epoch": 0.5137322613716074, "grad_norm": 0.42486444115638733, "learning_rate": 1.0041067597394407e-05, "loss": 0.34452080726623535, "step": 9521, "token_acc": 0.8798844075888931 }, { "epoch": 0.5137862191766039, "grad_norm": 0.36721429228782654, "learning_rate": 1.00393200492648e-05, "loss": 0.3601018786430359, "step": 9522, "token_acc": 0.8749652681300362 }, { "epoch": 0.5138401769816003, "grad_norm": 0.3402371108531952, "learning_rate": 1.0037572499934372e-05, "loss": 0.369500070810318, "step": 9523, "token_acc": 0.8681844716870987 }, { "epoch": 0.5138941347865968, "grad_norm": 0.329023540019989, "learning_rate": 1.0035824949456487e-05, "loss": 0.32309412956237793, "step": 9524, "token_acc": 0.8870796928779454 }, { "epoch": 0.5139480925915934, "grad_norm": 0.29658758640289307, "learning_rate": 1.0034077397884518e-05, "loss": 0.3608444631099701, "step": 9525, "token_acc": 0.8718477762494269 }, { "epoch": 0.5140020503965899, "grad_norm": 0.2731381356716156, "learning_rate": 1.0032329845271835e-05, "loss": 0.356442928314209, "step": 9526, "token_acc": 0.8766843118383061 }, { "epoch": 0.5140560082015864, "grad_norm": 0.3816496729850769, "learning_rate": 1.0030582291671805e-05, "loss": 0.36055612564086914, "step": 9527, "token_acc": 0.8723749854971574 }, { "epoch": 0.5141099660065829, "grad_norm": 0.48470962047576904, "learning_rate": 1.0028834737137803e-05, "loss": 0.367251992225647, "step": 9528, "token_acc": 0.8647035415837644 }, { "epoch": 0.5141639238115794, "grad_norm": 0.29771268367767334, "learning_rate": 1.0027087181723194e-05, "loss": 0.34300535917282104, "step": 9529, "token_acc": 0.8812082139446036 }, { "epoch": 0.5142178816165759, "grad_norm": 0.3797472417354584, "learning_rate": 1.002533962548135e-05, "loss": 0.3790861964225769, "step": 9530, "token_acc": 0.8620118669794398 }, { "epoch": 0.5142718394215723, "grad_norm": 0.41125985980033875, "learning_rate": 1.0023592068465637e-05, "loss": 0.37207627296447754, "step": 9531, "token_acc": 0.8682312835977448 }, { "epoch": 0.5143257972265688, "grad_norm": 0.49082738161087036, "learning_rate": 1.0021844510729432e-05, "loss": 0.43559134006500244, "step": 9532, "token_acc": 0.8470712560386473 }, { "epoch": 0.5143797550315653, "grad_norm": 0.46313560009002686, "learning_rate": 1.0020096952326103e-05, "loss": 0.3785487711429596, "step": 9533, "token_acc": 0.8662377243481205 }, { "epoch": 0.5144337128365618, "grad_norm": 0.41176143288612366, "learning_rate": 1.0018349393309015e-05, "loss": 0.3664403259754181, "step": 9534, "token_acc": 0.8727097396335584 }, { "epoch": 0.5144876706415583, "grad_norm": 0.4440966844558716, "learning_rate": 1.0016601833731543e-05, "loss": 0.3662424087524414, "step": 9535, "token_acc": 0.8731364275668073 }, { "epoch": 0.5145416284465548, "grad_norm": 0.45005083084106445, "learning_rate": 1.0014854273647054e-05, "loss": 0.39893633127212524, "step": 9536, "token_acc": 0.8555640828856486 }, { "epoch": 0.5145955862515513, "grad_norm": 0.3262282907962799, "learning_rate": 1.0013106713108919e-05, "loss": 0.3992545008659363, "step": 9537, "token_acc": 0.8625112222649737 }, { "epoch": 0.5146495440565477, "grad_norm": 0.4074709117412567, "learning_rate": 1.001135915217051e-05, "loss": 0.33325353264808655, "step": 9538, "token_acc": 0.8843816067653277 }, { "epoch": 0.5147035018615442, "grad_norm": 0.31657513976097107, "learning_rate": 1.0009611590885195e-05, "loss": 0.3926495313644409, "step": 9539, "token_acc": 0.8664528110754686 }, { "epoch": 0.5147574596665407, "grad_norm": 0.3392007052898407, "learning_rate": 1.0007864029306345e-05, "loss": 0.40155887603759766, "step": 9540, "token_acc": 0.8614594039054471 }, { "epoch": 0.5148114174715372, "grad_norm": 0.3680442273616791, "learning_rate": 1.000611646748733e-05, "loss": 0.33526432514190674, "step": 9541, "token_acc": 0.8829849867530174 }, { "epoch": 0.5148653752765338, "grad_norm": 0.34024137258529663, "learning_rate": 1.0004368905481518e-05, "loss": 0.31931382417678833, "step": 9542, "token_acc": 0.8808072314483918 }, { "epoch": 0.5149193330815303, "grad_norm": 0.3342622220516205, "learning_rate": 1.000262134334228e-05, "loss": 0.43828800320625305, "step": 9543, "token_acc": 0.8499327052489906 }, { "epoch": 0.5149732908865268, "grad_norm": 0.3803826868534088, "learning_rate": 1.000087378112299e-05, "loss": 0.3970595896244049, "step": 9544, "token_acc": 0.8612765957446809 }, { "epoch": 0.5150272486915233, "grad_norm": 0.33299922943115234, "learning_rate": 9.999126218877013e-06, "loss": 0.3912670314311981, "step": 9545, "token_acc": 0.8620689655172413 }, { "epoch": 0.5150812064965197, "grad_norm": 0.3959731459617615, "learning_rate": 9.997378656657722e-06, "loss": 0.3490733802318573, "step": 9546, "token_acc": 0.8803819444444444 }, { "epoch": 0.5151351643015162, "grad_norm": 0.394499272108078, "learning_rate": 9.995631094518487e-06, "loss": 0.3113311529159546, "step": 9547, "token_acc": 0.8812762384550797 }, { "epoch": 0.5151891221065127, "grad_norm": 0.4295498728752136, "learning_rate": 9.993883532512674e-06, "loss": 0.36805641651153564, "step": 9548, "token_acc": 0.8694278694278694 }, { "epoch": 0.5152430799115092, "grad_norm": 0.35607168078422546, "learning_rate": 9.992135970693658e-06, "loss": 0.3570597171783447, "step": 9549, "token_acc": 0.8727094698559239 }, { "epoch": 0.5152970377165057, "grad_norm": 0.412805438041687, "learning_rate": 9.990388409114807e-06, "loss": 0.3781806230545044, "step": 9550, "token_acc": 0.870707778556412 }, { "epoch": 0.5153509955215022, "grad_norm": 0.4163312017917633, "learning_rate": 9.988640847829493e-06, "loss": 0.4036794900894165, "step": 9551, "token_acc": 0.8611169431030863 }, { "epoch": 0.5154049533264987, "grad_norm": 0.41720741987228394, "learning_rate": 9.986893286891084e-06, "loss": 0.36148107051849365, "step": 9552, "token_acc": 0.868295994568907 }, { "epoch": 0.5154589111314952, "grad_norm": 0.3560563921928406, "learning_rate": 9.98514572635295e-06, "loss": 0.4310472309589386, "step": 9553, "token_acc": 0.8500910161687547 }, { "epoch": 0.5155128689364916, "grad_norm": 0.40270212292671204, "learning_rate": 9.983398166268462e-06, "loss": 0.3912304639816284, "step": 9554, "token_acc": 0.8641535298149418 }, { "epoch": 0.5155668267414881, "grad_norm": 0.3481921851634979, "learning_rate": 9.981650606690985e-06, "loss": 0.38828572630882263, "step": 9555, "token_acc": 0.8660867806210175 }, { "epoch": 0.5156207845464846, "grad_norm": 0.3701050579547882, "learning_rate": 9.9799030476739e-06, "loss": 0.3391391634941101, "step": 9556, "token_acc": 0.8798467432950191 }, { "epoch": 0.5156747423514811, "grad_norm": 0.4741852879524231, "learning_rate": 9.978155489270571e-06, "loss": 0.45002931356430054, "step": 9557, "token_acc": 0.8515715202052598 }, { "epoch": 0.5157287001564776, "grad_norm": 0.3898979425430298, "learning_rate": 9.976407931534366e-06, "loss": 0.3197306990623474, "step": 9558, "token_acc": 0.8865086599817684 }, { "epoch": 0.5157826579614742, "grad_norm": 0.304243803024292, "learning_rate": 9.974660374518655e-06, "loss": 0.30162864923477173, "step": 9559, "token_acc": 0.8937393927965303 }, { "epoch": 0.5158366157664707, "grad_norm": 0.40146970748901367, "learning_rate": 9.972912818276812e-06, "loss": 0.30625516176223755, "step": 9560, "token_acc": 0.8885992353145638 }, { "epoch": 0.5158905735714671, "grad_norm": 0.38349804282188416, "learning_rate": 9.971165262862199e-06, "loss": 0.3702086806297302, "step": 9561, "token_acc": 0.868766404199475 }, { "epoch": 0.5159445313764636, "grad_norm": 0.49550652503967285, "learning_rate": 9.969417708328195e-06, "loss": 0.39188963174819946, "step": 9562, "token_acc": 0.8622900763358778 }, { "epoch": 0.5159984891814601, "grad_norm": 0.42121607065200806, "learning_rate": 9.967670154728167e-06, "loss": 0.3567480146884918, "step": 9563, "token_acc": 0.8782907287294925 }, { "epoch": 0.5160524469864566, "grad_norm": 0.43669554591178894, "learning_rate": 9.965922602115484e-06, "loss": 0.3818032741546631, "step": 9564, "token_acc": 0.8630066700872242 }, { "epoch": 0.5161064047914531, "grad_norm": 0.26630693674087524, "learning_rate": 9.964175050543515e-06, "loss": 0.2992464303970337, "step": 9565, "token_acc": 0.8920489674786695 }, { "epoch": 0.5161603625964496, "grad_norm": 0.42971691489219666, "learning_rate": 9.962427500065631e-06, "loss": 0.310356080532074, "step": 9566, "token_acc": 0.883795510281079 }, { "epoch": 0.5162143204014461, "grad_norm": 0.31745925545692444, "learning_rate": 9.960679950735203e-06, "loss": 0.3061511218547821, "step": 9567, "token_acc": 0.8886282086668507 }, { "epoch": 0.5162682782064426, "grad_norm": 0.41354990005493164, "learning_rate": 9.958932402605595e-06, "loss": 0.3596240282058716, "step": 9568, "token_acc": 0.8722574353973671 }, { "epoch": 0.516322236011439, "grad_norm": 0.3670790195465088, "learning_rate": 9.957184855730183e-06, "loss": 0.3922034502029419, "step": 9569, "token_acc": 0.8610890302066773 }, { "epoch": 0.5163761938164355, "grad_norm": 0.4718244671821594, "learning_rate": 9.955437310162334e-06, "loss": 0.35793519020080566, "step": 9570, "token_acc": 0.8700488791505141 }, { "epoch": 0.516430151621432, "grad_norm": 0.3843942880630493, "learning_rate": 9.953689765955419e-06, "loss": 0.38223135471343994, "step": 9571, "token_acc": 0.8634478672985783 }, { "epoch": 0.5164841094264285, "grad_norm": 0.3345191776752472, "learning_rate": 9.951942223162806e-06, "loss": 0.35771864652633667, "step": 9572, "token_acc": 0.8772225827384815 }, { "epoch": 0.516538067231425, "grad_norm": 0.39396125078201294, "learning_rate": 9.950194681837864e-06, "loss": 0.4007873237133026, "step": 9573, "token_acc": 0.8602397766464116 }, { "epoch": 0.5165920250364215, "grad_norm": 0.3250894248485565, "learning_rate": 9.94844714203396e-06, "loss": 0.28917500376701355, "step": 9574, "token_acc": 0.8954052511415526 }, { "epoch": 0.516645982841418, "grad_norm": 0.3531365990638733, "learning_rate": 9.946699603804473e-06, "loss": 0.34832465648651123, "step": 9575, "token_acc": 0.8764421287681429 }, { "epoch": 0.5166999406464146, "grad_norm": 0.4907378852367401, "learning_rate": 9.944952067202763e-06, "loss": 0.4219892919063568, "step": 9576, "token_acc": 0.8567982734543608 }, { "epoch": 0.516753898451411, "grad_norm": 0.3733054995536804, "learning_rate": 9.943204532282203e-06, "loss": 0.34796029329299927, "step": 9577, "token_acc": 0.8726019994596055 }, { "epoch": 0.5168078562564075, "grad_norm": 0.3041066825389862, "learning_rate": 9.941456999096164e-06, "loss": 0.3066985607147217, "step": 9578, "token_acc": 0.8895472860867504 }, { "epoch": 0.516861814061404, "grad_norm": 0.3749343156814575, "learning_rate": 9.93970946769801e-06, "loss": 0.3962979316711426, "step": 9579, "token_acc": 0.8611196554906183 }, { "epoch": 0.5169157718664005, "grad_norm": 0.3903326094150543, "learning_rate": 9.937961938141109e-06, "loss": 0.35624587535858154, "step": 9580, "token_acc": 0.8755429084918377 }, { "epoch": 0.516969729671397, "grad_norm": 0.3378359377384186, "learning_rate": 9.936214410478842e-06, "loss": 0.3841314911842346, "step": 9581, "token_acc": 0.868002672010688 }, { "epoch": 0.5170236874763935, "grad_norm": 0.3769449293613434, "learning_rate": 9.934466884764567e-06, "loss": 0.2863263487815857, "step": 9582, "token_acc": 0.8946705077421678 }, { "epoch": 0.51707764528139, "grad_norm": 0.4882771372795105, "learning_rate": 9.932719361051657e-06, "loss": 0.35198545455932617, "step": 9583, "token_acc": 0.8826086956521739 }, { "epoch": 0.5171316030863864, "grad_norm": 0.5080475211143494, "learning_rate": 9.93097183939348e-06, "loss": 0.4283796548843384, "step": 9584, "token_acc": 0.8594122319301033 }, { "epoch": 0.5171855608913829, "grad_norm": 0.3640860319137573, "learning_rate": 9.929224319843404e-06, "loss": 0.3568301796913147, "step": 9585, "token_acc": 0.8728772440562833 }, { "epoch": 0.5172395186963794, "grad_norm": 0.4574452042579651, "learning_rate": 9.9274768024548e-06, "loss": 0.35336607694625854, "step": 9586, "token_acc": 0.8740450903670579 }, { "epoch": 0.5172934765013759, "grad_norm": 0.4425407946109772, "learning_rate": 9.92572928728103e-06, "loss": 0.3831644356250763, "step": 9587, "token_acc": 0.8585123966942149 }, { "epoch": 0.5173474343063724, "grad_norm": 0.32347139716148376, "learning_rate": 9.923981774375472e-06, "loss": 0.3713075518608093, "step": 9588, "token_acc": 0.8715676051442475 }, { "epoch": 0.5174013921113689, "grad_norm": 0.40967851877212524, "learning_rate": 9.922234263791492e-06, "loss": 0.3466436266899109, "step": 9589, "token_acc": 0.874427003750521 }, { "epoch": 0.5174553499163654, "grad_norm": 0.4364698827266693, "learning_rate": 9.920486755582457e-06, "loss": 0.3586716651916504, "step": 9590, "token_acc": 0.8725118483412322 }, { "epoch": 0.5175093077213619, "grad_norm": 0.38825854659080505, "learning_rate": 9.918739249801738e-06, "loss": 0.32692697644233704, "step": 9591, "token_acc": 0.8832063305978898 }, { "epoch": 0.5175632655263583, "grad_norm": 0.4774875044822693, "learning_rate": 9.916991746502698e-06, "loss": 0.3934568762779236, "step": 9592, "token_acc": 0.866112650046168 }, { "epoch": 0.5176172233313548, "grad_norm": 0.3652905225753784, "learning_rate": 9.915244245738705e-06, "loss": 0.358102023601532, "step": 9593, "token_acc": 0.8747420225432608 }, { "epoch": 0.5176711811363514, "grad_norm": 0.4336813986301422, "learning_rate": 9.913496747563134e-06, "loss": 0.3580154478549957, "step": 9594, "token_acc": 0.880374113742646 }, { "epoch": 0.5177251389413479, "grad_norm": 0.3947131931781769, "learning_rate": 9.91174925202935e-06, "loss": 0.31461578607559204, "step": 9595, "token_acc": 0.8847402597402597 }, { "epoch": 0.5177790967463444, "grad_norm": 0.2762347161769867, "learning_rate": 9.91000175919072e-06, "loss": 0.34122467041015625, "step": 9596, "token_acc": 0.8832827516439049 }, { "epoch": 0.5178330545513409, "grad_norm": 0.45300954580307007, "learning_rate": 9.908254269100614e-06, "loss": 0.4454711079597473, "step": 9597, "token_acc": 0.8515387663452648 }, { "epoch": 0.5178870123563374, "grad_norm": 0.41044747829437256, "learning_rate": 9.906506781812398e-06, "loss": 0.3611171245574951, "step": 9598, "token_acc": 0.8712507074136955 }, { "epoch": 0.5179409701613339, "grad_norm": 0.44250544905662537, "learning_rate": 9.90475929737944e-06, "loss": 0.38855108618736267, "step": 9599, "token_acc": 0.864910790144435 }, { "epoch": 0.5179949279663303, "grad_norm": 0.37306860089302063, "learning_rate": 9.903011815855108e-06, "loss": 0.34018537402153015, "step": 9600, "token_acc": 0.8792436613665664 }, { "epoch": 0.5180488857713268, "grad_norm": 0.31797751784324646, "learning_rate": 9.90126433729277e-06, "loss": 0.33514732122421265, "step": 9601, "token_acc": 0.8824618891579997 }, { "epoch": 0.5181028435763233, "grad_norm": 0.33142539858818054, "learning_rate": 9.899516861745792e-06, "loss": 0.3482929766178131, "step": 9602, "token_acc": 0.8764797855706946 }, { "epoch": 0.5181568013813198, "grad_norm": 0.4906247854232788, "learning_rate": 9.897769389267546e-06, "loss": 0.4263242483139038, "step": 9603, "token_acc": 0.8610651670525967 }, { "epoch": 0.5182107591863163, "grad_norm": 0.4157051742076874, "learning_rate": 9.896021919911393e-06, "loss": 0.3522505760192871, "step": 9604, "token_acc": 0.8756056042948802 }, { "epoch": 0.5182647169913128, "grad_norm": 0.389986515045166, "learning_rate": 9.894274453730706e-06, "loss": 0.2863170802593231, "step": 9605, "token_acc": 0.8936229462545252 }, { "epoch": 0.5183186747963093, "grad_norm": 0.445404976606369, "learning_rate": 9.892526990778846e-06, "loss": 0.33833247423171997, "step": 9606, "token_acc": 0.8770320967069613 }, { "epoch": 0.5183726326013057, "grad_norm": 0.45419347286224365, "learning_rate": 9.890779531109185e-06, "loss": 0.3769470751285553, "step": 9607, "token_acc": 0.8724272204709809 }, { "epoch": 0.5184265904063022, "grad_norm": 0.4470103681087494, "learning_rate": 9.889032074775088e-06, "loss": 0.33316755294799805, "step": 9608, "token_acc": 0.8797546012269939 }, { "epoch": 0.5184805482112987, "grad_norm": 0.4136967360973358, "learning_rate": 9.887284621829925e-06, "loss": 0.324715256690979, "step": 9609, "token_acc": 0.8854296388542964 }, { "epoch": 0.5185345060162952, "grad_norm": 0.38499870896339417, "learning_rate": 9.885537172327059e-06, "loss": 0.3973405957221985, "step": 9610, "token_acc": 0.8629098121572746 }, { "epoch": 0.5185884638212918, "grad_norm": 0.3489283323287964, "learning_rate": 9.883789726319856e-06, "loss": 0.287888765335083, "step": 9611, "token_acc": 0.8965740142210731 }, { "epoch": 0.5186424216262883, "grad_norm": 0.42072218656539917, "learning_rate": 9.882042283861687e-06, "loss": 0.3732856810092926, "step": 9612, "token_acc": 0.8668035592060233 }, { "epoch": 0.5186963794312848, "grad_norm": 0.4415591359138489, "learning_rate": 9.88029484500591e-06, "loss": 0.3909762501716614, "step": 9613, "token_acc": 0.8641278796426892 }, { "epoch": 0.5187503372362813, "grad_norm": 0.41515204310417175, "learning_rate": 9.878547409805901e-06, "loss": 0.36582791805267334, "step": 9614, "token_acc": 0.8710856779526307 }, { "epoch": 0.5188042950412777, "grad_norm": 0.3944396376609802, "learning_rate": 9.876799978315027e-06, "loss": 0.3288213312625885, "step": 9615, "token_acc": 0.8809461918377957 }, { "epoch": 0.5188582528462742, "grad_norm": 0.34647485613822937, "learning_rate": 9.875052550586644e-06, "loss": 0.3422446846961975, "step": 9616, "token_acc": 0.8758518518518519 }, { "epoch": 0.5189122106512707, "grad_norm": 0.4162827432155609, "learning_rate": 9.873305126674124e-06, "loss": 0.4297395944595337, "step": 9617, "token_acc": 0.8545766947828803 }, { "epoch": 0.5189661684562672, "grad_norm": 0.3312510848045349, "learning_rate": 9.871557706630834e-06, "loss": 0.37141960859298706, "step": 9618, "token_acc": 0.8662627622573769 }, { "epoch": 0.5190201262612637, "grad_norm": 0.32944419980049133, "learning_rate": 9.869810290510131e-06, "loss": 0.3372296690940857, "step": 9619, "token_acc": 0.8774937195212058 }, { "epoch": 0.5190740840662602, "grad_norm": 0.3900006115436554, "learning_rate": 9.868062878365395e-06, "loss": 0.3644927144050598, "step": 9620, "token_acc": 0.8765116675183103 }, { "epoch": 0.5191280418712567, "grad_norm": 0.34373775124549866, "learning_rate": 9.866315470249982e-06, "loss": 0.3095617890357971, "step": 9621, "token_acc": 0.8889392052530284 }, { "epoch": 0.5191819996762532, "grad_norm": 0.3116007447242737, "learning_rate": 9.86456806621726e-06, "loss": 0.3873715102672577, "step": 9622, "token_acc": 0.8688172043010752 }, { "epoch": 0.5192359574812496, "grad_norm": 0.5003300905227661, "learning_rate": 9.862820666320594e-06, "loss": 0.32955002784729004, "step": 9623, "token_acc": 0.8812802381838482 }, { "epoch": 0.5192899152862461, "grad_norm": 0.36638331413269043, "learning_rate": 9.861073270613348e-06, "loss": 0.33698704838752747, "step": 9624, "token_acc": 0.8818997094859163 }, { "epoch": 0.5193438730912426, "grad_norm": 0.42201271653175354, "learning_rate": 9.859325879148888e-06, "loss": 0.30958110094070435, "step": 9625, "token_acc": 0.8876686539064952 }, { "epoch": 0.5193978308962391, "grad_norm": 0.4572997987270355, "learning_rate": 9.857578491980578e-06, "loss": 0.40399786829948425, "step": 9626, "token_acc": 0.8571958915975746 }, { "epoch": 0.5194517887012357, "grad_norm": 0.405863493680954, "learning_rate": 9.855831109161786e-06, "loss": 0.3617861270904541, "step": 9627, "token_acc": 0.878103306271326 }, { "epoch": 0.5195057465062322, "grad_norm": 0.33502069115638733, "learning_rate": 9.854083730745874e-06, "loss": 0.30885353684425354, "step": 9628, "token_acc": 0.8845726970033296 }, { "epoch": 0.5195597043112287, "grad_norm": 0.3500870168209076, "learning_rate": 9.852336356786206e-06, "loss": 0.33906790614128113, "step": 9629, "token_acc": 0.877655490145892 }, { "epoch": 0.5196136621162251, "grad_norm": 0.4298880696296692, "learning_rate": 9.850588987336146e-06, "loss": 0.3691813349723816, "step": 9630, "token_acc": 0.8725404185988219 }, { "epoch": 0.5196676199212216, "grad_norm": 0.30899620056152344, "learning_rate": 9.84884162244906e-06, "loss": 0.3259528577327728, "step": 9631, "token_acc": 0.8872444011684518 }, { "epoch": 0.5197215777262181, "grad_norm": 0.3839881420135498, "learning_rate": 9.84709426217831e-06, "loss": 0.3222324848175049, "step": 9632, "token_acc": 0.8850328117112569 }, { "epoch": 0.5197755355312146, "grad_norm": 0.371707022190094, "learning_rate": 9.845346906577262e-06, "loss": 0.35367345809936523, "step": 9633, "token_acc": 0.8753048104633119 }, { "epoch": 0.5198294933362111, "grad_norm": 0.38982945680618286, "learning_rate": 9.84359955569928e-06, "loss": 0.34824568033218384, "step": 9634, "token_acc": 0.8800746616892208 }, { "epoch": 0.5198834511412076, "grad_norm": 0.4084601104259491, "learning_rate": 9.841852209597725e-06, "loss": 0.35328084230422974, "step": 9635, "token_acc": 0.8771379703534777 }, { "epoch": 0.5199374089462041, "grad_norm": 0.3335093557834625, "learning_rate": 9.840104868325963e-06, "loss": 0.3184205889701843, "step": 9636, "token_acc": 0.8811795316565482 }, { "epoch": 0.5199913667512006, "grad_norm": 0.5028857588768005, "learning_rate": 9.838357531937356e-06, "loss": 0.32908374071121216, "step": 9637, "token_acc": 0.8906569674926119 }, { "epoch": 0.520045324556197, "grad_norm": 0.33008071780204773, "learning_rate": 9.836610200485264e-06, "loss": 0.309669554233551, "step": 9638, "token_acc": 0.887737007229573 }, { "epoch": 0.5200992823611935, "grad_norm": 0.3837248682975769, "learning_rate": 9.834862874023062e-06, "loss": 0.3515221178531647, "step": 9639, "token_acc": 0.8741825419391527 }, { "epoch": 0.52015324016619, "grad_norm": 0.3625733256340027, "learning_rate": 9.833115552604098e-06, "loss": 0.3440168797969818, "step": 9640, "token_acc": 0.8779132582188609 }, { "epoch": 0.5202071979711865, "grad_norm": 0.32476142048835754, "learning_rate": 9.831368236281746e-06, "loss": 0.37411201000213623, "step": 9641, "token_acc": 0.870015609479211 }, { "epoch": 0.520261155776183, "grad_norm": 0.44157782196998596, "learning_rate": 9.82962092510936e-06, "loss": 0.3715905547142029, "step": 9642, "token_acc": 0.8714165968147527 }, { "epoch": 0.5203151135811795, "grad_norm": 0.42627596855163574, "learning_rate": 9.827873619140309e-06, "loss": 0.35492491722106934, "step": 9643, "token_acc": 0.8771570634688506 }, { "epoch": 0.520369071386176, "grad_norm": 0.3465251326560974, "learning_rate": 9.82612631842795e-06, "loss": 0.33743229508399963, "step": 9644, "token_acc": 0.8739675306180575 }, { "epoch": 0.5204230291911724, "grad_norm": 0.32807931303977966, "learning_rate": 9.824379023025647e-06, "loss": 0.3382084369659424, "step": 9645, "token_acc": 0.8778089887640449 }, { "epoch": 0.520476986996169, "grad_norm": 0.35376206040382385, "learning_rate": 9.822631732986764e-06, "loss": 0.3198885917663574, "step": 9646, "token_acc": 0.8824531516183987 }, { "epoch": 0.5205309448011655, "grad_norm": 0.4171229898929596, "learning_rate": 9.820884448364662e-06, "loss": 0.3831336498260498, "step": 9647, "token_acc": 0.8653146853146854 }, { "epoch": 0.520584902606162, "grad_norm": 0.3007495105266571, "learning_rate": 9.819137169212702e-06, "loss": 0.37764957547187805, "step": 9648, "token_acc": 0.8693940056880333 }, { "epoch": 0.5206388604111585, "grad_norm": 0.4016614556312561, "learning_rate": 9.817389895584246e-06, "loss": 0.378030002117157, "step": 9649, "token_acc": 0.863894856565146 }, { "epoch": 0.520692818216155, "grad_norm": 0.3509279191493988, "learning_rate": 9.815642627532656e-06, "loss": 0.3816685974597931, "step": 9650, "token_acc": 0.8633540372670807 }, { "epoch": 0.5207467760211515, "grad_norm": 0.4164111614227295, "learning_rate": 9.813895365111286e-06, "loss": 0.4248485267162323, "step": 9651, "token_acc": 0.8581943081452404 }, { "epoch": 0.520800733826148, "grad_norm": 0.3251146078109741, "learning_rate": 9.812148108373509e-06, "loss": 0.3164253830909729, "step": 9652, "token_acc": 0.8874580673087328 }, { "epoch": 0.5208546916311444, "grad_norm": 0.39740294218063354, "learning_rate": 9.810400857372677e-06, "loss": 0.33673399686813354, "step": 9653, "token_acc": 0.8784934628763345 }, { "epoch": 0.5209086494361409, "grad_norm": 0.3197910189628601, "learning_rate": 9.808653612162154e-06, "loss": 0.3575872480869293, "step": 9654, "token_acc": 0.8718556565257474 }, { "epoch": 0.5209626072411374, "grad_norm": 0.4040147364139557, "learning_rate": 9.8069063727953e-06, "loss": 0.36212384700775146, "step": 9655, "token_acc": 0.8749574105621806 }, { "epoch": 0.5210165650461339, "grad_norm": 0.41707172989845276, "learning_rate": 9.805159139325475e-06, "loss": 0.4098895788192749, "step": 9656, "token_acc": 0.8590613318565693 }, { "epoch": 0.5210705228511304, "grad_norm": 0.41365718841552734, "learning_rate": 9.803411911806036e-06, "loss": 0.34826210141181946, "step": 9657, "token_acc": 0.8807998954385048 }, { "epoch": 0.5211244806561269, "grad_norm": 0.48023521900177, "learning_rate": 9.801664690290347e-06, "loss": 0.38448262214660645, "step": 9658, "token_acc": 0.8702976774615636 }, { "epoch": 0.5211784384611234, "grad_norm": 0.4095314145088196, "learning_rate": 9.799917474831766e-06, "loss": 0.3893013596534729, "step": 9659, "token_acc": 0.8657469717362046 }, { "epoch": 0.52123239626612, "grad_norm": 0.42009198665618896, "learning_rate": 9.798170265483655e-06, "loss": 0.3292175531387329, "step": 9660, "token_acc": 0.8840979088993354 }, { "epoch": 0.5212863540711163, "grad_norm": 0.3772442638874054, "learning_rate": 9.79642306229937e-06, "loss": 0.31876957416534424, "step": 9661, "token_acc": 0.8835184250378597 }, { "epoch": 0.5213403118761128, "grad_norm": 0.455731600522995, "learning_rate": 9.794675865332272e-06, "loss": 0.3438417911529541, "step": 9662, "token_acc": 0.875111032154912 }, { "epoch": 0.5213942696811094, "grad_norm": 0.4713578224182129, "learning_rate": 9.792928674635719e-06, "loss": 0.35311806201934814, "step": 9663, "token_acc": 0.8741308611160634 }, { "epoch": 0.5214482274861059, "grad_norm": 0.4413629472255707, "learning_rate": 9.791181490263068e-06, "loss": 0.3898954391479492, "step": 9664, "token_acc": 0.8629872061886343 }, { "epoch": 0.5215021852911024, "grad_norm": 0.39977067708969116, "learning_rate": 9.78943431226768e-06, "loss": 0.3356894850730896, "step": 9665, "token_acc": 0.87464858607574 }, { "epoch": 0.5215561430960989, "grad_norm": 0.3053780198097229, "learning_rate": 9.787687140702915e-06, "loss": 0.3753536343574524, "step": 9666, "token_acc": 0.868848167539267 }, { "epoch": 0.5216101009010954, "grad_norm": 0.3295215666294098, "learning_rate": 9.78593997562213e-06, "loss": 0.3382400870323181, "step": 9667, "token_acc": 0.8827060725005772 }, { "epoch": 0.5216640587060918, "grad_norm": 0.4922255575656891, "learning_rate": 9.784192817078679e-06, "loss": 0.40244758129119873, "step": 9668, "token_acc": 0.8648022249690976 }, { "epoch": 0.5217180165110883, "grad_norm": 0.3475850820541382, "learning_rate": 9.782445665125925e-06, "loss": 0.3631371259689331, "step": 9669, "token_acc": 0.8721406849133941 }, { "epoch": 0.5217719743160848, "grad_norm": 0.3648916780948639, "learning_rate": 9.780698519817224e-06, "loss": 0.35160377621650696, "step": 9670, "token_acc": 0.8810994830412306 }, { "epoch": 0.5218259321210813, "grad_norm": 0.3744772970676422, "learning_rate": 9.778951381205926e-06, "loss": 0.37967294454574585, "step": 9671, "token_acc": 0.8754607688256978 }, { "epoch": 0.5218798899260778, "grad_norm": 0.38400596380233765, "learning_rate": 9.777204249345401e-06, "loss": 0.3624712824821472, "step": 9672, "token_acc": 0.8764140615496898 }, { "epoch": 0.5219338477310743, "grad_norm": 0.4215583801269531, "learning_rate": 9.775457124288999e-06, "loss": 0.37252333760261536, "step": 9673, "token_acc": 0.869853717388026 }, { "epoch": 0.5219878055360708, "grad_norm": 0.3143932819366455, "learning_rate": 9.77371000609008e-06, "loss": 0.36883753538131714, "step": 9674, "token_acc": 0.872511967750063 }, { "epoch": 0.5220417633410673, "grad_norm": 0.3544169068336487, "learning_rate": 9.771962894801997e-06, "loss": 0.3276292681694031, "step": 9675, "token_acc": 0.881813614669681 }, { "epoch": 0.5220957211460637, "grad_norm": 0.4973323345184326, "learning_rate": 9.770215790478107e-06, "loss": 0.42912501096725464, "step": 9676, "token_acc": 0.852930335422042 }, { "epoch": 0.5221496789510602, "grad_norm": 0.30976083874702454, "learning_rate": 9.768468693171761e-06, "loss": 0.3019041121006012, "step": 9677, "token_acc": 0.8908026940488021 }, { "epoch": 0.5222036367560567, "grad_norm": 0.41402795910835266, "learning_rate": 9.766721602936326e-06, "loss": 0.3468015789985657, "step": 9678, "token_acc": 0.876505586997533 }, { "epoch": 0.5222575945610533, "grad_norm": 0.2756749987602234, "learning_rate": 9.764974519825152e-06, "loss": 0.3161078095436096, "step": 9679, "token_acc": 0.8916088174400195 }, { "epoch": 0.5223115523660498, "grad_norm": 0.4533557593822479, "learning_rate": 9.763227443891594e-06, "loss": 0.33923253417015076, "step": 9680, "token_acc": 0.8818740399385561 }, { "epoch": 0.5223655101710463, "grad_norm": 0.3353564441204071, "learning_rate": 9.761480375189009e-06, "loss": 0.36400818824768066, "step": 9681, "token_acc": 0.8698315467075038 }, { "epoch": 0.5224194679760428, "grad_norm": 0.41137003898620605, "learning_rate": 9.759733313770749e-06, "loss": 0.412855327129364, "step": 9682, "token_acc": 0.859588003635262 }, { "epoch": 0.5224734257810393, "grad_norm": 0.4677436649799347, "learning_rate": 9.75798625969017e-06, "loss": 0.4199105501174927, "step": 9683, "token_acc": 0.8596413874191652 }, { "epoch": 0.5225273835860357, "grad_norm": 0.3569146990776062, "learning_rate": 9.756239213000629e-06, "loss": 0.3210326135158539, "step": 9684, "token_acc": 0.8857688113413305 }, { "epoch": 0.5225813413910322, "grad_norm": 0.35191676020622253, "learning_rate": 9.75449217375548e-06, "loss": 0.37104201316833496, "step": 9685, "token_acc": 0.8711304347826087 }, { "epoch": 0.5226352991960287, "grad_norm": 0.30007219314575195, "learning_rate": 9.752745142008073e-06, "loss": 0.34213876724243164, "step": 9686, "token_acc": 0.8771860488094062 }, { "epoch": 0.5226892570010252, "grad_norm": 0.46674901247024536, "learning_rate": 9.750998117811764e-06, "loss": 0.37437668442726135, "step": 9687, "token_acc": 0.8662716885315277 }, { "epoch": 0.5227432148060217, "grad_norm": 0.39969539642333984, "learning_rate": 9.749251101219911e-06, "loss": 0.39268720149993896, "step": 9688, "token_acc": 0.8677275620623807 }, { "epoch": 0.5227971726110182, "grad_norm": 0.4658876061439514, "learning_rate": 9.747504092285861e-06, "loss": 0.39033937454223633, "step": 9689, "token_acc": 0.8661010208139998 }, { "epoch": 0.5228511304160147, "grad_norm": 0.48772817850112915, "learning_rate": 9.745757091062969e-06, "loss": 0.4131966531276703, "step": 9690, "token_acc": 0.8616865453023645 }, { "epoch": 0.5229050882210111, "grad_norm": 0.41827887296676636, "learning_rate": 9.74401009760459e-06, "loss": 0.36579400300979614, "step": 9691, "token_acc": 0.8666747250090656 }, { "epoch": 0.5229590460260076, "grad_norm": 0.4019709527492523, "learning_rate": 9.742263111964076e-06, "loss": 0.30852410197257996, "step": 9692, "token_acc": 0.886495083863505 }, { "epoch": 0.5230130038310041, "grad_norm": 0.36506596207618713, "learning_rate": 9.740516134194779e-06, "loss": 0.39014914631843567, "step": 9693, "token_acc": 0.8655034138940068 }, { "epoch": 0.5230669616360006, "grad_norm": 0.3900656998157501, "learning_rate": 9.738769164350051e-06, "loss": 0.373706191778183, "step": 9694, "token_acc": 0.8668230747998896 }, { "epoch": 0.5231209194409971, "grad_norm": 0.3723040521144867, "learning_rate": 9.737022202483247e-06, "loss": 0.2873002290725708, "step": 9695, "token_acc": 0.8956419316843345 }, { "epoch": 0.5231748772459937, "grad_norm": 0.38519078493118286, "learning_rate": 9.735275248647707e-06, "loss": 0.3438962996006012, "step": 9696, "token_acc": 0.8790288229447678 }, { "epoch": 0.5232288350509902, "grad_norm": 0.3915989398956299, "learning_rate": 9.733528302896799e-06, "loss": 0.32085806131362915, "step": 9697, "token_acc": 0.8850999726102438 }, { "epoch": 0.5232827928559867, "grad_norm": 0.35087111592292786, "learning_rate": 9.731781365283868e-06, "loss": 0.30332857370376587, "step": 9698, "token_acc": 0.8911932233094786 }, { "epoch": 0.5233367506609831, "grad_norm": 0.46381449699401855, "learning_rate": 9.730034435862261e-06, "loss": 0.3730214238166809, "step": 9699, "token_acc": 0.875203119922002 }, { "epoch": 0.5233907084659796, "grad_norm": 0.33035725355148315, "learning_rate": 9.728287514685334e-06, "loss": 0.3331022262573242, "step": 9700, "token_acc": 0.8805831676607024 }, { "epoch": 0.5234446662709761, "grad_norm": 0.39038750529289246, "learning_rate": 9.726540601806433e-06, "loss": 0.2983161211013794, "step": 9701, "token_acc": 0.8841861444514635 }, { "epoch": 0.5234986240759726, "grad_norm": 0.3596839904785156, "learning_rate": 9.724793697278911e-06, "loss": 0.39307263493537903, "step": 9702, "token_acc": 0.8668941979522184 }, { "epoch": 0.5235525818809691, "grad_norm": 0.4369618594646454, "learning_rate": 9.723046801156114e-06, "loss": 0.3282921314239502, "step": 9703, "token_acc": 0.8816193746611242 }, { "epoch": 0.5236065396859656, "grad_norm": 0.21490781009197235, "learning_rate": 9.721299913491398e-06, "loss": 0.3072201609611511, "step": 9704, "token_acc": 0.8857873924168411 }, { "epoch": 0.5236604974909621, "grad_norm": 0.36876407265663147, "learning_rate": 9.71955303433811e-06, "loss": 0.3597796559333801, "step": 9705, "token_acc": 0.8746694870438921 }, { "epoch": 0.5237144552959586, "grad_norm": 0.37025919556617737, "learning_rate": 9.717806163749599e-06, "loss": 0.3913613557815552, "step": 9706, "token_acc": 0.8636697247706422 }, { "epoch": 0.523768413100955, "grad_norm": 0.37024450302124023, "learning_rate": 9.716059301779214e-06, "loss": 0.3732629120349884, "step": 9707, "token_acc": 0.8747323340471093 }, { "epoch": 0.5238223709059515, "grad_norm": 0.37124770879745483, "learning_rate": 9.714312448480307e-06, "loss": 0.33021971583366394, "step": 9708, "token_acc": 0.8778733111072118 }, { "epoch": 0.523876328710948, "grad_norm": 0.40043216943740845, "learning_rate": 9.712565603906214e-06, "loss": 0.3827173709869385, "step": 9709, "token_acc": 0.8658020862700874 }, { "epoch": 0.5239302865159445, "grad_norm": 0.3756014108657837, "learning_rate": 9.710818768110298e-06, "loss": 0.3250836133956909, "step": 9710, "token_acc": 0.8825304701102727 }, { "epoch": 0.523984244320941, "grad_norm": 0.4349949061870575, "learning_rate": 9.7090719411459e-06, "loss": 0.3637523651123047, "step": 9711, "token_acc": 0.869140937450008 }, { "epoch": 0.5240382021259375, "grad_norm": 0.37503060698509216, "learning_rate": 9.70732512306637e-06, "loss": 0.3687725067138672, "step": 9712, "token_acc": 0.8721093202522775 }, { "epoch": 0.524092159930934, "grad_norm": 0.4193265438079834, "learning_rate": 9.705578313925053e-06, "loss": 0.34632837772369385, "step": 9713, "token_acc": 0.8753559000316355 }, { "epoch": 0.5241461177359305, "grad_norm": 0.2767987847328186, "learning_rate": 9.703831513775298e-06, "loss": 0.34259432554244995, "step": 9714, "token_acc": 0.8791160544416801 }, { "epoch": 0.524200075540927, "grad_norm": 0.45354926586151123, "learning_rate": 9.702084722670451e-06, "loss": 0.37387266755104065, "step": 9715, "token_acc": 0.8686484111875824 }, { "epoch": 0.5242540333459235, "grad_norm": 0.3662056624889374, "learning_rate": 9.700337940663855e-06, "loss": 0.3762819170951843, "step": 9716, "token_acc": 0.8697200392927309 }, { "epoch": 0.52430799115092, "grad_norm": 0.3738538920879364, "learning_rate": 9.698591167808863e-06, "loss": 0.3734833598136902, "step": 9717, "token_acc": 0.8682344599385488 }, { "epoch": 0.5243619489559165, "grad_norm": 0.41747382283210754, "learning_rate": 9.696844404158817e-06, "loss": 0.30351924896240234, "step": 9718, "token_acc": 0.8927982407916437 }, { "epoch": 0.524415906760913, "grad_norm": 0.4024118185043335, "learning_rate": 9.695097649767065e-06, "loss": 0.2838882803916931, "step": 9719, "token_acc": 0.8956237753102547 }, { "epoch": 0.5244698645659095, "grad_norm": 0.407208651304245, "learning_rate": 9.69335090468695e-06, "loss": 0.3541294038295746, "step": 9720, "token_acc": 0.8737898257349058 }, { "epoch": 0.524523822370906, "grad_norm": 0.35620278120040894, "learning_rate": 9.691604168971817e-06, "loss": 0.37833911180496216, "step": 9721, "token_acc": 0.8625893566322478 }, { "epoch": 0.5245777801759024, "grad_norm": 0.4454860985279083, "learning_rate": 9.689857442675011e-06, "loss": 0.39781227707862854, "step": 9722, "token_acc": 0.8601388273519421 }, { "epoch": 0.5246317379808989, "grad_norm": 0.3699105381965637, "learning_rate": 9.68811072584988e-06, "loss": 0.35907620191574097, "step": 9723, "token_acc": 0.8781424148606811 }, { "epoch": 0.5246856957858954, "grad_norm": 0.3456933796405792, "learning_rate": 9.686364018549765e-06, "loss": 0.31591135263442993, "step": 9724, "token_acc": 0.890856731461483 }, { "epoch": 0.5247396535908919, "grad_norm": 0.33891943097114563, "learning_rate": 9.684617320828012e-06, "loss": 0.3278574049472809, "step": 9725, "token_acc": 0.8828001267025657 }, { "epoch": 0.5247936113958884, "grad_norm": 0.3666270971298218, "learning_rate": 9.682870632737962e-06, "loss": 0.3214346170425415, "step": 9726, "token_acc": 0.8884062312762133 }, { "epoch": 0.5248475692008849, "grad_norm": 0.46187958121299744, "learning_rate": 9.681123954332959e-06, "loss": 0.3399679660797119, "step": 9727, "token_acc": 0.8798206278026905 }, { "epoch": 0.5249015270058814, "grad_norm": 0.3518005609512329, "learning_rate": 9.679377285666349e-06, "loss": 0.32264700531959534, "step": 9728, "token_acc": 0.8843373493975903 }, { "epoch": 0.524955484810878, "grad_norm": 0.3444030284881592, "learning_rate": 9.677630626791466e-06, "loss": 0.35763347148895264, "step": 9729, "token_acc": 0.8741095553917956 }, { "epoch": 0.5250094426158743, "grad_norm": 0.37617841362953186, "learning_rate": 9.675883977761666e-06, "loss": 0.3874762952327728, "step": 9730, "token_acc": 0.8632139928942334 }, { "epoch": 0.5250634004208709, "grad_norm": 0.35191842913627625, "learning_rate": 9.674137338630283e-06, "loss": 0.33045536279678345, "step": 9731, "token_acc": 0.8809213385484572 }, { "epoch": 0.5251173582258674, "grad_norm": 0.36601945757865906, "learning_rate": 9.672390709450663e-06, "loss": 0.38809671998023987, "step": 9732, "token_acc": 0.865696765660695 }, { "epoch": 0.5251713160308639, "grad_norm": 0.5195035338401794, "learning_rate": 9.670644090276142e-06, "loss": 0.39616668224334717, "step": 9733, "token_acc": 0.8649396875617955 }, { "epoch": 0.5252252738358604, "grad_norm": 0.37812134623527527, "learning_rate": 9.668897481160066e-06, "loss": 0.3879773020744324, "step": 9734, "token_acc": 0.8636714528331018 }, { "epoch": 0.5252792316408569, "grad_norm": 0.2531130611896515, "learning_rate": 9.667150882155768e-06, "loss": 0.3509378433227539, "step": 9735, "token_acc": 0.8764784646284312 }, { "epoch": 0.5253331894458534, "grad_norm": 0.43841928243637085, "learning_rate": 9.6654042933166e-06, "loss": 0.3613974452018738, "step": 9736, "token_acc": 0.8721608348680172 }, { "epoch": 0.5253871472508498, "grad_norm": 0.41752737760543823, "learning_rate": 9.663657714695897e-06, "loss": 0.3292997181415558, "step": 9737, "token_acc": 0.8825448613376835 }, { "epoch": 0.5254411050558463, "grad_norm": 0.4441292881965637, "learning_rate": 9.661911146347e-06, "loss": 0.3744218945503235, "step": 9738, "token_acc": 0.869121338912134 }, { "epoch": 0.5254950628608428, "grad_norm": 0.29384517669677734, "learning_rate": 9.660164588323247e-06, "loss": 0.3824619650840759, "step": 9739, "token_acc": 0.8680351906158358 }, { "epoch": 0.5255490206658393, "grad_norm": 0.3768838047981262, "learning_rate": 9.658418040677978e-06, "loss": 0.3760499060153961, "step": 9740, "token_acc": 0.8681598371086791 }, { "epoch": 0.5256029784708358, "grad_norm": 0.35558271408081055, "learning_rate": 9.656671503464531e-06, "loss": 0.322115033864975, "step": 9741, "token_acc": 0.8837239414735206 }, { "epoch": 0.5256569362758323, "grad_norm": 0.4146331250667572, "learning_rate": 9.654924976736248e-06, "loss": 0.3298882246017456, "step": 9742, "token_acc": 0.8799624463519313 }, { "epoch": 0.5257108940808288, "grad_norm": 0.4500432312488556, "learning_rate": 9.653178460546468e-06, "loss": 0.37264713644981384, "step": 9743, "token_acc": 0.8735544735240414 }, { "epoch": 0.5257648518858253, "grad_norm": 0.35454273223876953, "learning_rate": 9.651431954948525e-06, "loss": 0.3559325337409973, "step": 9744, "token_acc": 0.8712142261739372 }, { "epoch": 0.5258188096908217, "grad_norm": 0.3620225489139557, "learning_rate": 9.64968545999576e-06, "loss": 0.3811168074607849, "step": 9745, "token_acc": 0.8719497749348496 }, { "epoch": 0.5258727674958182, "grad_norm": 0.33330032229423523, "learning_rate": 9.647938975741508e-06, "loss": 0.3596961200237274, "step": 9746, "token_acc": 0.8729765203441738 }, { "epoch": 0.5259267253008147, "grad_norm": 0.3260866701602936, "learning_rate": 9.646192502239108e-06, "loss": 0.36915165185928345, "step": 9747, "token_acc": 0.8743861872326945 }, { "epoch": 0.5259806831058113, "grad_norm": 0.4291359484195709, "learning_rate": 9.644446039541893e-06, "loss": 0.39603298902511597, "step": 9748, "token_acc": 0.8642533936651584 }, { "epoch": 0.5260346409108078, "grad_norm": 0.38458898663520813, "learning_rate": 9.642699587703206e-06, "loss": 0.3526442348957062, "step": 9749, "token_acc": 0.8738826986653606 }, { "epoch": 0.5260885987158043, "grad_norm": 0.4224850535392761, "learning_rate": 9.64095314677638e-06, "loss": 0.4198933243751526, "step": 9750, "token_acc": 0.857563724862303 }, { "epoch": 0.5261425565208008, "grad_norm": 0.45420047640800476, "learning_rate": 9.639206716814751e-06, "loss": 0.40621110796928406, "step": 9751, "token_acc": 0.8631349782293178 }, { "epoch": 0.5261965143257972, "grad_norm": 0.34599167108535767, "learning_rate": 9.637460297871654e-06, "loss": 0.36933186650276184, "step": 9752, "token_acc": 0.8708112874779541 }, { "epoch": 0.5262504721307937, "grad_norm": 0.4017822742462158, "learning_rate": 9.635713890000425e-06, "loss": 0.3567778170108795, "step": 9753, "token_acc": 0.8768619662363456 }, { "epoch": 0.5263044299357902, "grad_norm": 0.30208733677864075, "learning_rate": 9.633967493254395e-06, "loss": 0.3197353184223175, "step": 9754, "token_acc": 0.8850540660642868 }, { "epoch": 0.5263583877407867, "grad_norm": 0.36175116896629333, "learning_rate": 9.632221107686905e-06, "loss": 0.36122941970825195, "step": 9755, "token_acc": 0.8676892201834863 }, { "epoch": 0.5264123455457832, "grad_norm": 0.44372496008872986, "learning_rate": 9.630474733351288e-06, "loss": 0.329150915145874, "step": 9756, "token_acc": 0.880635551142006 }, { "epoch": 0.5264663033507797, "grad_norm": 0.46436256170272827, "learning_rate": 9.628728370300875e-06, "loss": 0.39519643783569336, "step": 9757, "token_acc": 0.8609679446888749 }, { "epoch": 0.5265202611557762, "grad_norm": 0.4006153643131256, "learning_rate": 9.626982018588997e-06, "loss": 0.4266275465488434, "step": 9758, "token_acc": 0.8541336353340884 }, { "epoch": 0.5265742189607727, "grad_norm": 0.4058306813240051, "learning_rate": 9.625235678268993e-06, "loss": 0.3508734107017517, "step": 9759, "token_acc": 0.8797398064413772 }, { "epoch": 0.5266281767657691, "grad_norm": 0.5407326221466064, "learning_rate": 9.623489349394193e-06, "loss": 0.43641895055770874, "step": 9760, "token_acc": 0.8577777777777778 }, { "epoch": 0.5266821345707656, "grad_norm": 0.459144651889801, "learning_rate": 9.621743032017926e-06, "loss": 0.3244529068470001, "step": 9761, "token_acc": 0.8863460646358435 }, { "epoch": 0.5267360923757621, "grad_norm": 0.26020175218582153, "learning_rate": 9.61999672619353e-06, "loss": 0.3483794331550598, "step": 9762, "token_acc": 0.879972470750172 }, { "epoch": 0.5267900501807586, "grad_norm": 0.42194297909736633, "learning_rate": 9.618250431974337e-06, "loss": 0.40625864267349243, "step": 9763, "token_acc": 0.8601294847585649 }, { "epoch": 0.5268440079857551, "grad_norm": 0.4221559166908264, "learning_rate": 9.616504149413674e-06, "loss": 0.37948179244995117, "step": 9764, "token_acc": 0.8719845609520747 }, { "epoch": 0.5268979657907517, "grad_norm": 0.35830771923065186, "learning_rate": 9.614757878564874e-06, "loss": 0.2854660749435425, "step": 9765, "token_acc": 0.8958955223880597 }, { "epoch": 0.5269519235957482, "grad_norm": 0.3948439061641693, "learning_rate": 9.613011619481267e-06, "loss": 0.36013609170913696, "step": 9766, "token_acc": 0.8732624693376942 }, { "epoch": 0.5270058814007447, "grad_norm": 0.3368290066719055, "learning_rate": 9.611265372216182e-06, "loss": 0.33046215772628784, "step": 9767, "token_acc": 0.8748283303904257 }, { "epoch": 0.5270598392057411, "grad_norm": 0.3588155508041382, "learning_rate": 9.609519136822955e-06, "loss": 0.4068203866481781, "step": 9768, "token_acc": 0.8625858838226109 }, { "epoch": 0.5271137970107376, "grad_norm": 0.4050371050834656, "learning_rate": 9.607772913354909e-06, "loss": 0.3753359019756317, "step": 9769, "token_acc": 0.8653295128939829 }, { "epoch": 0.5271677548157341, "grad_norm": 0.413979709148407, "learning_rate": 9.606026701865375e-06, "loss": 0.3291614055633545, "step": 9770, "token_acc": 0.8786450662739322 }, { "epoch": 0.5272217126207306, "grad_norm": 0.38596034049987793, "learning_rate": 9.604280502407683e-06, "loss": 0.38681328296661377, "step": 9771, "token_acc": 0.8694724001219885 }, { "epoch": 0.5272756704257271, "grad_norm": 0.4136821925640106, "learning_rate": 9.602534315035161e-06, "loss": 0.35083645582199097, "step": 9772, "token_acc": 0.8746438746438746 }, { "epoch": 0.5273296282307236, "grad_norm": 0.4185906648635864, "learning_rate": 9.600788139801138e-06, "loss": 0.36621078848838806, "step": 9773, "token_acc": 0.8681234718826406 }, { "epoch": 0.5273835860357201, "grad_norm": 0.3992801010608673, "learning_rate": 9.599041976758938e-06, "loss": 0.38655322790145874, "step": 9774, "token_acc": 0.8667857598570882 }, { "epoch": 0.5274375438407165, "grad_norm": 0.29824748635292053, "learning_rate": 9.597295825961891e-06, "loss": 0.33202439546585083, "step": 9775, "token_acc": 0.8823389305704418 }, { "epoch": 0.527491501645713, "grad_norm": 0.41491127014160156, "learning_rate": 9.595549687463326e-06, "loss": 0.35645848512649536, "step": 9776, "token_acc": 0.8742216687422167 }, { "epoch": 0.5275454594507095, "grad_norm": 0.44353410601615906, "learning_rate": 9.593803561316567e-06, "loss": 0.3363412618637085, "step": 9777, "token_acc": 0.8759265795975997 }, { "epoch": 0.527599417255706, "grad_norm": 0.29729726910591125, "learning_rate": 9.592057447574942e-06, "loss": 0.3688323199748993, "step": 9778, "token_acc": 0.8700162074554295 }, { "epoch": 0.5276533750607025, "grad_norm": 0.2948402762413025, "learning_rate": 9.590311346291774e-06, "loss": 0.3365051746368408, "step": 9779, "token_acc": 0.8799304719882337 }, { "epoch": 0.527707332865699, "grad_norm": 0.35250571370124817, "learning_rate": 9.588565257520389e-06, "loss": 0.36364224553108215, "step": 9780, "token_acc": 0.8727218847236627 }, { "epoch": 0.5277612906706955, "grad_norm": 0.3813339173793793, "learning_rate": 9.586819181314114e-06, "loss": 0.38224464654922485, "step": 9781, "token_acc": 0.8710655501010685 }, { "epoch": 0.5278152484756921, "grad_norm": 0.42095866799354553, "learning_rate": 9.585073117726275e-06, "loss": 0.28522929549217224, "step": 9782, "token_acc": 0.8924185361061078 }, { "epoch": 0.5278692062806885, "grad_norm": 0.4436837136745453, "learning_rate": 9.583327066810194e-06, "loss": 0.4340060353279114, "step": 9783, "token_acc": 0.8505348366579937 }, { "epoch": 0.527923164085685, "grad_norm": 0.40345054864883423, "learning_rate": 9.581581028619193e-06, "loss": 0.35349494218826294, "step": 9784, "token_acc": 0.8748636859323882 }, { "epoch": 0.5279771218906815, "grad_norm": 0.42523884773254395, "learning_rate": 9.5798350032066e-06, "loss": 0.3745071589946747, "step": 9785, "token_acc": 0.8645336963921034 }, { "epoch": 0.528031079695678, "grad_norm": 0.4458959400653839, "learning_rate": 9.578088990625731e-06, "loss": 0.3884909749031067, "step": 9786, "token_acc": 0.8639556876519859 }, { "epoch": 0.5280850375006745, "grad_norm": 0.3656560480594635, "learning_rate": 9.576342990929918e-06, "loss": 0.3169049918651581, "step": 9787, "token_acc": 0.8853438671123266 }, { "epoch": 0.528138995305671, "grad_norm": 0.36472365260124207, "learning_rate": 9.574597004172479e-06, "loss": 0.34690484404563904, "step": 9788, "token_acc": 0.8784753816600951 }, { "epoch": 0.5281929531106675, "grad_norm": 0.4256521165370941, "learning_rate": 9.572851030406735e-06, "loss": 0.3609326183795929, "step": 9789, "token_acc": 0.8754181934044926 }, { "epoch": 0.528246910915664, "grad_norm": 0.5028364062309265, "learning_rate": 9.571105069686011e-06, "loss": 0.3362891674041748, "step": 9790, "token_acc": 0.8736487683856105 }, { "epoch": 0.5283008687206604, "grad_norm": 0.3300210237503052, "learning_rate": 9.569359122063626e-06, "loss": 0.317128986120224, "step": 9791, "token_acc": 0.8850514104930134 }, { "epoch": 0.5283548265256569, "grad_norm": 0.3833562731742859, "learning_rate": 9.567613187592899e-06, "loss": 0.336659699678421, "step": 9792, "token_acc": 0.8769805680119581 }, { "epoch": 0.5284087843306534, "grad_norm": 0.4686068296432495, "learning_rate": 9.565867266327148e-06, "loss": 0.3835316300392151, "step": 9793, "token_acc": 0.8692143727161997 }, { "epoch": 0.5284627421356499, "grad_norm": 0.3763081133365631, "learning_rate": 9.564121358319702e-06, "loss": 0.3312608599662781, "step": 9794, "token_acc": 0.8868469015795869 }, { "epoch": 0.5285166999406464, "grad_norm": 0.3672597110271454, "learning_rate": 9.562375463623873e-06, "loss": 0.3729342818260193, "step": 9795, "token_acc": 0.8709720550875786 }, { "epoch": 0.5285706577456429, "grad_norm": 0.38411790132522583, "learning_rate": 9.560629582292985e-06, "loss": 0.3335932493209839, "step": 9796, "token_acc": 0.8790215368253124 }, { "epoch": 0.5286246155506394, "grad_norm": 0.38490229845046997, "learning_rate": 9.558883714380353e-06, "loss": 0.34621497988700867, "step": 9797, "token_acc": 0.8798459563543004 }, { "epoch": 0.5286785733556358, "grad_norm": 0.3969864547252655, "learning_rate": 9.557137859939298e-06, "loss": 0.3767501413822174, "step": 9798, "token_acc": 0.8679130566955464 }, { "epoch": 0.5287325311606323, "grad_norm": 0.44379398226737976, "learning_rate": 9.555392019023135e-06, "loss": 0.38258638978004456, "step": 9799, "token_acc": 0.8666590362824769 }, { "epoch": 0.5287864889656289, "grad_norm": 0.38488656282424927, "learning_rate": 9.553646191685184e-06, "loss": 0.342751681804657, "step": 9800, "token_acc": 0.8753799392097265 }, { "epoch": 0.5288404467706254, "grad_norm": 0.38193097710609436, "learning_rate": 9.551900377978762e-06, "loss": 0.34172487258911133, "step": 9801, "token_acc": 0.877240841777085 }, { "epoch": 0.5288944045756219, "grad_norm": 0.4227403700351715, "learning_rate": 9.550154577957185e-06, "loss": 0.3957623541355133, "step": 9802, "token_acc": 0.8660113812726332 }, { "epoch": 0.5289483623806184, "grad_norm": 0.45341143012046814, "learning_rate": 9.548408791673769e-06, "loss": 0.416709303855896, "step": 9803, "token_acc": 0.858810361368724 }, { "epoch": 0.5290023201856149, "grad_norm": 0.45028945803642273, "learning_rate": 9.54666301918183e-06, "loss": 0.36626267433166504, "step": 9804, "token_acc": 0.871331828442438 }, { "epoch": 0.5290562779906114, "grad_norm": 0.3519997298717499, "learning_rate": 9.544917260534686e-06, "loss": 0.3508421778678894, "step": 9805, "token_acc": 0.876278233509383 }, { "epoch": 0.5291102357956078, "grad_norm": 0.3815615177154541, "learning_rate": 9.543171515785646e-06, "loss": 0.3670179843902588, "step": 9806, "token_acc": 0.8700929752066116 }, { "epoch": 0.5291641936006043, "grad_norm": 0.3552888035774231, "learning_rate": 9.54142578498803e-06, "loss": 0.37319332361221313, "step": 9807, "token_acc": 0.8707439290761917 }, { "epoch": 0.5292181514056008, "grad_norm": 0.3945486843585968, "learning_rate": 9.53968006819515e-06, "loss": 0.34987443685531616, "step": 9808, "token_acc": 0.8764867337602927 }, { "epoch": 0.5292721092105973, "grad_norm": 0.4064342677593231, "learning_rate": 9.537934365460319e-06, "loss": 0.348257452249527, "step": 9809, "token_acc": 0.8817755640199239 }, { "epoch": 0.5293260670155938, "grad_norm": 0.38827595114707947, "learning_rate": 9.536188676836854e-06, "loss": 0.3593646287918091, "step": 9810, "token_acc": 0.8707174231332357 }, { "epoch": 0.5293800248205903, "grad_norm": 0.3830929696559906, "learning_rate": 9.534443002378064e-06, "loss": 0.36414605379104614, "step": 9811, "token_acc": 0.8677471235927254 }, { "epoch": 0.5294339826255868, "grad_norm": 0.4397636353969574, "learning_rate": 9.53269734213726e-06, "loss": 0.373582661151886, "step": 9812, "token_acc": 0.8638838475499092 }, { "epoch": 0.5294879404305833, "grad_norm": 0.37710410356521606, "learning_rate": 9.530951696167758e-06, "loss": 0.33191823959350586, "step": 9813, "token_acc": 0.8801578354002255 }, { "epoch": 0.5295418982355797, "grad_norm": 0.49034321308135986, "learning_rate": 9.529206064522874e-06, "loss": 0.4013388752937317, "step": 9814, "token_acc": 0.8588333565362661 }, { "epoch": 0.5295958560405762, "grad_norm": 0.36034879088401794, "learning_rate": 9.527460447255909e-06, "loss": 0.3042488098144531, "step": 9815, "token_acc": 0.8883185175910855 }, { "epoch": 0.5296498138455727, "grad_norm": 0.36341166496276855, "learning_rate": 9.525714844420179e-06, "loss": 0.3405930697917938, "step": 9816, "token_acc": 0.880453972257251 }, { "epoch": 0.5297037716505693, "grad_norm": 0.34992945194244385, "learning_rate": 9.523969256068993e-06, "loss": 0.32338830828666687, "step": 9817, "token_acc": 0.8830486202365309 }, { "epoch": 0.5297577294555658, "grad_norm": 0.36012548208236694, "learning_rate": 9.522223682255662e-06, "loss": 0.29603517055511475, "step": 9818, "token_acc": 0.890466816647919 }, { "epoch": 0.5298116872605623, "grad_norm": 0.31399521231651306, "learning_rate": 9.520478123033488e-06, "loss": 0.3330879807472229, "step": 9819, "token_acc": 0.8790027177123951 }, { "epoch": 0.5298656450655588, "grad_norm": 0.3896551728248596, "learning_rate": 9.518732578455795e-06, "loss": 0.32455453276634216, "step": 9820, "token_acc": 0.8878458498023716 }, { "epoch": 0.5299196028705552, "grad_norm": 0.37498733401298523, "learning_rate": 9.51698704857588e-06, "loss": 0.39840400218963623, "step": 9821, "token_acc": 0.8645806617081303 }, { "epoch": 0.5299735606755517, "grad_norm": 0.486857533454895, "learning_rate": 9.515241533447054e-06, "loss": 0.3826802372932434, "step": 9822, "token_acc": 0.8674077654261317 }, { "epoch": 0.5300275184805482, "grad_norm": 0.4222123324871063, "learning_rate": 9.513496033122624e-06, "loss": 0.34785133600234985, "step": 9823, "token_acc": 0.8740359897172236 }, { "epoch": 0.5300814762855447, "grad_norm": 0.3936317265033722, "learning_rate": 9.511750547655899e-06, "loss": 0.41286417841911316, "step": 9824, "token_acc": 0.8582840236686391 }, { "epoch": 0.5301354340905412, "grad_norm": 0.36024436354637146, "learning_rate": 9.510005077100182e-06, "loss": 0.34964796900749207, "step": 9825, "token_acc": 0.8760539629005059 }, { "epoch": 0.5301893918955377, "grad_norm": 0.4198608100414276, "learning_rate": 9.508259621508785e-06, "loss": 0.4268019497394562, "step": 9826, "token_acc": 0.8502169869807812 }, { "epoch": 0.5302433497005342, "grad_norm": 0.33234602212905884, "learning_rate": 9.506514180935009e-06, "loss": 0.3783043920993805, "step": 9827, "token_acc": 0.869334679964651 }, { "epoch": 0.5302973075055307, "grad_norm": 0.35068267583847046, "learning_rate": 9.50476875543216e-06, "loss": 0.33933699131011963, "step": 9828, "token_acc": 0.8820032694308219 }, { "epoch": 0.5303512653105271, "grad_norm": 0.3722977340221405, "learning_rate": 9.503023345053543e-06, "loss": 0.39398229122161865, "step": 9829, "token_acc": 0.8618603042876902 }, { "epoch": 0.5304052231155236, "grad_norm": 0.3999224901199341, "learning_rate": 9.501277949852466e-06, "loss": 0.33011311292648315, "step": 9830, "token_acc": 0.8843891782893631 }, { "epoch": 0.5304591809205201, "grad_norm": 0.43877506256103516, "learning_rate": 9.499532569882227e-06, "loss": 0.3735334277153015, "step": 9831, "token_acc": 0.8668012108980827 }, { "epoch": 0.5305131387255166, "grad_norm": 0.4382437765598297, "learning_rate": 9.49778720519613e-06, "loss": 0.36958932876586914, "step": 9832, "token_acc": 0.8701388888888889 }, { "epoch": 0.5305670965305131, "grad_norm": 0.3823903203010559, "learning_rate": 9.496041855847485e-06, "loss": 0.3363814055919647, "step": 9833, "token_acc": 0.8814432989690721 }, { "epoch": 0.5306210543355097, "grad_norm": 0.47778141498565674, "learning_rate": 9.494296521889586e-06, "loss": 0.37979260087013245, "step": 9834, "token_acc": 0.8678589170392449 }, { "epoch": 0.5306750121405062, "grad_norm": 0.36190029978752136, "learning_rate": 9.492551203375742e-06, "loss": 0.3948628604412079, "step": 9835, "token_acc": 0.8721031538595574 }, { "epoch": 0.5307289699455027, "grad_norm": 0.4690110385417938, "learning_rate": 9.49080590035925e-06, "loss": 0.39389538764953613, "step": 9836, "token_acc": 0.8666464615850886 }, { "epoch": 0.5307829277504991, "grad_norm": 0.43421733379364014, "learning_rate": 9.48906061289341e-06, "loss": 0.3499988615512848, "step": 9837, "token_acc": 0.8730243060167353 }, { "epoch": 0.5308368855554956, "grad_norm": 0.4718198776245117, "learning_rate": 9.487315341031527e-06, "loss": 0.3921095132827759, "step": 9838, "token_acc": 0.8632025450689289 }, { "epoch": 0.5308908433604921, "grad_norm": 0.37406492233276367, "learning_rate": 9.4855700848269e-06, "loss": 0.37398117780685425, "step": 9839, "token_acc": 0.8656059099303879 }, { "epoch": 0.5309448011654886, "grad_norm": 0.4834464490413666, "learning_rate": 9.483824844332828e-06, "loss": 0.40450721979141235, "step": 9840, "token_acc": 0.863441603317208 }, { "epoch": 0.5309987589704851, "grad_norm": 0.42365211248397827, "learning_rate": 9.482079619602608e-06, "loss": 0.3443530201911926, "step": 9841, "token_acc": 0.87951607557176 }, { "epoch": 0.5310527167754816, "grad_norm": 0.3645905554294586, "learning_rate": 9.480334410689543e-06, "loss": 0.35251516103744507, "step": 9842, "token_acc": 0.8742593031524057 }, { "epoch": 0.5311066745804781, "grad_norm": 0.3381368815898895, "learning_rate": 9.478589217646928e-06, "loss": 0.35970932245254517, "step": 9843, "token_acc": 0.8741712894060344 }, { "epoch": 0.5311606323854745, "grad_norm": 0.32672643661499023, "learning_rate": 9.476844040528057e-06, "loss": 0.34019768238067627, "step": 9844, "token_acc": 0.8806060606060606 }, { "epoch": 0.531214590190471, "grad_norm": 0.39217111468315125, "learning_rate": 9.475098879386238e-06, "loss": 0.3728669285774231, "step": 9845, "token_acc": 0.8718045586198212 }, { "epoch": 0.5312685479954675, "grad_norm": 0.3835601210594177, "learning_rate": 9.473353734274759e-06, "loss": 0.3486858308315277, "step": 9846, "token_acc": 0.8738724226804123 }, { "epoch": 0.531322505800464, "grad_norm": 0.344508558511734, "learning_rate": 9.471608605246921e-06, "loss": 0.38871490955352783, "step": 9847, "token_acc": 0.8668142168988413 }, { "epoch": 0.5313764636054605, "grad_norm": 0.3942199945449829, "learning_rate": 9.469863492356018e-06, "loss": 0.35214686393737793, "step": 9848, "token_acc": 0.877420253517203 }, { "epoch": 0.531430421410457, "grad_norm": 0.3394049108028412, "learning_rate": 9.468118395655345e-06, "loss": 0.3262254297733307, "step": 9849, "token_acc": 0.882313681868743 }, { "epoch": 0.5314843792154536, "grad_norm": 0.4310115873813629, "learning_rate": 9.466373315198198e-06, "loss": 0.32531023025512695, "step": 9850, "token_acc": 0.8862791483609328 }, { "epoch": 0.5315383370204501, "grad_norm": 0.3715217709541321, "learning_rate": 9.464628251037862e-06, "loss": 0.33104628324508667, "step": 9851, "token_acc": 0.8792162351294611 }, { "epoch": 0.5315922948254465, "grad_norm": 0.3200223743915558, "learning_rate": 9.462883203227647e-06, "loss": 0.31403905153274536, "step": 9852, "token_acc": 0.8874129481557905 }, { "epoch": 0.531646252630443, "grad_norm": 0.3704315423965454, "learning_rate": 9.461138171820835e-06, "loss": 0.36129701137542725, "step": 9853, "token_acc": 0.8765501691093573 }, { "epoch": 0.5317002104354395, "grad_norm": 0.36786314845085144, "learning_rate": 9.459393156870726e-06, "loss": 0.3325009047985077, "step": 9854, "token_acc": 0.8817845978971298 }, { "epoch": 0.531754168240436, "grad_norm": 0.37567898631095886, "learning_rate": 9.457648158430605e-06, "loss": 0.29752665758132935, "step": 9855, "token_acc": 0.8954430073733833 }, { "epoch": 0.5318081260454325, "grad_norm": 0.35624808073043823, "learning_rate": 9.455903176553767e-06, "loss": 0.3478887975215912, "step": 9856, "token_acc": 0.8780841799709724 }, { "epoch": 0.531862083850429, "grad_norm": 0.39335376024246216, "learning_rate": 9.454158211293504e-06, "loss": 0.3441973924636841, "step": 9857, "token_acc": 0.8776581426648722 }, { "epoch": 0.5319160416554255, "grad_norm": 0.44161972403526306, "learning_rate": 9.452413262703106e-06, "loss": 0.35692426562309265, "step": 9858, "token_acc": 0.8724555160142349 }, { "epoch": 0.531969999460422, "grad_norm": 0.4005189836025238, "learning_rate": 9.450668330835865e-06, "loss": 0.32561105489730835, "step": 9859, "token_acc": 0.8805549969837121 }, { "epoch": 0.5320239572654184, "grad_norm": 0.4297999441623688, "learning_rate": 9.44892341574507e-06, "loss": 0.3526398837566376, "step": 9860, "token_acc": 0.8792031647358011 }, { "epoch": 0.5320779150704149, "grad_norm": 0.3314369022846222, "learning_rate": 9.447178517484006e-06, "loss": 0.38367927074432373, "step": 9861, "token_acc": 0.867254556143445 }, { "epoch": 0.5321318728754114, "grad_norm": 0.3780372738838196, "learning_rate": 9.445433636105968e-06, "loss": 0.33252209424972534, "step": 9862, "token_acc": 0.8803443747263971 }, { "epoch": 0.5321858306804079, "grad_norm": 0.392716646194458, "learning_rate": 9.443688771664241e-06, "loss": 0.43140435218811035, "step": 9863, "token_acc": 0.8501769128704113 }, { "epoch": 0.5322397884854044, "grad_norm": 0.3215444087982178, "learning_rate": 9.441943924212112e-06, "loss": 0.3358253836631775, "step": 9864, "token_acc": 0.8828446662507798 }, { "epoch": 0.5322937462904009, "grad_norm": 0.3787553906440735, "learning_rate": 9.44019909380287e-06, "loss": 0.3971174657344818, "step": 9865, "token_acc": 0.8597869507323569 }, { "epoch": 0.5323477040953974, "grad_norm": 0.31877002120018005, "learning_rate": 9.438454280489802e-06, "loss": 0.3390626013278961, "step": 9866, "token_acc": 0.8802511586186276 }, { "epoch": 0.5324016619003938, "grad_norm": 0.4080777168273926, "learning_rate": 9.436709484326192e-06, "loss": 0.3946935534477234, "step": 9867, "token_acc": 0.8616564816018724 }, { "epoch": 0.5324556197053903, "grad_norm": 0.4550364017486572, "learning_rate": 9.434964705365328e-06, "loss": 0.3961317539215088, "step": 9868, "token_acc": 0.8626349892008639 }, { "epoch": 0.5325095775103869, "grad_norm": 0.46579861640930176, "learning_rate": 9.433219943660494e-06, "loss": 0.3612608313560486, "step": 9869, "token_acc": 0.8733660130718954 }, { "epoch": 0.5325635353153834, "grad_norm": 0.4002545177936554, "learning_rate": 9.43147519926497e-06, "loss": 0.2960110604763031, "step": 9870, "token_acc": 0.8979749574895656 }, { "epoch": 0.5326174931203799, "grad_norm": 0.4985225200653076, "learning_rate": 9.429730472232046e-06, "loss": 0.4033685028553009, "step": 9871, "token_acc": 0.8629362448820432 }, { "epoch": 0.5326714509253764, "grad_norm": 0.5346928834915161, "learning_rate": 9.427985762615007e-06, "loss": 0.3330317735671997, "step": 9872, "token_acc": 0.8852672750977836 }, { "epoch": 0.5327254087303729, "grad_norm": 0.45455315709114075, "learning_rate": 9.426241070467134e-06, "loss": 0.34118539094924927, "step": 9873, "token_acc": 0.8774666416087201 }, { "epoch": 0.5327793665353694, "grad_norm": 0.3663916289806366, "learning_rate": 9.424496395841707e-06, "loss": 0.3404601514339447, "step": 9874, "token_acc": 0.8798140770252324 }, { "epoch": 0.5328333243403658, "grad_norm": 0.4285067617893219, "learning_rate": 9.422751738792008e-06, "loss": 0.37541916966438293, "step": 9875, "token_acc": 0.8649035025017869 }, { "epoch": 0.5328872821453623, "grad_norm": 0.3628186583518982, "learning_rate": 9.421007099371323e-06, "loss": 0.30787086486816406, "step": 9876, "token_acc": 0.8878977499667154 }, { "epoch": 0.5329412399503588, "grad_norm": 0.4183782637119293, "learning_rate": 9.419262477632922e-06, "loss": 0.3992452621459961, "step": 9877, "token_acc": 0.8635751178980463 }, { "epoch": 0.5329951977553553, "grad_norm": 0.3909640908241272, "learning_rate": 9.417517873630098e-06, "loss": 0.3850044906139374, "step": 9878, "token_acc": 0.8647731332618792 }, { "epoch": 0.5330491555603518, "grad_norm": 0.38843247294425964, "learning_rate": 9.415773287416126e-06, "loss": 0.3875504434108734, "step": 9879, "token_acc": 0.8663538408063955 }, { "epoch": 0.5331031133653483, "grad_norm": 0.4333968162536621, "learning_rate": 9.414028719044283e-06, "loss": 0.35464102029800415, "step": 9880, "token_acc": 0.8756964712125233 }, { "epoch": 0.5331570711703448, "grad_norm": 0.44362351298332214, "learning_rate": 9.412284168567852e-06, "loss": 0.3384626507759094, "step": 9881, "token_acc": 0.8801542568970632 }, { "epoch": 0.5332110289753412, "grad_norm": 0.29849499464035034, "learning_rate": 9.410539636040105e-06, "loss": 0.3269598186016083, "step": 9882, "token_acc": 0.8833379083173208 }, { "epoch": 0.5332649867803377, "grad_norm": 0.31104180216789246, "learning_rate": 9.408795121514325e-06, "loss": 0.38114872574806213, "step": 9883, "token_acc": 0.8685915653063544 }, { "epoch": 0.5333189445853342, "grad_norm": 0.37676262855529785, "learning_rate": 9.407050625043786e-06, "loss": 0.3512580394744873, "step": 9884, "token_acc": 0.8762029032784211 }, { "epoch": 0.5333729023903307, "grad_norm": 0.4020073115825653, "learning_rate": 9.405306146681767e-06, "loss": 0.36293867230415344, "step": 9885, "token_acc": 0.8727487452022439 }, { "epoch": 0.5334268601953273, "grad_norm": 0.42032188177108765, "learning_rate": 9.403561686481543e-06, "loss": 0.3640511631965637, "step": 9886, "token_acc": 0.8755286157316041 }, { "epoch": 0.5334808180003238, "grad_norm": 0.444686621427536, "learning_rate": 9.401817244496389e-06, "loss": 0.37054383754730225, "step": 9887, "token_acc": 0.8698664027709055 }, { "epoch": 0.5335347758053203, "grad_norm": 0.4924299716949463, "learning_rate": 9.40007282077958e-06, "loss": 0.3970838785171509, "step": 9888, "token_acc": 0.8630114830988194 }, { "epoch": 0.5335887336103168, "grad_norm": 0.35395047068595886, "learning_rate": 9.398328415384386e-06, "loss": 0.3718075156211853, "step": 9889, "token_acc": 0.8698562709194724 }, { "epoch": 0.5336426914153132, "grad_norm": 0.39834263920783997, "learning_rate": 9.396584028364086e-06, "loss": 0.36009520292282104, "step": 9890, "token_acc": 0.8704551961064987 }, { "epoch": 0.5336966492203097, "grad_norm": 0.34527677297592163, "learning_rate": 9.394839659771954e-06, "loss": 0.3746914267539978, "step": 9891, "token_acc": 0.8727140783744557 }, { "epoch": 0.5337506070253062, "grad_norm": 0.3659760057926178, "learning_rate": 9.39309530966126e-06, "loss": 0.31477898359298706, "step": 9892, "token_acc": 0.8841348622283074 }, { "epoch": 0.5338045648303027, "grad_norm": 0.43913546204566956, "learning_rate": 9.391350978085274e-06, "loss": 0.3867829144001007, "step": 9893, "token_acc": 0.8653669020571078 }, { "epoch": 0.5338585226352992, "grad_norm": 0.37214580178260803, "learning_rate": 9.389606665097272e-06, "loss": 0.3693215847015381, "step": 9894, "token_acc": 0.8734497595545432 }, { "epoch": 0.5339124804402957, "grad_norm": 0.44946956634521484, "learning_rate": 9.387862370750522e-06, "loss": 0.34788018465042114, "step": 9895, "token_acc": 0.8800904977375565 }, { "epoch": 0.5339664382452922, "grad_norm": 0.38839805126190186, "learning_rate": 9.38611809509829e-06, "loss": 0.35717809200286865, "step": 9896, "token_acc": 0.8765634306134604 }, { "epoch": 0.5340203960502887, "grad_norm": 0.42150095105171204, "learning_rate": 9.384373838193858e-06, "loss": 0.3169167935848236, "step": 9897, "token_acc": 0.8887636226982337 }, { "epoch": 0.5340743538552851, "grad_norm": 0.49896377325057983, "learning_rate": 9.382629600090485e-06, "loss": 0.34797143936157227, "step": 9898, "token_acc": 0.8788009488893681 }, { "epoch": 0.5341283116602816, "grad_norm": 0.44589075446128845, "learning_rate": 9.380885380841442e-06, "loss": 0.36661458015441895, "step": 9899, "token_acc": 0.8718400940623163 }, { "epoch": 0.5341822694652781, "grad_norm": 0.4152432382106781, "learning_rate": 9.379141180499997e-06, "loss": 0.377224862575531, "step": 9900, "token_acc": 0.8678219503907577 }, { "epoch": 0.5342362272702746, "grad_norm": 0.3156057894229889, "learning_rate": 9.377396999119419e-06, "loss": 0.37070131301879883, "step": 9901, "token_acc": 0.8744126441691584 }, { "epoch": 0.5342901850752712, "grad_norm": 0.36099448800086975, "learning_rate": 9.375652836752969e-06, "loss": 0.2991384267807007, "step": 9902, "token_acc": 0.8896146309601568 }, { "epoch": 0.5343441428802677, "grad_norm": 0.386928528547287, "learning_rate": 9.373908693453921e-06, "loss": 0.3579540252685547, "step": 9903, "token_acc": 0.8753060225232577 }, { "epoch": 0.5343981006852642, "grad_norm": 0.47486433386802673, "learning_rate": 9.372164569275538e-06, "loss": 0.370140016078949, "step": 9904, "token_acc": 0.8669354838709677 }, { "epoch": 0.5344520584902606, "grad_norm": 0.4004264771938324, "learning_rate": 9.370420464271085e-06, "loss": 0.38820117712020874, "step": 9905, "token_acc": 0.8637140228676567 }, { "epoch": 0.5345060162952571, "grad_norm": 0.4631018042564392, "learning_rate": 9.368676378493826e-06, "loss": 0.3501308858394623, "step": 9906, "token_acc": 0.8753507605966623 }, { "epoch": 0.5345599741002536, "grad_norm": 0.36578699946403503, "learning_rate": 9.366932311997026e-06, "loss": 0.29285669326782227, "step": 9907, "token_acc": 0.8951019252548131 }, { "epoch": 0.5346139319052501, "grad_norm": 0.36381256580352783, "learning_rate": 9.365188264833948e-06, "loss": 0.38580450415611267, "step": 9908, "token_acc": 0.8647315767225588 }, { "epoch": 0.5346678897102466, "grad_norm": 0.3518417477607727, "learning_rate": 9.363444237057847e-06, "loss": 0.3769243657588959, "step": 9909, "token_acc": 0.8711826762909495 }, { "epoch": 0.5347218475152431, "grad_norm": 0.4944736063480377, "learning_rate": 9.361700228721997e-06, "loss": 0.369806170463562, "step": 9910, "token_acc": 0.8657889925031999 }, { "epoch": 0.5347758053202396, "grad_norm": 0.4228651523590088, "learning_rate": 9.359956239879656e-06, "loss": 0.4401385188102722, "step": 9911, "token_acc": 0.8483917892354937 }, { "epoch": 0.5348297631252361, "grad_norm": 0.4519957900047302, "learning_rate": 9.358212270584083e-06, "loss": 0.359195351600647, "step": 9912, "token_acc": 0.8704469646430953 }, { "epoch": 0.5348837209302325, "grad_norm": 0.46873238682746887, "learning_rate": 9.356468320888539e-06, "loss": 0.38927334547042847, "step": 9913, "token_acc": 0.8623388581952118 }, { "epoch": 0.534937678735229, "grad_norm": 0.4312695264816284, "learning_rate": 9.354724390846283e-06, "loss": 0.34083452820777893, "step": 9914, "token_acc": 0.8764293618590926 }, { "epoch": 0.5349916365402255, "grad_norm": 0.39243829250335693, "learning_rate": 9.352980480510575e-06, "loss": 0.30548524856567383, "step": 9915, "token_acc": 0.8878974691758599 }, { "epoch": 0.535045594345222, "grad_norm": 0.3940754234790802, "learning_rate": 9.351236589934675e-06, "loss": 0.34919440746307373, "step": 9916, "token_acc": 0.876977422822915 }, { "epoch": 0.5350995521502185, "grad_norm": 0.48782604932785034, "learning_rate": 9.34949271917184e-06, "loss": 0.423900306224823, "step": 9917, "token_acc": 0.8508557457212714 }, { "epoch": 0.535153509955215, "grad_norm": 0.3875187635421753, "learning_rate": 9.347748868275326e-06, "loss": 0.37801700830459595, "step": 9918, "token_acc": 0.8644192011725907 }, { "epoch": 0.5352074677602116, "grad_norm": 0.42085063457489014, "learning_rate": 9.346005037298391e-06, "loss": 0.3618379533290863, "step": 9919, "token_acc": 0.8717551336691205 }, { "epoch": 0.5352614255652081, "grad_norm": 0.34055814146995544, "learning_rate": 9.34426122629429e-06, "loss": 0.3809276223182678, "step": 9920, "token_acc": 0.8668975754576942 }, { "epoch": 0.5353153833702045, "grad_norm": 0.4160059988498688, "learning_rate": 9.34251743531628e-06, "loss": 0.3717234432697296, "step": 9921, "token_acc": 0.8692985818321196 }, { "epoch": 0.535369341175201, "grad_norm": 0.3614484965801239, "learning_rate": 9.340773664417614e-06, "loss": 0.3486550450325012, "step": 9922, "token_acc": 0.8738202410338318 }, { "epoch": 0.5354232989801975, "grad_norm": 0.5191227197647095, "learning_rate": 9.339029913651549e-06, "loss": 0.32943758368492126, "step": 9923, "token_acc": 0.8822310156397212 }, { "epoch": 0.535477256785194, "grad_norm": 0.41476529836654663, "learning_rate": 9.337286183071338e-06, "loss": 0.33490827679634094, "step": 9924, "token_acc": 0.8800743281875357 }, { "epoch": 0.5355312145901905, "grad_norm": 0.40696704387664795, "learning_rate": 9.335542472730232e-06, "loss": 0.39099931716918945, "step": 9925, "token_acc": 0.8653487504662439 }, { "epoch": 0.535585172395187, "grad_norm": 0.41585907340049744, "learning_rate": 9.333798782681486e-06, "loss": 0.3419896364212036, "step": 9926, "token_acc": 0.8787291747384735 }, { "epoch": 0.5356391302001835, "grad_norm": 0.44411084055900574, "learning_rate": 9.33205511297835e-06, "loss": 0.39577099680900574, "step": 9927, "token_acc": 0.8645728643216081 }, { "epoch": 0.5356930880051799, "grad_norm": 0.3570024073123932, "learning_rate": 9.330311463674071e-06, "loss": 0.3695099949836731, "step": 9928, "token_acc": 0.8691068814055637 }, { "epoch": 0.5357470458101764, "grad_norm": 0.40618306398391724, "learning_rate": 9.32856783482191e-06, "loss": 0.4110183119773865, "step": 9929, "token_acc": 0.8598474888246121 }, { "epoch": 0.5358010036151729, "grad_norm": 0.45238590240478516, "learning_rate": 9.326824226475112e-06, "loss": 0.3135242462158203, "step": 9930, "token_acc": 0.8886061080657792 }, { "epoch": 0.5358549614201694, "grad_norm": 0.411298006772995, "learning_rate": 9.325080638686927e-06, "loss": 0.37294459342956543, "step": 9931, "token_acc": 0.8748790266832573 }, { "epoch": 0.5359089192251659, "grad_norm": 0.36498838663101196, "learning_rate": 9.3233370715106e-06, "loss": 0.29407429695129395, "step": 9932, "token_acc": 0.8908647640900617 }, { "epoch": 0.5359628770301624, "grad_norm": 0.301851361989975, "learning_rate": 9.321593524999382e-06, "loss": 0.3601571321487427, "step": 9933, "token_acc": 0.8739782016348774 }, { "epoch": 0.5360168348351589, "grad_norm": 0.342771053314209, "learning_rate": 9.31984999920652e-06, "loss": 0.3118269443511963, "step": 9934, "token_acc": 0.8869415807560137 }, { "epoch": 0.5360707926401554, "grad_norm": 0.26805776357650757, "learning_rate": 9.318106494185257e-06, "loss": 0.2782362103462219, "step": 9935, "token_acc": 0.8966942148760331 }, { "epoch": 0.5361247504451518, "grad_norm": 0.404983252286911, "learning_rate": 9.316363009988846e-06, "loss": 0.3659464418888092, "step": 9936, "token_acc": 0.8706885739370375 }, { "epoch": 0.5361787082501484, "grad_norm": 0.35882893204689026, "learning_rate": 9.31461954667053e-06, "loss": 0.32381951808929443, "step": 9937, "token_acc": 0.8840227944280287 }, { "epoch": 0.5362326660551449, "grad_norm": 0.4379163980484009, "learning_rate": 9.312876104283555e-06, "loss": 0.3048238158226013, "step": 9938, "token_acc": 0.8932876916207769 }, { "epoch": 0.5362866238601414, "grad_norm": 0.3006640672683716, "learning_rate": 9.311132682881163e-06, "loss": 0.35407641530036926, "step": 9939, "token_acc": 0.8742393509127789 }, { "epoch": 0.5363405816651379, "grad_norm": 0.3555561304092407, "learning_rate": 9.309389282516597e-06, "loss": 0.31932592391967773, "step": 9940, "token_acc": 0.8851302272491439 }, { "epoch": 0.5363945394701344, "grad_norm": 0.4420876204967499, "learning_rate": 9.307645903243099e-06, "loss": 0.3838924765586853, "step": 9941, "token_acc": 0.8680312864037277 }, { "epoch": 0.5364484972751309, "grad_norm": 0.45905980467796326, "learning_rate": 9.305902545113919e-06, "loss": 0.3577667474746704, "step": 9942, "token_acc": 0.8758643953449148 }, { "epoch": 0.5365024550801274, "grad_norm": 0.4133666753768921, "learning_rate": 9.30415920818229e-06, "loss": 0.42852458357810974, "step": 9943, "token_acc": 0.8550074738415545 }, { "epoch": 0.5365564128851238, "grad_norm": 0.4580537676811218, "learning_rate": 9.30241589250146e-06, "loss": 0.3806140422821045, "step": 9944, "token_acc": 0.8702995931451116 }, { "epoch": 0.5366103706901203, "grad_norm": 0.4067341983318329, "learning_rate": 9.300672598124663e-06, "loss": 0.3335493206977844, "step": 9945, "token_acc": 0.8831454092839361 }, { "epoch": 0.5366643284951168, "grad_norm": 0.4704335927963257, "learning_rate": 9.298929325105143e-06, "loss": 0.38542044162750244, "step": 9946, "token_acc": 0.8631595428213559 }, { "epoch": 0.5367182863001133, "grad_norm": 0.3393911123275757, "learning_rate": 9.297186073496135e-06, "loss": 0.39457058906555176, "step": 9947, "token_acc": 0.8641337759744252 }, { "epoch": 0.5367722441051098, "grad_norm": 0.29996272921562195, "learning_rate": 9.295442843350882e-06, "loss": 0.3656368851661682, "step": 9948, "token_acc": 0.8739079410614161 }, { "epoch": 0.5368262019101063, "grad_norm": 0.4436246156692505, "learning_rate": 9.293699634722622e-06, "loss": 0.38695651292800903, "step": 9949, "token_acc": 0.865379113018598 }, { "epoch": 0.5368801597151028, "grad_norm": 0.47647523880004883, "learning_rate": 9.291956447664588e-06, "loss": 0.3877200484275818, "step": 9950, "token_acc": 0.8666305965102123 }, { "epoch": 0.5369341175200992, "grad_norm": 0.40964964032173157, "learning_rate": 9.29021328223002e-06, "loss": 0.2913209795951843, "step": 9951, "token_acc": 0.8930927435001941 }, { "epoch": 0.5369880753250957, "grad_norm": 0.36132916808128357, "learning_rate": 9.28847013847215e-06, "loss": 0.3126640021800995, "step": 9952, "token_acc": 0.887685364281109 }, { "epoch": 0.5370420331300922, "grad_norm": 0.45230481028556824, "learning_rate": 9.286727016444217e-06, "loss": 0.289821982383728, "step": 9953, "token_acc": 0.895150999574649 }, { "epoch": 0.5370959909350888, "grad_norm": 0.4881933033466339, "learning_rate": 9.284983916199452e-06, "loss": 0.3876730501651764, "step": 9954, "token_acc": 0.8639509590666403 }, { "epoch": 0.5371499487400853, "grad_norm": 0.3646411597728729, "learning_rate": 9.283240837791095e-06, "loss": 0.3270186185836792, "step": 9955, "token_acc": 0.8802960694231751 }, { "epoch": 0.5372039065450818, "grad_norm": 0.37833330035209656, "learning_rate": 9.281497781272373e-06, "loss": 0.34661081433296204, "step": 9956, "token_acc": 0.8743002544529263 }, { "epoch": 0.5372578643500783, "grad_norm": 0.36476194858551025, "learning_rate": 9.27975474669652e-06, "loss": 0.3397181034088135, "step": 9957, "token_acc": 0.8793519476042744 }, { "epoch": 0.5373118221550748, "grad_norm": 0.486562043428421, "learning_rate": 9.27801173411677e-06, "loss": 0.3970758020877838, "step": 9958, "token_acc": 0.8595401691331924 }, { "epoch": 0.5373657799600712, "grad_norm": 0.3212883472442627, "learning_rate": 9.276268743586352e-06, "loss": 0.3920382857322693, "step": 9959, "token_acc": 0.8625890238793464 }, { "epoch": 0.5374197377650677, "grad_norm": 0.38631966710090637, "learning_rate": 9.274525775158492e-06, "loss": 0.3592812418937683, "step": 9960, "token_acc": 0.879386658235852 }, { "epoch": 0.5374736955700642, "grad_norm": 0.3102160096168518, "learning_rate": 9.272782828886431e-06, "loss": 0.40400010347366333, "step": 9961, "token_acc": 0.859346348829684 }, { "epoch": 0.5375276533750607, "grad_norm": 0.4004240036010742, "learning_rate": 9.27103990482339e-06, "loss": 0.40104907751083374, "step": 9962, "token_acc": 0.8573252020989931 }, { "epoch": 0.5375816111800572, "grad_norm": 0.4802926778793335, "learning_rate": 9.2692970030226e-06, "loss": 0.3773207664489746, "step": 9963, "token_acc": 0.8700622380287057 }, { "epoch": 0.5376355689850537, "grad_norm": 0.29748445749282837, "learning_rate": 9.267554123537287e-06, "loss": 0.366729736328125, "step": 9964, "token_acc": 0.8720043572984749 }, { "epoch": 0.5376895267900502, "grad_norm": 0.40935996174812317, "learning_rate": 9.26581126642068e-06, "loss": 0.36153310537338257, "step": 9965, "token_acc": 0.8722822174226061 }, { "epoch": 0.5377434845950467, "grad_norm": 0.370435893535614, "learning_rate": 9.264068431726008e-06, "loss": 0.33026114106178284, "step": 9966, "token_acc": 0.884710797754276 }, { "epoch": 0.5377974424000431, "grad_norm": 0.29203540086746216, "learning_rate": 9.262325619506485e-06, "loss": 0.32153189182281494, "step": 9967, "token_acc": 0.8829918262232672 }, { "epoch": 0.5378514002050396, "grad_norm": 0.3548729419708252, "learning_rate": 9.260582829815349e-06, "loss": 0.3626081347465515, "step": 9968, "token_acc": 0.8721815965874467 }, { "epoch": 0.5379053580100361, "grad_norm": 0.320119708776474, "learning_rate": 9.25884006270582e-06, "loss": 0.27924418449401855, "step": 9969, "token_acc": 0.8983084776298669 }, { "epoch": 0.5379593158150326, "grad_norm": 0.4028749465942383, "learning_rate": 9.257097318231122e-06, "loss": 0.396967351436615, "step": 9970, "token_acc": 0.8621376135373896 }, { "epoch": 0.5380132736200292, "grad_norm": 0.44992440938949585, "learning_rate": 9.255354596444476e-06, "loss": 0.37323302030563354, "step": 9971, "token_acc": 0.8669855029143626 }, { "epoch": 0.5380672314250257, "grad_norm": 0.40120062232017517, "learning_rate": 9.253611897399106e-06, "loss": 0.39488446712493896, "step": 9972, "token_acc": 0.8646961846443711 }, { "epoch": 0.5381211892300222, "grad_norm": 0.43129763007164, "learning_rate": 9.251869221148231e-06, "loss": 0.38373005390167236, "step": 9973, "token_acc": 0.8692779046017061 }, { "epoch": 0.5381751470350186, "grad_norm": 0.38833633065223694, "learning_rate": 9.250126567745077e-06, "loss": 0.33508265018463135, "step": 9974, "token_acc": 0.8806678921568627 }, { "epoch": 0.5382291048400151, "grad_norm": 0.4467926323413849, "learning_rate": 9.24838393724286e-06, "loss": 0.3452758193016052, "step": 9975, "token_acc": 0.8773024813518039 }, { "epoch": 0.5382830626450116, "grad_norm": 0.4647490382194519, "learning_rate": 9.2466413296948e-06, "loss": 0.39720243215560913, "step": 9976, "token_acc": 0.8657334666476217 }, { "epoch": 0.5383370204500081, "grad_norm": 0.4279974699020386, "learning_rate": 9.244898745154118e-06, "loss": 0.35002338886260986, "step": 9977, "token_acc": 0.8764287920914426 }, { "epoch": 0.5383909782550046, "grad_norm": 0.455196350812912, "learning_rate": 9.24315618367403e-06, "loss": 0.397105872631073, "step": 9978, "token_acc": 0.8644353479412473 }, { "epoch": 0.5384449360600011, "grad_norm": 0.3382333517074585, "learning_rate": 9.241413645307755e-06, "loss": 0.34196770191192627, "step": 9979, "token_acc": 0.8770402611534276 }, { "epoch": 0.5384988938649976, "grad_norm": 0.43915411829948425, "learning_rate": 9.239671130108506e-06, "loss": 0.37154221534729004, "step": 9980, "token_acc": 0.8656323954772548 }, { "epoch": 0.5385528516699941, "grad_norm": 0.3496268391609192, "learning_rate": 9.237928638129501e-06, "loss": 0.3429256081581116, "step": 9981, "token_acc": 0.877872286403453 }, { "epoch": 0.5386068094749905, "grad_norm": 0.39964449405670166, "learning_rate": 9.236186169423959e-06, "loss": 0.36794108152389526, "step": 9982, "token_acc": 0.8688969258589512 }, { "epoch": 0.538660767279987, "grad_norm": 0.39396360516548157, "learning_rate": 9.23444372404509e-06, "loss": 0.2858406901359558, "step": 9983, "token_acc": 0.8950153636053261 }, { "epoch": 0.5387147250849835, "grad_norm": 0.42242684960365295, "learning_rate": 9.23270130204611e-06, "loss": 0.3594909906387329, "step": 9984, "token_acc": 0.871718882303133 }, { "epoch": 0.53876868288998, "grad_norm": 0.47351816296577454, "learning_rate": 9.23095890348023e-06, "loss": 0.3602146506309509, "step": 9985, "token_acc": 0.8742274929012861 }, { "epoch": 0.5388226406949765, "grad_norm": 0.34177565574645996, "learning_rate": 9.22921652840066e-06, "loss": 0.3456785976886749, "step": 9986, "token_acc": 0.8746783324755533 }, { "epoch": 0.538876598499973, "grad_norm": 0.3856970965862274, "learning_rate": 9.227474176860618e-06, "loss": 0.3738350570201874, "step": 9987, "token_acc": 0.8664950349393159 }, { "epoch": 0.5389305563049696, "grad_norm": 0.36655694246292114, "learning_rate": 9.225731848913315e-06, "loss": 0.36495694518089294, "step": 9988, "token_acc": 0.8703222453222453 }, { "epoch": 0.5389845141099661, "grad_norm": 0.47981882095336914, "learning_rate": 9.223989544611956e-06, "loss": 0.3720915913581848, "step": 9989, "token_acc": 0.8693606421256573 }, { "epoch": 0.5390384719149625, "grad_norm": 0.5239025950431824, "learning_rate": 9.222247264009756e-06, "loss": 0.37917861342430115, "step": 9990, "token_acc": 0.8697977603209093 }, { "epoch": 0.539092429719959, "grad_norm": 0.35043656826019287, "learning_rate": 9.220505007159918e-06, "loss": 0.4042123258113861, "step": 9991, "token_acc": 0.8626925653047556 }, { "epoch": 0.5391463875249555, "grad_norm": 0.3378079831600189, "learning_rate": 9.21876277411565e-06, "loss": 0.3949802815914154, "step": 9992, "token_acc": 0.8602547315795739 }, { "epoch": 0.539200345329952, "grad_norm": 0.4289409816265106, "learning_rate": 9.217020564930167e-06, "loss": 0.33979320526123047, "step": 9993, "token_acc": 0.8795897130964769 }, { "epoch": 0.5392543031349485, "grad_norm": 0.4518034756183624, "learning_rate": 9.21527837965667e-06, "loss": 0.41855430603027344, "step": 9994, "token_acc": 0.857767698039851 }, { "epoch": 0.539308260939945, "grad_norm": 0.37374067306518555, "learning_rate": 9.213536218348366e-06, "loss": 0.3611421585083008, "step": 9995, "token_acc": 0.8713250958670643 }, { "epoch": 0.5393622187449415, "grad_norm": 0.4750383198261261, "learning_rate": 9.21179408105846e-06, "loss": 0.4178584814071655, "step": 9996, "token_acc": 0.8557317390174162 }, { "epoch": 0.5394161765499379, "grad_norm": 0.44914624094963074, "learning_rate": 9.210051967840155e-06, "loss": 0.3201492428779602, "step": 9997, "token_acc": 0.8817169598192673 }, { "epoch": 0.5394701343549344, "grad_norm": 0.40874597430229187, "learning_rate": 9.208309878746656e-06, "loss": 0.41950809955596924, "step": 9998, "token_acc": 0.8546532736294324 }, { "epoch": 0.5395240921599309, "grad_norm": 0.5871412754058838, "learning_rate": 9.206567813831164e-06, "loss": 0.3711473345756531, "step": 9999, "token_acc": 0.87062223733424 }, { "epoch": 0.5395780499649274, "grad_norm": 0.4958711862564087, "learning_rate": 9.204825773146885e-06, "loss": 0.3522576689720154, "step": 10000, "token_acc": 0.8726556129382984 }, { "epoch": 0.5396320077699239, "grad_norm": 0.3756565451622009, "learning_rate": 9.203083756747021e-06, "loss": 0.38129833340644836, "step": 10001, "token_acc": 0.8618131231161604 }, { "epoch": 0.5396859655749204, "grad_norm": 0.3636655807495117, "learning_rate": 9.201341764684768e-06, "loss": 0.34058624505996704, "step": 10002, "token_acc": 0.8779810141236397 }, { "epoch": 0.5397399233799169, "grad_norm": 0.4235275685787201, "learning_rate": 9.19959979701333e-06, "loss": 0.3279275894165039, "step": 10003, "token_acc": 0.8829305135951662 }, { "epoch": 0.5397938811849134, "grad_norm": 0.3473140597343445, "learning_rate": 9.197857853785903e-06, "loss": 0.3249189555644989, "step": 10004, "token_acc": 0.8855354659248957 }, { "epoch": 0.5398478389899098, "grad_norm": 0.3875090181827545, "learning_rate": 9.196115935055685e-06, "loss": 0.2734963595867157, "step": 10005, "token_acc": 0.8965283657917019 }, { "epoch": 0.5399017967949064, "grad_norm": 0.3769420385360718, "learning_rate": 9.194374040875879e-06, "loss": 0.3944852948188782, "step": 10006, "token_acc": 0.8558673469387755 }, { "epoch": 0.5399557545999029, "grad_norm": 0.4399421811103821, "learning_rate": 9.192632171299678e-06, "loss": 0.3561902940273285, "step": 10007, "token_acc": 0.8715930622358257 }, { "epoch": 0.5400097124048994, "grad_norm": 0.3297187387943268, "learning_rate": 9.190890326380278e-06, "loss": 0.4133254289627075, "step": 10008, "token_acc": 0.8564240580388486 }, { "epoch": 0.5400636702098959, "grad_norm": 0.4034998416900635, "learning_rate": 9.189148506170876e-06, "loss": 0.33425965905189514, "step": 10009, "token_acc": 0.8785221391604371 }, { "epoch": 0.5401176280148924, "grad_norm": 0.3104862868785858, "learning_rate": 9.187406710724667e-06, "loss": 0.31756073236465454, "step": 10010, "token_acc": 0.891675338189386 }, { "epoch": 0.5401715858198889, "grad_norm": 0.41529056429862976, "learning_rate": 9.185664940094843e-06, "loss": 0.3722771406173706, "step": 10011, "token_acc": 0.8706787642127024 }, { "epoch": 0.5402255436248853, "grad_norm": 0.3776181936264038, "learning_rate": 9.183923194334596e-06, "loss": 0.34034955501556396, "step": 10012, "token_acc": 0.8821263482280431 }, { "epoch": 0.5402795014298818, "grad_norm": 0.4047573506832123, "learning_rate": 9.182181473497124e-06, "loss": 0.36055123805999756, "step": 10013, "token_acc": 0.87199300189532 }, { "epoch": 0.5403334592348783, "grad_norm": 0.3936099112033844, "learning_rate": 9.180439777635619e-06, "loss": 0.31861943006515503, "step": 10014, "token_acc": 0.8831708486495126 }, { "epoch": 0.5403874170398748, "grad_norm": 0.373281866312027, "learning_rate": 9.178698106803264e-06, "loss": 0.3710355758666992, "step": 10015, "token_acc": 0.8687348521570528 }, { "epoch": 0.5404413748448713, "grad_norm": 0.3014023005962372, "learning_rate": 9.176956461053255e-06, "loss": 0.39159101247787476, "step": 10016, "token_acc": 0.8591140159767611 }, { "epoch": 0.5404953326498678, "grad_norm": 0.3827270567417145, "learning_rate": 9.17521484043878e-06, "loss": 0.3373727798461914, "step": 10017, "token_acc": 0.880100046896983 }, { "epoch": 0.5405492904548643, "grad_norm": 0.40845155715942383, "learning_rate": 9.173473245013022e-06, "loss": 0.36277326941490173, "step": 10018, "token_acc": 0.8766620013995802 }, { "epoch": 0.5406032482598608, "grad_norm": 0.5017718076705933, "learning_rate": 9.171731674829181e-06, "loss": 0.3785319924354553, "step": 10019, "token_acc": 0.8699009568574786 }, { "epoch": 0.5406572060648572, "grad_norm": 0.4319474399089813, "learning_rate": 9.169990129940437e-06, "loss": 0.4070517420768738, "step": 10020, "token_acc": 0.8601964949548593 }, { "epoch": 0.5407111638698537, "grad_norm": 0.36646607518196106, "learning_rate": 9.168248610399976e-06, "loss": 0.3526706397533417, "step": 10021, "token_acc": 0.8789840637450199 }, { "epoch": 0.5407651216748502, "grad_norm": 0.46527376770973206, "learning_rate": 9.166507116260985e-06, "loss": 0.3890683650970459, "step": 10022, "token_acc": 0.8663101604278075 }, { "epoch": 0.5408190794798468, "grad_norm": 0.38447532057762146, "learning_rate": 9.16476564757665e-06, "loss": 0.38095512986183167, "step": 10023, "token_acc": 0.8641641911963273 }, { "epoch": 0.5408730372848433, "grad_norm": 0.3526350259780884, "learning_rate": 9.163024204400154e-06, "loss": 0.3430059850215912, "step": 10024, "token_acc": 0.8801050175029171 }, { "epoch": 0.5409269950898398, "grad_norm": 0.3277839422225952, "learning_rate": 9.161282786784673e-06, "loss": 0.3557422459125519, "step": 10025, "token_acc": 0.8750259929299231 }, { "epoch": 0.5409809528948363, "grad_norm": 0.3911854326725006, "learning_rate": 9.1595413947834e-06, "loss": 0.3160397708415985, "step": 10026, "token_acc": 0.8888121546961326 }, { "epoch": 0.5410349106998328, "grad_norm": 0.3956829607486725, "learning_rate": 9.157800028449513e-06, "loss": 0.3841286897659302, "step": 10027, "token_acc": 0.8632855567805954 }, { "epoch": 0.5410888685048292, "grad_norm": 0.40124988555908203, "learning_rate": 9.156058687836194e-06, "loss": 0.29251158237457275, "step": 10028, "token_acc": 0.8946075564393496 }, { "epoch": 0.5411428263098257, "grad_norm": 0.4030572772026062, "learning_rate": 9.15431737299662e-06, "loss": 0.36897191405296326, "step": 10029, "token_acc": 0.8679632761536603 }, { "epoch": 0.5411967841148222, "grad_norm": 0.4679744243621826, "learning_rate": 9.152576083983971e-06, "loss": 0.3404722511768341, "step": 10030, "token_acc": 0.8778409090909091 }, { "epoch": 0.5412507419198187, "grad_norm": 0.46422603726387024, "learning_rate": 9.150834820851427e-06, "loss": 0.38687336444854736, "step": 10031, "token_acc": 0.8602834302325582 }, { "epoch": 0.5413046997248152, "grad_norm": 0.39283257722854614, "learning_rate": 9.149093583652165e-06, "loss": 0.36799901723861694, "step": 10032, "token_acc": 0.8714172604908947 }, { "epoch": 0.5413586575298117, "grad_norm": 0.44342052936553955, "learning_rate": 9.147352372439362e-06, "loss": 0.38200828433036804, "step": 10033, "token_acc": 0.8662737987307344 }, { "epoch": 0.5414126153348082, "grad_norm": 0.3676283061504364, "learning_rate": 9.145611187266196e-06, "loss": 0.33723926544189453, "step": 10034, "token_acc": 0.8833269156719291 }, { "epoch": 0.5414665731398046, "grad_norm": 0.3897014260292053, "learning_rate": 9.143870028185838e-06, "loss": 0.3815804123878479, "step": 10035, "token_acc": 0.8698399326032014 }, { "epoch": 0.5415205309448011, "grad_norm": 0.42161864042282104, "learning_rate": 9.142128895251467e-06, "loss": 0.28913864493370056, "step": 10036, "token_acc": 0.8945724907063197 }, { "epoch": 0.5415744887497976, "grad_norm": 0.3611016869544983, "learning_rate": 9.140387788516255e-06, "loss": 0.3772953450679779, "step": 10037, "token_acc": 0.8709140435835351 }, { "epoch": 0.5416284465547941, "grad_norm": 0.4622092843055725, "learning_rate": 9.138646708033372e-06, "loss": 0.3899969458580017, "step": 10038, "token_acc": 0.8672416708946389 }, { "epoch": 0.5416824043597906, "grad_norm": 0.39308875799179077, "learning_rate": 9.136905653855993e-06, "loss": 0.3674914836883545, "step": 10039, "token_acc": 0.8692464358452139 }, { "epoch": 0.5417363621647872, "grad_norm": 0.41310638189315796, "learning_rate": 9.135164626037293e-06, "loss": 0.38598984479904175, "step": 10040, "token_acc": 0.864841745081266 }, { "epoch": 0.5417903199697837, "grad_norm": 0.4081738591194153, "learning_rate": 9.133423624630436e-06, "loss": 0.38855305314064026, "step": 10041, "token_acc": 0.8612210288298474 }, { "epoch": 0.5418442777747802, "grad_norm": 0.3527773916721344, "learning_rate": 9.131682649688596e-06, "loss": 0.3703750967979431, "step": 10042, "token_acc": 0.8740088105726872 }, { "epoch": 0.5418982355797766, "grad_norm": 0.3382166028022766, "learning_rate": 9.12994170126494e-06, "loss": 0.38553354144096375, "step": 10043, "token_acc": 0.8691361033264287 }, { "epoch": 0.5419521933847731, "grad_norm": 0.3639231026172638, "learning_rate": 9.128200779412632e-06, "loss": 0.3904762268066406, "step": 10044, "token_acc": 0.8616976052146804 }, { "epoch": 0.5420061511897696, "grad_norm": 0.24244272708892822, "learning_rate": 9.126459884184847e-06, "loss": 0.3297736346721649, "step": 10045, "token_acc": 0.8787660668380463 }, { "epoch": 0.5420601089947661, "grad_norm": 0.4367528557777405, "learning_rate": 9.12471901563475e-06, "loss": 0.3801967203617096, "step": 10046, "token_acc": 0.8727272727272727 }, { "epoch": 0.5421140667997626, "grad_norm": 0.4742015302181244, "learning_rate": 9.122978173815505e-06, "loss": 0.3567010462284088, "step": 10047, "token_acc": 0.8734577665295793 }, { "epoch": 0.5421680246047591, "grad_norm": 0.4692799150943756, "learning_rate": 9.121237358780278e-06, "loss": 0.359050452709198, "step": 10048, "token_acc": 0.8745194947830862 }, { "epoch": 0.5422219824097556, "grad_norm": 0.47533562779426575, "learning_rate": 9.11949657058223e-06, "loss": 0.38795334100723267, "step": 10049, "token_acc": 0.8633776091081594 }, { "epoch": 0.5422759402147521, "grad_norm": 0.38674241304397583, "learning_rate": 9.117755809274522e-06, "loss": 0.3712373971939087, "step": 10050, "token_acc": 0.8709993011879804 }, { "epoch": 0.5423298980197485, "grad_norm": 0.4263800382614136, "learning_rate": 9.116015074910325e-06, "loss": 0.3528019189834595, "step": 10051, "token_acc": 0.8756137479541735 }, { "epoch": 0.542383855824745, "grad_norm": 0.3012048304080963, "learning_rate": 9.114274367542794e-06, "loss": 0.31444206833839417, "step": 10052, "token_acc": 0.887567846988886 }, { "epoch": 0.5424378136297415, "grad_norm": 0.3865755796432495, "learning_rate": 9.112533687225094e-06, "loss": 0.37253308296203613, "step": 10053, "token_acc": 0.8656890872383382 }, { "epoch": 0.542491771434738, "grad_norm": 0.378580778837204, "learning_rate": 9.110793034010384e-06, "loss": 0.3520054519176483, "step": 10054, "token_acc": 0.8766300617707619 }, { "epoch": 0.5425457292397345, "grad_norm": 0.4742916524410248, "learning_rate": 9.109052407951819e-06, "loss": 0.32867318391799927, "step": 10055, "token_acc": 0.8806603059215508 }, { "epoch": 0.542599687044731, "grad_norm": 0.3137461841106415, "learning_rate": 9.107311809102561e-06, "loss": 0.3257461488246918, "step": 10056, "token_acc": 0.8777276147479308 }, { "epoch": 0.5426536448497276, "grad_norm": 0.30694812536239624, "learning_rate": 9.105571237515764e-06, "loss": 0.3619624376296997, "step": 10057, "token_acc": 0.8741620379237693 }, { "epoch": 0.542707602654724, "grad_norm": 0.2934371531009674, "learning_rate": 9.10383069324459e-06, "loss": 0.3095523715019226, "step": 10058, "token_acc": 0.8890565002742732 }, { "epoch": 0.5427615604597205, "grad_norm": 0.36573171615600586, "learning_rate": 9.102090176342192e-06, "loss": 0.4199560880661011, "step": 10059, "token_acc": 0.8588597391176902 }, { "epoch": 0.542815518264717, "grad_norm": 0.4332863986492157, "learning_rate": 9.100349686861723e-06, "loss": 0.345497727394104, "step": 10060, "token_acc": 0.871081119077745 }, { "epoch": 0.5428694760697135, "grad_norm": 0.2859290540218353, "learning_rate": 9.09860922485634e-06, "loss": 0.3849719762802124, "step": 10061, "token_acc": 0.8657091561938959 }, { "epoch": 0.54292343387471, "grad_norm": 0.38873088359832764, "learning_rate": 9.096868790379196e-06, "loss": 0.3202001750469208, "step": 10062, "token_acc": 0.8841145833333334 }, { "epoch": 0.5429773916797065, "grad_norm": 0.39354124665260315, "learning_rate": 9.09512838348344e-06, "loss": 0.32964587211608887, "step": 10063, "token_acc": 0.8772753963593658 }, { "epoch": 0.543031349484703, "grad_norm": 0.44770053029060364, "learning_rate": 9.093388004222228e-06, "loss": 0.36830246448516846, "step": 10064, "token_acc": 0.8647127784290739 }, { "epoch": 0.5430853072896995, "grad_norm": 0.3383074402809143, "learning_rate": 9.09164765264871e-06, "loss": 0.3800791800022125, "step": 10065, "token_acc": 0.8671460568195214 }, { "epoch": 0.5431392650946959, "grad_norm": 0.36307597160339355, "learning_rate": 9.089907328816032e-06, "loss": 0.323294997215271, "step": 10066, "token_acc": 0.8925063904210951 }, { "epoch": 0.5431932228996924, "grad_norm": 0.2743413746356964, "learning_rate": 9.088167032777348e-06, "loss": 0.31016993522644043, "step": 10067, "token_acc": 0.8907409289561947 }, { "epoch": 0.5432471807046889, "grad_norm": 0.44730302691459656, "learning_rate": 9.086426764585802e-06, "loss": 0.39007657766342163, "step": 10068, "token_acc": 0.8722769322590271 }, { "epoch": 0.5433011385096854, "grad_norm": 0.4187370836734772, "learning_rate": 9.084686524294546e-06, "loss": 0.335837185382843, "step": 10069, "token_acc": 0.8814051386267964 }, { "epoch": 0.5433550963146819, "grad_norm": 0.36003339290618896, "learning_rate": 9.082946311956717e-06, "loss": 0.353368878364563, "step": 10070, "token_acc": 0.8757291402485418 }, { "epoch": 0.5434090541196784, "grad_norm": 0.3228822946548462, "learning_rate": 9.081206127625473e-06, "loss": 0.3035042881965637, "step": 10071, "token_acc": 0.8870618228170809 }, { "epoch": 0.5434630119246749, "grad_norm": 0.3134741187095642, "learning_rate": 9.079465971353956e-06, "loss": 0.3631100058555603, "step": 10072, "token_acc": 0.8722477334275286 }, { "epoch": 0.5435169697296715, "grad_norm": 0.3996589779853821, "learning_rate": 9.077725843195305e-06, "loss": 0.3086424469947815, "step": 10073, "token_acc": 0.8890739506995337 }, { "epoch": 0.5435709275346678, "grad_norm": 0.39182019233703613, "learning_rate": 9.075985743202662e-06, "loss": 0.39165207743644714, "step": 10074, "token_acc": 0.8663347853142501 }, { "epoch": 0.5436248853396644, "grad_norm": 0.39503568410873413, "learning_rate": 9.074245671429177e-06, "loss": 0.34283646941185, "step": 10075, "token_acc": 0.8780519312750291 }, { "epoch": 0.5436788431446609, "grad_norm": 0.3058825433254242, "learning_rate": 9.072505627927979e-06, "loss": 0.3456149697303772, "step": 10076, "token_acc": 0.8825631252977608 }, { "epoch": 0.5437328009496574, "grad_norm": 0.4247828722000122, "learning_rate": 9.07076561275222e-06, "loss": 0.36232274770736694, "step": 10077, "token_acc": 0.8720344009489917 }, { "epoch": 0.5437867587546539, "grad_norm": 0.3648141622543335, "learning_rate": 9.069025625955038e-06, "loss": 0.3961066007614136, "step": 10078, "token_acc": 0.8615687238750922 }, { "epoch": 0.5438407165596504, "grad_norm": 0.271826833486557, "learning_rate": 9.067285667589569e-06, "loss": 0.314157634973526, "step": 10079, "token_acc": 0.8900468384074941 }, { "epoch": 0.5438946743646469, "grad_norm": 0.430093914270401, "learning_rate": 9.06554573770895e-06, "loss": 0.33855143189430237, "step": 10080, "token_acc": 0.8780183829256893 }, { "epoch": 0.5439486321696433, "grad_norm": 0.32198041677474976, "learning_rate": 9.06380583636632e-06, "loss": 0.28673550486564636, "step": 10081, "token_acc": 0.8959251837007348 }, { "epoch": 0.5440025899746398, "grad_norm": 0.39414289593696594, "learning_rate": 9.062065963614815e-06, "loss": 0.4020686149597168, "step": 10082, "token_acc": 0.8581413820492454 }, { "epoch": 0.5440565477796363, "grad_norm": 0.5061278343200684, "learning_rate": 9.060326119507568e-06, "loss": 0.4459771513938904, "step": 10083, "token_acc": 0.8464610300192784 }, { "epoch": 0.5441105055846328, "grad_norm": 0.37556084990501404, "learning_rate": 9.058586304097717e-06, "loss": 0.374025821685791, "step": 10084, "token_acc": 0.8631425543190249 }, { "epoch": 0.5441644633896293, "grad_norm": 0.311676025390625, "learning_rate": 9.056846517438394e-06, "loss": 0.32628294825553894, "step": 10085, "token_acc": 0.8854110256687967 }, { "epoch": 0.5442184211946258, "grad_norm": 0.4174920916557312, "learning_rate": 9.05510675958273e-06, "loss": 0.37092188000679016, "step": 10086, "token_acc": 0.866997953814674 }, { "epoch": 0.5442723789996223, "grad_norm": 0.3922709822654724, "learning_rate": 9.053367030583858e-06, "loss": 0.4001116156578064, "step": 10087, "token_acc": 0.861244019138756 }, { "epoch": 0.5443263368046188, "grad_norm": 0.4094446897506714, "learning_rate": 9.051627330494911e-06, "loss": 0.43556684255599976, "step": 10088, "token_acc": 0.8566360052562418 }, { "epoch": 0.5443802946096152, "grad_norm": 0.33473753929138184, "learning_rate": 9.049887659369014e-06, "loss": 0.4000406265258789, "step": 10089, "token_acc": 0.8634120734908136 }, { "epoch": 0.5444342524146117, "grad_norm": 0.4122662842273712, "learning_rate": 9.0481480172593e-06, "loss": 0.38496506214141846, "step": 10090, "token_acc": 0.8704745166959578 }, { "epoch": 0.5444882102196082, "grad_norm": 0.4353574812412262, "learning_rate": 9.046408404218895e-06, "loss": 0.36060288548469543, "step": 10091, "token_acc": 0.8746817538896747 }, { "epoch": 0.5445421680246048, "grad_norm": 0.47843244671821594, "learning_rate": 9.04466882030093e-06, "loss": 0.40903472900390625, "step": 10092, "token_acc": 0.8601861427094105 }, { "epoch": 0.5445961258296013, "grad_norm": 0.4670596420764923, "learning_rate": 9.04292926555853e-06, "loss": 0.36346179246902466, "step": 10093, "token_acc": 0.8719271623672231 }, { "epoch": 0.5446500836345978, "grad_norm": 0.3298347294330597, "learning_rate": 9.041189740044816e-06, "loss": 0.28790605068206787, "step": 10094, "token_acc": 0.8959798994974875 }, { "epoch": 0.5447040414395943, "grad_norm": 0.45556527376174927, "learning_rate": 9.039450243812918e-06, "loss": 0.4000735878944397, "step": 10095, "token_acc": 0.863598878443015 }, { "epoch": 0.5447579992445908, "grad_norm": 0.34318119287490845, "learning_rate": 9.037710776915955e-06, "loss": 0.34320294857025146, "step": 10096, "token_acc": 0.8779200211165369 }, { "epoch": 0.5448119570495872, "grad_norm": 0.4029640257358551, "learning_rate": 9.035971339407058e-06, "loss": 0.3563495874404907, "step": 10097, "token_acc": 0.870722433460076 }, { "epoch": 0.5448659148545837, "grad_norm": 0.4274490177631378, "learning_rate": 9.03423193133934e-06, "loss": 0.34144294261932373, "step": 10098, "token_acc": 0.8787109064930941 }, { "epoch": 0.5449198726595802, "grad_norm": 0.3707653284072876, "learning_rate": 9.032492552765927e-06, "loss": 0.3622797727584839, "step": 10099, "token_acc": 0.8720474526003601 }, { "epoch": 0.5449738304645767, "grad_norm": 0.44664567708969116, "learning_rate": 9.030753203739938e-06, "loss": 0.3699972629547119, "step": 10100, "token_acc": 0.8693246852346432 }, { "epoch": 0.5450277882695732, "grad_norm": 0.395742267370224, "learning_rate": 9.029013884314493e-06, "loss": 0.3780149221420288, "step": 10101, "token_acc": 0.8687150837988827 }, { "epoch": 0.5450817460745697, "grad_norm": 0.3590392470359802, "learning_rate": 9.027274594542705e-06, "loss": 0.33580899238586426, "step": 10102, "token_acc": 0.8789740849585894 }, { "epoch": 0.5451357038795662, "grad_norm": 0.4299260675907135, "learning_rate": 9.0255353344777e-06, "loss": 0.33089563250541687, "step": 10103, "token_acc": 0.8812851697222727 }, { "epoch": 0.5451896616845626, "grad_norm": 0.4206581115722656, "learning_rate": 9.023796104172592e-06, "loss": 0.31521517038345337, "step": 10104, "token_acc": 0.8893934479403178 }, { "epoch": 0.5452436194895591, "grad_norm": 0.5019368529319763, "learning_rate": 9.022056903680492e-06, "loss": 0.4012848138809204, "step": 10105, "token_acc": 0.857521847690387 }, { "epoch": 0.5452975772945556, "grad_norm": 0.41702982783317566, "learning_rate": 9.020317733054522e-06, "loss": 0.3395400643348694, "step": 10106, "token_acc": 0.873075478783327 }, { "epoch": 0.5453515350995521, "grad_norm": 0.3751215636730194, "learning_rate": 9.018578592347789e-06, "loss": 0.29613709449768066, "step": 10107, "token_acc": 0.889120177629753 }, { "epoch": 0.5454054929045486, "grad_norm": 0.42041754722595215, "learning_rate": 9.016839481613406e-06, "loss": 0.4053117632865906, "step": 10108, "token_acc": 0.8629203916560239 }, { "epoch": 0.5454594507095452, "grad_norm": 0.3966118097305298, "learning_rate": 9.015100400904491e-06, "loss": 0.3719452917575836, "step": 10109, "token_acc": 0.8715107308177865 }, { "epoch": 0.5455134085145417, "grad_norm": 0.4606238901615143, "learning_rate": 9.013361350274152e-06, "loss": 0.30166465044021606, "step": 10110, "token_acc": 0.890761300555115 }, { "epoch": 0.5455673663195382, "grad_norm": 0.37673071026802063, "learning_rate": 9.011622329775499e-06, "loss": 0.3739045262336731, "step": 10111, "token_acc": 0.870908039329092 }, { "epoch": 0.5456213241245346, "grad_norm": 0.3796520531177521, "learning_rate": 9.00988333946164e-06, "loss": 0.3683156967163086, "step": 10112, "token_acc": 0.872285906945381 }, { "epoch": 0.5456752819295311, "grad_norm": 0.45575669407844543, "learning_rate": 9.008144379385686e-06, "loss": 0.32429414987564087, "step": 10113, "token_acc": 0.8829908675799086 }, { "epoch": 0.5457292397345276, "grad_norm": 0.37784627079963684, "learning_rate": 9.006405449600742e-06, "loss": 0.28749513626098633, "step": 10114, "token_acc": 0.8968695143358689 }, { "epoch": 0.5457831975395241, "grad_norm": 0.45699799060821533, "learning_rate": 9.004666550159913e-06, "loss": 0.3624993562698364, "step": 10115, "token_acc": 0.873015873015873 }, { "epoch": 0.5458371553445206, "grad_norm": 0.3482757806777954, "learning_rate": 9.002927681116308e-06, "loss": 0.33677899837493896, "step": 10116, "token_acc": 0.8771581170040988 }, { "epoch": 0.5458911131495171, "grad_norm": 0.44081610441207886, "learning_rate": 9.001188842523034e-06, "loss": 0.3561699092388153, "step": 10117, "token_acc": 0.8716140407116525 }, { "epoch": 0.5459450709545136, "grad_norm": 0.35545751452445984, "learning_rate": 8.999450034433188e-06, "loss": 0.38504505157470703, "step": 10118, "token_acc": 0.8728752693320565 }, { "epoch": 0.54599902875951, "grad_norm": 0.49328091740608215, "learning_rate": 8.997711256899877e-06, "loss": 0.41061925888061523, "step": 10119, "token_acc": 0.8566010474986455 }, { "epoch": 0.5460529865645065, "grad_norm": 0.47175362706184387, "learning_rate": 8.995972509976201e-06, "loss": 0.36203619837760925, "step": 10120, "token_acc": 0.873627844712182 }, { "epoch": 0.546106944369503, "grad_norm": 0.3250161111354828, "learning_rate": 8.994233793715259e-06, "loss": 0.3721052408218384, "step": 10121, "token_acc": 0.8754457463066735 }, { "epoch": 0.5461609021744995, "grad_norm": 0.2935097813606262, "learning_rate": 8.992495108170157e-06, "loss": 0.3839905261993408, "step": 10122, "token_acc": 0.8655670547649849 }, { "epoch": 0.546214859979496, "grad_norm": 0.33040520548820496, "learning_rate": 8.990756453393989e-06, "loss": 0.35100746154785156, "step": 10123, "token_acc": 0.8772519494487766 }, { "epoch": 0.5462688177844925, "grad_norm": 0.4272598624229431, "learning_rate": 8.989017829439856e-06, "loss": 0.4057920575141907, "step": 10124, "token_acc": 0.8619556285949055 }, { "epoch": 0.546322775589489, "grad_norm": 0.3650575578212738, "learning_rate": 8.987279236360851e-06, "loss": 0.2752408981323242, "step": 10125, "token_acc": 0.8983811508254528 }, { "epoch": 0.5463767333944856, "grad_norm": 0.4233793616294861, "learning_rate": 8.985540674210076e-06, "loss": 0.44892221689224243, "step": 10126, "token_acc": 0.8534086895090074 }, { "epoch": 0.546430691199482, "grad_norm": 0.4222933351993561, "learning_rate": 8.983802143040621e-06, "loss": 0.36537814140319824, "step": 10127, "token_acc": 0.8720503408495018 }, { "epoch": 0.5464846490044785, "grad_norm": 0.4368836581707001, "learning_rate": 8.98206364290558e-06, "loss": 0.38373351097106934, "step": 10128, "token_acc": 0.8709794657459567 }, { "epoch": 0.546538606809475, "grad_norm": 0.40350058674812317, "learning_rate": 8.98032517385805e-06, "loss": 0.3439238667488098, "step": 10129, "token_acc": 0.8788511108383454 }, { "epoch": 0.5465925646144715, "grad_norm": 0.47711285948753357, "learning_rate": 8.978586735951126e-06, "loss": 0.40245145559310913, "step": 10130, "token_acc": 0.8612724757952974 }, { "epoch": 0.546646522419468, "grad_norm": 0.4452807903289795, "learning_rate": 8.976848329237895e-06, "loss": 0.3934354782104492, "step": 10131, "token_acc": 0.8650347337082368 }, { "epoch": 0.5467004802244645, "grad_norm": 0.38638725876808167, "learning_rate": 8.975109953771446e-06, "loss": 0.4014509320259094, "step": 10132, "token_acc": 0.8613062165845476 }, { "epoch": 0.546754438029461, "grad_norm": 0.42155301570892334, "learning_rate": 8.97337160960487e-06, "loss": 0.4005468189716339, "step": 10133, "token_acc": 0.8542432475627297 }, { "epoch": 0.5468083958344575, "grad_norm": 0.43992364406585693, "learning_rate": 8.971633296791254e-06, "loss": 0.3391854763031006, "step": 10134, "token_acc": 0.8800469828219057 }, { "epoch": 0.5468623536394539, "grad_norm": 0.4968635141849518, "learning_rate": 8.96989501538369e-06, "loss": 0.3304067850112915, "step": 10135, "token_acc": 0.8773922514392407 }, { "epoch": 0.5469163114444504, "grad_norm": 0.3767463266849518, "learning_rate": 8.968156765435263e-06, "loss": 0.38543105125427246, "step": 10136, "token_acc": 0.8691904047976012 }, { "epoch": 0.5469702692494469, "grad_norm": 0.331712931394577, "learning_rate": 8.966418546999058e-06, "loss": 0.3957695960998535, "step": 10137, "token_acc": 0.8603676669370101 }, { "epoch": 0.5470242270544434, "grad_norm": 0.4255019426345825, "learning_rate": 8.964680360128161e-06, "loss": 0.3788788318634033, "step": 10138, "token_acc": 0.8622950819672132 }, { "epoch": 0.5470781848594399, "grad_norm": 0.3780376613140106, "learning_rate": 8.962942204875654e-06, "loss": 0.3819539248943329, "step": 10139, "token_acc": 0.867553865652725 }, { "epoch": 0.5471321426644364, "grad_norm": 0.3798049986362457, "learning_rate": 8.961204081294618e-06, "loss": 0.41011765599250793, "step": 10140, "token_acc": 0.8611667334134294 }, { "epoch": 0.547186100469433, "grad_norm": 0.4522702991962433, "learning_rate": 8.959465989438138e-06, "loss": 0.4532950520515442, "step": 10141, "token_acc": 0.8525057721037621 }, { "epoch": 0.5472400582744293, "grad_norm": 0.45517367124557495, "learning_rate": 8.957727929359296e-06, "loss": 0.3849908411502838, "step": 10142, "token_acc": 0.8615251940372058 }, { "epoch": 0.5472940160794258, "grad_norm": 0.45080801844596863, "learning_rate": 8.955989901111168e-06, "loss": 0.3650966286659241, "step": 10143, "token_acc": 0.874869500372856 }, { "epoch": 0.5473479738844224, "grad_norm": 0.39812976121902466, "learning_rate": 8.954251904746837e-06, "loss": 0.3801823556423187, "step": 10144, "token_acc": 0.8680465717981889 }, { "epoch": 0.5474019316894189, "grad_norm": 0.535214900970459, "learning_rate": 8.952513940319378e-06, "loss": 0.3946688175201416, "step": 10145, "token_acc": 0.8595854922279793 }, { "epoch": 0.5474558894944154, "grad_norm": 0.3999768793582916, "learning_rate": 8.950776007881869e-06, "loss": 0.3477822244167328, "step": 10146, "token_acc": 0.8765200088436879 }, { "epoch": 0.5475098472994119, "grad_norm": 0.30716073513031006, "learning_rate": 8.949038107487384e-06, "loss": 0.35954782366752625, "step": 10147, "token_acc": 0.8732929458876681 }, { "epoch": 0.5475638051044084, "grad_norm": 0.41332128643989563, "learning_rate": 8.947300239189e-06, "loss": 0.3483005166053772, "step": 10148, "token_acc": 0.8762289325842697 }, { "epoch": 0.5476177629094049, "grad_norm": 0.36833199858665466, "learning_rate": 8.945562403039793e-06, "loss": 0.35824069380760193, "step": 10149, "token_acc": 0.876405686399321 }, { "epoch": 0.5476717207144013, "grad_norm": 0.4045180380344391, "learning_rate": 8.943824599092834e-06, "loss": 0.3248770236968994, "step": 10150, "token_acc": 0.8851729582316635 }, { "epoch": 0.5477256785193978, "grad_norm": 0.4113391637802124, "learning_rate": 8.942086827401196e-06, "loss": 0.3440142869949341, "step": 10151, "token_acc": 0.8776371308016878 }, { "epoch": 0.5477796363243943, "grad_norm": 0.4426211714744568, "learning_rate": 8.940349088017947e-06, "loss": 0.3607870936393738, "step": 10152, "token_acc": 0.8746019108280255 }, { "epoch": 0.5478335941293908, "grad_norm": 0.4875621199607849, "learning_rate": 8.938611380996154e-06, "loss": 0.3700803220272064, "step": 10153, "token_acc": 0.8732498157700811 }, { "epoch": 0.5478875519343873, "grad_norm": 0.4108086824417114, "learning_rate": 8.936873706388902e-06, "loss": 0.3449394702911377, "step": 10154, "token_acc": 0.875834238708676 }, { "epoch": 0.5479415097393838, "grad_norm": 0.2805241644382477, "learning_rate": 8.935136064249243e-06, "loss": 0.34832942485809326, "step": 10155, "token_acc": 0.8787206134597921 }, { "epoch": 0.5479954675443803, "grad_norm": 0.4451012909412384, "learning_rate": 8.933398454630251e-06, "loss": 0.37835508584976196, "step": 10156, "token_acc": 0.8697976326842306 }, { "epoch": 0.5480494253493768, "grad_norm": 0.42643794417381287, "learning_rate": 8.93166087758499e-06, "loss": 0.3177175521850586, "step": 10157, "token_acc": 0.8867274918230332 }, { "epoch": 0.5481033831543732, "grad_norm": 0.33589860796928406, "learning_rate": 8.929923333166525e-06, "loss": 0.29776543378829956, "step": 10158, "token_acc": 0.8907656098573999 }, { "epoch": 0.5481573409593697, "grad_norm": 0.38405051827430725, "learning_rate": 8.928185821427923e-06, "loss": 0.4014148712158203, "step": 10159, "token_acc": 0.8630710826024349 }, { "epoch": 0.5482112987643663, "grad_norm": 0.49961674213409424, "learning_rate": 8.926448342422242e-06, "loss": 0.3529320955276489, "step": 10160, "token_acc": 0.8792528352234823 }, { "epoch": 0.5482652565693628, "grad_norm": 0.342070072889328, "learning_rate": 8.92471089620255e-06, "loss": 0.3964269459247589, "step": 10161, "token_acc": 0.8673823808020208 }, { "epoch": 0.5483192143743593, "grad_norm": 0.5404859185218811, "learning_rate": 8.922973482821906e-06, "loss": 0.3864985704421997, "step": 10162, "token_acc": 0.8631981080589413 }, { "epoch": 0.5483731721793558, "grad_norm": 0.40310877561569214, "learning_rate": 8.921236102333368e-06, "loss": 0.3908422589302063, "step": 10163, "token_acc": 0.8664543004165646 }, { "epoch": 0.5484271299843523, "grad_norm": 0.37655773758888245, "learning_rate": 8.919498754789998e-06, "loss": 0.40219902992248535, "step": 10164, "token_acc": 0.8601893032272779 }, { "epoch": 0.5484810877893487, "grad_norm": 0.41337403655052185, "learning_rate": 8.917761440244855e-06, "loss": 0.4129682183265686, "step": 10165, "token_acc": 0.8592797783933518 }, { "epoch": 0.5485350455943452, "grad_norm": 0.4562237858772278, "learning_rate": 8.916024158750987e-06, "loss": 0.4013108015060425, "step": 10166, "token_acc": 0.865559783235328 }, { "epoch": 0.5485890033993417, "grad_norm": 0.41695937514305115, "learning_rate": 8.914286910361464e-06, "loss": 0.32396751642227173, "step": 10167, "token_acc": 0.8870735851522676 }, { "epoch": 0.5486429612043382, "grad_norm": 0.4294660985469818, "learning_rate": 8.912549695129332e-06, "loss": 0.3856474757194519, "step": 10168, "token_acc": 0.8643082754264055 }, { "epoch": 0.5486969190093347, "grad_norm": 0.34086647629737854, "learning_rate": 8.910812513107649e-06, "loss": 0.3941049575805664, "step": 10169, "token_acc": 0.8623666038920276 }, { "epoch": 0.5487508768143312, "grad_norm": 0.3726945221424103, "learning_rate": 8.909075364349465e-06, "loss": 0.35990285873413086, "step": 10170, "token_acc": 0.8710883929684577 }, { "epoch": 0.5488048346193277, "grad_norm": 0.49620693922042847, "learning_rate": 8.907338248907834e-06, "loss": 0.4257451891899109, "step": 10171, "token_acc": 0.8639195016770483 }, { "epoch": 0.5488587924243242, "grad_norm": 0.3239763379096985, "learning_rate": 8.905601166835808e-06, "loss": 0.36392414569854736, "step": 10172, "token_acc": 0.8696567959360674 }, { "epoch": 0.5489127502293206, "grad_norm": 0.3788743317127228, "learning_rate": 8.903864118186432e-06, "loss": 0.38645586371421814, "step": 10173, "token_acc": 0.8716693855356172 }, { "epoch": 0.5489667080343171, "grad_norm": 0.46470263600349426, "learning_rate": 8.90212710301276e-06, "loss": 0.35853636264801025, "step": 10174, "token_acc": 0.8744531353572849 }, { "epoch": 0.5490206658393136, "grad_norm": 0.44712355732917786, "learning_rate": 8.90039012136784e-06, "loss": 0.3291076123714447, "step": 10175, "token_acc": 0.8834414043966472 }, { "epoch": 0.5490746236443101, "grad_norm": 0.44539859890937805, "learning_rate": 8.89865317330472e-06, "loss": 0.3652758002281189, "step": 10176, "token_acc": 0.8713991769547325 }, { "epoch": 0.5491285814493067, "grad_norm": 0.4187907874584198, "learning_rate": 8.89691625887644e-06, "loss": 0.37626269459724426, "step": 10177, "token_acc": 0.8712241653418124 }, { "epoch": 0.5491825392543032, "grad_norm": 0.40130963921546936, "learning_rate": 8.89517937813605e-06, "loss": 0.3874049484729767, "step": 10178, "token_acc": 0.8646952155112289 }, { "epoch": 0.5492364970592997, "grad_norm": 0.514655590057373, "learning_rate": 8.893442531136589e-06, "loss": 0.3715735673904419, "step": 10179, "token_acc": 0.8739495798319328 }, { "epoch": 0.5492904548642962, "grad_norm": 0.43368232250213623, "learning_rate": 8.891705717931108e-06, "loss": 0.3102462887763977, "step": 10180, "token_acc": 0.8951419774204584 }, { "epoch": 0.5493444126692926, "grad_norm": 0.42287129163742065, "learning_rate": 8.88996893857264e-06, "loss": 0.328207790851593, "step": 10181, "token_acc": 0.8825757575757576 }, { "epoch": 0.5493983704742891, "grad_norm": 0.43652939796447754, "learning_rate": 8.888232193114233e-06, "loss": 0.3840627670288086, "step": 10182, "token_acc": 0.8672985781990521 }, { "epoch": 0.5494523282792856, "grad_norm": 0.35760000348091125, "learning_rate": 8.886495481608924e-06, "loss": 0.2887897491455078, "step": 10183, "token_acc": 0.8907012586694066 }, { "epoch": 0.5495062860842821, "grad_norm": 0.4496324062347412, "learning_rate": 8.88475880410975e-06, "loss": 0.3513824939727783, "step": 10184, "token_acc": 0.874054054054054 }, { "epoch": 0.5495602438892786, "grad_norm": 0.4847041070461273, "learning_rate": 8.88302216066975e-06, "loss": 0.338769793510437, "step": 10185, "token_acc": 0.8802442078897934 }, { "epoch": 0.5496142016942751, "grad_norm": 0.4920636713504791, "learning_rate": 8.881285551341955e-06, "loss": 0.3557059168815613, "step": 10186, "token_acc": 0.8723919523099851 }, { "epoch": 0.5496681594992716, "grad_norm": 0.3974246680736542, "learning_rate": 8.879548976179412e-06, "loss": 0.3725219666957855, "step": 10187, "token_acc": 0.8716600963644328 }, { "epoch": 0.549722117304268, "grad_norm": 0.29149195551872253, "learning_rate": 8.877812435235148e-06, "loss": 0.3811168372631073, "step": 10188, "token_acc": 0.8684012471777228 }, { "epoch": 0.5497760751092645, "grad_norm": 0.4732033610343933, "learning_rate": 8.876075928562204e-06, "loss": 0.3752018213272095, "step": 10189, "token_acc": 0.8747241554914276 }, { "epoch": 0.549830032914261, "grad_norm": 0.3367753028869629, "learning_rate": 8.874339456213601e-06, "loss": 0.38076645135879517, "step": 10190, "token_acc": 0.874151018777467 }, { "epoch": 0.5498839907192575, "grad_norm": 0.36858469247817993, "learning_rate": 8.872603018242378e-06, "loss": 0.2976694405078888, "step": 10191, "token_acc": 0.891566265060241 }, { "epoch": 0.549937948524254, "grad_norm": 0.4146480858325958, "learning_rate": 8.87086661470156e-06, "loss": 0.3207267224788666, "step": 10192, "token_acc": 0.8839373163565132 }, { "epoch": 0.5499919063292505, "grad_norm": 0.4045209288597107, "learning_rate": 8.869130245644181e-06, "loss": 0.3465679883956909, "step": 10193, "token_acc": 0.8809338521400778 }, { "epoch": 0.550045864134247, "grad_norm": 0.3220883905887604, "learning_rate": 8.867393911123272e-06, "loss": 0.3251715898513794, "step": 10194, "token_acc": 0.8833333333333333 }, { "epoch": 0.5500998219392436, "grad_norm": 0.4478353261947632, "learning_rate": 8.865657611191854e-06, "loss": 0.3607712984085083, "step": 10195, "token_acc": 0.8748091603053435 }, { "epoch": 0.55015377974424, "grad_norm": 0.39496809244155884, "learning_rate": 8.863921345902956e-06, "loss": 0.32193583250045776, "step": 10196, "token_acc": 0.885034225264468 }, { "epoch": 0.5502077375492365, "grad_norm": 0.2825489044189453, "learning_rate": 8.8621851153096e-06, "loss": 0.3048126697540283, "step": 10197, "token_acc": 0.8876655052264808 }, { "epoch": 0.550261695354233, "grad_norm": 0.4352312386035919, "learning_rate": 8.860448919464815e-06, "loss": 0.3285900056362152, "step": 10198, "token_acc": 0.8819239720713732 }, { "epoch": 0.5503156531592295, "grad_norm": 0.36491602659225464, "learning_rate": 8.858712758421619e-06, "loss": 0.3651435375213623, "step": 10199, "token_acc": 0.8765765765765766 }, { "epoch": 0.550369610964226, "grad_norm": 0.5095048546791077, "learning_rate": 8.856976632233038e-06, "loss": 0.4376969337463379, "step": 10200, "token_acc": 0.855110159118727 }, { "epoch": 0.5504235687692225, "grad_norm": 0.44755956530570984, "learning_rate": 8.855240540952092e-06, "loss": 0.3610506057739258, "step": 10201, "token_acc": 0.8729046022554099 }, { "epoch": 0.550477526574219, "grad_norm": 0.4889961779117584, "learning_rate": 8.8535044846318e-06, "loss": 0.40619736909866333, "step": 10202, "token_acc": 0.8649730561970747 }, { "epoch": 0.5505314843792155, "grad_norm": 0.3894205689430237, "learning_rate": 8.85176846332518e-06, "loss": 0.3761428892612457, "step": 10203, "token_acc": 0.8707677634716261 }, { "epoch": 0.5505854421842119, "grad_norm": 0.4216923415660858, "learning_rate": 8.85003247708525e-06, "loss": 0.38608282804489136, "step": 10204, "token_acc": 0.8642539682539683 }, { "epoch": 0.5506393999892084, "grad_norm": 0.34612515568733215, "learning_rate": 8.848296525965026e-06, "loss": 0.3305252194404602, "step": 10205, "token_acc": 0.8815740626160129 }, { "epoch": 0.5506933577942049, "grad_norm": 0.31386062502861023, "learning_rate": 8.846560610017526e-06, "loss": 0.33759167790412903, "step": 10206, "token_acc": 0.877729812087354 }, { "epoch": 0.5507473155992014, "grad_norm": 0.3854213356971741, "learning_rate": 8.844824729295763e-06, "loss": 0.3371385335922241, "step": 10207, "token_acc": 0.8815482667495725 }, { "epoch": 0.5508012734041979, "grad_norm": 0.3358782231807709, "learning_rate": 8.84308888385275e-06, "loss": 0.35676562786102295, "step": 10208, "token_acc": 0.8797943851324634 }, { "epoch": 0.5508552312091944, "grad_norm": 0.45141106843948364, "learning_rate": 8.841353073741499e-06, "loss": 0.43189823627471924, "step": 10209, "token_acc": 0.8510096994752743 }, { "epoch": 0.550909189014191, "grad_norm": 0.33078908920288086, "learning_rate": 8.839617299015021e-06, "loss": 0.337067574262619, "step": 10210, "token_acc": 0.8784436239704629 }, { "epoch": 0.5509631468191873, "grad_norm": 0.33687546849250793, "learning_rate": 8.837881559726324e-06, "loss": 0.3560905158519745, "step": 10211, "token_acc": 0.8735723170869615 }, { "epoch": 0.5510171046241839, "grad_norm": 0.38912343978881836, "learning_rate": 8.836145855928424e-06, "loss": 0.30546343326568604, "step": 10212, "token_acc": 0.8875864990820506 }, { "epoch": 0.5510710624291804, "grad_norm": 0.41540348529815674, "learning_rate": 8.834410187674326e-06, "loss": 0.37595129013061523, "step": 10213, "token_acc": 0.8657599798514041 }, { "epoch": 0.5511250202341769, "grad_norm": 0.4367222487926483, "learning_rate": 8.832674555017034e-06, "loss": 0.3306223750114441, "step": 10214, "token_acc": 0.8757404795486601 }, { "epoch": 0.5511789780391734, "grad_norm": 0.33959171175956726, "learning_rate": 8.830938958009555e-06, "loss": 0.33356982469558716, "step": 10215, "token_acc": 0.8804251550044287 }, { "epoch": 0.5512329358441699, "grad_norm": 0.45145586133003235, "learning_rate": 8.829203396704894e-06, "loss": 0.3676401376724243, "step": 10216, "token_acc": 0.8748427672955975 }, { "epoch": 0.5512868936491664, "grad_norm": 0.436115026473999, "learning_rate": 8.827467871156054e-06, "loss": 0.3415168821811676, "step": 10217, "token_acc": 0.8787966028501512 }, { "epoch": 0.5513408514541629, "grad_norm": 0.37835508584976196, "learning_rate": 8.825732381416034e-06, "loss": 0.36277008056640625, "step": 10218, "token_acc": 0.86940938133084 }, { "epoch": 0.5513948092591593, "grad_norm": 0.4385060667991638, "learning_rate": 8.823996927537843e-06, "loss": 0.3325139284133911, "step": 10219, "token_acc": 0.884346662702542 }, { "epoch": 0.5514487670641558, "grad_norm": 0.38385719060897827, "learning_rate": 8.822261509574481e-06, "loss": 0.3392452597618103, "step": 10220, "token_acc": 0.8822129684711482 }, { "epoch": 0.5515027248691523, "grad_norm": 0.3833923935890198, "learning_rate": 8.82052612757894e-06, "loss": 0.31763285398483276, "step": 10221, "token_acc": 0.8865420293991723 }, { "epoch": 0.5515566826741488, "grad_norm": 0.38238680362701416, "learning_rate": 8.818790781604225e-06, "loss": 0.30670297145843506, "step": 10222, "token_acc": 0.8914050437467833 }, { "epoch": 0.5516106404791453, "grad_norm": 0.47441262006759644, "learning_rate": 8.81705547170333e-06, "loss": 0.3760043978691101, "step": 10223, "token_acc": 0.8638232271325796 }, { "epoch": 0.5516645982841418, "grad_norm": 0.4125097990036011, "learning_rate": 8.815320197929246e-06, "loss": 0.31139272451400757, "step": 10224, "token_acc": 0.8882063882063882 }, { "epoch": 0.5517185560891383, "grad_norm": 0.3565122187137604, "learning_rate": 8.813584960334977e-06, "loss": 0.34472110867500305, "step": 10225, "token_acc": 0.8815486665764181 }, { "epoch": 0.5517725138941348, "grad_norm": 0.3853541612625122, "learning_rate": 8.811849758973511e-06, "loss": 0.3433866500854492, "step": 10226, "token_acc": 0.874439461883408 }, { "epoch": 0.5518264716991312, "grad_norm": 0.3700346052646637, "learning_rate": 8.810114593897844e-06, "loss": 0.3746640086174011, "step": 10227, "token_acc": 0.868345482659598 }, { "epoch": 0.5518804295041277, "grad_norm": 0.3416321575641632, "learning_rate": 8.808379465160965e-06, "loss": 0.31432193517684937, "step": 10228, "token_acc": 0.8850042636131076 }, { "epoch": 0.5519343873091243, "grad_norm": 0.3693341314792633, "learning_rate": 8.806644372815865e-06, "loss": 0.3829672932624817, "step": 10229, "token_acc": 0.871579196777633 }, { "epoch": 0.5519883451141208, "grad_norm": 0.30916908383369446, "learning_rate": 8.804909316915531e-06, "loss": 0.3098788261413574, "step": 10230, "token_acc": 0.8873925868923945 }, { "epoch": 0.5520423029191173, "grad_norm": 0.37985292077064514, "learning_rate": 8.803174297512955e-06, "loss": 0.337690144777298, "step": 10231, "token_acc": 0.8778076171875 }, { "epoch": 0.5520962607241138, "grad_norm": 0.45270484685897827, "learning_rate": 8.801439314661122e-06, "loss": 0.381750226020813, "step": 10232, "token_acc": 0.8650734378889718 }, { "epoch": 0.5521502185291103, "grad_norm": 0.3131259083747864, "learning_rate": 8.79970436841302e-06, "loss": 0.32348161935806274, "step": 10233, "token_acc": 0.8861956843038723 }, { "epoch": 0.5522041763341067, "grad_norm": 0.44706568121910095, "learning_rate": 8.797969458821632e-06, "loss": 0.3553025722503662, "step": 10234, "token_acc": 0.8682138416908413 }, { "epoch": 0.5522581341391032, "grad_norm": 0.4073270857334137, "learning_rate": 8.79623458593994e-06, "loss": 0.3730772137641907, "step": 10235, "token_acc": 0.868162188099808 }, { "epoch": 0.5523120919440997, "grad_norm": 0.3999028205871582, "learning_rate": 8.79449974982093e-06, "loss": 0.35466301441192627, "step": 10236, "token_acc": 0.8746434231378764 }, { "epoch": 0.5523660497490962, "grad_norm": 0.39659392833709717, "learning_rate": 8.79276495051758e-06, "loss": 0.3938564658164978, "step": 10237, "token_acc": 0.8638829317522392 }, { "epoch": 0.5524200075540927, "grad_norm": 0.35576581954956055, "learning_rate": 8.791030188082873e-06, "loss": 0.3487657904624939, "step": 10238, "token_acc": 0.8798535611271355 }, { "epoch": 0.5524739653590892, "grad_norm": 0.33090004324913025, "learning_rate": 8.789295462569789e-06, "loss": 0.35558271408081055, "step": 10239, "token_acc": 0.8737952148769702 }, { "epoch": 0.5525279231640857, "grad_norm": 0.49078837037086487, "learning_rate": 8.787560774031304e-06, "loss": 0.4205859303474426, "step": 10240, "token_acc": 0.8552298765064722 }, { "epoch": 0.5525818809690822, "grad_norm": 0.36725521087646484, "learning_rate": 8.785826122520393e-06, "loss": 0.3715074360370636, "step": 10241, "token_acc": 0.8732193732193733 }, { "epoch": 0.5526358387740786, "grad_norm": 0.37918365001678467, "learning_rate": 8.784091508090035e-06, "loss": 0.3561948835849762, "step": 10242, "token_acc": 0.8733812949640288 }, { "epoch": 0.5526897965790751, "grad_norm": 0.4261106252670288, "learning_rate": 8.782356930793205e-06, "loss": 0.3473869562149048, "step": 10243, "token_acc": 0.8777012586084065 }, { "epoch": 0.5527437543840716, "grad_norm": 0.46392467617988586, "learning_rate": 8.780622390682872e-06, "loss": 0.3832953870296478, "step": 10244, "token_acc": 0.8669394435351883 }, { "epoch": 0.5527977121890681, "grad_norm": 0.3009525537490845, "learning_rate": 8.778887887812015e-06, "loss": 0.3353026509284973, "step": 10245, "token_acc": 0.8820022497187852 }, { "epoch": 0.5528516699940647, "grad_norm": 0.4829986095428467, "learning_rate": 8.777153422233601e-06, "loss": 0.390541672706604, "step": 10246, "token_acc": 0.8704891005678695 }, { "epoch": 0.5529056277990612, "grad_norm": 0.31411609053611755, "learning_rate": 8.775418994000604e-06, "loss": 0.31877779960632324, "step": 10247, "token_acc": 0.8882198952879581 }, { "epoch": 0.5529595856040577, "grad_norm": 0.4336080551147461, "learning_rate": 8.773684603165989e-06, "loss": 0.37471067905426025, "step": 10248, "token_acc": 0.8740355585374036 }, { "epoch": 0.5530135434090541, "grad_norm": 0.37178754806518555, "learning_rate": 8.771950249782723e-06, "loss": 0.35313093662261963, "step": 10249, "token_acc": 0.8783304895655598 }, { "epoch": 0.5530675012140506, "grad_norm": 0.3906105160713196, "learning_rate": 8.770215933903771e-06, "loss": 0.3313864469528198, "step": 10250, "token_acc": 0.8792889109895574 }, { "epoch": 0.5531214590190471, "grad_norm": 0.41648465394973755, "learning_rate": 8.768481655582109e-06, "loss": 0.38105934858322144, "step": 10251, "token_acc": 0.8635885095503083 }, { "epoch": 0.5531754168240436, "grad_norm": 0.5650871992111206, "learning_rate": 8.76674741487069e-06, "loss": 0.37619662284851074, "step": 10252, "token_acc": 0.8658420551855376 }, { "epoch": 0.5532293746290401, "grad_norm": 0.3412112295627594, "learning_rate": 8.765013211822486e-06, "loss": 0.34548163414001465, "step": 10253, "token_acc": 0.8757134703196348 }, { "epoch": 0.5532833324340366, "grad_norm": 0.47999224066734314, "learning_rate": 8.763279046490454e-06, "loss": 0.38490137457847595, "step": 10254, "token_acc": 0.8624816292252782 }, { "epoch": 0.5533372902390331, "grad_norm": 0.4580979645252228, "learning_rate": 8.761544918927555e-06, "loss": 0.3615657389163971, "step": 10255, "token_acc": 0.8726345236136251 }, { "epoch": 0.5533912480440296, "grad_norm": 0.3964705169200897, "learning_rate": 8.759810829186749e-06, "loss": 0.3948397636413574, "step": 10256, "token_acc": 0.8645117693433946 }, { "epoch": 0.553445205849026, "grad_norm": 0.41187790036201477, "learning_rate": 8.758076777320995e-06, "loss": 0.39138078689575195, "step": 10257, "token_acc": 0.8652317880794702 }, { "epoch": 0.5534991636540225, "grad_norm": 0.3416683077812195, "learning_rate": 8.756342763383252e-06, "loss": 0.32494908571243286, "step": 10258, "token_acc": 0.8910330631673743 }, { "epoch": 0.553553121459019, "grad_norm": 0.3776397705078125, "learning_rate": 8.754608787426476e-06, "loss": 0.36620521545410156, "step": 10259, "token_acc": 0.8754300628781587 }, { "epoch": 0.5536070792640155, "grad_norm": 0.4031880497932434, "learning_rate": 8.752874849503621e-06, "loss": 0.36505651473999023, "step": 10260, "token_acc": 0.8740660530590146 }, { "epoch": 0.553661037069012, "grad_norm": 0.32290753722190857, "learning_rate": 8.75114094966764e-06, "loss": 0.36318159103393555, "step": 10261, "token_acc": 0.8739686134929623 }, { "epoch": 0.5537149948740085, "grad_norm": 0.380601704120636, "learning_rate": 8.74940708797149e-06, "loss": 0.35396113991737366, "step": 10262, "token_acc": 0.8723175965665236 }, { "epoch": 0.553768952679005, "grad_norm": 0.3095702528953552, "learning_rate": 8.747673264468117e-06, "loss": 0.2953386902809143, "step": 10263, "token_acc": 0.8939714436805922 }, { "epoch": 0.5538229104840016, "grad_norm": 0.3652943968772888, "learning_rate": 8.745939479210476e-06, "loss": 0.32832831144332886, "step": 10264, "token_acc": 0.8805970149253731 }, { "epoch": 0.553876868288998, "grad_norm": 0.37692317366600037, "learning_rate": 8.744205732251515e-06, "loss": 0.3692479133605957, "step": 10265, "token_acc": 0.8685781006746238 }, { "epoch": 0.5539308260939945, "grad_norm": 0.45741474628448486, "learning_rate": 8.742472023644183e-06, "loss": 0.3065910339355469, "step": 10266, "token_acc": 0.8880072545907958 }, { "epoch": 0.553984783898991, "grad_norm": 0.3507741689682007, "learning_rate": 8.740738353441422e-06, "loss": 0.3535512387752533, "step": 10267, "token_acc": 0.8759761611179614 }, { "epoch": 0.5540387417039875, "grad_norm": 0.4277370870113373, "learning_rate": 8.739004721696186e-06, "loss": 0.3743157386779785, "step": 10268, "token_acc": 0.8754414125200642 }, { "epoch": 0.554092699508984, "grad_norm": 0.35276395082473755, "learning_rate": 8.737271128461409e-06, "loss": 0.3479388952255249, "step": 10269, "token_acc": 0.8716174471765723 }, { "epoch": 0.5541466573139805, "grad_norm": 0.3091804087162018, "learning_rate": 8.735537573790047e-06, "loss": 0.36348479986190796, "step": 10270, "token_acc": 0.8758177306564403 }, { "epoch": 0.554200615118977, "grad_norm": 0.43579667806625366, "learning_rate": 8.733804057735036e-06, "loss": 0.3958375155925751, "step": 10271, "token_acc": 0.8589920824431319 }, { "epoch": 0.5542545729239734, "grad_norm": 0.41903507709503174, "learning_rate": 8.732070580349317e-06, "loss": 0.40633562207221985, "step": 10272, "token_acc": 0.8633164723993842 }, { "epoch": 0.5543085307289699, "grad_norm": 0.3992308974266052, "learning_rate": 8.73033714168583e-06, "loss": 0.3491443395614624, "step": 10273, "token_acc": 0.8773742592311199 }, { "epoch": 0.5543624885339664, "grad_norm": 0.36845913529396057, "learning_rate": 8.728603741797514e-06, "loss": 0.3222098648548126, "step": 10274, "token_acc": 0.8893728222996515 }, { "epoch": 0.5544164463389629, "grad_norm": 0.34768250584602356, "learning_rate": 8.726870380737307e-06, "loss": 0.35857802629470825, "step": 10275, "token_acc": 0.8735934881493895 }, { "epoch": 0.5544704041439594, "grad_norm": 0.44101178646087646, "learning_rate": 8.725137058558139e-06, "loss": 0.36575764417648315, "step": 10276, "token_acc": 0.8762310354005856 }, { "epoch": 0.5545243619489559, "grad_norm": 0.3504287302494049, "learning_rate": 8.723403775312957e-06, "loss": 0.34999334812164307, "step": 10277, "token_acc": 0.8792521109770808 }, { "epoch": 0.5545783197539524, "grad_norm": 0.4386540353298187, "learning_rate": 8.72167053105469e-06, "loss": 0.37586143612861633, "step": 10278, "token_acc": 0.8680210918114144 }, { "epoch": 0.554632277558949, "grad_norm": 0.44121649861335754, "learning_rate": 8.71993732583627e-06, "loss": 0.379753977060318, "step": 10279, "token_acc": 0.8690157597383289 }, { "epoch": 0.5546862353639453, "grad_norm": 0.30622124671936035, "learning_rate": 8.718204159710629e-06, "loss": 0.29705512523651123, "step": 10280, "token_acc": 0.8921544515494343 }, { "epoch": 0.5547401931689419, "grad_norm": 0.4697973430156708, "learning_rate": 8.716471032730696e-06, "loss": 0.3258729577064514, "step": 10281, "token_acc": 0.8796494992846924 }, { "epoch": 0.5547941509739384, "grad_norm": 0.35720354318618774, "learning_rate": 8.714737944949398e-06, "loss": 0.35090962052345276, "step": 10282, "token_acc": 0.8777612310747084 }, { "epoch": 0.5548481087789349, "grad_norm": 0.35068950057029724, "learning_rate": 8.71300489641967e-06, "loss": 0.38219165802001953, "step": 10283, "token_acc": 0.8677087304613038 }, { "epoch": 0.5549020665839314, "grad_norm": 0.2774293124675751, "learning_rate": 8.711271887194437e-06, "loss": 0.3388010561466217, "step": 10284, "token_acc": 0.8793149585228793 }, { "epoch": 0.5549560243889279, "grad_norm": 0.3962814509868622, "learning_rate": 8.709538917326622e-06, "loss": 0.3342249393463135, "step": 10285, "token_acc": 0.8794875404758553 }, { "epoch": 0.5550099821939244, "grad_norm": 0.38225671648979187, "learning_rate": 8.70780598686915e-06, "loss": 0.29197239875793457, "step": 10286, "token_acc": 0.8897458369851008 }, { "epoch": 0.5550639399989209, "grad_norm": 0.25552043318748474, "learning_rate": 8.706073095874943e-06, "loss": 0.32852017879486084, "step": 10287, "token_acc": 0.8838847187471041 }, { "epoch": 0.5551178978039173, "grad_norm": 0.40475353598594666, "learning_rate": 8.704340244396925e-06, "loss": 0.32001516222953796, "step": 10288, "token_acc": 0.8853789055756948 }, { "epoch": 0.5551718556089138, "grad_norm": 0.3883582651615143, "learning_rate": 8.702607432488017e-06, "loss": 0.33994418382644653, "step": 10289, "token_acc": 0.8772404463983767 }, { "epoch": 0.5552258134139103, "grad_norm": 0.4558391273021698, "learning_rate": 8.700874660201139e-06, "loss": 0.3975280523300171, "step": 10290, "token_acc": 0.8620689655172413 }, { "epoch": 0.5552797712189068, "grad_norm": 0.47363218665122986, "learning_rate": 8.699141927589208e-06, "loss": 0.45661091804504395, "step": 10291, "token_acc": 0.8495367349601983 }, { "epoch": 0.5553337290239033, "grad_norm": 0.4200882911682129, "learning_rate": 8.697409234705142e-06, "loss": 0.3462684750556946, "step": 10292, "token_acc": 0.8730797912005965 }, { "epoch": 0.5553876868288998, "grad_norm": 0.38620004057884216, "learning_rate": 8.695676581601858e-06, "loss": 0.30944573879241943, "step": 10293, "token_acc": 0.8916556291390728 }, { "epoch": 0.5554416446338963, "grad_norm": 0.4126400649547577, "learning_rate": 8.693943968332267e-06, "loss": 0.30939579010009766, "step": 10294, "token_acc": 0.8805160807778609 }, { "epoch": 0.5554956024388927, "grad_norm": 0.31782352924346924, "learning_rate": 8.692211394949284e-06, "loss": 0.30186623334884644, "step": 10295, "token_acc": 0.8902439024390244 }, { "epoch": 0.5555495602438892, "grad_norm": 0.4773556888103485, "learning_rate": 8.690478861505826e-06, "loss": 0.39945507049560547, "step": 10296, "token_acc": 0.8575256107171001 }, { "epoch": 0.5556035180488857, "grad_norm": 0.4395066797733307, "learning_rate": 8.688746368054799e-06, "loss": 0.357530415058136, "step": 10297, "token_acc": 0.8760951188986232 }, { "epoch": 0.5556574758538823, "grad_norm": 0.49468305706977844, "learning_rate": 8.687013914649115e-06, "loss": 0.32722654938697815, "step": 10298, "token_acc": 0.8794738452126032 }, { "epoch": 0.5557114336588788, "grad_norm": 0.35705769062042236, "learning_rate": 8.685281501341682e-06, "loss": 0.3465864956378937, "step": 10299, "token_acc": 0.8746031746031746 }, { "epoch": 0.5557653914638753, "grad_norm": 0.49639374017715454, "learning_rate": 8.683549128185407e-06, "loss": 0.3870657682418823, "step": 10300, "token_acc": 0.8716227280170297 }, { "epoch": 0.5558193492688718, "grad_norm": 0.3375871181488037, "learning_rate": 8.681816795233197e-06, "loss": 0.3615938425064087, "step": 10301, "token_acc": 0.8748400307141029 }, { "epoch": 0.5558733070738683, "grad_norm": 0.4971248209476471, "learning_rate": 8.680084502537954e-06, "loss": 0.36494165658950806, "step": 10302, "token_acc": 0.868788386376326 }, { "epoch": 0.5559272648788647, "grad_norm": 0.38079312443733215, "learning_rate": 8.678352250152587e-06, "loss": 0.2982296943664551, "step": 10303, "token_acc": 0.8933420365535248 }, { "epoch": 0.5559812226838612, "grad_norm": 0.4251067042350769, "learning_rate": 8.676620038129995e-06, "loss": 0.350017249584198, "step": 10304, "token_acc": 0.8766551525618883 }, { "epoch": 0.5560351804888577, "grad_norm": 0.376828134059906, "learning_rate": 8.674887866523082e-06, "loss": 0.33704841136932373, "step": 10305, "token_acc": 0.8823951751866743 }, { "epoch": 0.5560891382938542, "grad_norm": 0.4950999617576599, "learning_rate": 8.673155735384749e-06, "loss": 0.38626402616500854, "step": 10306, "token_acc": 0.869432918395574 }, { "epoch": 0.5561430960988507, "grad_norm": 0.4478786289691925, "learning_rate": 8.67142364476789e-06, "loss": 0.3620007634162903, "step": 10307, "token_acc": 0.8726790450928382 }, { "epoch": 0.5561970539038472, "grad_norm": 0.5152307748794556, "learning_rate": 8.6696915947254e-06, "loss": 0.38218292593955994, "step": 10308, "token_acc": 0.8666666666666667 }, { "epoch": 0.5562510117088437, "grad_norm": 0.3578583300113678, "learning_rate": 8.667959585310185e-06, "loss": 0.3568362295627594, "step": 10309, "token_acc": 0.8738420282788883 }, { "epoch": 0.5563049695138402, "grad_norm": 0.3911735415458679, "learning_rate": 8.666227616575134e-06, "loss": 0.356094092130661, "step": 10310, "token_acc": 0.8698074168390896 }, { "epoch": 0.5563589273188366, "grad_norm": 0.38357552886009216, "learning_rate": 8.664495688573143e-06, "loss": 0.3143191933631897, "step": 10311, "token_acc": 0.888187734174719 }, { "epoch": 0.5564128851238331, "grad_norm": 0.498612642288208, "learning_rate": 8.662763801357105e-06, "loss": 0.4129002094268799, "step": 10312, "token_acc": 0.8573757131214343 }, { "epoch": 0.5564668429288296, "grad_norm": 0.44696304202079773, "learning_rate": 8.661031954979909e-06, "loss": 0.3357415795326233, "step": 10313, "token_acc": 0.878545327997216 }, { "epoch": 0.5565208007338261, "grad_norm": 0.305419921875, "learning_rate": 8.659300149494444e-06, "loss": 0.30250656604766846, "step": 10314, "token_acc": 0.8889133788847255 }, { "epoch": 0.5565747585388227, "grad_norm": 0.35450664162635803, "learning_rate": 8.657568384953604e-06, "loss": 0.35973766446113586, "step": 10315, "token_acc": 0.8737781555461114 }, { "epoch": 0.5566287163438192, "grad_norm": 0.5510839819908142, "learning_rate": 8.655836661410274e-06, "loss": 0.38159680366516113, "step": 10316, "token_acc": 0.8675251367084141 }, { "epoch": 0.5566826741488157, "grad_norm": 0.42233070731163025, "learning_rate": 8.654104978917339e-06, "loss": 0.34525638818740845, "step": 10317, "token_acc": 0.8806073153899241 }, { "epoch": 0.5567366319538121, "grad_norm": 0.46120232343673706, "learning_rate": 8.652373337527686e-06, "loss": 0.395586222410202, "step": 10318, "token_acc": 0.8600326674205302 }, { "epoch": 0.5567905897588086, "grad_norm": 0.40849435329437256, "learning_rate": 8.650641737294198e-06, "loss": 0.3089548349380493, "step": 10319, "token_acc": 0.8870989215816802 }, { "epoch": 0.5568445475638051, "grad_norm": 0.3913017213344574, "learning_rate": 8.64891017826976e-06, "loss": 0.3755412697792053, "step": 10320, "token_acc": 0.8676031081888822 }, { "epoch": 0.5568985053688016, "grad_norm": 0.37174177169799805, "learning_rate": 8.647178660507247e-06, "loss": 0.3107620179653168, "step": 10321, "token_acc": 0.8888167294453825 }, { "epoch": 0.5569524631737981, "grad_norm": 0.5639927387237549, "learning_rate": 8.645447184059545e-06, "loss": 0.43897753953933716, "step": 10322, "token_acc": 0.8491176946737898 }, { "epoch": 0.5570064209787946, "grad_norm": 0.46715399622917175, "learning_rate": 8.643715748979532e-06, "loss": 0.3915971517562866, "step": 10323, "token_acc": 0.8640314908971626 }, { "epoch": 0.5570603787837911, "grad_norm": 0.36926132440567017, "learning_rate": 8.641984355320085e-06, "loss": 0.35041525959968567, "step": 10324, "token_acc": 0.8763277693474962 }, { "epoch": 0.5571143365887876, "grad_norm": 0.4325594902038574, "learning_rate": 8.64025300313408e-06, "loss": 0.36270397901535034, "step": 10325, "token_acc": 0.8696198993851314 }, { "epoch": 0.557168294393784, "grad_norm": 0.41733455657958984, "learning_rate": 8.638521692474393e-06, "loss": 0.3173482418060303, "step": 10326, "token_acc": 0.8835132117603275 }, { "epoch": 0.5572222521987805, "grad_norm": 0.4617338180541992, "learning_rate": 8.63679042339389e-06, "loss": 0.3560880124568939, "step": 10327, "token_acc": 0.8773692191053829 }, { "epoch": 0.557276210003777, "grad_norm": 0.5214876532554626, "learning_rate": 8.635059195945456e-06, "loss": 0.38875141739845276, "step": 10328, "token_acc": 0.8658049800418172 }, { "epoch": 0.5573301678087735, "grad_norm": 0.38406041264533997, "learning_rate": 8.633328010181959e-06, "loss": 0.4167301654815674, "step": 10329, "token_acc": 0.8593243868579361 }, { "epoch": 0.55738412561377, "grad_norm": 0.3379671573638916, "learning_rate": 8.631596866156264e-06, "loss": 0.3527657389640808, "step": 10330, "token_acc": 0.8779585798816568 }, { "epoch": 0.5574380834187666, "grad_norm": 0.4917815029621124, "learning_rate": 8.629865763921242e-06, "loss": 0.42427900433540344, "step": 10331, "token_acc": 0.8568807339449541 }, { "epoch": 0.5574920412237631, "grad_norm": 0.5065521001815796, "learning_rate": 8.628134703529761e-06, "loss": 0.36129775643348694, "step": 10332, "token_acc": 0.872372624852867 }, { "epoch": 0.5575459990287596, "grad_norm": 0.39770060777664185, "learning_rate": 8.626403685034688e-06, "loss": 0.3361768126487732, "step": 10333, "token_acc": 0.8749815280035467 }, { "epoch": 0.557599956833756, "grad_norm": 0.4418262839317322, "learning_rate": 8.62467270848888e-06, "loss": 0.3821622431278229, "step": 10334, "token_acc": 0.8639346653055272 }, { "epoch": 0.5576539146387525, "grad_norm": 0.3227326273918152, "learning_rate": 8.622941773945212e-06, "loss": 0.3281368613243103, "step": 10335, "token_acc": 0.8823597994636819 }, { "epoch": 0.557707872443749, "grad_norm": 0.32486894726753235, "learning_rate": 8.621210881456543e-06, "loss": 0.36094266176223755, "step": 10336, "token_acc": 0.8717023049153013 }, { "epoch": 0.5577618302487455, "grad_norm": 0.41456693410873413, "learning_rate": 8.619480031075731e-06, "loss": 0.3743469715118408, "step": 10337, "token_acc": 0.8696357735304724 }, { "epoch": 0.557815788053742, "grad_norm": 0.38696831464767456, "learning_rate": 8.617749222855639e-06, "loss": 0.4109443426132202, "step": 10338, "token_acc": 0.8571254567600487 }, { "epoch": 0.5578697458587385, "grad_norm": 0.4821944832801819, "learning_rate": 8.616018456849122e-06, "loss": 0.37186890840530396, "step": 10339, "token_acc": 0.8690738699007717 }, { "epoch": 0.557923703663735, "grad_norm": 0.3452490270137787, "learning_rate": 8.614287733109036e-06, "loss": 0.34656745195388794, "step": 10340, "token_acc": 0.8789497613093885 }, { "epoch": 0.5579776614687314, "grad_norm": 0.40933290123939514, "learning_rate": 8.612557051688244e-06, "loss": 0.3422756791114807, "step": 10341, "token_acc": 0.8754439681567667 }, { "epoch": 0.5580316192737279, "grad_norm": 0.39195671677589417, "learning_rate": 8.610826412639595e-06, "loss": 0.30616241693496704, "step": 10342, "token_acc": 0.8869100623330365 }, { "epoch": 0.5580855770787244, "grad_norm": 0.4366631805896759, "learning_rate": 8.609095816015944e-06, "loss": 0.38850677013397217, "step": 10343, "token_acc": 0.8612180858650692 }, { "epoch": 0.5581395348837209, "grad_norm": 0.4346286356449127, "learning_rate": 8.60736526187014e-06, "loss": 0.31218698620796204, "step": 10344, "token_acc": 0.8897017445132246 }, { "epoch": 0.5581934926887174, "grad_norm": 0.31531476974487305, "learning_rate": 8.60563475025504e-06, "loss": 0.34029772877693176, "step": 10345, "token_acc": 0.8793518034500785 }, { "epoch": 0.5582474504937139, "grad_norm": 0.408690482378006, "learning_rate": 8.603904281223487e-06, "loss": 0.33142518997192383, "step": 10346, "token_acc": 0.8802315963606286 }, { "epoch": 0.5583014082987104, "grad_norm": 0.39070767164230347, "learning_rate": 8.602173854828329e-06, "loss": 0.32125574350357056, "step": 10347, "token_acc": 0.8865349623082922 }, { "epoch": 0.558355366103707, "grad_norm": 0.34140023589134216, "learning_rate": 8.600443471122418e-06, "loss": 0.33537179231643677, "step": 10348, "token_acc": 0.8791253290139704 }, { "epoch": 0.5584093239087033, "grad_norm": 0.45467495918273926, "learning_rate": 8.598713130158596e-06, "loss": 0.37208613753318787, "step": 10349, "token_acc": 0.8703649635036497 }, { "epoch": 0.5584632817136999, "grad_norm": 0.41335606575012207, "learning_rate": 8.59698283198971e-06, "loss": 0.338131308555603, "step": 10350, "token_acc": 0.8762308533916849 }, { "epoch": 0.5585172395186964, "grad_norm": 0.4432186484336853, "learning_rate": 8.5952525766686e-06, "loss": 0.36710846424102783, "step": 10351, "token_acc": 0.8713918262360675 }, { "epoch": 0.5585711973236929, "grad_norm": 0.3915809094905853, "learning_rate": 8.593522364248108e-06, "loss": 0.31558072566986084, "step": 10352, "token_acc": 0.8865997534777249 }, { "epoch": 0.5586251551286894, "grad_norm": 0.4290919899940491, "learning_rate": 8.591792194781072e-06, "loss": 0.42059126496315, "step": 10353, "token_acc": 0.8573283323716099 }, { "epoch": 0.5586791129336859, "grad_norm": 0.40085625648498535, "learning_rate": 8.590062068320336e-06, "loss": 0.34173843264579773, "step": 10354, "token_acc": 0.8799402191962803 }, { "epoch": 0.5587330707386824, "grad_norm": 0.3871172070503235, "learning_rate": 8.588331984918736e-06, "loss": 0.35382622480392456, "step": 10355, "token_acc": 0.8778588206205958 }, { "epoch": 0.5587870285436789, "grad_norm": 0.5304540395736694, "learning_rate": 8.586601944629106e-06, "loss": 0.3826811909675598, "step": 10356, "token_acc": 0.8659983291562239 }, { "epoch": 0.5588409863486753, "grad_norm": 0.39481714367866516, "learning_rate": 8.584871947504282e-06, "loss": 0.41312870383262634, "step": 10357, "token_acc": 0.8591008202678849 }, { "epoch": 0.5588949441536718, "grad_norm": 0.4448411166667938, "learning_rate": 8.583141993597099e-06, "loss": 0.3503987789154053, "step": 10358, "token_acc": 0.8729005167958657 }, { "epoch": 0.5589489019586683, "grad_norm": 0.3914484679698944, "learning_rate": 8.581412082960389e-06, "loss": 0.33749058842658997, "step": 10359, "token_acc": 0.8813255174063883 }, { "epoch": 0.5590028597636648, "grad_norm": 0.47757020592689514, "learning_rate": 8.579682215646976e-06, "loss": 0.3354150652885437, "step": 10360, "token_acc": 0.8822348000730327 }, { "epoch": 0.5590568175686613, "grad_norm": 0.48881796002388, "learning_rate": 8.577952391709702e-06, "loss": 0.34418386220932007, "step": 10361, "token_acc": 0.8750213419839509 }, { "epoch": 0.5591107753736578, "grad_norm": 0.4827691912651062, "learning_rate": 8.57622261120139e-06, "loss": 0.39659735560417175, "step": 10362, "token_acc": 0.8629119233073697 }, { "epoch": 0.5591647331786543, "grad_norm": 0.42776569724082947, "learning_rate": 8.574492874174865e-06, "loss": 0.4045976400375366, "step": 10363, "token_acc": 0.8613564904925887 }, { "epoch": 0.5592186909836507, "grad_norm": 0.4024900794029236, "learning_rate": 8.572763180682957e-06, "loss": 0.35962843894958496, "step": 10364, "token_acc": 0.8692307692307693 }, { "epoch": 0.5592726487886472, "grad_norm": 0.3934604227542877, "learning_rate": 8.571033530778485e-06, "loss": 0.3412686884403229, "step": 10365, "token_acc": 0.876839659178931 }, { "epoch": 0.5593266065936437, "grad_norm": 0.355577677488327, "learning_rate": 8.56930392451427e-06, "loss": 0.4031246602535248, "step": 10366, "token_acc": 0.864062081883864 }, { "epoch": 0.5593805643986403, "grad_norm": 0.4135141968727112, "learning_rate": 8.567574361943144e-06, "loss": 0.3828774690628052, "step": 10367, "token_acc": 0.8667145938173976 }, { "epoch": 0.5594345222036368, "grad_norm": 0.4397406578063965, "learning_rate": 8.565844843117921e-06, "loss": 0.3406297266483307, "step": 10368, "token_acc": 0.8728464704985516 }, { "epoch": 0.5594884800086333, "grad_norm": 0.36046767234802246, "learning_rate": 8.56411536809142e-06, "loss": 0.3354610204696655, "step": 10369, "token_acc": 0.8816840811309158 }, { "epoch": 0.5595424378136298, "grad_norm": 0.4124579131603241, "learning_rate": 8.56238593691646e-06, "loss": 0.32274380326271057, "step": 10370, "token_acc": 0.8839173610318457 }, { "epoch": 0.5595963956186263, "grad_norm": 0.32268545031547546, "learning_rate": 8.560656549645858e-06, "loss": 0.30416369438171387, "step": 10371, "token_acc": 0.8885354539430086 }, { "epoch": 0.5596503534236227, "grad_norm": 0.34183627367019653, "learning_rate": 8.558927206332424e-06, "loss": 0.34162768721580505, "step": 10372, "token_acc": 0.8769368295589988 }, { "epoch": 0.5597043112286192, "grad_norm": 0.3452329933643341, "learning_rate": 8.55719790702898e-06, "loss": 0.3713259696960449, "step": 10373, "token_acc": 0.8676844783715013 }, { "epoch": 0.5597582690336157, "grad_norm": 0.4026746451854706, "learning_rate": 8.555468651788333e-06, "loss": 0.34420153498649597, "step": 10374, "token_acc": 0.8775397373876986 }, { "epoch": 0.5598122268386122, "grad_norm": 0.28479689359664917, "learning_rate": 8.553739440663293e-06, "loss": 0.29160842299461365, "step": 10375, "token_acc": 0.8942786069651741 }, { "epoch": 0.5598661846436087, "grad_norm": 0.37161803245544434, "learning_rate": 8.552010273706675e-06, "loss": 0.3656632900238037, "step": 10376, "token_acc": 0.8757177033492823 }, { "epoch": 0.5599201424486052, "grad_norm": 0.3300777077674866, "learning_rate": 8.550281150971281e-06, "loss": 0.29810091853141785, "step": 10377, "token_acc": 0.8875739644970414 }, { "epoch": 0.5599741002536017, "grad_norm": 0.50095534324646, "learning_rate": 8.548552072509922e-06, "loss": 0.4267616271972656, "step": 10378, "token_acc": 0.8543675751222921 }, { "epoch": 0.5600280580585981, "grad_norm": 0.42138051986694336, "learning_rate": 8.5468230383754e-06, "loss": 0.3710542917251587, "step": 10379, "token_acc": 0.8674274443505213 }, { "epoch": 0.5600820158635946, "grad_norm": 0.5136852860450745, "learning_rate": 8.545094048620525e-06, "loss": 0.35505953431129456, "step": 10380, "token_acc": 0.8755584974350488 }, { "epoch": 0.5601359736685911, "grad_norm": 0.31939470767974854, "learning_rate": 8.543365103298095e-06, "loss": 0.3571489453315735, "step": 10381, "token_acc": 0.8766581992661586 }, { "epoch": 0.5601899314735876, "grad_norm": 0.3518415093421936, "learning_rate": 8.541636202460915e-06, "loss": 0.3657691776752472, "step": 10382, "token_acc": 0.8735865158950288 }, { "epoch": 0.5602438892785842, "grad_norm": 0.3397139012813568, "learning_rate": 8.539907346161783e-06, "loss": 0.39072513580322266, "step": 10383, "token_acc": 0.8658609548881178 }, { "epoch": 0.5602978470835807, "grad_norm": 0.44278714060783386, "learning_rate": 8.538178534453496e-06, "loss": 0.35714566707611084, "step": 10384, "token_acc": 0.8717451075529679 }, { "epoch": 0.5603518048885772, "grad_norm": 0.47407200932502747, "learning_rate": 8.53644976738885e-06, "loss": 0.38821539282798767, "step": 10385, "token_acc": 0.8667440847728263 }, { "epoch": 0.5604057626935737, "grad_norm": 0.3909723162651062, "learning_rate": 8.53472104502065e-06, "loss": 0.3739367127418518, "step": 10386, "token_acc": 0.8704924389298178 }, { "epoch": 0.5604597204985701, "grad_norm": 0.41234090924263, "learning_rate": 8.532992367401687e-06, "loss": 0.4002807140350342, "step": 10387, "token_acc": 0.8553577007185255 }, { "epoch": 0.5605136783035666, "grad_norm": 0.32570880651474, "learning_rate": 8.531263734584752e-06, "loss": 0.30000048875808716, "step": 10388, "token_acc": 0.8938018672199171 }, { "epoch": 0.5605676361085631, "grad_norm": 0.41273459792137146, "learning_rate": 8.529535146622637e-06, "loss": 0.3252330422401428, "step": 10389, "token_acc": 0.8831923890063424 }, { "epoch": 0.5606215939135596, "grad_norm": 0.42889952659606934, "learning_rate": 8.527806603568134e-06, "loss": 0.36263588070869446, "step": 10390, "token_acc": 0.8680040703590638 }, { "epoch": 0.5606755517185561, "grad_norm": 0.4008170962333679, "learning_rate": 8.526078105474031e-06, "loss": 0.3510782718658447, "step": 10391, "token_acc": 0.882941855987678 }, { "epoch": 0.5607295095235526, "grad_norm": 0.4450259208679199, "learning_rate": 8.52434965239311e-06, "loss": 0.3981325924396515, "step": 10392, "token_acc": 0.8625835535519976 }, { "epoch": 0.5607834673285491, "grad_norm": 0.368710994720459, "learning_rate": 8.522621244378171e-06, "loss": 0.404660701751709, "step": 10393, "token_acc": 0.8601513655807832 }, { "epoch": 0.5608374251335456, "grad_norm": 0.44165942072868347, "learning_rate": 8.520892881481992e-06, "loss": 0.40046611428260803, "step": 10394, "token_acc": 0.8617005707595492 }, { "epoch": 0.560891382938542, "grad_norm": 0.5348368287086487, "learning_rate": 8.519164563757358e-06, "loss": 0.3954940736293793, "step": 10395, "token_acc": 0.8629126213592233 }, { "epoch": 0.5609453407435385, "grad_norm": 0.3476531207561493, "learning_rate": 8.517436291257049e-06, "loss": 0.3608028292655945, "step": 10396, "token_acc": 0.8702050056596654 }, { "epoch": 0.560999298548535, "grad_norm": 0.3382382094860077, "learning_rate": 8.515708064033847e-06, "loss": 0.37500226497650146, "step": 10397, "token_acc": 0.8694437388874777 }, { "epoch": 0.5610532563535315, "grad_norm": 0.41920074820518494, "learning_rate": 8.51397988214053e-06, "loss": 0.37666571140289307, "step": 10398, "token_acc": 0.8693595342066958 }, { "epoch": 0.561107214158528, "grad_norm": 0.34526148438453674, "learning_rate": 8.51225174562988e-06, "loss": 0.3819926977157593, "step": 10399, "token_acc": 0.8676166118621361 }, { "epoch": 0.5611611719635246, "grad_norm": 0.3835757076740265, "learning_rate": 8.510523654554672e-06, "loss": 0.3456079959869385, "step": 10400, "token_acc": 0.8790061746545134 }, { "epoch": 0.5612151297685211, "grad_norm": 0.3516612648963928, "learning_rate": 8.508795608967682e-06, "loss": 0.3827151656150818, "step": 10401, "token_acc": 0.8617470264739736 }, { "epoch": 0.5612690875735175, "grad_norm": 0.4535220265388489, "learning_rate": 8.507067608921682e-06, "loss": 0.4290357828140259, "step": 10402, "token_acc": 0.8591878469179083 }, { "epoch": 0.561323045378514, "grad_norm": 0.4766729772090912, "learning_rate": 8.505339654469447e-06, "loss": 0.36415839195251465, "step": 10403, "token_acc": 0.8705035971223022 }, { "epoch": 0.5613770031835105, "grad_norm": 0.4866489768028259, "learning_rate": 8.503611745663748e-06, "loss": 0.3913835883140564, "step": 10404, "token_acc": 0.8635800557448127 }, { "epoch": 0.561430960988507, "grad_norm": 0.36011165380477905, "learning_rate": 8.501883882557352e-06, "loss": 0.35184136033058167, "step": 10405, "token_acc": 0.8804954817747995 }, { "epoch": 0.5614849187935035, "grad_norm": 0.43062981963157654, "learning_rate": 8.500156065203031e-06, "loss": 0.358102023601532, "step": 10406, "token_acc": 0.8696575148598924 }, { "epoch": 0.5615388765985, "grad_norm": 0.43161797523498535, "learning_rate": 8.498428293653553e-06, "loss": 0.37444379925727844, "step": 10407, "token_acc": 0.8697448359659782 }, { "epoch": 0.5615928344034965, "grad_norm": 0.3646284341812134, "learning_rate": 8.49670056796168e-06, "loss": 0.2802995443344116, "step": 10408, "token_acc": 0.8984997972699014 }, { "epoch": 0.561646792208493, "grad_norm": 0.38406214118003845, "learning_rate": 8.494972888180177e-06, "loss": 0.3374713659286499, "step": 10409, "token_acc": 0.8794773251345119 }, { "epoch": 0.5617007500134894, "grad_norm": 0.3430143892765045, "learning_rate": 8.493245254361809e-06, "loss": 0.36990293860435486, "step": 10410, "token_acc": 0.8660926365795725 }, { "epoch": 0.5617547078184859, "grad_norm": 0.377082496881485, "learning_rate": 8.491517666559332e-06, "loss": 0.34416258335113525, "step": 10411, "token_acc": 0.8765249537892791 }, { "epoch": 0.5618086656234824, "grad_norm": 0.28377842903137207, "learning_rate": 8.489790124825515e-06, "loss": 0.32496294379234314, "step": 10412, "token_acc": 0.8842460694312546 }, { "epoch": 0.5618626234284789, "grad_norm": 0.4062812924385071, "learning_rate": 8.488062629213113e-06, "loss": 0.3533318340778351, "step": 10413, "token_acc": 0.8766865390649514 }, { "epoch": 0.5619165812334754, "grad_norm": 0.39882737398147583, "learning_rate": 8.486335179774879e-06, "loss": 0.37495744228363037, "step": 10414, "token_acc": 0.8658064516129033 }, { "epoch": 0.5619705390384719, "grad_norm": 0.4046122431755066, "learning_rate": 8.484607776563575e-06, "loss": 0.3474726378917694, "step": 10415, "token_acc": 0.8735314443676572 }, { "epoch": 0.5620244968434684, "grad_norm": 0.3791635036468506, "learning_rate": 8.482880419631948e-06, "loss": 0.3608182668685913, "step": 10416, "token_acc": 0.8711270003404835 }, { "epoch": 0.562078454648465, "grad_norm": 0.36680832505226135, "learning_rate": 8.481153109032755e-06, "loss": 0.36132311820983887, "step": 10417, "token_acc": 0.8759919300605246 }, { "epoch": 0.5621324124534613, "grad_norm": 0.40473559498786926, "learning_rate": 8.479425844818751e-06, "loss": 0.3638341426849365, "step": 10418, "token_acc": 0.8721040509416068 }, { "epoch": 0.5621863702584579, "grad_norm": 0.36805459856987, "learning_rate": 8.477698627042685e-06, "loss": 0.3092646598815918, "step": 10419, "token_acc": 0.8881249271646662 }, { "epoch": 0.5622403280634544, "grad_norm": 0.42824897170066833, "learning_rate": 8.475971455757304e-06, "loss": 0.3215203583240509, "step": 10420, "token_acc": 0.8795930762222897 }, { "epoch": 0.5622942858684509, "grad_norm": 0.33654478192329407, "learning_rate": 8.474244331015353e-06, "loss": 0.34997427463531494, "step": 10421, "token_acc": 0.8804030761071334 }, { "epoch": 0.5623482436734474, "grad_norm": 0.4861412048339844, "learning_rate": 8.472517252869586e-06, "loss": 0.3783203959465027, "step": 10422, "token_acc": 0.8685453931825852 }, { "epoch": 0.5624022014784439, "grad_norm": 0.4891733229160309, "learning_rate": 8.470790221372736e-06, "loss": 0.37650007009506226, "step": 10423, "token_acc": 0.8698498882146279 }, { "epoch": 0.5624561592834404, "grad_norm": 0.38701799511909485, "learning_rate": 8.469063236577551e-06, "loss": 0.3900691568851471, "step": 10424, "token_acc": 0.8693962166364343 }, { "epoch": 0.5625101170884368, "grad_norm": 0.468348890542984, "learning_rate": 8.467336298536777e-06, "loss": 0.4281260669231415, "step": 10425, "token_acc": 0.8529594050201426 }, { "epoch": 0.5625640748934333, "grad_norm": 0.43814921379089355, "learning_rate": 8.46560940730315e-06, "loss": 0.4104450047016144, "step": 10426, "token_acc": 0.8554848966613673 }, { "epoch": 0.5626180326984298, "grad_norm": 0.41658034920692444, "learning_rate": 8.46388256292941e-06, "loss": 0.3726641535758972, "step": 10427, "token_acc": 0.8695791840668166 }, { "epoch": 0.5626719905034263, "grad_norm": 0.42698851227760315, "learning_rate": 8.462155765468294e-06, "loss": 0.3472524583339691, "step": 10428, "token_acc": 0.8775841102553709 }, { "epoch": 0.5627259483084228, "grad_norm": 0.3352632522583008, "learning_rate": 8.46042901497254e-06, "loss": 0.3297073543071747, "step": 10429, "token_acc": 0.880669262338209 }, { "epoch": 0.5627799061134193, "grad_norm": 0.4358365833759308, "learning_rate": 8.458702311494877e-06, "loss": 0.332200288772583, "step": 10430, "token_acc": 0.8815055762081785 }, { "epoch": 0.5628338639184158, "grad_norm": 0.40639495849609375, "learning_rate": 8.456975655088045e-06, "loss": 0.33698952198028564, "step": 10431, "token_acc": 0.876910135987663 }, { "epoch": 0.5628878217234123, "grad_norm": 0.34820231795310974, "learning_rate": 8.45524904580477e-06, "loss": 0.3024883568286896, "step": 10432, "token_acc": 0.892198039964363 }, { "epoch": 0.5629417795284087, "grad_norm": 0.42404231429100037, "learning_rate": 8.453522483697787e-06, "loss": 0.31921476125717163, "step": 10433, "token_acc": 0.8830472103004292 }, { "epoch": 0.5629957373334052, "grad_norm": 0.30956751108169556, "learning_rate": 8.45179596881982e-06, "loss": 0.30951735377311707, "step": 10434, "token_acc": 0.8901010749237928 }, { "epoch": 0.5630496951384018, "grad_norm": 0.3964296877384186, "learning_rate": 8.450069501223599e-06, "loss": 0.26727497577667236, "step": 10435, "token_acc": 0.8987736437331116 }, { "epoch": 0.5631036529433983, "grad_norm": 0.39889854192733765, "learning_rate": 8.44834308096185e-06, "loss": 0.35753780603408813, "step": 10436, "token_acc": 0.8731176798661461 }, { "epoch": 0.5631576107483948, "grad_norm": 0.48612546920776367, "learning_rate": 8.446616708087294e-06, "loss": 0.4007384777069092, "step": 10437, "token_acc": 0.8603970061828832 }, { "epoch": 0.5632115685533913, "grad_norm": 0.35775578022003174, "learning_rate": 8.44489038265266e-06, "loss": 0.3414660096168518, "step": 10438, "token_acc": 0.8777297074577668 }, { "epoch": 0.5632655263583878, "grad_norm": 0.400086373090744, "learning_rate": 8.443164104710664e-06, "loss": 0.32324671745300293, "step": 10439, "token_acc": 0.8841834663780274 }, { "epoch": 0.5633194841633843, "grad_norm": 0.49702808260917664, "learning_rate": 8.441437874314032e-06, "loss": 0.407701313495636, "step": 10440, "token_acc": 0.860216606498195 }, { "epoch": 0.5633734419683807, "grad_norm": 0.42881545424461365, "learning_rate": 8.439711691515477e-06, "loss": 0.36398401856422424, "step": 10441, "token_acc": 0.8698932384341637 }, { "epoch": 0.5634273997733772, "grad_norm": 0.5570023059844971, "learning_rate": 8.437985556367718e-06, "loss": 0.4458156228065491, "step": 10442, "token_acc": 0.8479355488418933 }, { "epoch": 0.5634813575783737, "grad_norm": 0.43347564339637756, "learning_rate": 8.436259468923467e-06, "loss": 0.3715420961380005, "step": 10443, "token_acc": 0.8709677419354839 }, { "epoch": 0.5635353153833702, "grad_norm": 0.4535634517669678, "learning_rate": 8.434533429235446e-06, "loss": 0.3305065333843231, "step": 10444, "token_acc": 0.8766103635843114 }, { "epoch": 0.5635892731883667, "grad_norm": 0.3523634076118469, "learning_rate": 8.432807437356364e-06, "loss": 0.27262142300605774, "step": 10445, "token_acc": 0.897325004004485 }, { "epoch": 0.5636432309933632, "grad_norm": 0.35399678349494934, "learning_rate": 8.431081493338935e-06, "loss": 0.3674648404121399, "step": 10446, "token_acc": 0.8665809355409099 }, { "epoch": 0.5636971887983597, "grad_norm": 0.49999600648880005, "learning_rate": 8.429355597235865e-06, "loss": 0.39239147305488586, "step": 10447, "token_acc": 0.8645025627546327 }, { "epoch": 0.5637511466033561, "grad_norm": 0.4011366367340088, "learning_rate": 8.427629749099862e-06, "loss": 0.3008233308792114, "step": 10448, "token_acc": 0.8858753618771903 }, { "epoch": 0.5638051044083526, "grad_norm": 0.3458721935749054, "learning_rate": 8.425903948983635e-06, "loss": 0.3137979507446289, "step": 10449, "token_acc": 0.8883861236802413 }, { "epoch": 0.5638590622133491, "grad_norm": 0.3309147357940674, "learning_rate": 8.424178196939885e-06, "loss": 0.3080770969390869, "step": 10450, "token_acc": 0.8897669434770962 }, { "epoch": 0.5639130200183456, "grad_norm": 0.38346055150032043, "learning_rate": 8.422452493021326e-06, "loss": 0.3271890878677368, "step": 10451, "token_acc": 0.8875047911077041 }, { "epoch": 0.5639669778233422, "grad_norm": 0.4007541537284851, "learning_rate": 8.42072683728065e-06, "loss": 0.33073461055755615, "step": 10452, "token_acc": 0.8841184387617765 }, { "epoch": 0.5640209356283387, "grad_norm": 0.459503173828125, "learning_rate": 8.419001229770567e-06, "loss": 0.4332233965396881, "step": 10453, "token_acc": 0.8500497937117656 }, { "epoch": 0.5640748934333352, "grad_norm": 0.28719183802604675, "learning_rate": 8.41727567054377e-06, "loss": 0.35417526960372925, "step": 10454, "token_acc": 0.8750392793547711 }, { "epoch": 0.5641288512383317, "grad_norm": 0.4408276379108429, "learning_rate": 8.41555015965296e-06, "loss": 0.32932889461517334, "step": 10455, "token_acc": 0.885012356447158 }, { "epoch": 0.5641828090433281, "grad_norm": 0.3732490837574005, "learning_rate": 8.413824697150833e-06, "loss": 0.4177657663822174, "step": 10456, "token_acc": 0.8579667381974249 }, { "epoch": 0.5642367668483246, "grad_norm": 0.3443205952644348, "learning_rate": 8.412099283090082e-06, "loss": 0.2582307457923889, "step": 10457, "token_acc": 0.9031800113571834 }, { "epoch": 0.5642907246533211, "grad_norm": 0.4916576147079468, "learning_rate": 8.410373917523406e-06, "loss": 0.3492143154144287, "step": 10458, "token_acc": 0.8786360473208072 }, { "epoch": 0.5643446824583176, "grad_norm": 0.38624662160873413, "learning_rate": 8.408648600503494e-06, "loss": 0.415600448846817, "step": 10459, "token_acc": 0.8581571230128107 }, { "epoch": 0.5643986402633141, "grad_norm": 0.43545001745224, "learning_rate": 8.406923332083037e-06, "loss": 0.3973572254180908, "step": 10460, "token_acc": 0.8592848904267589 }, { "epoch": 0.5644525980683106, "grad_norm": 0.4209197461605072, "learning_rate": 8.405198112314722e-06, "loss": 0.3284686803817749, "step": 10461, "token_acc": 0.8815515084109551 }, { "epoch": 0.5645065558733071, "grad_norm": 0.4669249355792999, "learning_rate": 8.403472941251241e-06, "loss": 0.36569705605506897, "step": 10462, "token_acc": 0.8667779168404951 }, { "epoch": 0.5645605136783036, "grad_norm": 0.41264447569847107, "learning_rate": 8.401747818945275e-06, "loss": 0.346258282661438, "step": 10463, "token_acc": 0.8776178010471204 }, { "epoch": 0.5646144714833, "grad_norm": 0.3788122832775116, "learning_rate": 8.400022745449513e-06, "loss": 0.32794034481048584, "step": 10464, "token_acc": 0.8819089642359499 }, { "epoch": 0.5646684292882965, "grad_norm": 0.3154219388961792, "learning_rate": 8.398297720816641e-06, "loss": 0.36548489332199097, "step": 10465, "token_acc": 0.8713638531235097 }, { "epoch": 0.564722387093293, "grad_norm": 0.27676138281822205, "learning_rate": 8.396572745099334e-06, "loss": 0.3547517657279968, "step": 10466, "token_acc": 0.874536538691544 }, { "epoch": 0.5647763448982895, "grad_norm": 0.39930322766304016, "learning_rate": 8.394847818350275e-06, "loss": 0.3571731746196747, "step": 10467, "token_acc": 0.8737273568689673 }, { "epoch": 0.564830302703286, "grad_norm": 0.4323939085006714, "learning_rate": 8.393122940622146e-06, "loss": 0.37608766555786133, "step": 10468, "token_acc": 0.8697903822441431 }, { "epoch": 0.5648842605082826, "grad_norm": 0.44897744059562683, "learning_rate": 8.391398111967615e-06, "loss": 0.38936886191368103, "step": 10469, "token_acc": 0.8624078624078624 }, { "epoch": 0.5649382183132791, "grad_norm": 0.4619198143482208, "learning_rate": 8.38967333243937e-06, "loss": 0.300960898399353, "step": 10470, "token_acc": 0.8825341806277681 }, { "epoch": 0.5649921761182755, "grad_norm": 0.3879351019859314, "learning_rate": 8.38794860209008e-06, "loss": 0.35116147994995117, "step": 10471, "token_acc": 0.8733655486071632 }, { "epoch": 0.565046133923272, "grad_norm": 0.4620829224586487, "learning_rate": 8.386223920972414e-06, "loss": 0.3607848286628723, "step": 10472, "token_acc": 0.8716026241799437 }, { "epoch": 0.5651000917282685, "grad_norm": 0.40756452083587646, "learning_rate": 8.384499289139048e-06, "loss": 0.35492265224456787, "step": 10473, "token_acc": 0.8752160387141376 }, { "epoch": 0.565154049533265, "grad_norm": 0.38542720675468445, "learning_rate": 8.382774706642653e-06, "loss": 0.36607974767684937, "step": 10474, "token_acc": 0.8671976291905908 }, { "epoch": 0.5652080073382615, "grad_norm": 0.4330422282218933, "learning_rate": 8.381050173535887e-06, "loss": 0.39302825927734375, "step": 10475, "token_acc": 0.8615565690745918 }, { "epoch": 0.565261965143258, "grad_norm": 0.28856876492500305, "learning_rate": 8.379325689871432e-06, "loss": 0.35319840908050537, "step": 10476, "token_acc": 0.8733815115928937 }, { "epoch": 0.5653159229482545, "grad_norm": 0.42796212434768677, "learning_rate": 8.377601255701944e-06, "loss": 0.3219023048877716, "step": 10477, "token_acc": 0.8883658372562205 }, { "epoch": 0.565369880753251, "grad_norm": 0.3961099684238434, "learning_rate": 8.375876871080089e-06, "loss": 0.37086668610572815, "step": 10478, "token_acc": 0.871506810841174 }, { "epoch": 0.5654238385582474, "grad_norm": 0.23954638838768005, "learning_rate": 8.374152536058527e-06, "loss": 0.39074820280075073, "step": 10479, "token_acc": 0.8639117789626757 }, { "epoch": 0.5654777963632439, "grad_norm": 0.2840978801250458, "learning_rate": 8.372428250689924e-06, "loss": 0.2805548310279846, "step": 10480, "token_acc": 0.8956697898202076 }, { "epoch": 0.5655317541682404, "grad_norm": 0.3682440221309662, "learning_rate": 8.370704015026936e-06, "loss": 0.31692570447921753, "step": 10481, "token_acc": 0.8864295527400397 }, { "epoch": 0.5655857119732369, "grad_norm": 0.4721119999885559, "learning_rate": 8.368979829122215e-06, "loss": 0.39856284856796265, "step": 10482, "token_acc": 0.8625238673841347 }, { "epoch": 0.5656396697782334, "grad_norm": 0.31232237815856934, "learning_rate": 8.367255693028426e-06, "loss": 0.36008089780807495, "step": 10483, "token_acc": 0.8741801632980859 }, { "epoch": 0.5656936275832299, "grad_norm": 0.3855714201927185, "learning_rate": 8.365531606798221e-06, "loss": 0.3609159588813782, "step": 10484, "token_acc": 0.8744365743721829 }, { "epoch": 0.5657475853882264, "grad_norm": 0.5106236934661865, "learning_rate": 8.363807570484253e-06, "loss": 0.38032498955726624, "step": 10485, "token_acc": 0.8670418006430868 }, { "epoch": 0.5658015431932228, "grad_norm": 0.3922814130783081, "learning_rate": 8.362083584139173e-06, "loss": 0.378066748380661, "step": 10486, "token_acc": 0.869593795188642 }, { "epoch": 0.5658555009982194, "grad_norm": 0.3397541344165802, "learning_rate": 8.36035964781563e-06, "loss": 0.4220719337463379, "step": 10487, "token_acc": 0.8512016117426968 }, { "epoch": 0.5659094588032159, "grad_norm": 0.4453430473804474, "learning_rate": 8.358635761566273e-06, "loss": 0.3808034062385559, "step": 10488, "token_acc": 0.8611350698637226 }, { "epoch": 0.5659634166082124, "grad_norm": 0.41293975710868835, "learning_rate": 8.35691192544375e-06, "loss": 0.4152049422264099, "step": 10489, "token_acc": 0.8494688014364806 }, { "epoch": 0.5660173744132089, "grad_norm": 0.34008315205574036, "learning_rate": 8.35518813950071e-06, "loss": 0.38451915979385376, "step": 10490, "token_acc": 0.8614087726818117 }, { "epoch": 0.5660713322182054, "grad_norm": 0.46459445357322693, "learning_rate": 8.35346440378979e-06, "loss": 0.32579314708709717, "step": 10491, "token_acc": 0.8778443113772455 }, { "epoch": 0.5661252900232019, "grad_norm": 0.3876025378704071, "learning_rate": 8.351740718363638e-06, "loss": 0.35256698727607727, "step": 10492, "token_acc": 0.876023686531435 }, { "epoch": 0.5661792478281984, "grad_norm": 0.47593486309051514, "learning_rate": 8.350017083274892e-06, "loss": 0.3729134798049927, "step": 10493, "token_acc": 0.8686735653809972 }, { "epoch": 0.5662332056331948, "grad_norm": 0.39222800731658936, "learning_rate": 8.348293498576192e-06, "loss": 0.41335952281951904, "step": 10494, "token_acc": 0.8588371420306624 }, { "epoch": 0.5662871634381913, "grad_norm": 0.45272430777549744, "learning_rate": 8.346569964320174e-06, "loss": 0.39712756872177124, "step": 10495, "token_acc": 0.8613337365794647 }, { "epoch": 0.5663411212431878, "grad_norm": 0.41548967361450195, "learning_rate": 8.344846480559478e-06, "loss": 0.33972471952438354, "step": 10496, "token_acc": 0.8796600059648076 }, { "epoch": 0.5663950790481843, "grad_norm": 0.36889493465423584, "learning_rate": 8.343123047346736e-06, "loss": 0.3975011706352234, "step": 10497, "token_acc": 0.8616710013003901 }, { "epoch": 0.5664490368531808, "grad_norm": 0.3594743609428406, "learning_rate": 8.341399664734582e-06, "loss": 0.34206175804138184, "step": 10498, "token_acc": 0.8789023989484062 }, { "epoch": 0.5665029946581773, "grad_norm": 0.37526512145996094, "learning_rate": 8.33967633277565e-06, "loss": 0.3534812927246094, "step": 10499, "token_acc": 0.875989800026842 }, { "epoch": 0.5665569524631738, "grad_norm": 0.3021244406700134, "learning_rate": 8.337953051522566e-06, "loss": 0.2867642343044281, "step": 10500, "token_acc": 0.8975150713130422 }, { "epoch": 0.5666109102681703, "grad_norm": 0.30816689133644104, "learning_rate": 8.336229821027958e-06, "loss": 0.28867676854133606, "step": 10501, "token_acc": 0.8968062669478759 }, { "epoch": 0.5666648680731667, "grad_norm": 0.37980780005455017, "learning_rate": 8.334506641344458e-06, "loss": 0.2779655456542969, "step": 10502, "token_acc": 0.8947368421052632 }, { "epoch": 0.5667188258781632, "grad_norm": 0.40259724855422974, "learning_rate": 8.332783512524687e-06, "loss": 0.3378182351589203, "step": 10503, "token_acc": 0.8823142669296515 }, { "epoch": 0.5667727836831598, "grad_norm": 0.39770838618278503, "learning_rate": 8.331060434621275e-06, "loss": 0.350114107131958, "step": 10504, "token_acc": 0.8695408734602463 }, { "epoch": 0.5668267414881563, "grad_norm": 0.5020418167114258, "learning_rate": 8.329337407686839e-06, "loss": 0.4183602035045624, "step": 10505, "token_acc": 0.8556823990458341 }, { "epoch": 0.5668806992931528, "grad_norm": 0.4306213855743408, "learning_rate": 8.327614431774001e-06, "loss": 0.2882753014564514, "step": 10506, "token_acc": 0.8902676830349229 }, { "epoch": 0.5669346570981493, "grad_norm": 0.43854445219039917, "learning_rate": 8.325891506935378e-06, "loss": 0.3775925636291504, "step": 10507, "token_acc": 0.8713386257870244 }, { "epoch": 0.5669886149031458, "grad_norm": 0.47724655270576477, "learning_rate": 8.324168633223588e-06, "loss": 0.358122318983078, "step": 10508, "token_acc": 0.8689105403011514 }, { "epoch": 0.5670425727081422, "grad_norm": 0.4223294258117676, "learning_rate": 8.322445810691251e-06, "loss": 0.33310267329216003, "step": 10509, "token_acc": 0.884828349944629 }, { "epoch": 0.5670965305131387, "grad_norm": 0.3713296353816986, "learning_rate": 8.32072303939098e-06, "loss": 0.3800435960292816, "step": 10510, "token_acc": 0.8678621518733721 }, { "epoch": 0.5671504883181352, "grad_norm": 0.3313518166542053, "learning_rate": 8.319000319375388e-06, "loss": 0.3299916982650757, "step": 10511, "token_acc": 0.8791678540894842 }, { "epoch": 0.5672044461231317, "grad_norm": 0.4621894657611847, "learning_rate": 8.317277650697086e-06, "loss": 0.3325629234313965, "step": 10512, "token_acc": 0.8855843909831408 }, { "epoch": 0.5672584039281282, "grad_norm": 0.43326109647750854, "learning_rate": 8.315555033408684e-06, "loss": 0.39778393507003784, "step": 10513, "token_acc": 0.8590308370044053 }, { "epoch": 0.5673123617331247, "grad_norm": 0.3770851194858551, "learning_rate": 8.313832467562787e-06, "loss": 0.3392232060432434, "step": 10514, "token_acc": 0.8781484002722941 }, { "epoch": 0.5673663195381212, "grad_norm": 0.4187150299549103, "learning_rate": 8.312109953212008e-06, "loss": 0.37958386540412903, "step": 10515, "token_acc": 0.8701825557809331 }, { "epoch": 0.5674202773431177, "grad_norm": 0.42992082238197327, "learning_rate": 8.310387490408948e-06, "loss": 0.358786940574646, "step": 10516, "token_acc": 0.8773630634997577 }, { "epoch": 0.5674742351481141, "grad_norm": 0.48191696405410767, "learning_rate": 8.30866507920621e-06, "loss": 0.434688538312912, "step": 10517, "token_acc": 0.8517079705291359 }, { "epoch": 0.5675281929531106, "grad_norm": 0.3869132399559021, "learning_rate": 8.3069427196564e-06, "loss": 0.38782501220703125, "step": 10518, "token_acc": 0.8686131386861314 }, { "epoch": 0.5675821507581071, "grad_norm": 0.44377824664115906, "learning_rate": 8.305220411812114e-06, "loss": 0.31369173526763916, "step": 10519, "token_acc": 0.884990253411306 }, { "epoch": 0.5676361085631036, "grad_norm": 0.5192607641220093, "learning_rate": 8.303498155725953e-06, "loss": 0.40937280654907227, "step": 10520, "token_acc": 0.8613775065387969 }, { "epoch": 0.5676900663681002, "grad_norm": 0.33142223954200745, "learning_rate": 8.301775951450513e-06, "loss": 0.3536415994167328, "step": 10521, "token_acc": 0.8754301445285616 }, { "epoch": 0.5677440241730967, "grad_norm": 0.2769632935523987, "learning_rate": 8.300053799038392e-06, "loss": 0.3082959055900574, "step": 10522, "token_acc": 0.8899779465817201 }, { "epoch": 0.5677979819780932, "grad_norm": 0.46204298734664917, "learning_rate": 8.298331698542183e-06, "loss": 0.3687438666820526, "step": 10523, "token_acc": 0.869654146522871 }, { "epoch": 0.5678519397830897, "grad_norm": 0.3648741543292999, "learning_rate": 8.296609650014477e-06, "loss": 0.43647074699401855, "step": 10524, "token_acc": 0.8542614498028511 }, { "epoch": 0.5679058975880861, "grad_norm": 0.31813186407089233, "learning_rate": 8.294887653507867e-06, "loss": 0.3593097925186157, "step": 10525, "token_acc": 0.8792022295767288 }, { "epoch": 0.5679598553930826, "grad_norm": 0.44377779960632324, "learning_rate": 8.293165709074943e-06, "loss": 0.41482388973236084, "step": 10526, "token_acc": 0.8556624722427831 }, { "epoch": 0.5680138131980791, "grad_norm": 0.362753689289093, "learning_rate": 8.291443816768283e-06, "loss": 0.3734894394874573, "step": 10527, "token_acc": 0.871797941106057 }, { "epoch": 0.5680677710030756, "grad_norm": 0.4182668626308441, "learning_rate": 8.289721976640488e-06, "loss": 0.36122554540634155, "step": 10528, "token_acc": 0.8709246392303581 }, { "epoch": 0.5681217288080721, "grad_norm": 0.43589988350868225, "learning_rate": 8.288000188744136e-06, "loss": 0.34029293060302734, "step": 10529, "token_acc": 0.8785615491009682 }, { "epoch": 0.5681756866130686, "grad_norm": 0.3581244945526123, "learning_rate": 8.28627845313181e-06, "loss": 0.3149757981300354, "step": 10530, "token_acc": 0.8893975009278733 }, { "epoch": 0.5682296444180651, "grad_norm": 0.40123113989830017, "learning_rate": 8.28455676985609e-06, "loss": 0.37321460247039795, "step": 10531, "token_acc": 0.8692277842452464 }, { "epoch": 0.5682836022230615, "grad_norm": 0.3529263138771057, "learning_rate": 8.282835138969557e-06, "loss": 0.36622360348701477, "step": 10532, "token_acc": 0.8725078698845751 }, { "epoch": 0.568337560028058, "grad_norm": 0.3932911157608032, "learning_rate": 8.281113560524785e-06, "loss": 0.3255988359451294, "step": 10533, "token_acc": 0.8838238548351405 }, { "epoch": 0.5683915178330545, "grad_norm": 0.3337612450122833, "learning_rate": 8.279392034574359e-06, "loss": 0.3854106068611145, "step": 10534, "token_acc": 0.8650213284582572 }, { "epoch": 0.568445475638051, "grad_norm": 0.42932671308517456, "learning_rate": 8.277670561170849e-06, "loss": 0.34156984090805054, "step": 10535, "token_acc": 0.8712057689082676 }, { "epoch": 0.5684994334430475, "grad_norm": 0.3081341087818146, "learning_rate": 8.275949140366828e-06, "loss": 0.3280440866947174, "step": 10536, "token_acc": 0.8839198435972629 }, { "epoch": 0.568553391248044, "grad_norm": 0.5109415054321289, "learning_rate": 8.27422777221487e-06, "loss": 0.4044110178947449, "step": 10537, "token_acc": 0.8601162572660791 }, { "epoch": 0.5686073490530406, "grad_norm": 0.4860354959964752, "learning_rate": 8.272506456767544e-06, "loss": 0.3550153970718384, "step": 10538, "token_acc": 0.8764465622872702 }, { "epoch": 0.5686613068580371, "grad_norm": 0.3266516327857971, "learning_rate": 8.27078519407742e-06, "loss": 0.3381640315055847, "step": 10539, "token_acc": 0.8755043227665706 }, { "epoch": 0.5687152646630335, "grad_norm": 0.3098805546760559, "learning_rate": 8.269063984197055e-06, "loss": 0.31183144450187683, "step": 10540, "token_acc": 0.8833621185952792 }, { "epoch": 0.56876922246803, "grad_norm": 0.3958788812160492, "learning_rate": 8.26734282717903e-06, "loss": 0.291862428188324, "step": 10541, "token_acc": 0.8865056578280696 }, { "epoch": 0.5688231802730265, "grad_norm": 0.46551457047462463, "learning_rate": 8.265621723075898e-06, "loss": 0.30297428369522095, "step": 10542, "token_acc": 0.8876336493847085 }, { "epoch": 0.568877138078023, "grad_norm": 0.4415990114212036, "learning_rate": 8.263900671940226e-06, "loss": 0.39084386825561523, "step": 10543, "token_acc": 0.8639789658194567 }, { "epoch": 0.5689310958830195, "grad_norm": 0.42740774154663086, "learning_rate": 8.262179673824574e-06, "loss": 0.39679670333862305, "step": 10544, "token_acc": 0.861193148257531 }, { "epoch": 0.568985053688016, "grad_norm": 0.2596275508403778, "learning_rate": 8.260458728781496e-06, "loss": 0.3210826516151428, "step": 10545, "token_acc": 0.8840614498319731 }, { "epoch": 0.5690390114930125, "grad_norm": 0.45696669816970825, "learning_rate": 8.258737836863553e-06, "loss": 0.4137531518936157, "step": 10546, "token_acc": 0.8555104113763332 }, { "epoch": 0.569092969298009, "grad_norm": 0.45921728014945984, "learning_rate": 8.257016998123303e-06, "loss": 0.36153754591941833, "step": 10547, "token_acc": 0.8709677419354839 }, { "epoch": 0.5691469271030054, "grad_norm": 0.5242077708244324, "learning_rate": 8.255296212613296e-06, "loss": 0.35536304116249084, "step": 10548, "token_acc": 0.8758339510748703 }, { "epoch": 0.5692008849080019, "grad_norm": 0.35315263271331787, "learning_rate": 8.253575480386084e-06, "loss": 0.37792378664016724, "step": 10549, "token_acc": 0.8686903529952857 }, { "epoch": 0.5692548427129984, "grad_norm": 0.45322760939598083, "learning_rate": 8.25185480149422e-06, "loss": 0.3736305832862854, "step": 10550, "token_acc": 0.8671031096563011 }, { "epoch": 0.5693088005179949, "grad_norm": 0.3683665096759796, "learning_rate": 8.250134175990255e-06, "loss": 0.3594496548175812, "step": 10551, "token_acc": 0.8751997159595243 }, { "epoch": 0.5693627583229914, "grad_norm": 0.3905598521232605, "learning_rate": 8.248413603926732e-06, "loss": 0.3452414274215698, "step": 10552, "token_acc": 0.8748713424496397 }, { "epoch": 0.5694167161279879, "grad_norm": 0.37256696820259094, "learning_rate": 8.246693085356197e-06, "loss": 0.3354305028915405, "step": 10553, "token_acc": 0.8828313253012048 }, { "epoch": 0.5694706739329845, "grad_norm": 0.3041762709617615, "learning_rate": 8.2449726203312e-06, "loss": 0.3117918372154236, "step": 10554, "token_acc": 0.8848519525103705 }, { "epoch": 0.5695246317379808, "grad_norm": 0.44544902443885803, "learning_rate": 8.243252208904276e-06, "loss": 0.3702014088630676, "step": 10555, "token_acc": 0.8732133403917417 }, { "epoch": 0.5695785895429774, "grad_norm": 0.417791485786438, "learning_rate": 8.24153185112797e-06, "loss": 0.32549700140953064, "step": 10556, "token_acc": 0.8878853939582685 }, { "epoch": 0.5696325473479739, "grad_norm": 0.426643967628479, "learning_rate": 8.239811547054822e-06, "loss": 0.4001466631889343, "step": 10557, "token_acc": 0.8602949994004078 }, { "epoch": 0.5696865051529704, "grad_norm": 0.3075270652770996, "learning_rate": 8.238091296737366e-06, "loss": 0.31208544969558716, "step": 10558, "token_acc": 0.8850574712643678 }, { "epoch": 0.5697404629579669, "grad_norm": 0.44472596049308777, "learning_rate": 8.236371100228138e-06, "loss": 0.3710194528102875, "step": 10559, "token_acc": 0.8648177726150719 }, { "epoch": 0.5697944207629634, "grad_norm": 0.436689168214798, "learning_rate": 8.234650957579679e-06, "loss": 0.333330899477005, "step": 10560, "token_acc": 0.8774957698815566 }, { "epoch": 0.5698483785679599, "grad_norm": 0.40095359086990356, "learning_rate": 8.232930868844517e-06, "loss": 0.3966873586177826, "step": 10561, "token_acc": 0.8645873944119559 }, { "epoch": 0.5699023363729564, "grad_norm": 0.5304909348487854, "learning_rate": 8.231210834075184e-06, "loss": 0.325285941362381, "step": 10562, "token_acc": 0.8873041028350062 }, { "epoch": 0.5699562941779528, "grad_norm": 0.46766397356987, "learning_rate": 8.229490853324209e-06, "loss": 0.3690651059150696, "step": 10563, "token_acc": 0.876279740447008 }, { "epoch": 0.5700102519829493, "grad_norm": 0.4335258901119232, "learning_rate": 8.227770926644118e-06, "loss": 0.38691824674606323, "step": 10564, "token_acc": 0.8601760745727602 }, { "epoch": 0.5700642097879458, "grad_norm": 0.46632325649261475, "learning_rate": 8.22605105408744e-06, "loss": 0.3528725504875183, "step": 10565, "token_acc": 0.8761429758935994 }, { "epoch": 0.5701181675929423, "grad_norm": 0.3226894736289978, "learning_rate": 8.224331235706692e-06, "loss": 0.3733329176902771, "step": 10566, "token_acc": 0.8640043173232596 }, { "epoch": 0.5701721253979388, "grad_norm": 0.43183276057243347, "learning_rate": 8.222611471554407e-06, "loss": 0.3539568781852722, "step": 10567, "token_acc": 0.876385336743393 }, { "epoch": 0.5702260832029353, "grad_norm": 0.5242037177085876, "learning_rate": 8.220891761683104e-06, "loss": 0.4006221890449524, "step": 10568, "token_acc": 0.8635612817971589 }, { "epoch": 0.5702800410079318, "grad_norm": 0.4352101683616638, "learning_rate": 8.2191721061453e-06, "loss": 0.3440076410770416, "step": 10569, "token_acc": 0.8736855128046518 }, { "epoch": 0.5703339988129283, "grad_norm": 0.3851880431175232, "learning_rate": 8.217452504993512e-06, "loss": 0.3887910544872284, "step": 10570, "token_acc": 0.8664424433112726 }, { "epoch": 0.5703879566179247, "grad_norm": 0.3658493757247925, "learning_rate": 8.21573295828026e-06, "loss": 0.36086779832839966, "step": 10571, "token_acc": 0.8717523493642897 }, { "epoch": 0.5704419144229212, "grad_norm": 0.45712190866470337, "learning_rate": 8.214013466058051e-06, "loss": 0.3908548355102539, "step": 10572, "token_acc": 0.8644700031084862 }, { "epoch": 0.5704958722279178, "grad_norm": 0.4439387321472168, "learning_rate": 8.212294028379406e-06, "loss": 0.3514641523361206, "step": 10573, "token_acc": 0.874173253750605 }, { "epoch": 0.5705498300329143, "grad_norm": 0.33706071972846985, "learning_rate": 8.210574645296834e-06, "loss": 0.32387369871139526, "step": 10574, "token_acc": 0.8797902379991932 }, { "epoch": 0.5706037878379108, "grad_norm": 0.3703277111053467, "learning_rate": 8.208855316862844e-06, "loss": 0.4064747095108032, "step": 10575, "token_acc": 0.8581832119586048 }, { "epoch": 0.5706577456429073, "grad_norm": 0.4223736524581909, "learning_rate": 8.207136043129942e-06, "loss": 0.3710298240184784, "step": 10576, "token_acc": 0.8692827442827443 }, { "epoch": 0.5707117034479038, "grad_norm": 0.4700990915298462, "learning_rate": 8.205416824150637e-06, "loss": 0.3421415090560913, "step": 10577, "token_acc": 0.8760683760683761 }, { "epoch": 0.5707656612529002, "grad_norm": 0.44790393114089966, "learning_rate": 8.203697659977428e-06, "loss": 0.3916000723838806, "step": 10578, "token_acc": 0.860296965784377 }, { "epoch": 0.5708196190578967, "grad_norm": 0.38449469208717346, "learning_rate": 8.201978550662824e-06, "loss": 0.32382792234420776, "step": 10579, "token_acc": 0.8844038844038844 }, { "epoch": 0.5708735768628932, "grad_norm": 0.44159162044525146, "learning_rate": 8.200259496259326e-06, "loss": 0.34849804639816284, "step": 10580, "token_acc": 0.8764180130629082 }, { "epoch": 0.5709275346678897, "grad_norm": 0.5047808289527893, "learning_rate": 8.19854049681943e-06, "loss": 0.3670445382595062, "step": 10581, "token_acc": 0.8700970873786408 }, { "epoch": 0.5709814924728862, "grad_norm": 0.3174089193344116, "learning_rate": 8.196821552395637e-06, "loss": 0.3274747133255005, "step": 10582, "token_acc": 0.8829741686884544 }, { "epoch": 0.5710354502778827, "grad_norm": 0.38548871874809265, "learning_rate": 8.19510266304044e-06, "loss": 0.36268049478530884, "step": 10583, "token_acc": 0.8739124233347597 }, { "epoch": 0.5710894080828792, "grad_norm": 0.44385820627212524, "learning_rate": 8.193383828806335e-06, "loss": 0.369766503572464, "step": 10584, "token_acc": 0.8683247988295537 }, { "epoch": 0.5711433658878757, "grad_norm": 0.35994240641593933, "learning_rate": 8.191665049745809e-06, "loss": 0.3697129487991333, "step": 10585, "token_acc": 0.8706536856745479 }, { "epoch": 0.5711973236928721, "grad_norm": 0.34327155351638794, "learning_rate": 8.189946325911365e-06, "loss": 0.33911779522895813, "step": 10586, "token_acc": 0.8791572810388234 }, { "epoch": 0.5712512814978686, "grad_norm": 0.3531038761138916, "learning_rate": 8.188227657355488e-06, "loss": 0.34150761365890503, "step": 10587, "token_acc": 0.8787842527222688 }, { "epoch": 0.5713052393028651, "grad_norm": 0.31337788701057434, "learning_rate": 8.186509044130662e-06, "loss": 0.3795548975467682, "step": 10588, "token_acc": 0.8669799092676604 }, { "epoch": 0.5713591971078616, "grad_norm": 0.3659709393978119, "learning_rate": 8.184790486289373e-06, "loss": 0.3440171480178833, "step": 10589, "token_acc": 0.8800225098480585 }, { "epoch": 0.5714131549128582, "grad_norm": 0.40016210079193115, "learning_rate": 8.183071983884107e-06, "loss": 0.32950711250305176, "step": 10590, "token_acc": 0.8796104434314131 }, { "epoch": 0.5714671127178547, "grad_norm": 0.5506012439727783, "learning_rate": 8.181353536967344e-06, "loss": 0.3965698182582855, "step": 10591, "token_acc": 0.8545667056269828 }, { "epoch": 0.5715210705228512, "grad_norm": 0.3253932297229767, "learning_rate": 8.179635145591572e-06, "loss": 0.36008983850479126, "step": 10592, "token_acc": 0.8745688664060703 }, { "epoch": 0.5715750283278477, "grad_norm": 0.39616015553474426, "learning_rate": 8.177916809809265e-06, "loss": 0.42411208152770996, "step": 10593, "token_acc": 0.854506919336741 }, { "epoch": 0.5716289861328441, "grad_norm": 0.43127545714378357, "learning_rate": 8.176198529672901e-06, "loss": 0.358455628156662, "step": 10594, "token_acc": 0.8771354380210469 }, { "epoch": 0.5716829439378406, "grad_norm": 0.43067485094070435, "learning_rate": 8.174480305234955e-06, "loss": 0.3565678596496582, "step": 10595, "token_acc": 0.8738901042336894 }, { "epoch": 0.5717369017428371, "grad_norm": 0.46023306250572205, "learning_rate": 8.172762136547905e-06, "loss": 0.37687546014785767, "step": 10596, "token_acc": 0.868619390202124 }, { "epoch": 0.5717908595478336, "grad_norm": 0.3578926920890808, "learning_rate": 8.171044023664219e-06, "loss": 0.3423507809638977, "step": 10597, "token_acc": 0.879480766940574 }, { "epoch": 0.5718448173528301, "grad_norm": 0.41217803955078125, "learning_rate": 8.169325966636366e-06, "loss": 0.33104878664016724, "step": 10598, "token_acc": 0.8855463347164592 }, { "epoch": 0.5718987751578266, "grad_norm": 0.3879720866680145, "learning_rate": 8.167607965516823e-06, "loss": 0.35269102454185486, "step": 10599, "token_acc": 0.8741676858268786 }, { "epoch": 0.5719527329628231, "grad_norm": 0.38195016980171204, "learning_rate": 8.16589002035805e-06, "loss": 0.3703955113887787, "step": 10600, "token_acc": 0.8691998996739403 }, { "epoch": 0.5720066907678195, "grad_norm": 0.4983515739440918, "learning_rate": 8.16417213121252e-06, "loss": 0.3937709629535675, "step": 10601, "token_acc": 0.8646245059288538 }, { "epoch": 0.572060648572816, "grad_norm": 0.35196009278297424, "learning_rate": 8.162454298132688e-06, "loss": 0.36908581852912903, "step": 10602, "token_acc": 0.8692041522491349 }, { "epoch": 0.5721146063778125, "grad_norm": 0.5623912811279297, "learning_rate": 8.160736521171022e-06, "loss": 0.3820344805717468, "step": 10603, "token_acc": 0.8613521034539204 }, { "epoch": 0.572168564182809, "grad_norm": 0.3767924904823303, "learning_rate": 8.159018800379979e-06, "loss": 0.36139214038848877, "step": 10604, "token_acc": 0.8742070454852207 }, { "epoch": 0.5722225219878055, "grad_norm": 0.4314681589603424, "learning_rate": 8.15730113581202e-06, "loss": 0.408186137676239, "step": 10605, "token_acc": 0.859933242675238 }, { "epoch": 0.572276479792802, "grad_norm": 0.3867969214916229, "learning_rate": 8.155583527519603e-06, "loss": 0.34400883316993713, "step": 10606, "token_acc": 0.8795902800479459 }, { "epoch": 0.5723304375977986, "grad_norm": 0.4811582863330841, "learning_rate": 8.15386597555518e-06, "loss": 0.37947988510131836, "step": 10607, "token_acc": 0.8672117743254293 }, { "epoch": 0.5723843954027951, "grad_norm": 0.3178696930408478, "learning_rate": 8.152148479971208e-06, "loss": 0.3395940661430359, "step": 10608, "token_acc": 0.882138822643404 }, { "epoch": 0.5724383532077915, "grad_norm": 0.3227432072162628, "learning_rate": 8.150431040820138e-06, "loss": 0.31734997034072876, "step": 10609, "token_acc": 0.8797125033902902 }, { "epoch": 0.572492311012788, "grad_norm": 0.44195592403411865, "learning_rate": 8.14871365815442e-06, "loss": 0.3909044861793518, "step": 10610, "token_acc": 0.8627995324371712 }, { "epoch": 0.5725462688177845, "grad_norm": 0.4030389189720154, "learning_rate": 8.146996332026498e-06, "loss": 0.40087834000587463, "step": 10611, "token_acc": 0.8592944906856916 }, { "epoch": 0.572600226622781, "grad_norm": 0.3314662575721741, "learning_rate": 8.145279062488825e-06, "loss": 0.3549507260322571, "step": 10612, "token_acc": 0.8699701282727113 }, { "epoch": 0.5726541844277775, "grad_norm": 0.4350767433643341, "learning_rate": 8.143561849593844e-06, "loss": 0.4360775351524353, "step": 10613, "token_acc": 0.8523029539409211 }, { "epoch": 0.572708142232774, "grad_norm": 0.40878164768218994, "learning_rate": 8.141844693393997e-06, "loss": 0.4032210111618042, "step": 10614, "token_acc": 0.8614109967361285 }, { "epoch": 0.5727621000377705, "grad_norm": 0.4280377924442291, "learning_rate": 8.140127593941728e-06, "loss": 0.38557684421539307, "step": 10615, "token_acc": 0.865992414664981 }, { "epoch": 0.5728160578427669, "grad_norm": 0.40599942207336426, "learning_rate": 8.138410551289473e-06, "loss": 0.3618188500404358, "step": 10616, "token_acc": 0.8734158561744768 }, { "epoch": 0.5728700156477634, "grad_norm": 0.5211436748504639, "learning_rate": 8.136693565489669e-06, "loss": 0.4120544195175171, "step": 10617, "token_acc": 0.8552986239182863 }, { "epoch": 0.5729239734527599, "grad_norm": 0.49925559759140015, "learning_rate": 8.13497663659476e-06, "loss": 0.403340220451355, "step": 10618, "token_acc": 0.8588203896450494 }, { "epoch": 0.5729779312577564, "grad_norm": 0.32744836807250977, "learning_rate": 8.133259764657175e-06, "loss": 0.34132063388824463, "step": 10619, "token_acc": 0.8757549611734253 }, { "epoch": 0.5730318890627529, "grad_norm": 0.41819319128990173, "learning_rate": 8.131542949729348e-06, "loss": 0.33469587564468384, "step": 10620, "token_acc": 0.8812437023175471 }, { "epoch": 0.5730858468677494, "grad_norm": 0.5015286803245544, "learning_rate": 8.129826191863714e-06, "loss": 0.36087554693222046, "step": 10621, "token_acc": 0.8734098018769552 }, { "epoch": 0.573139804672746, "grad_norm": 0.4344436228275299, "learning_rate": 8.128109491112693e-06, "loss": 0.3624076843261719, "step": 10622, "token_acc": 0.8736310025273799 }, { "epoch": 0.5731937624777425, "grad_norm": 0.39267396926879883, "learning_rate": 8.126392847528714e-06, "loss": 0.3409644365310669, "step": 10623, "token_acc": 0.8746559919939955 }, { "epoch": 0.5732477202827388, "grad_norm": 0.3535629212856293, "learning_rate": 8.124676261164213e-06, "loss": 0.35316020250320435, "step": 10624, "token_acc": 0.8739804241435563 }, { "epoch": 0.5733016780877354, "grad_norm": 0.38065847754478455, "learning_rate": 8.122959732071606e-06, "loss": 0.38987913727760315, "step": 10625, "token_acc": 0.8639158354966525 }, { "epoch": 0.5733556358927319, "grad_norm": 0.4041624963283539, "learning_rate": 8.121243260303318e-06, "loss": 0.34836676716804504, "step": 10626, "token_acc": 0.8765731614859742 }, { "epoch": 0.5734095936977284, "grad_norm": 0.3739878833293915, "learning_rate": 8.119526845911767e-06, "loss": 0.36133596301078796, "step": 10627, "token_acc": 0.8698118428334256 }, { "epoch": 0.5734635515027249, "grad_norm": 0.4400321841239929, "learning_rate": 8.117810488949375e-06, "loss": 0.3446047902107239, "step": 10628, "token_acc": 0.8759466828233868 }, { "epoch": 0.5735175093077214, "grad_norm": 0.32599493861198425, "learning_rate": 8.116094189468557e-06, "loss": 0.34840700030326843, "step": 10629, "token_acc": 0.8766793947107906 }, { "epoch": 0.5735714671127179, "grad_norm": 0.41605523228645325, "learning_rate": 8.114377947521728e-06, "loss": 0.33355963230133057, "step": 10630, "token_acc": 0.886246863424543 }, { "epoch": 0.5736254249177144, "grad_norm": 0.3559480607509613, "learning_rate": 8.112661763161303e-06, "loss": 0.3278382420539856, "step": 10631, "token_acc": 0.8881060425125388 }, { "epoch": 0.5736793827227108, "grad_norm": 0.4218343198299408, "learning_rate": 8.110945636439694e-06, "loss": 0.3609437048435211, "step": 10632, "token_acc": 0.8707342295760083 }, { "epoch": 0.5737333405277073, "grad_norm": 0.4564938247203827, "learning_rate": 8.10922956740931e-06, "loss": 0.30028897523880005, "step": 10633, "token_acc": 0.888745148771022 }, { "epoch": 0.5737872983327038, "grad_norm": 0.4310554265975952, "learning_rate": 8.10751355612256e-06, "loss": 0.40155377984046936, "step": 10634, "token_acc": 0.8619860847564832 }, { "epoch": 0.5738412561377003, "grad_norm": 0.4655219316482544, "learning_rate": 8.105797602631851e-06, "loss": 0.337958425283432, "step": 10635, "token_acc": 0.884435261707989 }, { "epoch": 0.5738952139426968, "grad_norm": 0.4287114143371582, "learning_rate": 8.104081706989584e-06, "loss": 0.33703288435935974, "step": 10636, "token_acc": 0.882300357568534 }, { "epoch": 0.5739491717476933, "grad_norm": 0.4981454014778137, "learning_rate": 8.102365869248168e-06, "loss": 0.4581030011177063, "step": 10637, "token_acc": 0.8557909895083317 }, { "epoch": 0.5740031295526898, "grad_norm": 0.41665250062942505, "learning_rate": 8.10065008946e-06, "loss": 0.3568230867385864, "step": 10638, "token_acc": 0.8736263736263736 }, { "epoch": 0.5740570873576862, "grad_norm": 0.3833327293395996, "learning_rate": 8.098934367677481e-06, "loss": 0.32723501324653625, "step": 10639, "token_acc": 0.8813775510204082 }, { "epoch": 0.5741110451626827, "grad_norm": 0.37476998567581177, "learning_rate": 8.09721870395301e-06, "loss": 0.33974871039390564, "step": 10640, "token_acc": 0.8767411916188692 }, { "epoch": 0.5741650029676792, "grad_norm": 0.4229144752025604, "learning_rate": 8.095503098338979e-06, "loss": 0.3140419125556946, "step": 10641, "token_acc": 0.8836609732516919 }, { "epoch": 0.5742189607726758, "grad_norm": 0.31106066703796387, "learning_rate": 8.093787550887786e-06, "loss": 0.3348367214202881, "step": 10642, "token_acc": 0.8799129804205946 }, { "epoch": 0.5742729185776723, "grad_norm": 0.4326323866844177, "learning_rate": 8.092072061651817e-06, "loss": 0.37747442722320557, "step": 10643, "token_acc": 0.8707328174783796 }, { "epoch": 0.5743268763826688, "grad_norm": 0.4331974387168884, "learning_rate": 8.090356630683471e-06, "loss": 0.3583222031593323, "step": 10644, "token_acc": 0.8760236145496096 }, { "epoch": 0.5743808341876653, "grad_norm": 0.4437943696975708, "learning_rate": 8.088641258035138e-06, "loss": 0.3639294505119324, "step": 10645, "token_acc": 0.8723616420142928 }, { "epoch": 0.5744347919926618, "grad_norm": 0.3881852328777313, "learning_rate": 8.086925943759196e-06, "loss": 0.3886047601699829, "step": 10646, "token_acc": 0.8654295194789234 }, { "epoch": 0.5744887497976582, "grad_norm": 0.42824864387512207, "learning_rate": 8.085210687908034e-06, "loss": 0.34747305512428284, "step": 10647, "token_acc": 0.8776733415066067 }, { "epoch": 0.5745427076026547, "grad_norm": 0.443785697221756, "learning_rate": 8.083495490534037e-06, "loss": 0.3966832160949707, "step": 10648, "token_acc": 0.8596866096866097 }, { "epoch": 0.5745966654076512, "grad_norm": 0.3737145662307739, "learning_rate": 8.08178035168958e-06, "loss": 0.28393298387527466, "step": 10649, "token_acc": 0.8955319832851173 }, { "epoch": 0.5746506232126477, "grad_norm": 0.48190441727638245, "learning_rate": 8.080065271427054e-06, "loss": 0.4156263768672943, "step": 10650, "token_acc": 0.8522776888371416 }, { "epoch": 0.5747045810176442, "grad_norm": 0.5783124566078186, "learning_rate": 8.078350249798832e-06, "loss": 0.34888631105422974, "step": 10651, "token_acc": 0.8734326018808778 }, { "epoch": 0.5747585388226407, "grad_norm": 0.38138535618782043, "learning_rate": 8.07663528685729e-06, "loss": 0.3555964231491089, "step": 10652, "token_acc": 0.874377421140011 }, { "epoch": 0.5748124966276372, "grad_norm": 0.3220466077327728, "learning_rate": 8.074920382654802e-06, "loss": 0.325478732585907, "step": 10653, "token_acc": 0.8824584284316922 }, { "epoch": 0.5748664544326337, "grad_norm": 0.32197487354278564, "learning_rate": 8.073205537243741e-06, "loss": 0.37801259756088257, "step": 10654, "token_acc": 0.8710746182656295 }, { "epoch": 0.5749204122376301, "grad_norm": 0.36571335792541504, "learning_rate": 8.071490750676478e-06, "loss": 0.40578433871269226, "step": 10655, "token_acc": 0.861866538876243 }, { "epoch": 0.5749743700426266, "grad_norm": 0.2761898934841156, "learning_rate": 8.06977602300538e-06, "loss": 0.3330835998058319, "step": 10656, "token_acc": 0.8787335722819594 }, { "epoch": 0.5750283278476231, "grad_norm": 0.4701492190361023, "learning_rate": 8.06806135428282e-06, "loss": 0.3334835171699524, "step": 10657, "token_acc": 0.8833310430122303 }, { "epoch": 0.5750822856526197, "grad_norm": 0.46345242857933044, "learning_rate": 8.06634674456116e-06, "loss": 0.2923068702220917, "step": 10658, "token_acc": 0.8908423493044823 }, { "epoch": 0.5751362434576162, "grad_norm": 0.37144699692726135, "learning_rate": 8.064632193892763e-06, "loss": 0.32732903957366943, "step": 10659, "token_acc": 0.8827129652760429 }, { "epoch": 0.5751902012626127, "grad_norm": 0.40786147117614746, "learning_rate": 8.062917702329993e-06, "loss": 0.3218357563018799, "step": 10660, "token_acc": 0.8856048166392994 }, { "epoch": 0.5752441590676092, "grad_norm": 0.391350656747818, "learning_rate": 8.061203269925207e-06, "loss": 0.4408628046512604, "step": 10661, "token_acc": 0.8512412723041117 }, { "epoch": 0.5752981168726056, "grad_norm": 0.41983669996261597, "learning_rate": 8.059488896730764e-06, "loss": 0.2981146574020386, "step": 10662, "token_acc": 0.8824769938650306 }, { "epoch": 0.5753520746776021, "grad_norm": 0.3646065294742584, "learning_rate": 8.057774582799025e-06, "loss": 0.4057638347148895, "step": 10663, "token_acc": 0.8592364532019704 }, { "epoch": 0.5754060324825986, "grad_norm": 0.39421093463897705, "learning_rate": 8.05606032818234e-06, "loss": 0.30196428298950195, "step": 10664, "token_acc": 0.8886071363883354 }, { "epoch": 0.5754599902875951, "grad_norm": 0.3895556330680847, "learning_rate": 8.054346132933064e-06, "loss": 0.3229038119316101, "step": 10665, "token_acc": 0.8804061677322301 }, { "epoch": 0.5755139480925916, "grad_norm": 0.36318448185920715, "learning_rate": 8.052631997103548e-06, "loss": 0.34226852655410767, "step": 10666, "token_acc": 0.8781620236951649 }, { "epoch": 0.5755679058975881, "grad_norm": 0.3453097343444824, "learning_rate": 8.050917920746138e-06, "loss": 0.3356149196624756, "step": 10667, "token_acc": 0.8791105121293801 }, { "epoch": 0.5756218637025846, "grad_norm": 0.30738967657089233, "learning_rate": 8.049203903913187e-06, "loss": 0.33689871430397034, "step": 10668, "token_acc": 0.8752738225629791 }, { "epoch": 0.5756758215075811, "grad_norm": 0.4260217249393463, "learning_rate": 8.047489946657034e-06, "loss": 0.3782728314399719, "step": 10669, "token_acc": 0.8701517706576728 }, { "epoch": 0.5757297793125775, "grad_norm": 0.4141474664211273, "learning_rate": 8.045776049030029e-06, "loss": 0.3514620363712311, "step": 10670, "token_acc": 0.8770330902972518 }, { "epoch": 0.575783737117574, "grad_norm": 0.3457199037075043, "learning_rate": 8.044062211084513e-06, "loss": 0.31274479627609253, "step": 10671, "token_acc": 0.8865010938103204 }, { "epoch": 0.5758376949225705, "grad_norm": 0.41680067777633667, "learning_rate": 8.042348432872822e-06, "loss": 0.338403582572937, "step": 10672, "token_acc": 0.8791083816672353 }, { "epoch": 0.575891652727567, "grad_norm": 0.4431629180908203, "learning_rate": 8.0406347144473e-06, "loss": 0.34366899728775024, "step": 10673, "token_acc": 0.8780450200431699 }, { "epoch": 0.5759456105325635, "grad_norm": 0.4664441645145416, "learning_rate": 8.038921055860279e-06, "loss": 0.4315447211265564, "step": 10674, "token_acc": 0.8554887879232009 }, { "epoch": 0.57599956833756, "grad_norm": 0.42684751749038696, "learning_rate": 8.03720745716409e-06, "loss": 0.3549012839794159, "step": 10675, "token_acc": 0.8784591194968554 }, { "epoch": 0.5760535261425566, "grad_norm": 0.3761567175388336, "learning_rate": 8.035493918411078e-06, "loss": 0.3055364191532135, "step": 10676, "token_acc": 0.8899972291493489 }, { "epoch": 0.5761074839475531, "grad_norm": 0.4544869065284729, "learning_rate": 8.033780439653564e-06, "loss": 0.35752058029174805, "step": 10677, "token_acc": 0.8745252924198693 }, { "epoch": 0.5761614417525495, "grad_norm": 0.32516247034072876, "learning_rate": 8.032067020943883e-06, "loss": 0.3342229425907135, "step": 10678, "token_acc": 0.8784458834412581 }, { "epoch": 0.576215399557546, "grad_norm": 0.408509761095047, "learning_rate": 8.030353662334359e-06, "loss": 0.4366411566734314, "step": 10679, "token_acc": 0.8533333333333334 }, { "epoch": 0.5762693573625425, "grad_norm": 0.3765048682689667, "learning_rate": 8.028640363877318e-06, "loss": 0.3774789571762085, "step": 10680, "token_acc": 0.8693200663349917 }, { "epoch": 0.576323315167539, "grad_norm": 0.41518086194992065, "learning_rate": 8.02692712562508e-06, "loss": 0.3617790937423706, "step": 10681, "token_acc": 0.8728090584768109 }, { "epoch": 0.5763772729725355, "grad_norm": 0.31143349409103394, "learning_rate": 8.025213947629975e-06, "loss": 0.3686234951019287, "step": 10682, "token_acc": 0.8716755319148937 }, { "epoch": 0.576431230777532, "grad_norm": 0.3330925405025482, "learning_rate": 8.023500829944315e-06, "loss": 0.32674986124038696, "step": 10683, "token_acc": 0.8869287401027921 }, { "epoch": 0.5764851885825285, "grad_norm": 0.5533136129379272, "learning_rate": 8.021787772620426e-06, "loss": 0.4130784571170807, "step": 10684, "token_acc": 0.858745180511742 }, { "epoch": 0.5765391463875249, "grad_norm": 0.4025702178478241, "learning_rate": 8.020074775710618e-06, "loss": 0.3459298014640808, "step": 10685, "token_acc": 0.8753739930955121 }, { "epoch": 0.5765931041925214, "grad_norm": 0.5052570700645447, "learning_rate": 8.01836183926721e-06, "loss": 0.4266367554664612, "step": 10686, "token_acc": 0.8524289642529789 }, { "epoch": 0.5766470619975179, "grad_norm": 0.360247403383255, "learning_rate": 8.01664896334251e-06, "loss": 0.27803075313568115, "step": 10687, "token_acc": 0.8968843903210502 }, { "epoch": 0.5767010198025144, "grad_norm": 0.32341623306274414, "learning_rate": 8.014936147988831e-06, "loss": 0.38846907019615173, "step": 10688, "token_acc": 0.8628432127277393 }, { "epoch": 0.5767549776075109, "grad_norm": 0.37791743874549866, "learning_rate": 8.013223393258483e-06, "loss": 0.3951028287410736, "step": 10689, "token_acc": 0.8650599461707854 }, { "epoch": 0.5768089354125074, "grad_norm": 0.3853965401649475, "learning_rate": 8.011510699203772e-06, "loss": 0.3764316439628601, "step": 10690, "token_acc": 0.8710080533185226 }, { "epoch": 0.576862893217504, "grad_norm": 0.3873760402202606, "learning_rate": 8.009798065877004e-06, "loss": 0.3413442075252533, "step": 10691, "token_acc": 0.877532928064843 }, { "epoch": 0.5769168510225005, "grad_norm": 0.5246126055717468, "learning_rate": 8.00808549333048e-06, "loss": 0.33026599884033203, "step": 10692, "token_acc": 0.8826425933524825 }, { "epoch": 0.5769708088274969, "grad_norm": 0.3875233232975006, "learning_rate": 8.006372981616505e-06, "loss": 0.3877348303794861, "step": 10693, "token_acc": 0.8658794697560432 }, { "epoch": 0.5770247666324934, "grad_norm": 0.3927763104438782, "learning_rate": 8.004660530787374e-06, "loss": 0.37866732478141785, "step": 10694, "token_acc": 0.8671929095693117 }, { "epoch": 0.5770787244374899, "grad_norm": 0.3772107660770416, "learning_rate": 8.00294814089539e-06, "loss": 0.3679463565349579, "step": 10695, "token_acc": 0.8731065019808902 }, { "epoch": 0.5771326822424864, "grad_norm": 0.4124634265899658, "learning_rate": 8.001235811992847e-06, "loss": 0.3986726403236389, "step": 10696, "token_acc": 0.8659666460778258 }, { "epoch": 0.5771866400474829, "grad_norm": 0.4101164638996124, "learning_rate": 7.999523544132039e-06, "loss": 0.329939067363739, "step": 10697, "token_acc": 0.8751326494517156 }, { "epoch": 0.5772405978524794, "grad_norm": 0.41734862327575684, "learning_rate": 7.997811337365257e-06, "loss": 0.43338167667388916, "step": 10698, "token_acc": 0.8588378460602373 }, { "epoch": 0.5772945556574759, "grad_norm": 0.2985628545284271, "learning_rate": 7.996099191744792e-06, "loss": 0.34835386276245117, "step": 10699, "token_acc": 0.874589466001703 }, { "epoch": 0.5773485134624724, "grad_norm": 0.357963502407074, "learning_rate": 7.994387107322932e-06, "loss": 0.3967275023460388, "step": 10700, "token_acc": 0.8663857550632051 }, { "epoch": 0.5774024712674688, "grad_norm": 0.3701015114784241, "learning_rate": 7.992675084151961e-06, "loss": 0.322958379983902, "step": 10701, "token_acc": 0.8887394957983193 }, { "epoch": 0.5774564290724653, "grad_norm": 0.43160566687583923, "learning_rate": 7.990963122284174e-06, "loss": 0.3811684250831604, "step": 10702, "token_acc": 0.8634220532319392 }, { "epoch": 0.5775103868774618, "grad_norm": 0.3981355130672455, "learning_rate": 7.989251221771843e-06, "loss": 0.3342837691307068, "step": 10703, "token_acc": 0.8779761904761905 }, { "epoch": 0.5775643446824583, "grad_norm": 0.607712984085083, "learning_rate": 7.987539382667257e-06, "loss": 0.33307161927223206, "step": 10704, "token_acc": 0.8842899611250857 }, { "epoch": 0.5776183024874548, "grad_norm": 0.4402481019496918, "learning_rate": 7.98582760502269e-06, "loss": 0.38873183727264404, "step": 10705, "token_acc": 0.8642259070760138 }, { "epoch": 0.5776722602924513, "grad_norm": 0.41348716616630554, "learning_rate": 7.984115888890418e-06, "loss": 0.29357028007507324, "step": 10706, "token_acc": 0.8949976026849928 }, { "epoch": 0.5777262180974478, "grad_norm": 0.38731345534324646, "learning_rate": 7.982404234322714e-06, "loss": 0.3642111122608185, "step": 10707, "token_acc": 0.8719692110327133 }, { "epoch": 0.5777801759024442, "grad_norm": 0.37994226813316345, "learning_rate": 7.980692641371863e-06, "loss": 0.34537339210510254, "step": 10708, "token_acc": 0.8769448373408769 }, { "epoch": 0.5778341337074407, "grad_norm": 0.575477123260498, "learning_rate": 7.978981110090128e-06, "loss": 0.37258201837539673, "step": 10709, "token_acc": 0.877968877968878 }, { "epoch": 0.5778880915124373, "grad_norm": 0.3395550549030304, "learning_rate": 7.977269640529782e-06, "loss": 0.2818598747253418, "step": 10710, "token_acc": 0.8979759943516121 }, { "epoch": 0.5779420493174338, "grad_norm": 0.3883558213710785, "learning_rate": 7.975558232743089e-06, "loss": 0.35831785202026367, "step": 10711, "token_acc": 0.8726775956284153 }, { "epoch": 0.5779960071224303, "grad_norm": 0.4484553635120392, "learning_rate": 7.973846886782319e-06, "loss": 0.35417455434799194, "step": 10712, "token_acc": 0.8741731377624389 }, { "epoch": 0.5780499649274268, "grad_norm": 0.5556803941726685, "learning_rate": 7.972135602699733e-06, "loss": 0.35231783986091614, "step": 10713, "token_acc": 0.8736584447373376 }, { "epoch": 0.5781039227324233, "grad_norm": 0.36360642313957214, "learning_rate": 7.970424380547593e-06, "loss": 0.3198978304862976, "step": 10714, "token_acc": 0.8866832092638545 }, { "epoch": 0.5781578805374198, "grad_norm": 0.426158607006073, "learning_rate": 7.968713220378164e-06, "loss": 0.4125942289829254, "step": 10715, "token_acc": 0.8562603585957511 }, { "epoch": 0.5782118383424162, "grad_norm": 0.38338014483451843, "learning_rate": 7.967002122243699e-06, "loss": 0.3674686551094055, "step": 10716, "token_acc": 0.8703362179032487 }, { "epoch": 0.5782657961474127, "grad_norm": 0.35987111926078796, "learning_rate": 7.965291086196458e-06, "loss": 0.38840287923812866, "step": 10717, "token_acc": 0.8677006227235342 }, { "epoch": 0.5783197539524092, "grad_norm": 0.3782360255718231, "learning_rate": 7.963580112288694e-06, "loss": 0.3872680068016052, "step": 10718, "token_acc": 0.86978928080623 }, { "epoch": 0.5783737117574057, "grad_norm": 0.37872669100761414, "learning_rate": 7.96186920057266e-06, "loss": 0.3223194479942322, "step": 10719, "token_acc": 0.8822683377850832 }, { "epoch": 0.5784276695624022, "grad_norm": 0.3828495442867279, "learning_rate": 7.960158351100604e-06, "loss": 0.31530261039733887, "step": 10720, "token_acc": 0.8857420687978607 }, { "epoch": 0.5784816273673987, "grad_norm": 0.4833478629589081, "learning_rate": 7.95844756392478e-06, "loss": 0.3378462493419647, "step": 10721, "token_acc": 0.8723816912335144 }, { "epoch": 0.5785355851723952, "grad_norm": 0.3941812515258789, "learning_rate": 7.956736839097432e-06, "loss": 0.3173961341381073, "step": 10722, "token_acc": 0.8900709219858156 }, { "epoch": 0.5785895429773916, "grad_norm": 0.3500981032848358, "learning_rate": 7.955026176670807e-06, "loss": 0.35646772384643555, "step": 10723, "token_acc": 0.8796720855912186 }, { "epoch": 0.5786435007823881, "grad_norm": 0.4441532790660858, "learning_rate": 7.953315576697147e-06, "loss": 0.34198206663131714, "step": 10724, "token_acc": 0.879440983330527 }, { "epoch": 0.5786974585873846, "grad_norm": 0.42356210947036743, "learning_rate": 7.951605039228692e-06, "loss": 0.40228956937789917, "step": 10725, "token_acc": 0.8621127879269261 }, { "epoch": 0.5787514163923811, "grad_norm": 0.35031360387802124, "learning_rate": 7.949894564317683e-06, "loss": 0.302181601524353, "step": 10726, "token_acc": 0.8891038039974211 }, { "epoch": 0.5788053741973777, "grad_norm": 0.39255040884017944, "learning_rate": 7.948184152016352e-06, "loss": 0.35385674238204956, "step": 10727, "token_acc": 0.8729240060392551 }, { "epoch": 0.5788593320023742, "grad_norm": 0.49402257800102234, "learning_rate": 7.946473802376945e-06, "loss": 0.34289711713790894, "step": 10728, "token_acc": 0.880648330058939 }, { "epoch": 0.5789132898073707, "grad_norm": 0.405039519071579, "learning_rate": 7.944763515451691e-06, "loss": 0.29491835832595825, "step": 10729, "token_acc": 0.8929663608562691 }, { "epoch": 0.5789672476123672, "grad_norm": 0.37032029032707214, "learning_rate": 7.94305329129282e-06, "loss": 0.3707740902900696, "step": 10730, "token_acc": 0.8702827326780731 }, { "epoch": 0.5790212054173636, "grad_norm": 0.38720646500587463, "learning_rate": 7.941343129952561e-06, "loss": 0.3393383026123047, "step": 10731, "token_acc": 0.8791575703094902 }, { "epoch": 0.5790751632223601, "grad_norm": 0.3909091353416443, "learning_rate": 7.939633031483145e-06, "loss": 0.2987521290779114, "step": 10732, "token_acc": 0.8938820368787398 }, { "epoch": 0.5791291210273566, "grad_norm": 0.2881695628166199, "learning_rate": 7.937922995936792e-06, "loss": 0.29926514625549316, "step": 10733, "token_acc": 0.8907069697726705 }, { "epoch": 0.5791830788323531, "grad_norm": 0.3532896041870117, "learning_rate": 7.936213023365733e-06, "loss": 0.40661343932151794, "step": 10734, "token_acc": 0.8586123059659623 }, { "epoch": 0.5792370366373496, "grad_norm": 0.43430930376052856, "learning_rate": 7.93450311382219e-06, "loss": 0.39722374081611633, "step": 10735, "token_acc": 0.8588987217305801 }, { "epoch": 0.5792909944423461, "grad_norm": 0.3602782189846039, "learning_rate": 7.932793267358378e-06, "loss": 0.33707040548324585, "step": 10736, "token_acc": 0.8787373105270477 }, { "epoch": 0.5793449522473426, "grad_norm": 0.4201504588127136, "learning_rate": 7.93108348402652e-06, "loss": 0.33113718032836914, "step": 10737, "token_acc": 0.8769128409846972 }, { "epoch": 0.5793989100523391, "grad_norm": 0.4396244287490845, "learning_rate": 7.929373763878834e-06, "loss": 0.3503372073173523, "step": 10738, "token_acc": 0.8740916713247624 }, { "epoch": 0.5794528678573355, "grad_norm": 0.32571539282798767, "learning_rate": 7.927664106967522e-06, "loss": 0.34666597843170166, "step": 10739, "token_acc": 0.8785012285012285 }, { "epoch": 0.579506825662332, "grad_norm": 0.45541706681251526, "learning_rate": 7.92595451334481e-06, "loss": 0.37461262941360474, "step": 10740, "token_acc": 0.86860484001949 }, { "epoch": 0.5795607834673285, "grad_norm": 0.441997230052948, "learning_rate": 7.924244983062905e-06, "loss": 0.3897610902786255, "step": 10741, "token_acc": 0.8679954255840548 }, { "epoch": 0.579614741272325, "grad_norm": 0.3892807364463806, "learning_rate": 7.922535516174014e-06, "loss": 0.3443520665168762, "step": 10742, "token_acc": 0.8763690673747095 }, { "epoch": 0.5796686990773215, "grad_norm": 0.29460078477859497, "learning_rate": 7.920826112730343e-06, "loss": 0.34314799308776855, "step": 10743, "token_acc": 0.88096028321717 }, { "epoch": 0.579722656882318, "grad_norm": 0.46402162313461304, "learning_rate": 7.9191167727841e-06, "loss": 0.35085034370422363, "step": 10744, "token_acc": 0.87249013214347 }, { "epoch": 0.5797766146873146, "grad_norm": 0.514542281627655, "learning_rate": 7.917407496387484e-06, "loss": 0.4228318929672241, "step": 10745, "token_acc": 0.8462807438512298 }, { "epoch": 0.579830572492311, "grad_norm": 0.34411197900772095, "learning_rate": 7.915698283592694e-06, "loss": 0.3330695331096649, "step": 10746, "token_acc": 0.8826881975777725 }, { "epoch": 0.5798845302973075, "grad_norm": 0.5255561470985413, "learning_rate": 7.913989134451938e-06, "loss": 0.3780646026134491, "step": 10747, "token_acc": 0.87011393514461 }, { "epoch": 0.579938488102304, "grad_norm": 0.43666964769363403, "learning_rate": 7.912280049017404e-06, "loss": 0.38432577252388, "step": 10748, "token_acc": 0.867331963604344 }, { "epoch": 0.5799924459073005, "grad_norm": 0.365171879529953, "learning_rate": 7.910571027341293e-06, "loss": 0.3826322555541992, "step": 10749, "token_acc": 0.8655216160827559 }, { "epoch": 0.580046403712297, "grad_norm": 0.4349941909313202, "learning_rate": 7.908862069475795e-06, "loss": 0.36885732412338257, "step": 10750, "token_acc": 0.8763975155279503 }, { "epoch": 0.5801003615172935, "grad_norm": 0.4314841330051422, "learning_rate": 7.9071531754731e-06, "loss": 0.29592883586883545, "step": 10751, "token_acc": 0.8917450058754407 }, { "epoch": 0.58015431932229, "grad_norm": 0.3772601783275604, "learning_rate": 7.905444345385398e-06, "loss": 0.3631308376789093, "step": 10752, "token_acc": 0.8723945094051856 }, { "epoch": 0.5802082771272865, "grad_norm": 0.4200379252433777, "learning_rate": 7.903735579264878e-06, "loss": 0.3058518171310425, "step": 10753, "token_acc": 0.8829325317301269 }, { "epoch": 0.5802622349322829, "grad_norm": 0.3616023063659668, "learning_rate": 7.902026877163725e-06, "loss": 0.32407909631729126, "step": 10754, "token_acc": 0.8822867150362772 }, { "epoch": 0.5803161927372794, "grad_norm": 0.38200026750564575, "learning_rate": 7.900318239134122e-06, "loss": 0.35400092601776123, "step": 10755, "token_acc": 0.8780945876728701 }, { "epoch": 0.5803701505422759, "grad_norm": 0.4741114377975464, "learning_rate": 7.898609665228249e-06, "loss": 0.36094731092453003, "step": 10756, "token_acc": 0.8751375137513752 }, { "epoch": 0.5804241083472724, "grad_norm": 0.45625096559524536, "learning_rate": 7.896901155498288e-06, "loss": 0.3123784065246582, "step": 10757, "token_acc": 0.8849357861853523 }, { "epoch": 0.5804780661522689, "grad_norm": 0.3044949769973755, "learning_rate": 7.895192709996413e-06, "loss": 0.3346422016620636, "step": 10758, "token_acc": 0.8796361798888327 }, { "epoch": 0.5805320239572654, "grad_norm": 0.3082204759120941, "learning_rate": 7.8934843287748e-06, "loss": 0.28800415992736816, "step": 10759, "token_acc": 0.8953095133068666 }, { "epoch": 0.580585981762262, "grad_norm": 0.3290511965751648, "learning_rate": 7.891776011885623e-06, "loss": 0.3037058413028717, "step": 10760, "token_acc": 0.8919690660321238 }, { "epoch": 0.5806399395672585, "grad_norm": 0.3305324614048004, "learning_rate": 7.890067759381059e-06, "loss": 0.29727959632873535, "step": 10761, "token_acc": 0.8900304414003044 }, { "epoch": 0.5806938973722549, "grad_norm": 0.3639105260372162, "learning_rate": 7.888359571313272e-06, "loss": 0.34947600960731506, "step": 10762, "token_acc": 0.8726146220570012 }, { "epoch": 0.5807478551772514, "grad_norm": 0.4396974444389343, "learning_rate": 7.88665144773443e-06, "loss": 0.39306941628456116, "step": 10763, "token_acc": 0.8642978890782745 }, { "epoch": 0.5808018129822479, "grad_norm": 0.4872751533985138, "learning_rate": 7.884943388696698e-06, "loss": 0.4251822829246521, "step": 10764, "token_acc": 0.85357025337282 }, { "epoch": 0.5808557707872444, "grad_norm": 0.37539196014404297, "learning_rate": 7.883235394252238e-06, "loss": 0.2986849248409271, "step": 10765, "token_acc": 0.8944546008531383 }, { "epoch": 0.5809097285922409, "grad_norm": 0.395953506231308, "learning_rate": 7.881527464453216e-06, "loss": 0.310588002204895, "step": 10766, "token_acc": 0.8923565958676491 }, { "epoch": 0.5809636863972374, "grad_norm": 0.3464292585849762, "learning_rate": 7.879819599351793e-06, "loss": 0.3511366546154022, "step": 10767, "token_acc": 0.8793044274110514 }, { "epoch": 0.5810176442022339, "grad_norm": 0.29650557041168213, "learning_rate": 7.878111799000123e-06, "loss": 0.3148753345012665, "step": 10768, "token_acc": 0.8857226362706415 }, { "epoch": 0.5810716020072303, "grad_norm": 0.401702344417572, "learning_rate": 7.876404063450363e-06, "loss": 0.33479198813438416, "step": 10769, "token_acc": 0.8758005045604502 }, { "epoch": 0.5811255598122268, "grad_norm": 0.3295948803424835, "learning_rate": 7.874696392754666e-06, "loss": 0.31725460290908813, "step": 10770, "token_acc": 0.8861675480100195 }, { "epoch": 0.5811795176172233, "grad_norm": 0.41948869824409485, "learning_rate": 7.872988786965187e-06, "loss": 0.3472787141799927, "step": 10771, "token_acc": 0.8775327006924852 }, { "epoch": 0.5812334754222198, "grad_norm": 0.44146937131881714, "learning_rate": 7.871281246134068e-06, "loss": 0.3352033495903015, "step": 10772, "token_acc": 0.8764577726515442 }, { "epoch": 0.5812874332272163, "grad_norm": 0.4025415778160095, "learning_rate": 7.869573770313466e-06, "loss": 0.34376680850982666, "step": 10773, "token_acc": 0.8795086543830263 }, { "epoch": 0.5813413910322128, "grad_norm": 0.3148731291294098, "learning_rate": 7.867866359555523e-06, "loss": 0.35992103815078735, "step": 10774, "token_acc": 0.8718477762494269 }, { "epoch": 0.5813953488372093, "grad_norm": 0.3479396402835846, "learning_rate": 7.866159013912383e-06, "loss": 0.3622894287109375, "step": 10775, "token_acc": 0.871066259460895 }, { "epoch": 0.5814493066422058, "grad_norm": 0.3782183825969696, "learning_rate": 7.864451733436186e-06, "loss": 0.3546123504638672, "step": 10776, "token_acc": 0.8789875692676352 }, { "epoch": 0.5815032644472022, "grad_norm": 0.34418970346450806, "learning_rate": 7.862744518179074e-06, "loss": 0.34291374683380127, "step": 10777, "token_acc": 0.8730136797015338 }, { "epoch": 0.5815572222521987, "grad_norm": 0.34590768814086914, "learning_rate": 7.861037368193183e-06, "loss": 0.35141584277153015, "step": 10778, "token_acc": 0.8732629238465814 }, { "epoch": 0.5816111800571953, "grad_norm": 0.36452701687812805, "learning_rate": 7.859330283530654e-06, "loss": 0.38055843114852905, "step": 10779, "token_acc": 0.865829782333295 }, { "epoch": 0.5816651378621918, "grad_norm": 0.3246033489704132, "learning_rate": 7.857623264243614e-06, "loss": 0.36055803298950195, "step": 10780, "token_acc": 0.872671422909235 }, { "epoch": 0.5817190956671883, "grad_norm": 0.36936894059181213, "learning_rate": 7.855916310384201e-06, "loss": 0.31666460633277893, "step": 10781, "token_acc": 0.8839240506329114 }, { "epoch": 0.5817730534721848, "grad_norm": 0.4019145667552948, "learning_rate": 7.854209422004542e-06, "loss": 0.3002622127532959, "step": 10782, "token_acc": 0.8929698346585889 }, { "epoch": 0.5818270112771813, "grad_norm": 0.4022599756717682, "learning_rate": 7.852502599156763e-06, "loss": 0.39158397912979126, "step": 10783, "token_acc": 0.8642033978852323 }, { "epoch": 0.5818809690821778, "grad_norm": 0.3394412100315094, "learning_rate": 7.85079584189299e-06, "loss": 0.34153637290000916, "step": 10784, "token_acc": 0.8771186440677966 }, { "epoch": 0.5819349268871742, "grad_norm": 0.3797113001346588, "learning_rate": 7.849089150265351e-06, "loss": 0.31715622544288635, "step": 10785, "token_acc": 0.8841331603977518 }, { "epoch": 0.5819888846921707, "grad_norm": 0.2187177985906601, "learning_rate": 7.847382524325972e-06, "loss": 0.27551573514938354, "step": 10786, "token_acc": 0.9010061668289516 }, { "epoch": 0.5820428424971672, "grad_norm": 0.4926387071609497, "learning_rate": 7.845675964126962e-06, "loss": 0.3463437259197235, "step": 10787, "token_acc": 0.8760477111540941 }, { "epoch": 0.5820968003021637, "grad_norm": 0.43584850430488586, "learning_rate": 7.843969469720444e-06, "loss": 0.33359605073928833, "step": 10788, "token_acc": 0.8846327174402884 }, { "epoch": 0.5821507581071602, "grad_norm": 0.376583069562912, "learning_rate": 7.842263041158534e-06, "loss": 0.3494533896446228, "step": 10789, "token_acc": 0.8715555555555555 }, { "epoch": 0.5822047159121567, "grad_norm": 0.38637158274650574, "learning_rate": 7.840556678493344e-06, "loss": 0.31024372577667236, "step": 10790, "token_acc": 0.88945420906568 }, { "epoch": 0.5822586737171532, "grad_norm": 0.3163110911846161, "learning_rate": 7.838850381776984e-06, "loss": 0.3515523672103882, "step": 10791, "token_acc": 0.8792384814689648 }, { "epoch": 0.5823126315221496, "grad_norm": 0.37173447012901306, "learning_rate": 7.837144151061572e-06, "loss": 0.31339797377586365, "step": 10792, "token_acc": 0.8852810285149167 }, { "epoch": 0.5823665893271461, "grad_norm": 0.41009604930877686, "learning_rate": 7.83543798639921e-06, "loss": 0.33984020352363586, "step": 10793, "token_acc": 0.8753523332289384 }, { "epoch": 0.5824205471321426, "grad_norm": 0.5046628713607788, "learning_rate": 7.833731887842006e-06, "loss": 0.40940019488334656, "step": 10794, "token_acc": 0.8600665557404326 }, { "epoch": 0.5824745049371391, "grad_norm": 0.3852415084838867, "learning_rate": 7.83202585544206e-06, "loss": 0.33999568223953247, "step": 10795, "token_acc": 0.8773510352457721 }, { "epoch": 0.5825284627421357, "grad_norm": 0.4097711145877838, "learning_rate": 7.83031988925148e-06, "loss": 0.3269250988960266, "step": 10796, "token_acc": 0.8847321921590282 }, { "epoch": 0.5825824205471322, "grad_norm": 0.42668914794921875, "learning_rate": 7.828613989322358e-06, "loss": 0.4187845289707184, "step": 10797, "token_acc": 0.8565568676196396 }, { "epoch": 0.5826363783521287, "grad_norm": 0.34657469391822815, "learning_rate": 7.826908155706799e-06, "loss": 0.36958950757980347, "step": 10798, "token_acc": 0.8705856785830708 }, { "epoch": 0.5826903361571252, "grad_norm": 0.38723024725914, "learning_rate": 7.825202388456895e-06, "loss": 0.359826922416687, "step": 10799, "token_acc": 0.8745964316057774 }, { "epoch": 0.5827442939621216, "grad_norm": 0.44410860538482666, "learning_rate": 7.823496687624741e-06, "loss": 0.36033767461776733, "step": 10800, "token_acc": 0.8717277486910995 }, { "epoch": 0.5827982517671181, "grad_norm": 0.415881484746933, "learning_rate": 7.821791053262428e-06, "loss": 0.30573809146881104, "step": 10801, "token_acc": 0.8892307692307693 }, { "epoch": 0.5828522095721146, "grad_norm": 0.30401211977005005, "learning_rate": 7.820085485422045e-06, "loss": 0.3297652006149292, "step": 10802, "token_acc": 0.8835512601857116 }, { "epoch": 0.5829061673771111, "grad_norm": 0.4471197724342346, "learning_rate": 7.818379984155681e-06, "loss": 0.3812185823917389, "step": 10803, "token_acc": 0.870427687113112 }, { "epoch": 0.5829601251821076, "grad_norm": 0.4172236919403076, "learning_rate": 7.816674549515418e-06, "loss": 0.2964712977409363, "step": 10804, "token_acc": 0.8937245313773431 }, { "epoch": 0.5830140829871041, "grad_norm": 0.4864667057991028, "learning_rate": 7.814969181553344e-06, "loss": 0.3864508271217346, "step": 10805, "token_acc": 0.8652748962655602 }, { "epoch": 0.5830680407921006, "grad_norm": 0.3300112187862396, "learning_rate": 7.81326388032154e-06, "loss": 0.3943614959716797, "step": 10806, "token_acc": 0.8604989372413021 }, { "epoch": 0.5831219985970971, "grad_norm": 0.25444138050079346, "learning_rate": 7.811558645872084e-06, "loss": 0.35241377353668213, "step": 10807, "token_acc": 0.8791547435615232 }, { "epoch": 0.5831759564020935, "grad_norm": 0.3192874491214752, "learning_rate": 7.809853478257053e-06, "loss": 0.3127982020378113, "step": 10808, "token_acc": 0.8864532591046161 }, { "epoch": 0.58322991420709, "grad_norm": 0.37659531831741333, "learning_rate": 7.808148377528524e-06, "loss": 0.33998337388038635, "step": 10809, "token_acc": 0.8824595649749024 }, { "epoch": 0.5832838720120865, "grad_norm": 0.58006751537323, "learning_rate": 7.806443343738565e-06, "loss": 0.39472895860671997, "step": 10810, "token_acc": 0.8632972771353973 }, { "epoch": 0.583337829817083, "grad_norm": 0.3134487569332123, "learning_rate": 7.804738376939256e-06, "loss": 0.34657448530197144, "step": 10811, "token_acc": 0.8791551246537396 }, { "epoch": 0.5833917876220795, "grad_norm": 0.489944726228714, "learning_rate": 7.803033477182662e-06, "loss": 0.38194742798805237, "step": 10812, "token_acc": 0.8637203353701861 }, { "epoch": 0.5834457454270761, "grad_norm": 0.427537202835083, "learning_rate": 7.801328644520848e-06, "loss": 0.3393113613128662, "step": 10813, "token_acc": 0.8811739441660702 }, { "epoch": 0.5834997032320726, "grad_norm": 0.3349553644657135, "learning_rate": 7.799623879005883e-06, "loss": 0.34633344411849976, "step": 10814, "token_acc": 0.8760417889423642 }, { "epoch": 0.583553661037069, "grad_norm": 0.40684303641319275, "learning_rate": 7.797919180689828e-06, "loss": 0.3461502194404602, "step": 10815, "token_acc": 0.8758321273516643 }, { "epoch": 0.5836076188420655, "grad_norm": 0.33154040575027466, "learning_rate": 7.796214549624745e-06, "loss": 0.36125102639198303, "step": 10816, "token_acc": 0.8716137994906228 }, { "epoch": 0.583661576647062, "grad_norm": 0.31801289319992065, "learning_rate": 7.794509985862687e-06, "loss": 0.34511369466781616, "step": 10817, "token_acc": 0.878158844765343 }, { "epoch": 0.5837155344520585, "grad_norm": 0.422392338514328, "learning_rate": 7.79280548945572e-06, "loss": 0.3291047215461731, "step": 10818, "token_acc": 0.882703488372093 }, { "epoch": 0.583769492257055, "grad_norm": 0.40648239850997925, "learning_rate": 7.791101060455898e-06, "loss": 0.32899969816207886, "step": 10819, "token_acc": 0.8789310298285868 }, { "epoch": 0.5838234500620515, "grad_norm": 0.4418222904205322, "learning_rate": 7.789396698915267e-06, "loss": 0.3270457684993744, "step": 10820, "token_acc": 0.8839705882352941 }, { "epoch": 0.583877407867048, "grad_norm": 0.37449902296066284, "learning_rate": 7.787692404885885e-06, "loss": 0.2888779044151306, "step": 10821, "token_acc": 0.8917149803628203 }, { "epoch": 0.5839313656720445, "grad_norm": 0.37727081775665283, "learning_rate": 7.785988178419796e-06, "loss": 0.379472017288208, "step": 10822, "token_acc": 0.8664768683274021 }, { "epoch": 0.5839853234770409, "grad_norm": 0.3700401782989502, "learning_rate": 7.784284019569045e-06, "loss": 0.2928240895271301, "step": 10823, "token_acc": 0.8924786766606359 }, { "epoch": 0.5840392812820374, "grad_norm": 0.3588963449001312, "learning_rate": 7.782579928385683e-06, "loss": 0.3465144634246826, "step": 10824, "token_acc": 0.8788033099936347 }, { "epoch": 0.5840932390870339, "grad_norm": 0.35433468222618103, "learning_rate": 7.780875904921749e-06, "loss": 0.3016510605812073, "step": 10825, "token_acc": 0.8895177165354331 }, { "epoch": 0.5841471968920304, "grad_norm": 0.4465087056159973, "learning_rate": 7.779171949229282e-06, "loss": 0.39227670431137085, "step": 10826, "token_acc": 0.8660280029476787 }, { "epoch": 0.5842011546970269, "grad_norm": 0.3471105098724365, "learning_rate": 7.777468061360324e-06, "loss": 0.39732158184051514, "step": 10827, "token_acc": 0.8616217435434758 }, { "epoch": 0.5842551125020234, "grad_norm": 0.42246922850608826, "learning_rate": 7.775764241366908e-06, "loss": 0.3939239978790283, "step": 10828, "token_acc": 0.8596156293750795 }, { "epoch": 0.58430907030702, "grad_norm": 0.4138786196708679, "learning_rate": 7.77406048930107e-06, "loss": 0.41009119153022766, "step": 10829, "token_acc": 0.8596150735888727 }, { "epoch": 0.5843630281120165, "grad_norm": 0.3715650737285614, "learning_rate": 7.772356805214839e-06, "loss": 0.33088868856430054, "step": 10830, "token_acc": 0.8853350057489286 }, { "epoch": 0.5844169859170129, "grad_norm": 0.4081667363643646, "learning_rate": 7.77065318916025e-06, "loss": 0.34663715958595276, "step": 10831, "token_acc": 0.8797719813439281 }, { "epoch": 0.5844709437220094, "grad_norm": 0.4920474588871002, "learning_rate": 7.768949641189328e-06, "loss": 0.3954150080680847, "step": 10832, "token_acc": 0.8569837734648426 }, { "epoch": 0.5845249015270059, "grad_norm": 0.46800774335861206, "learning_rate": 7.767246161354101e-06, "loss": 0.3829396069049835, "step": 10833, "token_acc": 0.869352555026387 }, { "epoch": 0.5845788593320024, "grad_norm": 0.31665289402008057, "learning_rate": 7.76554274970659e-06, "loss": 0.3597007691860199, "step": 10834, "token_acc": 0.8710731339532546 }, { "epoch": 0.5846328171369989, "grad_norm": 0.4102092385292053, "learning_rate": 7.763839406298818e-06, "loss": 0.31139037013053894, "step": 10835, "token_acc": 0.8891213389121339 }, { "epoch": 0.5846867749419954, "grad_norm": 0.49857544898986816, "learning_rate": 7.762136131182803e-06, "loss": 0.41060060262680054, "step": 10836, "token_acc": 0.8600728646606396 }, { "epoch": 0.5847407327469919, "grad_norm": 0.45075929164886475, "learning_rate": 7.760432924410568e-06, "loss": 0.36978644132614136, "step": 10837, "token_acc": 0.8748368382886149 }, { "epoch": 0.5847946905519883, "grad_norm": 0.3025215268135071, "learning_rate": 7.758729786034122e-06, "loss": 0.3396201729774475, "step": 10838, "token_acc": 0.8795708342456755 }, { "epoch": 0.5848486483569848, "grad_norm": 0.44233572483062744, "learning_rate": 7.757026716105482e-06, "loss": 0.3483961522579193, "step": 10839, "token_acc": 0.8803045255886085 }, { "epoch": 0.5849026061619813, "grad_norm": 0.4453556537628174, "learning_rate": 7.75532371467666e-06, "loss": 0.3584901988506317, "step": 10840, "token_acc": 0.8718116415958143 }, { "epoch": 0.5849565639669778, "grad_norm": 0.25884681940078735, "learning_rate": 7.753620781799661e-06, "loss": 0.3205159604549408, "step": 10841, "token_acc": 0.8841010401188707 }, { "epoch": 0.5850105217719743, "grad_norm": 0.3669886589050293, "learning_rate": 7.751917917526493e-06, "loss": 0.34535348415374756, "step": 10842, "token_acc": 0.8783705421861409 }, { "epoch": 0.5850644795769708, "grad_norm": 0.4454612731933594, "learning_rate": 7.750215121909166e-06, "loss": 0.32974356412887573, "step": 10843, "token_acc": 0.8802725502920181 }, { "epoch": 0.5851184373819673, "grad_norm": 0.4399608075618744, "learning_rate": 7.748512394999683e-06, "loss": 0.3309441804885864, "step": 10844, "token_acc": 0.8827535159141376 }, { "epoch": 0.5851723951869638, "grad_norm": 0.3813771903514862, "learning_rate": 7.746809736850039e-06, "loss": 0.3280792534351349, "step": 10845, "token_acc": 0.8808567603748326 }, { "epoch": 0.5852263529919602, "grad_norm": 0.4485081732273102, "learning_rate": 7.745107147512237e-06, "loss": 0.3943324089050293, "step": 10846, "token_acc": 0.8599586711174694 }, { "epoch": 0.5852803107969567, "grad_norm": 0.4290793836116791, "learning_rate": 7.74340462703827e-06, "loss": 0.3399648070335388, "step": 10847, "token_acc": 0.8767228177641654 }, { "epoch": 0.5853342686019533, "grad_norm": 0.534288763999939, "learning_rate": 7.741702175480135e-06, "loss": 0.4268617033958435, "step": 10848, "token_acc": 0.8512170531470545 }, { "epoch": 0.5853882264069498, "grad_norm": 0.38135039806365967, "learning_rate": 7.739999792889818e-06, "loss": 0.33929938077926636, "step": 10849, "token_acc": 0.8787280701754386 }, { "epoch": 0.5854421842119463, "grad_norm": 0.33873623609542847, "learning_rate": 7.738297479319321e-06, "loss": 0.32959040999412537, "step": 10850, "token_acc": 0.8809869375907112 }, { "epoch": 0.5854961420169428, "grad_norm": 0.3996509611606598, "learning_rate": 7.736595234820628e-06, "loss": 0.3579138517379761, "step": 10851, "token_acc": 0.873942470389171 }, { "epoch": 0.5855500998219393, "grad_norm": 0.4747942388057709, "learning_rate": 7.734893059445721e-06, "loss": 0.39350223541259766, "step": 10852, "token_acc": 0.8680312864037277 }, { "epoch": 0.5856040576269357, "grad_norm": 0.34268203377723694, "learning_rate": 7.73319095324659e-06, "loss": 0.3366592526435852, "step": 10853, "token_acc": 0.8854316259773416 }, { "epoch": 0.5856580154319322, "grad_norm": 0.49486085772514343, "learning_rate": 7.73148891627521e-06, "loss": 0.3469909727573395, "step": 10854, "token_acc": 0.8821731748726656 }, { "epoch": 0.5857119732369287, "grad_norm": 0.38854295015335083, "learning_rate": 7.729786948583563e-06, "loss": 0.32940444350242615, "step": 10855, "token_acc": 0.8781447886359747 }, { "epoch": 0.5857659310419252, "grad_norm": 0.40942874550819397, "learning_rate": 7.72808505022363e-06, "loss": 0.3309014141559601, "step": 10856, "token_acc": 0.8823779795686719 }, { "epoch": 0.5858198888469217, "grad_norm": 0.3775953948497772, "learning_rate": 7.726383221247386e-06, "loss": 0.38781026005744934, "step": 10857, "token_acc": 0.8647737585893945 }, { "epoch": 0.5858738466519182, "grad_norm": 0.3886849284172058, "learning_rate": 7.7246814617068e-06, "loss": 0.30005335807800293, "step": 10858, "token_acc": 0.8914056224899598 }, { "epoch": 0.5859278044569147, "grad_norm": 0.34513774514198303, "learning_rate": 7.722979771653848e-06, "loss": 0.30660000443458557, "step": 10859, "token_acc": 0.8882235528942116 }, { "epoch": 0.5859817622619112, "grad_norm": 0.3882950246334076, "learning_rate": 7.721278151140498e-06, "loss": 0.35447704792022705, "step": 10860, "token_acc": 0.8737519705727799 }, { "epoch": 0.5860357200669076, "grad_norm": 0.4676612913608551, "learning_rate": 7.719576600218714e-06, "loss": 0.3457074463367462, "step": 10861, "token_acc": 0.8753173483779971 }, { "epoch": 0.5860896778719041, "grad_norm": 0.3565502166748047, "learning_rate": 7.717875118940464e-06, "loss": 0.3401385247707367, "step": 10862, "token_acc": 0.8766898294377764 }, { "epoch": 0.5861436356769006, "grad_norm": 0.41314882040023804, "learning_rate": 7.716173707357709e-06, "loss": 0.3872162699699402, "step": 10863, "token_acc": 0.8642115509409475 }, { "epoch": 0.5861975934818972, "grad_norm": 0.3436453640460968, "learning_rate": 7.714472365522414e-06, "loss": 0.3578374981880188, "step": 10864, "token_acc": 0.8759785743716523 }, { "epoch": 0.5862515512868937, "grad_norm": 0.39008548855781555, "learning_rate": 7.71277109348653e-06, "loss": 0.36154890060424805, "step": 10865, "token_acc": 0.8768996960486323 }, { "epoch": 0.5863055090918902, "grad_norm": 0.4011998176574707, "learning_rate": 7.711069891302021e-06, "loss": 0.34417232871055603, "step": 10866, "token_acc": 0.8733539261908633 }, { "epoch": 0.5863594668968867, "grad_norm": 0.42507293820381165, "learning_rate": 7.709368759020837e-06, "loss": 0.26099610328674316, "step": 10867, "token_acc": 0.9019384264538198 }, { "epoch": 0.5864134247018832, "grad_norm": 0.4592069983482361, "learning_rate": 7.70766769669493e-06, "loss": 0.3774251341819763, "step": 10868, "token_acc": 0.8694404591104734 }, { "epoch": 0.5864673825068796, "grad_norm": 0.3324388265609741, "learning_rate": 7.70596670437625e-06, "loss": 0.33124563097953796, "step": 10869, "token_acc": 0.8779593707041393 }, { "epoch": 0.5865213403118761, "grad_norm": 0.405139297246933, "learning_rate": 7.704265782116748e-06, "loss": 0.3513006567955017, "step": 10870, "token_acc": 0.8735080058224163 }, { "epoch": 0.5865752981168726, "grad_norm": 0.430308073759079, "learning_rate": 7.702564929968369e-06, "loss": 0.3584214746952057, "step": 10871, "token_acc": 0.8681592039800995 }, { "epoch": 0.5866292559218691, "grad_norm": 0.3261621296405792, "learning_rate": 7.700864147983054e-06, "loss": 0.34901630878448486, "step": 10872, "token_acc": 0.87625250501002 }, { "epoch": 0.5866832137268656, "grad_norm": 0.3837224543094635, "learning_rate": 7.699163436212745e-06, "loss": 0.34702757000923157, "step": 10873, "token_acc": 0.8794979079497908 }, { "epoch": 0.5867371715318621, "grad_norm": 0.3591156303882599, "learning_rate": 7.697462794709382e-06, "loss": 0.380409300327301, "step": 10874, "token_acc": 0.8631868131868132 }, { "epoch": 0.5867911293368586, "grad_norm": 0.40933752059936523, "learning_rate": 7.695762223524898e-06, "loss": 0.3480929434299469, "step": 10875, "token_acc": 0.8758280479210712 }, { "epoch": 0.586845087141855, "grad_norm": 0.4089721441268921, "learning_rate": 7.694061722711236e-06, "loss": 0.3178033232688904, "step": 10876, "token_acc": 0.8888691006233304 }, { "epoch": 0.5868990449468515, "grad_norm": 0.40977272391319275, "learning_rate": 7.692361292320326e-06, "loss": 0.3328941762447357, "step": 10877, "token_acc": 0.8797884505431676 }, { "epoch": 0.586953002751848, "grad_norm": 0.317797988653183, "learning_rate": 7.690660932404097e-06, "loss": 0.2986833453178406, "step": 10878, "token_acc": 0.8896141572381782 }, { "epoch": 0.5870069605568445, "grad_norm": 0.39081650972366333, "learning_rate": 7.68896064301448e-06, "loss": 0.42862749099731445, "step": 10879, "token_acc": 0.8504170951891212 }, { "epoch": 0.587060918361841, "grad_norm": 0.3267669975757599, "learning_rate": 7.6872604242034e-06, "loss": 0.3305240273475647, "step": 10880, "token_acc": 0.8828009828009828 }, { "epoch": 0.5871148761668376, "grad_norm": 0.446028470993042, "learning_rate": 7.685560276022773e-06, "loss": 0.3760391175746918, "step": 10881, "token_acc": 0.8677673874926943 }, { "epoch": 0.5871688339718341, "grad_norm": 0.4276217520236969, "learning_rate": 7.683860198524535e-06, "loss": 0.3249363601207733, "step": 10882, "token_acc": 0.8836949585194639 }, { "epoch": 0.5872227917768306, "grad_norm": 0.3964380621910095, "learning_rate": 7.682160191760598e-06, "loss": 0.3225541114807129, "step": 10883, "token_acc": 0.8847631241997439 }, { "epoch": 0.587276749581827, "grad_norm": 0.38900521397590637, "learning_rate": 7.680460255782884e-06, "loss": 0.36477118730545044, "step": 10884, "token_acc": 0.8708975217682519 }, { "epoch": 0.5873307073868235, "grad_norm": 0.49125272035598755, "learning_rate": 7.678760390643303e-06, "loss": 0.40935003757476807, "step": 10885, "token_acc": 0.8552396648811976 }, { "epoch": 0.58738466519182, "grad_norm": 0.4519451856613159, "learning_rate": 7.677060596393776e-06, "loss": 0.35523948073387146, "step": 10886, "token_acc": 0.876940606751175 }, { "epoch": 0.5874386229968165, "grad_norm": 0.4658076763153076, "learning_rate": 7.675360873086203e-06, "loss": 0.38801759481430054, "step": 10887, "token_acc": 0.8694855532064835 }, { "epoch": 0.587492580801813, "grad_norm": 0.37489691376686096, "learning_rate": 7.673661220772504e-06, "loss": 0.3996390402317047, "step": 10888, "token_acc": 0.8626057114842505 }, { "epoch": 0.5875465386068095, "grad_norm": 0.3786250650882721, "learning_rate": 7.671961639504583e-06, "loss": 0.37708646059036255, "step": 10889, "token_acc": 0.869771332961517 }, { "epoch": 0.587600496411806, "grad_norm": 0.44205647706985474, "learning_rate": 7.670262129334343e-06, "loss": 0.3264552056789398, "step": 10890, "token_acc": 0.881940885016231 }, { "epoch": 0.5876544542168025, "grad_norm": 0.3246332108974457, "learning_rate": 7.668562690313687e-06, "loss": 0.36926430463790894, "step": 10891, "token_acc": 0.8707655826558266 }, { "epoch": 0.5877084120217989, "grad_norm": 0.39706966280937195, "learning_rate": 7.666863322494515e-06, "loss": 0.34844857454299927, "step": 10892, "token_acc": 0.8751492002864646 }, { "epoch": 0.5877623698267954, "grad_norm": 0.39541563391685486, "learning_rate": 7.665164025928728e-06, "loss": 0.3396601676940918, "step": 10893, "token_acc": 0.8771674194958473 }, { "epoch": 0.5878163276317919, "grad_norm": 0.32861462235450745, "learning_rate": 7.663464800668217e-06, "loss": 0.30250653624534607, "step": 10894, "token_acc": 0.8869112483351496 }, { "epoch": 0.5878702854367884, "grad_norm": 0.5174922347068787, "learning_rate": 7.66176564676488e-06, "loss": 0.39151903986930847, "step": 10895, "token_acc": 0.8673528840551563 }, { "epoch": 0.5879242432417849, "grad_norm": 0.3283248841762543, "learning_rate": 7.660066564270607e-06, "loss": 0.32566386461257935, "step": 10896, "token_acc": 0.8819465304896366 }, { "epoch": 0.5879782010467814, "grad_norm": 0.44376251101493835, "learning_rate": 7.65836755323729e-06, "loss": 0.28028497099876404, "step": 10897, "token_acc": 0.8949562317632347 }, { "epoch": 0.588032158851778, "grad_norm": 0.4664969742298126, "learning_rate": 7.656668613716814e-06, "loss": 0.36310526728630066, "step": 10898, "token_acc": 0.8722304283604135 }, { "epoch": 0.5880861166567743, "grad_norm": 0.3369613587856293, "learning_rate": 7.654969745761065e-06, "loss": 0.3009277284145355, "step": 10899, "token_acc": 0.89063817824009 }, { "epoch": 0.5881400744617709, "grad_norm": 0.3483359217643738, "learning_rate": 7.65327094942192e-06, "loss": 0.35685858130455017, "step": 10900, "token_acc": 0.8744806748080071 }, { "epoch": 0.5881940322667674, "grad_norm": 0.5426209568977356, "learning_rate": 7.651572224751271e-06, "loss": 0.39211249351501465, "step": 10901, "token_acc": 0.8651543793320731 }, { "epoch": 0.5882479900717639, "grad_norm": 0.2951603829860687, "learning_rate": 7.649873571800991e-06, "loss": 0.3372691571712494, "step": 10902, "token_acc": 0.8816587625454947 }, { "epoch": 0.5883019478767604, "grad_norm": 0.5077694058418274, "learning_rate": 7.648174990622958e-06, "loss": 0.3303546905517578, "step": 10903, "token_acc": 0.8844663613948573 }, { "epoch": 0.5883559056817569, "grad_norm": 0.34529730677604675, "learning_rate": 7.646476481269043e-06, "loss": 0.3202469050884247, "step": 10904, "token_acc": 0.884351192706382 }, { "epoch": 0.5884098634867534, "grad_norm": 0.36103659868240356, "learning_rate": 7.644778043791119e-06, "loss": 0.33756348490715027, "step": 10905, "token_acc": 0.8787926675094817 }, { "epoch": 0.5884638212917499, "grad_norm": 0.42494067549705505, "learning_rate": 7.643079678241057e-06, "loss": 0.32432928681373596, "step": 10906, "token_acc": 0.8845792930872693 }, { "epoch": 0.5885177790967463, "grad_norm": 0.42206433415412903, "learning_rate": 7.64138138467072e-06, "loss": 0.33173486590385437, "step": 10907, "token_acc": 0.8814249363867684 }, { "epoch": 0.5885717369017428, "grad_norm": 0.5065571665763855, "learning_rate": 7.639683163131984e-06, "loss": 0.39149099588394165, "step": 10908, "token_acc": 0.867564534231201 }, { "epoch": 0.5886256947067393, "grad_norm": 0.38740968704223633, "learning_rate": 7.637985013676703e-06, "loss": 0.35437947511672974, "step": 10909, "token_acc": 0.8753747881632121 }, { "epoch": 0.5886796525117358, "grad_norm": 0.336005300283432, "learning_rate": 7.63628693635674e-06, "loss": 0.3964678645133972, "step": 10910, "token_acc": 0.8641931684334511 }, { "epoch": 0.5887336103167323, "grad_norm": 0.4298775792121887, "learning_rate": 7.634588931223958e-06, "loss": 0.3566676378250122, "step": 10911, "token_acc": 0.8771168649405179 }, { "epoch": 0.5887875681217288, "grad_norm": 0.4962162971496582, "learning_rate": 7.63289099833021e-06, "loss": 0.39031484723091125, "step": 10912, "token_acc": 0.8695722881769393 }, { "epoch": 0.5888415259267253, "grad_norm": 0.40014907717704773, "learning_rate": 7.631193137727347e-06, "loss": 0.3676213026046753, "step": 10913, "token_acc": 0.8686517322944205 }, { "epoch": 0.5888954837317218, "grad_norm": 0.353912353515625, "learning_rate": 7.629495349467227e-06, "loss": 0.33018970489501953, "step": 10914, "token_acc": 0.8829362142962853 }, { "epoch": 0.5889494415367182, "grad_norm": 0.3927198052406311, "learning_rate": 7.6277976336016994e-06, "loss": 0.39565005898475647, "step": 10915, "token_acc": 0.859363754138477 }, { "epoch": 0.5890033993417148, "grad_norm": 0.3488445580005646, "learning_rate": 7.626099990182609e-06, "loss": 0.340573787689209, "step": 10916, "token_acc": 0.8827063740856844 }, { "epoch": 0.5890573571467113, "grad_norm": 0.44681882858276367, "learning_rate": 7.624402419261805e-06, "loss": 0.38119974732398987, "step": 10917, "token_acc": 0.868081880212282 }, { "epoch": 0.5891113149517078, "grad_norm": 0.42466360330581665, "learning_rate": 7.622704920891128e-06, "loss": 0.3253299295902252, "step": 10918, "token_acc": 0.8818875780707842 }, { "epoch": 0.5891652727567043, "grad_norm": 0.4561113715171814, "learning_rate": 7.621007495122419e-06, "loss": 0.3735089898109436, "step": 10919, "token_acc": 0.8692460620890045 }, { "epoch": 0.5892192305617008, "grad_norm": 0.4973183870315552, "learning_rate": 7.619310142007516e-06, "loss": 0.339733362197876, "step": 10920, "token_acc": 0.872957942301008 }, { "epoch": 0.5892731883666973, "grad_norm": 0.45706361532211304, "learning_rate": 7.617612861598262e-06, "loss": 0.33294302225112915, "step": 10921, "token_acc": 0.8846043987432162 }, { "epoch": 0.5893271461716937, "grad_norm": 0.4407670199871063, "learning_rate": 7.615915653946484e-06, "loss": 0.4214519262313843, "step": 10922, "token_acc": 0.8602732980425951 }, { "epoch": 0.5893811039766902, "grad_norm": 0.3342369794845581, "learning_rate": 7.61421851910402e-06, "loss": 0.34925976395606995, "step": 10923, "token_acc": 0.8780959396004051 }, { "epoch": 0.5894350617816867, "grad_norm": 0.4440005123615265, "learning_rate": 7.612521457122696e-06, "loss": 0.3637963831424713, "step": 10924, "token_acc": 0.8784206411258796 }, { "epoch": 0.5894890195866832, "grad_norm": 0.3330897092819214, "learning_rate": 7.610824468054341e-06, "loss": 0.359221488237381, "step": 10925, "token_acc": 0.8755228616760421 }, { "epoch": 0.5895429773916797, "grad_norm": 0.42460891604423523, "learning_rate": 7.609127551950777e-06, "loss": 0.39612674713134766, "step": 10926, "token_acc": 0.8618457300275482 }, { "epoch": 0.5895969351966762, "grad_norm": 0.4712271988391876, "learning_rate": 7.6074307088638375e-06, "loss": 0.3698647618293762, "step": 10927, "token_acc": 0.8677248677248677 }, { "epoch": 0.5896508930016727, "grad_norm": 0.36922943592071533, "learning_rate": 7.6057339388453354e-06, "loss": 0.32173818349838257, "step": 10928, "token_acc": 0.8844380861548243 }, { "epoch": 0.5897048508066692, "grad_norm": 0.42516228556632996, "learning_rate": 7.604037241947092e-06, "loss": 0.3181532025337219, "step": 10929, "token_acc": 0.8885220644877297 }, { "epoch": 0.5897588086116656, "grad_norm": 0.3596116304397583, "learning_rate": 7.602340618220924e-06, "loss": 0.3686347007751465, "step": 10930, "token_acc": 0.8699642431466031 }, { "epoch": 0.5898127664166621, "grad_norm": 0.3967134952545166, "learning_rate": 7.600644067718644e-06, "loss": 0.3953205943107605, "step": 10931, "token_acc": 0.8642741101757495 }, { "epoch": 0.5898667242216586, "grad_norm": 0.30962079763412476, "learning_rate": 7.598947590492066e-06, "loss": 0.36929231882095337, "step": 10932, "token_acc": 0.8701608848667672 }, { "epoch": 0.5899206820266552, "grad_norm": 0.38781389594078064, "learning_rate": 7.597251186592996e-06, "loss": 0.3413015604019165, "step": 10933, "token_acc": 0.8812803889789304 }, { "epoch": 0.5899746398316517, "grad_norm": 0.3240412175655365, "learning_rate": 7.595554856073249e-06, "loss": 0.3477226793766022, "step": 10934, "token_acc": 0.8745448138141666 }, { "epoch": 0.5900285976366482, "grad_norm": 0.4985424876213074, "learning_rate": 7.593858598984627e-06, "loss": 0.4098356366157532, "step": 10935, "token_acc": 0.8633967789165446 }, { "epoch": 0.5900825554416447, "grad_norm": 0.29009032249450684, "learning_rate": 7.592162415378933e-06, "loss": 0.30843478441238403, "step": 10936, "token_acc": 0.8929761042722665 }, { "epoch": 0.5901365132466412, "grad_norm": 0.3968404531478882, "learning_rate": 7.590466305307971e-06, "loss": 0.29250696301460266, "step": 10937, "token_acc": 0.8927816369676835 }, { "epoch": 0.5901904710516376, "grad_norm": 0.41082730889320374, "learning_rate": 7.588770268823533e-06, "loss": 0.40851643681526184, "step": 10938, "token_acc": 0.8606201101130108 }, { "epoch": 0.5902444288566341, "grad_norm": 0.3862511217594147, "learning_rate": 7.587074305977417e-06, "loss": 0.36313143372535706, "step": 10939, "token_acc": 0.8724982505248425 }, { "epoch": 0.5902983866616306, "grad_norm": 0.4078189432621002, "learning_rate": 7.585378416821423e-06, "loss": 0.30632829666137695, "step": 10940, "token_acc": 0.8881947436449806 }, { "epoch": 0.5903523444666271, "grad_norm": 0.34298789501190186, "learning_rate": 7.583682601407338e-06, "loss": 0.39723116159439087, "step": 10941, "token_acc": 0.8642929548910289 }, { "epoch": 0.5904063022716236, "grad_norm": 0.3429970443248749, "learning_rate": 7.581986859786956e-06, "loss": 0.37121152877807617, "step": 10942, "token_acc": 0.8732298739088263 }, { "epoch": 0.5904602600766201, "grad_norm": 0.47380462288856506, "learning_rate": 7.58029119201206e-06, "loss": 0.3819119334220886, "step": 10943, "token_acc": 0.8682913836729258 }, { "epoch": 0.5905142178816166, "grad_norm": 0.4517246186733246, "learning_rate": 7.578595598134439e-06, "loss": 0.38285815715789795, "step": 10944, "token_acc": 0.8610209501327826 }, { "epoch": 0.590568175686613, "grad_norm": 0.3966039717197418, "learning_rate": 7.5769000782058696e-06, "loss": 0.2816831171512604, "step": 10945, "token_acc": 0.8960163111668757 }, { "epoch": 0.5906221334916095, "grad_norm": 0.46505311131477356, "learning_rate": 7.575204632278142e-06, "loss": 0.3361275792121887, "step": 10946, "token_acc": 0.8812044224888261 }, { "epoch": 0.590676091296606, "grad_norm": 0.4127531349658966, "learning_rate": 7.573509260403027e-06, "loss": 0.38487693667411804, "step": 10947, "token_acc": 0.8667144906743185 }, { "epoch": 0.5907300491016025, "grad_norm": 0.409028023481369, "learning_rate": 7.571813962632305e-06, "loss": 0.41551169753074646, "step": 10948, "token_acc": 0.8563793339728656 }, { "epoch": 0.590784006906599, "grad_norm": 0.46009185910224915, "learning_rate": 7.570118739017748e-06, "loss": 0.3535846471786499, "step": 10949, "token_acc": 0.8782696177062375 }, { "epoch": 0.5908379647115956, "grad_norm": 0.4157726466655731, "learning_rate": 7.568423589611129e-06, "loss": 0.3375193178653717, "step": 10950, "token_acc": 0.8842306560753163 }, { "epoch": 0.5908919225165921, "grad_norm": 0.44119828939437866, "learning_rate": 7.566728514464215e-06, "loss": 0.35515499114990234, "step": 10951, "token_acc": 0.8724045545880776 }, { "epoch": 0.5909458803215886, "grad_norm": 0.41163766384124756, "learning_rate": 7.565033513628774e-06, "loss": 0.3708183765411377, "step": 10952, "token_acc": 0.8702609551944854 }, { "epoch": 0.590999838126585, "grad_norm": 0.4296095073223114, "learning_rate": 7.563338587156574e-06, "loss": 0.32815054059028625, "step": 10953, "token_acc": 0.8761431092571795 }, { "epoch": 0.5910537959315815, "grad_norm": 0.3947776257991791, "learning_rate": 7.561643735099374e-06, "loss": 0.3683633804321289, "step": 10954, "token_acc": 0.8687845303867403 }, { "epoch": 0.591107753736578, "grad_norm": 0.43900302052497864, "learning_rate": 7.559948957508935e-06, "loss": 0.39308327436447144, "step": 10955, "token_acc": 0.8621651785714286 }, { "epoch": 0.5911617115415745, "grad_norm": 0.42200130224227905, "learning_rate": 7.558254254437018e-06, "loss": 0.3484359383583069, "step": 10956, "token_acc": 0.8726956986374566 }, { "epoch": 0.591215669346571, "grad_norm": 0.49538108706474304, "learning_rate": 7.5565596259353744e-06, "loss": 0.356078565120697, "step": 10957, "token_acc": 0.8771792052772106 }, { "epoch": 0.5912696271515675, "grad_norm": 0.34703806042671204, "learning_rate": 7.554865072055756e-06, "loss": 0.3199438452720642, "step": 10958, "token_acc": 0.884095238095238 }, { "epoch": 0.591323584956564, "grad_norm": 0.31321585178375244, "learning_rate": 7.553170592849922e-06, "loss": 0.32545047998428345, "step": 10959, "token_acc": 0.8806978937441057 }, { "epoch": 0.5913775427615605, "grad_norm": 0.37034860253334045, "learning_rate": 7.551476188369617e-06, "loss": 0.3127842843532562, "step": 10960, "token_acc": 0.888083652552024 }, { "epoch": 0.5914315005665569, "grad_norm": 0.35329869389533997, "learning_rate": 7.549781858666591e-06, "loss": 0.39038413763046265, "step": 10961, "token_acc": 0.8640731556106633 }, { "epoch": 0.5914854583715534, "grad_norm": 0.38044413924217224, "learning_rate": 7.548087603792582e-06, "loss": 0.3888391852378845, "step": 10962, "token_acc": 0.8655163974547234 }, { "epoch": 0.5915394161765499, "grad_norm": 0.4358319044113159, "learning_rate": 7.546393423799336e-06, "loss": 0.3821972608566284, "step": 10963, "token_acc": 0.8646234194612424 }, { "epoch": 0.5915933739815464, "grad_norm": 0.37817898392677307, "learning_rate": 7.544699318738592e-06, "loss": 0.3166068494319916, "step": 10964, "token_acc": 0.8883727899310758 }, { "epoch": 0.5916473317865429, "grad_norm": 0.4256244897842407, "learning_rate": 7.543005288662082e-06, "loss": 0.3869439959526062, "step": 10965, "token_acc": 0.8641712978097418 }, { "epoch": 0.5917012895915394, "grad_norm": 0.3176330029964447, "learning_rate": 7.541311333621552e-06, "loss": 0.32223138213157654, "step": 10966, "token_acc": 0.8828263318847578 }, { "epoch": 0.591755247396536, "grad_norm": 0.46167829632759094, "learning_rate": 7.53961745366873e-06, "loss": 0.339679479598999, "step": 10967, "token_acc": 0.8796777982883034 }, { "epoch": 0.5918092052015324, "grad_norm": 0.4339895248413086, "learning_rate": 7.537923648855346e-06, "loss": 0.327156126499176, "step": 10968, "token_acc": 0.8842224744608399 }, { "epoch": 0.5918631630065289, "grad_norm": 0.3444671034812927, "learning_rate": 7.53622991923313e-06, "loss": 0.36391276121139526, "step": 10969, "token_acc": 0.8709233521398101 }, { "epoch": 0.5919171208115254, "grad_norm": 0.33040809631347656, "learning_rate": 7.534536264853806e-06, "loss": 0.35919463634490967, "step": 10970, "token_acc": 0.8731285349894643 }, { "epoch": 0.5919710786165219, "grad_norm": 0.40393003821372986, "learning_rate": 7.5328426857690964e-06, "loss": 0.3363872170448303, "step": 10971, "token_acc": 0.8794168911424497 }, { "epoch": 0.5920250364215184, "grad_norm": 0.3644914925098419, "learning_rate": 7.531149182030727e-06, "loss": 0.2949363589286804, "step": 10972, "token_acc": 0.8984949247462373 }, { "epoch": 0.5920789942265149, "grad_norm": 0.33155491948127747, "learning_rate": 7.529455753690414e-06, "loss": 0.30582043528556824, "step": 10973, "token_acc": 0.8882328710696141 }, { "epoch": 0.5921329520315114, "grad_norm": 0.4040263593196869, "learning_rate": 7.527762400799876e-06, "loss": 0.3441022038459778, "step": 10974, "token_acc": 0.874614910659273 }, { "epoch": 0.5921869098365079, "grad_norm": 0.2919166386127472, "learning_rate": 7.526069123410826e-06, "loss": 0.30961933732032776, "step": 10975, "token_acc": 0.8883776414451261 }, { "epoch": 0.5922408676415043, "grad_norm": 0.4350895285606384, "learning_rate": 7.5243759215749775e-06, "loss": 0.3498837351799011, "step": 10976, "token_acc": 0.8718604301398707 }, { "epoch": 0.5922948254465008, "grad_norm": 0.41042232513427734, "learning_rate": 7.522682795344041e-06, "loss": 0.36338257789611816, "step": 10977, "token_acc": 0.8781756593273651 }, { "epoch": 0.5923487832514973, "grad_norm": 0.4835833013057709, "learning_rate": 7.520989744769719e-06, "loss": 0.38669586181640625, "step": 10978, "token_acc": 0.8633175355450237 }, { "epoch": 0.5924027410564938, "grad_norm": 0.3491792678833008, "learning_rate": 7.519296769903724e-06, "loss": 0.3358200788497925, "step": 10979, "token_acc": 0.8824750830564784 }, { "epoch": 0.5924566988614903, "grad_norm": 0.36466866731643677, "learning_rate": 7.517603870797755e-06, "loss": 0.31017398834228516, "step": 10980, "token_acc": 0.8857215576482566 }, { "epoch": 0.5925106566664868, "grad_norm": 0.40147683024406433, "learning_rate": 7.515911047503514e-06, "loss": 0.3372250199317932, "step": 10981, "token_acc": 0.8757271285034374 }, { "epoch": 0.5925646144714833, "grad_norm": 0.4454417824745178, "learning_rate": 7.514218300072699e-06, "loss": 0.40050026774406433, "step": 10982, "token_acc": 0.860610806577917 }, { "epoch": 0.5926185722764797, "grad_norm": 0.36357152462005615, "learning_rate": 7.5125256285570045e-06, "loss": 0.3472386598587036, "step": 10983, "token_acc": 0.8787241111972532 }, { "epoch": 0.5926725300814762, "grad_norm": 0.4208531975746155, "learning_rate": 7.510833033008122e-06, "loss": 0.3561381697654724, "step": 10984, "token_acc": 0.8754934025036653 }, { "epoch": 0.5927264878864728, "grad_norm": 0.3891414403915405, "learning_rate": 7.509140513477755e-06, "loss": 0.38769733905792236, "step": 10985, "token_acc": 0.8653256051824071 }, { "epoch": 0.5927804456914693, "grad_norm": 0.4116816818714142, "learning_rate": 7.50744807001758e-06, "loss": 0.3117096424102783, "step": 10986, "token_acc": 0.8876541330491703 }, { "epoch": 0.5928344034964658, "grad_norm": 0.35836300253868103, "learning_rate": 7.505755702679288e-06, "loss": 0.3281615674495697, "step": 10987, "token_acc": 0.8802955096386933 }, { "epoch": 0.5928883613014623, "grad_norm": 0.4182042181491852, "learning_rate": 7.504063411514565e-06, "loss": 0.3615400195121765, "step": 10988, "token_acc": 0.8745333549748417 }, { "epoch": 0.5929423191064588, "grad_norm": 0.47777679562568665, "learning_rate": 7.50237119657509e-06, "loss": 0.3439422845840454, "step": 10989, "token_acc": 0.8808916113663434 }, { "epoch": 0.5929962769114553, "grad_norm": 0.2886306345462799, "learning_rate": 7.500679057912546e-06, "loss": 0.31299644708633423, "step": 10990, "token_acc": 0.8828102505038872 }, { "epoch": 0.5930502347164517, "grad_norm": 0.39267608523368835, "learning_rate": 7.498986995578603e-06, "loss": 0.3413022458553314, "step": 10991, "token_acc": 0.8772638277043563 }, { "epoch": 0.5931041925214482, "grad_norm": 0.43069085478782654, "learning_rate": 7.497295009624946e-06, "loss": 0.4107198715209961, "step": 10992, "token_acc": 0.862683961087553 }, { "epoch": 0.5931581503264447, "grad_norm": 0.3192806839942932, "learning_rate": 7.4956031001032445e-06, "loss": 0.33100396394729614, "step": 10993, "token_acc": 0.8809130003307972 }, { "epoch": 0.5932121081314412, "grad_norm": 0.336258202791214, "learning_rate": 7.493911267065168e-06, "loss": 0.3618762493133545, "step": 10994, "token_acc": 0.8788066353057687 }, { "epoch": 0.5932660659364377, "grad_norm": 0.41605061292648315, "learning_rate": 7.4922195105623865e-06, "loss": 0.2943626940250397, "step": 10995, "token_acc": 0.8905704307334109 }, { "epoch": 0.5933200237414342, "grad_norm": 0.3451153039932251, "learning_rate": 7.490527830646563e-06, "loss": 0.31432342529296875, "step": 10996, "token_acc": 0.8855695317959469 }, { "epoch": 0.5933739815464307, "grad_norm": 0.3729243576526642, "learning_rate": 7.4888362273693585e-06, "loss": 0.31636449694633484, "step": 10997, "token_acc": 0.8873430189608471 }, { "epoch": 0.5934279393514272, "grad_norm": 0.24682876467704773, "learning_rate": 7.487144700782441e-06, "loss": 0.3177119493484497, "step": 10998, "token_acc": 0.8868026915563273 }, { "epoch": 0.5934818971564236, "grad_norm": 0.3501587510108948, "learning_rate": 7.485453250937467e-06, "loss": 0.33916184306144714, "step": 10999, "token_acc": 0.877649491642254 }, { "epoch": 0.5935358549614201, "grad_norm": 0.26199036836624146, "learning_rate": 7.483761877886091e-06, "loss": 0.26983538269996643, "step": 11000, "token_acc": 0.9020753075680378 }, { "epoch": 0.5935898127664166, "grad_norm": 0.43466266989707947, "learning_rate": 7.482070581679969e-06, "loss": 0.368991494178772, "step": 11001, "token_acc": 0.8680709534368071 }, { "epoch": 0.5936437705714132, "grad_norm": 0.3272233307361603, "learning_rate": 7.480379362370751e-06, "loss": 0.2727174758911133, "step": 11002, "token_acc": 0.8972972972972973 }, { "epoch": 0.5936977283764097, "grad_norm": 0.40685126185417175, "learning_rate": 7.478688220010087e-06, "loss": 0.33130210638046265, "step": 11003, "token_acc": 0.8828650573802929 }, { "epoch": 0.5937516861814062, "grad_norm": 0.3794712722301483, "learning_rate": 7.476997154649624e-06, "loss": 0.34701287746429443, "step": 11004, "token_acc": 0.8764188093583507 }, { "epoch": 0.5938056439864027, "grad_norm": 0.3717101216316223, "learning_rate": 7.475306166341008e-06, "loss": 0.32098856568336487, "step": 11005, "token_acc": 0.8815538910752825 }, { "epoch": 0.5938596017913991, "grad_norm": 0.491147518157959, "learning_rate": 7.47361525513588e-06, "loss": 0.39935222268104553, "step": 11006, "token_acc": 0.8669934640522876 }, { "epoch": 0.5939135595963956, "grad_norm": 0.5121459364891052, "learning_rate": 7.471924421085879e-06, "loss": 0.3562409281730652, "step": 11007, "token_acc": 0.8741232227488152 }, { "epoch": 0.5939675174013921, "grad_norm": 0.41507598757743835, "learning_rate": 7.4702336642426455e-06, "loss": 0.3369217813014984, "step": 11008, "token_acc": 0.883342091434577 }, { "epoch": 0.5940214752063886, "grad_norm": 0.47308653593063354, "learning_rate": 7.468542984657813e-06, "loss": 0.3500310182571411, "step": 11009, "token_acc": 0.8765179752314537 }, { "epoch": 0.5940754330113851, "grad_norm": 0.4175052344799042, "learning_rate": 7.466852382383012e-06, "loss": 0.3019789159297943, "step": 11010, "token_acc": 0.8899375079709221 }, { "epoch": 0.5941293908163816, "grad_norm": 0.3787063658237457, "learning_rate": 7.465161857469878e-06, "loss": 0.31792691349983215, "step": 11011, "token_acc": 0.8872333220453776 }, { "epoch": 0.5941833486213781, "grad_norm": 0.3303217589855194, "learning_rate": 7.463471409970038e-06, "loss": 0.36235320568084717, "step": 11012, "token_acc": 0.8739992375142966 }, { "epoch": 0.5942373064263746, "grad_norm": 0.41236743330955505, "learning_rate": 7.461781039935116e-06, "loss": 0.3752138912677765, "step": 11013, "token_acc": 0.8683973318911123 }, { "epoch": 0.594291264231371, "grad_norm": 0.37348389625549316, "learning_rate": 7.460090747416735e-06, "loss": 0.3283606767654419, "step": 11014, "token_acc": 0.8785973735680358 }, { "epoch": 0.5943452220363675, "grad_norm": 0.4264216721057892, "learning_rate": 7.458400532466519e-06, "loss": 0.3704497814178467, "step": 11015, "token_acc": 0.8668663218077866 }, { "epoch": 0.594399179841364, "grad_norm": 0.366036593914032, "learning_rate": 7.456710395136079e-06, "loss": 0.30553480982780457, "step": 11016, "token_acc": 0.8914197875908328 }, { "epoch": 0.5944531376463605, "grad_norm": 0.4147666394710541, "learning_rate": 7.455020335477041e-06, "loss": 0.32787415385246277, "step": 11017, "token_acc": 0.8825027282648236 }, { "epoch": 0.594507095451357, "grad_norm": 0.33302903175354004, "learning_rate": 7.4533303535410175e-06, "loss": 0.3219819664955139, "step": 11018, "token_acc": 0.8836746302616609 }, { "epoch": 0.5945610532563536, "grad_norm": 0.5218167901039124, "learning_rate": 7.451640449379616e-06, "loss": 0.3935917019844055, "step": 11019, "token_acc": 0.8615357887421821 }, { "epoch": 0.5946150110613501, "grad_norm": 0.39744311571121216, "learning_rate": 7.4499506230444506e-06, "loss": 0.32904428243637085, "step": 11020, "token_acc": 0.8798496240601503 }, { "epoch": 0.5946689688663466, "grad_norm": 0.35147616267204285, "learning_rate": 7.448260874587123e-06, "loss": 0.33224356174468994, "step": 11021, "token_acc": 0.8774867374005305 }, { "epoch": 0.594722926671343, "grad_norm": 0.406922847032547, "learning_rate": 7.446571204059241e-06, "loss": 0.33691316843032837, "step": 11022, "token_acc": 0.8812075274438056 }, { "epoch": 0.5947768844763395, "grad_norm": 0.38843318819999695, "learning_rate": 7.4448816115124e-06, "loss": 0.36375319957733154, "step": 11023, "token_acc": 0.874119367273694 }, { "epoch": 0.594830842281336, "grad_norm": 0.3820764422416687, "learning_rate": 7.443192096998208e-06, "loss": 0.31913596391677856, "step": 11024, "token_acc": 0.8865967500288118 }, { "epoch": 0.5948848000863325, "grad_norm": 0.36267316341400146, "learning_rate": 7.4415026605682614e-06, "loss": 0.3383459448814392, "step": 11025, "token_acc": 0.8803519061583578 }, { "epoch": 0.594938757891329, "grad_norm": 0.40289390087127686, "learning_rate": 7.439813302274154e-06, "loss": 0.35497671365737915, "step": 11026, "token_acc": 0.870527000650618 }, { "epoch": 0.5949927156963255, "grad_norm": 0.3267146050930023, "learning_rate": 7.438124022167476e-06, "loss": 0.3153848350048065, "step": 11027, "token_acc": 0.8840992273281821 }, { "epoch": 0.595046673501322, "grad_norm": 0.4025050103664398, "learning_rate": 7.43643482029982e-06, "loss": 0.3915557265281677, "step": 11028, "token_acc": 0.8644412191582003 }, { "epoch": 0.5951006313063184, "grad_norm": 0.5946228504180908, "learning_rate": 7.434745696722771e-06, "loss": 0.38024112582206726, "step": 11029, "token_acc": 0.8699551569506726 }, { "epoch": 0.5951545891113149, "grad_norm": 0.32599323987960815, "learning_rate": 7.433056651487918e-06, "loss": 0.3509698510169983, "step": 11030, "token_acc": 0.8743676222596964 }, { "epoch": 0.5952085469163114, "grad_norm": 0.48273152112960815, "learning_rate": 7.431367684646843e-06, "loss": 0.3485996425151825, "step": 11031, "token_acc": 0.8797678275290216 }, { "epoch": 0.5952625047213079, "grad_norm": 0.3239796757698059, "learning_rate": 7.429678796251125e-06, "loss": 0.33386433124542236, "step": 11032, "token_acc": 0.881301354652454 }, { "epoch": 0.5953164625263044, "grad_norm": 0.37864118814468384, "learning_rate": 7.427989986352345e-06, "loss": 0.36581164598464966, "step": 11033, "token_acc": 0.8693227091633466 }, { "epoch": 0.5953704203313009, "grad_norm": 0.3935263156890869, "learning_rate": 7.4263012550020764e-06, "loss": 0.34313589334487915, "step": 11034, "token_acc": 0.8737593052109182 }, { "epoch": 0.5954243781362974, "grad_norm": 0.4229734241962433, "learning_rate": 7.424612602251893e-06, "loss": 0.3647165298461914, "step": 11035, "token_acc": 0.8702923686818632 }, { "epoch": 0.595478335941294, "grad_norm": 0.4194849133491516, "learning_rate": 7.422924028153363e-06, "loss": 0.3938048779964447, "step": 11036, "token_acc": 0.8670520231213873 }, { "epoch": 0.5955322937462904, "grad_norm": 0.3606378138065338, "learning_rate": 7.421235532758062e-06, "loss": 0.379777193069458, "step": 11037, "token_acc": 0.8698064516129033 }, { "epoch": 0.5955862515512869, "grad_norm": 0.5126635432243347, "learning_rate": 7.419547116117551e-06, "loss": 0.45248475670814514, "step": 11038, "token_acc": 0.8477466504263094 }, { "epoch": 0.5956402093562834, "grad_norm": 0.3290789723396301, "learning_rate": 7.417858778283395e-06, "loss": 0.3344828486442566, "step": 11039, "token_acc": 0.8811648079306071 }, { "epoch": 0.5956941671612799, "grad_norm": 0.4135064482688904, "learning_rate": 7.416170519307157e-06, "loss": 0.3605791926383972, "step": 11040, "token_acc": 0.8756392122728757 }, { "epoch": 0.5957481249662764, "grad_norm": 0.38522836565971375, "learning_rate": 7.414482339240392e-06, "loss": 0.31297242641448975, "step": 11041, "token_acc": 0.8859484148307362 }, { "epoch": 0.5958020827712729, "grad_norm": 0.42629867792129517, "learning_rate": 7.412794238134657e-06, "loss": 0.42039018869400024, "step": 11042, "token_acc": 0.8546176508261817 }, { "epoch": 0.5958560405762694, "grad_norm": 0.39712703227996826, "learning_rate": 7.411106216041511e-06, "loss": 0.3081175684928894, "step": 11043, "token_acc": 0.8875132649451716 }, { "epoch": 0.5959099983812659, "grad_norm": 0.32288286089897156, "learning_rate": 7.409418273012507e-06, "loss": 0.35057854652404785, "step": 11044, "token_acc": 0.8769650513186956 }, { "epoch": 0.5959639561862623, "grad_norm": 0.3562159836292267, "learning_rate": 7.407730409099187e-06, "loss": 0.33718833327293396, "step": 11045, "token_acc": 0.8771420674405749 }, { "epoch": 0.5960179139912588, "grad_norm": 0.4131040871143341, "learning_rate": 7.4060426243531004e-06, "loss": 0.32374244928359985, "step": 11046, "token_acc": 0.8882667662062959 }, { "epoch": 0.5960718717962553, "grad_norm": 0.30867642164230347, "learning_rate": 7.4043549188257935e-06, "loss": 0.3345114588737488, "step": 11047, "token_acc": 0.882799719560645 }, { "epoch": 0.5961258296012518, "grad_norm": 0.43907904624938965, "learning_rate": 7.402667292568803e-06, "loss": 0.3804924488067627, "step": 11048, "token_acc": 0.8686189790366459 }, { "epoch": 0.5961797874062483, "grad_norm": 0.405730664730072, "learning_rate": 7.400979745633676e-06, "loss": 0.36311137676239014, "step": 11049, "token_acc": 0.8704798726404367 }, { "epoch": 0.5962337452112448, "grad_norm": 0.5139896869659424, "learning_rate": 7.399292278071949e-06, "loss": 0.37995997071266174, "step": 11050, "token_acc": 0.8648751667619592 }, { "epoch": 0.5962877030162413, "grad_norm": 0.30778542160987854, "learning_rate": 7.397604889935153e-06, "loss": 0.3779624104499817, "step": 11051, "token_acc": 0.8685721265420789 }, { "epoch": 0.5963416608212377, "grad_norm": 0.4501536786556244, "learning_rate": 7.395917581274823e-06, "loss": 0.3252449929714203, "step": 11052, "token_acc": 0.8839957035445757 }, { "epoch": 0.5963956186262342, "grad_norm": 0.396465003490448, "learning_rate": 7.3942303521424865e-06, "loss": 0.3711852431297302, "step": 11053, "token_acc": 0.8719536292978527 }, { "epoch": 0.5964495764312308, "grad_norm": 0.34914571046829224, "learning_rate": 7.3925432025896744e-06, "loss": 0.37837839126586914, "step": 11054, "token_acc": 0.8689608636977058 }, { "epoch": 0.5965035342362273, "grad_norm": 0.4579779803752899, "learning_rate": 7.390856132667905e-06, "loss": 0.36499202251434326, "step": 11055, "token_acc": 0.8706215531375764 }, { "epoch": 0.5965574920412238, "grad_norm": 0.3449585735797882, "learning_rate": 7.389169142428709e-06, "loss": 0.3818262219429016, "step": 11056, "token_acc": 0.8618223760092272 }, { "epoch": 0.5966114498462203, "grad_norm": 0.4486021399497986, "learning_rate": 7.387482231923605e-06, "loss": 0.28371506929397583, "step": 11057, "token_acc": 0.892555266579974 }, { "epoch": 0.5966654076512168, "grad_norm": 0.5160385966300964, "learning_rate": 7.385795401204108e-06, "loss": 0.3832181692123413, "step": 11058, "token_acc": 0.8643067846607669 }, { "epoch": 0.5967193654562133, "grad_norm": 0.3638715147972107, "learning_rate": 7.384108650321735e-06, "loss": 0.34507274627685547, "step": 11059, "token_acc": 0.8767013610888711 }, { "epoch": 0.5967733232612097, "grad_norm": 0.3841726779937744, "learning_rate": 7.382421979327998e-06, "loss": 0.3737967014312744, "step": 11060, "token_acc": 0.8743922204213939 }, { "epoch": 0.5968272810662062, "grad_norm": 0.3621448874473572, "learning_rate": 7.380735388274406e-06, "loss": 0.36165308952331543, "step": 11061, "token_acc": 0.8727087576374746 }, { "epoch": 0.5968812388712027, "grad_norm": 0.26779791712760925, "learning_rate": 7.379048877212472e-06, "loss": 0.30966120958328247, "step": 11062, "token_acc": 0.8868428075793968 }, { "epoch": 0.5969351966761992, "grad_norm": 0.4293486773967743, "learning_rate": 7.377362446193698e-06, "loss": 0.3423495590686798, "step": 11063, "token_acc": 0.8780742286480847 }, { "epoch": 0.5969891544811957, "grad_norm": 0.2892945110797882, "learning_rate": 7.375676095269588e-06, "loss": 0.37005212903022766, "step": 11064, "token_acc": 0.8678809957498482 }, { "epoch": 0.5970431122861922, "grad_norm": 0.42890116572380066, "learning_rate": 7.373989824491642e-06, "loss": 0.3885829746723175, "step": 11065, "token_acc": 0.861710383361788 }, { "epoch": 0.5970970700911887, "grad_norm": 0.25545305013656616, "learning_rate": 7.37230363391136e-06, "loss": 0.28881916403770447, "step": 11066, "token_acc": 0.894804436660829 }, { "epoch": 0.5971510278961852, "grad_norm": 0.4354456663131714, "learning_rate": 7.370617523580235e-06, "loss": 0.35754674673080444, "step": 11067, "token_acc": 0.86862635831708 }, { "epoch": 0.5972049857011816, "grad_norm": 0.4378044605255127, "learning_rate": 7.368931493549761e-06, "loss": 0.36166098713874817, "step": 11068, "token_acc": 0.8725797266514806 }, { "epoch": 0.5972589435061781, "grad_norm": 0.35933512449264526, "learning_rate": 7.367245543871433e-06, "loss": 0.31948769092559814, "step": 11069, "token_acc": 0.88282605267308 }, { "epoch": 0.5973129013111746, "grad_norm": 0.37390586733818054, "learning_rate": 7.365559674596734e-06, "loss": 0.3702228367328644, "step": 11070, "token_acc": 0.8703333845444768 }, { "epoch": 0.5973668591161712, "grad_norm": 0.40338143706321716, "learning_rate": 7.363873885777155e-06, "loss": 0.3730488121509552, "step": 11071, "token_acc": 0.8700365903238921 }, { "epoch": 0.5974208169211677, "grad_norm": 0.3685426115989685, "learning_rate": 7.362188177464176e-06, "loss": 0.3754429221153259, "step": 11072, "token_acc": 0.873504743572116 }, { "epoch": 0.5974747747261642, "grad_norm": 0.44900280237197876, "learning_rate": 7.360502549709278e-06, "loss": 0.3106221556663513, "step": 11073, "token_acc": 0.8856819468024901 }, { "epoch": 0.5975287325311607, "grad_norm": 0.3107691705226898, "learning_rate": 7.358817002563937e-06, "loss": 0.3178720772266388, "step": 11074, "token_acc": 0.8846863468634686 }, { "epoch": 0.5975826903361571, "grad_norm": 0.259671688079834, "learning_rate": 7.357131536079638e-06, "loss": 0.3225456774234772, "step": 11075, "token_acc": 0.8867219917012448 }, { "epoch": 0.5976366481411536, "grad_norm": 0.3493615984916687, "learning_rate": 7.355446150307848e-06, "loss": 0.37901732325553894, "step": 11076, "token_acc": 0.8645169200588524 }, { "epoch": 0.5976906059461501, "grad_norm": 0.4109503924846649, "learning_rate": 7.353760845300041e-06, "loss": 0.33967339992523193, "step": 11077, "token_acc": 0.8795344806239941 }, { "epoch": 0.5977445637511466, "grad_norm": 0.39655831456184387, "learning_rate": 7.352075621107686e-06, "loss": 0.31564754247665405, "step": 11078, "token_acc": 0.8879638775222238 }, { "epoch": 0.5977985215561431, "grad_norm": 0.4450758695602417, "learning_rate": 7.350390477782247e-06, "loss": 0.4089651107788086, "step": 11079, "token_acc": 0.8573105298457411 }, { "epoch": 0.5978524793611396, "grad_norm": 0.3478229343891144, "learning_rate": 7.348705415375188e-06, "loss": 0.40498363971710205, "step": 11080, "token_acc": 0.8603557085484796 }, { "epoch": 0.5979064371661361, "grad_norm": 0.4389042854309082, "learning_rate": 7.347020433937966e-06, "loss": 0.36404967308044434, "step": 11081, "token_acc": 0.8655811536462844 }, { "epoch": 0.5979603949711326, "grad_norm": 0.44741398096084595, "learning_rate": 7.345335533522049e-06, "loss": 0.3668650984764099, "step": 11082, "token_acc": 0.8732010890704006 }, { "epoch": 0.598014352776129, "grad_norm": 0.38695812225341797, "learning_rate": 7.343650714178892e-06, "loss": 0.3361435532569885, "step": 11083, "token_acc": 0.8784514659834899 }, { "epoch": 0.5980683105811255, "grad_norm": 0.3389340937137604, "learning_rate": 7.341965975959943e-06, "loss": 0.3201363980770111, "step": 11084, "token_acc": 0.8814765767257077 }, { "epoch": 0.598122268386122, "grad_norm": 0.47973760962486267, "learning_rate": 7.340281318916659e-06, "loss": 0.3769068121910095, "step": 11085, "token_acc": 0.8698860901652494 }, { "epoch": 0.5981762261911185, "grad_norm": 0.360971063375473, "learning_rate": 7.338596743100485e-06, "loss": 0.36202317476272583, "step": 11086, "token_acc": 0.8705720630221 }, { "epoch": 0.598230183996115, "grad_norm": 0.3632356822490692, "learning_rate": 7.336912248562868e-06, "loss": 0.3347458243370056, "step": 11087, "token_acc": 0.8806121300776415 }, { "epoch": 0.5982841418011116, "grad_norm": 0.4540331959724426, "learning_rate": 7.335227835355257e-06, "loss": 0.32438385486602783, "step": 11088, "token_acc": 0.8840249278005776 }, { "epoch": 0.5983380996061081, "grad_norm": 0.3587741553783417, "learning_rate": 7.333543503529088e-06, "loss": 0.3427656292915344, "step": 11089, "token_acc": 0.8817048068773459 }, { "epoch": 0.5983920574111045, "grad_norm": 0.4618873596191406, "learning_rate": 7.331859253135803e-06, "loss": 0.3832697868347168, "step": 11090, "token_acc": 0.8709013582110029 }, { "epoch": 0.598446015216101, "grad_norm": 0.3834807276725769, "learning_rate": 7.330175084226836e-06, "loss": 0.2919189929962158, "step": 11091, "token_acc": 0.8957792651277148 }, { "epoch": 0.5984999730210975, "grad_norm": 0.4249337911605835, "learning_rate": 7.328490996853623e-06, "loss": 0.3832549452781677, "step": 11092, "token_acc": 0.8653820056995521 }, { "epoch": 0.598553930826094, "grad_norm": 0.3901499807834625, "learning_rate": 7.326806991067596e-06, "loss": 0.3346253037452698, "step": 11093, "token_acc": 0.8827905973689886 }, { "epoch": 0.5986078886310905, "grad_norm": 0.34149405360221863, "learning_rate": 7.325123066920181e-06, "loss": 0.31798863410949707, "step": 11094, "token_acc": 0.8853544324237042 }, { "epoch": 0.598661846436087, "grad_norm": 0.41215386986732483, "learning_rate": 7.323439224462809e-06, "loss": 0.33537399768829346, "step": 11095, "token_acc": 0.8856924254016832 }, { "epoch": 0.5987158042410835, "grad_norm": 0.37281501293182373, "learning_rate": 7.3217554637469e-06, "loss": 0.30520009994506836, "step": 11096, "token_acc": 0.8868953638581886 }, { "epoch": 0.59876976204608, "grad_norm": 0.31376659870147705, "learning_rate": 7.320071784823879e-06, "loss": 0.32477617263793945, "step": 11097, "token_acc": 0.8818464144551101 }, { "epoch": 0.5988237198510764, "grad_norm": 0.45288312435150146, "learning_rate": 7.318388187745164e-06, "loss": 0.3931901156902313, "step": 11098, "token_acc": 0.8652641002685766 }, { "epoch": 0.5988776776560729, "grad_norm": 0.38808560371398926, "learning_rate": 7.316704672562171e-06, "loss": 0.3496520519256592, "step": 11099, "token_acc": 0.8793156125878399 }, { "epoch": 0.5989316354610694, "grad_norm": 0.4433937072753906, "learning_rate": 7.31502123932631e-06, "loss": 0.34521546959877014, "step": 11100, "token_acc": 0.8789594284944062 }, { "epoch": 0.5989855932660659, "grad_norm": 0.39255237579345703, "learning_rate": 7.3133378880890004e-06, "loss": 0.36133718490600586, "step": 11101, "token_acc": 0.8734132162771384 }, { "epoch": 0.5990395510710624, "grad_norm": 0.33679214119911194, "learning_rate": 7.3116546189016515e-06, "loss": 0.37212318181991577, "step": 11102, "token_acc": 0.8701298701298701 }, { "epoch": 0.5990935088760589, "grad_norm": 0.35716962814331055, "learning_rate": 7.309971431815664e-06, "loss": 0.3668910562992096, "step": 11103, "token_acc": 0.8727746417716022 }, { "epoch": 0.5991474666810555, "grad_norm": 0.45732223987579346, "learning_rate": 7.308288326882443e-06, "loss": 0.3183634579181671, "step": 11104, "token_acc": 0.8841432655120228 }, { "epoch": 0.599201424486052, "grad_norm": 0.34461623430252075, "learning_rate": 7.306605304153393e-06, "loss": 0.3762865364551544, "step": 11105, "token_acc": 0.8687682546077148 }, { "epoch": 0.5992553822910484, "grad_norm": 0.4088365137577057, "learning_rate": 7.304922363679905e-06, "loss": 0.353152871131897, "step": 11106, "token_acc": 0.8757922828866896 }, { "epoch": 0.5993093400960449, "grad_norm": 0.408852219581604, "learning_rate": 7.303239505513389e-06, "loss": 0.4144825041294098, "step": 11107, "token_acc": 0.8537729145942874 }, { "epoch": 0.5993632979010414, "grad_norm": 0.4028339684009552, "learning_rate": 7.30155672970523e-06, "loss": 0.3826146125793457, "step": 11108, "token_acc": 0.8669016448472642 }, { "epoch": 0.5994172557060379, "grad_norm": 0.37222376465797424, "learning_rate": 7.299874036306822e-06, "loss": 0.32752832770347595, "step": 11109, "token_acc": 0.8860500766479305 }, { "epoch": 0.5994712135110344, "grad_norm": 0.396832138299942, "learning_rate": 7.298191425369554e-06, "loss": 0.3922894597053528, "step": 11110, "token_acc": 0.8667934093789607 }, { "epoch": 0.5995251713160309, "grad_norm": 0.37548062205314636, "learning_rate": 7.2965088969448115e-06, "loss": 0.36340439319610596, "step": 11111, "token_acc": 0.8730861244019139 }, { "epoch": 0.5995791291210274, "grad_norm": 0.3377193212509155, "learning_rate": 7.2948264510839794e-06, "loss": 0.3980479836463928, "step": 11112, "token_acc": 0.8628351539225422 }, { "epoch": 0.5996330869260238, "grad_norm": 0.4400275945663452, "learning_rate": 7.293144087838437e-06, "loss": 0.34335005283355713, "step": 11113, "token_acc": 0.8759615384615385 }, { "epoch": 0.5996870447310203, "grad_norm": 0.4391908049583435, "learning_rate": 7.291461807259567e-06, "loss": 0.34609177708625793, "step": 11114, "token_acc": 0.8799538838449344 }, { "epoch": 0.5997410025360168, "grad_norm": 0.3643571734428406, "learning_rate": 7.289779609398744e-06, "loss": 0.32674047350883484, "step": 11115, "token_acc": 0.8825719604754404 }, { "epoch": 0.5997949603410133, "grad_norm": 0.3156050443649292, "learning_rate": 7.288097494307341e-06, "loss": 0.39509764313697815, "step": 11116, "token_acc": 0.8624884305169906 }, { "epoch": 0.5998489181460098, "grad_norm": 0.36060401797294617, "learning_rate": 7.286415462036731e-06, "loss": 0.34184736013412476, "step": 11117, "token_acc": 0.876669947967937 }, { "epoch": 0.5999028759510063, "grad_norm": 0.35862457752227783, "learning_rate": 7.284733512638281e-06, "loss": 0.3846873641014099, "step": 11118, "token_acc": 0.8686184369707235 }, { "epoch": 0.5999568337560028, "grad_norm": 0.3974241614341736, "learning_rate": 7.283051646163357e-06, "loss": 0.34078478813171387, "step": 11119, "token_acc": 0.8807145871430581 }, { "epoch": 0.6000107915609993, "grad_norm": 0.3887436091899872, "learning_rate": 7.2813698626633255e-06, "loss": 0.33377137780189514, "step": 11120, "token_acc": 0.8844248024641757 }, { "epoch": 0.6000647493659957, "grad_norm": 0.3657439649105072, "learning_rate": 7.2796881621895465e-06, "loss": 0.36880362033843994, "step": 11121, "token_acc": 0.8736634777715251 }, { "epoch": 0.6001187071709922, "grad_norm": 0.45906713604927063, "learning_rate": 7.278006544793379e-06, "loss": 0.28978395462036133, "step": 11122, "token_acc": 0.8908529260339797 }, { "epoch": 0.6001726649759888, "grad_norm": 0.3890622556209564, "learning_rate": 7.276325010526178e-06, "loss": 0.38058650493621826, "step": 11123, "token_acc": 0.8668880940214614 }, { "epoch": 0.6002266227809853, "grad_norm": 0.37279990315437317, "learning_rate": 7.2746435594392975e-06, "loss": 0.34974485635757446, "step": 11124, "token_acc": 0.8801498127340824 }, { "epoch": 0.6002805805859818, "grad_norm": 0.47898462414741516, "learning_rate": 7.272962191584089e-06, "loss": 0.3711521327495575, "step": 11125, "token_acc": 0.8681349316977977 }, { "epoch": 0.6003345383909783, "grad_norm": 0.3909126818180084, "learning_rate": 7.271280907011897e-06, "loss": 0.3233642578125, "step": 11126, "token_acc": 0.8832518337408313 }, { "epoch": 0.6003884961959748, "grad_norm": 0.48351195454597473, "learning_rate": 7.269599705774075e-06, "loss": 0.4194563031196594, "step": 11127, "token_acc": 0.8596491228070176 }, { "epoch": 0.6004424540009713, "grad_norm": 0.4603504240512848, "learning_rate": 7.267918587921964e-06, "loss": 0.3770412802696228, "step": 11128, "token_acc": 0.8651520703554415 }, { "epoch": 0.6004964118059677, "grad_norm": 0.4610532224178314, "learning_rate": 7.266237553506902e-06, "loss": 0.3581241965293884, "step": 11129, "token_acc": 0.8756024096385542 }, { "epoch": 0.6005503696109642, "grad_norm": 0.359541654586792, "learning_rate": 7.264556602580228e-06, "loss": 0.3256590664386749, "step": 11130, "token_acc": 0.8888888888888888 }, { "epoch": 0.6006043274159607, "grad_norm": 0.40374699234962463, "learning_rate": 7.262875735193281e-06, "loss": 0.4294373393058777, "step": 11131, "token_acc": 0.8502002670226969 }, { "epoch": 0.6006582852209572, "grad_norm": 0.32976841926574707, "learning_rate": 7.261194951397386e-06, "loss": 0.3731761872768402, "step": 11132, "token_acc": 0.8689961302071477 }, { "epoch": 0.6007122430259537, "grad_norm": 0.4036150574684143, "learning_rate": 7.259514251243885e-06, "loss": 0.4162321090698242, "step": 11133, "token_acc": 0.8634239846651491 }, { "epoch": 0.6007662008309502, "grad_norm": 0.4721033573150635, "learning_rate": 7.257833634784102e-06, "loss": 0.34045761823654175, "step": 11134, "token_acc": 0.8748623853211009 }, { "epoch": 0.6008201586359467, "grad_norm": 0.4747481644153595, "learning_rate": 7.256153102069359e-06, "loss": 0.40811264514923096, "step": 11135, "token_acc": 0.8622908622908623 }, { "epoch": 0.6008741164409431, "grad_norm": 0.4249733090400696, "learning_rate": 7.254472653150985e-06, "loss": 0.3894347548484802, "step": 11136, "token_acc": 0.8746824724809483 }, { "epoch": 0.6009280742459396, "grad_norm": 0.46673983335494995, "learning_rate": 7.252792288080295e-06, "loss": 0.3685828745365143, "step": 11137, "token_acc": 0.8678125 }, { "epoch": 0.6009820320509361, "grad_norm": 0.47450244426727295, "learning_rate": 7.251112006908607e-06, "loss": 0.2947924733161926, "step": 11138, "token_acc": 0.8919457013574661 }, { "epoch": 0.6010359898559327, "grad_norm": 0.3436620235443115, "learning_rate": 7.249431809687235e-06, "loss": 0.3144550323486328, "step": 11139, "token_acc": 0.8869448183041723 }, { "epoch": 0.6010899476609292, "grad_norm": 0.4192109704017639, "learning_rate": 7.2477516964675e-06, "loss": 0.3494820296764374, "step": 11140, "token_acc": 0.8772777471010491 }, { "epoch": 0.6011439054659257, "grad_norm": 0.37208884954452515, "learning_rate": 7.246071667300707e-06, "loss": 0.3505365252494812, "step": 11141, "token_acc": 0.8730263157894737 }, { "epoch": 0.6011978632709222, "grad_norm": 0.375667542219162, "learning_rate": 7.2443917222381635e-06, "loss": 0.3832038640975952, "step": 11142, "token_acc": 0.8675889328063241 }, { "epoch": 0.6012518210759187, "grad_norm": 0.39537644386291504, "learning_rate": 7.242711861331175e-06, "loss": 0.3690977096557617, "step": 11143, "token_acc": 0.8711692084241104 }, { "epoch": 0.6013057788809151, "grad_norm": 0.454420804977417, "learning_rate": 7.2410320846310435e-06, "loss": 0.3767499029636383, "step": 11144, "token_acc": 0.8663540445486518 }, { "epoch": 0.6013597366859116, "grad_norm": 0.32638517022132874, "learning_rate": 7.239352392189068e-06, "loss": 0.4131738543510437, "step": 11145, "token_acc": 0.8580681183696259 }, { "epoch": 0.6014136944909081, "grad_norm": 0.29817724227905273, "learning_rate": 7.2376727840565485e-06, "loss": 0.3262549340724945, "step": 11146, "token_acc": 0.8797790994052677 }, { "epoch": 0.6014676522959046, "grad_norm": 0.42111387848854065, "learning_rate": 7.23599326028478e-06, "loss": 0.36785995960235596, "step": 11147, "token_acc": 0.8701971266288006 }, { "epoch": 0.6015216101009011, "grad_norm": 0.30055132508277893, "learning_rate": 7.2343138209250515e-06, "loss": 0.3127540647983551, "step": 11148, "token_acc": 0.8864327485380117 }, { "epoch": 0.6015755679058976, "grad_norm": 0.3880367875099182, "learning_rate": 7.2326344660286554e-06, "loss": 0.3371855616569519, "step": 11149, "token_acc": 0.8817250829366796 }, { "epoch": 0.6016295257108941, "grad_norm": 0.531553328037262, "learning_rate": 7.230955195646878e-06, "loss": 0.3710622191429138, "step": 11150, "token_acc": 0.8675569176882661 }, { "epoch": 0.6016834835158906, "grad_norm": 0.44017377495765686, "learning_rate": 7.229276009831001e-06, "loss": 0.34311044216156006, "step": 11151, "token_acc": 0.8775635139271503 }, { "epoch": 0.601737441320887, "grad_norm": 0.44656723737716675, "learning_rate": 7.22759690863231e-06, "loss": 0.38576915860176086, "step": 11152, "token_acc": 0.8666091706195199 }, { "epoch": 0.6017913991258835, "grad_norm": 0.31385669112205505, "learning_rate": 7.225917892102084e-06, "loss": 0.4015907645225525, "step": 11153, "token_acc": 0.862223646267222 }, { "epoch": 0.60184535693088, "grad_norm": 0.38086071610450745, "learning_rate": 7.2242389602916e-06, "loss": 0.322316437959671, "step": 11154, "token_acc": 0.8840788840788841 }, { "epoch": 0.6018993147358765, "grad_norm": 0.4272805452346802, "learning_rate": 7.222560113252129e-06, "loss": 0.3527034819126129, "step": 11155, "token_acc": 0.8742959549411162 }, { "epoch": 0.601953272540873, "grad_norm": 0.4964854121208191, "learning_rate": 7.2208813510349464e-06, "loss": 0.3898884057998657, "step": 11156, "token_acc": 0.8670320747056435 }, { "epoch": 0.6020072303458696, "grad_norm": 0.3897312879562378, "learning_rate": 7.219202673691318e-06, "loss": 0.3398268222808838, "step": 11157, "token_acc": 0.8814914948743945 }, { "epoch": 0.6020611881508661, "grad_norm": 0.4369283616542816, "learning_rate": 7.217524081272508e-06, "loss": 0.2958250045776367, "step": 11158, "token_acc": 0.8913834951456311 }, { "epoch": 0.6021151459558625, "grad_norm": 0.554921567440033, "learning_rate": 7.215845573829787e-06, "loss": 0.38978904485702515, "step": 11159, "token_acc": 0.8645404663923183 }, { "epoch": 0.602169103760859, "grad_norm": 0.47201457619667053, "learning_rate": 7.214167151414415e-06, "loss": 0.2908080220222473, "step": 11160, "token_acc": 0.8901748337847821 }, { "epoch": 0.6022230615658555, "grad_norm": 0.5305390357971191, "learning_rate": 7.212488814077647e-06, "loss": 0.40516191720962524, "step": 11161, "token_acc": 0.8616407206576876 }, { "epoch": 0.602277019370852, "grad_norm": 0.36621126532554626, "learning_rate": 7.21081056187074e-06, "loss": 0.348513126373291, "step": 11162, "token_acc": 0.8758726777896106 }, { "epoch": 0.6023309771758485, "grad_norm": 0.3160155713558197, "learning_rate": 7.209132394844949e-06, "loss": 0.35999947786331177, "step": 11163, "token_acc": 0.8784627919608908 }, { "epoch": 0.602384934980845, "grad_norm": 0.4740918278694153, "learning_rate": 7.207454313051518e-06, "loss": 0.34086012840270996, "step": 11164, "token_acc": 0.8772802653399668 }, { "epoch": 0.6024388927858415, "grad_norm": 0.3991672992706299, "learning_rate": 7.205776316541705e-06, "loss": 0.3510627746582031, "step": 11165, "token_acc": 0.8771900550642416 }, { "epoch": 0.602492850590838, "grad_norm": 0.4758443534374237, "learning_rate": 7.20409840536675e-06, "loss": 0.34862738847732544, "step": 11166, "token_acc": 0.8757714828295616 }, { "epoch": 0.6025468083958344, "grad_norm": 0.3837212026119232, "learning_rate": 7.2024205795779e-06, "loss": 0.3707684278488159, "step": 11167, "token_acc": 0.8693596905887409 }, { "epoch": 0.6026007662008309, "grad_norm": 0.3580017685890198, "learning_rate": 7.200742839226391e-06, "loss": 0.41226881742477417, "step": 11168, "token_acc": 0.8556050482553823 }, { "epoch": 0.6026547240058274, "grad_norm": 0.3887118101119995, "learning_rate": 7.199065184363463e-06, "loss": 0.3687211275100708, "step": 11169, "token_acc": 0.8716429699842022 }, { "epoch": 0.6027086818108239, "grad_norm": 0.3817136585712433, "learning_rate": 7.197387615040351e-06, "loss": 0.32851964235305786, "step": 11170, "token_acc": 0.8860597439544808 }, { "epoch": 0.6027626396158204, "grad_norm": 0.4404096007347107, "learning_rate": 7.195710131308284e-06, "loss": 0.38005226850509644, "step": 11171, "token_acc": 0.8716239915819011 }, { "epoch": 0.602816597420817, "grad_norm": 0.3581578731536865, "learning_rate": 7.1940327332184965e-06, "loss": 0.362671434879303, "step": 11172, "token_acc": 0.8745154502159708 }, { "epoch": 0.6028705552258135, "grad_norm": 0.45718613266944885, "learning_rate": 7.192355420822217e-06, "loss": 0.3255423307418823, "step": 11173, "token_acc": 0.8816556548775748 }, { "epoch": 0.60292451303081, "grad_norm": 0.4546344578266144, "learning_rate": 7.1906781941706635e-06, "loss": 0.41930127143859863, "step": 11174, "token_acc": 0.8529021998409754 }, { "epoch": 0.6029784708358064, "grad_norm": 0.4127902388572693, "learning_rate": 7.189001053315065e-06, "loss": 0.3063512444496155, "step": 11175, "token_acc": 0.8895427795382526 }, { "epoch": 0.6030324286408029, "grad_norm": 0.38670286536216736, "learning_rate": 7.187323998306637e-06, "loss": 0.35228729248046875, "step": 11176, "token_acc": 0.8769251372706576 }, { "epoch": 0.6030863864457994, "grad_norm": 0.4173807203769684, "learning_rate": 7.185647029196595e-06, "loss": 0.33188340067863464, "step": 11177, "token_acc": 0.8767301038062284 }, { "epoch": 0.6031403442507959, "grad_norm": 0.40028250217437744, "learning_rate": 7.183970146036159e-06, "loss": 0.3640230596065521, "step": 11178, "token_acc": 0.8720861611094718 }, { "epoch": 0.6031943020557924, "grad_norm": 0.2889236807823181, "learning_rate": 7.182293348876534e-06, "loss": 0.2856909930706024, "step": 11179, "token_acc": 0.8990056307655445 }, { "epoch": 0.6032482598607889, "grad_norm": 0.40233033895492554, "learning_rate": 7.180616637768934e-06, "loss": 0.36214694380760193, "step": 11180, "token_acc": 0.8707404412614069 }, { "epoch": 0.6033022176657854, "grad_norm": 0.5158228278160095, "learning_rate": 7.178940012764564e-06, "loss": 0.37663203477859497, "step": 11181, "token_acc": 0.8663454675231977 }, { "epoch": 0.6033561754707818, "grad_norm": 0.3758653402328491, "learning_rate": 7.177263473914626e-06, "loss": 0.36291295289993286, "step": 11182, "token_acc": 0.8732948952327381 }, { "epoch": 0.6034101332757783, "grad_norm": 0.4100080132484436, "learning_rate": 7.175587021270323e-06, "loss": 0.3364855647087097, "step": 11183, "token_acc": 0.8797373918233363 }, { "epoch": 0.6034640910807748, "grad_norm": 0.46548131108283997, "learning_rate": 7.173910654882849e-06, "loss": 0.3393813967704773, "step": 11184, "token_acc": 0.8760683760683761 }, { "epoch": 0.6035180488857713, "grad_norm": 0.3809873163700104, "learning_rate": 7.172234374803405e-06, "loss": 0.38271963596343994, "step": 11185, "token_acc": 0.8693750881647623 }, { "epoch": 0.6035720066907678, "grad_norm": 0.3643328547477722, "learning_rate": 7.170558181083184e-06, "loss": 0.363125741481781, "step": 11186, "token_acc": 0.8747113865597278 }, { "epoch": 0.6036259644957643, "grad_norm": 0.3937472403049469, "learning_rate": 7.168882073773373e-06, "loss": 0.3308827579021454, "step": 11187, "token_acc": 0.881126173096976 }, { "epoch": 0.6036799223007608, "grad_norm": 0.5289247632026672, "learning_rate": 7.167206052925164e-06, "loss": 0.4063161015510559, "step": 11188, "token_acc": 0.8626235797550539 }, { "epoch": 0.6037338801057573, "grad_norm": 0.37575769424438477, "learning_rate": 7.165530118589739e-06, "loss": 0.318712055683136, "step": 11189, "token_acc": 0.8810614224137931 }, { "epoch": 0.6037878379107537, "grad_norm": 0.47863903641700745, "learning_rate": 7.1638542708182775e-06, "loss": 0.39288270473480225, "step": 11190, "token_acc": 0.8624979538385988 }, { "epoch": 0.6038417957157503, "grad_norm": 0.5008224248886108, "learning_rate": 7.162178509661968e-06, "loss": 0.3654005229473114, "step": 11191, "token_acc": 0.8734081261370528 }, { "epoch": 0.6038957535207468, "grad_norm": 0.44927504658699036, "learning_rate": 7.160502835171983e-06, "loss": 0.3749374747276306, "step": 11192, "token_acc": 0.8712574850299402 }, { "epoch": 0.6039497113257433, "grad_norm": 0.33996817469596863, "learning_rate": 7.158827247399498e-06, "loss": 0.32400569319725037, "step": 11193, "token_acc": 0.8823306370070778 }, { "epoch": 0.6040036691307398, "grad_norm": 0.4520423412322998, "learning_rate": 7.157151746395684e-06, "loss": 0.4185066819190979, "step": 11194, "token_acc": 0.8534533648170012 }, { "epoch": 0.6040576269357363, "grad_norm": 0.3851757347583771, "learning_rate": 7.155476332211714e-06, "loss": 0.3637685775756836, "step": 11195, "token_acc": 0.8722868826675055 }, { "epoch": 0.6041115847407328, "grad_norm": 0.374075710773468, "learning_rate": 7.153801004898748e-06, "loss": 0.30533432960510254, "step": 11196, "token_acc": 0.8862727676588184 }, { "epoch": 0.6041655425457293, "grad_norm": 0.2816014885902405, "learning_rate": 7.152125764507951e-06, "loss": 0.2793939709663391, "step": 11197, "token_acc": 0.8960573476702509 }, { "epoch": 0.6042195003507257, "grad_norm": 0.3915894329547882, "learning_rate": 7.150450611090491e-06, "loss": 0.31770408153533936, "step": 11198, "token_acc": 0.8815477021930437 }, { "epoch": 0.6042734581557222, "grad_norm": 0.4787982404232025, "learning_rate": 7.148775544697523e-06, "loss": 0.32502278685569763, "step": 11199, "token_acc": 0.8737076829515508 }, { "epoch": 0.6043274159607187, "grad_norm": 0.4333134889602661, "learning_rate": 7.1471005653802026e-06, "loss": 0.35595035552978516, "step": 11200, "token_acc": 0.8727293763739442 }, { "epoch": 0.6043813737657152, "grad_norm": 0.4625411629676819, "learning_rate": 7.145425673189683e-06, "loss": 0.4010511636734009, "step": 11201, "token_acc": 0.859481582537517 }, { "epoch": 0.6044353315707117, "grad_norm": 0.40820735692977905, "learning_rate": 7.1437508681771165e-06, "loss": 0.35049137473106384, "step": 11202, "token_acc": 0.8749701171408081 }, { "epoch": 0.6044892893757082, "grad_norm": 0.37355005741119385, "learning_rate": 7.142076150393647e-06, "loss": 0.3331679105758667, "step": 11203, "token_acc": 0.8760818253343824 }, { "epoch": 0.6045432471807047, "grad_norm": 0.4353173077106476, "learning_rate": 7.140401519890428e-06, "loss": 0.33180010318756104, "step": 11204, "token_acc": 0.8830599547511312 }, { "epoch": 0.6045972049857011, "grad_norm": 0.4877678453922272, "learning_rate": 7.138726976718594e-06, "loss": 0.38470667600631714, "step": 11205, "token_acc": 0.8657354149548069 }, { "epoch": 0.6046511627906976, "grad_norm": 0.41910821199417114, "learning_rate": 7.13705252092929e-06, "loss": 0.37112534046173096, "step": 11206, "token_acc": 0.8681958233481685 }, { "epoch": 0.6047051205956941, "grad_norm": 0.4412980377674103, "learning_rate": 7.135378152573653e-06, "loss": 0.31754612922668457, "step": 11207, "token_acc": 0.8885240518798226 }, { "epoch": 0.6047590784006907, "grad_norm": 0.34999170899391174, "learning_rate": 7.133703871702815e-06, "loss": 0.31437814235687256, "step": 11208, "token_acc": 0.8915077579059576 }, { "epoch": 0.6048130362056872, "grad_norm": 0.47146350145339966, "learning_rate": 7.132029678367909e-06, "loss": 0.37862569093704224, "step": 11209, "token_acc": 0.8671242520239353 }, { "epoch": 0.6048669940106837, "grad_norm": 0.406439870595932, "learning_rate": 7.130355572620067e-06, "loss": 0.3703550696372986, "step": 11210, "token_acc": 0.8718260605139121 }, { "epoch": 0.6049209518156802, "grad_norm": 0.44456765055656433, "learning_rate": 7.128681554510415e-06, "loss": 0.41997045278549194, "step": 11211, "token_acc": 0.8576388888888888 }, { "epoch": 0.6049749096206767, "grad_norm": 0.4680647552013397, "learning_rate": 7.127007624090075e-06, "loss": 0.3187342882156372, "step": 11212, "token_acc": 0.887516625498765 }, { "epoch": 0.6050288674256731, "grad_norm": 0.3812674283981323, "learning_rate": 7.12533378141017e-06, "loss": 0.3524444103240967, "step": 11213, "token_acc": 0.8748241912798875 }, { "epoch": 0.6050828252306696, "grad_norm": 0.3535889983177185, "learning_rate": 7.123660026521818e-06, "loss": 0.3464429974555969, "step": 11214, "token_acc": 0.8776816403935048 }, { "epoch": 0.6051367830356661, "grad_norm": 0.2859225869178772, "learning_rate": 7.121986359476135e-06, "loss": 0.33662116527557373, "step": 11215, "token_acc": 0.8747167799546848 }, { "epoch": 0.6051907408406626, "grad_norm": 0.321982204914093, "learning_rate": 7.120312780324231e-06, "loss": 0.4285886883735657, "step": 11216, "token_acc": 0.8573027420257414 }, { "epoch": 0.6052446986456591, "grad_norm": 0.47671306133270264, "learning_rate": 7.118639289117224e-06, "loss": 0.3500043451786041, "step": 11217, "token_acc": 0.8807972403219624 }, { "epoch": 0.6052986564506556, "grad_norm": 0.4955464005470276, "learning_rate": 7.11696588590622e-06, "loss": 0.2936890125274658, "step": 11218, "token_acc": 0.8921647819063004 }, { "epoch": 0.6053526142556521, "grad_norm": 0.4997740089893341, "learning_rate": 7.115292570742323e-06, "loss": 0.4064488112926483, "step": 11219, "token_acc": 0.8615384615384616 }, { "epoch": 0.6054065720606485, "grad_norm": 0.30352169275283813, "learning_rate": 7.113619343676633e-06, "loss": 0.31342190504074097, "step": 11220, "token_acc": 0.8863220171855191 }, { "epoch": 0.605460529865645, "grad_norm": 0.4343426525592804, "learning_rate": 7.111946204760253e-06, "loss": 0.39705395698547363, "step": 11221, "token_acc": 0.8641225787799942 }, { "epoch": 0.6055144876706415, "grad_norm": 0.4115534722805023, "learning_rate": 7.110273154044274e-06, "loss": 0.374936044216156, "step": 11222, "token_acc": 0.8663380690055088 }, { "epoch": 0.605568445475638, "grad_norm": 0.4911726713180542, "learning_rate": 7.108600191579802e-06, "loss": 0.3962719440460205, "step": 11223, "token_acc": 0.8630971993410215 }, { "epoch": 0.6056224032806345, "grad_norm": 0.3543824255466461, "learning_rate": 7.106927317417921e-06, "loss": 0.3176145553588867, "step": 11224, "token_acc": 0.8862389697114238 }, { "epoch": 0.605676361085631, "grad_norm": 0.46707290410995483, "learning_rate": 7.105254531609722e-06, "loss": 0.37697041034698486, "step": 11225, "token_acc": 0.8621167770759514 }, { "epoch": 0.6057303188906276, "grad_norm": 0.4310678243637085, "learning_rate": 7.103581834206292e-06, "loss": 0.29954999685287476, "step": 11226, "token_acc": 0.8906550218340611 }, { "epoch": 0.6057842766956241, "grad_norm": 0.44868606328964233, "learning_rate": 7.101909225258714e-06, "loss": 0.3830273747444153, "step": 11227, "token_acc": 0.8681534929485077 }, { "epoch": 0.6058382345006205, "grad_norm": 0.3823804259300232, "learning_rate": 7.100236704818069e-06, "loss": 0.3088814914226532, "step": 11228, "token_acc": 0.8870679380214541 }, { "epoch": 0.605892192305617, "grad_norm": 0.446998655796051, "learning_rate": 7.098564272935432e-06, "loss": 0.3450191617012024, "step": 11229, "token_acc": 0.8775340644732469 }, { "epoch": 0.6059461501106135, "grad_norm": 0.4159735441207886, "learning_rate": 7.0968919296618855e-06, "loss": 0.28945425152778625, "step": 11230, "token_acc": 0.8929191587883465 }, { "epoch": 0.60600010791561, "grad_norm": 0.4303871989250183, "learning_rate": 7.095219675048499e-06, "loss": 0.364642858505249, "step": 11231, "token_acc": 0.8740207495236079 }, { "epoch": 0.6060540657206065, "grad_norm": 0.3341771364212036, "learning_rate": 7.093547509146341e-06, "loss": 0.3598942458629608, "step": 11232, "token_acc": 0.8755088195386703 }, { "epoch": 0.606108023525603, "grad_norm": 0.34907475113868713, "learning_rate": 7.091875432006483e-06, "loss": 0.329696387052536, "step": 11233, "token_acc": 0.8809922896413007 }, { "epoch": 0.6061619813305995, "grad_norm": 0.4608713686466217, "learning_rate": 7.090203443679986e-06, "loss": 0.3232885003089905, "step": 11234, "token_acc": 0.8825934579439252 }, { "epoch": 0.606215939135596, "grad_norm": 0.3683429956436157, "learning_rate": 7.088531544217911e-06, "loss": 0.3495759665966034, "step": 11235, "token_acc": 0.8744216787838731 }, { "epoch": 0.6062698969405924, "grad_norm": 0.3333016037940979, "learning_rate": 7.086859733671324e-06, "loss": 0.3680679500102997, "step": 11236, "token_acc": 0.871521268925739 }, { "epoch": 0.6063238547455889, "grad_norm": 0.3827047348022461, "learning_rate": 7.085188012091277e-06, "loss": 0.36625707149505615, "step": 11237, "token_acc": 0.8739579859953318 }, { "epoch": 0.6063778125505854, "grad_norm": 0.3014304041862488, "learning_rate": 7.083516379528823e-06, "loss": 0.30306005477905273, "step": 11238, "token_acc": 0.8903217882584131 }, { "epoch": 0.6064317703555819, "grad_norm": 0.3292248547077179, "learning_rate": 7.0818448360350165e-06, "loss": 0.31918203830718994, "step": 11239, "token_acc": 0.8825180433039295 }, { "epoch": 0.6064857281605784, "grad_norm": 0.3559398055076599, "learning_rate": 7.0801733816609045e-06, "loss": 0.38503938913345337, "step": 11240, "token_acc": 0.8626460372592358 }, { "epoch": 0.606539685965575, "grad_norm": 0.37139463424682617, "learning_rate": 7.078502016457531e-06, "loss": 0.35059067606925964, "step": 11241, "token_acc": 0.8728851624494685 }, { "epoch": 0.6065936437705715, "grad_norm": 0.40895235538482666, "learning_rate": 7.0768307404759375e-06, "loss": 0.3236033320426941, "step": 11242, "token_acc": 0.8837531486146095 }, { "epoch": 0.6066476015755679, "grad_norm": 0.30598416924476624, "learning_rate": 7.075159553767173e-06, "loss": 0.31191903352737427, "step": 11243, "token_acc": 0.8875463698993111 }, { "epoch": 0.6067015593805644, "grad_norm": 0.4704861044883728, "learning_rate": 7.073488456382267e-06, "loss": 0.3780968189239502, "step": 11244, "token_acc": 0.8681952893464359 }, { "epoch": 0.6067555171855609, "grad_norm": 0.3715128004550934, "learning_rate": 7.071817448372258e-06, "loss": 0.3263693153858185, "step": 11245, "token_acc": 0.8864805692391899 }, { "epoch": 0.6068094749905574, "grad_norm": 0.44112858176231384, "learning_rate": 7.070146529788175e-06, "loss": 0.3912225365638733, "step": 11246, "token_acc": 0.860730593607306 }, { "epoch": 0.6068634327955539, "grad_norm": 0.3812476396560669, "learning_rate": 7.068475700681052e-06, "loss": 0.3510924279689789, "step": 11247, "token_acc": 0.8773510352457721 }, { "epoch": 0.6069173906005504, "grad_norm": 0.3899047374725342, "learning_rate": 7.066804961101909e-06, "loss": 0.30191442370414734, "step": 11248, "token_acc": 0.8930663397357308 }, { "epoch": 0.6069713484055469, "grad_norm": 0.30645865201950073, "learning_rate": 7.065134311101777e-06, "loss": 0.34376537799835205, "step": 11249, "token_acc": 0.8810734463276836 }, { "epoch": 0.6070253062105434, "grad_norm": 0.3621366322040558, "learning_rate": 7.063463750731674e-06, "loss": 0.32590508460998535, "step": 11250, "token_acc": 0.880865411632481 }, { "epoch": 0.6070792640155398, "grad_norm": 0.3873692452907562, "learning_rate": 7.061793280042619e-06, "loss": 0.31472450494766235, "step": 11251, "token_acc": 0.8835884076847932 }, { "epoch": 0.6071332218205363, "grad_norm": 0.4585375189781189, "learning_rate": 7.060122899085629e-06, "loss": 0.38053545355796814, "step": 11252, "token_acc": 0.8624390243902439 }, { "epoch": 0.6071871796255328, "grad_norm": 0.40200480818748474, "learning_rate": 7.058452607911717e-06, "loss": 0.3737257421016693, "step": 11253, "token_acc": 0.8670520231213873 }, { "epoch": 0.6072411374305293, "grad_norm": 0.32530859112739563, "learning_rate": 7.056782406571885e-06, "loss": 0.32890892028808594, "step": 11254, "token_acc": 0.8831048105972578 }, { "epoch": 0.6072950952355258, "grad_norm": 0.34975650906562805, "learning_rate": 7.05511229511715e-06, "loss": 0.3697624206542969, "step": 11255, "token_acc": 0.8698159509202454 }, { "epoch": 0.6073490530405223, "grad_norm": 0.3736434578895569, "learning_rate": 7.0534422735985165e-06, "loss": 0.31132808327674866, "step": 11256, "token_acc": 0.8883327580255437 }, { "epoch": 0.6074030108455188, "grad_norm": 0.552045464515686, "learning_rate": 7.051772342066982e-06, "loss": 0.41447561979293823, "step": 11257, "token_acc": 0.8609046849757673 }, { "epoch": 0.6074569686505153, "grad_norm": 0.4696459174156189, "learning_rate": 7.05010250057355e-06, "loss": 0.31853151321411133, "step": 11258, "token_acc": 0.8837888946192195 }, { "epoch": 0.6075109264555117, "grad_norm": 0.24167592823505402, "learning_rate": 7.048432749169214e-06, "loss": 0.2928987145423889, "step": 11259, "token_acc": 0.8927004836351367 }, { "epoch": 0.6075648842605083, "grad_norm": 0.2681341767311096, "learning_rate": 7.046763087904969e-06, "loss": 0.29739394783973694, "step": 11260, "token_acc": 0.8933939623594797 }, { "epoch": 0.6076188420655048, "grad_norm": 0.49559834599494934, "learning_rate": 7.045093516831803e-06, "loss": 0.37073850631713867, "step": 11261, "token_acc": 0.8676328502415459 }, { "epoch": 0.6076727998705013, "grad_norm": 0.3197714388370514, "learning_rate": 7.043424036000708e-06, "loss": 0.26645395159721375, "step": 11262, "token_acc": 0.899546827794562 }, { "epoch": 0.6077267576754978, "grad_norm": 0.330061137676239, "learning_rate": 7.0417546454626695e-06, "loss": 0.34896790981292725, "step": 11263, "token_acc": 0.8708312171999323 }, { "epoch": 0.6077807154804943, "grad_norm": 0.3651929795742035, "learning_rate": 7.040085345268669e-06, "loss": 0.34800398349761963, "step": 11264, "token_acc": 0.8786496350364964 }, { "epoch": 0.6078346732854908, "grad_norm": 0.4501137137413025, "learning_rate": 7.038416135469688e-06, "loss": 0.3791921138763428, "step": 11265, "token_acc": 0.8635502210991788 }, { "epoch": 0.6078886310904872, "grad_norm": 0.4329579770565033, "learning_rate": 7.036747016116701e-06, "loss": 0.42043963074684143, "step": 11266, "token_acc": 0.8539596024366785 }, { "epoch": 0.6079425888954837, "grad_norm": 0.41018468141555786, "learning_rate": 7.035077987260681e-06, "loss": 0.34938672184944153, "step": 11267, "token_acc": 0.8758022549869905 }, { "epoch": 0.6079965467004802, "grad_norm": 0.3314961791038513, "learning_rate": 7.033409048952606e-06, "loss": 0.33636146783828735, "step": 11268, "token_acc": 0.8869634160154037 }, { "epoch": 0.6080505045054767, "grad_norm": 0.3424110412597656, "learning_rate": 7.031740201243441e-06, "loss": 0.34890297055244446, "step": 11269, "token_acc": 0.8718996206594689 }, { "epoch": 0.6081044623104732, "grad_norm": 0.3509100377559662, "learning_rate": 7.030071444184153e-06, "loss": 0.3655470013618469, "step": 11270, "token_acc": 0.8718043719896258 }, { "epoch": 0.6081584201154697, "grad_norm": 0.3647918403148651, "learning_rate": 7.028402777825704e-06, "loss": 0.34236404299736023, "step": 11271, "token_acc": 0.8818452847952298 }, { "epoch": 0.6082123779204662, "grad_norm": 0.43793541193008423, "learning_rate": 7.0267342022190565e-06, "loss": 0.3518613576889038, "step": 11272, "token_acc": 0.8783201223751912 }, { "epoch": 0.6082663357254627, "grad_norm": 0.4413203001022339, "learning_rate": 7.025065717415168e-06, "loss": 0.309469997882843, "step": 11273, "token_acc": 0.8847207947927372 }, { "epoch": 0.6083202935304591, "grad_norm": 0.35612285137176514, "learning_rate": 7.023397323464989e-06, "loss": 0.353951632976532, "step": 11274, "token_acc": 0.8691840863115307 }, { "epoch": 0.6083742513354556, "grad_norm": 0.4123912453651428, "learning_rate": 7.02172902041948e-06, "loss": 0.27415233850479126, "step": 11275, "token_acc": 0.8990359333917616 }, { "epoch": 0.6084282091404521, "grad_norm": 0.40958598256111145, "learning_rate": 7.0200608083295865e-06, "loss": 0.33437442779541016, "step": 11276, "token_acc": 0.8794388426128891 }, { "epoch": 0.6084821669454487, "grad_norm": 0.31281915307044983, "learning_rate": 7.018392687246257e-06, "loss": 0.2907647490501404, "step": 11277, "token_acc": 0.8941899604858774 }, { "epoch": 0.6085361247504452, "grad_norm": 0.4652528762817383, "learning_rate": 7.016724657220432e-06, "loss": 0.3866177499294281, "step": 11278, "token_acc": 0.8668947286580625 }, { "epoch": 0.6085900825554417, "grad_norm": 0.4399227201938629, "learning_rate": 7.015056718303054e-06, "loss": 0.38914573192596436, "step": 11279, "token_acc": 0.8622350674373795 }, { "epoch": 0.6086440403604382, "grad_norm": 0.4069973826408386, "learning_rate": 7.013388870545058e-06, "loss": 0.33440297842025757, "step": 11280, "token_acc": 0.8801266171208367 }, { "epoch": 0.6086979981654347, "grad_norm": 0.4040548801422119, "learning_rate": 7.011721113997387e-06, "loss": 0.3220384120941162, "step": 11281, "token_acc": 0.8876585104482944 }, { "epoch": 0.6087519559704311, "grad_norm": 0.3014536201953888, "learning_rate": 7.010053448710972e-06, "loss": 0.36326122283935547, "step": 11282, "token_acc": 0.8724985706117782 }, { "epoch": 0.6088059137754276, "grad_norm": 0.3526071608066559, "learning_rate": 7.008385874736741e-06, "loss": 0.35257309675216675, "step": 11283, "token_acc": 0.8799624148461358 }, { "epoch": 0.6088598715804241, "grad_norm": 0.48884516954421997, "learning_rate": 7.00671839212562e-06, "loss": 0.32489877939224243, "step": 11284, "token_acc": 0.884788029925187 }, { "epoch": 0.6089138293854206, "grad_norm": 0.48338818550109863, "learning_rate": 7.005051000928536e-06, "loss": 0.4040301442146301, "step": 11285, "token_acc": 0.8671438547486033 }, { "epoch": 0.6089677871904171, "grad_norm": 0.5087319612503052, "learning_rate": 7.0033837011964115e-06, "loss": 0.36191627383232117, "step": 11286, "token_acc": 0.8734396005377376 }, { "epoch": 0.6090217449954136, "grad_norm": 0.34341830015182495, "learning_rate": 7.00171649298016e-06, "loss": 0.337837278842926, "step": 11287, "token_acc": 0.8836061627347135 }, { "epoch": 0.6090757028004101, "grad_norm": 0.41212961077690125, "learning_rate": 7.000049376330704e-06, "loss": 0.3938969075679779, "step": 11288, "token_acc": 0.8630795649685175 }, { "epoch": 0.6091296606054065, "grad_norm": 0.3726300299167633, "learning_rate": 6.998382351298953e-06, "loss": 0.30121365189552307, "step": 11289, "token_acc": 0.888952736675765 }, { "epoch": 0.609183618410403, "grad_norm": 0.3888137936592102, "learning_rate": 6.9967154179358196e-06, "loss": 0.3079254925251007, "step": 11290, "token_acc": 0.8901808785529716 }, { "epoch": 0.6092375762153995, "grad_norm": 0.4287009835243225, "learning_rate": 6.995048576292212e-06, "loss": 0.37232571840286255, "step": 11291, "token_acc": 0.870193740685544 }, { "epoch": 0.609291534020396, "grad_norm": 0.40590226650238037, "learning_rate": 6.993381826419032e-06, "loss": 0.36223453283309937, "step": 11292, "token_acc": 0.8739684921230307 }, { "epoch": 0.6093454918253925, "grad_norm": 0.4222409725189209, "learning_rate": 6.9917151683671815e-06, "loss": 0.3629341125488281, "step": 11293, "token_acc": 0.8706864332876542 }, { "epoch": 0.6093994496303891, "grad_norm": 0.4735182523727417, "learning_rate": 6.9900486021875635e-06, "loss": 0.3462689518928528, "step": 11294, "token_acc": 0.8764964086193137 }, { "epoch": 0.6094534074353856, "grad_norm": 0.4183603525161743, "learning_rate": 6.988382127931073e-06, "loss": 0.33589187264442444, "step": 11295, "token_acc": 0.8739196746314184 }, { "epoch": 0.6095073652403821, "grad_norm": 0.45479899644851685, "learning_rate": 6.986715745648604e-06, "loss": 0.37095290422439575, "step": 11296, "token_acc": 0.8713881019830029 }, { "epoch": 0.6095613230453785, "grad_norm": 0.3940017819404602, "learning_rate": 6.985049455391047e-06, "loss": 0.30789533257484436, "step": 11297, "token_acc": 0.8879249291784702 }, { "epoch": 0.609615280850375, "grad_norm": 0.5273279547691345, "learning_rate": 6.983383257209289e-06, "loss": 0.34098702669143677, "step": 11298, "token_acc": 0.882329713721619 }, { "epoch": 0.6096692386553715, "grad_norm": 0.38724228739738464, "learning_rate": 6.981717151154215e-06, "loss": 0.3787361681461334, "step": 11299, "token_acc": 0.8650881057268722 }, { "epoch": 0.609723196460368, "grad_norm": 0.3910905420780182, "learning_rate": 6.980051137276706e-06, "loss": 0.3335754871368408, "step": 11300, "token_acc": 0.8821640712794991 }, { "epoch": 0.6097771542653645, "grad_norm": 0.32685586810112, "learning_rate": 6.97838521562765e-06, "loss": 0.30719253420829773, "step": 11301, "token_acc": 0.8909048615957199 }, { "epoch": 0.609831112070361, "grad_norm": 0.507462739944458, "learning_rate": 6.976719386257915e-06, "loss": 0.41871726512908936, "step": 11302, "token_acc": 0.8571866850743979 }, { "epoch": 0.6098850698753575, "grad_norm": 0.39869022369384766, "learning_rate": 6.97505364921838e-06, "loss": 0.4017888903617859, "step": 11303, "token_acc": 0.8629216913448329 }, { "epoch": 0.609939027680354, "grad_norm": 0.305759996175766, "learning_rate": 6.973388004559911e-06, "loss": 0.3552706837654114, "step": 11304, "token_acc": 0.8782688766114181 }, { "epoch": 0.6099929854853504, "grad_norm": 0.4108491539955139, "learning_rate": 6.971722452333382e-06, "loss": 0.32987603545188904, "step": 11305, "token_acc": 0.8797843665768194 }, { "epoch": 0.6100469432903469, "grad_norm": 0.3719311058521271, "learning_rate": 6.970056992589651e-06, "loss": 0.31478381156921387, "step": 11306, "token_acc": 0.8838614227592159 }, { "epoch": 0.6101009010953434, "grad_norm": 0.4489218592643738, "learning_rate": 6.96839162537959e-06, "loss": 0.32088324427604675, "step": 11307, "token_acc": 0.8878750658010177 }, { "epoch": 0.6101548589003399, "grad_norm": 0.4138055741786957, "learning_rate": 6.966726350754055e-06, "loss": 0.33273813128471375, "step": 11308, "token_acc": 0.880847308031774 }, { "epoch": 0.6102088167053364, "grad_norm": 0.5385470390319824, "learning_rate": 6.965061168763903e-06, "loss": 0.34626275300979614, "step": 11309, "token_acc": 0.8737603305785124 }, { "epoch": 0.610262774510333, "grad_norm": 0.46627578139305115, "learning_rate": 6.9633960794599854e-06, "loss": 0.3894510865211487, "step": 11310, "token_acc": 0.858693916653361 }, { "epoch": 0.6103167323153295, "grad_norm": 0.3830724060535431, "learning_rate": 6.961731082893159e-06, "loss": 0.31854870915412903, "step": 11311, "token_acc": 0.8830194231901118 }, { "epoch": 0.6103706901203259, "grad_norm": 0.4111272394657135, "learning_rate": 6.9600661791142655e-06, "loss": 0.3487195670604706, "step": 11312, "token_acc": 0.8732782369146006 }, { "epoch": 0.6104246479253224, "grad_norm": 0.3669326901435852, "learning_rate": 6.958401368174157e-06, "loss": 0.41070470213890076, "step": 11313, "token_acc": 0.858560794044665 }, { "epoch": 0.6104786057303189, "grad_norm": 0.32675299048423767, "learning_rate": 6.956736650123674e-06, "loss": 0.3471047282218933, "step": 11314, "token_acc": 0.8737431325800767 }, { "epoch": 0.6105325635353154, "grad_norm": 0.3480125963687897, "learning_rate": 6.955072025013657e-06, "loss": 0.33566075563430786, "step": 11315, "token_acc": 0.8828709288299156 }, { "epoch": 0.6105865213403119, "grad_norm": 0.5359318256378174, "learning_rate": 6.953407492894943e-06, "loss": 0.32230305671691895, "step": 11316, "token_acc": 0.888 }, { "epoch": 0.6106404791453084, "grad_norm": 0.44818106293678284, "learning_rate": 6.951743053818365e-06, "loss": 0.38188469409942627, "step": 11317, "token_acc": 0.8703557312252964 }, { "epoch": 0.6106944369503049, "grad_norm": 0.40290307998657227, "learning_rate": 6.950078707834756e-06, "loss": 0.34814122319221497, "step": 11318, "token_acc": 0.8792363474914903 }, { "epoch": 0.6107483947553014, "grad_norm": 0.4466167390346527, "learning_rate": 6.948414454994941e-06, "loss": 0.3144179582595825, "step": 11319, "token_acc": 0.888131445551477 }, { "epoch": 0.6108023525602978, "grad_norm": 0.6500625610351562, "learning_rate": 6.946750295349753e-06, "loss": 0.39635640382766724, "step": 11320, "token_acc": 0.8637977856695216 }, { "epoch": 0.6108563103652943, "grad_norm": 0.3464709520339966, "learning_rate": 6.945086228950011e-06, "loss": 0.30802667140960693, "step": 11321, "token_acc": 0.8884253316217373 }, { "epoch": 0.6109102681702908, "grad_norm": 0.41842836141586304, "learning_rate": 6.943422255846535e-06, "loss": 0.36317700147628784, "step": 11322, "token_acc": 0.8688610240334378 }, { "epoch": 0.6109642259752873, "grad_norm": 0.44771701097488403, "learning_rate": 6.941758376090142e-06, "loss": 0.33846813440322876, "step": 11323, "token_acc": 0.8792957148918116 }, { "epoch": 0.6110181837802838, "grad_norm": 0.43939679861068726, "learning_rate": 6.940094589731647e-06, "loss": 0.3625338077545166, "step": 11324, "token_acc": 0.8699754426780406 }, { "epoch": 0.6110721415852803, "grad_norm": 0.40454044938087463, "learning_rate": 6.938430896821861e-06, "loss": 0.36546602845191956, "step": 11325, "token_acc": 0.8681361175560711 }, { "epoch": 0.6111260993902768, "grad_norm": 0.4144730865955353, "learning_rate": 6.936767297411594e-06, "loss": 0.41059184074401855, "step": 11326, "token_acc": 0.8625458650860852 }, { "epoch": 0.6111800571952732, "grad_norm": 0.439633309841156, "learning_rate": 6.935103791551652e-06, "loss": 0.3462110757827759, "step": 11327, "token_acc": 0.8757524309306992 }, { "epoch": 0.6112340150002697, "grad_norm": 0.454679012298584, "learning_rate": 6.9334403792928375e-06, "loss": 0.34870943427085876, "step": 11328, "token_acc": 0.8757004584819155 }, { "epoch": 0.6112879728052663, "grad_norm": 0.3280823528766632, "learning_rate": 6.931777060685949e-06, "loss": 0.2893350124359131, "step": 11329, "token_acc": 0.8963082806295043 }, { "epoch": 0.6113419306102628, "grad_norm": 0.3977150321006775, "learning_rate": 6.930113835781786e-06, "loss": 0.3311083912849426, "step": 11330, "token_acc": 0.8818705035971223 }, { "epoch": 0.6113958884152593, "grad_norm": 0.480695515871048, "learning_rate": 6.9284507046311435e-06, "loss": 0.4152299463748932, "step": 11331, "token_acc": 0.8571225879682179 }, { "epoch": 0.6114498462202558, "grad_norm": 0.45494693517684937, "learning_rate": 6.926787667284807e-06, "loss": 0.4336244463920593, "step": 11332, "token_acc": 0.8563972162740899 }, { "epoch": 0.6115038040252523, "grad_norm": 0.3657494783401489, "learning_rate": 6.925124723793573e-06, "loss": 0.37498003244400024, "step": 11333, "token_acc": 0.8697160477964795 }, { "epoch": 0.6115577618302488, "grad_norm": 0.35039275884628296, "learning_rate": 6.923461874208226e-06, "loss": 0.3774755001068115, "step": 11334, "token_acc": 0.8663779444385761 }, { "epoch": 0.6116117196352452, "grad_norm": 0.4524442255496979, "learning_rate": 6.9217991185795475e-06, "loss": 0.377108097076416, "step": 11335, "token_acc": 0.8696575148598924 }, { "epoch": 0.6116656774402417, "grad_norm": 0.4394441843032837, "learning_rate": 6.92013645695832e-06, "loss": 0.3521074652671814, "step": 11336, "token_acc": 0.872948822095857 }, { "epoch": 0.6117196352452382, "grad_norm": 0.3449242413043976, "learning_rate": 6.918473889395314e-06, "loss": 0.3190420866012573, "step": 11337, "token_acc": 0.8835731414868105 }, { "epoch": 0.6117735930502347, "grad_norm": 0.3342745900154114, "learning_rate": 6.916811415941304e-06, "loss": 0.3276302218437195, "step": 11338, "token_acc": 0.8831671379669593 }, { "epoch": 0.6118275508552312, "grad_norm": 0.5485045313835144, "learning_rate": 6.915149036647073e-06, "loss": 0.37118029594421387, "step": 11339, "token_acc": 0.8682918274289723 }, { "epoch": 0.6118815086602277, "grad_norm": 0.4406309723854065, "learning_rate": 6.913486751563379e-06, "loss": 0.4106264114379883, "step": 11340, "token_acc": 0.8499141027643292 }, { "epoch": 0.6119354664652242, "grad_norm": 0.3103378415107727, "learning_rate": 6.911824560740993e-06, "loss": 0.2755315899848938, "step": 11341, "token_acc": 0.898273572377158 }, { "epoch": 0.6119894242702207, "grad_norm": 0.48558664321899414, "learning_rate": 6.910162464230676e-06, "loss": 0.3754193186759949, "step": 11342, "token_acc": 0.8643984220907298 }, { "epoch": 0.6120433820752171, "grad_norm": 0.4487481713294983, "learning_rate": 6.9085004620831865e-06, "loss": 0.36477118730545044, "step": 11343, "token_acc": 0.8710760694991969 }, { "epoch": 0.6120973398802136, "grad_norm": 0.4183390140533447, "learning_rate": 6.906838554349285e-06, "loss": 0.41971153020858765, "step": 11344, "token_acc": 0.8542555580507523 }, { "epoch": 0.6121512976852101, "grad_norm": 0.4833761751651764, "learning_rate": 6.90517674107972e-06, "loss": 0.403814435005188, "step": 11345, "token_acc": 0.855878284923928 }, { "epoch": 0.6122052554902067, "grad_norm": 0.35639244318008423, "learning_rate": 6.903515022325249e-06, "loss": 0.4043053388595581, "step": 11346, "token_acc": 0.864165890027959 }, { "epoch": 0.6122592132952032, "grad_norm": 0.41998282074928284, "learning_rate": 6.901853398136618e-06, "loss": 0.32925695180892944, "step": 11347, "token_acc": 0.8843293858441201 }, { "epoch": 0.6123131711001997, "grad_norm": 0.3346724212169647, "learning_rate": 6.900191868564572e-06, "loss": 0.33936405181884766, "step": 11348, "token_acc": 0.8868355007985398 }, { "epoch": 0.6123671289051962, "grad_norm": 0.47272300720214844, "learning_rate": 6.8985304336598555e-06, "loss": 0.344209760427475, "step": 11349, "token_acc": 0.8790074261909074 }, { "epoch": 0.6124210867101926, "grad_norm": 0.45041027665138245, "learning_rate": 6.896869093473206e-06, "loss": 0.336891233921051, "step": 11350, "token_acc": 0.8785760728218466 }, { "epoch": 0.6124750445151891, "grad_norm": 0.433416485786438, "learning_rate": 6.8952078480553605e-06, "loss": 0.3939746022224426, "step": 11351, "token_acc": 0.8633879781420765 }, { "epoch": 0.6125290023201856, "grad_norm": 0.40234166383743286, "learning_rate": 6.893546697457056e-06, "loss": 0.3521101176738739, "step": 11352, "token_acc": 0.8784046692607004 }, { "epoch": 0.6125829601251821, "grad_norm": 0.4607449769973755, "learning_rate": 6.891885641729022e-06, "loss": 0.3721059262752533, "step": 11353, "token_acc": 0.8654455794293765 }, { "epoch": 0.6126369179301786, "grad_norm": 0.40661048889160156, "learning_rate": 6.890224680921984e-06, "loss": 0.3673745393753052, "step": 11354, "token_acc": 0.870141806722689 }, { "epoch": 0.6126908757351751, "grad_norm": 0.45324546098709106, "learning_rate": 6.888563815086672e-06, "loss": 0.36510026454925537, "step": 11355, "token_acc": 0.8743642870597099 }, { "epoch": 0.6127448335401716, "grad_norm": 0.4044133722782135, "learning_rate": 6.886903044273805e-06, "loss": 0.3185464143753052, "step": 11356, "token_acc": 0.8877658772062151 }, { "epoch": 0.6127987913451681, "grad_norm": 0.4200391471385956, "learning_rate": 6.885242368534103e-06, "loss": 0.3473849296569824, "step": 11357, "token_acc": 0.8767555173402121 }, { "epoch": 0.6128527491501645, "grad_norm": 0.38842537999153137, "learning_rate": 6.88358178791828e-06, "loss": 0.3390860855579376, "step": 11358, "token_acc": 0.8845265588914549 }, { "epoch": 0.612906706955161, "grad_norm": 0.40755048394203186, "learning_rate": 6.881921302477058e-06, "loss": 0.3382289409637451, "step": 11359, "token_acc": 0.8779299141332095 }, { "epoch": 0.6129606647601575, "grad_norm": 0.4107304811477661, "learning_rate": 6.880260912261141e-06, "loss": 0.4107668399810791, "step": 11360, "token_acc": 0.859375 }, { "epoch": 0.613014622565154, "grad_norm": 0.49112749099731445, "learning_rate": 6.878600617321239e-06, "loss": 0.3422286808490753, "step": 11361, "token_acc": 0.8786334465836164 }, { "epoch": 0.6130685803701506, "grad_norm": 0.40478214621543884, "learning_rate": 6.876940417708056e-06, "loss": 0.3672178387641907, "step": 11362, "token_acc": 0.8711875405580791 }, { "epoch": 0.6131225381751471, "grad_norm": 0.4368058741092682, "learning_rate": 6.875280313472295e-06, "loss": 0.3498718738555908, "step": 11363, "token_acc": 0.8753206041607295 }, { "epoch": 0.6131764959801436, "grad_norm": 0.47364118695259094, "learning_rate": 6.8736203046646495e-06, "loss": 0.37801486253738403, "step": 11364, "token_acc": 0.8703395259449072 }, { "epoch": 0.6132304537851401, "grad_norm": 0.38121432065963745, "learning_rate": 6.871960391335825e-06, "loss": 0.3253946304321289, "step": 11365, "token_acc": 0.8789293067947838 }, { "epoch": 0.6132844115901365, "grad_norm": 0.33129802346229553, "learning_rate": 6.870300573536513e-06, "loss": 0.3585056960582733, "step": 11366, "token_acc": 0.8790474990550586 }, { "epoch": 0.613338369395133, "grad_norm": 0.3461032211780548, "learning_rate": 6.868640851317398e-06, "loss": 0.37380877137184143, "step": 11367, "token_acc": 0.8673745801278578 }, { "epoch": 0.6133923272001295, "grad_norm": 0.4149947762489319, "learning_rate": 6.866981224729174e-06, "loss": 0.32236266136169434, "step": 11368, "token_acc": 0.8851135407905804 }, { "epoch": 0.613446285005126, "grad_norm": 0.3403976857662201, "learning_rate": 6.865321693822522e-06, "loss": 0.31624871492385864, "step": 11369, "token_acc": 0.88469424677361 }, { "epoch": 0.6135002428101225, "grad_norm": 0.4120464622974396, "learning_rate": 6.863662258648122e-06, "loss": 0.3600209355354309, "step": 11370, "token_acc": 0.8716792590787229 }, { "epoch": 0.613554200615119, "grad_norm": 0.368810772895813, "learning_rate": 6.862002919256656e-06, "loss": 0.3032380938529968, "step": 11371, "token_acc": 0.8880444856348471 }, { "epoch": 0.6136081584201155, "grad_norm": 0.3165282607078552, "learning_rate": 6.860343675698799e-06, "loss": 0.2872684597969055, "step": 11372, "token_acc": 0.8940835079800097 }, { "epoch": 0.6136621162251119, "grad_norm": 0.3169918656349182, "learning_rate": 6.858684528025225e-06, "loss": 0.3039278984069824, "step": 11373, "token_acc": 0.8951528536557015 }, { "epoch": 0.6137160740301084, "grad_norm": 0.3795909285545349, "learning_rate": 6.857025476286603e-06, "loss": 0.3410586714744568, "step": 11374, "token_acc": 0.8810126582278481 }, { "epoch": 0.6137700318351049, "grad_norm": 0.45328423380851746, "learning_rate": 6.855366520533599e-06, "loss": 0.34907132387161255, "step": 11375, "token_acc": 0.8779089961792289 }, { "epoch": 0.6138239896401014, "grad_norm": 0.37010499835014343, "learning_rate": 6.853707660816877e-06, "loss": 0.40100181102752686, "step": 11376, "token_acc": 0.8621671826625387 }, { "epoch": 0.6138779474450979, "grad_norm": 0.3142491579055786, "learning_rate": 6.8520488971870965e-06, "loss": 0.3189728856086731, "step": 11377, "token_acc": 0.8841610567763242 }, { "epoch": 0.6139319052500944, "grad_norm": 0.3302488923072815, "learning_rate": 6.850390229694921e-06, "loss": 0.36317116022109985, "step": 11378, "token_acc": 0.8716304632295211 }, { "epoch": 0.613985863055091, "grad_norm": 0.30929991602897644, "learning_rate": 6.848731658391003e-06, "loss": 0.2880105972290039, "step": 11379, "token_acc": 0.8925893886966552 }, { "epoch": 0.6140398208600875, "grad_norm": 0.42119288444519043, "learning_rate": 6.847073183325994e-06, "loss": 0.3433319330215454, "step": 11380, "token_acc": 0.8802411252511722 }, { "epoch": 0.6140937786650839, "grad_norm": 0.3470035493373871, "learning_rate": 6.845414804550544e-06, "loss": 0.3253443241119385, "step": 11381, "token_acc": 0.8832214765100671 }, { "epoch": 0.6141477364700804, "grad_norm": 0.4756004214286804, "learning_rate": 6.843756522115299e-06, "loss": 0.3541256785392761, "step": 11382, "token_acc": 0.8764094955489614 }, { "epoch": 0.6142016942750769, "grad_norm": 0.5671918988227844, "learning_rate": 6.842098336070902e-06, "loss": 0.3684813380241394, "step": 11383, "token_acc": 0.8791980808773132 }, { "epoch": 0.6142556520800734, "grad_norm": 0.3349205255508423, "learning_rate": 6.840440246467996e-06, "loss": 0.3111957907676697, "step": 11384, "token_acc": 0.886685552407932 }, { "epoch": 0.6143096098850699, "grad_norm": 0.31617096066474915, "learning_rate": 6.838782253357218e-06, "loss": 0.33254486322402954, "step": 11385, "token_acc": 0.8780487804878049 }, { "epoch": 0.6143635676900664, "grad_norm": 0.4222853183746338, "learning_rate": 6.837124356789201e-06, "loss": 0.34902113676071167, "step": 11386, "token_acc": 0.8753471150879358 }, { "epoch": 0.6144175254950629, "grad_norm": 0.4129348695278168, "learning_rate": 6.835466556814578e-06, "loss": 0.38874268531799316, "step": 11387, "token_acc": 0.8642625607779578 }, { "epoch": 0.6144714833000594, "grad_norm": 0.42892780900001526, "learning_rate": 6.833808853483976e-06, "loss": 0.34075087308883667, "step": 11388, "token_acc": 0.875422138836773 }, { "epoch": 0.6145254411050558, "grad_norm": 0.3985992670059204, "learning_rate": 6.832151246848025e-06, "loss": 0.4221278131008148, "step": 11389, "token_acc": 0.8527163527163527 }, { "epoch": 0.6145793989100523, "grad_norm": 0.4940572679042816, "learning_rate": 6.830493736957338e-06, "loss": 0.3323994278907776, "step": 11390, "token_acc": 0.8795241286863271 }, { "epoch": 0.6146333567150488, "grad_norm": 0.4548149108886719, "learning_rate": 6.8288363238625475e-06, "loss": 0.32150590419769287, "step": 11391, "token_acc": 0.8815104166666666 }, { "epoch": 0.6146873145200453, "grad_norm": 0.44519466161727905, "learning_rate": 6.827179007614265e-06, "loss": 0.38548681139945984, "step": 11392, "token_acc": 0.8652875882946519 }, { "epoch": 0.6147412723250418, "grad_norm": 0.3598194122314453, "learning_rate": 6.8255217882631045e-06, "loss": 0.3394814729690552, "step": 11393, "token_acc": 0.880248833592535 }, { "epoch": 0.6147952301300383, "grad_norm": 0.38246026635169983, "learning_rate": 6.823864665859678e-06, "loss": 0.3521016836166382, "step": 11394, "token_acc": 0.8727622251591974 }, { "epoch": 0.6148491879350348, "grad_norm": 0.4288652837276459, "learning_rate": 6.822207640454593e-06, "loss": 0.32289421558380127, "step": 11395, "token_acc": 0.8842073355562292 }, { "epoch": 0.6149031457400312, "grad_norm": 0.38525381684303284, "learning_rate": 6.820550712098447e-06, "loss": 0.35622549057006836, "step": 11396, "token_acc": 0.8718703976435935 }, { "epoch": 0.6149571035450278, "grad_norm": 0.36490753293037415, "learning_rate": 6.818893880841855e-06, "loss": 0.3711324632167816, "step": 11397, "token_acc": 0.8735105407882676 }, { "epoch": 0.6150110613500243, "grad_norm": 0.5754074454307556, "learning_rate": 6.817237146735411e-06, "loss": 0.4254009425640106, "step": 11398, "token_acc": 0.8513744370243826 }, { "epoch": 0.6150650191550208, "grad_norm": 0.3977879583835602, "learning_rate": 6.81558050982971e-06, "loss": 0.37132948637008667, "step": 11399, "token_acc": 0.8713899558969982 }, { "epoch": 0.6151189769600173, "grad_norm": 0.42617541551589966, "learning_rate": 6.8139239701753464e-06, "loss": 0.3259434700012207, "step": 11400, "token_acc": 0.8852599914052428 }, { "epoch": 0.6151729347650138, "grad_norm": 0.4063488841056824, "learning_rate": 6.812267527822909e-06, "loss": 0.34997954964637756, "step": 11401, "token_acc": 0.8782753361985304 }, { "epoch": 0.6152268925700103, "grad_norm": 0.3865945339202881, "learning_rate": 6.810611182822988e-06, "loss": 0.32848668098449707, "step": 11402, "token_acc": 0.8845635940751622 }, { "epoch": 0.6152808503750068, "grad_norm": 0.4060507118701935, "learning_rate": 6.808954935226162e-06, "loss": 0.3543431758880615, "step": 11403, "token_acc": 0.8757971014492754 }, { "epoch": 0.6153348081800032, "grad_norm": 0.4029846787452698, "learning_rate": 6.80729878508302e-06, "loss": 0.3561928868293762, "step": 11404, "token_acc": 0.8782173849095455 }, { "epoch": 0.6153887659849997, "grad_norm": 0.4392886757850647, "learning_rate": 6.805642732444137e-06, "loss": 0.3711535334587097, "step": 11405, "token_acc": 0.8727100442198358 }, { "epoch": 0.6154427237899962, "grad_norm": 0.36903825402259827, "learning_rate": 6.803986777360087e-06, "loss": 0.3567160367965698, "step": 11406, "token_acc": 0.875387309713054 }, { "epoch": 0.6154966815949927, "grad_norm": 0.3769152760505676, "learning_rate": 6.802330919881444e-06, "loss": 0.38471531867980957, "step": 11407, "token_acc": 0.8682542137003247 }, { "epoch": 0.6155506393999892, "grad_norm": 0.498322993516922, "learning_rate": 6.800675160058776e-06, "loss": 0.32820022106170654, "step": 11408, "token_acc": 0.8808750640916083 }, { "epoch": 0.6156045972049857, "grad_norm": 0.46573275327682495, "learning_rate": 6.79901949794265e-06, "loss": 0.2967674434185028, "step": 11409, "token_acc": 0.8927574555604525 }, { "epoch": 0.6156585550099822, "grad_norm": 0.5312368273735046, "learning_rate": 6.797363933583632e-06, "loss": 0.4093622863292694, "step": 11410, "token_acc": 0.8540511536397782 }, { "epoch": 0.6157125128149787, "grad_norm": 0.38369762897491455, "learning_rate": 6.795708467032279e-06, "loss": 0.3541063964366913, "step": 11411, "token_acc": 0.8751017087062652 }, { "epoch": 0.6157664706199751, "grad_norm": 0.42956963181495667, "learning_rate": 6.794053098339151e-06, "loss": 0.36415573954582214, "step": 11412, "token_acc": 0.8729310569529519 }, { "epoch": 0.6158204284249716, "grad_norm": 0.39765799045562744, "learning_rate": 6.792397827554802e-06, "loss": 0.3547540605068207, "step": 11413, "token_acc": 0.877001837752691 }, { "epoch": 0.6158743862299682, "grad_norm": 0.3845905065536499, "learning_rate": 6.790742654729782e-06, "loss": 0.3311327397823334, "step": 11414, "token_acc": 0.8811018131101813 }, { "epoch": 0.6159283440349647, "grad_norm": 0.3389107286930084, "learning_rate": 6.789087579914637e-06, "loss": 0.34434324502944946, "step": 11415, "token_acc": 0.8778598232659485 }, { "epoch": 0.6159823018399612, "grad_norm": 0.4198492765426636, "learning_rate": 6.78743260315992e-06, "loss": 0.33756935596466064, "step": 11416, "token_acc": 0.8818295739348371 }, { "epoch": 0.6160362596449577, "grad_norm": 0.45475706458091736, "learning_rate": 6.7857777245161704e-06, "loss": 0.35435885190963745, "step": 11417, "token_acc": 0.8725984251968504 }, { "epoch": 0.6160902174499542, "grad_norm": 0.2826787829399109, "learning_rate": 6.784122944033928e-06, "loss": 0.30499333143234253, "step": 11418, "token_acc": 0.8838376795426022 }, { "epoch": 0.6161441752549506, "grad_norm": 0.37590596079826355, "learning_rate": 6.782468261763728e-06, "loss": 0.37516194581985474, "step": 11419, "token_acc": 0.8691150075935385 }, { "epoch": 0.6161981330599471, "grad_norm": 0.38158732652664185, "learning_rate": 6.780813677756104e-06, "loss": 0.35230880975723267, "step": 11420, "token_acc": 0.8751941267824368 }, { "epoch": 0.6162520908649436, "grad_norm": 0.40584683418273926, "learning_rate": 6.7791591920615865e-06, "loss": 0.33008864521980286, "step": 11421, "token_acc": 0.8824773768950523 }, { "epoch": 0.6163060486699401, "grad_norm": 0.4091932475566864, "learning_rate": 6.777504804730699e-06, "loss": 0.34899574518203735, "step": 11422, "token_acc": 0.8762734143936904 }, { "epoch": 0.6163600064749366, "grad_norm": 0.38854527473449707, "learning_rate": 6.775850515813976e-06, "loss": 0.3328912854194641, "step": 11423, "token_acc": 0.8827864954826439 }, { "epoch": 0.6164139642799331, "grad_norm": 0.4852064847946167, "learning_rate": 6.7741963253619325e-06, "loss": 0.36554059386253357, "step": 11424, "token_acc": 0.8677472078452738 }, { "epoch": 0.6164679220849296, "grad_norm": 0.5079774856567383, "learning_rate": 6.772542233425088e-06, "loss": 0.38638874888420105, "step": 11425, "token_acc": 0.8656154810965777 }, { "epoch": 0.6165218798899261, "grad_norm": 0.4508265256881714, "learning_rate": 6.770888240053959e-06, "loss": 0.3372219204902649, "step": 11426, "token_acc": 0.8780918727915195 }, { "epoch": 0.6165758376949225, "grad_norm": 0.378439724445343, "learning_rate": 6.769234345299056e-06, "loss": 0.3501383066177368, "step": 11427, "token_acc": 0.8749048464856636 }, { "epoch": 0.616629795499919, "grad_norm": 0.3326588571071625, "learning_rate": 6.767580549210888e-06, "loss": 0.3082137703895569, "step": 11428, "token_acc": 0.891804501927621 }, { "epoch": 0.6166837533049155, "grad_norm": 0.48475003242492676, "learning_rate": 6.765926851839967e-06, "loss": 0.3828985095024109, "step": 11429, "token_acc": 0.8670923670923671 }, { "epoch": 0.616737711109912, "grad_norm": 0.46323254704475403, "learning_rate": 6.764273253236791e-06, "loss": 0.36064237356185913, "step": 11430, "token_acc": 0.8709507042253521 }, { "epoch": 0.6167916689149086, "grad_norm": 0.51847904920578, "learning_rate": 6.7626197534518635e-06, "loss": 0.33267903327941895, "step": 11431, "token_acc": 0.8776150627615062 }, { "epoch": 0.6168456267199051, "grad_norm": 0.3529947102069855, "learning_rate": 6.76096635253568e-06, "loss": 0.36234384775161743, "step": 11432, "token_acc": 0.8750642013353878 }, { "epoch": 0.6168995845249016, "grad_norm": 0.37818604707717896, "learning_rate": 6.759313050538735e-06, "loss": 0.33501797914505005, "step": 11433, "token_acc": 0.8770414360912404 }, { "epoch": 0.6169535423298981, "grad_norm": 0.37651100754737854, "learning_rate": 6.757659847511522e-06, "loss": 0.3139473795890808, "step": 11434, "token_acc": 0.8862810417928528 }, { "epoch": 0.6170075001348945, "grad_norm": 0.40530019998550415, "learning_rate": 6.756006743504524e-06, "loss": 0.27847549319267273, "step": 11435, "token_acc": 0.8981400755594304 }, { "epoch": 0.617061457939891, "grad_norm": 0.3354794681072235, "learning_rate": 6.7543537385682335e-06, "loss": 0.3284949064254761, "step": 11436, "token_acc": 0.8794272795779955 }, { "epoch": 0.6171154157448875, "grad_norm": 0.3118114173412323, "learning_rate": 6.752700832753129e-06, "loss": 0.314919650554657, "step": 11437, "token_acc": 0.88570232363408 }, { "epoch": 0.617169373549884, "grad_norm": 0.46247434616088867, "learning_rate": 6.75104802610969e-06, "loss": 0.40273457765579224, "step": 11438, "token_acc": 0.8657757117954678 }, { "epoch": 0.6172233313548805, "grad_norm": 0.3865000307559967, "learning_rate": 6.749395318688394e-06, "loss": 0.3357202410697937, "step": 11439, "token_acc": 0.8804474870596093 }, { "epoch": 0.617277289159877, "grad_norm": 0.4073324501514435, "learning_rate": 6.747742710539713e-06, "loss": 0.38152778148651123, "step": 11440, "token_acc": 0.8660275009461335 }, { "epoch": 0.6173312469648735, "grad_norm": 0.3921176493167877, "learning_rate": 6.746090201714115e-06, "loss": 0.325198233127594, "step": 11441, "token_acc": 0.883611903791276 }, { "epoch": 0.6173852047698699, "grad_norm": 0.3979125916957855, "learning_rate": 6.744437792262074e-06, "loss": 0.33167052268981934, "step": 11442, "token_acc": 0.8798545694926458 }, { "epoch": 0.6174391625748664, "grad_norm": 0.5028979182243347, "learning_rate": 6.7427854822340484e-06, "loss": 0.35691630840301514, "step": 11443, "token_acc": 0.8702511268512556 }, { "epoch": 0.6174931203798629, "grad_norm": 0.416028767824173, "learning_rate": 6.741133271680502e-06, "loss": 0.41607242822647095, "step": 11444, "token_acc": 0.8559322033898306 }, { "epoch": 0.6175470781848594, "grad_norm": 0.30710524320602417, "learning_rate": 6.739481160651891e-06, "loss": 0.3500930666923523, "step": 11445, "token_acc": 0.877895300575713 }, { "epoch": 0.6176010359898559, "grad_norm": 0.4636942744255066, "learning_rate": 6.7378291491986714e-06, "loss": 0.41687536239624023, "step": 11446, "token_acc": 0.8579312296589784 }, { "epoch": 0.6176549937948524, "grad_norm": 0.4392053484916687, "learning_rate": 6.736177237371294e-06, "loss": 0.4093513488769531, "step": 11447, "token_acc": 0.8564958592132506 }, { "epoch": 0.617708951599849, "grad_norm": 0.31779056787490845, "learning_rate": 6.734525425220206e-06, "loss": 0.33105048537254333, "step": 11448, "token_acc": 0.8842315369261478 }, { "epoch": 0.6177629094048455, "grad_norm": 0.37552720308303833, "learning_rate": 6.73287371279586e-06, "loss": 0.3737231492996216, "step": 11449, "token_acc": 0.8712131147540984 }, { "epoch": 0.6178168672098419, "grad_norm": 0.43325570225715637, "learning_rate": 6.731222100148693e-06, "loss": 0.3537861108779907, "step": 11450, "token_acc": 0.8745687870843107 }, { "epoch": 0.6178708250148384, "grad_norm": 0.47136178612709045, "learning_rate": 6.7295705873291494e-06, "loss": 0.37667790055274963, "step": 11451, "token_acc": 0.8731626964014192 }, { "epoch": 0.6179247828198349, "grad_norm": 0.31405574083328247, "learning_rate": 6.727919174387665e-06, "loss": 0.33017653226852417, "step": 11452, "token_acc": 0.8820826952526799 }, { "epoch": 0.6179787406248314, "grad_norm": 0.3291102349758148, "learning_rate": 6.7262678613746694e-06, "loss": 0.30334973335266113, "step": 11453, "token_acc": 0.8948110989183258 }, { "epoch": 0.6180326984298279, "grad_norm": 0.41220465302467346, "learning_rate": 6.72461664834059e-06, "loss": 0.37066203355789185, "step": 11454, "token_acc": 0.8705330102967899 }, { "epoch": 0.6180866562348244, "grad_norm": 0.36616721749305725, "learning_rate": 6.722965535335866e-06, "loss": 0.3387165367603302, "step": 11455, "token_acc": 0.8816196270070809 }, { "epoch": 0.6181406140398209, "grad_norm": 0.38691630959510803, "learning_rate": 6.7213145224109155e-06, "loss": 0.35215961933135986, "step": 11456, "token_acc": 0.8802656546489563 }, { "epoch": 0.6181945718448173, "grad_norm": 0.3827003836631775, "learning_rate": 6.71966360961616e-06, "loss": 0.40268224477767944, "step": 11457, "token_acc": 0.8592482577402034 }, { "epoch": 0.6182485296498138, "grad_norm": 0.36903440952301025, "learning_rate": 6.718012797002021e-06, "loss": 0.3263847231864929, "step": 11458, "token_acc": 0.8828263699846136 }, { "epoch": 0.6183024874548103, "grad_norm": 0.4210464656352997, "learning_rate": 6.716362084618908e-06, "loss": 0.3279661238193512, "step": 11459, "token_acc": 0.8777335984095428 }, { "epoch": 0.6183564452598068, "grad_norm": 0.40354934334754944, "learning_rate": 6.714711472517239e-06, "loss": 0.321717232465744, "step": 11460, "token_acc": 0.8874755381604696 }, { "epoch": 0.6184104030648033, "grad_norm": 0.32022762298583984, "learning_rate": 6.7130609607474185e-06, "loss": 0.37352269887924194, "step": 11461, "token_acc": 0.8674104521432766 }, { "epoch": 0.6184643608697998, "grad_norm": 0.4779043197631836, "learning_rate": 6.711410549359857e-06, "loss": 0.3843085467815399, "step": 11462, "token_acc": 0.8619055401209348 }, { "epoch": 0.6185183186747963, "grad_norm": 0.4366152584552765, "learning_rate": 6.709760238404957e-06, "loss": 0.3552478551864624, "step": 11463, "token_acc": 0.8716254060714685 }, { "epoch": 0.6185722764797928, "grad_norm": 0.44732123613357544, "learning_rate": 6.708110027933115e-06, "loss": 0.3592560291290283, "step": 11464, "token_acc": 0.873340454753548 }, { "epoch": 0.6186262342847892, "grad_norm": 0.33276090025901794, "learning_rate": 6.706459917994732e-06, "loss": 0.33318769931793213, "step": 11465, "token_acc": 0.8812515100265764 }, { "epoch": 0.6186801920897858, "grad_norm": 0.3729665279388428, "learning_rate": 6.7048099086402e-06, "loss": 0.31791406869888306, "step": 11466, "token_acc": 0.8867705783846451 }, { "epoch": 0.6187341498947823, "grad_norm": 0.39185813069343567, "learning_rate": 6.703159999919908e-06, "loss": 0.38421201705932617, "step": 11467, "token_acc": 0.870250606305578 }, { "epoch": 0.6187881076997788, "grad_norm": 0.49009764194488525, "learning_rate": 6.7015101918842485e-06, "loss": 0.39984023571014404, "step": 11468, "token_acc": 0.8572354561659374 }, { "epoch": 0.6188420655047753, "grad_norm": 0.3430490493774414, "learning_rate": 6.6998604845836025e-06, "loss": 0.30790019035339355, "step": 11469, "token_acc": 0.8889641929289507 }, { "epoch": 0.6188960233097718, "grad_norm": 0.3486231863498688, "learning_rate": 6.698210878068354e-06, "loss": 0.32463139295578003, "step": 11470, "token_acc": 0.8828473863934857 }, { "epoch": 0.6189499811147683, "grad_norm": 0.335163414478302, "learning_rate": 6.69656137238888e-06, "loss": 0.3231317400932312, "step": 11471, "token_acc": 0.8838168751663561 }, { "epoch": 0.6190039389197648, "grad_norm": 0.46412524580955505, "learning_rate": 6.6949119675955564e-06, "loss": 0.38091427087783813, "step": 11472, "token_acc": 0.8662463107056614 }, { "epoch": 0.6190578967247612, "grad_norm": 0.33264321088790894, "learning_rate": 6.693262663738751e-06, "loss": 0.34139806032180786, "step": 11473, "token_acc": 0.8773828756058158 }, { "epoch": 0.6191118545297577, "grad_norm": 0.370970219373703, "learning_rate": 6.691613460868842e-06, "loss": 0.36835241317749023, "step": 11474, "token_acc": 0.8703894195444526 }, { "epoch": 0.6191658123347542, "grad_norm": 0.44219478964805603, "learning_rate": 6.6899643590361915e-06, "loss": 0.3460072875022888, "step": 11475, "token_acc": 0.8729082515868436 }, { "epoch": 0.6192197701397507, "grad_norm": 0.4275352954864502, "learning_rate": 6.688315358291164e-06, "loss": 0.36801034212112427, "step": 11476, "token_acc": 0.8740326033261979 }, { "epoch": 0.6192737279447472, "grad_norm": 0.34535109996795654, "learning_rate": 6.6866664586841154e-06, "loss": 0.4084985852241516, "step": 11477, "token_acc": 0.8569012178619756 }, { "epoch": 0.6193276857497437, "grad_norm": 0.3780011534690857, "learning_rate": 6.685017660265406e-06, "loss": 0.372435599565506, "step": 11478, "token_acc": 0.8694578384099729 }, { "epoch": 0.6193816435547402, "grad_norm": 0.4609830677509308, "learning_rate": 6.683368963085388e-06, "loss": 0.3332917094230652, "step": 11479, "token_acc": 0.879476458825668 }, { "epoch": 0.6194356013597366, "grad_norm": 0.38573816418647766, "learning_rate": 6.68172036719441e-06, "loss": 0.3878428339958191, "step": 11480, "token_acc": 0.8646328578975172 }, { "epoch": 0.6194895591647331, "grad_norm": 0.3917553424835205, "learning_rate": 6.680071872642826e-06, "loss": 0.3440721333026886, "step": 11481, "token_acc": 0.8820687695368002 }, { "epoch": 0.6195435169697296, "grad_norm": 0.4123062491416931, "learning_rate": 6.678423479480978e-06, "loss": 0.35532835125923157, "step": 11482, "token_acc": 0.8724957069261591 }, { "epoch": 0.6195974747747262, "grad_norm": 0.4170357882976532, "learning_rate": 6.676775187759206e-06, "loss": 0.33253446221351624, "step": 11483, "token_acc": 0.8825668060352663 }, { "epoch": 0.6196514325797227, "grad_norm": 0.3552071750164032, "learning_rate": 6.67512699752785e-06, "loss": 0.32424524426460266, "step": 11484, "token_acc": 0.8836828309305373 }, { "epoch": 0.6197053903847192, "grad_norm": 0.4370326101779938, "learning_rate": 6.673478908837243e-06, "loss": 0.32797878980636597, "step": 11485, "token_acc": 0.8793956043956044 }, { "epoch": 0.6197593481897157, "grad_norm": 0.380173921585083, "learning_rate": 6.671830921737718e-06, "loss": 0.2842594385147095, "step": 11486, "token_acc": 0.8956661316211878 }, { "epoch": 0.6198133059947122, "grad_norm": 0.38953495025634766, "learning_rate": 6.670183036279607e-06, "loss": 0.3473239839076996, "step": 11487, "token_acc": 0.8834926704907584 }, { "epoch": 0.6198672637997086, "grad_norm": 0.46086177229881287, "learning_rate": 6.668535252513233e-06, "loss": 0.38250452280044556, "step": 11488, "token_acc": 0.8632493188010899 }, { "epoch": 0.6199212216047051, "grad_norm": 0.39065608382225037, "learning_rate": 6.66688757048892e-06, "loss": 0.32298052310943604, "step": 11489, "token_acc": 0.8850289495450786 }, { "epoch": 0.6199751794097016, "grad_norm": 0.46398648619651794, "learning_rate": 6.665239990256989e-06, "loss": 0.3222059905529022, "step": 11490, "token_acc": 0.8821016659547202 }, { "epoch": 0.6200291372146981, "grad_norm": 0.3838810324668884, "learning_rate": 6.663592511867753e-06, "loss": 0.36300379037857056, "step": 11491, "token_acc": 0.8704030078721654 }, { "epoch": 0.6200830950196946, "grad_norm": 0.4043976366519928, "learning_rate": 6.661945135371528e-06, "loss": 0.29083454608917236, "step": 11492, "token_acc": 0.8936519790888723 }, { "epoch": 0.6201370528246911, "grad_norm": 0.40961501002311707, "learning_rate": 6.660297860818622e-06, "loss": 0.2995682954788208, "step": 11493, "token_acc": 0.8958366822054332 }, { "epoch": 0.6201910106296876, "grad_norm": 0.5763615369796753, "learning_rate": 6.658650688259347e-06, "loss": 0.3578032851219177, "step": 11494, "token_acc": 0.8729415843300399 }, { "epoch": 0.6202449684346841, "grad_norm": 0.3746455907821655, "learning_rate": 6.657003617744005e-06, "loss": 0.3279711604118347, "step": 11495, "token_acc": 0.8920112404656765 }, { "epoch": 0.6202989262396805, "grad_norm": 0.38107120990753174, "learning_rate": 6.655356649322895e-06, "loss": 0.36208978295326233, "step": 11496, "token_acc": 0.8712859080261374 }, { "epoch": 0.620352884044677, "grad_norm": 0.45119529962539673, "learning_rate": 6.653709783046318e-06, "loss": 0.3966200351715088, "step": 11497, "token_acc": 0.8614912694667296 }, { "epoch": 0.6204068418496735, "grad_norm": 0.4233904182910919, "learning_rate": 6.652063018964569e-06, "loss": 0.3662317991256714, "step": 11498, "token_acc": 0.8699308755760369 }, { "epoch": 0.62046079965467, "grad_norm": 0.4135279953479767, "learning_rate": 6.6504163571279314e-06, "loss": 0.3613404929637909, "step": 11499, "token_acc": 0.8734010759115362 }, { "epoch": 0.6205147574596666, "grad_norm": 0.28822603821754456, "learning_rate": 6.6487697975867094e-06, "loss": 0.3670489192008972, "step": 11500, "token_acc": 0.8744689889549703 }, { "epoch": 0.6205687152646631, "grad_norm": 0.40298157930374146, "learning_rate": 6.647123340391177e-06, "loss": 0.33327585458755493, "step": 11501, "token_acc": 0.8783588558220168 }, { "epoch": 0.6206226730696596, "grad_norm": 0.38665249943733215, "learning_rate": 6.645476985591619e-06, "loss": 0.3406149744987488, "step": 11502, "token_acc": 0.8805486284289277 }, { "epoch": 0.620676630874656, "grad_norm": 0.3238816261291504, "learning_rate": 6.643830733238315e-06, "loss": 0.3203660845756531, "step": 11503, "token_acc": 0.8832147937411096 }, { "epoch": 0.6207305886796525, "grad_norm": 0.5331381559371948, "learning_rate": 6.642184583381541e-06, "loss": 0.39944928884506226, "step": 11504, "token_acc": 0.8620272837161113 }, { "epoch": 0.620784546484649, "grad_norm": 0.4145120084285736, "learning_rate": 6.640538536071571e-06, "loss": 0.2840530574321747, "step": 11505, "token_acc": 0.899365367180417 }, { "epoch": 0.6208385042896455, "grad_norm": 0.4178658127784729, "learning_rate": 6.63889259135867e-06, "loss": 0.36465999484062195, "step": 11506, "token_acc": 0.8721548921075968 }, { "epoch": 0.620892462094642, "grad_norm": 0.4389711320400238, "learning_rate": 6.637246749293112e-06, "loss": 0.3743070363998413, "step": 11507, "token_acc": 0.864939291525848 }, { "epoch": 0.6209464198996385, "grad_norm": 0.4817546010017395, "learning_rate": 6.635601009925159e-06, "loss": 0.34527087211608887, "step": 11508, "token_acc": 0.8722513089005236 }, { "epoch": 0.621000377704635, "grad_norm": 0.3874342739582062, "learning_rate": 6.6339553733050675e-06, "loss": 0.35200029611587524, "step": 11509, "token_acc": 0.8698549588396707 }, { "epoch": 0.6210543355096315, "grad_norm": 0.3569362461566925, "learning_rate": 6.6323098394831e-06, "loss": 0.3449936807155609, "step": 11510, "token_acc": 0.8759018759018758 }, { "epoch": 0.6211082933146279, "grad_norm": 0.427801251411438, "learning_rate": 6.630664408509508e-06, "loss": 0.34631937742233276, "step": 11511, "token_acc": 0.87645394530022 }, { "epoch": 0.6211622511196244, "grad_norm": 0.4023366868495941, "learning_rate": 6.629019080434536e-06, "loss": 0.35380038619041443, "step": 11512, "token_acc": 0.8728813559322034 }, { "epoch": 0.6212162089246209, "grad_norm": 0.3719199001789093, "learning_rate": 6.627373855308444e-06, "loss": 0.3817938268184662, "step": 11513, "token_acc": 0.8660601174709202 }, { "epoch": 0.6212701667296174, "grad_norm": 0.48548173904418945, "learning_rate": 6.625728733181468e-06, "loss": 0.38769981265068054, "step": 11514, "token_acc": 0.8628750872295883 }, { "epoch": 0.6213241245346139, "grad_norm": 0.3566909730434418, "learning_rate": 6.624083714103855e-06, "loss": 0.3335672914981842, "step": 11515, "token_acc": 0.8800096571704491 }, { "epoch": 0.6213780823396104, "grad_norm": 0.45112302899360657, "learning_rate": 6.62243879812584e-06, "loss": 0.3642805516719818, "step": 11516, "token_acc": 0.8692268651666385 }, { "epoch": 0.621432040144607, "grad_norm": 0.39674097299575806, "learning_rate": 6.620793985297659e-06, "loss": 0.3635507822036743, "step": 11517, "token_acc": 0.8704253214638972 }, { "epoch": 0.6214859979496035, "grad_norm": 0.3737102150917053, "learning_rate": 6.619149275669543e-06, "loss": 0.3451107442378998, "step": 11518, "token_acc": 0.8811717782973456 }, { "epoch": 0.6215399557545999, "grad_norm": 0.4566182792186737, "learning_rate": 6.6175046692917246e-06, "loss": 0.33197322487831116, "step": 11519, "token_acc": 0.8806998939554613 }, { "epoch": 0.6215939135595964, "grad_norm": 0.29441192746162415, "learning_rate": 6.615860166214427e-06, "loss": 0.32831764221191406, "step": 11520, "token_acc": 0.8816611539875046 }, { "epoch": 0.6216478713645929, "grad_norm": 0.3374966084957123, "learning_rate": 6.614215766487874e-06, "loss": 0.3228928744792938, "step": 11521, "token_acc": 0.8832853025936599 }, { "epoch": 0.6217018291695894, "grad_norm": 0.44937461614608765, "learning_rate": 6.612571470162283e-06, "loss": 0.3925946056842804, "step": 11522, "token_acc": 0.8628701594533029 }, { "epoch": 0.6217557869745859, "grad_norm": 0.448403000831604, "learning_rate": 6.610927277287874e-06, "loss": 0.3569171130657196, "step": 11523, "token_acc": 0.8698330361484844 }, { "epoch": 0.6218097447795824, "grad_norm": 0.4067728817462921, "learning_rate": 6.609283187914857e-06, "loss": 0.3576645851135254, "step": 11524, "token_acc": 0.8747371583057976 }, { "epoch": 0.6218637025845789, "grad_norm": 0.43666011095046997, "learning_rate": 6.607639202093442e-06, "loss": 0.3687532842159271, "step": 11525, "token_acc": 0.8746134382906944 }, { "epoch": 0.6219176603895753, "grad_norm": 0.37100544571876526, "learning_rate": 6.6059953198738394e-06, "loss": 0.34016573429107666, "step": 11526, "token_acc": 0.8815536848323734 }, { "epoch": 0.6219716181945718, "grad_norm": 0.40178579092025757, "learning_rate": 6.604351541306248e-06, "loss": 0.34954750537872314, "step": 11527, "token_acc": 0.871054114994363 }, { "epoch": 0.6220255759995683, "grad_norm": 0.4921732246875763, "learning_rate": 6.602707866440874e-06, "loss": 0.38077032566070557, "step": 11528, "token_acc": 0.8647854203409759 }, { "epoch": 0.6220795338045648, "grad_norm": 0.3304426670074463, "learning_rate": 6.60106429532791e-06, "loss": 0.3433242440223694, "step": 11529, "token_acc": 0.8820028464225643 }, { "epoch": 0.6221334916095613, "grad_norm": 0.43652838468551636, "learning_rate": 6.5994208280175535e-06, "loss": 0.3357827365398407, "step": 11530, "token_acc": 0.8774901960784314 }, { "epoch": 0.6221874494145578, "grad_norm": 0.49780723452568054, "learning_rate": 6.597777464559989e-06, "loss": 0.3874931335449219, "step": 11531, "token_acc": 0.8691525423728813 }, { "epoch": 0.6222414072195543, "grad_norm": 0.40557190775871277, "learning_rate": 6.596134205005413e-06, "loss": 0.3635691702365875, "step": 11532, "token_acc": 0.8754029432375613 }, { "epoch": 0.6222953650245509, "grad_norm": 0.4197823703289032, "learning_rate": 6.594491049404007e-06, "loss": 0.3560681939125061, "step": 11533, "token_acc": 0.8705860939206407 }, { "epoch": 0.6223493228295472, "grad_norm": 0.4742770493030548, "learning_rate": 6.592847997805954e-06, "loss": 0.3971361815929413, "step": 11534, "token_acc": 0.8624070852443461 }, { "epoch": 0.6224032806345438, "grad_norm": 0.4671068489551544, "learning_rate": 6.59120505026143e-06, "loss": 0.3405444025993347, "step": 11535, "token_acc": 0.8796013289036545 }, { "epoch": 0.6224572384395403, "grad_norm": 0.3910369873046875, "learning_rate": 6.58956220682061e-06, "loss": 0.3704793453216553, "step": 11536, "token_acc": 0.8688736822637322 }, { "epoch": 0.6225111962445368, "grad_norm": 0.39982903003692627, "learning_rate": 6.587919467533667e-06, "loss": 0.3027113676071167, "step": 11537, "token_acc": 0.8913590048961229 }, { "epoch": 0.6225651540495333, "grad_norm": 0.39313387870788574, "learning_rate": 6.586276832450765e-06, "loss": 0.4146331250667572, "step": 11538, "token_acc": 0.8579205225911812 }, { "epoch": 0.6226191118545298, "grad_norm": 0.5621789693832397, "learning_rate": 6.584634301622079e-06, "loss": 0.3939729332923889, "step": 11539, "token_acc": 0.8629768941429339 }, { "epoch": 0.6226730696595263, "grad_norm": 0.44554996490478516, "learning_rate": 6.582991875097766e-06, "loss": 0.3033137321472168, "step": 11540, "token_acc": 0.8895296449810777 }, { "epoch": 0.6227270274645228, "grad_norm": 0.5398468375205994, "learning_rate": 6.581349552927987e-06, "loss": 0.4215414524078369, "step": 11541, "token_acc": 0.8548363846042958 }, { "epoch": 0.6227809852695192, "grad_norm": 0.3278479278087616, "learning_rate": 6.579707335162896e-06, "loss": 0.3574981689453125, "step": 11542, "token_acc": 0.8711909318416017 }, { "epoch": 0.6228349430745157, "grad_norm": 0.5170255303382874, "learning_rate": 6.578065221852648e-06, "loss": 0.3839365839958191, "step": 11543, "token_acc": 0.8695113314447592 }, { "epoch": 0.6228889008795122, "grad_norm": 0.4709700345993042, "learning_rate": 6.576423213047388e-06, "loss": 0.35773396492004395, "step": 11544, "token_acc": 0.8704453441295547 }, { "epoch": 0.6229428586845087, "grad_norm": 0.44983136653900146, "learning_rate": 6.574781308797269e-06, "loss": 0.3559473156929016, "step": 11545, "token_acc": 0.869140937450008 }, { "epoch": 0.6229968164895052, "grad_norm": 0.5206918120384216, "learning_rate": 6.573139509152433e-06, "loss": 0.33827975392341614, "step": 11546, "token_acc": 0.8840125391849529 }, { "epoch": 0.6230507742945017, "grad_norm": 0.3525572121143341, "learning_rate": 6.571497814163017e-06, "loss": 0.3422146737575531, "step": 11547, "token_acc": 0.8810184626523471 }, { "epoch": 0.6231047320994982, "grad_norm": 0.29783689975738525, "learning_rate": 6.569856223879161e-06, "loss": 0.3169288635253906, "step": 11548, "token_acc": 0.8857251811363925 }, { "epoch": 0.6231586899044946, "grad_norm": 0.4745147228240967, "learning_rate": 6.568214738350995e-06, "loss": 0.3359597325325012, "step": 11549, "token_acc": 0.8772720663079832 }, { "epoch": 0.6232126477094911, "grad_norm": 0.41050195693969727, "learning_rate": 6.566573357628654e-06, "loss": 0.30440667271614075, "step": 11550, "token_acc": 0.8913288288288288 }, { "epoch": 0.6232666055144876, "grad_norm": 0.4660256505012512, "learning_rate": 6.5649320817622604e-06, "loss": 0.33242201805114746, "step": 11551, "token_acc": 0.8814800539603006 }, { "epoch": 0.6233205633194842, "grad_norm": 0.5277746319770813, "learning_rate": 6.5632909108019434e-06, "loss": 0.3388531804084778, "step": 11552, "token_acc": 0.879774897680764 }, { "epoch": 0.6233745211244807, "grad_norm": 0.47535812854766846, "learning_rate": 6.561649844797822e-06, "loss": 0.37979763746261597, "step": 11553, "token_acc": 0.8671338472319552 }, { "epoch": 0.6234284789294772, "grad_norm": 0.42865660786628723, "learning_rate": 6.560008883800013e-06, "loss": 0.32196423411369324, "step": 11554, "token_acc": 0.8827685652487383 }, { "epoch": 0.6234824367344737, "grad_norm": 0.4240790605545044, "learning_rate": 6.558368027858633e-06, "loss": 0.3312162458896637, "step": 11555, "token_acc": 0.8791355389541089 }, { "epoch": 0.6235363945394702, "grad_norm": 0.45245131850242615, "learning_rate": 6.55672727702379e-06, "loss": 0.3839718699455261, "step": 11556, "token_acc": 0.8661852166525064 }, { "epoch": 0.6235903523444666, "grad_norm": 0.39789503812789917, "learning_rate": 6.555086631345591e-06, "loss": 0.37131500244140625, "step": 11557, "token_acc": 0.8718871057000553 }, { "epoch": 0.6236443101494631, "grad_norm": 0.49659430980682373, "learning_rate": 6.553446090874147e-06, "loss": 0.4135924279689789, "step": 11558, "token_acc": 0.8556317335945152 }, { "epoch": 0.6236982679544596, "grad_norm": 0.48737001419067383, "learning_rate": 6.551805655659559e-06, "loss": 0.32865411043167114, "step": 11559, "token_acc": 0.8793774319066148 }, { "epoch": 0.6237522257594561, "grad_norm": 0.41764095425605774, "learning_rate": 6.55016532575192e-06, "loss": 0.3402789831161499, "step": 11560, "token_acc": 0.8791258820851354 }, { "epoch": 0.6238061835644526, "grad_norm": 0.3460250198841095, "learning_rate": 6.54852510120133e-06, "loss": 0.35401102900505066, "step": 11561, "token_acc": 0.8746889181671791 }, { "epoch": 0.6238601413694491, "grad_norm": 0.3379003703594208, "learning_rate": 6.5468849820578785e-06, "loss": 0.2882653474807739, "step": 11562, "token_acc": 0.8960463531015678 }, { "epoch": 0.6239140991744456, "grad_norm": 0.4295441806316376, "learning_rate": 6.5452449683716544e-06, "loss": 0.33446013927459717, "step": 11563, "token_acc": 0.8762581409117821 }, { "epoch": 0.6239680569794421, "grad_norm": 0.4311131238937378, "learning_rate": 6.54360506019274e-06, "loss": 0.4135446846485138, "step": 11564, "token_acc": 0.8581048581048581 }, { "epoch": 0.6240220147844385, "grad_norm": 0.356457382440567, "learning_rate": 6.541965257571227e-06, "loss": 0.354544997215271, "step": 11565, "token_acc": 0.875452115112439 }, { "epoch": 0.624075972589435, "grad_norm": 0.4283556044101715, "learning_rate": 6.540325560557188e-06, "loss": 0.34001338481903076, "step": 11566, "token_acc": 0.8821169277799007 }, { "epoch": 0.6241299303944315, "grad_norm": 0.4324376583099365, "learning_rate": 6.5386859692007e-06, "loss": 0.3255345821380615, "step": 11567, "token_acc": 0.8875269396551724 }, { "epoch": 0.624183888199428, "grad_norm": 0.4208853840827942, "learning_rate": 6.5370464835518365e-06, "loss": 0.31962132453918457, "step": 11568, "token_acc": 0.8866605335786568 }, { "epoch": 0.6242378460044246, "grad_norm": 0.2759954333305359, "learning_rate": 6.535407103660667e-06, "loss": 0.34525227546691895, "step": 11569, "token_acc": 0.8827670645852465 }, { "epoch": 0.6242918038094211, "grad_norm": 0.5320701599121094, "learning_rate": 6.533767829577252e-06, "loss": 0.35996368527412415, "step": 11570, "token_acc": 0.8759097525473072 }, { "epoch": 0.6243457616144176, "grad_norm": 0.5046379566192627, "learning_rate": 6.532128661351663e-06, "loss": 0.38837605714797974, "step": 11571, "token_acc": 0.8659689399054693 }, { "epoch": 0.624399719419414, "grad_norm": 0.48270416259765625, "learning_rate": 6.530489599033956e-06, "loss": 0.382528156042099, "step": 11572, "token_acc": 0.8687525396180414 }, { "epoch": 0.6244536772244105, "grad_norm": 0.42750880122184753, "learning_rate": 6.52885064267419e-06, "loss": 0.337999165058136, "step": 11573, "token_acc": 0.8738232614637109 }, { "epoch": 0.624507635029407, "grad_norm": 0.34349605441093445, "learning_rate": 6.527211792322413e-06, "loss": 0.3578464984893799, "step": 11574, "token_acc": 0.875871137409599 }, { "epoch": 0.6245615928344035, "grad_norm": 0.327364444732666, "learning_rate": 6.525573048028679e-06, "loss": 0.3884051442146301, "step": 11575, "token_acc": 0.8668812507184734 }, { "epoch": 0.6246155506394, "grad_norm": 0.41715577244758606, "learning_rate": 6.523934409843033e-06, "loss": 0.3321305513381958, "step": 11576, "token_acc": 0.8785004516711834 }, { "epoch": 0.6246695084443965, "grad_norm": 0.3517376184463501, "learning_rate": 6.522295877815521e-06, "loss": 0.32992327213287354, "step": 11577, "token_acc": 0.8823969104273078 }, { "epoch": 0.624723466249393, "grad_norm": 0.3173430860042572, "learning_rate": 6.520657451996182e-06, "loss": 0.3355438709259033, "step": 11578, "token_acc": 0.8812901661210137 }, { "epoch": 0.6247774240543895, "grad_norm": 0.4294498562812805, "learning_rate": 6.5190191324350515e-06, "loss": 0.2926284670829773, "step": 11579, "token_acc": 0.8943298969072165 }, { "epoch": 0.6248313818593859, "grad_norm": 0.45180314779281616, "learning_rate": 6.517380919182166e-06, "loss": 0.3362196981906891, "step": 11580, "token_acc": 0.880275144078825 }, { "epoch": 0.6248853396643824, "grad_norm": 0.34769341349601746, "learning_rate": 6.515742812287556e-06, "loss": 0.31566178798675537, "step": 11581, "token_acc": 0.8870441322548602 }, { "epoch": 0.6249392974693789, "grad_norm": 0.5232882499694824, "learning_rate": 6.514104811801245e-06, "loss": 0.3446744680404663, "step": 11582, "token_acc": 0.8736401673640167 }, { "epoch": 0.6249932552743754, "grad_norm": 0.3561307191848755, "learning_rate": 6.51246691777326e-06, "loss": 0.3442401885986328, "step": 11583, "token_acc": 0.8758658908284843 }, { "epoch": 0.6250472130793719, "grad_norm": 0.4471670687198639, "learning_rate": 6.5108291302536245e-06, "loss": 0.35620182752609253, "step": 11584, "token_acc": 0.8731751824817519 }, { "epoch": 0.6251011708843685, "grad_norm": 0.46299415826797485, "learning_rate": 6.509191449292351e-06, "loss": 0.4130968451499939, "step": 11585, "token_acc": 0.8595904722106142 }, { "epoch": 0.625155128689365, "grad_norm": 0.44589459896087646, "learning_rate": 6.507553874939457e-06, "loss": 0.3625832796096802, "step": 11586, "token_acc": 0.8673638778220452 }, { "epoch": 0.6252090864943614, "grad_norm": 0.36623936891555786, "learning_rate": 6.505916407244953e-06, "loss": 0.3501048684120178, "step": 11587, "token_acc": 0.8727944193680756 }, { "epoch": 0.6252630442993579, "grad_norm": 0.3651779592037201, "learning_rate": 6.504279046258847e-06, "loss": 0.3608834147453308, "step": 11588, "token_acc": 0.8737387148167818 }, { "epoch": 0.6253170021043544, "grad_norm": 0.3976840078830719, "learning_rate": 6.502641792031138e-06, "loss": 0.3714570701122284, "step": 11589, "token_acc": 0.8725885096459615 }, { "epoch": 0.6253709599093509, "grad_norm": 0.3962946832180023, "learning_rate": 6.501004644611837e-06, "loss": 0.3713420629501343, "step": 11590, "token_acc": 0.870956758597208 }, { "epoch": 0.6254249177143474, "grad_norm": 0.40254610776901245, "learning_rate": 6.499367604050938e-06, "loss": 0.3379923701286316, "step": 11591, "token_acc": 0.8788225207202057 }, { "epoch": 0.6254788755193439, "grad_norm": 0.4521408975124359, "learning_rate": 6.497730670398435e-06, "loss": 0.33384135365486145, "step": 11592, "token_acc": 0.8812180820881407 }, { "epoch": 0.6255328333243404, "grad_norm": 0.4224811792373657, "learning_rate": 6.496093843704323e-06, "loss": 0.33319491147994995, "step": 11593, "token_acc": 0.8799891392886234 }, { "epoch": 0.6255867911293369, "grad_norm": 0.47674649953842163, "learning_rate": 6.494457124018582e-06, "loss": 0.33194485306739807, "step": 11594, "token_acc": 0.8859697135559205 }, { "epoch": 0.6256407489343333, "grad_norm": 0.3967076241970062, "learning_rate": 6.492820511391206e-06, "loss": 0.37007999420166016, "step": 11595, "token_acc": 0.8709171302239338 }, { "epoch": 0.6256947067393298, "grad_norm": 0.510715901851654, "learning_rate": 6.491184005872166e-06, "loss": 0.37116581201553345, "step": 11596, "token_acc": 0.8703703703703703 }, { "epoch": 0.6257486645443263, "grad_norm": 0.3531462848186493, "learning_rate": 6.489547607511453e-06, "loss": 0.3316453695297241, "step": 11597, "token_acc": 0.8839 }, { "epoch": 0.6258026223493228, "grad_norm": 0.38673701882362366, "learning_rate": 6.4879113163590345e-06, "loss": 0.35666245222091675, "step": 11598, "token_acc": 0.8721426394391953 }, { "epoch": 0.6258565801543193, "grad_norm": 0.4136943817138672, "learning_rate": 6.4862751324648865e-06, "loss": 0.2761223614215851, "step": 11599, "token_acc": 0.8980641667794774 }, { "epoch": 0.6259105379593158, "grad_norm": 0.43305864930152893, "learning_rate": 6.484639055878974e-06, "loss": 0.3508497476577759, "step": 11600, "token_acc": 0.8738698010849909 }, { "epoch": 0.6259644957643123, "grad_norm": 0.5242360234260559, "learning_rate": 6.483003086651265e-06, "loss": 0.351455420255661, "step": 11601, "token_acc": 0.8731394946348218 }, { "epoch": 0.6260184535693089, "grad_norm": 0.38207611441612244, "learning_rate": 6.481367224831717e-06, "loss": 0.34519439935684204, "step": 11602, "token_acc": 0.8778398953253241 }, { "epoch": 0.6260724113743052, "grad_norm": 0.3800928294658661, "learning_rate": 6.4797314704702956e-06, "loss": 0.37504467368125916, "step": 11603, "token_acc": 0.8735845171916821 }, { "epoch": 0.6261263691793018, "grad_norm": 0.36013713479042053, "learning_rate": 6.478095823616952e-06, "loss": 0.36942002177238464, "step": 11604, "token_acc": 0.8681919920417807 }, { "epoch": 0.6261803269842983, "grad_norm": 0.4098169803619385, "learning_rate": 6.47646028432164e-06, "loss": 0.36251524090766907, "step": 11605, "token_acc": 0.8732394366197183 }, { "epoch": 0.6262342847892948, "grad_norm": 0.44955992698669434, "learning_rate": 6.474824852634307e-06, "loss": 0.35279983282089233, "step": 11606, "token_acc": 0.8758109360518999 }, { "epoch": 0.6262882425942913, "grad_norm": 0.25038036704063416, "learning_rate": 6.4731895286048995e-06, "loss": 0.33131682872772217, "step": 11607, "token_acc": 0.8789202064311235 }, { "epoch": 0.6263422003992878, "grad_norm": 0.3430149555206299, "learning_rate": 6.471554312283362e-06, "loss": 0.30286669731140137, "step": 11608, "token_acc": 0.8874134764797288 }, { "epoch": 0.6263961582042843, "grad_norm": 0.33742108941078186, "learning_rate": 6.4699192037196265e-06, "loss": 0.3241233825683594, "step": 11609, "token_acc": 0.8858418367346939 }, { "epoch": 0.6264501160092807, "grad_norm": 0.360912561416626, "learning_rate": 6.468284202963637e-06, "loss": 0.30622124671936035, "step": 11610, "token_acc": 0.8831695331695332 }, { "epoch": 0.6265040738142772, "grad_norm": 0.2947980761528015, "learning_rate": 6.466649310065324e-06, "loss": 0.32699471712112427, "step": 11611, "token_acc": 0.8835797837122048 }, { "epoch": 0.6265580316192737, "grad_norm": 0.45574745535850525, "learning_rate": 6.4650145250746134e-06, "loss": 0.38704001903533936, "step": 11612, "token_acc": 0.8683590208522212 }, { "epoch": 0.6266119894242702, "grad_norm": 0.31977781653404236, "learning_rate": 6.463379848041435e-06, "loss": 0.35936033725738525, "step": 11613, "token_acc": 0.8726690941189451 }, { "epoch": 0.6266659472292667, "grad_norm": 0.3808014392852783, "learning_rate": 6.461745279015709e-06, "loss": 0.3499477505683899, "step": 11614, "token_acc": 0.8774912735052359 }, { "epoch": 0.6267199050342632, "grad_norm": 0.4456857442855835, "learning_rate": 6.46011081804735e-06, "loss": 0.37693697214126587, "step": 11615, "token_acc": 0.8678608133901042 }, { "epoch": 0.6267738628392597, "grad_norm": 0.43101632595062256, "learning_rate": 6.458476465186285e-06, "loss": 0.38777896761894226, "step": 11616, "token_acc": 0.8635390652871923 }, { "epoch": 0.6268278206442562, "grad_norm": 0.39903438091278076, "learning_rate": 6.4568422204824225e-06, "loss": 0.33118635416030884, "step": 11617, "token_acc": 0.8821820103024699 }, { "epoch": 0.6268817784492526, "grad_norm": 0.4157073199748993, "learning_rate": 6.455208083985668e-06, "loss": 0.3669508099555969, "step": 11618, "token_acc": 0.8669833729216152 }, { "epoch": 0.6269357362542491, "grad_norm": 0.5581262111663818, "learning_rate": 6.45357405574593e-06, "loss": 0.4491085410118103, "step": 11619, "token_acc": 0.8500757193336699 }, { "epoch": 0.6269896940592457, "grad_norm": 0.3851716220378876, "learning_rate": 6.4519401358131106e-06, "loss": 0.3765416145324707, "step": 11620, "token_acc": 0.8667290469188611 }, { "epoch": 0.6270436518642422, "grad_norm": 0.3923340439796448, "learning_rate": 6.450306324237108e-06, "loss": 0.3257727026939392, "step": 11621, "token_acc": 0.8814142678347935 }, { "epoch": 0.6270976096692387, "grad_norm": 0.37000641226768494, "learning_rate": 6.448672621067823e-06, "loss": 0.3594159781932831, "step": 11622, "token_acc": 0.8705783738474434 }, { "epoch": 0.6271515674742352, "grad_norm": 0.38725829124450684, "learning_rate": 6.447039026355147e-06, "loss": 0.33847978711128235, "step": 11623, "token_acc": 0.883493007378042 }, { "epoch": 0.6272055252792317, "grad_norm": 0.26501980423927307, "learning_rate": 6.4454055401489655e-06, "loss": 0.3410877585411072, "step": 11624, "token_acc": 0.8786112833230006 }, { "epoch": 0.6272594830842282, "grad_norm": 0.44333088397979736, "learning_rate": 6.443772162499171e-06, "loss": 0.28734469413757324, "step": 11625, "token_acc": 0.8951062976333735 }, { "epoch": 0.6273134408892246, "grad_norm": 0.3011534810066223, "learning_rate": 6.44213889345564e-06, "loss": 0.339363694190979, "step": 11626, "token_acc": 0.8765710220535926 }, { "epoch": 0.6273673986942211, "grad_norm": 0.31070253252983093, "learning_rate": 6.440505733068259e-06, "loss": 0.30413955450057983, "step": 11627, "token_acc": 0.8900552486187845 }, { "epoch": 0.6274213564992176, "grad_norm": 0.3929961919784546, "learning_rate": 6.438872681386895e-06, "loss": 0.31654787063598633, "step": 11628, "token_acc": 0.8882048411921086 }, { "epoch": 0.6274753143042141, "grad_norm": 0.4096187651157379, "learning_rate": 6.4372397384614296e-06, "loss": 0.32970571517944336, "step": 11629, "token_acc": 0.8823019587450165 }, { "epoch": 0.6275292721092106, "grad_norm": 0.3670101463794708, "learning_rate": 6.435606904341729e-06, "loss": 0.35750892758369446, "step": 11630, "token_acc": 0.8739455863134133 }, { "epoch": 0.6275832299142071, "grad_norm": 0.30102959275245667, "learning_rate": 6.4339741790776596e-06, "loss": 0.28434914350509644, "step": 11631, "token_acc": 0.8931658407283624 }, { "epoch": 0.6276371877192036, "grad_norm": 0.36616548895835876, "learning_rate": 6.432341562719087e-06, "loss": 0.374551922082901, "step": 11632, "token_acc": 0.8633254406735069 }, { "epoch": 0.6276911455242, "grad_norm": 0.36467576026916504, "learning_rate": 6.430709055315866e-06, "loss": 0.38970720767974854, "step": 11633, "token_acc": 0.8625127980108235 }, { "epoch": 0.6277451033291965, "grad_norm": 0.3700209856033325, "learning_rate": 6.429076656917854e-06, "loss": 0.31275704503059387, "step": 11634, "token_acc": 0.8901618215666619 }, { "epoch": 0.627799061134193, "grad_norm": 0.42137643694877625, "learning_rate": 6.427444367574907e-06, "loss": 0.3200181722640991, "step": 11635, "token_acc": 0.8816390633923472 }, { "epoch": 0.6278530189391895, "grad_norm": 0.4021230638027191, "learning_rate": 6.425812187336874e-06, "loss": 0.3283122777938843, "step": 11636, "token_acc": 0.878392254657474 }, { "epoch": 0.627906976744186, "grad_norm": 0.5503629446029663, "learning_rate": 6.4241801162536e-06, "loss": 0.3638724088668823, "step": 11637, "token_acc": 0.8760078838917756 }, { "epoch": 0.6279609345491826, "grad_norm": 0.33580106496810913, "learning_rate": 6.422548154374928e-06, "loss": 0.3447505831718445, "step": 11638, "token_acc": 0.8782403770620582 }, { "epoch": 0.6280148923541791, "grad_norm": 0.4434106647968292, "learning_rate": 6.420916301750699e-06, "loss": 0.32707905769348145, "step": 11639, "token_acc": 0.8845096241979835 }, { "epoch": 0.6280688501591756, "grad_norm": 0.41003045439720154, "learning_rate": 6.41928455843075e-06, "loss": 0.3368047773838043, "step": 11640, "token_acc": 0.8821317639365702 }, { "epoch": 0.628122807964172, "grad_norm": 0.3818203806877136, "learning_rate": 6.417652924464908e-06, "loss": 0.33523988723754883, "step": 11641, "token_acc": 0.8838060384263495 }, { "epoch": 0.6281767657691685, "grad_norm": 0.39273306727409363, "learning_rate": 6.416021399903009e-06, "loss": 0.33913299441337585, "step": 11642, "token_acc": 0.8762999452654625 }, { "epoch": 0.628230723574165, "grad_norm": 0.40697774291038513, "learning_rate": 6.4143899847948795e-06, "loss": 0.30364900827407837, "step": 11643, "token_acc": 0.8908648446274804 }, { "epoch": 0.6282846813791615, "grad_norm": 0.4102433919906616, "learning_rate": 6.4127586791903405e-06, "loss": 0.37678104639053345, "step": 11644, "token_acc": 0.8705480352214483 }, { "epoch": 0.628338639184158, "grad_norm": 0.4313523769378662, "learning_rate": 6.41112748313921e-06, "loss": 0.3649313449859619, "step": 11645, "token_acc": 0.871229557843731 }, { "epoch": 0.6283925969891545, "grad_norm": 0.3140735328197479, "learning_rate": 6.409496396691308e-06, "loss": 0.31551748514175415, "step": 11646, "token_acc": 0.8863363363363364 }, { "epoch": 0.628446554794151, "grad_norm": 0.47918060421943665, "learning_rate": 6.40786541989644e-06, "loss": 0.3122515082359314, "step": 11647, "token_acc": 0.8883217639853002 }, { "epoch": 0.6285005125991475, "grad_norm": 0.37383222579956055, "learning_rate": 6.406234552804425e-06, "loss": 0.36129528284072876, "step": 11648, "token_acc": 0.8769381746810598 }, { "epoch": 0.6285544704041439, "grad_norm": 0.4043578505516052, "learning_rate": 6.404603795465067e-06, "loss": 0.31115275621414185, "step": 11649, "token_acc": 0.889821029082774 }, { "epoch": 0.6286084282091404, "grad_norm": 0.4296042025089264, "learning_rate": 6.402973147928166e-06, "loss": 0.35662245750427246, "step": 11650, "token_acc": 0.8734139864266746 }, { "epoch": 0.6286623860141369, "grad_norm": 0.29959991574287415, "learning_rate": 6.401342610243524e-06, "loss": 0.3086027503013611, "step": 11651, "token_acc": 0.8873606486173448 }, { "epoch": 0.6287163438191334, "grad_norm": 0.4290509819984436, "learning_rate": 6.399712182460935e-06, "loss": 0.3230838179588318, "step": 11652, "token_acc": 0.8835902607693416 }, { "epoch": 0.62877030162413, "grad_norm": 0.3576243817806244, "learning_rate": 6.3980818646301926e-06, "loss": 0.38968998193740845, "step": 11653, "token_acc": 0.8704865231395151 }, { "epoch": 0.6288242594291265, "grad_norm": 0.3599371016025543, "learning_rate": 6.396451656801083e-06, "loss": 0.3459901511669159, "step": 11654, "token_acc": 0.8763497498024756 }, { "epoch": 0.628878217234123, "grad_norm": 0.3497902452945709, "learning_rate": 6.3948215590233986e-06, "loss": 0.32123303413391113, "step": 11655, "token_acc": 0.8847563458667028 }, { "epoch": 0.6289321750391194, "grad_norm": 0.42659246921539307, "learning_rate": 6.39319157134692e-06, "loss": 0.3405889570713043, "step": 11656, "token_acc": 0.8794742729306487 }, { "epoch": 0.6289861328441159, "grad_norm": 0.3813205063343048, "learning_rate": 6.391561693821426e-06, "loss": 0.38280555605888367, "step": 11657, "token_acc": 0.8677563150074294 }, { "epoch": 0.6290400906491124, "grad_norm": 0.4052734076976776, "learning_rate": 6.389931926496693e-06, "loss": 0.3172735571861267, "step": 11658, "token_acc": 0.891280947255113 }, { "epoch": 0.6290940484541089, "grad_norm": 0.35810163617134094, "learning_rate": 6.388302269422492e-06, "loss": 0.3245384097099304, "step": 11659, "token_acc": 0.8835305829001217 }, { "epoch": 0.6291480062591054, "grad_norm": 0.38824203610420227, "learning_rate": 6.386672722648594e-06, "loss": 0.32476383447647095, "step": 11660, "token_acc": 0.8808427514999302 }, { "epoch": 0.6292019640641019, "grad_norm": 0.4515778720378876, "learning_rate": 6.385043286224764e-06, "loss": 0.41955095529556274, "step": 11661, "token_acc": 0.8582352041596575 }, { "epoch": 0.6292559218690984, "grad_norm": 0.44491147994995117, "learning_rate": 6.383413960200766e-06, "loss": 0.3517152667045593, "step": 11662, "token_acc": 0.8740984634681719 }, { "epoch": 0.6293098796740949, "grad_norm": 0.37224137783050537, "learning_rate": 6.381784744626358e-06, "loss": 0.32300153374671936, "step": 11663, "token_acc": 0.8801386825160971 }, { "epoch": 0.6293638374790913, "grad_norm": 0.2979153096675873, "learning_rate": 6.3801556395512965e-06, "loss": 0.3621521592140198, "step": 11664, "token_acc": 0.8756084369929692 }, { "epoch": 0.6294177952840878, "grad_norm": 0.3594704270362854, "learning_rate": 6.378526645025334e-06, "loss": 0.32792380452156067, "step": 11665, "token_acc": 0.8799148650162428 }, { "epoch": 0.6294717530890843, "grad_norm": 0.39204853773117065, "learning_rate": 6.376897761098217e-06, "loss": 0.3055576682090759, "step": 11666, "token_acc": 0.8877086494688923 }, { "epoch": 0.6295257108940808, "grad_norm": 0.4445127248764038, "learning_rate": 6.375268987819694e-06, "loss": 0.4058960974216461, "step": 11667, "token_acc": 0.8618876635034625 }, { "epoch": 0.6295796686990773, "grad_norm": 0.3500071167945862, "learning_rate": 6.373640325239506e-06, "loss": 0.35099339485168457, "step": 11668, "token_acc": 0.8734905117883841 }, { "epoch": 0.6296336265040738, "grad_norm": 0.47109007835388184, "learning_rate": 6.372011773407394e-06, "loss": 0.35197746753692627, "step": 11669, "token_acc": 0.872772536687631 }, { "epoch": 0.6296875843090703, "grad_norm": 0.3825719952583313, "learning_rate": 6.370383332373091e-06, "loss": 0.3411349654197693, "step": 11670, "token_acc": 0.8768135411069318 }, { "epoch": 0.6297415421140669, "grad_norm": 0.41232118010520935, "learning_rate": 6.368755002186331e-06, "loss": 0.4127045273780823, "step": 11671, "token_acc": 0.8555301514718491 }, { "epoch": 0.6297954999190633, "grad_norm": 0.4689803421497345, "learning_rate": 6.367126782896842e-06, "loss": 0.34456560015678406, "step": 11672, "token_acc": 0.87885811882396 }, { "epoch": 0.6298494577240598, "grad_norm": 0.3417821526527405, "learning_rate": 6.365498674554345e-06, "loss": 0.346221923828125, "step": 11673, "token_acc": 0.8804486724407213 }, { "epoch": 0.6299034155290563, "grad_norm": 0.5798506736755371, "learning_rate": 6.363870677208571e-06, "loss": 0.3932495713233948, "step": 11674, "token_acc": 0.8602455146364495 }, { "epoch": 0.6299573733340528, "grad_norm": 0.39585188031196594, "learning_rate": 6.362242790909235e-06, "loss": 0.37596654891967773, "step": 11675, "token_acc": 0.8738898756660746 }, { "epoch": 0.6300113311390493, "grad_norm": 0.3674882650375366, "learning_rate": 6.360615015706051e-06, "loss": 0.33130210638046265, "step": 11676, "token_acc": 0.8842143549951503 }, { "epoch": 0.6300652889440458, "grad_norm": 0.4209117889404297, "learning_rate": 6.358987351648729e-06, "loss": 0.4037913680076599, "step": 11677, "token_acc": 0.8619449656035022 }, { "epoch": 0.6301192467490423, "grad_norm": 0.4168880581855774, "learning_rate": 6.35735979878698e-06, "loss": 0.43741244077682495, "step": 11678, "token_acc": 0.850302855494664 }, { "epoch": 0.6301732045540387, "grad_norm": 0.31444013118743896, "learning_rate": 6.3557323571705056e-06, "loss": 0.39937302470207214, "step": 11679, "token_acc": 0.8553092182030338 }, { "epoch": 0.6302271623590352, "grad_norm": 0.4441521465778351, "learning_rate": 6.354105026849013e-06, "loss": 0.3563719689846039, "step": 11680, "token_acc": 0.8708774113254512 }, { "epoch": 0.6302811201640317, "grad_norm": 0.42416009306907654, "learning_rate": 6.3524778078721985e-06, "loss": 0.2944523096084595, "step": 11681, "token_acc": 0.8922334232571347 }, { "epoch": 0.6303350779690282, "grad_norm": 0.3657912313938141, "learning_rate": 6.3508507002897555e-06, "loss": 0.31644970178604126, "step": 11682, "token_acc": 0.881491344873502 }, { "epoch": 0.6303890357740247, "grad_norm": 0.5354880094528198, "learning_rate": 6.349223704151377e-06, "loss": 0.3496996760368347, "step": 11683, "token_acc": 0.8775164229709684 }, { "epoch": 0.6304429935790212, "grad_norm": 0.32635849714279175, "learning_rate": 6.34759681950675e-06, "loss": 0.30186358094215393, "step": 11684, "token_acc": 0.8904895104895105 }, { "epoch": 0.6304969513840177, "grad_norm": 0.3947782516479492, "learning_rate": 6.345970046405558e-06, "loss": 0.35846132040023804, "step": 11685, "token_acc": 0.8713315217391304 }, { "epoch": 0.6305509091890142, "grad_norm": 0.35555413365364075, "learning_rate": 6.344343384897484e-06, "loss": 0.3593767285346985, "step": 11686, "token_acc": 0.8718775181305399 }, { "epoch": 0.6306048669940106, "grad_norm": 0.4493328630924225, "learning_rate": 6.3427168350322055e-06, "loss": 0.3477815091609955, "step": 11687, "token_acc": 0.8760941744642318 }, { "epoch": 0.6306588247990071, "grad_norm": 0.4324655830860138, "learning_rate": 6.3410903968593976e-06, "loss": 0.3865368366241455, "step": 11688, "token_acc": 0.8678324522163481 }, { "epoch": 0.6307127826040037, "grad_norm": 0.34516727924346924, "learning_rate": 6.339464070428731e-06, "loss": 0.3367983102798462, "step": 11689, "token_acc": 0.8789407313997478 }, { "epoch": 0.6307667404090002, "grad_norm": 0.4614889323711395, "learning_rate": 6.337837855789872e-06, "loss": 0.37032410502433777, "step": 11690, "token_acc": 0.8665270088283705 }, { "epoch": 0.6308206982139967, "grad_norm": 0.4267619848251343, "learning_rate": 6.336211752992485e-06, "loss": 0.3812820315361023, "step": 11691, "token_acc": 0.8680677174547577 }, { "epoch": 0.6308746560189932, "grad_norm": 0.4378737509250641, "learning_rate": 6.33458576208623e-06, "loss": 0.3304220139980316, "step": 11692, "token_acc": 0.878479293957909 }, { "epoch": 0.6309286138239897, "grad_norm": 0.3481523394584656, "learning_rate": 6.332959883120767e-06, "loss": 0.30128908157348633, "step": 11693, "token_acc": 0.8900182731841023 }, { "epoch": 0.6309825716289861, "grad_norm": 0.3449137508869171, "learning_rate": 6.33133411614575e-06, "loss": 0.3557813763618469, "step": 11694, "token_acc": 0.8758382442592969 }, { "epoch": 0.6310365294339826, "grad_norm": 0.4089016914367676, "learning_rate": 6.329708461210826e-06, "loss": 0.27965500950813293, "step": 11695, "token_acc": 0.8978428351309707 }, { "epoch": 0.6310904872389791, "grad_norm": 0.31299564242362976, "learning_rate": 6.328082918365645e-06, "loss": 0.33286309242248535, "step": 11696, "token_acc": 0.876770538243626 }, { "epoch": 0.6311444450439756, "grad_norm": 0.33069533109664917, "learning_rate": 6.3264574876598505e-06, "loss": 0.33414480090141296, "step": 11697, "token_acc": 0.8845774416310425 }, { "epoch": 0.6311984028489721, "grad_norm": 0.34100672602653503, "learning_rate": 6.324832169143082e-06, "loss": 0.31238701939582825, "step": 11698, "token_acc": 0.8887763143082292 }, { "epoch": 0.6312523606539686, "grad_norm": 0.3490716218948364, "learning_rate": 6.323206962864974e-06, "loss": 0.34529581665992737, "step": 11699, "token_acc": 0.8818222156485727 }, { "epoch": 0.6313063184589651, "grad_norm": 0.3466681241989136, "learning_rate": 6.3215818688751625e-06, "loss": 0.32555437088012695, "step": 11700, "token_acc": 0.8842087598590368 }, { "epoch": 0.6313602762639616, "grad_norm": 0.3990417420864105, "learning_rate": 6.31995688722328e-06, "loss": 0.35593974590301514, "step": 11701, "token_acc": 0.8737285672769544 }, { "epoch": 0.631414234068958, "grad_norm": 0.40676388144493103, "learning_rate": 6.318332017958948e-06, "loss": 0.41358283162117004, "step": 11702, "token_acc": 0.854060035966247 }, { "epoch": 0.6314681918739545, "grad_norm": 0.4579964280128479, "learning_rate": 6.3167072611317915e-06, "loss": 0.3640117049217224, "step": 11703, "token_acc": 0.8726450640542577 }, { "epoch": 0.631522149678951, "grad_norm": 0.4804459512233734, "learning_rate": 6.3150826167914315e-06, "loss": 0.3675481677055359, "step": 11704, "token_acc": 0.8785135135135135 }, { "epoch": 0.6315761074839475, "grad_norm": 0.3378598988056183, "learning_rate": 6.313458084987478e-06, "loss": 0.33107447624206543, "step": 11705, "token_acc": 0.8838464368943947 }, { "epoch": 0.631630065288944, "grad_norm": 0.3273528516292572, "learning_rate": 6.311833665769555e-06, "loss": 0.3245198130607605, "step": 11706, "token_acc": 0.8861587015329125 }, { "epoch": 0.6316840230939406, "grad_norm": 0.43472200632095337, "learning_rate": 6.310209359187264e-06, "loss": 0.3917953670024872, "step": 11707, "token_acc": 0.8651700144413811 }, { "epoch": 0.6317379808989371, "grad_norm": 0.40684977173805237, "learning_rate": 6.308585165290213e-06, "loss": 0.29309800267219543, "step": 11708, "token_acc": 0.8936370989485036 }, { "epoch": 0.6317919387039336, "grad_norm": 0.41423141956329346, "learning_rate": 6.3069610841280026e-06, "loss": 0.33011412620544434, "step": 11709, "token_acc": 0.8855017502917153 }, { "epoch": 0.63184589650893, "grad_norm": 0.3657793402671814, "learning_rate": 6.305337115750237e-06, "loss": 0.3268009424209595, "step": 11710, "token_acc": 0.883008356545961 }, { "epoch": 0.6318998543139265, "grad_norm": 0.29721054434776306, "learning_rate": 6.3037132602065055e-06, "loss": 0.27753713726997375, "step": 11711, "token_acc": 0.8984240334892883 }, { "epoch": 0.631953812118923, "grad_norm": 0.48272261023521423, "learning_rate": 6.302089517546399e-06, "loss": 0.40145543217658997, "step": 11712, "token_acc": 0.8571198193839703 }, { "epoch": 0.6320077699239195, "grad_norm": 0.43643853068351746, "learning_rate": 6.300465887819513e-06, "loss": 0.3489685654640198, "step": 11713, "token_acc": 0.8789056660188369 }, { "epoch": 0.632061727728916, "grad_norm": 0.5032820105552673, "learning_rate": 6.298842371075431e-06, "loss": 0.356573224067688, "step": 11714, "token_acc": 0.8768532122345398 }, { "epoch": 0.6321156855339125, "grad_norm": 0.40727922320365906, "learning_rate": 6.2972189673637315e-06, "loss": 0.36517876386642456, "step": 11715, "token_acc": 0.8695182836324382 }, { "epoch": 0.632169643338909, "grad_norm": 0.29665884375572205, "learning_rate": 6.295595676733997e-06, "loss": 0.35273027420043945, "step": 11716, "token_acc": 0.8796736913664174 }, { "epoch": 0.6322236011439054, "grad_norm": 0.5791164636611938, "learning_rate": 6.293972499235798e-06, "loss": 0.35890305042266846, "step": 11717, "token_acc": 0.8718898104265402 }, { "epoch": 0.6322775589489019, "grad_norm": 0.33002564311027527, "learning_rate": 6.2923494349187074e-06, "loss": 0.33333826065063477, "step": 11718, "token_acc": 0.8816195531415875 }, { "epoch": 0.6323315167538984, "grad_norm": 0.413346529006958, "learning_rate": 6.2907264838322965e-06, "loss": 0.38247883319854736, "step": 11719, "token_acc": 0.8711671024483004 }, { "epoch": 0.6323854745588949, "grad_norm": 0.46226829290390015, "learning_rate": 6.289103646026125e-06, "loss": 0.3666147291660309, "step": 11720, "token_acc": 0.8744894753377317 }, { "epoch": 0.6324394323638914, "grad_norm": 0.35624682903289795, "learning_rate": 6.2874809215497575e-06, "loss": 0.3139336109161377, "step": 11721, "token_acc": 0.8837985436893204 }, { "epoch": 0.632493390168888, "grad_norm": 0.36523163318634033, "learning_rate": 6.28585831045275e-06, "loss": 0.36329352855682373, "step": 11722, "token_acc": 0.8724708767627223 }, { "epoch": 0.6325473479738845, "grad_norm": 0.4152067005634308, "learning_rate": 6.2842358127846555e-06, "loss": 0.30146172642707825, "step": 11723, "token_acc": 0.8896144278606966 }, { "epoch": 0.632601305778881, "grad_norm": 0.39119869470596313, "learning_rate": 6.282613428595028e-06, "loss": 0.3126258850097656, "step": 11724, "token_acc": 0.8836833602584814 }, { "epoch": 0.6326552635838774, "grad_norm": 0.45851680636405945, "learning_rate": 6.28099115793341e-06, "loss": 0.3466333746910095, "step": 11725, "token_acc": 0.8775247888358428 }, { "epoch": 0.6327092213888739, "grad_norm": 0.39761191606521606, "learning_rate": 6.279369000849349e-06, "loss": 0.3550025224685669, "step": 11726, "token_acc": 0.8719787516600266 }, { "epoch": 0.6327631791938704, "grad_norm": 0.4510703384876251, "learning_rate": 6.277746957392383e-06, "loss": 0.3665216565132141, "step": 11727, "token_acc": 0.8719841555635578 }, { "epoch": 0.6328171369988669, "grad_norm": 0.3941185772418976, "learning_rate": 6.276125027612053e-06, "loss": 0.3657582700252533, "step": 11728, "token_acc": 0.8714555765595463 }, { "epoch": 0.6328710948038634, "grad_norm": 0.4128369688987732, "learning_rate": 6.274503211557885e-06, "loss": 0.28590720891952515, "step": 11729, "token_acc": 0.8974960876369327 }, { "epoch": 0.6329250526088599, "grad_norm": 0.4112984538078308, "learning_rate": 6.272881509279416e-06, "loss": 0.3522079586982727, "step": 11730, "token_acc": 0.8741141141141141 }, { "epoch": 0.6329790104138564, "grad_norm": 0.3916579782962799, "learning_rate": 6.2712599208261645e-06, "loss": 0.33113253116607666, "step": 11731, "token_acc": 0.88498248157545 }, { "epoch": 0.6330329682188529, "grad_norm": 0.37961864471435547, "learning_rate": 6.269638446247662e-06, "loss": 0.320791631937027, "step": 11732, "token_acc": 0.8835048958071805 }, { "epoch": 0.6330869260238493, "grad_norm": 0.31094658374786377, "learning_rate": 6.268017085593425e-06, "loss": 0.34374162554740906, "step": 11733, "token_acc": 0.8767154360893729 }, { "epoch": 0.6331408838288458, "grad_norm": 0.36962977051734924, "learning_rate": 6.266395838912969e-06, "loss": 0.32515019178390503, "step": 11734, "token_acc": 0.8815720524017467 }, { "epoch": 0.6331948416338423, "grad_norm": 0.3962295651435852, "learning_rate": 6.264774706255805e-06, "loss": 0.33778712153434753, "step": 11735, "token_acc": 0.8802608180201541 }, { "epoch": 0.6332487994388388, "grad_norm": 0.3359425365924835, "learning_rate": 6.263153687671444e-06, "loss": 0.3284800052642822, "step": 11736, "token_acc": 0.8839169139465876 }, { "epoch": 0.6333027572438353, "grad_norm": 0.3998490571975708, "learning_rate": 6.261532783209384e-06, "loss": 0.3631945550441742, "step": 11737, "token_acc": 0.8759072816670569 }, { "epoch": 0.6333567150488318, "grad_norm": 0.46504154801368713, "learning_rate": 6.259911992919139e-06, "loss": 0.3761424422264099, "step": 11738, "token_acc": 0.8694191070571291 }, { "epoch": 0.6334106728538283, "grad_norm": 0.4660288095474243, "learning_rate": 6.258291316850203e-06, "loss": 0.3707343339920044, "step": 11739, "token_acc": 0.8718525179856115 }, { "epoch": 0.6334646306588247, "grad_norm": 0.4847969114780426, "learning_rate": 6.256670755052068e-06, "loss": 0.38189250230789185, "step": 11740, "token_acc": 0.8708220415537489 }, { "epoch": 0.6335185884638213, "grad_norm": 0.4868440628051758, "learning_rate": 6.255050307574229e-06, "loss": 0.34842193126678467, "step": 11741, "token_acc": 0.8745141588006663 }, { "epoch": 0.6335725462688178, "grad_norm": 0.38771867752075195, "learning_rate": 6.253429974466171e-06, "loss": 0.3454558253288269, "step": 11742, "token_acc": 0.8796269727403156 }, { "epoch": 0.6336265040738143, "grad_norm": 0.3362646698951721, "learning_rate": 6.2518097557773805e-06, "loss": 0.3940812051296234, "step": 11743, "token_acc": 0.8592976667452275 }, { "epoch": 0.6336804618788108, "grad_norm": 0.45898014307022095, "learning_rate": 6.250189651557336e-06, "loss": 0.3706730604171753, "step": 11744, "token_acc": 0.8769111969111969 }, { "epoch": 0.6337344196838073, "grad_norm": 0.337820827960968, "learning_rate": 6.248569661855522e-06, "loss": 0.34275248646736145, "step": 11745, "token_acc": 0.8798578470709618 }, { "epoch": 0.6337883774888038, "grad_norm": 0.396544486284256, "learning_rate": 6.246949786721404e-06, "loss": 0.33080142736434937, "step": 11746, "token_acc": 0.8819540108085197 }, { "epoch": 0.6338423352938003, "grad_norm": 0.37436890602111816, "learning_rate": 6.245330026204458e-06, "loss": 0.35070669651031494, "step": 11747, "token_acc": 0.8745042855315338 }, { "epoch": 0.6338962930987967, "grad_norm": 0.4132118225097656, "learning_rate": 6.243710380354149e-06, "loss": 0.3014237582683563, "step": 11748, "token_acc": 0.8926270579813886 }, { "epoch": 0.6339502509037932, "grad_norm": 0.36676639318466187, "learning_rate": 6.2420908492199415e-06, "loss": 0.2516658306121826, "step": 11749, "token_acc": 0.9030741410488246 }, { "epoch": 0.6340042087087897, "grad_norm": 0.3069291114807129, "learning_rate": 6.240471432851294e-06, "loss": 0.30009299516677856, "step": 11750, "token_acc": 0.892589978828511 }, { "epoch": 0.6340581665137862, "grad_norm": 0.37060219049453735, "learning_rate": 6.238852131297663e-06, "loss": 0.382432758808136, "step": 11751, "token_acc": 0.8646321196555371 }, { "epoch": 0.6341121243187827, "grad_norm": 0.33560824394226074, "learning_rate": 6.237232944608505e-06, "loss": 0.3004123568534851, "step": 11752, "token_acc": 0.8877281308999371 }, { "epoch": 0.6341660821237792, "grad_norm": 0.3142927587032318, "learning_rate": 6.235613872833268e-06, "loss": 0.33472466468811035, "step": 11753, "token_acc": 0.8833576806382739 }, { "epoch": 0.6342200399287757, "grad_norm": 0.4006540775299072, "learning_rate": 6.233994916021395e-06, "loss": 0.32422128319740295, "step": 11754, "token_acc": 0.8805743243243244 }, { "epoch": 0.6342739977337722, "grad_norm": 0.43946218490600586, "learning_rate": 6.232376074222333e-06, "loss": 0.32383301854133606, "step": 11755, "token_acc": 0.8859871660946127 }, { "epoch": 0.6343279555387686, "grad_norm": 0.42131978273391724, "learning_rate": 6.230757347485517e-06, "loss": 0.3389565646648407, "step": 11756, "token_acc": 0.8806276580143716 }, { "epoch": 0.6343819133437651, "grad_norm": 0.4246353209018707, "learning_rate": 6.22913873586038e-06, "loss": 0.36212408542633057, "step": 11757, "token_acc": 0.8680449984797811 }, { "epoch": 0.6344358711487617, "grad_norm": 0.4154210090637207, "learning_rate": 6.227520239396365e-06, "loss": 0.3728288412094116, "step": 11758, "token_acc": 0.8655597214783074 }, { "epoch": 0.6344898289537582, "grad_norm": 0.3730306625366211, "learning_rate": 6.225901858142891e-06, "loss": 0.37647420167922974, "step": 11759, "token_acc": 0.8677988677988678 }, { "epoch": 0.6345437867587547, "grad_norm": 0.5851585268974304, "learning_rate": 6.224283592149386e-06, "loss": 0.38044390082359314, "step": 11760, "token_acc": 0.8655750960177886 }, { "epoch": 0.6345977445637512, "grad_norm": 0.4699289798736572, "learning_rate": 6.222665441465271e-06, "loss": 0.3146611452102661, "step": 11761, "token_acc": 0.8863337713534822 }, { "epoch": 0.6346517023687477, "grad_norm": 0.4465234875679016, "learning_rate": 6.221047406139964e-06, "loss": 0.38743361830711365, "step": 11762, "token_acc": 0.8634627233518176 }, { "epoch": 0.6347056601737441, "grad_norm": 0.3195347487926483, "learning_rate": 6.219429486222874e-06, "loss": 0.3281404376029968, "step": 11763, "token_acc": 0.8845728086947771 }, { "epoch": 0.6347596179787406, "grad_norm": 0.47417569160461426, "learning_rate": 6.217811681763423e-06, "loss": 0.32717040181159973, "step": 11764, "token_acc": 0.8823039786915265 }, { "epoch": 0.6348135757837371, "grad_norm": 0.3931812047958374, "learning_rate": 6.216193992811011e-06, "loss": 0.329122930765152, "step": 11765, "token_acc": 0.8820551284455278 }, { "epoch": 0.6348675335887336, "grad_norm": 0.36442822217941284, "learning_rate": 6.2145764194150435e-06, "loss": 0.286800742149353, "step": 11766, "token_acc": 0.8923803898405198 }, { "epoch": 0.6349214913937301, "grad_norm": 0.5009528994560242, "learning_rate": 6.212958961624922e-06, "loss": 0.33190661668777466, "step": 11767, "token_acc": 0.8781693509807048 }, { "epoch": 0.6349754491987266, "grad_norm": 0.3389662206172943, "learning_rate": 6.211341619490042e-06, "loss": 0.3516649305820465, "step": 11768, "token_acc": 0.8766085387853277 }, { "epoch": 0.6350294070037231, "grad_norm": 0.43184009194374084, "learning_rate": 6.209724393059795e-06, "loss": 0.35881149768829346, "step": 11769, "token_acc": 0.8760393046107332 }, { "epoch": 0.6350833648087196, "grad_norm": 0.41672366857528687, "learning_rate": 6.2081072823835686e-06, "loss": 0.3400266468524933, "step": 11770, "token_acc": 0.8763219741480611 }, { "epoch": 0.635137322613716, "grad_norm": 0.3864595592021942, "learning_rate": 6.206490287510756e-06, "loss": 0.32911980152130127, "step": 11771, "token_acc": 0.8832504912726853 }, { "epoch": 0.6351912804187125, "grad_norm": 0.3796723484992981, "learning_rate": 6.204873408490736e-06, "loss": 0.3529352843761444, "step": 11772, "token_acc": 0.8722358722358723 }, { "epoch": 0.635245238223709, "grad_norm": 0.4089060425758362, "learning_rate": 6.203256645372888e-06, "loss": 0.3674751818180084, "step": 11773, "token_acc": 0.8708956266592147 }, { "epoch": 0.6352991960287055, "grad_norm": 0.3738364577293396, "learning_rate": 6.201639998206588e-06, "loss": 0.35169142484664917, "step": 11774, "token_acc": 0.8762359737806911 }, { "epoch": 0.6353531538337021, "grad_norm": 0.44353583455085754, "learning_rate": 6.200023467041208e-06, "loss": 0.38895970582962036, "step": 11775, "token_acc": 0.8650506060227414 }, { "epoch": 0.6354071116386986, "grad_norm": 0.411149263381958, "learning_rate": 6.198407051926112e-06, "loss": 0.3351265788078308, "step": 11776, "token_acc": 0.8831948291782087 }, { "epoch": 0.6354610694436951, "grad_norm": 0.3731517493724823, "learning_rate": 6.1967907529106706e-06, "loss": 0.3523210883140564, "step": 11777, "token_acc": 0.8781418607711541 }, { "epoch": 0.6355150272486916, "grad_norm": 0.36955884099006653, "learning_rate": 6.195174570044244e-06, "loss": 0.28590986132621765, "step": 11778, "token_acc": 0.8942211889737629 }, { "epoch": 0.635568985053688, "grad_norm": 0.4636615812778473, "learning_rate": 6.193558503376191e-06, "loss": 0.3182591199874878, "step": 11779, "token_acc": 0.8896753346668719 }, { "epoch": 0.6356229428586845, "grad_norm": 0.34961795806884766, "learning_rate": 6.191942552955863e-06, "loss": 0.37606990337371826, "step": 11780, "token_acc": 0.869106846718787 }, { "epoch": 0.635676900663681, "grad_norm": 0.4797568917274475, "learning_rate": 6.190326718832611e-06, "loss": 0.4102562665939331, "step": 11781, "token_acc": 0.8572956455309396 }, { "epoch": 0.6357308584686775, "grad_norm": 0.5079585313796997, "learning_rate": 6.188711001055782e-06, "loss": 0.39472299814224243, "step": 11782, "token_acc": 0.863709032773781 }, { "epoch": 0.635784816273674, "grad_norm": 0.3698442876338959, "learning_rate": 6.187095399674723e-06, "loss": 0.2982826232910156, "step": 11783, "token_acc": 0.8943382618103137 }, { "epoch": 0.6358387740786705, "grad_norm": 0.3872000575065613, "learning_rate": 6.18547991473877e-06, "loss": 0.3486717939376831, "step": 11784, "token_acc": 0.8761974944731025 }, { "epoch": 0.635892731883667, "grad_norm": 0.46941521763801575, "learning_rate": 6.183864546297262e-06, "loss": 0.38069021701812744, "step": 11785, "token_acc": 0.8680416943889998 }, { "epoch": 0.6359466896886634, "grad_norm": 0.405242383480072, "learning_rate": 6.182249294399531e-06, "loss": 0.3615248203277588, "step": 11786, "token_acc": 0.8749294980259448 }, { "epoch": 0.6360006474936599, "grad_norm": 0.3252771496772766, "learning_rate": 6.180634159094907e-06, "loss": 0.313290536403656, "step": 11787, "token_acc": 0.882941104743324 }, { "epoch": 0.6360546052986564, "grad_norm": 0.41986849904060364, "learning_rate": 6.179019140432715e-06, "loss": 0.33678531646728516, "step": 11788, "token_acc": 0.8745221871364467 }, { "epoch": 0.6361085631036529, "grad_norm": 0.33402106165885925, "learning_rate": 6.177404238462273e-06, "loss": 0.3555523157119751, "step": 11789, "token_acc": 0.875593269092478 }, { "epoch": 0.6361625209086494, "grad_norm": 0.2967303395271301, "learning_rate": 6.175789453232909e-06, "loss": 0.318119078874588, "step": 11790, "token_acc": 0.8863396624472574 }, { "epoch": 0.636216478713646, "grad_norm": 0.3524593710899353, "learning_rate": 6.1741747847939326e-06, "loss": 0.30539870262145996, "step": 11791, "token_acc": 0.8879001004736615 }, { "epoch": 0.6362704365186425, "grad_norm": 0.3159884214401245, "learning_rate": 6.1725602331946575e-06, "loss": 0.3618597686290741, "step": 11792, "token_acc": 0.872274726302985 }, { "epoch": 0.636324394323639, "grad_norm": 0.3871214687824249, "learning_rate": 6.170945798484389e-06, "loss": 0.3165038824081421, "step": 11793, "token_acc": 0.8855827134254339 }, { "epoch": 0.6363783521286354, "grad_norm": 0.3660247027873993, "learning_rate": 6.169331480712433e-06, "loss": 0.3571930229663849, "step": 11794, "token_acc": 0.8811804008908686 }, { "epoch": 0.6364323099336319, "grad_norm": 0.4947599768638611, "learning_rate": 6.167717279928086e-06, "loss": 0.3343674838542938, "step": 11795, "token_acc": 0.8797945205479452 }, { "epoch": 0.6364862677386284, "grad_norm": 0.37247270345687866, "learning_rate": 6.166103196180653e-06, "loss": 0.3284313976764679, "step": 11796, "token_acc": 0.8791936433195998 }, { "epoch": 0.6365402255436249, "grad_norm": 0.3195745646953583, "learning_rate": 6.164489229519423e-06, "loss": 0.33658498525619507, "step": 11797, "token_acc": 0.8791569086651054 }, { "epoch": 0.6365941833486214, "grad_norm": 0.37600186467170715, "learning_rate": 6.162875379993689e-06, "loss": 0.28310590982437134, "step": 11798, "token_acc": 0.8956083513318934 }, { "epoch": 0.6366481411536179, "grad_norm": 0.36583036184310913, "learning_rate": 6.1612616476527355e-06, "loss": 0.3460562825202942, "step": 11799, "token_acc": 0.8764684510112631 }, { "epoch": 0.6367020989586144, "grad_norm": 0.4117582142353058, "learning_rate": 6.159648032545845e-06, "loss": 0.35873037576675415, "step": 11800, "token_acc": 0.8726487390274488 }, { "epoch": 0.6367560567636109, "grad_norm": 0.3494768738746643, "learning_rate": 6.158034534722298e-06, "loss": 0.2961828112602234, "step": 11801, "token_acc": 0.8943293576061938 }, { "epoch": 0.6368100145686073, "grad_norm": 0.397296667098999, "learning_rate": 6.156421154231368e-06, "loss": 0.3275177478790283, "step": 11802, "token_acc": 0.883893149960328 }, { "epoch": 0.6368639723736038, "grad_norm": 0.3894321620464325, "learning_rate": 6.15480789112233e-06, "loss": 0.3167537450790405, "step": 11803, "token_acc": 0.8888559573206876 }, { "epoch": 0.6369179301786003, "grad_norm": 0.46567678451538086, "learning_rate": 6.153194745444453e-06, "loss": 0.35171574354171753, "step": 11804, "token_acc": 0.8766785420663196 }, { "epoch": 0.6369718879835968, "grad_norm": 0.3674378991127014, "learning_rate": 6.151581717246999e-06, "loss": 0.3672720193862915, "step": 11805, "token_acc": 0.874907366236846 }, { "epoch": 0.6370258457885933, "grad_norm": 0.38227012753486633, "learning_rate": 6.149968806579233e-06, "loss": 0.32186752557754517, "step": 11806, "token_acc": 0.8900290747391825 }, { "epoch": 0.6370798035935898, "grad_norm": 0.4218175411224365, "learning_rate": 6.148356013490411e-06, "loss": 0.3902733325958252, "step": 11807, "token_acc": 0.8619100809829656 }, { "epoch": 0.6371337613985864, "grad_norm": 0.4927946627140045, "learning_rate": 6.146743338029784e-06, "loss": 0.3328832983970642, "step": 11808, "token_acc": 0.8787366998817767 }, { "epoch": 0.6371877192035827, "grad_norm": 0.3599427342414856, "learning_rate": 6.145130780246609e-06, "loss": 0.35404276847839355, "step": 11809, "token_acc": 0.8799196312492613 }, { "epoch": 0.6372416770085793, "grad_norm": 0.4964708983898163, "learning_rate": 6.14351834019013e-06, "loss": 0.3689427375793457, "step": 11810, "token_acc": 0.8705535055350554 }, { "epoch": 0.6372956348135758, "grad_norm": 0.3949638605117798, "learning_rate": 6.141906017909591e-06, "loss": 0.3390558063983917, "step": 11811, "token_acc": 0.8788883625453972 }, { "epoch": 0.6373495926185723, "grad_norm": 0.5227759480476379, "learning_rate": 6.14029381345423e-06, "loss": 0.3648695647716522, "step": 11812, "token_acc": 0.8691531011745313 }, { "epoch": 0.6374035504235688, "grad_norm": 0.38170120120048523, "learning_rate": 6.138681726873287e-06, "loss": 0.33821526169776917, "step": 11813, "token_acc": 0.8782787587862163 }, { "epoch": 0.6374575082285653, "grad_norm": 0.36345669627189636, "learning_rate": 6.137069758215991e-06, "loss": 0.2880895435810089, "step": 11814, "token_acc": 0.8911742707554225 }, { "epoch": 0.6375114660335618, "grad_norm": 0.3460220396518707, "learning_rate": 6.135457907531569e-06, "loss": 0.3097699284553528, "step": 11815, "token_acc": 0.8888127227858514 }, { "epoch": 0.6375654238385583, "grad_norm": 0.41785913705825806, "learning_rate": 6.133846174869255e-06, "loss": 0.3304270803928375, "step": 11816, "token_acc": 0.879710813720966 }, { "epoch": 0.6376193816435547, "grad_norm": 0.4527028501033783, "learning_rate": 6.132234560278266e-06, "loss": 0.3106013238430023, "step": 11817, "token_acc": 0.8885250375221722 }, { "epoch": 0.6376733394485512, "grad_norm": 0.5466181039810181, "learning_rate": 6.13062306380782e-06, "loss": 0.3809930980205536, "step": 11818, "token_acc": 0.8684304612084376 }, { "epoch": 0.6377272972535477, "grad_norm": 0.3798595070838928, "learning_rate": 6.129011685507131e-06, "loss": 0.3110446035861969, "step": 11819, "token_acc": 0.8857287963433215 }, { "epoch": 0.6377812550585442, "grad_norm": 0.3798142075538635, "learning_rate": 6.1274004254254114e-06, "loss": 0.43674784898757935, "step": 11820, "token_acc": 0.8453131567885667 }, { "epoch": 0.6378352128635407, "grad_norm": 0.4478450119495392, "learning_rate": 6.1257892836118645e-06, "loss": 0.3944551944732666, "step": 11821, "token_acc": 0.8632518796992481 }, { "epoch": 0.6378891706685372, "grad_norm": 0.3755664825439453, "learning_rate": 6.124178260115703e-06, "loss": 0.3479217290878296, "step": 11822, "token_acc": 0.8800172637030643 }, { "epoch": 0.6379431284735337, "grad_norm": 0.47427621483802795, "learning_rate": 6.122567354986121e-06, "loss": 0.33443695306777954, "step": 11823, "token_acc": 0.8826685744947237 }, { "epoch": 0.6379970862785301, "grad_norm": 0.5202350616455078, "learning_rate": 6.120956568272316e-06, "loss": 0.3434835970401764, "step": 11824, "token_acc": 0.881159420289855 }, { "epoch": 0.6380510440835266, "grad_norm": 0.4497542381286621, "learning_rate": 6.1193459000234815e-06, "loss": 0.3612964153289795, "step": 11825, "token_acc": 0.8664369679773004 }, { "epoch": 0.6381050018885231, "grad_norm": 0.4505921006202698, "learning_rate": 6.117735350288807e-06, "loss": 0.39121490716934204, "step": 11826, "token_acc": 0.8649757553151809 }, { "epoch": 0.6381589596935197, "grad_norm": 0.409624844789505, "learning_rate": 6.11612491911748e-06, "loss": 0.33117106556892395, "step": 11827, "token_acc": 0.8766942545280076 }, { "epoch": 0.6382129174985162, "grad_norm": 0.39287176728248596, "learning_rate": 6.114514606558674e-06, "loss": 0.3339603543281555, "step": 11828, "token_acc": 0.8826104257859132 }, { "epoch": 0.6382668753035127, "grad_norm": 0.38240790367126465, "learning_rate": 6.1129044126615775e-06, "loss": 0.3620781898498535, "step": 11829, "token_acc": 0.8654752553024352 }, { "epoch": 0.6383208331085092, "grad_norm": 0.45623818039894104, "learning_rate": 6.111294337475363e-06, "loss": 0.3427480161190033, "step": 11830, "token_acc": 0.8804330520617736 }, { "epoch": 0.6383747909135057, "grad_norm": 0.3649752736091614, "learning_rate": 6.1096843810492e-06, "loss": 0.34335505962371826, "step": 11831, "token_acc": 0.8802308802308803 }, { "epoch": 0.6384287487185021, "grad_norm": 0.41240864992141724, "learning_rate": 6.1080745434322575e-06, "loss": 0.33200106024742126, "step": 11832, "token_acc": 0.8779550827423168 }, { "epoch": 0.6384827065234986, "grad_norm": 0.42605555057525635, "learning_rate": 6.106464824673699e-06, "loss": 0.3944990634918213, "step": 11833, "token_acc": 0.863189513926816 }, { "epoch": 0.6385366643284951, "grad_norm": 0.39921170473098755, "learning_rate": 6.1048552248226824e-06, "loss": 0.41070616245269775, "step": 11834, "token_acc": 0.8615643615643616 }, { "epoch": 0.6385906221334916, "grad_norm": 0.37469953298568726, "learning_rate": 6.103245743928369e-06, "loss": 0.34283095598220825, "step": 11835, "token_acc": 0.8796140939597316 }, { "epoch": 0.6386445799384881, "grad_norm": 0.4344957172870636, "learning_rate": 6.101636382039909e-06, "loss": 0.384715735912323, "step": 11836, "token_acc": 0.8684939495553288 }, { "epoch": 0.6386985377434846, "grad_norm": 0.5370369553565979, "learning_rate": 6.100027139206454e-06, "loss": 0.3902631402015686, "step": 11837, "token_acc": 0.8637610484608351 }, { "epoch": 0.6387524955484811, "grad_norm": 0.5003889799118042, "learning_rate": 6.098418015477148e-06, "loss": 0.3754155933856964, "step": 11838, "token_acc": 0.8644501278772379 }, { "epoch": 0.6388064533534776, "grad_norm": 0.3651465177536011, "learning_rate": 6.096809010901132e-06, "loss": 0.364324152469635, "step": 11839, "token_acc": 0.8695980658809308 }, { "epoch": 0.638860411158474, "grad_norm": 0.4668313264846802, "learning_rate": 6.095200125527547e-06, "loss": 0.3872402608394623, "step": 11840, "token_acc": 0.86563185951709 }, { "epoch": 0.6389143689634705, "grad_norm": 0.39860695600509644, "learning_rate": 6.093591359405528e-06, "loss": 0.33219778537750244, "step": 11841, "token_acc": 0.8784048156508654 }, { "epoch": 0.638968326768467, "grad_norm": 0.42363274097442627, "learning_rate": 6.091982712584206e-06, "loss": 0.33383840322494507, "step": 11842, "token_acc": 0.8826530612244898 }, { "epoch": 0.6390222845734636, "grad_norm": 0.3787490725517273, "learning_rate": 6.090374185112707e-06, "loss": 0.3097248077392578, "step": 11843, "token_acc": 0.8939462903959945 }, { "epoch": 0.6390762423784601, "grad_norm": 0.42688503861427307, "learning_rate": 6.088765777040158e-06, "loss": 0.29969269037246704, "step": 11844, "token_acc": 0.8893617021276595 }, { "epoch": 0.6391302001834566, "grad_norm": 0.454755574464798, "learning_rate": 6.087157488415676e-06, "loss": 0.32839035987854004, "step": 11845, "token_acc": 0.8817026893251939 }, { "epoch": 0.6391841579884531, "grad_norm": 0.5183452367782593, "learning_rate": 6.0855493192883794e-06, "loss": 0.4205796420574188, "step": 11846, "token_acc": 0.8558530017424363 }, { "epoch": 0.6392381157934495, "grad_norm": 0.3371908962726593, "learning_rate": 6.083941269707378e-06, "loss": 0.3669501841068268, "step": 11847, "token_acc": 0.872225416906268 }, { "epoch": 0.639292073598446, "grad_norm": 0.3850041925907135, "learning_rate": 6.082333339721789e-06, "loss": 0.33612409234046936, "step": 11848, "token_acc": 0.8792416244748464 }, { "epoch": 0.6393460314034425, "grad_norm": 0.4234478175640106, "learning_rate": 6.080725529380711e-06, "loss": 0.2816826105117798, "step": 11849, "token_acc": 0.893840830449827 }, { "epoch": 0.639399989208439, "grad_norm": 0.3589780330657959, "learning_rate": 6.079117838733251e-06, "loss": 0.3337171673774719, "step": 11850, "token_acc": 0.8815158546017015 }, { "epoch": 0.6394539470134355, "grad_norm": 0.3579917252063751, "learning_rate": 6.077510267828506e-06, "loss": 0.35855695605278015, "step": 11851, "token_acc": 0.8778992909506069 }, { "epoch": 0.639507904818432, "grad_norm": 0.40177327394485474, "learning_rate": 6.075902816715569e-06, "loss": 0.3588065207004547, "step": 11852, "token_acc": 0.8720884326885117 }, { "epoch": 0.6395618626234285, "grad_norm": 0.3857365548610687, "learning_rate": 6.074295485443527e-06, "loss": 0.36372917890548706, "step": 11853, "token_acc": 0.8712143063167003 }, { "epoch": 0.639615820428425, "grad_norm": 0.41788890957832336, "learning_rate": 6.072688274061476e-06, "loss": 0.3448164463043213, "step": 11854, "token_acc": 0.8779670889762633 }, { "epoch": 0.6396697782334214, "grad_norm": 0.483441561460495, "learning_rate": 6.071081182618497e-06, "loss": 0.34143131971359253, "step": 11855, "token_acc": 0.8790335657532026 }, { "epoch": 0.6397237360384179, "grad_norm": 0.4091905653476715, "learning_rate": 6.069474211163669e-06, "loss": 0.39175134897232056, "step": 11856, "token_acc": 0.8679765185096442 }, { "epoch": 0.6397776938434144, "grad_norm": 0.380520761013031, "learning_rate": 6.067867359746069e-06, "loss": 0.3368358314037323, "step": 11857, "token_acc": 0.878372710750734 }, { "epoch": 0.6398316516484109, "grad_norm": 0.4395587742328644, "learning_rate": 6.066260628414771e-06, "loss": 0.34845203161239624, "step": 11858, "token_acc": 0.8762536873156342 }, { "epoch": 0.6398856094534074, "grad_norm": 0.35592907667160034, "learning_rate": 6.06465401721884e-06, "loss": 0.34751367568969727, "step": 11859, "token_acc": 0.8754636989931107 }, { "epoch": 0.639939567258404, "grad_norm": 0.47041621804237366, "learning_rate": 6.063047526207345e-06, "loss": 0.3412542939186096, "step": 11860, "token_acc": 0.8767007995511292 }, { "epoch": 0.6399935250634005, "grad_norm": 0.39383944869041443, "learning_rate": 6.061441155429349e-06, "loss": 0.3380963206291199, "step": 11861, "token_acc": 0.8788138314955573 }, { "epoch": 0.640047482868397, "grad_norm": 0.44761723279953003, "learning_rate": 6.059834904933908e-06, "loss": 0.37726911902427673, "step": 11862, "token_acc": 0.8676330041414463 }, { "epoch": 0.6401014406733934, "grad_norm": 0.4393390715122223, "learning_rate": 6.058228774770076e-06, "loss": 0.3966237008571625, "step": 11863, "token_acc": 0.8646555709662506 }, { "epoch": 0.6401553984783899, "grad_norm": 0.3257490396499634, "learning_rate": 6.056622764986906e-06, "loss": 0.35328611731529236, "step": 11864, "token_acc": 0.8737408995711579 }, { "epoch": 0.6402093562833864, "grad_norm": 0.5315238237380981, "learning_rate": 6.0550168756334425e-06, "loss": 0.40434226393699646, "step": 11865, "token_acc": 0.8593288123804556 }, { "epoch": 0.6402633140883829, "grad_norm": 0.29817628860473633, "learning_rate": 6.053411106758729e-06, "loss": 0.34670284390449524, "step": 11866, "token_acc": 0.879828809550625 }, { "epoch": 0.6403172718933794, "grad_norm": 0.32015398144721985, "learning_rate": 6.051805458411808e-06, "loss": 0.3119086027145386, "step": 11867, "token_acc": 0.8872632414653511 }, { "epoch": 0.6403712296983759, "grad_norm": 0.5132718682289124, "learning_rate": 6.0501999306417135e-06, "loss": 0.3192005753517151, "step": 11868, "token_acc": 0.8849630238290879 }, { "epoch": 0.6404251875033724, "grad_norm": 0.3515169322490692, "learning_rate": 6.04859452349748e-06, "loss": 0.33789247274398804, "step": 11869, "token_acc": 0.8814666908694863 }, { "epoch": 0.6404791453083688, "grad_norm": 0.4151473939418793, "learning_rate": 6.046989237028134e-06, "loss": 0.31760546565055847, "step": 11870, "token_acc": 0.8856026058631922 }, { "epoch": 0.6405331031133653, "grad_norm": 0.48750120401382446, "learning_rate": 6.045384071282701e-06, "loss": 0.37659016251564026, "step": 11871, "token_acc": 0.8625 }, { "epoch": 0.6405870609183618, "grad_norm": 0.3835359215736389, "learning_rate": 6.043779026310202e-06, "loss": 0.3507307171821594, "step": 11872, "token_acc": 0.8733730689697117 }, { "epoch": 0.6406410187233583, "grad_norm": 0.32696691155433655, "learning_rate": 6.04217410215965e-06, "loss": 0.3319993317127228, "step": 11873, "token_acc": 0.8800492610837438 }, { "epoch": 0.6406949765283548, "grad_norm": 0.3661559522151947, "learning_rate": 6.040569298880073e-06, "loss": 0.3379586637020111, "step": 11874, "token_acc": 0.8818565400843882 }, { "epoch": 0.6407489343333513, "grad_norm": 0.4106564223766327, "learning_rate": 6.038964616520468e-06, "loss": 0.3681958019733429, "step": 11875, "token_acc": 0.872652368835575 }, { "epoch": 0.6408028921383478, "grad_norm": 0.35428667068481445, "learning_rate": 6.037360055129847e-06, "loss": 0.32584327459335327, "step": 11876, "token_acc": 0.8854541097531752 }, { "epoch": 0.6408568499433444, "grad_norm": 0.37108734250068665, "learning_rate": 6.035755614757213e-06, "loss": 0.3190791606903076, "step": 11877, "token_acc": 0.8838463046461478 }, { "epoch": 0.6409108077483407, "grad_norm": 0.4544297456741333, "learning_rate": 6.034151295451562e-06, "loss": 0.34672510623931885, "step": 11878, "token_acc": 0.8741997439180538 }, { "epoch": 0.6409647655533373, "grad_norm": 0.39871326088905334, "learning_rate": 6.0325470972618885e-06, "loss": 0.3717476427555084, "step": 11879, "token_acc": 0.8708224201152436 }, { "epoch": 0.6410187233583338, "grad_norm": 0.38939064741134644, "learning_rate": 6.03094302023719e-06, "loss": 0.37604671716690063, "step": 11880, "token_acc": 0.8681217337841992 }, { "epoch": 0.6410726811633303, "grad_norm": 0.46336087584495544, "learning_rate": 6.029339064426453e-06, "loss": 0.4017917513847351, "step": 11881, "token_acc": 0.8629774360267238 }, { "epoch": 0.6411266389683268, "grad_norm": 0.32786181569099426, "learning_rate": 6.027735229878662e-06, "loss": 0.33071160316467285, "step": 11882, "token_acc": 0.8784584980237155 }, { "epoch": 0.6411805967733233, "grad_norm": 0.35805419087409973, "learning_rate": 6.026131516642795e-06, "loss": 0.32141631841659546, "step": 11883, "token_acc": 0.8856896763873509 }, { "epoch": 0.6412345545783198, "grad_norm": 0.4085432291030884, "learning_rate": 6.024527924767831e-06, "loss": 0.3379659354686737, "step": 11884, "token_acc": 0.8794677798069397 }, { "epoch": 0.6412885123833163, "grad_norm": 0.5249802470207214, "learning_rate": 6.022924454302741e-06, "loss": 0.3568008542060852, "step": 11885, "token_acc": 0.8732259301879555 }, { "epoch": 0.6413424701883127, "grad_norm": 0.3638662099838257, "learning_rate": 6.021321105296497e-06, "loss": 0.345054566860199, "step": 11886, "token_acc": 0.8804756704283461 }, { "epoch": 0.6413964279933092, "grad_norm": 0.38450002670288086, "learning_rate": 6.019717877798066e-06, "loss": 0.36048901081085205, "step": 11887, "token_acc": 0.8727445394112061 }, { "epoch": 0.6414503857983057, "grad_norm": 0.4441643953323364, "learning_rate": 6.018114771856407e-06, "loss": 0.36632242798805237, "step": 11888, "token_acc": 0.8682407812272264 }, { "epoch": 0.6415043436033022, "grad_norm": 0.41090497374534607, "learning_rate": 6.01651178752048e-06, "loss": 0.376363068819046, "step": 11889, "token_acc": 0.8682236376503892 }, { "epoch": 0.6415583014082987, "grad_norm": 0.4588583707809448, "learning_rate": 6.01490892483924e-06, "loss": 0.3426351249217987, "step": 11890, "token_acc": 0.873973468098547 }, { "epoch": 0.6416122592132952, "grad_norm": 0.44500523805618286, "learning_rate": 6.0133061838616356e-06, "loss": 0.3566669821739197, "step": 11891, "token_acc": 0.8740390301596689 }, { "epoch": 0.6416662170182917, "grad_norm": 0.30249765515327454, "learning_rate": 6.011703564636617e-06, "loss": 0.353352427482605, "step": 11892, "token_acc": 0.8781841468621033 }, { "epoch": 0.6417201748232881, "grad_norm": 0.36001941561698914, "learning_rate": 6.010101067213126e-06, "loss": 0.37400931119918823, "step": 11893, "token_acc": 0.8659680088251517 }, { "epoch": 0.6417741326282846, "grad_norm": 0.3611275255680084, "learning_rate": 6.008498691640106e-06, "loss": 0.3535460829734802, "step": 11894, "token_acc": 0.8744985673352436 }, { "epoch": 0.6418280904332812, "grad_norm": 0.4031388759613037, "learning_rate": 6.006896437966487e-06, "loss": 0.249551922082901, "step": 11895, "token_acc": 0.9069650067294751 }, { "epoch": 0.6418820482382777, "grad_norm": 0.4187615215778351, "learning_rate": 6.005294306241207e-06, "loss": 0.29234778881073, "step": 11896, "token_acc": 0.8948292371528886 }, { "epoch": 0.6419360060432742, "grad_norm": 0.3749735653400421, "learning_rate": 6.003692296513192e-06, "loss": 0.34710726141929626, "step": 11897, "token_acc": 0.8819601507808293 }, { "epoch": 0.6419899638482707, "grad_norm": 0.37656572461128235, "learning_rate": 6.002090408831365e-06, "loss": 0.28828078508377075, "step": 11898, "token_acc": 0.8960685091475282 }, { "epoch": 0.6420439216532672, "grad_norm": 0.375051885843277, "learning_rate": 6.000488643244652e-06, "loss": 0.30748289823532104, "step": 11899, "token_acc": 0.8899232022341168 }, { "epoch": 0.6420978794582637, "grad_norm": 0.4072481095790863, "learning_rate": 5.998886999801967e-06, "loss": 0.28680697083473206, "step": 11900, "token_acc": 0.8954518606024808 }, { "epoch": 0.6421518372632601, "grad_norm": 0.40634119510650635, "learning_rate": 5.997285478552226e-06, "loss": 0.29990434646606445, "step": 11901, "token_acc": 0.8899607586657946 }, { "epoch": 0.6422057950682566, "grad_norm": 0.5019161105155945, "learning_rate": 5.9956840795443385e-06, "loss": 0.3338421583175659, "step": 11902, "token_acc": 0.8779298706169135 }, { "epoch": 0.6422597528732531, "grad_norm": 0.44837290048599243, "learning_rate": 5.99408280282721e-06, "loss": 0.3069884777069092, "step": 11903, "token_acc": 0.8888140161725068 }, { "epoch": 0.6423137106782496, "grad_norm": 0.3334600031375885, "learning_rate": 5.992481648449742e-06, "loss": 0.3643796443939209, "step": 11904, "token_acc": 0.8726985202756128 }, { "epoch": 0.6423676684832461, "grad_norm": 0.47839635610580444, "learning_rate": 5.990880616460831e-06, "loss": 0.359358549118042, "step": 11905, "token_acc": 0.8719989561586639 }, { "epoch": 0.6424216262882426, "grad_norm": 0.3968740403652191, "learning_rate": 5.989279706909381e-06, "loss": 0.3684275448322296, "step": 11906, "token_acc": 0.8682527881040892 }, { "epoch": 0.6424755840932391, "grad_norm": 0.3722984194755554, "learning_rate": 5.987678919844277e-06, "loss": 0.3123416006565094, "step": 11907, "token_acc": 0.8865487549453107 }, { "epoch": 0.6425295418982356, "grad_norm": 0.4200625419616699, "learning_rate": 5.986078255314407e-06, "loss": 0.39022964239120483, "step": 11908, "token_acc": 0.86364282667425 }, { "epoch": 0.642583499703232, "grad_norm": 0.43313923478126526, "learning_rate": 5.984477713368659e-06, "loss": 0.38771384954452515, "step": 11909, "token_acc": 0.8661917299055288 }, { "epoch": 0.6426374575082285, "grad_norm": 0.4260011613368988, "learning_rate": 5.9828772940559075e-06, "loss": 0.3282546103000641, "step": 11910, "token_acc": 0.8868048700230339 }, { "epoch": 0.642691415313225, "grad_norm": 0.543062686920166, "learning_rate": 5.981276997425028e-06, "loss": 0.34435662627220154, "step": 11911, "token_acc": 0.875582298136646 }, { "epoch": 0.6427453731182216, "grad_norm": 0.4300592243671417, "learning_rate": 5.979676823524898e-06, "loss": 0.35782307386398315, "step": 11912, "token_acc": 0.8774592467678471 }, { "epoch": 0.6427993309232181, "grad_norm": 0.3602997660636902, "learning_rate": 5.978076772404385e-06, "loss": 0.41901910305023193, "step": 11913, "token_acc": 0.8529898043538164 }, { "epoch": 0.6428532887282146, "grad_norm": 0.419376939535141, "learning_rate": 5.976476844112355e-06, "loss": 0.3688153028488159, "step": 11914, "token_acc": 0.871222076215506 }, { "epoch": 0.6429072465332111, "grad_norm": 0.46926459670066833, "learning_rate": 5.974877038697667e-06, "loss": 0.3325360417366028, "step": 11915, "token_acc": 0.8814093382984818 }, { "epoch": 0.6429612043382075, "grad_norm": 0.46112632751464844, "learning_rate": 5.973277356209182e-06, "loss": 0.3811502456665039, "step": 11916, "token_acc": 0.8686050206545917 }, { "epoch": 0.643015162143204, "grad_norm": 0.4109935760498047, "learning_rate": 5.97167779669575e-06, "loss": 0.41953954100608826, "step": 11917, "token_acc": 0.8578988125967991 }, { "epoch": 0.6430691199482005, "grad_norm": 0.44932591915130615, "learning_rate": 5.9700783602062196e-06, "loss": 0.33732739090919495, "step": 11918, "token_acc": 0.8784026323661382 }, { "epoch": 0.643123077753197, "grad_norm": 0.39673471450805664, "learning_rate": 5.968479046789444e-06, "loss": 0.3425931930541992, "step": 11919, "token_acc": 0.8762420957542909 }, { "epoch": 0.6431770355581935, "grad_norm": 0.37160545587539673, "learning_rate": 5.966879856494261e-06, "loss": 0.2771611511707306, "step": 11920, "token_acc": 0.8989716759877323 }, { "epoch": 0.64323099336319, "grad_norm": 0.34864160418510437, "learning_rate": 5.965280789369512e-06, "loss": 0.31103643774986267, "step": 11921, "token_acc": 0.8894697535474234 }, { "epoch": 0.6432849511681865, "grad_norm": 0.44668877124786377, "learning_rate": 5.963681845464031e-06, "loss": 0.4176531434059143, "step": 11922, "token_acc": 0.8571428571428571 }, { "epoch": 0.643338908973183, "grad_norm": 0.5322494506835938, "learning_rate": 5.962083024826648e-06, "loss": 0.37321943044662476, "step": 11923, "token_acc": 0.8731487443657437 }, { "epoch": 0.6433928667781794, "grad_norm": 0.5005034804344177, "learning_rate": 5.96048432750619e-06, "loss": 0.3332800269126892, "step": 11924, "token_acc": 0.877962962962963 }, { "epoch": 0.6434468245831759, "grad_norm": 0.3321463465690613, "learning_rate": 5.958885753551485e-06, "loss": 0.3731761574745178, "step": 11925, "token_acc": 0.8680762030613368 }, { "epoch": 0.6435007823881724, "grad_norm": 0.4872332215309143, "learning_rate": 5.95728730301135e-06, "loss": 0.3405808210372925, "step": 11926, "token_acc": 0.8781332453825857 }, { "epoch": 0.6435547401931689, "grad_norm": 0.46190714836120605, "learning_rate": 5.9556889759346e-06, "loss": 0.36086732149124146, "step": 11927, "token_acc": 0.8735463173372544 }, { "epoch": 0.6436086979981654, "grad_norm": 0.3938573896884918, "learning_rate": 5.954090772370051e-06, "loss": 0.3198729157447815, "step": 11928, "token_acc": 0.8858488164927463 }, { "epoch": 0.643662655803162, "grad_norm": 0.3160826563835144, "learning_rate": 5.95249269236651e-06, "loss": 0.35522398352622986, "step": 11929, "token_acc": 0.8770128354725788 }, { "epoch": 0.6437166136081585, "grad_norm": 0.375467449426651, "learning_rate": 5.950894735972781e-06, "loss": 0.37419193983078003, "step": 11930, "token_acc": 0.8670228554467201 }, { "epoch": 0.643770571413155, "grad_norm": 0.32541000843048096, "learning_rate": 5.9492969032376624e-06, "loss": 0.33855435252189636, "step": 11931, "token_acc": 0.8791171857494958 }, { "epoch": 0.6438245292181514, "grad_norm": 0.3648870587348938, "learning_rate": 5.94769919420996e-06, "loss": 0.2786606550216675, "step": 11932, "token_acc": 0.8989815007274995 }, { "epoch": 0.6438784870231479, "grad_norm": 0.4163934290409088, "learning_rate": 5.946101608938464e-06, "loss": 0.392788827419281, "step": 11933, "token_acc": 0.8570819718710044 }, { "epoch": 0.6439324448281444, "grad_norm": 0.41713660955429077, "learning_rate": 5.944504147471961e-06, "loss": 0.36784303188323975, "step": 11934, "token_acc": 0.8711755233494364 }, { "epoch": 0.6439864026331409, "grad_norm": 0.36325815320014954, "learning_rate": 5.9429068098592395e-06, "loss": 0.2657316327095032, "step": 11935, "token_acc": 0.9016439511029928 }, { "epoch": 0.6440403604381374, "grad_norm": 0.42873579263687134, "learning_rate": 5.941309596149081e-06, "loss": 0.3913246691226959, "step": 11936, "token_acc": 0.8646706586826347 }, { "epoch": 0.6440943182431339, "grad_norm": 0.35618120431900024, "learning_rate": 5.93971250639026e-06, "loss": 0.3186899423599243, "step": 11937, "token_acc": 0.8824022814963932 }, { "epoch": 0.6441482760481304, "grad_norm": 0.4281352162361145, "learning_rate": 5.938115540631558e-06, "loss": 0.31527066230773926, "step": 11938, "token_acc": 0.8845114492959749 }, { "epoch": 0.6442022338531268, "grad_norm": 0.39859816431999207, "learning_rate": 5.936518698921745e-06, "loss": 0.32278871536254883, "step": 11939, "token_acc": 0.8814661134163209 }, { "epoch": 0.6442561916581233, "grad_norm": 0.5103739500045776, "learning_rate": 5.934921981309586e-06, "loss": 0.3797704577445984, "step": 11940, "token_acc": 0.8661783098088262 }, { "epoch": 0.6443101494631198, "grad_norm": 0.4114731252193451, "learning_rate": 5.933325387843845e-06, "loss": 0.38920801877975464, "step": 11941, "token_acc": 0.864213021051164 }, { "epoch": 0.6443641072681163, "grad_norm": 0.3975068926811218, "learning_rate": 5.931728918573281e-06, "loss": 0.36375588178634644, "step": 11942, "token_acc": 0.871563981042654 }, { "epoch": 0.6444180650731128, "grad_norm": 0.4292415976524353, "learning_rate": 5.930132573546648e-06, "loss": 0.3540116250514984, "step": 11943, "token_acc": 0.8759185980780102 }, { "epoch": 0.6444720228781093, "grad_norm": 0.5159543752670288, "learning_rate": 5.928536352812702e-06, "loss": 0.366539865732193, "step": 11944, "token_acc": 0.8741701404971437 }, { "epoch": 0.6445259806831058, "grad_norm": 0.3644312620162964, "learning_rate": 5.9269402564201905e-06, "loss": 0.3618049919605255, "step": 11945, "token_acc": 0.8746007498958478 }, { "epoch": 0.6445799384881024, "grad_norm": 0.4141341745853424, "learning_rate": 5.925344284417856e-06, "loss": 0.3750409483909607, "step": 11946, "token_acc": 0.8702232311766932 }, { "epoch": 0.6446338962930988, "grad_norm": 0.5198724269866943, "learning_rate": 5.923748436854437e-06, "loss": 0.3611001968383789, "step": 11947, "token_acc": 0.8791788042724372 }, { "epoch": 0.6446878540980953, "grad_norm": 0.3543110489845276, "learning_rate": 5.922152713778676e-06, "loss": 0.34688085317611694, "step": 11948, "token_acc": 0.8779887482419128 }, { "epoch": 0.6447418119030918, "grad_norm": 0.46737805008888245, "learning_rate": 5.920557115239303e-06, "loss": 0.36793601512908936, "step": 11949, "token_acc": 0.873312765136907 }, { "epoch": 0.6447957697080883, "grad_norm": 0.4773206114768982, "learning_rate": 5.918961641285044e-06, "loss": 0.33816176652908325, "step": 11950, "token_acc": 0.8822251195132551 }, { "epoch": 0.6448497275130848, "grad_norm": 0.2915489375591278, "learning_rate": 5.917366291964628e-06, "loss": 0.3410191535949707, "step": 11951, "token_acc": 0.8791738807815979 }, { "epoch": 0.6449036853180813, "grad_norm": 0.29937294125556946, "learning_rate": 5.915771067326778e-06, "loss": 0.36064064502716064, "step": 11952, "token_acc": 0.8730356309320056 }, { "epoch": 0.6449576431230778, "grad_norm": 0.3944016098976135, "learning_rate": 5.914175967420208e-06, "loss": 0.36348608136177063, "step": 11953, "token_acc": 0.8682387619749448 }, { "epoch": 0.6450116009280742, "grad_norm": 0.41813310980796814, "learning_rate": 5.9125809922936354e-06, "loss": 0.3379800319671631, "step": 11954, "token_acc": 0.8809285571342805 }, { "epoch": 0.6450655587330707, "grad_norm": 0.35748061537742615, "learning_rate": 5.910986141995767e-06, "loss": 0.33152204751968384, "step": 11955, "token_acc": 0.8807377049180328 }, { "epoch": 0.6451195165380672, "grad_norm": 0.31788700819015503, "learning_rate": 5.909391416575307e-06, "loss": 0.32876020669937134, "step": 11956, "token_acc": 0.8777914541937752 }, { "epoch": 0.6451734743430637, "grad_norm": 0.328029990196228, "learning_rate": 5.907796816080969e-06, "loss": 0.35771340131759644, "step": 11957, "token_acc": 0.871599251475457 }, { "epoch": 0.6452274321480602, "grad_norm": 0.3260989487171173, "learning_rate": 5.906202340561441e-06, "loss": 0.347902774810791, "step": 11958, "token_acc": 0.8763791040808189 }, { "epoch": 0.6452813899530567, "grad_norm": 0.522880494594574, "learning_rate": 5.90460799006542e-06, "loss": 0.34804558753967285, "step": 11959, "token_acc": 0.8707139975796693 }, { "epoch": 0.6453353477580532, "grad_norm": 0.46055638790130615, "learning_rate": 5.903013764641601e-06, "loss": 0.3268091678619385, "step": 11960, "token_acc": 0.8842005076142132 }, { "epoch": 0.6453893055630497, "grad_norm": 0.3229551613330841, "learning_rate": 5.901419664338667e-06, "loss": 0.35529693961143494, "step": 11961, "token_acc": 0.8746211077431799 }, { "epoch": 0.6454432633680461, "grad_norm": 0.4890376031398773, "learning_rate": 5.8998256892053015e-06, "loss": 0.311932772397995, "step": 11962, "token_acc": 0.8823773037896044 }, { "epoch": 0.6454972211730426, "grad_norm": 0.3589674234390259, "learning_rate": 5.898231839290183e-06, "loss": 0.36529555916786194, "step": 11963, "token_acc": 0.8735028110486434 }, { "epoch": 0.6455511789780392, "grad_norm": 0.3611644208431244, "learning_rate": 5.896638114641993e-06, "loss": 0.3179593086242676, "step": 11964, "token_acc": 0.8825768667642753 }, { "epoch": 0.6456051367830357, "grad_norm": 0.3291175663471222, "learning_rate": 5.8950445153094e-06, "loss": 0.3116074204444885, "step": 11965, "token_acc": 0.8899554675903019 }, { "epoch": 0.6456590945880322, "grad_norm": 0.40032628178596497, "learning_rate": 5.8934510413410715e-06, "loss": 0.3512319326400757, "step": 11966, "token_acc": 0.8805559798380937 }, { "epoch": 0.6457130523930287, "grad_norm": 0.47135910391807556, "learning_rate": 5.891857692785676e-06, "loss": 0.40936991572380066, "step": 11967, "token_acc": 0.8607142857142858 }, { "epoch": 0.6457670101980252, "grad_norm": 0.43289923667907715, "learning_rate": 5.890264469691867e-06, "loss": 0.3438030779361725, "step": 11968, "token_acc": 0.8770334315358012 }, { "epoch": 0.6458209680030217, "grad_norm": 0.3676089644432068, "learning_rate": 5.888671372108301e-06, "loss": 0.28109973669052124, "step": 11969, "token_acc": 0.898989898989899 }, { "epoch": 0.6458749258080181, "grad_norm": 0.46615830063819885, "learning_rate": 5.887078400083639e-06, "loss": 0.3354346752166748, "step": 11970, "token_acc": 0.8814268142681427 }, { "epoch": 0.6459288836130146, "grad_norm": 0.4391458332538605, "learning_rate": 5.885485553666524e-06, "loss": 0.33370891213417053, "step": 11971, "token_acc": 0.8745387453874539 }, { "epoch": 0.6459828414180111, "grad_norm": 0.4077589809894562, "learning_rate": 5.883892832905602e-06, "loss": 0.35139113664627075, "step": 11972, "token_acc": 0.8712941847206386 }, { "epoch": 0.6460367992230076, "grad_norm": 0.42496249079704285, "learning_rate": 5.882300237849517e-06, "loss": 0.3462539315223694, "step": 11973, "token_acc": 0.8847826086956522 }, { "epoch": 0.6460907570280041, "grad_norm": 0.41724565625190735, "learning_rate": 5.880707768546902e-06, "loss": 0.38494688272476196, "step": 11974, "token_acc": 0.8626214867349619 }, { "epoch": 0.6461447148330006, "grad_norm": 0.2680063843727112, "learning_rate": 5.879115425046393e-06, "loss": 0.3078366219997406, "step": 11975, "token_acc": 0.8917117343730349 }, { "epoch": 0.6461986726379971, "grad_norm": 0.530626118183136, "learning_rate": 5.877523207396617e-06, "loss": 0.3449639081954956, "step": 11976, "token_acc": 0.8777039686595128 }, { "epoch": 0.6462526304429935, "grad_norm": 0.38713306188583374, "learning_rate": 5.875931115646205e-06, "loss": 0.40468835830688477, "step": 11977, "token_acc": 0.85832612189393 }, { "epoch": 0.64630658824799, "grad_norm": 0.4277860224246979, "learning_rate": 5.8743391498437755e-06, "loss": 0.30175521969795227, "step": 11978, "token_acc": 0.8872340425531915 }, { "epoch": 0.6463605460529865, "grad_norm": 0.49892866611480713, "learning_rate": 5.872747310037949e-06, "loss": 0.3636676073074341, "step": 11979, "token_acc": 0.8687237026647966 }, { "epoch": 0.646414503857983, "grad_norm": 0.36039891839027405, "learning_rate": 5.871155596277338e-06, "loss": 0.3103405237197876, "step": 11980, "token_acc": 0.8846280991735537 }, { "epoch": 0.6464684616629796, "grad_norm": 0.49087411165237427, "learning_rate": 5.869564008610554e-06, "loss": 0.3794667422771454, "step": 11981, "token_acc": 0.8711458193079306 }, { "epoch": 0.6465224194679761, "grad_norm": 0.48083052039146423, "learning_rate": 5.867972547086199e-06, "loss": 0.35768526792526245, "step": 11982, "token_acc": 0.8732590529247911 }, { "epoch": 0.6465763772729726, "grad_norm": 0.434872567653656, "learning_rate": 5.866381211752883e-06, "loss": 0.4002223014831543, "step": 11983, "token_acc": 0.8641989326671965 }, { "epoch": 0.6466303350779691, "grad_norm": 0.3351290225982666, "learning_rate": 5.864790002659202e-06, "loss": 0.32816725969314575, "step": 11984, "token_acc": 0.8799847114282074 }, { "epoch": 0.6466842928829655, "grad_norm": 0.42540243268013, "learning_rate": 5.863198919853752e-06, "loss": 0.3587510287761688, "step": 11985, "token_acc": 0.872183908045977 }, { "epoch": 0.646738250687962, "grad_norm": 0.3840070366859436, "learning_rate": 5.861607963385123e-06, "loss": 0.313834547996521, "step": 11986, "token_acc": 0.8890906551194301 }, { "epoch": 0.6467922084929585, "grad_norm": 0.3827250003814697, "learning_rate": 5.860017133301902e-06, "loss": 0.35630905628204346, "step": 11987, "token_acc": 0.8736598087510866 }, { "epoch": 0.646846166297955, "grad_norm": 0.4268885850906372, "learning_rate": 5.8584264296526725e-06, "loss": 0.2959916591644287, "step": 11988, "token_acc": 0.8908195771792644 }, { "epoch": 0.6469001241029515, "grad_norm": 0.39827844500541687, "learning_rate": 5.856835852486013e-06, "loss": 0.33368805050849915, "step": 11989, "token_acc": 0.8789390436792797 }, { "epoch": 0.646954081907948, "grad_norm": 0.42126816511154175, "learning_rate": 5.855245401850503e-06, "loss": 0.3265690505504608, "step": 11990, "token_acc": 0.8848684210526315 }, { "epoch": 0.6470080397129445, "grad_norm": 0.4311484694480896, "learning_rate": 5.853655077794717e-06, "loss": 0.34662654995918274, "step": 11991, "token_acc": 0.8727937468482098 }, { "epoch": 0.647061997517941, "grad_norm": 0.2867341637611389, "learning_rate": 5.852064880367214e-06, "loss": 0.3063341975212097, "step": 11992, "token_acc": 0.8905187358314441 }, { "epoch": 0.6471159553229374, "grad_norm": 0.4846467971801758, "learning_rate": 5.850474809616565e-06, "loss": 0.35622096061706543, "step": 11993, "token_acc": 0.8753226126662697 }, { "epoch": 0.6471699131279339, "grad_norm": 0.38839927315711975, "learning_rate": 5.848884865591328e-06, "loss": 0.2817254364490509, "step": 11994, "token_acc": 0.8960339480985801 }, { "epoch": 0.6472238709329304, "grad_norm": 0.37377917766571045, "learning_rate": 5.8472950483400555e-06, "loss": 0.3566128611564636, "step": 11995, "token_acc": 0.8754835936380571 }, { "epoch": 0.6472778287379269, "grad_norm": 0.3997495472431183, "learning_rate": 5.845705357911309e-06, "loss": 0.29340851306915283, "step": 11996, "token_acc": 0.8931791454331635 }, { "epoch": 0.6473317865429234, "grad_norm": 0.4814413785934448, "learning_rate": 5.844115794353631e-06, "loss": 0.3625737428665161, "step": 11997, "token_acc": 0.8726370827188631 }, { "epoch": 0.64738574434792, "grad_norm": 0.342199444770813, "learning_rate": 5.8425263577155685e-06, "loss": 0.4225928783416748, "step": 11998, "token_acc": 0.8563863321144874 }, { "epoch": 0.6474397021529165, "grad_norm": 0.508000910282135, "learning_rate": 5.840937048045663e-06, "loss": 0.3193556070327759, "step": 11999, "token_acc": 0.8811600081119448 }, { "epoch": 0.6474936599579129, "grad_norm": 0.3985472619533539, "learning_rate": 5.8393478653924495e-06, "loss": 0.35826143622398376, "step": 12000, "token_acc": 0.8784653790566286 }, { "epoch": 0.6475476177629094, "grad_norm": 0.422197163105011, "learning_rate": 5.8377588098044604e-06, "loss": 0.35341718792915344, "step": 12001, "token_acc": 0.8751954513148543 }, { "epoch": 0.6476015755679059, "grad_norm": 0.3699517548084259, "learning_rate": 5.836169881330229e-06, "loss": 0.39558491110801697, "step": 12002, "token_acc": 0.8613834788448623 }, { "epoch": 0.6476555333729024, "grad_norm": 0.40306007862091064, "learning_rate": 5.834581080018279e-06, "loss": 0.36171433329582214, "step": 12003, "token_acc": 0.8765650838648712 }, { "epoch": 0.6477094911778989, "grad_norm": 0.35741132497787476, "learning_rate": 5.83299240591713e-06, "loss": 0.31752315163612366, "step": 12004, "token_acc": 0.8902178556172576 }, { "epoch": 0.6477634489828954, "grad_norm": 0.2991311550140381, "learning_rate": 5.831403859075304e-06, "loss": 0.374770849943161, "step": 12005, "token_acc": 0.8664311878597593 }, { "epoch": 0.6478174067878919, "grad_norm": 0.4484872817993164, "learning_rate": 5.829815439541311e-06, "loss": 0.3532160222530365, "step": 12006, "token_acc": 0.8800884955752213 }, { "epoch": 0.6478713645928884, "grad_norm": 0.31649285554885864, "learning_rate": 5.828227147363661e-06, "loss": 0.35725632309913635, "step": 12007, "token_acc": 0.8770711693230235 }, { "epoch": 0.6479253223978848, "grad_norm": 0.38426634669303894, "learning_rate": 5.8266389825908576e-06, "loss": 0.3594352900981903, "step": 12008, "token_acc": 0.873701039168665 }, { "epoch": 0.6479792802028813, "grad_norm": 0.3678554892539978, "learning_rate": 5.825050945271411e-06, "loss": 0.36659520864486694, "step": 12009, "token_acc": 0.8726929225403285 }, { "epoch": 0.6480332380078778, "grad_norm": 0.34523072838783264, "learning_rate": 5.823463035453815e-06, "loss": 0.35493165254592896, "step": 12010, "token_acc": 0.8719219000437126 }, { "epoch": 0.6480871958128743, "grad_norm": 0.5017097592353821, "learning_rate": 5.821875253186563e-06, "loss": 0.3632722795009613, "step": 12011, "token_acc": 0.8791037809242259 }, { "epoch": 0.6481411536178708, "grad_norm": 0.4911036789417267, "learning_rate": 5.820287598518146e-06, "loss": 0.3506298065185547, "step": 12012, "token_acc": 0.878151944037742 }, { "epoch": 0.6481951114228673, "grad_norm": 0.3764919638633728, "learning_rate": 5.818700071497052e-06, "loss": 0.30749696493148804, "step": 12013, "token_acc": 0.8922155688622755 }, { "epoch": 0.6482490692278638, "grad_norm": 0.39151668548583984, "learning_rate": 5.8171126721717605e-06, "loss": 0.2861786484718323, "step": 12014, "token_acc": 0.8961352657004831 }, { "epoch": 0.6483030270328604, "grad_norm": 0.35518714785575867, "learning_rate": 5.815525400590755e-06, "loss": 0.33275383710861206, "step": 12015, "token_acc": 0.8798212605832549 }, { "epoch": 0.6483569848378568, "grad_norm": 0.3975502848625183, "learning_rate": 5.813938256802506e-06, "loss": 0.3187459707260132, "step": 12016, "token_acc": 0.8834149326805385 }, { "epoch": 0.6484109426428533, "grad_norm": 0.3861318528652191, "learning_rate": 5.812351240855486e-06, "loss": 0.3757452964782715, "step": 12017, "token_acc": 0.8679454075986721 }, { "epoch": 0.6484649004478498, "grad_norm": 0.3788829445838928, "learning_rate": 5.8107643527981615e-06, "loss": 0.31033772230148315, "step": 12018, "token_acc": 0.8875718812345968 }, { "epoch": 0.6485188582528463, "grad_norm": 0.2819320559501648, "learning_rate": 5.809177592678999e-06, "loss": 0.34288913011550903, "step": 12019, "token_acc": 0.8779125438876476 }, { "epoch": 0.6485728160578428, "grad_norm": 0.39467746019363403, "learning_rate": 5.807590960546452e-06, "loss": 0.38977816700935364, "step": 12020, "token_acc": 0.8670454545454546 }, { "epoch": 0.6486267738628393, "grad_norm": 0.40929940342903137, "learning_rate": 5.806004456448975e-06, "loss": 0.3358652591705322, "step": 12021, "token_acc": 0.8743275552898984 }, { "epoch": 0.6486807316678358, "grad_norm": 0.41786429286003113, "learning_rate": 5.804418080435028e-06, "loss": 0.33067500591278076, "step": 12022, "token_acc": 0.8807966274167757 }, { "epoch": 0.6487346894728322, "grad_norm": 0.36689522862434387, "learning_rate": 5.802831832553054e-06, "loss": 0.34543052315711975, "step": 12023, "token_acc": 0.8766871165644172 }, { "epoch": 0.6487886472778287, "grad_norm": 0.4781387448310852, "learning_rate": 5.801245712851495e-06, "loss": 0.33507251739501953, "step": 12024, "token_acc": 0.8819822400458321 }, { "epoch": 0.6488426050828252, "grad_norm": 0.3127712905406952, "learning_rate": 5.7996597213787944e-06, "loss": 0.37293773889541626, "step": 12025, "token_acc": 0.8732785310131312 }, { "epoch": 0.6488965628878217, "grad_norm": 0.4986303150653839, "learning_rate": 5.798073858183384e-06, "loss": 0.4168829917907715, "step": 12026, "token_acc": 0.8584571832979476 }, { "epoch": 0.6489505206928182, "grad_norm": 0.36151984333992004, "learning_rate": 5.7964881233136975e-06, "loss": 0.34968072175979614, "step": 12027, "token_acc": 0.8743409490333919 }, { "epoch": 0.6490044784978147, "grad_norm": 0.43063679337501526, "learning_rate": 5.794902516818161e-06, "loss": 0.3206363618373871, "step": 12028, "token_acc": 0.8827411880353883 }, { "epoch": 0.6490584363028112, "grad_norm": 0.4234482944011688, "learning_rate": 5.793317038745203e-06, "loss": 0.3280981481075287, "step": 12029, "token_acc": 0.8838532110091744 }, { "epoch": 0.6491123941078077, "grad_norm": 0.45824283361434937, "learning_rate": 5.791731689143238e-06, "loss": 0.33462315797805786, "step": 12030, "token_acc": 0.8767148344613134 }, { "epoch": 0.6491663519128041, "grad_norm": 0.5055139064788818, "learning_rate": 5.790146468060687e-06, "loss": 0.3848612904548645, "step": 12031, "token_acc": 0.8637449247671364 }, { "epoch": 0.6492203097178006, "grad_norm": 0.3998960852622986, "learning_rate": 5.788561375545958e-06, "loss": 0.32902899384498596, "step": 12032, "token_acc": 0.8818655619403982 }, { "epoch": 0.6492742675227972, "grad_norm": 0.41102179884910583, "learning_rate": 5.7869764116474626e-06, "loss": 0.3586805462837219, "step": 12033, "token_acc": 0.8754697421277698 }, { "epoch": 0.6493282253277937, "grad_norm": 0.4621621072292328, "learning_rate": 5.785391576413601e-06, "loss": 0.3886989653110504, "step": 12034, "token_acc": 0.8644427786106946 }, { "epoch": 0.6493821831327902, "grad_norm": 0.31883928179740906, "learning_rate": 5.7838068698927785e-06, "loss": 0.3526090979576111, "step": 12035, "token_acc": 0.8739370431150231 }, { "epoch": 0.6494361409377867, "grad_norm": 0.4924844205379486, "learning_rate": 5.78222229213339e-06, "loss": 0.30037322640419006, "step": 12036, "token_acc": 0.8923988153998026 }, { "epoch": 0.6494900987427832, "grad_norm": 0.4552627205848694, "learning_rate": 5.7806378431838285e-06, "loss": 0.336783230304718, "step": 12037, "token_acc": 0.8783025099075297 }, { "epoch": 0.6495440565477797, "grad_norm": 0.49542275071144104, "learning_rate": 5.779053523092482e-06, "loss": 0.337582528591156, "step": 12038, "token_acc": 0.8773797031929246 }, { "epoch": 0.6495980143527761, "grad_norm": 0.38688182830810547, "learning_rate": 5.777469331907735e-06, "loss": 0.3128376305103302, "step": 12039, "token_acc": 0.8899146733808925 }, { "epoch": 0.6496519721577726, "grad_norm": 0.4095824360847473, "learning_rate": 5.775885269677963e-06, "loss": 0.34482258558273315, "step": 12040, "token_acc": 0.8788368336025848 }, { "epoch": 0.6497059299627691, "grad_norm": 0.3917515277862549, "learning_rate": 5.774301336451555e-06, "loss": 0.33860158920288086, "step": 12041, "token_acc": 0.8791339519390912 }, { "epoch": 0.6497598877677656, "grad_norm": 0.3743588924407959, "learning_rate": 5.772717532276875e-06, "loss": 0.34367817640304565, "step": 12042, "token_acc": 0.8797889712347695 }, { "epoch": 0.6498138455727621, "grad_norm": 0.4900672137737274, "learning_rate": 5.771133857202296e-06, "loss": 0.37681645154953003, "step": 12043, "token_acc": 0.8755315668956494 }, { "epoch": 0.6498678033777586, "grad_norm": 0.4377688467502594, "learning_rate": 5.76955031127618e-06, "loss": 0.2804200351238251, "step": 12044, "token_acc": 0.8968694945114514 }, { "epoch": 0.6499217611827551, "grad_norm": 0.47421982884407043, "learning_rate": 5.767966894546888e-06, "loss": 0.3726498484611511, "step": 12045, "token_acc": 0.8685272360457297 }, { "epoch": 0.6499757189877515, "grad_norm": 0.409620076417923, "learning_rate": 5.76638360706278e-06, "loss": 0.3602873980998993, "step": 12046, "token_acc": 0.8750817527795945 }, { "epoch": 0.650029676792748, "grad_norm": 0.42748379707336426, "learning_rate": 5.764800448872208e-06, "loss": 0.3589586615562439, "step": 12047, "token_acc": 0.8801916932907349 }, { "epoch": 0.6500836345977445, "grad_norm": 0.43543320894241333, "learning_rate": 5.763217420023521e-06, "loss": 0.35508522391319275, "step": 12048, "token_acc": 0.8769000747570396 }, { "epoch": 0.650137592402741, "grad_norm": 0.3463888168334961, "learning_rate": 5.761634520565062e-06, "loss": 0.34356433153152466, "step": 12049, "token_acc": 0.879768115942029 }, { "epoch": 0.6501915502077376, "grad_norm": 0.4014408588409424, "learning_rate": 5.760051750545177e-06, "loss": 0.34211334586143494, "step": 12050, "token_acc": 0.8767101990049752 }, { "epoch": 0.6502455080127341, "grad_norm": 0.35887089371681213, "learning_rate": 5.758469110012199e-06, "loss": 0.3320050835609436, "step": 12051, "token_acc": 0.884078573274748 }, { "epoch": 0.6502994658177306, "grad_norm": 0.4594925343990326, "learning_rate": 5.756886599014464e-06, "loss": 0.39287447929382324, "step": 12052, "token_acc": 0.8623675368312226 }, { "epoch": 0.6503534236227271, "grad_norm": 0.41259634494781494, "learning_rate": 5.755304217600296e-06, "loss": 0.3355215787887573, "step": 12053, "token_acc": 0.8845104577144589 }, { "epoch": 0.6504073814277235, "grad_norm": 0.4252364933490753, "learning_rate": 5.75372196581803e-06, "loss": 0.34430062770843506, "step": 12054, "token_acc": 0.876502894463411 }, { "epoch": 0.65046133923272, "grad_norm": 0.4544803202152252, "learning_rate": 5.752139843715983e-06, "loss": 0.3474304676055908, "step": 12055, "token_acc": 0.873300889410975 }, { "epoch": 0.6505152970377165, "grad_norm": 0.4964463412761688, "learning_rate": 5.750557851342471e-06, "loss": 0.3261834979057312, "step": 12056, "token_acc": 0.8845434714864444 }, { "epoch": 0.650569254842713, "grad_norm": 0.38404035568237305, "learning_rate": 5.748975988745811e-06, "loss": 0.3336644172668457, "step": 12057, "token_acc": 0.8780366881507189 }, { "epoch": 0.6506232126477095, "grad_norm": 0.39842960238456726, "learning_rate": 5.747394255974309e-06, "loss": 0.3353850245475769, "step": 12058, "token_acc": 0.8826653790439402 }, { "epoch": 0.650677170452706, "grad_norm": 0.4008047878742218, "learning_rate": 5.7458126530762696e-06, "loss": 0.36855176091194153, "step": 12059, "token_acc": 0.8702877989460883 }, { "epoch": 0.6507311282577025, "grad_norm": 0.31486040353775024, "learning_rate": 5.744231180100001e-06, "loss": 0.27174311876296997, "step": 12060, "token_acc": 0.9026907482491706 }, { "epoch": 0.6507850860626989, "grad_norm": 0.5071214437484741, "learning_rate": 5.742649837093796e-06, "loss": 0.41956740617752075, "step": 12061, "token_acc": 0.8508215624099164 }, { "epoch": 0.6508390438676954, "grad_norm": 0.3417113721370697, "learning_rate": 5.741068624105956e-06, "loss": 0.3266680836677551, "step": 12062, "token_acc": 0.8825022665457842 }, { "epoch": 0.6508930016726919, "grad_norm": 0.4391162693500519, "learning_rate": 5.7394875411847585e-06, "loss": 0.3527825176715851, "step": 12063, "token_acc": 0.8770840519719444 }, { "epoch": 0.6509469594776884, "grad_norm": 0.4852074980735779, "learning_rate": 5.737906588378497e-06, "loss": 0.29539209604263306, "step": 12064, "token_acc": 0.8865422396856582 }, { "epoch": 0.6510009172826849, "grad_norm": 0.268900990486145, "learning_rate": 5.7363257657354505e-06, "loss": 0.34949690103530884, "step": 12065, "token_acc": 0.8777518777518778 }, { "epoch": 0.6510548750876815, "grad_norm": 0.4049799144268036, "learning_rate": 5.734745073303895e-06, "loss": 0.33110103011131287, "step": 12066, "token_acc": 0.8754403268986896 }, { "epoch": 0.651108832892678, "grad_norm": 0.3721811771392822, "learning_rate": 5.73316451113211e-06, "loss": 0.33270514011383057, "step": 12067, "token_acc": 0.8838447526972117 }, { "epoch": 0.6511627906976745, "grad_norm": 0.54095458984375, "learning_rate": 5.731584079268365e-06, "loss": 0.35548532009124756, "step": 12068, "token_acc": 0.8766264304749961 }, { "epoch": 0.6512167485026709, "grad_norm": 0.3500751852989197, "learning_rate": 5.730003777760924e-06, "loss": 0.3872184157371521, "step": 12069, "token_acc": 0.8670980587356893 }, { "epoch": 0.6512707063076674, "grad_norm": 0.389481782913208, "learning_rate": 5.7284236066580476e-06, "loss": 0.34996581077575684, "step": 12070, "token_acc": 0.8768071362657643 }, { "epoch": 0.6513246641126639, "grad_norm": 0.4012885093688965, "learning_rate": 5.726843566007996e-06, "loss": 0.3864368200302124, "step": 12071, "token_acc": 0.8664706602964596 }, { "epoch": 0.6513786219176604, "grad_norm": 0.4345525801181793, "learning_rate": 5.725263655859019e-06, "loss": 0.2975758910179138, "step": 12072, "token_acc": 0.8900514864640425 }, { "epoch": 0.6514325797226569, "grad_norm": 0.4463439881801605, "learning_rate": 5.723683876259374e-06, "loss": 0.36711543798446655, "step": 12073, "token_acc": 0.8726711624185621 }, { "epoch": 0.6514865375276534, "grad_norm": 0.3899073004722595, "learning_rate": 5.722104227257302e-06, "loss": 0.31501591205596924, "step": 12074, "token_acc": 0.88478439989014 }, { "epoch": 0.6515404953326499, "grad_norm": 0.45935526490211487, "learning_rate": 5.720524708901047e-06, "loss": 0.3700650930404663, "step": 12075, "token_acc": 0.8690623709211069 }, { "epoch": 0.6515944531376464, "grad_norm": 0.3725847005844116, "learning_rate": 5.718945321238847e-06, "loss": 0.31195563077926636, "step": 12076, "token_acc": 0.8878553648630945 }, { "epoch": 0.6516484109426428, "grad_norm": 0.44036924839019775, "learning_rate": 5.717366064318934e-06, "loss": 0.34195417165756226, "step": 12077, "token_acc": 0.8779282868525896 }, { "epoch": 0.6517023687476393, "grad_norm": 0.3294292390346527, "learning_rate": 5.715786938189542e-06, "loss": 0.29483842849731445, "step": 12078, "token_acc": 0.8946813495782568 }, { "epoch": 0.6517563265526358, "grad_norm": 0.41351866722106934, "learning_rate": 5.714207942898893e-06, "loss": 0.3099174201488495, "step": 12079, "token_acc": 0.8871247113163973 }, { "epoch": 0.6518102843576323, "grad_norm": 0.38513296842575073, "learning_rate": 5.71262907849521e-06, "loss": 0.3926050662994385, "step": 12080, "token_acc": 0.8652622649512229 }, { "epoch": 0.6518642421626288, "grad_norm": 0.4226589500904083, "learning_rate": 5.711050345026712e-06, "loss": 0.3501338064670563, "step": 12081, "token_acc": 0.8776123301985371 }, { "epoch": 0.6519181999676253, "grad_norm": 0.34937041997909546, "learning_rate": 5.7094717425416135e-06, "loss": 0.312946617603302, "step": 12082, "token_acc": 0.8862001308044474 }, { "epoch": 0.6519721577726219, "grad_norm": 0.35509392619132996, "learning_rate": 5.707893271088124e-06, "loss": 0.3273196816444397, "step": 12083, "token_acc": 0.887814085171641 }, { "epoch": 0.6520261155776182, "grad_norm": 0.5045977234840393, "learning_rate": 5.706314930714449e-06, "loss": 0.37527966499328613, "step": 12084, "token_acc": 0.8695943680858197 }, { "epoch": 0.6520800733826148, "grad_norm": 0.27335837483406067, "learning_rate": 5.704736721468789e-06, "loss": 0.29741939902305603, "step": 12085, "token_acc": 0.8871052631578947 }, { "epoch": 0.6521340311876113, "grad_norm": 0.3831958472728729, "learning_rate": 5.703158643399347e-06, "loss": 0.3188125491142273, "step": 12086, "token_acc": 0.8871876354181714 }, { "epoch": 0.6521879889926078, "grad_norm": 0.4038652777671814, "learning_rate": 5.7015806965543145e-06, "loss": 0.3523837924003601, "step": 12087, "token_acc": 0.8762225428892095 }, { "epoch": 0.6522419467976043, "grad_norm": 0.4584585130214691, "learning_rate": 5.7000028809818805e-06, "loss": 0.37395453453063965, "step": 12088, "token_acc": 0.8694508894044857 }, { "epoch": 0.6522959046026008, "grad_norm": 0.44567325711250305, "learning_rate": 5.698425196730233e-06, "loss": 0.2785186767578125, "step": 12089, "token_acc": 0.8955830388692579 }, { "epoch": 0.6523498624075973, "grad_norm": 0.4401336908340454, "learning_rate": 5.696847643847554e-06, "loss": 0.2801578938961029, "step": 12090, "token_acc": 0.8993775933609959 }, { "epoch": 0.6524038202125938, "grad_norm": 0.4802490472793579, "learning_rate": 5.695270222382019e-06, "loss": 0.31842169165611267, "step": 12091, "token_acc": 0.8837692707431145 }, { "epoch": 0.6524577780175902, "grad_norm": 0.39841070771217346, "learning_rate": 5.6936929323818005e-06, "loss": 0.29874187707901, "step": 12092, "token_acc": 0.8911158493248046 }, { "epoch": 0.6525117358225867, "grad_norm": 0.3436160683631897, "learning_rate": 5.692115773895076e-06, "loss": 0.2973909378051758, "step": 12093, "token_acc": 0.8930976430976431 }, { "epoch": 0.6525656936275832, "grad_norm": 0.490827739238739, "learning_rate": 5.690538746970007e-06, "loss": 0.3463568687438965, "step": 12094, "token_acc": 0.8797387476927445 }, { "epoch": 0.6526196514325797, "grad_norm": 0.49877622723579407, "learning_rate": 5.6889618516547555e-06, "loss": 0.37484705448150635, "step": 12095, "token_acc": 0.8725931257872953 }, { "epoch": 0.6526736092375762, "grad_norm": 0.4607187509536743, "learning_rate": 5.6873850879974835e-06, "loss": 0.27475041151046753, "step": 12096, "token_acc": 0.8945833333333333 }, { "epoch": 0.6527275670425727, "grad_norm": 0.4359595775604248, "learning_rate": 5.685808456046338e-06, "loss": 0.3463890254497528, "step": 12097, "token_acc": 0.8756869151938215 }, { "epoch": 0.6527815248475692, "grad_norm": 0.4295007884502411, "learning_rate": 5.684231955849469e-06, "loss": 0.3393901586532593, "step": 12098, "token_acc": 0.8752601637297073 }, { "epoch": 0.6528354826525657, "grad_norm": 0.3060898184776306, "learning_rate": 5.682655587455027e-06, "loss": 0.3534674644470215, "step": 12099, "token_acc": 0.8752295824660218 }, { "epoch": 0.6528894404575621, "grad_norm": 0.35824358463287354, "learning_rate": 5.681079350911154e-06, "loss": 0.32699811458587646, "step": 12100, "token_acc": 0.8832632464255677 }, { "epoch": 0.6529433982625586, "grad_norm": 0.3799644708633423, "learning_rate": 5.679503246265986e-06, "loss": 0.3616894781589508, "step": 12101, "token_acc": 0.8747596814062071 }, { "epoch": 0.6529973560675552, "grad_norm": 0.3944765031337738, "learning_rate": 5.677927273567656e-06, "loss": 0.3710790276527405, "step": 12102, "token_acc": 0.8737737083060824 }, { "epoch": 0.6530513138725517, "grad_norm": 0.3757319748401642, "learning_rate": 5.676351432864297e-06, "loss": 0.31431013345718384, "step": 12103, "token_acc": 0.8872855539522206 }, { "epoch": 0.6531052716775482, "grad_norm": 0.3265916109085083, "learning_rate": 5.674775724204027e-06, "loss": 0.3103189468383789, "step": 12104, "token_acc": 0.8916610169491526 }, { "epoch": 0.6531592294825447, "grad_norm": 0.353238970041275, "learning_rate": 5.673200147634977e-06, "loss": 0.36569398641586304, "step": 12105, "token_acc": 0.8689729067809582 }, { "epoch": 0.6532131872875412, "grad_norm": 0.41133907437324524, "learning_rate": 5.671624703205262e-06, "loss": 0.2988196015357971, "step": 12106, "token_acc": 0.8942210206150727 }, { "epoch": 0.6532671450925376, "grad_norm": 0.41579747200012207, "learning_rate": 5.670049390962995e-06, "loss": 0.33062657713890076, "step": 12107, "token_acc": 0.8869401816473536 }, { "epoch": 0.6533211028975341, "grad_norm": 0.40158000588417053, "learning_rate": 5.668474210956284e-06, "loss": 0.31381288170814514, "step": 12108, "token_acc": 0.8840986269009302 }, { "epoch": 0.6533750607025306, "grad_norm": 0.3371763527393341, "learning_rate": 5.666899163233237e-06, "loss": 0.3657785654067993, "step": 12109, "token_acc": 0.8655646043515255 }, { "epoch": 0.6534290185075271, "grad_norm": 0.4118761718273163, "learning_rate": 5.6653242478419535e-06, "loss": 0.3642390966415405, "step": 12110, "token_acc": 0.8697814619442351 }, { "epoch": 0.6534829763125236, "grad_norm": 0.4378022849559784, "learning_rate": 5.663749464830532e-06, "loss": 0.3446556031703949, "step": 12111, "token_acc": 0.8754059639799232 }, { "epoch": 0.6535369341175201, "grad_norm": 0.37191593647003174, "learning_rate": 5.662174814247067e-06, "loss": 0.3511422872543335, "step": 12112, "token_acc": 0.8752052545155994 }, { "epoch": 0.6535908919225166, "grad_norm": 0.4108271300792694, "learning_rate": 5.660600296139646e-06, "loss": 0.37473100423812866, "step": 12113, "token_acc": 0.8649650061753809 }, { "epoch": 0.6536448497275131, "grad_norm": 0.32694143056869507, "learning_rate": 5.659025910556355e-06, "loss": 0.3088334798812866, "step": 12114, "token_acc": 0.8905392747223337 }, { "epoch": 0.6536988075325095, "grad_norm": 0.37260371446609497, "learning_rate": 5.6574516575452765e-06, "loss": 0.31803807616233826, "step": 12115, "token_acc": 0.8835574195140002 }, { "epoch": 0.653752765337506, "grad_norm": 0.2392994463443756, "learning_rate": 5.655877537154486e-06, "loss": 0.37426862120628357, "step": 12116, "token_acc": 0.869677641785899 }, { "epoch": 0.6538067231425025, "grad_norm": 0.3618009090423584, "learning_rate": 5.654303549432053e-06, "loss": 0.2907623052597046, "step": 12117, "token_acc": 0.8973978697586625 }, { "epoch": 0.653860680947499, "grad_norm": 0.4268273711204529, "learning_rate": 5.6527296944260555e-06, "loss": 0.2870486080646515, "step": 12118, "token_acc": 0.8933469805527123 }, { "epoch": 0.6539146387524956, "grad_norm": 0.45930245518684387, "learning_rate": 5.651155972184554e-06, "loss": 0.36199986934661865, "step": 12119, "token_acc": 0.8687782805429864 }, { "epoch": 0.6539685965574921, "grad_norm": 0.4361560344696045, "learning_rate": 5.649582382755612e-06, "loss": 0.3084118366241455, "step": 12120, "token_acc": 0.8895705521472392 }, { "epoch": 0.6540225543624886, "grad_norm": 0.42181622982025146, "learning_rate": 5.648008926187283e-06, "loss": 0.4029272496700287, "step": 12121, "token_acc": 0.8621674237508111 }, { "epoch": 0.6540765121674851, "grad_norm": 0.4800662100315094, "learning_rate": 5.64643560252762e-06, "loss": 0.3234603703022003, "step": 12122, "token_acc": 0.8834052068359687 }, { "epoch": 0.6541304699724815, "grad_norm": 0.38634583353996277, "learning_rate": 5.644862411824676e-06, "loss": 0.3565874397754669, "step": 12123, "token_acc": 0.8764111006585137 }, { "epoch": 0.654184427777478, "grad_norm": 0.41964179277420044, "learning_rate": 5.6432893541264865e-06, "loss": 0.3333778977394104, "step": 12124, "token_acc": 0.8793718772305497 }, { "epoch": 0.6542383855824745, "grad_norm": 0.4623138904571533, "learning_rate": 5.641716429481104e-06, "loss": 0.3040146231651306, "step": 12125, "token_acc": 0.8890959925442684 }, { "epoch": 0.654292343387471, "grad_norm": 0.34111228585243225, "learning_rate": 5.6401436379365595e-06, "loss": 0.3211669623851776, "step": 12126, "token_acc": 0.886590511474735 }, { "epoch": 0.6543463011924675, "grad_norm": 0.39672306180000305, "learning_rate": 5.638570979540887e-06, "loss": 0.34805774688720703, "step": 12127, "token_acc": 0.8760669730794485 }, { "epoch": 0.654400258997464, "grad_norm": 0.3412143886089325, "learning_rate": 5.636998454342113e-06, "loss": 0.3044688105583191, "step": 12128, "token_acc": 0.8894875478927203 }, { "epoch": 0.6544542168024605, "grad_norm": 0.4540666341781616, "learning_rate": 5.635426062388265e-06, "loss": 0.30652064085006714, "step": 12129, "token_acc": 0.8871173469387755 }, { "epoch": 0.6545081746074569, "grad_norm": 0.46732211112976074, "learning_rate": 5.6338538037273605e-06, "loss": 0.3139602541923523, "step": 12130, "token_acc": 0.8918798665183537 }, { "epoch": 0.6545621324124534, "grad_norm": 0.3917939066886902, "learning_rate": 5.632281678407416e-06, "loss": 0.2955858111381531, "step": 12131, "token_acc": 0.8910714285714286 }, { "epoch": 0.6546160902174499, "grad_norm": 0.4531944990158081, "learning_rate": 5.630709686476447e-06, "loss": 0.33294373750686646, "step": 12132, "token_acc": 0.8839518229166666 }, { "epoch": 0.6546700480224464, "grad_norm": 0.45186975598335266, "learning_rate": 5.629137827982458e-06, "loss": 0.352541983127594, "step": 12133, "token_acc": 0.8704918032786885 }, { "epoch": 0.654724005827443, "grad_norm": 0.4473648965358734, "learning_rate": 5.627566102973456e-06, "loss": 0.3248360753059387, "step": 12134, "token_acc": 0.8845603434568294 }, { "epoch": 0.6547779636324395, "grad_norm": 0.40496930480003357, "learning_rate": 5.625994511497439e-06, "loss": 0.3398127853870392, "step": 12135, "token_acc": 0.8728406055209261 }, { "epoch": 0.654831921437436, "grad_norm": 0.3877302408218384, "learning_rate": 5.6244230536024035e-06, "loss": 0.3445655405521393, "step": 12136, "token_acc": 0.8788109756097561 }, { "epoch": 0.6548858792424325, "grad_norm": 0.2783920168876648, "learning_rate": 5.622851729336339e-06, "loss": 0.317688524723053, "step": 12137, "token_acc": 0.8884484087655544 }, { "epoch": 0.6549398370474289, "grad_norm": 0.41244038939476013, "learning_rate": 5.621280538747239e-06, "loss": 0.3542858362197876, "step": 12138, "token_acc": 0.8764928193499623 }, { "epoch": 0.6549937948524254, "grad_norm": 0.36191216111183167, "learning_rate": 5.619709481883084e-06, "loss": 0.3397619128227234, "step": 12139, "token_acc": 0.8777595559480257 }, { "epoch": 0.6550477526574219, "grad_norm": 0.44921889901161194, "learning_rate": 5.618138558791855e-06, "loss": 0.35222721099853516, "step": 12140, "token_acc": 0.8756582796957285 }, { "epoch": 0.6551017104624184, "grad_norm": 0.33053454756736755, "learning_rate": 5.616567769521526e-06, "loss": 0.2986133098602295, "step": 12141, "token_acc": 0.8924936386768448 }, { "epoch": 0.6551556682674149, "grad_norm": 0.4109610915184021, "learning_rate": 5.6149971141200685e-06, "loss": 0.2905867099761963, "step": 12142, "token_acc": 0.8904761904761904 }, { "epoch": 0.6552096260724114, "grad_norm": 0.33285775780677795, "learning_rate": 5.613426592635447e-06, "loss": 0.33917689323425293, "step": 12143, "token_acc": 0.8817585301837271 }, { "epoch": 0.6552635838774079, "grad_norm": 0.3860383927822113, "learning_rate": 5.611856205115636e-06, "loss": 0.4056904911994934, "step": 12144, "token_acc": 0.8586003924133421 }, { "epoch": 0.6553175416824044, "grad_norm": 0.37332651019096375, "learning_rate": 5.610285951608583e-06, "loss": 0.322085976600647, "step": 12145, "token_acc": 0.8883009153318078 }, { "epoch": 0.6553714994874008, "grad_norm": 0.4685141146183014, "learning_rate": 5.608715832162247e-06, "loss": 0.4055660367012024, "step": 12146, "token_acc": 0.858281665190434 }, { "epoch": 0.6554254572923973, "grad_norm": 0.3424984812736511, "learning_rate": 5.6071458468245795e-06, "loss": 0.34936845302581787, "step": 12147, "token_acc": 0.8768749184818051 }, { "epoch": 0.6554794150973938, "grad_norm": 0.441218763589859, "learning_rate": 5.605575995643526e-06, "loss": 0.2831634283065796, "step": 12148, "token_acc": 0.8964683115626512 }, { "epoch": 0.6555333729023903, "grad_norm": 0.411336213350296, "learning_rate": 5.604006278667028e-06, "loss": 0.34114426374435425, "step": 12149, "token_acc": 0.8764952617679043 }, { "epoch": 0.6555873307073868, "grad_norm": 0.5263086557388306, "learning_rate": 5.6024366959430295e-06, "loss": 0.3331042528152466, "step": 12150, "token_acc": 0.8796261682242991 }, { "epoch": 0.6556412885123833, "grad_norm": 0.4008628726005554, "learning_rate": 5.600867247519464e-06, "loss": 0.3027205467224121, "step": 12151, "token_acc": 0.891223880597015 }, { "epoch": 0.6556952463173799, "grad_norm": 0.24041713774204254, "learning_rate": 5.5992979334442585e-06, "loss": 0.3054153323173523, "step": 12152, "token_acc": 0.8902492993066824 }, { "epoch": 0.6557492041223763, "grad_norm": 0.3888118863105774, "learning_rate": 5.597728753765341e-06, "loss": 0.3195367157459259, "step": 12153, "token_acc": 0.8833303232797544 }, { "epoch": 0.6558031619273728, "grad_norm": 0.4288490414619446, "learning_rate": 5.5961597085306355e-06, "loss": 0.3249858021736145, "step": 12154, "token_acc": 0.885362051863398 }, { "epoch": 0.6558571197323693, "grad_norm": 0.4867244362831116, "learning_rate": 5.5945907977880584e-06, "loss": 0.36661648750305176, "step": 12155, "token_acc": 0.8697152717860225 }, { "epoch": 0.6559110775373658, "grad_norm": 0.4643027186393738, "learning_rate": 5.59302202158552e-06, "loss": 0.37296658754348755, "step": 12156, "token_acc": 0.8683424151872033 }, { "epoch": 0.6559650353423623, "grad_norm": 0.49546101689338684, "learning_rate": 5.591453379970939e-06, "loss": 0.35898470878601074, "step": 12157, "token_acc": 0.8714906417112299 }, { "epoch": 0.6560189931473588, "grad_norm": 0.32928869128227234, "learning_rate": 5.589884872992218e-06, "loss": 0.302324116230011, "step": 12158, "token_acc": 0.8943060498220641 }, { "epoch": 0.6560729509523553, "grad_norm": 0.45041579008102417, "learning_rate": 5.588316500697255e-06, "loss": 0.37404173612594604, "step": 12159, "token_acc": 0.8658077756242162 }, { "epoch": 0.6561269087573518, "grad_norm": 0.4239145815372467, "learning_rate": 5.586748263133952e-06, "loss": 0.33869272470474243, "step": 12160, "token_acc": 0.8789316702819957 }, { "epoch": 0.6561808665623482, "grad_norm": 0.3803260922431946, "learning_rate": 5.585180160350201e-06, "loss": 0.32446908950805664, "step": 12161, "token_acc": 0.8884741713944461 }, { "epoch": 0.6562348243673447, "grad_norm": 0.3835790753364563, "learning_rate": 5.58361219239389e-06, "loss": 0.31204256415367126, "step": 12162, "token_acc": 0.8868071818891491 }, { "epoch": 0.6562887821723412, "grad_norm": 0.3715730607509613, "learning_rate": 5.582044359312907e-06, "loss": 0.31467390060424805, "step": 12163, "token_acc": 0.8849869651893881 }, { "epoch": 0.6563427399773377, "grad_norm": 0.3849918246269226, "learning_rate": 5.580476661155132e-06, "loss": 0.33359089493751526, "step": 12164, "token_acc": 0.8878174220507875 }, { "epoch": 0.6563966977823342, "grad_norm": 0.32751011848449707, "learning_rate": 5.578909097968441e-06, "loss": 0.36398616433143616, "step": 12165, "token_acc": 0.8730314960629921 }, { "epoch": 0.6564506555873307, "grad_norm": 0.40120676159858704, "learning_rate": 5.577341669800709e-06, "loss": 0.32377028465270996, "step": 12166, "token_acc": 0.8807896735003796 }, { "epoch": 0.6565046133923272, "grad_norm": 0.4222072660923004, "learning_rate": 5.575774376699803e-06, "loss": 0.3106759190559387, "step": 12167, "token_acc": 0.886096256684492 }, { "epoch": 0.6565585711973237, "grad_norm": 0.43548548221588135, "learning_rate": 5.57420721871359e-06, "loss": 0.35782694816589355, "step": 12168, "token_acc": 0.8751470780494183 }, { "epoch": 0.6566125290023201, "grad_norm": 0.428163081407547, "learning_rate": 5.572640195889924e-06, "loss": 0.37493667006492615, "step": 12169, "token_acc": 0.8647918652685097 }, { "epoch": 0.6566664868073167, "grad_norm": 0.38851308822631836, "learning_rate": 5.571073308276671e-06, "loss": 0.3712700605392456, "step": 12170, "token_acc": 0.8715098584094217 }, { "epoch": 0.6567204446123132, "grad_norm": 0.46393993496894836, "learning_rate": 5.569506555921678e-06, "loss": 0.37126561999320984, "step": 12171, "token_acc": 0.8658926667456462 }, { "epoch": 0.6567744024173097, "grad_norm": 0.3454849421977997, "learning_rate": 5.567939938872795e-06, "loss": 0.3058426082134247, "step": 12172, "token_acc": 0.8916316958775611 }, { "epoch": 0.6568283602223062, "grad_norm": 0.4217160940170288, "learning_rate": 5.5663734571778646e-06, "loss": 0.30605268478393555, "step": 12173, "token_acc": 0.8908500710083181 }, { "epoch": 0.6568823180273027, "grad_norm": 0.412820965051651, "learning_rate": 5.564807110884729e-06, "loss": 0.3588971793651581, "step": 12174, "token_acc": 0.8732605729877216 }, { "epoch": 0.6569362758322992, "grad_norm": 0.46397608518600464, "learning_rate": 5.563240900041217e-06, "loss": 0.38178688287734985, "step": 12175, "token_acc": 0.864635557269572 }, { "epoch": 0.6569902336372956, "grad_norm": 0.43244531750679016, "learning_rate": 5.561674824695169e-06, "loss": 0.3160247206687927, "step": 12176, "token_acc": 0.8844021043651358 }, { "epoch": 0.6570441914422921, "grad_norm": 0.3658866584300995, "learning_rate": 5.560108884894411e-06, "loss": 0.3284071683883667, "step": 12177, "token_acc": 0.8823126142595978 }, { "epoch": 0.6570981492472886, "grad_norm": 0.38625386357307434, "learning_rate": 5.5585430806867665e-06, "loss": 0.3950725197792053, "step": 12178, "token_acc": 0.864808362369338 }, { "epoch": 0.6571521070522851, "grad_norm": 0.3763057291507721, "learning_rate": 5.55697741212005e-06, "loss": 0.31353309750556946, "step": 12179, "token_acc": 0.8942845450825263 }, { "epoch": 0.6572060648572816, "grad_norm": 0.349205881357193, "learning_rate": 5.555411879242081e-06, "loss": 0.3395152688026428, "step": 12180, "token_acc": 0.8762762762762762 }, { "epoch": 0.6572600226622781, "grad_norm": 0.5419861674308777, "learning_rate": 5.553846482100666e-06, "loss": 0.39291298389434814, "step": 12181, "token_acc": 0.8596318211702827 }, { "epoch": 0.6573139804672746, "grad_norm": 0.30404332280158997, "learning_rate": 5.552281220743611e-06, "loss": 0.3763282597064972, "step": 12182, "token_acc": 0.8726601994808034 }, { "epoch": 0.6573679382722711, "grad_norm": 0.3920389413833618, "learning_rate": 5.550716095218729e-06, "loss": 0.3581441640853882, "step": 12183, "token_acc": 0.8740698340011448 }, { "epoch": 0.6574218960772675, "grad_norm": 0.5429816246032715, "learning_rate": 5.549151105573809e-06, "loss": 0.42520105838775635, "step": 12184, "token_acc": 0.8487715550129712 }, { "epoch": 0.657475853882264, "grad_norm": 0.49402496218681335, "learning_rate": 5.54758625185665e-06, "loss": 0.3483419418334961, "step": 12185, "token_acc": 0.8752275359920569 }, { "epoch": 0.6575298116872605, "grad_norm": 0.4714544713497162, "learning_rate": 5.54602153411504e-06, "loss": 0.32393980026245117, "step": 12186, "token_acc": 0.8829676071055381 }, { "epoch": 0.657583769492257, "grad_norm": 0.43348273634910583, "learning_rate": 5.544456952396766e-06, "loss": 0.36626094579696655, "step": 12187, "token_acc": 0.8768415611269061 }, { "epoch": 0.6576377272972536, "grad_norm": 0.5245306491851807, "learning_rate": 5.542892506749604e-06, "loss": 0.35450172424316406, "step": 12188, "token_acc": 0.8654980909406456 }, { "epoch": 0.6576916851022501, "grad_norm": 0.3898537755012512, "learning_rate": 5.541328197221343e-06, "loss": 0.3078587055206299, "step": 12189, "token_acc": 0.8902948247500949 }, { "epoch": 0.6577456429072466, "grad_norm": 0.419670432806015, "learning_rate": 5.539764023859751e-06, "loss": 0.3411867618560791, "step": 12190, "token_acc": 0.8812702995308553 }, { "epoch": 0.657799600712243, "grad_norm": 0.3583996295928955, "learning_rate": 5.538199986712596e-06, "loss": 0.34614092111587524, "step": 12191, "token_acc": 0.8794269797055312 }, { "epoch": 0.6578535585172395, "grad_norm": 0.32920020818710327, "learning_rate": 5.536636085827644e-06, "loss": 0.35930225253105164, "step": 12192, "token_acc": 0.8718459495351926 }, { "epoch": 0.657907516322236, "grad_norm": 0.4226398766040802, "learning_rate": 5.535072321252658e-06, "loss": 0.3011612594127655, "step": 12193, "token_acc": 0.8910429816651638 }, { "epoch": 0.6579614741272325, "grad_norm": 0.3042139708995819, "learning_rate": 5.533508693035393e-06, "loss": 0.2928628623485565, "step": 12194, "token_acc": 0.8908124009957004 }, { "epoch": 0.658015431932229, "grad_norm": 0.4350663721561432, "learning_rate": 5.531945201223603e-06, "loss": 0.33522334694862366, "step": 12195, "token_acc": 0.8785302156088673 }, { "epoch": 0.6580693897372255, "grad_norm": 0.3986233174800873, "learning_rate": 5.530381845865036e-06, "loss": 0.32405367493629456, "step": 12196, "token_acc": 0.8849504714166369 }, { "epoch": 0.658123347542222, "grad_norm": 0.421010285615921, "learning_rate": 5.528818627007436e-06, "loss": 0.31602635979652405, "step": 12197, "token_acc": 0.887822132390096 }, { "epoch": 0.6581773053472185, "grad_norm": 0.3053661286830902, "learning_rate": 5.527255544698544e-06, "loss": 0.32772570848464966, "step": 12198, "token_acc": 0.8842267135325131 }, { "epoch": 0.6582312631522149, "grad_norm": 0.413238525390625, "learning_rate": 5.525692598986096e-06, "loss": 0.3209572434425354, "step": 12199, "token_acc": 0.8869339879978178 }, { "epoch": 0.6582852209572114, "grad_norm": 0.34135961532592773, "learning_rate": 5.524129789917825e-06, "loss": 0.3212646245956421, "step": 12200, "token_acc": 0.8838493967146388 }, { "epoch": 0.6583391787622079, "grad_norm": 0.28321969509124756, "learning_rate": 5.5225671175414534e-06, "loss": 0.3516845107078552, "step": 12201, "token_acc": 0.8760006534879922 }, { "epoch": 0.6583931365672044, "grad_norm": 0.3684656023979187, "learning_rate": 5.521004581904712e-06, "loss": 0.3791719377040863, "step": 12202, "token_acc": 0.8654342218400688 }, { "epoch": 0.658447094372201, "grad_norm": 0.5094403028488159, "learning_rate": 5.519442183055317e-06, "loss": 0.3517434000968933, "step": 12203, "token_acc": 0.8788550008519339 }, { "epoch": 0.6585010521771975, "grad_norm": 0.3376014828681946, "learning_rate": 5.517879921040985e-06, "loss": 0.3263523280620575, "step": 12204, "token_acc": 0.8861349433322742 }, { "epoch": 0.658555009982194, "grad_norm": 0.4252183735370636, "learning_rate": 5.516317795909426e-06, "loss": 0.3760109543800354, "step": 12205, "token_acc": 0.8694249649368864 }, { "epoch": 0.6586089677871905, "grad_norm": 0.47428250312805176, "learning_rate": 5.514755807708347e-06, "loss": 0.39062029123306274, "step": 12206, "token_acc": 0.8678012253233492 }, { "epoch": 0.6586629255921869, "grad_norm": 0.3578907549381256, "learning_rate": 5.513193956485446e-06, "loss": 0.3266613781452179, "step": 12207, "token_acc": 0.8813232076200284 }, { "epoch": 0.6587168833971834, "grad_norm": 0.3914302587509155, "learning_rate": 5.511632242288431e-06, "loss": 0.37622255086898804, "step": 12208, "token_acc": 0.8685234168352264 }, { "epoch": 0.6587708412021799, "grad_norm": 0.41087600588798523, "learning_rate": 5.510070665164992e-06, "loss": 0.3643335700035095, "step": 12209, "token_acc": 0.8694654427645788 }, { "epoch": 0.6588247990071764, "grad_norm": 0.4490385055541992, "learning_rate": 5.508509225162817e-06, "loss": 0.3393533229827881, "step": 12210, "token_acc": 0.8822335025380711 }, { "epoch": 0.6588787568121729, "grad_norm": 0.4811970591545105, "learning_rate": 5.5069479223295955e-06, "loss": 0.3808264136314392, "step": 12211, "token_acc": 0.8708108108108108 }, { "epoch": 0.6589327146171694, "grad_norm": 0.4729587733745575, "learning_rate": 5.505386756713006e-06, "loss": 0.3129095137119293, "step": 12212, "token_acc": 0.8848135798726887 }, { "epoch": 0.6589866724221659, "grad_norm": 0.3190222978591919, "learning_rate": 5.503825728360732e-06, "loss": 0.32234132289886475, "step": 12213, "token_acc": 0.8819250139899273 }, { "epoch": 0.6590406302271623, "grad_norm": 0.48711350560188293, "learning_rate": 5.502264837320434e-06, "loss": 0.3539550304412842, "step": 12214, "token_acc": 0.8735568993952721 }, { "epoch": 0.6590945880321588, "grad_norm": 0.32923340797424316, "learning_rate": 5.500704083639794e-06, "loss": 0.30716851353645325, "step": 12215, "token_acc": 0.884375 }, { "epoch": 0.6591485458371553, "grad_norm": 0.4608725905418396, "learning_rate": 5.499143467366472e-06, "loss": 0.39260053634643555, "step": 12216, "token_acc": 0.8615271659324523 }, { "epoch": 0.6592025036421518, "grad_norm": 0.41335925459861755, "learning_rate": 5.497582988548131e-06, "loss": 0.3687606453895569, "step": 12217, "token_acc": 0.8702318623784593 }, { "epoch": 0.6592564614471483, "grad_norm": 0.35618844628334045, "learning_rate": 5.496022647232425e-06, "loss": 0.3158584237098694, "step": 12218, "token_acc": 0.8856024859017149 }, { "epoch": 0.6593104192521448, "grad_norm": 0.41411590576171875, "learning_rate": 5.494462443467008e-06, "loss": 0.3636728525161743, "step": 12219, "token_acc": 0.8739701338825953 }, { "epoch": 0.6593643770571413, "grad_norm": 0.48059991002082825, "learning_rate": 5.492902377299523e-06, "loss": 0.33165040612220764, "step": 12220, "token_acc": 0.8827520908947452 }, { "epoch": 0.6594183348621379, "grad_norm": 0.3570604920387268, "learning_rate": 5.491342448777624e-06, "loss": 0.3216993808746338, "step": 12221, "token_acc": 0.8817022384174908 }, { "epoch": 0.6594722926671343, "grad_norm": 0.507733166217804, "learning_rate": 5.489782657948943e-06, "loss": 0.34077081084251404, "step": 12222, "token_acc": 0.879489225857941 }, { "epoch": 0.6595262504721308, "grad_norm": 0.42681172490119934, "learning_rate": 5.488223004861119e-06, "loss": 0.28407347202301025, "step": 12223, "token_acc": 0.8951296883746968 }, { "epoch": 0.6595802082771273, "grad_norm": 0.3622361421585083, "learning_rate": 5.486663489561784e-06, "loss": 0.3746870756149292, "step": 12224, "token_acc": 0.8719274185040616 }, { "epoch": 0.6596341660821238, "grad_norm": 0.3318302035331726, "learning_rate": 5.485104112098562e-06, "loss": 0.34692811965942383, "step": 12225, "token_acc": 0.873101120147307 }, { "epoch": 0.6596881238871203, "grad_norm": 0.395711213350296, "learning_rate": 5.48354487251908e-06, "loss": 0.40341609716415405, "step": 12226, "token_acc": 0.8587275277317353 }, { "epoch": 0.6597420816921168, "grad_norm": 0.45908427238464355, "learning_rate": 5.481985770870952e-06, "loss": 0.3430435359477997, "step": 12227, "token_acc": 0.8812357897552494 }, { "epoch": 0.6597960394971133, "grad_norm": 0.5327731370925903, "learning_rate": 5.480426807201797e-06, "loss": 0.3937419652938843, "step": 12228, "token_acc": 0.8658247829518547 }, { "epoch": 0.6598499973021098, "grad_norm": 0.4112148582935333, "learning_rate": 5.478867981559221e-06, "loss": 0.3241630494594574, "step": 12229, "token_acc": 0.8856355993622264 }, { "epoch": 0.6599039551071062, "grad_norm": 0.31660979986190796, "learning_rate": 5.477309293990834e-06, "loss": 0.37793874740600586, "step": 12230, "token_acc": 0.8664247248887849 }, { "epoch": 0.6599579129121027, "grad_norm": 0.3656216561794281, "learning_rate": 5.475750744544237e-06, "loss": 0.32998085021972656, "step": 12231, "token_acc": 0.8848703170028819 }, { "epoch": 0.6600118707170992, "grad_norm": 0.40557393431663513, "learning_rate": 5.4741923332670255e-06, "loss": 0.3150412440299988, "step": 12232, "token_acc": 0.886585687879588 }, { "epoch": 0.6600658285220957, "grad_norm": 0.45471107959747314, "learning_rate": 5.4726340602067915e-06, "loss": 0.3609541952610016, "step": 12233, "token_acc": 0.8710771840542832 }, { "epoch": 0.6601197863270922, "grad_norm": 0.3257124125957489, "learning_rate": 5.471075925411131e-06, "loss": 0.3354567885398865, "step": 12234, "token_acc": 0.8761254268860602 }, { "epoch": 0.6601737441320887, "grad_norm": 0.4233171343803406, "learning_rate": 5.469517928927625e-06, "loss": 0.3353997766971588, "step": 12235, "token_acc": 0.8822656776803776 }, { "epoch": 0.6602277019370852, "grad_norm": 0.4082326889038086, "learning_rate": 5.467960070803855e-06, "loss": 0.3115779161453247, "step": 12236, "token_acc": 0.8835654596100279 }, { "epoch": 0.6602816597420816, "grad_norm": 0.3782070577144623, "learning_rate": 5.466402351087397e-06, "loss": 0.3744640350341797, "step": 12237, "token_acc": 0.8703202166640115 }, { "epoch": 0.6603356175470781, "grad_norm": 0.45065534114837646, "learning_rate": 5.464844769825825e-06, "loss": 0.3580470085144043, "step": 12238, "token_acc": 0.873336881319851 }, { "epoch": 0.6603895753520747, "grad_norm": 0.38986819982528687, "learning_rate": 5.463287327066704e-06, "loss": 0.32554373145103455, "step": 12239, "token_acc": 0.8831440526001195 }, { "epoch": 0.6604435331570712, "grad_norm": 0.49307864904403687, "learning_rate": 5.461730022857597e-06, "loss": 0.35049059987068176, "step": 12240, "token_acc": 0.8722794959908362 }, { "epoch": 0.6604974909620677, "grad_norm": 0.31748175621032715, "learning_rate": 5.460172857246069e-06, "loss": 0.3434125781059265, "step": 12241, "token_acc": 0.8733774185647808 }, { "epoch": 0.6605514487670642, "grad_norm": 0.47461897134780884, "learning_rate": 5.458615830279672e-06, "loss": 0.33722251653671265, "step": 12242, "token_acc": 0.8772563176895307 }, { "epoch": 0.6606054065720607, "grad_norm": 0.3657487630844116, "learning_rate": 5.457058942005959e-06, "loss": 0.3431117534637451, "step": 12243, "token_acc": 0.879026651216686 }, { "epoch": 0.6606593643770572, "grad_norm": 0.48960012197494507, "learning_rate": 5.455502192472475e-06, "loss": 0.39379945397377014, "step": 12244, "token_acc": 0.8622151563328034 }, { "epoch": 0.6607133221820536, "grad_norm": 0.460521936416626, "learning_rate": 5.453945581726765e-06, "loss": 0.36338889598846436, "step": 12245, "token_acc": 0.8682933789954338 }, { "epoch": 0.6607672799870501, "grad_norm": 0.41963085532188416, "learning_rate": 5.452389109816365e-06, "loss": 0.3386740982532501, "step": 12246, "token_acc": 0.8821292775665399 }, { "epoch": 0.6608212377920466, "grad_norm": 0.3971008062362671, "learning_rate": 5.45083277678881e-06, "loss": 0.31533074378967285, "step": 12247, "token_acc": 0.8863011717054824 }, { "epoch": 0.6608751955970431, "grad_norm": 0.3712613880634308, "learning_rate": 5.449276582691632e-06, "loss": 0.29562908411026, "step": 12248, "token_acc": 0.8914526193585837 }, { "epoch": 0.6609291534020396, "grad_norm": 0.47124364972114563, "learning_rate": 5.447720527572353e-06, "loss": 0.32319915294647217, "step": 12249, "token_acc": 0.8817306388173064 }, { "epoch": 0.6609831112070361, "grad_norm": 0.34958547353744507, "learning_rate": 5.4461646114784975e-06, "loss": 0.3159197270870209, "step": 12250, "token_acc": 0.8866657530491983 }, { "epoch": 0.6610370690120326, "grad_norm": 0.44749799370765686, "learning_rate": 5.444608834457581e-06, "loss": 0.3387255072593689, "step": 12251, "token_acc": 0.8781004234724743 }, { "epoch": 0.6610910268170291, "grad_norm": 0.406601220369339, "learning_rate": 5.443053196557114e-06, "loss": 0.3578847050666809, "step": 12252, "token_acc": 0.8733629300776915 }, { "epoch": 0.6611449846220255, "grad_norm": 0.3169263005256653, "learning_rate": 5.441497697824613e-06, "loss": 0.3292449712753296, "step": 12253, "token_acc": 0.8823412698412698 }, { "epoch": 0.661198942427022, "grad_norm": 0.45981162786483765, "learning_rate": 5.439942338307577e-06, "loss": 0.3916313648223877, "step": 12254, "token_acc": 0.8627218934911243 }, { "epoch": 0.6612529002320185, "grad_norm": 0.485598087310791, "learning_rate": 5.438387118053508e-06, "loss": 0.38144761323928833, "step": 12255, "token_acc": 0.8625901751975267 }, { "epoch": 0.661306858037015, "grad_norm": 0.3743170499801636, "learning_rate": 5.436832037109903e-06, "loss": 0.29763853549957275, "step": 12256, "token_acc": 0.88955173001534 }, { "epoch": 0.6613608158420116, "grad_norm": 0.32966840267181396, "learning_rate": 5.43527709552425e-06, "loss": 0.3138783574104309, "step": 12257, "token_acc": 0.8903345724907064 }, { "epoch": 0.6614147736470081, "grad_norm": 0.4740532338619232, "learning_rate": 5.433722293344039e-06, "loss": 0.31580162048339844, "step": 12258, "token_acc": 0.8848983882270498 }, { "epoch": 0.6614687314520046, "grad_norm": 0.5268503427505493, "learning_rate": 5.432167630616749e-06, "loss": 0.36487001180648804, "step": 12259, "token_acc": 0.8719388550928092 }, { "epoch": 0.661522689257001, "grad_norm": 0.4048714339733124, "learning_rate": 5.4306131073898685e-06, "loss": 0.31372106075286865, "step": 12260, "token_acc": 0.8831437435367114 }, { "epoch": 0.6615766470619975, "grad_norm": 0.4220151901245117, "learning_rate": 5.4290587237108696e-06, "loss": 0.3362068831920624, "step": 12261, "token_acc": 0.8837762792542642 }, { "epoch": 0.661630604866994, "grad_norm": 0.3742351531982422, "learning_rate": 5.427504479627216e-06, "loss": 0.2967503070831299, "step": 12262, "token_acc": 0.8938539804757322 }, { "epoch": 0.6616845626719905, "grad_norm": 0.3572884500026703, "learning_rate": 5.425950375186377e-06, "loss": 0.3166401982307434, "step": 12263, "token_acc": 0.8869565217391304 }, { "epoch": 0.661738520476987, "grad_norm": 0.3184548318386078, "learning_rate": 5.424396410435816e-06, "loss": 0.3042117655277252, "step": 12264, "token_acc": 0.8927314460596787 }, { "epoch": 0.6617924782819835, "grad_norm": 0.39547887444496155, "learning_rate": 5.422842585422986e-06, "loss": 0.3530781865119934, "step": 12265, "token_acc": 0.8703728099064681 }, { "epoch": 0.66184643608698, "grad_norm": 0.4617580771446228, "learning_rate": 5.421288900195349e-06, "loss": 0.3906451165676117, "step": 12266, "token_acc": 0.867231638418079 }, { "epoch": 0.6619003938919765, "grad_norm": 0.3265714645385742, "learning_rate": 5.419735354800349e-06, "loss": 0.2941907048225403, "step": 12267, "token_acc": 0.8909007034944066 }, { "epoch": 0.6619543516969729, "grad_norm": 0.32531312108039856, "learning_rate": 5.41818194928543e-06, "loss": 0.2717168927192688, "step": 12268, "token_acc": 0.901281442809682 }, { "epoch": 0.6620083095019694, "grad_norm": 0.4366471767425537, "learning_rate": 5.4166286836980355e-06, "loss": 0.383816659450531, "step": 12269, "token_acc": 0.8722272865390459 }, { "epoch": 0.6620622673069659, "grad_norm": 0.44943809509277344, "learning_rate": 5.415075558085601e-06, "loss": 0.3644227087497711, "step": 12270, "token_acc": 0.8698472182185067 }, { "epoch": 0.6621162251119624, "grad_norm": 0.4912909269332886, "learning_rate": 5.413522572495555e-06, "loss": 0.33850806951522827, "step": 12271, "token_acc": 0.8793787748058671 }, { "epoch": 0.662170182916959, "grad_norm": 0.4650528132915497, "learning_rate": 5.411969726975327e-06, "loss": 0.33257678151130676, "step": 12272, "token_acc": 0.8839910647803425 }, { "epoch": 0.6622241407219555, "grad_norm": 0.4638940691947937, "learning_rate": 5.410417021572345e-06, "loss": 0.36624014377593994, "step": 12273, "token_acc": 0.8771537970644544 }, { "epoch": 0.662278098526952, "grad_norm": 0.3604836165904999, "learning_rate": 5.408864456334024e-06, "loss": 0.327174574136734, "step": 12274, "token_acc": 0.8843399540602621 }, { "epoch": 0.6623320563319485, "grad_norm": 0.35725826025009155, "learning_rate": 5.407312031307781e-06, "loss": 0.371346652507782, "step": 12275, "token_acc": 0.8716107465643185 }, { "epoch": 0.6623860141369449, "grad_norm": 0.42804446816444397, "learning_rate": 5.405759746541024e-06, "loss": 0.3440250754356384, "step": 12276, "token_acc": 0.8811764705882353 }, { "epoch": 0.6624399719419414, "grad_norm": 0.3027639091014862, "learning_rate": 5.404207602081162e-06, "loss": 0.3594226539134979, "step": 12277, "token_acc": 0.8716021545784792 }, { "epoch": 0.6624939297469379, "grad_norm": 0.3627718985080719, "learning_rate": 5.4026555979755945e-06, "loss": 0.33861708641052246, "step": 12278, "token_acc": 0.8773182957393484 }, { "epoch": 0.6625478875519344, "grad_norm": 0.3906058967113495, "learning_rate": 5.401103734271722e-06, "loss": 0.3444663882255554, "step": 12279, "token_acc": 0.8826788333061757 }, { "epoch": 0.6626018453569309, "grad_norm": 0.4067074954509735, "learning_rate": 5.399552011016938e-06, "loss": 0.31750568747520447, "step": 12280, "token_acc": 0.8840070298769771 }, { "epoch": 0.6626558031619274, "grad_norm": 0.29212436079978943, "learning_rate": 5.398000428258627e-06, "loss": 0.31986314058303833, "step": 12281, "token_acc": 0.8910830369189255 }, { "epoch": 0.6627097609669239, "grad_norm": 0.3769219219684601, "learning_rate": 5.3964489860441805e-06, "loss": 0.2899521589279175, "step": 12282, "token_acc": 0.8920094219967385 }, { "epoch": 0.6627637187719203, "grad_norm": 0.30764690041542053, "learning_rate": 5.394897684420974e-06, "loss": 0.2521486282348633, "step": 12283, "token_acc": 0.9067278287461774 }, { "epoch": 0.6628176765769168, "grad_norm": 0.36516329646110535, "learning_rate": 5.393346523436387e-06, "loss": 0.34571850299835205, "step": 12284, "token_acc": 0.8758800817624347 }, { "epoch": 0.6628716343819133, "grad_norm": 0.44637784361839294, "learning_rate": 5.391795503137785e-06, "loss": 0.4078676998615265, "step": 12285, "token_acc": 0.8635086310718587 }, { "epoch": 0.6629255921869098, "grad_norm": 0.27845993638038635, "learning_rate": 5.390244623572547e-06, "loss": 0.319380521774292, "step": 12286, "token_acc": 0.8895342216148503 }, { "epoch": 0.6629795499919063, "grad_norm": 0.42045289278030396, "learning_rate": 5.3886938847880295e-06, "loss": 0.3977859318256378, "step": 12287, "token_acc": 0.8636363636363636 }, { "epoch": 0.6630335077969028, "grad_norm": 0.45879414677619934, "learning_rate": 5.387143286831593e-06, "loss": 0.42053520679473877, "step": 12288, "token_acc": 0.8530476765238383 }, { "epoch": 0.6630874656018994, "grad_norm": 0.4357433021068573, "learning_rate": 5.385592829750592e-06, "loss": 0.41889292001724243, "step": 12289, "token_acc": 0.8525620367386401 }, { "epoch": 0.6631414234068959, "grad_norm": 0.45615333318710327, "learning_rate": 5.384042513592377e-06, "loss": 0.37277206778526306, "step": 12290, "token_acc": 0.867047308319739 }, { "epoch": 0.6631953812118923, "grad_norm": 0.3893730044364929, "learning_rate": 5.38249233840429e-06, "loss": 0.3313586115837097, "step": 12291, "token_acc": 0.8832979288369623 }, { "epoch": 0.6632493390168888, "grad_norm": 0.46877017617225647, "learning_rate": 5.3809423042336826e-06, "loss": 0.33904826641082764, "step": 12292, "token_acc": 0.8812736103615758 }, { "epoch": 0.6633032968218853, "grad_norm": 0.43013980984687805, "learning_rate": 5.379392411127886e-06, "loss": 0.3212657570838928, "step": 12293, "token_acc": 0.8841554559043349 }, { "epoch": 0.6633572546268818, "grad_norm": 0.3736535608768463, "learning_rate": 5.3778426591342355e-06, "loss": 0.3471516966819763, "step": 12294, "token_acc": 0.8726366700864723 }, { "epoch": 0.6634112124318783, "grad_norm": 0.322309285402298, "learning_rate": 5.376293048300063e-06, "loss": 0.3131371736526489, "step": 12295, "token_acc": 0.8860673546985334 }, { "epoch": 0.6634651702368748, "grad_norm": 0.3480410873889923, "learning_rate": 5.3747435786726855e-06, "loss": 0.3235023021697998, "step": 12296, "token_acc": 0.8832786142873316 }, { "epoch": 0.6635191280418713, "grad_norm": 0.4528294503688812, "learning_rate": 5.3731942502994284e-06, "loss": 0.35223639011383057, "step": 12297, "token_acc": 0.870953032375741 }, { "epoch": 0.6635730858468677, "grad_norm": 0.4302791655063629, "learning_rate": 5.371645063227602e-06, "loss": 0.39243656396865845, "step": 12298, "token_acc": 0.8669923995656895 }, { "epoch": 0.6636270436518642, "grad_norm": 0.3827921450138092, "learning_rate": 5.370096017504527e-06, "loss": 0.2809448838233948, "step": 12299, "token_acc": 0.8931665062560153 }, { "epoch": 0.6636810014568607, "grad_norm": 0.40838882327079773, "learning_rate": 5.368547113177507e-06, "loss": 0.3275776207447052, "step": 12300, "token_acc": 0.8848348348348348 }, { "epoch": 0.6637349592618572, "grad_norm": 0.4173926115036011, "learning_rate": 5.366998350293844e-06, "loss": 0.3104400634765625, "step": 12301, "token_acc": 0.8862633451957296 }, { "epoch": 0.6637889170668537, "grad_norm": 0.41617128252983093, "learning_rate": 5.365449728900839e-06, "loss": 0.30824148654937744, "step": 12302, "token_acc": 0.881525804038893 }, { "epoch": 0.6638428748718502, "grad_norm": 0.3164854049682617, "learning_rate": 5.363901249045784e-06, "loss": 0.31782007217407227, "step": 12303, "token_acc": 0.88421875 }, { "epoch": 0.6638968326768467, "grad_norm": 0.4661191403865814, "learning_rate": 5.362352910775968e-06, "loss": 0.4378150701522827, "step": 12304, "token_acc": 0.8512571570823998 }, { "epoch": 0.6639507904818432, "grad_norm": 0.3895832598209381, "learning_rate": 5.360804714138681e-06, "loss": 0.3401659429073334, "step": 12305, "token_acc": 0.8792517006802721 }, { "epoch": 0.6640047482868396, "grad_norm": 0.3161156475543976, "learning_rate": 5.359256659181204e-06, "loss": 0.30628702044487, "step": 12306, "token_acc": 0.889314100677068 }, { "epoch": 0.6640587060918361, "grad_norm": 0.47542497515678406, "learning_rate": 5.357708745950813e-06, "loss": 0.3949710726737976, "step": 12307, "token_acc": 0.8642191928735322 }, { "epoch": 0.6641126638968327, "grad_norm": 0.4153093695640564, "learning_rate": 5.3561609744947805e-06, "loss": 0.3966408371925354, "step": 12308, "token_acc": 0.8632432432432432 }, { "epoch": 0.6641666217018292, "grad_norm": 0.3951164782047272, "learning_rate": 5.354613344860375e-06, "loss": 0.3719891905784607, "step": 12309, "token_acc": 0.8707124010554089 }, { "epoch": 0.6642205795068257, "grad_norm": 0.27931448817253113, "learning_rate": 5.3530658570948615e-06, "loss": 0.35441330075263977, "step": 12310, "token_acc": 0.8741042747714356 }, { "epoch": 0.6642745373118222, "grad_norm": 0.45977041125297546, "learning_rate": 5.351518511245499e-06, "loss": 0.3569888174533844, "step": 12311, "token_acc": 0.8734599217277866 }, { "epoch": 0.6643284951168187, "grad_norm": 0.5078128576278687, "learning_rate": 5.349971307359545e-06, "loss": 0.38446319103240967, "step": 12312, "token_acc": 0.8642433234421365 }, { "epoch": 0.6643824529218152, "grad_norm": 0.40458908677101135, "learning_rate": 5.348424245484247e-06, "loss": 0.3071843385696411, "step": 12313, "token_acc": 0.887636130685458 }, { "epoch": 0.6644364107268116, "grad_norm": 0.4420869052410126, "learning_rate": 5.346877325666855e-06, "loss": 0.3486093580722809, "step": 12314, "token_acc": 0.8780125453945197 }, { "epoch": 0.6644903685318081, "grad_norm": 0.31787821650505066, "learning_rate": 5.345330547954611e-06, "loss": 0.3273424506187439, "step": 12315, "token_acc": 0.8816532857567648 }, { "epoch": 0.6645443263368046, "grad_norm": 0.4163791239261627, "learning_rate": 5.343783912394751e-06, "loss": 0.36136502027511597, "step": 12316, "token_acc": 0.8789988151658767 }, { "epoch": 0.6645982841418011, "grad_norm": 0.4218562841415405, "learning_rate": 5.342237419034509e-06, "loss": 0.29445356130599976, "step": 12317, "token_acc": 0.8927321391805642 }, { "epoch": 0.6646522419467976, "grad_norm": 0.3248704969882965, "learning_rate": 5.340691067921118e-06, "loss": 0.2905946969985962, "step": 12318, "token_acc": 0.894213506421765 }, { "epoch": 0.6647061997517941, "grad_norm": 0.3377043604850769, "learning_rate": 5.339144859101801e-06, "loss": 0.3589903712272644, "step": 12319, "token_acc": 0.876789253468399 }, { "epoch": 0.6647601575567906, "grad_norm": 0.36869338154792786, "learning_rate": 5.3375987926237815e-06, "loss": 0.3326665163040161, "step": 12320, "token_acc": 0.887074260189838 }, { "epoch": 0.664814115361787, "grad_norm": 0.4357892572879791, "learning_rate": 5.336052868534271e-06, "loss": 0.340888649225235, "step": 12321, "token_acc": 0.8812467351558418 }, { "epoch": 0.6648680731667835, "grad_norm": 0.32326415181159973, "learning_rate": 5.334507086880485e-06, "loss": 0.3877537250518799, "step": 12322, "token_acc": 0.8625992063492064 }, { "epoch": 0.66492203097178, "grad_norm": 0.3924238085746765, "learning_rate": 5.332961447709626e-06, "loss": 0.38454729318618774, "step": 12323, "token_acc": 0.8650574712643678 }, { "epoch": 0.6649759887767765, "grad_norm": 0.49694737792015076, "learning_rate": 5.331415951068906e-06, "loss": 0.30709144473075867, "step": 12324, "token_acc": 0.8881638571687511 }, { "epoch": 0.6650299465817731, "grad_norm": 0.439712256193161, "learning_rate": 5.3298705970055205e-06, "loss": 0.33589059114456177, "step": 12325, "token_acc": 0.8797002724795641 }, { "epoch": 0.6650839043867696, "grad_norm": 0.3984426259994507, "learning_rate": 5.328325385566664e-06, "loss": 0.3565264940261841, "step": 12326, "token_acc": 0.8743400211193242 }, { "epoch": 0.6651378621917661, "grad_norm": 0.4354020953178406, "learning_rate": 5.326780316799524e-06, "loss": 0.38853883743286133, "step": 12327, "token_acc": 0.8664929047205329 }, { "epoch": 0.6651918199967626, "grad_norm": 0.3691816031932831, "learning_rate": 5.325235390751291e-06, "loss": 0.3570382595062256, "step": 12328, "token_acc": 0.8752971003010617 }, { "epoch": 0.665245777801759, "grad_norm": 0.45499947667121887, "learning_rate": 5.323690607469144e-06, "loss": 0.39272379875183105, "step": 12329, "token_acc": 0.8626556939501779 }, { "epoch": 0.6652997356067555, "grad_norm": 0.4793212413787842, "learning_rate": 5.322145967000261e-06, "loss": 0.40369802713394165, "step": 12330, "token_acc": 0.8666339187031806 }, { "epoch": 0.665353693411752, "grad_norm": 0.4012776017189026, "learning_rate": 5.320601469391815e-06, "loss": 0.3305450677871704, "step": 12331, "token_acc": 0.8842119602234028 }, { "epoch": 0.6654076512167485, "grad_norm": 0.4467941224575043, "learning_rate": 5.319057114690973e-06, "loss": 0.3658796548843384, "step": 12332, "token_acc": 0.876295036338333 }, { "epoch": 0.665461609021745, "grad_norm": 0.31688669323921204, "learning_rate": 5.317512902944902e-06, "loss": 0.2750782370567322, "step": 12333, "token_acc": 0.8973826339842127 }, { "epoch": 0.6655155668267415, "grad_norm": 0.40001851320266724, "learning_rate": 5.31596883420076e-06, "loss": 0.33189359307289124, "step": 12334, "token_acc": 0.8755794548488782 }, { "epoch": 0.665569524631738, "grad_norm": 0.34237727522850037, "learning_rate": 5.314424908505702e-06, "loss": 0.302192747592926, "step": 12335, "token_acc": 0.8894736842105263 }, { "epoch": 0.6656234824367345, "grad_norm": 0.45347610116004944, "learning_rate": 5.3128811259068754e-06, "loss": 0.4097375273704529, "step": 12336, "token_acc": 0.8554183432773692 }, { "epoch": 0.6656774402417309, "grad_norm": 0.5290217995643616, "learning_rate": 5.3113374864514365e-06, "loss": 0.34234872460365295, "step": 12337, "token_acc": 0.8825019485580671 }, { "epoch": 0.6657313980467274, "grad_norm": 0.38950544595718384, "learning_rate": 5.3097939901865226e-06, "loss": 0.3387537896633148, "step": 12338, "token_acc": 0.8788203753351207 }, { "epoch": 0.6657853558517239, "grad_norm": 0.3629647493362427, "learning_rate": 5.308250637159271e-06, "loss": 0.2959170341491699, "step": 12339, "token_acc": 0.8934603174603175 }, { "epoch": 0.6658393136567204, "grad_norm": 0.42794403433799744, "learning_rate": 5.306707427416815e-06, "loss": 0.36801862716674805, "step": 12340, "token_acc": 0.8714907077896402 }, { "epoch": 0.665893271461717, "grad_norm": 0.33963775634765625, "learning_rate": 5.305164361006286e-06, "loss": 0.29185837507247925, "step": 12341, "token_acc": 0.897025171624714 }, { "epoch": 0.6659472292667135, "grad_norm": 0.3785427212715149, "learning_rate": 5.303621437974807e-06, "loss": 0.36829814314842224, "step": 12342, "token_acc": 0.8710441935034156 }, { "epoch": 0.66600118707171, "grad_norm": 0.4047662615776062, "learning_rate": 5.3020786583695e-06, "loss": 0.366508424282074, "step": 12343, "token_acc": 0.8735119047619048 }, { "epoch": 0.6660551448767064, "grad_norm": 0.35886111855506897, "learning_rate": 5.300536022237479e-06, "loss": 0.28737860918045044, "step": 12344, "token_acc": 0.8969500924214417 }, { "epoch": 0.6661091026817029, "grad_norm": 0.4445081055164337, "learning_rate": 5.2989935296258575e-06, "loss": 0.3385544419288635, "step": 12345, "token_acc": 0.8814264487369985 }, { "epoch": 0.6661630604866994, "grad_norm": 0.3993418514728546, "learning_rate": 5.2974511805817406e-06, "loss": 0.3277212381362915, "step": 12346, "token_acc": 0.8847146081909062 }, { "epoch": 0.6662170182916959, "grad_norm": 0.36170926690101624, "learning_rate": 5.295908975152235e-06, "loss": 0.3378773033618927, "step": 12347, "token_acc": 0.8804123711340206 }, { "epoch": 0.6662709760966924, "grad_norm": 0.38536563515663147, "learning_rate": 5.2943669133844355e-06, "loss": 0.3683391213417053, "step": 12348, "token_acc": 0.872495196266813 }, { "epoch": 0.6663249339016889, "grad_norm": 0.4869116246700287, "learning_rate": 5.292824995325434e-06, "loss": 0.3264341354370117, "step": 12349, "token_acc": 0.8849449204406364 }, { "epoch": 0.6663788917066854, "grad_norm": 0.4344978928565979, "learning_rate": 5.2912832210223274e-06, "loss": 0.3552585244178772, "step": 12350, "token_acc": 0.8774321641297155 }, { "epoch": 0.6664328495116819, "grad_norm": 0.3491675853729248, "learning_rate": 5.289741590522199e-06, "loss": 0.3679894804954529, "step": 12351, "token_acc": 0.8708025042686397 }, { "epoch": 0.6664868073166783, "grad_norm": 0.3337918519973755, "learning_rate": 5.288200103872128e-06, "loss": 0.3646392226219177, "step": 12352, "token_acc": 0.8681863230921705 }, { "epoch": 0.6665407651216748, "grad_norm": 0.3916321396827698, "learning_rate": 5.286658761119192e-06, "loss": 0.2835536003112793, "step": 12353, "token_acc": 0.8904205199790612 }, { "epoch": 0.6665947229266713, "grad_norm": 0.4671849310398102, "learning_rate": 5.2851175623104625e-06, "loss": 0.34297823905944824, "step": 12354, "token_acc": 0.8736363636363637 }, { "epoch": 0.6666486807316678, "grad_norm": 0.45791417360305786, "learning_rate": 5.283576507493007e-06, "loss": 0.36758777499198914, "step": 12355, "token_acc": 0.8660082547169812 }, { "epoch": 0.6667026385366643, "grad_norm": 0.3429187536239624, "learning_rate": 5.282035596713885e-06, "loss": 0.3475276827812195, "step": 12356, "token_acc": 0.875550337540358 }, { "epoch": 0.6667565963416608, "grad_norm": 0.4148435592651367, "learning_rate": 5.280494830020164e-06, "loss": 0.33136552572250366, "step": 12357, "token_acc": 0.8796190476190476 }, { "epoch": 0.6668105541466574, "grad_norm": 0.3702434301376343, "learning_rate": 5.278954207458895e-06, "loss": 0.3308022618293762, "step": 12358, "token_acc": 0.8816384853536556 }, { "epoch": 0.6668645119516539, "grad_norm": 0.5352864861488342, "learning_rate": 5.277413729077128e-06, "loss": 0.3773314356803894, "step": 12359, "token_acc": 0.8661554548623214 }, { "epoch": 0.6669184697566503, "grad_norm": 0.3655351996421814, "learning_rate": 5.2758733949219086e-06, "loss": 0.3168950080871582, "step": 12360, "token_acc": 0.8919077691290939 }, { "epoch": 0.6669724275616468, "grad_norm": 0.3853225111961365, "learning_rate": 5.274333205040279e-06, "loss": 0.2710357904434204, "step": 12361, "token_acc": 0.8981789390340459 }, { "epoch": 0.6670263853666433, "grad_norm": 0.30394142866134644, "learning_rate": 5.272793159479273e-06, "loss": 0.2939542233943939, "step": 12362, "token_acc": 0.8939192777482741 }, { "epoch": 0.6670803431716398, "grad_norm": 0.39591771364212036, "learning_rate": 5.2712532582859264e-06, "loss": 0.37568315863609314, "step": 12363, "token_acc": 0.8650762612436449 }, { "epoch": 0.6671343009766363, "grad_norm": 0.3727773129940033, "learning_rate": 5.269713501507267e-06, "loss": 0.33579152822494507, "step": 12364, "token_acc": 0.8770835695657104 }, { "epoch": 0.6671882587816328, "grad_norm": 0.536435604095459, "learning_rate": 5.268173889190319e-06, "loss": 0.36499953269958496, "step": 12365, "token_acc": 0.8683068017366136 }, { "epoch": 0.6672422165866293, "grad_norm": 0.5523424744606018, "learning_rate": 5.2666344213821e-06, "loss": 0.40962499380111694, "step": 12366, "token_acc": 0.856728778467909 }, { "epoch": 0.6672961743916257, "grad_norm": 0.3748234808444977, "learning_rate": 5.265095098129625e-06, "loss": 0.29957133531570435, "step": 12367, "token_acc": 0.8915434205536594 }, { "epoch": 0.6673501321966222, "grad_norm": 0.37640702724456787, "learning_rate": 5.263555919479903e-06, "loss": 0.3386812210083008, "step": 12368, "token_acc": 0.8811217510259918 }, { "epoch": 0.6674040900016187, "grad_norm": 0.44434455037117004, "learning_rate": 5.262016885479946e-06, "loss": 0.34202152490615845, "step": 12369, "token_acc": 0.8825092250922509 }, { "epoch": 0.6674580478066152, "grad_norm": 0.438123494386673, "learning_rate": 5.260477996176751e-06, "loss": 0.3672999441623688, "step": 12370, "token_acc": 0.8761658031088083 }, { "epoch": 0.6675120056116117, "grad_norm": 0.3475635349750519, "learning_rate": 5.258939251617317e-06, "loss": 0.3638765811920166, "step": 12371, "token_acc": 0.8708016986230859 }, { "epoch": 0.6675659634166082, "grad_norm": 0.446482390165329, "learning_rate": 5.257400651848636e-06, "loss": 0.4100036025047302, "step": 12372, "token_acc": 0.8593009478672986 }, { "epoch": 0.6676199212216047, "grad_norm": 0.4052121639251709, "learning_rate": 5.255862196917698e-06, "loss": 0.38262152671813965, "step": 12373, "token_acc": 0.8696801813145304 }, { "epoch": 0.6676738790266012, "grad_norm": 0.5455482006072998, "learning_rate": 5.254323886871484e-06, "loss": 0.36445432901382446, "step": 12374, "token_acc": 0.8731126596980255 }, { "epoch": 0.6677278368315976, "grad_norm": 0.4630480110645294, "learning_rate": 5.252785721756972e-06, "loss": 0.325265109539032, "step": 12375, "token_acc": 0.8854939187184812 }, { "epoch": 0.6677817946365942, "grad_norm": 0.4989234209060669, "learning_rate": 5.251247701621145e-06, "loss": 0.36560940742492676, "step": 12376, "token_acc": 0.8722134456731613 }, { "epoch": 0.6678357524415907, "grad_norm": 0.42132776975631714, "learning_rate": 5.249709826510972e-06, "loss": 0.3214426040649414, "step": 12377, "token_acc": 0.8847381015687317 }, { "epoch": 0.6678897102465872, "grad_norm": 0.3855847120285034, "learning_rate": 5.248172096473414e-06, "loss": 0.3023408055305481, "step": 12378, "token_acc": 0.8860828241683639 }, { "epoch": 0.6679436680515837, "grad_norm": 0.47411441802978516, "learning_rate": 5.246634511555434e-06, "loss": 0.30367356538772583, "step": 12379, "token_acc": 0.8912476722532588 }, { "epoch": 0.6679976258565802, "grad_norm": 0.39451655745506287, "learning_rate": 5.245097071803991e-06, "loss": 0.3243066966533661, "step": 12380, "token_acc": 0.8871523504799409 }, { "epoch": 0.6680515836615767, "grad_norm": 0.5309362411499023, "learning_rate": 5.243559777266034e-06, "loss": 0.39151492714881897, "step": 12381, "token_acc": 0.8602594339622641 }, { "epoch": 0.6681055414665732, "grad_norm": 0.3669271767139435, "learning_rate": 5.24202262798852e-06, "loss": 0.3173757791519165, "step": 12382, "token_acc": 0.8848086586780054 }, { "epoch": 0.6681594992715696, "grad_norm": 0.3347780704498291, "learning_rate": 5.240485624018388e-06, "loss": 0.2996855080127716, "step": 12383, "token_acc": 0.8910318225650916 }, { "epoch": 0.6682134570765661, "grad_norm": 0.36476826667785645, "learning_rate": 5.238948765402578e-06, "loss": 0.3191526234149933, "step": 12384, "token_acc": 0.8872905235058579 }, { "epoch": 0.6682674148815626, "grad_norm": 0.326876163482666, "learning_rate": 5.237412052188027e-06, "loss": 0.2530404031276703, "step": 12385, "token_acc": 0.9082429753993237 }, { "epoch": 0.6683213726865591, "grad_norm": 0.3324885368347168, "learning_rate": 5.235875484421662e-06, "loss": 0.34772202372550964, "step": 12386, "token_acc": 0.8764063534083388 }, { "epoch": 0.6683753304915556, "grad_norm": 0.34730958938598633, "learning_rate": 5.234339062150413e-06, "loss": 0.3263365626335144, "step": 12387, "token_acc": 0.8867109121778052 }, { "epoch": 0.6684292882965521, "grad_norm": 0.4108399748802185, "learning_rate": 5.232802785421196e-06, "loss": 0.353149950504303, "step": 12388, "token_acc": 0.8762929251137774 }, { "epoch": 0.6684832461015486, "grad_norm": 0.35695528984069824, "learning_rate": 5.231266654280938e-06, "loss": 0.32392874360084534, "step": 12389, "token_acc": 0.8847683818811755 }, { "epoch": 0.668537203906545, "grad_norm": 0.3659548759460449, "learning_rate": 5.229730668776547e-06, "loss": 0.3617735505104065, "step": 12390, "token_acc": 0.8690388306511148 }, { "epoch": 0.6685911617115415, "grad_norm": 0.4168528914451599, "learning_rate": 5.228194828954931e-06, "loss": 0.4260472357273102, "step": 12391, "token_acc": 0.851691129546905 }, { "epoch": 0.668645119516538, "grad_norm": 0.5153011679649353, "learning_rate": 5.226659134862995e-06, "loss": 0.33939820528030396, "step": 12392, "token_acc": 0.8768062245276028 }, { "epoch": 0.6686990773215346, "grad_norm": 0.42551904916763306, "learning_rate": 5.225123586547638e-06, "loss": 0.36714208126068115, "step": 12393, "token_acc": 0.8733869261688174 }, { "epoch": 0.6687530351265311, "grad_norm": 0.48627620935440063, "learning_rate": 5.2235881840557565e-06, "loss": 0.3341670632362366, "step": 12394, "token_acc": 0.879880931535633 }, { "epoch": 0.6688069929315276, "grad_norm": 0.317548006772995, "learning_rate": 5.222052927434241e-06, "loss": 0.292894184589386, "step": 12395, "token_acc": 0.890857980933757 }, { "epoch": 0.6688609507365241, "grad_norm": 0.38817524909973145, "learning_rate": 5.220517816729978e-06, "loss": 0.3247675597667694, "step": 12396, "token_acc": 0.8859511219093897 }, { "epoch": 0.6689149085415206, "grad_norm": 0.42472201585769653, "learning_rate": 5.218982851989848e-06, "loss": 0.317666232585907, "step": 12397, "token_acc": 0.8892528663222881 }, { "epoch": 0.668968866346517, "grad_norm": 0.42192941904067993, "learning_rate": 5.217448033260728e-06, "loss": 0.3869888186454773, "step": 12398, "token_acc": 0.872527145619515 }, { "epoch": 0.6690228241515135, "grad_norm": 0.4125087559223175, "learning_rate": 5.215913360589494e-06, "loss": 0.36250293254852295, "step": 12399, "token_acc": 0.8719325153374233 }, { "epoch": 0.66907678195651, "grad_norm": 0.5256443023681641, "learning_rate": 5.214378834023013e-06, "loss": 0.37643831968307495, "step": 12400, "token_acc": 0.8676122931442081 }, { "epoch": 0.6691307397615065, "grad_norm": 0.4315451979637146, "learning_rate": 5.212844453608144e-06, "loss": 0.34964489936828613, "step": 12401, "token_acc": 0.8698298735757765 }, { "epoch": 0.669184697566503, "grad_norm": 0.37465450167655945, "learning_rate": 5.211310219391754e-06, "loss": 0.2610345184803009, "step": 12402, "token_acc": 0.9039008008266598 }, { "epoch": 0.6692386553714995, "grad_norm": 0.4210439622402191, "learning_rate": 5.209776131420696e-06, "loss": 0.3097699284553528, "step": 12403, "token_acc": 0.8887195121951219 }, { "epoch": 0.669292613176496, "grad_norm": 0.3289535343647003, "learning_rate": 5.208242189741821e-06, "loss": 0.3697446584701538, "step": 12404, "token_acc": 0.8722652689152234 }, { "epoch": 0.6693465709814925, "grad_norm": 0.45469820499420166, "learning_rate": 5.206708394401975e-06, "loss": 0.3318479061126709, "step": 12405, "token_acc": 0.8816862474084313 }, { "epoch": 0.6694005287864889, "grad_norm": 0.3015855550765991, "learning_rate": 5.205174745447998e-06, "loss": 0.3083358705043793, "step": 12406, "token_acc": 0.8884869872077635 }, { "epoch": 0.6694544865914854, "grad_norm": 0.3797347843647003, "learning_rate": 5.203641242926724e-06, "loss": 0.34066078066825867, "step": 12407, "token_acc": 0.8813964066408915 }, { "epoch": 0.6695084443964819, "grad_norm": 0.36694350838661194, "learning_rate": 5.202107886884994e-06, "loss": 0.3814516067504883, "step": 12408, "token_acc": 0.8672690998709072 }, { "epoch": 0.6695624022014784, "grad_norm": 0.40953168272972107, "learning_rate": 5.200574677369632e-06, "loss": 0.36199522018432617, "step": 12409, "token_acc": 0.8721142857142857 }, { "epoch": 0.669616360006475, "grad_norm": 0.3315085470676422, "learning_rate": 5.199041614427463e-06, "loss": 0.32880520820617676, "step": 12410, "token_acc": 0.8812632518692111 }, { "epoch": 0.6696703178114715, "grad_norm": 0.41295748949050903, "learning_rate": 5.197508698105305e-06, "loss": 0.35500839352607727, "step": 12411, "token_acc": 0.8755032517807371 }, { "epoch": 0.669724275616468, "grad_norm": 0.42273709177970886, "learning_rate": 5.195975928449977e-06, "loss": 0.3556174039840698, "step": 12412, "token_acc": 0.8755013077593723 }, { "epoch": 0.6697782334214644, "grad_norm": 0.42410141229629517, "learning_rate": 5.194443305508277e-06, "loss": 0.34725576639175415, "step": 12413, "token_acc": 0.8789377766206717 }, { "epoch": 0.6698321912264609, "grad_norm": 0.369271844625473, "learning_rate": 5.192910829327025e-06, "loss": 0.32030215859413147, "step": 12414, "token_acc": 0.8833207547169811 }, { "epoch": 0.6698861490314574, "grad_norm": 0.38280221819877625, "learning_rate": 5.191378499953016e-06, "loss": 0.33616429567337036, "step": 12415, "token_acc": 0.8814444696797638 }, { "epoch": 0.6699401068364539, "grad_norm": 0.4244336187839508, "learning_rate": 5.189846317433048e-06, "loss": 0.34804844856262207, "step": 12416, "token_acc": 0.8818480904019453 }, { "epoch": 0.6699940646414504, "grad_norm": 0.24366742372512817, "learning_rate": 5.1883142818139135e-06, "loss": 0.3455408811569214, "step": 12417, "token_acc": 0.8814806165343297 }, { "epoch": 0.6700480224464469, "grad_norm": 0.4153999090194702, "learning_rate": 5.186782393142401e-06, "loss": 0.330785870552063, "step": 12418, "token_acc": 0.8813559322033898 }, { "epoch": 0.6701019802514434, "grad_norm": 0.3500928580760956, "learning_rate": 5.185250651465292e-06, "loss": 0.3007430136203766, "step": 12419, "token_acc": 0.8900426742532006 }, { "epoch": 0.6701559380564399, "grad_norm": 0.42909595370292664, "learning_rate": 5.1837190568293635e-06, "loss": 0.3621244430541992, "step": 12420, "token_acc": 0.8679622749356959 }, { "epoch": 0.6702098958614363, "grad_norm": 0.3107110857963562, "learning_rate": 5.182187609281398e-06, "loss": 0.31887388229370117, "step": 12421, "token_acc": 0.8845891578588476 }, { "epoch": 0.6702638536664328, "grad_norm": 0.45624691247940063, "learning_rate": 5.18065630886816e-06, "loss": 0.39962589740753174, "step": 12422, "token_acc": 0.8589054096804809 }, { "epoch": 0.6703178114714293, "grad_norm": 0.4333774149417877, "learning_rate": 5.1791251556364165e-06, "loss": 0.2865488529205322, "step": 12423, "token_acc": 0.8908091123330715 }, { "epoch": 0.6703717692764258, "grad_norm": 0.4242933392524719, "learning_rate": 5.177594149632928e-06, "loss": 0.3511727452278137, "step": 12424, "token_acc": 0.8768953610568984 }, { "epoch": 0.6704257270814223, "grad_norm": 0.3696255087852478, "learning_rate": 5.17606329090445e-06, "loss": 0.2942795157432556, "step": 12425, "token_acc": 0.8932198327359617 }, { "epoch": 0.6704796848864188, "grad_norm": 0.29953110218048096, "learning_rate": 5.174532579497735e-06, "loss": 0.30302000045776367, "step": 12426, "token_acc": 0.8876701966717095 }, { "epoch": 0.6705336426914154, "grad_norm": 0.37102630734443665, "learning_rate": 5.173002015459532e-06, "loss": 0.32669079303741455, "step": 12427, "token_acc": 0.8862808579522461 }, { "epoch": 0.6705876004964118, "grad_norm": 0.35607999563217163, "learning_rate": 5.171471598836582e-06, "loss": 0.3434063196182251, "step": 12428, "token_acc": 0.8805282172667248 }, { "epoch": 0.6706415583014083, "grad_norm": 0.3382711410522461, "learning_rate": 5.169941329675625e-06, "loss": 0.3602921664714813, "step": 12429, "token_acc": 0.8759124087591241 }, { "epoch": 0.6706955161064048, "grad_norm": 0.4094226062297821, "learning_rate": 5.168411208023395e-06, "loss": 0.34186798334121704, "step": 12430, "token_acc": 0.8720338983050847 }, { "epoch": 0.6707494739114013, "grad_norm": 0.5303718447685242, "learning_rate": 5.166881233926621e-06, "loss": 0.36155131459236145, "step": 12431, "token_acc": 0.8776181926989827 }, { "epoch": 0.6708034317163978, "grad_norm": 0.37802326679229736, "learning_rate": 5.165351407432027e-06, "loss": 0.3288695812225342, "step": 12432, "token_acc": 0.880073126142596 }, { "epoch": 0.6708573895213943, "grad_norm": 0.5320161581039429, "learning_rate": 5.163821728586331e-06, "loss": 0.36664673686027527, "step": 12433, "token_acc": 0.8702479338842976 }, { "epoch": 0.6709113473263908, "grad_norm": 0.45654168725013733, "learning_rate": 5.162292197436256e-06, "loss": 0.3670209050178528, "step": 12434, "token_acc": 0.8715277777777778 }, { "epoch": 0.6709653051313873, "grad_norm": 0.48843997716903687, "learning_rate": 5.160762814028512e-06, "loss": 0.33900773525238037, "step": 12435, "token_acc": 0.8770343580470162 }, { "epoch": 0.6710192629363837, "grad_norm": 0.4403286874294281, "learning_rate": 5.1592335784098014e-06, "loss": 0.3664541244506836, "step": 12436, "token_acc": 0.8731099911058405 }, { "epoch": 0.6710732207413802, "grad_norm": 0.444995254278183, "learning_rate": 5.157704490626831e-06, "loss": 0.3151509761810303, "step": 12437, "token_acc": 0.8859419911157564 }, { "epoch": 0.6711271785463767, "grad_norm": 0.32984450459480286, "learning_rate": 5.1561755507262955e-06, "loss": 0.30506452918052673, "step": 12438, "token_acc": 0.88836987607245 }, { "epoch": 0.6711811363513732, "grad_norm": 0.3603835999965668, "learning_rate": 5.154646758754887e-06, "loss": 0.30259189009666443, "step": 12439, "token_acc": 0.8927044025157232 }, { "epoch": 0.6712350941563697, "grad_norm": 0.37897419929504395, "learning_rate": 5.153118114759302e-06, "loss": 0.3622264862060547, "step": 12440, "token_acc": 0.873422159887798 }, { "epoch": 0.6712890519613662, "grad_norm": 0.39160266518592834, "learning_rate": 5.151589618786217e-06, "loss": 0.3294113874435425, "step": 12441, "token_acc": 0.8787878787878788 }, { "epoch": 0.6713430097663627, "grad_norm": 0.3519129455089569, "learning_rate": 5.150061270882317e-06, "loss": 0.28431856632232666, "step": 12442, "token_acc": 0.8956333648776531 }, { "epoch": 0.6713969675713592, "grad_norm": 0.35735800862312317, "learning_rate": 5.148533071094275e-06, "loss": 0.31241869926452637, "step": 12443, "token_acc": 0.8899933730947648 }, { "epoch": 0.6714509253763556, "grad_norm": 0.3573470413684845, "learning_rate": 5.147005019468763e-06, "loss": 0.32214635610580444, "step": 12444, "token_acc": 0.886061137925991 }, { "epoch": 0.6715048831813522, "grad_norm": 0.3702450394630432, "learning_rate": 5.145477116052444e-06, "loss": 0.38144615292549133, "step": 12445, "token_acc": 0.8702587289625722 }, { "epoch": 0.6715588409863487, "grad_norm": 0.42446237802505493, "learning_rate": 5.1439493608919845e-06, "loss": 0.3401784896850586, "step": 12446, "token_acc": 0.8828185627813131 }, { "epoch": 0.6716127987913452, "grad_norm": 0.44304659962654114, "learning_rate": 5.142421754034038e-06, "loss": 0.4069197475910187, "step": 12447, "token_acc": 0.8551407822652567 }, { "epoch": 0.6716667565963417, "grad_norm": 0.3473539650440216, "learning_rate": 5.140894295525258e-06, "loss": 0.3530145585536957, "step": 12448, "token_acc": 0.8844497092397157 }, { "epoch": 0.6717207144013382, "grad_norm": 0.44060808420181274, "learning_rate": 5.139366985412295e-06, "loss": 0.35780811309814453, "step": 12449, "token_acc": 0.8751625487646294 }, { "epoch": 0.6717746722063347, "grad_norm": 0.5283384323120117, "learning_rate": 5.13783982374179e-06, "loss": 0.3993133306503296, "step": 12450, "token_acc": 0.8611691022964509 }, { "epoch": 0.6718286300113311, "grad_norm": 0.4224455952644348, "learning_rate": 5.136312810560384e-06, "loss": 0.3387143015861511, "step": 12451, "token_acc": 0.8780720917531404 }, { "epoch": 0.6718825878163276, "grad_norm": 0.4934447705745697, "learning_rate": 5.134785945914706e-06, "loss": 0.37123623490333557, "step": 12452, "token_acc": 0.8702710333145116 }, { "epoch": 0.6719365456213241, "grad_norm": 0.38571080565452576, "learning_rate": 5.133259229851395e-06, "loss": 0.35384678840637207, "step": 12453, "token_acc": 0.875195822454308 }, { "epoch": 0.6719905034263206, "grad_norm": 0.4032882750034332, "learning_rate": 5.131732662417073e-06, "loss": 0.35317498445510864, "step": 12454, "token_acc": 0.8779815248862539 }, { "epoch": 0.6720444612313171, "grad_norm": 0.45347023010253906, "learning_rate": 5.1302062436583575e-06, "loss": 0.3872452974319458, "step": 12455, "token_acc": 0.8668473927603253 }, { "epoch": 0.6720984190363136, "grad_norm": 0.4317568838596344, "learning_rate": 5.128679973621871e-06, "loss": 0.3116176724433899, "step": 12456, "token_acc": 0.8878034339846063 }, { "epoch": 0.6721523768413101, "grad_norm": 0.37427085638046265, "learning_rate": 5.1271538523542195e-06, "loss": 0.3483012318611145, "step": 12457, "token_acc": 0.875602700096432 }, { "epoch": 0.6722063346463066, "grad_norm": 0.4523475766181946, "learning_rate": 5.1256278799020136e-06, "loss": 0.318321168422699, "step": 12458, "token_acc": 0.8846697636841042 }, { "epoch": 0.672260292451303, "grad_norm": 0.4180670380592346, "learning_rate": 5.124102056311852e-06, "loss": 0.32219260931015015, "step": 12459, "token_acc": 0.8850150854844117 }, { "epoch": 0.6723142502562995, "grad_norm": 0.4695136547088623, "learning_rate": 5.122576381630343e-06, "loss": 0.34150558710098267, "step": 12460, "token_acc": 0.8793332349904116 }, { "epoch": 0.672368208061296, "grad_norm": 0.4389641284942627, "learning_rate": 5.1210508559040706e-06, "loss": 0.3572816848754883, "step": 12461, "token_acc": 0.8756708407871199 }, { "epoch": 0.6724221658662926, "grad_norm": 0.49144765734672546, "learning_rate": 5.119525479179626e-06, "loss": 0.3274489641189575, "step": 12462, "token_acc": 0.8773246886196895 }, { "epoch": 0.6724761236712891, "grad_norm": 0.4520561397075653, "learning_rate": 5.118000251503597e-06, "loss": 0.3437092900276184, "step": 12463, "token_acc": 0.8787981666949584 }, { "epoch": 0.6725300814762856, "grad_norm": 0.33882567286491394, "learning_rate": 5.116475172922559e-06, "loss": 0.3043277859687805, "step": 12464, "token_acc": 0.8918319029298609 }, { "epoch": 0.6725840392812821, "grad_norm": 0.4358920753002167, "learning_rate": 5.114950243483086e-06, "loss": 0.3466433584690094, "step": 12465, "token_acc": 0.8740831295843521 }, { "epoch": 0.6726379970862786, "grad_norm": 0.3761310875415802, "learning_rate": 5.113425463231758e-06, "loss": 0.3613421618938446, "step": 12466, "token_acc": 0.8756617172369271 }, { "epoch": 0.672691954891275, "grad_norm": 0.4010452628135681, "learning_rate": 5.111900832215135e-06, "loss": 0.3498499095439911, "step": 12467, "token_acc": 0.8768364684882604 }, { "epoch": 0.6727459126962715, "grad_norm": 0.36572280526161194, "learning_rate": 5.110376350479782e-06, "loss": 0.29551467299461365, "step": 12468, "token_acc": 0.8893745796906524 }, { "epoch": 0.672799870501268, "grad_norm": 0.45489516854286194, "learning_rate": 5.1088520180722525e-06, "loss": 0.37937310338020325, "step": 12469, "token_acc": 0.8671148902511371 }, { "epoch": 0.6728538283062645, "grad_norm": 0.3998316824436188, "learning_rate": 5.107327835039102e-06, "loss": 0.3433721661567688, "step": 12470, "token_acc": 0.8786566814884156 }, { "epoch": 0.672907786111261, "grad_norm": 0.42460131645202637, "learning_rate": 5.105803801426874e-06, "loss": 0.3088030219078064, "step": 12471, "token_acc": 0.8908982511923689 }, { "epoch": 0.6729617439162575, "grad_norm": 0.419296532869339, "learning_rate": 5.104279917282118e-06, "loss": 0.3484302759170532, "step": 12472, "token_acc": 0.87529848766251 }, { "epoch": 0.673015701721254, "grad_norm": 0.4526416063308716, "learning_rate": 5.102756182651373e-06, "loss": 0.3343861401081085, "step": 12473, "token_acc": 0.8779684520714162 }, { "epoch": 0.6730696595262504, "grad_norm": 0.47759801149368286, "learning_rate": 5.10123259758117e-06, "loss": 0.34792712330818176, "step": 12474, "token_acc": 0.881413293623934 }, { "epoch": 0.6731236173312469, "grad_norm": 0.4188994765281677, "learning_rate": 5.0997091621180404e-06, "loss": 0.3352031111717224, "step": 12475, "token_acc": 0.8828406069138119 }, { "epoch": 0.6731775751362434, "grad_norm": 0.3127063512802124, "learning_rate": 5.09818587630851e-06, "loss": 0.3746181130409241, "step": 12476, "token_acc": 0.8701349757567816 }, { "epoch": 0.6732315329412399, "grad_norm": 0.4116934537887573, "learning_rate": 5.096662740199097e-06, "loss": 0.3190804123878479, "step": 12477, "token_acc": 0.8841803865425912 }, { "epoch": 0.6732854907462364, "grad_norm": 0.4949246346950531, "learning_rate": 5.0951397538363225e-06, "loss": 0.3628925085067749, "step": 12478, "token_acc": 0.871898816280201 }, { "epoch": 0.673339448551233, "grad_norm": 0.3982362151145935, "learning_rate": 5.093616917266693e-06, "loss": 0.32988137006759644, "step": 12479, "token_acc": 0.8894446360814073 }, { "epoch": 0.6733934063562295, "grad_norm": 0.4725560247898102, "learning_rate": 5.092094230536717e-06, "loss": 0.31100744009017944, "step": 12480, "token_acc": 0.8873020800993481 }, { "epoch": 0.673447364161226, "grad_norm": 0.5083101391792297, "learning_rate": 5.090571693692898e-06, "loss": 0.3162168264389038, "step": 12481, "token_acc": 0.8855172413793103 }, { "epoch": 0.6735013219662224, "grad_norm": 0.3112446367740631, "learning_rate": 5.089049306781733e-06, "loss": 0.3067965805530548, "step": 12482, "token_acc": 0.8898163606010017 }, { "epoch": 0.6735552797712189, "grad_norm": 0.43100693821907043, "learning_rate": 5.087527069849717e-06, "loss": 0.30836591124534607, "step": 12483, "token_acc": 0.8880130874479476 }, { "epoch": 0.6736092375762154, "grad_norm": 0.36177298426628113, "learning_rate": 5.0860049829433336e-06, "loss": 0.3287786841392517, "step": 12484, "token_acc": 0.8805722314471836 }, { "epoch": 0.6736631953812119, "grad_norm": 0.43704015016555786, "learning_rate": 5.084483046109073e-06, "loss": 0.2815798819065094, "step": 12485, "token_acc": 0.8976313640076232 }, { "epoch": 0.6737171531862084, "grad_norm": 0.41338464617729187, "learning_rate": 5.082961259393413e-06, "loss": 0.30834442377090454, "step": 12486, "token_acc": 0.8900457485407793 }, { "epoch": 0.6737711109912049, "grad_norm": 0.33990561962127686, "learning_rate": 5.081439622842828e-06, "loss": 0.30079948902130127, "step": 12487, "token_acc": 0.8934106234472968 }, { "epoch": 0.6738250687962014, "grad_norm": 0.5012412667274475, "learning_rate": 5.07991813650379e-06, "loss": 0.3656332492828369, "step": 12488, "token_acc": 0.8665158371040724 }, { "epoch": 0.6738790266011979, "grad_norm": 0.2826533615589142, "learning_rate": 5.0783968004227615e-06, "loss": 0.2983351945877075, "step": 12489, "token_acc": 0.8900689187662411 }, { "epoch": 0.6739329844061943, "grad_norm": 0.45676419138908386, "learning_rate": 5.076875614646206e-06, "loss": 0.32946640253067017, "step": 12490, "token_acc": 0.8763647547285868 }, { "epoch": 0.6739869422111908, "grad_norm": 0.38233500719070435, "learning_rate": 5.075354579220575e-06, "loss": 0.40865281224250793, "step": 12491, "token_acc": 0.8599662162162162 }, { "epoch": 0.6740409000161873, "grad_norm": 0.40243399143218994, "learning_rate": 5.073833694192329e-06, "loss": 0.3210119307041168, "step": 12492, "token_acc": 0.8813257881972514 }, { "epoch": 0.6740948578211838, "grad_norm": 0.4254169166088104, "learning_rate": 5.072312959607912e-06, "loss": 0.3204980492591858, "step": 12493, "token_acc": 0.8840665873959572 }, { "epoch": 0.6741488156261803, "grad_norm": 0.4447709619998932, "learning_rate": 5.070792375513771e-06, "loss": 0.3349969983100891, "step": 12494, "token_acc": 0.8810146041506534 }, { "epoch": 0.6742027734311768, "grad_norm": 0.4710852801799774, "learning_rate": 5.0692719419563354e-06, "loss": 0.3469682037830353, "step": 12495, "token_acc": 0.8752039151712887 }, { "epoch": 0.6742567312361734, "grad_norm": 0.31417953968048096, "learning_rate": 5.067751658982043e-06, "loss": 0.2963760495185852, "step": 12496, "token_acc": 0.8885531461073587 }, { "epoch": 0.6743106890411698, "grad_norm": 0.2601894736289978, "learning_rate": 5.066231526637318e-06, "loss": 0.3625715970993042, "step": 12497, "token_acc": 0.871550017468266 }, { "epoch": 0.6743646468461663, "grad_norm": 0.3552720248699188, "learning_rate": 5.064711544968594e-06, "loss": 0.3488897681236267, "step": 12498, "token_acc": 0.8753628447024674 }, { "epoch": 0.6744186046511628, "grad_norm": 0.42231541872024536, "learning_rate": 5.063191714022287e-06, "loss": 0.3277827501296997, "step": 12499, "token_acc": 0.8838141025641025 }, { "epoch": 0.6744725624561593, "grad_norm": 0.40287309885025024, "learning_rate": 5.061672033844811e-06, "loss": 0.28762781620025635, "step": 12500, "token_acc": 0.8909952606635071 }, { "epoch": 0.6745265202611558, "grad_norm": 0.39962512254714966, "learning_rate": 5.060152504482578e-06, "loss": 0.34491145610809326, "step": 12501, "token_acc": 0.8765547679550622 }, { "epoch": 0.6745804780661523, "grad_norm": 0.3441654145717621, "learning_rate": 5.058633125981993e-06, "loss": 0.26760154962539673, "step": 12502, "token_acc": 0.9026427962489344 }, { "epoch": 0.6746344358711488, "grad_norm": 0.4128384292125702, "learning_rate": 5.057113898389459e-06, "loss": 0.34819483757019043, "step": 12503, "token_acc": 0.88005694625697 }, { "epoch": 0.6746883936761453, "grad_norm": 0.42785030603408813, "learning_rate": 5.055594821751367e-06, "loss": 0.344317227602005, "step": 12504, "token_acc": 0.8779353915842174 }, { "epoch": 0.6747423514811417, "grad_norm": 0.4778544306755066, "learning_rate": 5.054075896114117e-06, "loss": 0.33568087220191956, "step": 12505, "token_acc": 0.8779672350384486 }, { "epoch": 0.6747963092861382, "grad_norm": 0.28530755639076233, "learning_rate": 5.052557121524095e-06, "loss": 0.39214378595352173, "step": 12506, "token_acc": 0.8629952988582942 }, { "epoch": 0.6748502670911347, "grad_norm": 0.39434516429901123, "learning_rate": 5.051038498027682e-06, "loss": 0.3305822014808655, "step": 12507, "token_acc": 0.8808965031682704 }, { "epoch": 0.6749042248961312, "grad_norm": 0.4129379689693451, "learning_rate": 5.0495200256712565e-06, "loss": 0.3403322696685791, "step": 12508, "token_acc": 0.8797299198199465 }, { "epoch": 0.6749581827011277, "grad_norm": 0.45563408732414246, "learning_rate": 5.048001704501192e-06, "loss": 0.3378124237060547, "step": 12509, "token_acc": 0.8716694772344014 }, { "epoch": 0.6750121405061242, "grad_norm": 0.43441832065582275, "learning_rate": 5.046483534563859e-06, "loss": 0.3193528652191162, "step": 12510, "token_acc": 0.8834465498748659 }, { "epoch": 0.6750660983111207, "grad_norm": 0.3495134711265564, "learning_rate": 5.044965515905621e-06, "loss": 0.30230939388275146, "step": 12511, "token_acc": 0.8902889143438971 }, { "epoch": 0.6751200561161173, "grad_norm": 0.4688558280467987, "learning_rate": 5.043447648572838e-06, "loss": 0.36021965742111206, "step": 12512, "token_acc": 0.8719584219229861 }, { "epoch": 0.6751740139211136, "grad_norm": 0.42312896251678467, "learning_rate": 5.041929932611865e-06, "loss": 0.3922576904296875, "step": 12513, "token_acc": 0.8628318584070797 }, { "epoch": 0.6752279717261102, "grad_norm": 0.29941219091415405, "learning_rate": 5.040412368069054e-06, "loss": 0.32255101203918457, "step": 12514, "token_acc": 0.8824198029805507 }, { "epoch": 0.6752819295311067, "grad_norm": 0.2818962335586548, "learning_rate": 5.038894954990749e-06, "loss": 0.3375598192214966, "step": 12515, "token_acc": 0.8818711826762909 }, { "epoch": 0.6753358873361032, "grad_norm": 0.4215185344219208, "learning_rate": 5.03737769342329e-06, "loss": 0.38394880294799805, "step": 12516, "token_acc": 0.8597569901917728 }, { "epoch": 0.6753898451410997, "grad_norm": 0.3137263059616089, "learning_rate": 5.035860583413019e-06, "loss": 0.3282049298286438, "step": 12517, "token_acc": 0.8804201783315012 }, { "epoch": 0.6754438029460962, "grad_norm": 0.4454036056995392, "learning_rate": 5.034343625006266e-06, "loss": 0.3444589674472809, "step": 12518, "token_acc": 0.8784140969162996 }, { "epoch": 0.6754977607510927, "grad_norm": 0.4054024815559387, "learning_rate": 5.032826818249356e-06, "loss": 0.33570384979248047, "step": 12519, "token_acc": 0.8830957739434858 }, { "epoch": 0.6755517185560891, "grad_norm": 0.5444598197937012, "learning_rate": 5.031310163188617e-06, "loss": 0.3047511577606201, "step": 12520, "token_acc": 0.8927392739273927 }, { "epoch": 0.6756056763610856, "grad_norm": 0.4435139000415802, "learning_rate": 5.029793659870362e-06, "loss": 0.35192587971687317, "step": 12521, "token_acc": 0.8732513777024162 }, { "epoch": 0.6756596341660821, "grad_norm": 0.4088864326477051, "learning_rate": 5.028277308340905e-06, "loss": 0.33667582273483276, "step": 12522, "token_acc": 0.876200101061142 }, { "epoch": 0.6757135919710786, "grad_norm": 0.37190741300582886, "learning_rate": 5.026761108646556e-06, "loss": 0.3243139684200287, "step": 12523, "token_acc": 0.8894342194955692 }, { "epoch": 0.6757675497760751, "grad_norm": 0.47982320189476013, "learning_rate": 5.025245060833621e-06, "loss": 0.3292524814605713, "step": 12524, "token_acc": 0.8806550665301944 }, { "epoch": 0.6758215075810716, "grad_norm": 0.3763236403465271, "learning_rate": 5.023729164948401e-06, "loss": 0.32020264863967896, "step": 12525, "token_acc": 0.8858695652173914 }, { "epoch": 0.6758754653860681, "grad_norm": 0.4848799407482147, "learning_rate": 5.0222134210371875e-06, "loss": 0.3966437578201294, "step": 12526, "token_acc": 0.8582089552238806 }, { "epoch": 0.6759294231910646, "grad_norm": 0.36326631903648376, "learning_rate": 5.020697829146273e-06, "loss": 0.30760911107063293, "step": 12527, "token_acc": 0.8917860340196956 }, { "epoch": 0.675983380996061, "grad_norm": 0.3272815942764282, "learning_rate": 5.019182389321941e-06, "loss": 0.32729169726371765, "step": 12528, "token_acc": 0.8850898836799436 }, { "epoch": 0.6760373388010575, "grad_norm": 0.42242276668548584, "learning_rate": 5.017667101610475e-06, "loss": 0.3328772783279419, "step": 12529, "token_acc": 0.8806151971981118 }, { "epoch": 0.676091296606054, "grad_norm": 0.3164187967777252, "learning_rate": 5.01615196605815e-06, "loss": 0.29822638630867004, "step": 12530, "token_acc": 0.8902107409925221 }, { "epoch": 0.6761452544110506, "grad_norm": 0.49813899397850037, "learning_rate": 5.014636982711238e-06, "loss": 0.30449357628822327, "step": 12531, "token_acc": 0.8871031746031746 }, { "epoch": 0.6761992122160471, "grad_norm": 0.3854420781135559, "learning_rate": 5.013122151616008e-06, "loss": 0.3047583997249603, "step": 12532, "token_acc": 0.8885533900631348 }, { "epoch": 0.6762531700210436, "grad_norm": 0.4131786525249481, "learning_rate": 5.0116074728187205e-06, "loss": 0.31527799367904663, "step": 12533, "token_acc": 0.8855728635900422 }, { "epoch": 0.6763071278260401, "grad_norm": 0.42284107208251953, "learning_rate": 5.010092946365632e-06, "loss": 0.3184877634048462, "step": 12534, "token_acc": 0.8887088935687673 }, { "epoch": 0.6763610856310366, "grad_norm": 0.4158880412578583, "learning_rate": 5.008578572303e-06, "loss": 0.3166835308074951, "step": 12535, "token_acc": 0.8851185609157809 }, { "epoch": 0.676415043436033, "grad_norm": 0.3990764021873474, "learning_rate": 5.007064350677067e-06, "loss": 0.3584795296192169, "step": 12536, "token_acc": 0.8721300486006368 }, { "epoch": 0.6764690012410295, "grad_norm": 0.42730268836021423, "learning_rate": 5.005550281534084e-06, "loss": 0.3153457045555115, "step": 12537, "token_acc": 0.8847087378640777 }, { "epoch": 0.676522959046026, "grad_norm": 0.5017180442810059, "learning_rate": 5.004036364920286e-06, "loss": 0.36403733491897583, "step": 12538, "token_acc": 0.8714630401403817 }, { "epoch": 0.6765769168510225, "grad_norm": 0.35439229011535645, "learning_rate": 5.00252260088191e-06, "loss": 0.34781983494758606, "step": 12539, "token_acc": 0.8774725274725275 }, { "epoch": 0.676630874656019, "grad_norm": 0.476163387298584, "learning_rate": 5.001008989465185e-06, "loss": 0.3727668821811676, "step": 12540, "token_acc": 0.8678511937812327 }, { "epoch": 0.6766848324610155, "grad_norm": 0.35526517033576965, "learning_rate": 4.999495530716335e-06, "loss": 0.3024510145187378, "step": 12541, "token_acc": 0.8880418535127055 }, { "epoch": 0.676738790266012, "grad_norm": 0.40415075421333313, "learning_rate": 4.997982224681582e-06, "loss": 0.3288325071334839, "step": 12542, "token_acc": 0.8824360604776035 }, { "epoch": 0.6767927480710084, "grad_norm": 0.4620034694671631, "learning_rate": 4.9964690714071404e-06, "loss": 0.3836084008216858, "step": 12543, "token_acc": 0.8619354838709677 }, { "epoch": 0.6768467058760049, "grad_norm": 0.3306860625743866, "learning_rate": 4.9949560709392246e-06, "loss": 0.3693072497844696, "step": 12544, "token_acc": 0.8708351680610598 }, { "epoch": 0.6769006636810014, "grad_norm": 0.43417394161224365, "learning_rate": 4.9934432233240385e-06, "loss": 0.3184373080730438, "step": 12545, "token_acc": 0.8862789567471245 }, { "epoch": 0.6769546214859979, "grad_norm": 0.35549476742744446, "learning_rate": 4.991930528607784e-06, "loss": 0.33410781621932983, "step": 12546, "token_acc": 0.8812460267005722 }, { "epoch": 0.6770085792909944, "grad_norm": 0.291898638010025, "learning_rate": 4.99041798683666e-06, "loss": 0.276156485080719, "step": 12547, "token_acc": 0.8984956693511624 }, { "epoch": 0.677062537095991, "grad_norm": 0.5274341702461243, "learning_rate": 4.988905598056858e-06, "loss": 0.3810895085334778, "step": 12548, "token_acc": 0.8661779747698602 }, { "epoch": 0.6771164949009875, "grad_norm": 0.4336308538913727, "learning_rate": 4.987393362314562e-06, "loss": 0.3280596137046814, "step": 12549, "token_acc": 0.8859603789836348 }, { "epoch": 0.677170452705984, "grad_norm": 0.29124757647514343, "learning_rate": 4.985881279655964e-06, "loss": 0.31434643268585205, "step": 12550, "token_acc": 0.8842243750747518 }, { "epoch": 0.6772244105109804, "grad_norm": 0.3805074393749237, "learning_rate": 4.984369350127238e-06, "loss": 0.31944552063941956, "step": 12551, "token_acc": 0.8822751322751323 }, { "epoch": 0.6772783683159769, "grad_norm": 0.38662582635879517, "learning_rate": 4.982857573774559e-06, "loss": 0.34899377822875977, "step": 12552, "token_acc": 0.8744453145392848 }, { "epoch": 0.6773323261209734, "grad_norm": 0.3604496419429779, "learning_rate": 4.981345950644094e-06, "loss": 0.3662134110927582, "step": 12553, "token_acc": 0.8710884707350036 }, { "epoch": 0.6773862839259699, "grad_norm": 0.40171948075294495, "learning_rate": 4.9798344807820095e-06, "loss": 0.3376650810241699, "step": 12554, "token_acc": 0.8829426906923181 }, { "epoch": 0.6774402417309664, "grad_norm": 0.43019184470176697, "learning_rate": 4.978323164234461e-06, "loss": 0.38574522733688354, "step": 12555, "token_acc": 0.8697495693653107 }, { "epoch": 0.6774941995359629, "grad_norm": 0.3104168474674225, "learning_rate": 4.976812001047612e-06, "loss": 0.271095335483551, "step": 12556, "token_acc": 0.9002370604504148 }, { "epoch": 0.6775481573409594, "grad_norm": 0.37077704071998596, "learning_rate": 4.975300991267608e-06, "loss": 0.36712831258773804, "step": 12557, "token_acc": 0.8731675177071323 }, { "epoch": 0.6776021151459558, "grad_norm": 0.4120013117790222, "learning_rate": 4.973790134940596e-06, "loss": 0.33987537026405334, "step": 12558, "token_acc": 0.8819033886085076 }, { "epoch": 0.6776560729509523, "grad_norm": 0.40368086099624634, "learning_rate": 4.972279432112715e-06, "loss": 0.3158080577850342, "step": 12559, "token_acc": 0.8887884267631103 }, { "epoch": 0.6777100307559488, "grad_norm": 0.4587722718715668, "learning_rate": 4.9707688828301045e-06, "loss": 0.3824828267097473, "step": 12560, "token_acc": 0.8676751358496108 }, { "epoch": 0.6777639885609453, "grad_norm": 0.3792273998260498, "learning_rate": 4.969258487138895e-06, "loss": 0.3615370988845825, "step": 12561, "token_acc": 0.8697041420118343 }, { "epoch": 0.6778179463659418, "grad_norm": 0.2956504821777344, "learning_rate": 4.9677482450852135e-06, "loss": 0.3621363043785095, "step": 12562, "token_acc": 0.8741524919709766 }, { "epoch": 0.6778719041709383, "grad_norm": 0.41029152274131775, "learning_rate": 4.96623815671518e-06, "loss": 0.3286343812942505, "step": 12563, "token_acc": 0.8770358306188925 }, { "epoch": 0.6779258619759349, "grad_norm": 0.38700881600379944, "learning_rate": 4.964728222074917e-06, "loss": 0.4106336236000061, "step": 12564, "token_acc": 0.8587672688629118 }, { "epoch": 0.6779798197809314, "grad_norm": 0.4200567305088043, "learning_rate": 4.963218441210535e-06, "loss": 0.3516122102737427, "step": 12565, "token_acc": 0.8759402846892721 }, { "epoch": 0.6780337775859278, "grad_norm": 0.3940802812576294, "learning_rate": 4.961708814168141e-06, "loss": 0.398955762386322, "step": 12566, "token_acc": 0.8614812916453101 }, { "epoch": 0.6780877353909243, "grad_norm": 0.3644692599773407, "learning_rate": 4.960199340993841e-06, "loss": 0.34833085536956787, "step": 12567, "token_acc": 0.8748438670996752 }, { "epoch": 0.6781416931959208, "grad_norm": 0.397981196641922, "learning_rate": 4.9586900217337285e-06, "loss": 0.34490644931793213, "step": 12568, "token_acc": 0.8844056706652127 }, { "epoch": 0.6781956510009173, "grad_norm": 0.3977985382080078, "learning_rate": 4.957180856433906e-06, "loss": 0.3694833517074585, "step": 12569, "token_acc": 0.8674630261660978 }, { "epoch": 0.6782496088059138, "grad_norm": 0.47477272152900696, "learning_rate": 4.955671845140459e-06, "loss": 0.37998777627944946, "step": 12570, "token_acc": 0.8656543745480839 }, { "epoch": 0.6783035666109103, "grad_norm": 0.40279656648635864, "learning_rate": 4.954162987899473e-06, "loss": 0.3017617166042328, "step": 12571, "token_acc": 0.8913943856419696 }, { "epoch": 0.6783575244159068, "grad_norm": 0.3687962293624878, "learning_rate": 4.952654284757028e-06, "loss": 0.3066597580909729, "step": 12572, "token_acc": 0.8899222184646601 }, { "epoch": 0.6784114822209033, "grad_norm": 0.3892052471637726, "learning_rate": 4.951145735759198e-06, "loss": 0.36023443937301636, "step": 12573, "token_acc": 0.8746791652717331 }, { "epoch": 0.6784654400258997, "grad_norm": 0.32974618673324585, "learning_rate": 4.94963734095205e-06, "loss": 0.3523522615432739, "step": 12574, "token_acc": 0.8801112012046797 }, { "epoch": 0.6785193978308962, "grad_norm": 0.47825655341148376, "learning_rate": 4.94812910038166e-06, "loss": 0.4103066325187683, "step": 12575, "token_acc": 0.8628311118609752 }, { "epoch": 0.6785733556358927, "grad_norm": 0.39502331614494324, "learning_rate": 4.946621014094083e-06, "loss": 0.32475605607032776, "step": 12576, "token_acc": 0.8831392841742131 }, { "epoch": 0.6786273134408892, "grad_norm": 0.40740278363227844, "learning_rate": 4.945113082135381e-06, "loss": 0.33051905035972595, "step": 12577, "token_acc": 0.8810844115834874 }, { "epoch": 0.6786812712458857, "grad_norm": 0.45711445808410645, "learning_rate": 4.9436053045515965e-06, "loss": 0.34235113859176636, "step": 12578, "token_acc": 0.8742867660592675 }, { "epoch": 0.6787352290508822, "grad_norm": 0.4816049337387085, "learning_rate": 4.942097681388782e-06, "loss": 0.3179109990596771, "step": 12579, "token_acc": 0.8880321285140562 }, { "epoch": 0.6787891868558787, "grad_norm": 0.4603479206562042, "learning_rate": 4.940590212692979e-06, "loss": 0.34951263666152954, "step": 12580, "token_acc": 0.8785569895817136 }, { "epoch": 0.6788431446608751, "grad_norm": 0.3569229543209076, "learning_rate": 4.939082898510222e-06, "loss": 0.3767591118812561, "step": 12581, "token_acc": 0.8701333692577126 }, { "epoch": 0.6788971024658716, "grad_norm": 0.30939555168151855, "learning_rate": 4.937575738886551e-06, "loss": 0.32665181159973145, "step": 12582, "token_acc": 0.8828695255474452 }, { "epoch": 0.6789510602708682, "grad_norm": 0.44986405968666077, "learning_rate": 4.93606873386799e-06, "loss": 0.3710799217224121, "step": 12583, "token_acc": 0.871526262185363 }, { "epoch": 0.6790050180758647, "grad_norm": 0.4319848418235779, "learning_rate": 4.934561883500565e-06, "loss": 0.34054654836654663, "step": 12584, "token_acc": 0.8765359859566998 }, { "epoch": 0.6790589758808612, "grad_norm": 0.3633659780025482, "learning_rate": 4.933055187830293e-06, "loss": 0.34627199172973633, "step": 12585, "token_acc": 0.8737270875763747 }, { "epoch": 0.6791129336858577, "grad_norm": 0.3525809049606323, "learning_rate": 4.931548646903187e-06, "loss": 0.3068954348564148, "step": 12586, "token_acc": 0.8900398406374502 }, { "epoch": 0.6791668914908542, "grad_norm": 0.47426334023475647, "learning_rate": 4.930042260765254e-06, "loss": 0.3120969533920288, "step": 12587, "token_acc": 0.8859299344915978 }, { "epoch": 0.6792208492958507, "grad_norm": 0.4202406108379364, "learning_rate": 4.928536029462506e-06, "loss": 0.36658400297164917, "step": 12588, "token_acc": 0.8709213051823417 }, { "epoch": 0.6792748071008471, "grad_norm": 0.4364902377128601, "learning_rate": 4.927029953040939e-06, "loss": 0.360066294670105, "step": 12589, "token_acc": 0.8721467783877492 }, { "epoch": 0.6793287649058436, "grad_norm": 0.36722803115844727, "learning_rate": 4.925524031546549e-06, "loss": 0.2993970215320587, "step": 12590, "token_acc": 0.8986357635323456 }, { "epoch": 0.6793827227108401, "grad_norm": 0.5020164847373962, "learning_rate": 4.924018265025326e-06, "loss": 0.389934778213501, "step": 12591, "token_acc": 0.8642832937987123 }, { "epoch": 0.6794366805158366, "grad_norm": 0.4122089147567749, "learning_rate": 4.9225126535232545e-06, "loss": 0.3550894856452942, "step": 12592, "token_acc": 0.8789045785194695 }, { "epoch": 0.6794906383208331, "grad_norm": 0.37403789162635803, "learning_rate": 4.9210071970863175e-06, "loss": 0.3228718042373657, "step": 12593, "token_acc": 0.8876146788990825 }, { "epoch": 0.6795445961258296, "grad_norm": 0.45131024718284607, "learning_rate": 4.919501895760489e-06, "loss": 0.34683528542518616, "step": 12594, "token_acc": 0.87461086129367 }, { "epoch": 0.6795985539308261, "grad_norm": 0.502712070941925, "learning_rate": 4.9179967495917424e-06, "loss": 0.31568947434425354, "step": 12595, "token_acc": 0.8906794425087108 }, { "epoch": 0.6796525117358226, "grad_norm": 0.3446502685546875, "learning_rate": 4.916491758626043e-06, "loss": 0.33514896035194397, "step": 12596, "token_acc": 0.8759596567815746 }, { "epoch": 0.679706469540819, "grad_norm": 0.3488839864730835, "learning_rate": 4.9149869229093536e-06, "loss": 0.3385750353336334, "step": 12597, "token_acc": 0.880426693128256 }, { "epoch": 0.6797604273458155, "grad_norm": 0.39938876032829285, "learning_rate": 4.913482242487633e-06, "loss": 0.33691713213920593, "step": 12598, "token_acc": 0.8780124559978337 }, { "epoch": 0.679814385150812, "grad_norm": 0.6055242419242859, "learning_rate": 4.911977717406831e-06, "loss": 0.3787931799888611, "step": 12599, "token_acc": 0.8675725005069965 }, { "epoch": 0.6798683429558086, "grad_norm": 0.3622609078884125, "learning_rate": 4.910473347712893e-06, "loss": 0.34018033742904663, "step": 12600, "token_acc": 0.8812408015396808 }, { "epoch": 0.6799223007608051, "grad_norm": 0.46241891384124756, "learning_rate": 4.90896913345177e-06, "loss": 0.4039182662963867, "step": 12601, "token_acc": 0.8661620505762945 }, { "epoch": 0.6799762585658016, "grad_norm": 0.4682025611400604, "learning_rate": 4.907465074669395e-06, "loss": 0.3398609757423401, "step": 12602, "token_acc": 0.8792380952380953 }, { "epoch": 0.6800302163707981, "grad_norm": 0.40390145778656006, "learning_rate": 4.905961171411704e-06, "loss": 0.3259187638759613, "step": 12603, "token_acc": 0.8808249370277078 }, { "epoch": 0.6800841741757945, "grad_norm": 0.4780350923538208, "learning_rate": 4.904457423724624e-06, "loss": 0.36411920189857483, "step": 12604, "token_acc": 0.8689169261019171 }, { "epoch": 0.680138131980791, "grad_norm": 0.38297078013420105, "learning_rate": 4.902953831654078e-06, "loss": 0.3924461901187897, "step": 12605, "token_acc": 0.8645075206210577 }, { "epoch": 0.6801920897857875, "grad_norm": 0.32837599515914917, "learning_rate": 4.901450395245988e-06, "loss": 0.33212539553642273, "step": 12606, "token_acc": 0.874297581236257 }, { "epoch": 0.680246047590784, "grad_norm": 0.4649963676929474, "learning_rate": 4.899947114546263e-06, "loss": 0.3676134943962097, "step": 12607, "token_acc": 0.8697887970615243 }, { "epoch": 0.6803000053957805, "grad_norm": 0.4444044828414917, "learning_rate": 4.8984439896008206e-06, "loss": 0.33543914556503296, "step": 12608, "token_acc": 0.8823316437364229 }, { "epoch": 0.680353963200777, "grad_norm": 0.4147659242153168, "learning_rate": 4.896941020455564e-06, "loss": 0.3233933448791504, "step": 12609, "token_acc": 0.8817407757805109 }, { "epoch": 0.6804079210057735, "grad_norm": 0.5012925863265991, "learning_rate": 4.89543820715639e-06, "loss": 0.35273391008377075, "step": 12610, "token_acc": 0.8773760715616847 }, { "epoch": 0.68046187881077, "grad_norm": 0.4199371039867401, "learning_rate": 4.893935549749201e-06, "loss": 0.3445243537425995, "step": 12611, "token_acc": 0.8788144510698 }, { "epoch": 0.6805158366157664, "grad_norm": 0.4697980284690857, "learning_rate": 4.892433048279878e-06, "loss": 0.35118067264556885, "step": 12612, "token_acc": 0.8753726893261777 }, { "epoch": 0.6805697944207629, "grad_norm": 0.41725683212280273, "learning_rate": 4.8909307027943075e-06, "loss": 0.33077871799468994, "step": 12613, "token_acc": 0.8792794942144817 }, { "epoch": 0.6806237522257594, "grad_norm": 0.44029203057289124, "learning_rate": 4.889428513338379e-06, "loss": 0.3234449625015259, "step": 12614, "token_acc": 0.8843271932563223 }, { "epoch": 0.680677710030756, "grad_norm": 0.4454309642314911, "learning_rate": 4.887926479957965e-06, "loss": 0.4010484516620636, "step": 12615, "token_acc": 0.8609028084105279 }, { "epoch": 0.6807316678357525, "grad_norm": 0.38760268688201904, "learning_rate": 4.886424602698938e-06, "loss": 0.40164434909820557, "step": 12616, "token_acc": 0.8631246046805819 }, { "epoch": 0.680785625640749, "grad_norm": 0.44624584913253784, "learning_rate": 4.884922881607163e-06, "loss": 0.33369937539100647, "step": 12617, "token_acc": 0.8783913182253431 }, { "epoch": 0.6808395834457455, "grad_norm": 0.413838267326355, "learning_rate": 4.883421316728504e-06, "loss": 0.34861356019973755, "step": 12618, "token_acc": 0.8767550702028081 }, { "epoch": 0.680893541250742, "grad_norm": 0.371160089969635, "learning_rate": 4.881919908108818e-06, "loss": 0.29986944794654846, "step": 12619, "token_acc": 0.8908695652173914 }, { "epoch": 0.6809474990557384, "grad_norm": 0.440552294254303, "learning_rate": 4.880418655793952e-06, "loss": 0.3839195668697357, "step": 12620, "token_acc": 0.8574622694242594 }, { "epoch": 0.6810014568607349, "grad_norm": 0.4273364245891571, "learning_rate": 4.878917559829764e-06, "loss": 0.41397446393966675, "step": 12621, "token_acc": 0.8613451180776136 }, { "epoch": 0.6810554146657314, "grad_norm": 0.4743519723415375, "learning_rate": 4.877416620262092e-06, "loss": 0.3556564152240753, "step": 12622, "token_acc": 0.8695578231292517 }, { "epoch": 0.6811093724707279, "grad_norm": 0.45884448289871216, "learning_rate": 4.875915837136775e-06, "loss": 0.28628602623939514, "step": 12623, "token_acc": 0.8996539792387543 }, { "epoch": 0.6811633302757244, "grad_norm": 0.5863298773765564, "learning_rate": 4.874415210499645e-06, "loss": 0.2860560417175293, "step": 12624, "token_acc": 0.8965592334494773 }, { "epoch": 0.6812172880807209, "grad_norm": 0.40453803539276123, "learning_rate": 4.872914740396532e-06, "loss": 0.40334558486938477, "step": 12625, "token_acc": 0.8631973639359164 }, { "epoch": 0.6812712458857174, "grad_norm": 0.37063318490982056, "learning_rate": 4.87141442687326e-06, "loss": 0.32835686206817627, "step": 12626, "token_acc": 0.8838567350116792 }, { "epoch": 0.6813252036907138, "grad_norm": 0.40286344289779663, "learning_rate": 4.869914269975647e-06, "loss": 0.3035812973976135, "step": 12627, "token_acc": 0.8910547396528705 }, { "epoch": 0.6813791614957103, "grad_norm": 0.351947546005249, "learning_rate": 4.86841426974951e-06, "loss": 0.31321707367897034, "step": 12628, "token_acc": 0.8864767073722297 }, { "epoch": 0.6814331193007068, "grad_norm": 0.39089757204055786, "learning_rate": 4.866914426240655e-06, "loss": 0.34176647663116455, "step": 12629, "token_acc": 0.8769737983689051 }, { "epoch": 0.6814870771057033, "grad_norm": 0.36174291372299194, "learning_rate": 4.86541473949489e-06, "loss": 0.3481440544128418, "step": 12630, "token_acc": 0.8803100775193798 }, { "epoch": 0.6815410349106998, "grad_norm": 0.38634490966796875, "learning_rate": 4.863915209558013e-06, "loss": 0.36641061305999756, "step": 12631, "token_acc": 0.8786759482881091 }, { "epoch": 0.6815949927156963, "grad_norm": 0.4628481864929199, "learning_rate": 4.862415836475817e-06, "loss": 0.36190903186798096, "step": 12632, "token_acc": 0.8755532856656452 }, { "epoch": 0.6816489505206929, "grad_norm": 0.48989033699035645, "learning_rate": 4.860916620294099e-06, "loss": 0.3406776785850525, "step": 12633, "token_acc": 0.8747791952894995 }, { "epoch": 0.6817029083256894, "grad_norm": 0.44036126136779785, "learning_rate": 4.859417561058641e-06, "loss": 0.3475351333618164, "step": 12634, "token_acc": 0.8764934057408844 }, { "epoch": 0.6817568661306858, "grad_norm": 0.3868064880371094, "learning_rate": 4.857918658815224e-06, "loss": 0.3308436870574951, "step": 12635, "token_acc": 0.8814439683158032 }, { "epoch": 0.6818108239356823, "grad_norm": 0.4687115252017975, "learning_rate": 4.856419913609624e-06, "loss": 0.4009740352630615, "step": 12636, "token_acc": 0.8662704309063893 }, { "epoch": 0.6818647817406788, "grad_norm": 0.32117959856987, "learning_rate": 4.854921325487612e-06, "loss": 0.30380839109420776, "step": 12637, "token_acc": 0.8898942659617731 }, { "epoch": 0.6819187395456753, "grad_norm": 0.414810448884964, "learning_rate": 4.853422894494957e-06, "loss": 0.3699280321598053, "step": 12638, "token_acc": 0.8728953905603092 }, { "epoch": 0.6819726973506718, "grad_norm": 0.4550595283508301, "learning_rate": 4.851924620677413e-06, "loss": 0.31468743085861206, "step": 12639, "token_acc": 0.8871553463349025 }, { "epoch": 0.6820266551556683, "grad_norm": 0.4031980037689209, "learning_rate": 4.8504265040807466e-06, "loss": 0.28894293308258057, "step": 12640, "token_acc": 0.8923076923076924 }, { "epoch": 0.6820806129606648, "grad_norm": 0.5199472904205322, "learning_rate": 4.848928544750706e-06, "loss": 0.29822632670402527, "step": 12641, "token_acc": 0.8937041460501621 }, { "epoch": 0.6821345707656613, "grad_norm": 0.4261243939399719, "learning_rate": 4.847430742733038e-06, "loss": 0.35446858406066895, "step": 12642, "token_acc": 0.8699811371597952 }, { "epoch": 0.6821885285706577, "grad_norm": 0.3711063265800476, "learning_rate": 4.845933098073485e-06, "loss": 0.35815489292144775, "step": 12643, "token_acc": 0.873394908763235 }, { "epoch": 0.6822424863756542, "grad_norm": 0.4785357415676117, "learning_rate": 4.844435610817786e-06, "loss": 0.33719855546951294, "step": 12644, "token_acc": 0.8753008423586041 }, { "epoch": 0.6822964441806507, "grad_norm": 0.3519376814365387, "learning_rate": 4.842938281011672e-06, "loss": 0.32412415742874146, "step": 12645, "token_acc": 0.8886630860792568 }, { "epoch": 0.6823504019856472, "grad_norm": 0.423465758562088, "learning_rate": 4.841441108700872e-06, "loss": 0.3706657886505127, "step": 12646, "token_acc": 0.8701671891327064 }, { "epoch": 0.6824043597906437, "grad_norm": 0.48354047536849976, "learning_rate": 4.839944093931109e-06, "loss": 0.34696638584136963, "step": 12647, "token_acc": 0.8736591179976162 }, { "epoch": 0.6824583175956402, "grad_norm": 0.42069852352142334, "learning_rate": 4.838447236748103e-06, "loss": 0.321286678314209, "step": 12648, "token_acc": 0.8809758849245523 }, { "epoch": 0.6825122754006367, "grad_norm": 0.4802326261997223, "learning_rate": 4.8369505371975646e-06, "loss": 0.38878995180130005, "step": 12649, "token_acc": 0.86181640625 }, { "epoch": 0.6825662332056331, "grad_norm": 0.34145697951316833, "learning_rate": 4.8354539953252045e-06, "loss": 0.3210080862045288, "step": 12650, "token_acc": 0.8837679856115108 }, { "epoch": 0.6826201910106297, "grad_norm": 0.3472142815589905, "learning_rate": 4.833957611176727e-06, "loss": 0.3202129602432251, "step": 12651, "token_acc": 0.8841830065359477 }, { "epoch": 0.6826741488156262, "grad_norm": 0.42561784386634827, "learning_rate": 4.8324613847978275e-06, "loss": 0.32854950428009033, "step": 12652, "token_acc": 0.8822486705495062 }, { "epoch": 0.6827281066206227, "grad_norm": 0.46090617775917053, "learning_rate": 4.830965316234206e-06, "loss": 0.3311123549938202, "step": 12653, "token_acc": 0.8826147000153445 }, { "epoch": 0.6827820644256192, "grad_norm": 0.49520498514175415, "learning_rate": 4.82946940553155e-06, "loss": 0.36119866371154785, "step": 12654, "token_acc": 0.8732152072939963 }, { "epoch": 0.6828360222306157, "grad_norm": 0.4942518472671509, "learning_rate": 4.827973652735545e-06, "loss": 0.3474237322807312, "step": 12655, "token_acc": 0.8771186440677966 }, { "epoch": 0.6828899800356122, "grad_norm": 0.3892366290092468, "learning_rate": 4.826478057891868e-06, "loss": 0.3747817873954773, "step": 12656, "token_acc": 0.8674271620163404 }, { "epoch": 0.6829439378406087, "grad_norm": 0.4052903652191162, "learning_rate": 4.824982621046197e-06, "loss": 0.37222713232040405, "step": 12657, "token_acc": 0.8676745635910225 }, { "epoch": 0.6829978956456051, "grad_norm": 0.4340652823448181, "learning_rate": 4.823487342244198e-06, "loss": 0.36557722091674805, "step": 12658, "token_acc": 0.8714969241285031 }, { "epoch": 0.6830518534506016, "grad_norm": 0.4564991295337677, "learning_rate": 4.821992221531545e-06, "loss": 0.33403685688972473, "step": 12659, "token_acc": 0.8847505270555165 }, { "epoch": 0.6831058112555981, "grad_norm": 0.4739786684513092, "learning_rate": 4.820497258953891e-06, "loss": 0.3533118963241577, "step": 12660, "token_acc": 0.8774590779396306 }, { "epoch": 0.6831597690605946, "grad_norm": 0.4320833086967468, "learning_rate": 4.819002454556893e-06, "loss": 0.3552130162715912, "step": 12661, "token_acc": 0.8756622000623248 }, { "epoch": 0.6832137268655911, "grad_norm": 0.46918630599975586, "learning_rate": 4.817507808386203e-06, "loss": 0.34794679284095764, "step": 12662, "token_acc": 0.8740787801778908 }, { "epoch": 0.6832676846705876, "grad_norm": 0.42024439573287964, "learning_rate": 4.816013320487467e-06, "loss": 0.3271515965461731, "step": 12663, "token_acc": 0.8808335779277957 }, { "epoch": 0.6833216424755841, "grad_norm": 0.5037441849708557, "learning_rate": 4.814518990906326e-06, "loss": 0.34610968828201294, "step": 12664, "token_acc": 0.8744703389830508 }, { "epoch": 0.6833756002805805, "grad_norm": 0.3459019362926483, "learning_rate": 4.813024819688413e-06, "loss": 0.32651522755622864, "step": 12665, "token_acc": 0.8791674341499355 }, { "epoch": 0.683429558085577, "grad_norm": 0.3753776550292969, "learning_rate": 4.811530806879367e-06, "loss": 0.3114970028400421, "step": 12666, "token_acc": 0.8856935717738038 }, { "epoch": 0.6834835158905735, "grad_norm": 0.32451507449150085, "learning_rate": 4.810036952524811e-06, "loss": 0.3061152696609497, "step": 12667, "token_acc": 0.8924869187481489 }, { "epoch": 0.68353747369557, "grad_norm": 0.3768054246902466, "learning_rate": 4.808543256670367e-06, "loss": 0.3712272644042969, "step": 12668, "token_acc": 0.8676266962060372 }, { "epoch": 0.6835914315005666, "grad_norm": 0.3716704547405243, "learning_rate": 4.807049719361651e-06, "loss": 0.28278616070747375, "step": 12669, "token_acc": 0.8993009826765272 }, { "epoch": 0.6836453893055631, "grad_norm": 0.35941413044929504, "learning_rate": 4.8055563406442785e-06, "loss": 0.3076882064342499, "step": 12670, "token_acc": 0.8865497076023392 }, { "epoch": 0.6836993471105596, "grad_norm": 0.3762223720550537, "learning_rate": 4.804063120563849e-06, "loss": 0.32266977429389954, "step": 12671, "token_acc": 0.8841071219740382 }, { "epoch": 0.6837533049155561, "grad_norm": 0.36433547735214233, "learning_rate": 4.8025700591659756e-06, "loss": 0.3234294056892395, "step": 12672, "token_acc": 0.8826038868699636 }, { "epoch": 0.6838072627205525, "grad_norm": 0.4030655324459076, "learning_rate": 4.801077156496249e-06, "loss": 0.375133216381073, "step": 12673, "token_acc": 0.8672473344345998 }, { "epoch": 0.683861220525549, "grad_norm": 0.41050010919570923, "learning_rate": 4.7995844126002655e-06, "loss": 0.31890788674354553, "step": 12674, "token_acc": 0.88671875 }, { "epoch": 0.6839151783305455, "grad_norm": 0.3211708664894104, "learning_rate": 4.798091827523612e-06, "loss": 0.3143669068813324, "step": 12675, "token_acc": 0.881578947368421 }, { "epoch": 0.683969136135542, "grad_norm": 0.3907458782196045, "learning_rate": 4.796599401311871e-06, "loss": 0.3099501132965088, "step": 12676, "token_acc": 0.8841396586816972 }, { "epoch": 0.6840230939405385, "grad_norm": 0.42075541615486145, "learning_rate": 4.79510713401062e-06, "loss": 0.39348655939102173, "step": 12677, "token_acc": 0.8648874061718098 }, { "epoch": 0.684077051745535, "grad_norm": 0.3779797852039337, "learning_rate": 4.793615025665436e-06, "loss": 0.3194335699081421, "step": 12678, "token_acc": 0.8844391901876755 }, { "epoch": 0.6841310095505315, "grad_norm": 0.39988330006599426, "learning_rate": 4.792123076321883e-06, "loss": 0.2978239953517914, "step": 12679, "token_acc": 0.8915064102564103 }, { "epoch": 0.684184967355528, "grad_norm": 0.34111329913139343, "learning_rate": 4.790631286025528e-06, "loss": 0.3045749068260193, "step": 12680, "token_acc": 0.8922740712173801 }, { "epoch": 0.6842389251605244, "grad_norm": 0.3604452610015869, "learning_rate": 4.789139654821929e-06, "loss": 0.34702539443969727, "step": 12681, "token_acc": 0.8776290630975143 }, { "epoch": 0.6842928829655209, "grad_norm": 0.47578874230384827, "learning_rate": 4.78764818275664e-06, "loss": 0.3272412121295929, "step": 12682, "token_acc": 0.8852247009376011 }, { "epoch": 0.6843468407705174, "grad_norm": 0.48073917627334595, "learning_rate": 4.786156869875209e-06, "loss": 0.4015398919582367, "step": 12683, "token_acc": 0.8600277478032989 }, { "epoch": 0.684400798575514, "grad_norm": 0.3707929849624634, "learning_rate": 4.7846657162231795e-06, "loss": 0.36598312854766846, "step": 12684, "token_acc": 0.8714568226763348 }, { "epoch": 0.6844547563805105, "grad_norm": 0.3799658715724945, "learning_rate": 4.783174721846095e-06, "loss": 0.33868300914764404, "step": 12685, "token_acc": 0.8785869303110683 }, { "epoch": 0.684508714185507, "grad_norm": 0.3996337354183197, "learning_rate": 4.781683886789488e-06, "loss": 0.32789838314056396, "step": 12686, "token_acc": 0.8777791894295515 }, { "epoch": 0.6845626719905035, "grad_norm": 0.4376952350139618, "learning_rate": 4.780193211098888e-06, "loss": 0.3377188444137573, "step": 12687, "token_acc": 0.8850406287465032 }, { "epoch": 0.6846166297954999, "grad_norm": 0.4181489050388336, "learning_rate": 4.77870269481982e-06, "loss": 0.3024905025959015, "step": 12688, "token_acc": 0.8881019830028328 }, { "epoch": 0.6846705876004964, "grad_norm": 0.4320698082447052, "learning_rate": 4.777212337997804e-06, "loss": 0.34275269508361816, "step": 12689, "token_acc": 0.8776555386949925 }, { "epoch": 0.6847245454054929, "grad_norm": 0.46922391653060913, "learning_rate": 4.7757221406783515e-06, "loss": 0.3746182322502136, "step": 12690, "token_acc": 0.8672072985301571 }, { "epoch": 0.6847785032104894, "grad_norm": 0.40782561898231506, "learning_rate": 4.774232102906981e-06, "loss": 0.33527153730392456, "step": 12691, "token_acc": 0.8807523739956172 }, { "epoch": 0.6848324610154859, "grad_norm": 0.37674039602279663, "learning_rate": 4.772742224729191e-06, "loss": 0.3348710238933563, "step": 12692, "token_acc": 0.8846486352970236 }, { "epoch": 0.6848864188204824, "grad_norm": 0.3276928961277008, "learning_rate": 4.771252506190489e-06, "loss": 0.3155144155025482, "step": 12693, "token_acc": 0.8862385321100917 }, { "epoch": 0.6849403766254789, "grad_norm": 0.28853410482406616, "learning_rate": 4.769762947336362e-06, "loss": 0.31992006301879883, "step": 12694, "token_acc": 0.8838762436963337 }, { "epoch": 0.6849943344304754, "grad_norm": 0.4771251082420349, "learning_rate": 4.7682735482123045e-06, "loss": 0.34557753801345825, "step": 12695, "token_acc": 0.8767418712674188 }, { "epoch": 0.6850482922354718, "grad_norm": 0.4875832498073578, "learning_rate": 4.766784308863803e-06, "loss": 0.37793970108032227, "step": 12696, "token_acc": 0.8705821205821206 }, { "epoch": 0.6851022500404683, "grad_norm": 0.4090585708618164, "learning_rate": 4.765295229336332e-06, "loss": 0.3305077850818634, "step": 12697, "token_acc": 0.8882229062044387 }, { "epoch": 0.6851562078454648, "grad_norm": 0.463633269071579, "learning_rate": 4.763806309675378e-06, "loss": 0.4268243610858917, "step": 12698, "token_acc": 0.8503878832656077 }, { "epoch": 0.6852101656504613, "grad_norm": 0.4071449339389801, "learning_rate": 4.7623175499264086e-06, "loss": 0.31130826473236084, "step": 12699, "token_acc": 0.8838718220338984 }, { "epoch": 0.6852641234554578, "grad_norm": 0.4583703577518463, "learning_rate": 4.760828950134888e-06, "loss": 0.38581588864326477, "step": 12700, "token_acc": 0.864820435002529 }, { "epoch": 0.6853180812604543, "grad_norm": 0.4758577346801758, "learning_rate": 4.759340510346279e-06, "loss": 0.35333558917045593, "step": 12701, "token_acc": 0.8743953563366655 }, { "epoch": 0.6853720390654509, "grad_norm": 0.33239829540252686, "learning_rate": 4.757852230606038e-06, "loss": 0.3238450288772583, "step": 12702, "token_acc": 0.8865594855305466 }, { "epoch": 0.6854259968704474, "grad_norm": 0.6147305965423584, "learning_rate": 4.756364110959613e-06, "loss": 0.3446979522705078, "step": 12703, "token_acc": 0.8760490639122014 }, { "epoch": 0.6854799546754438, "grad_norm": 0.4462675154209137, "learning_rate": 4.754876151452459e-06, "loss": 0.36461275815963745, "step": 12704, "token_acc": 0.8717980295566502 }, { "epoch": 0.6855339124804403, "grad_norm": 0.3948811888694763, "learning_rate": 4.753388352130012e-06, "loss": 0.3199767470359802, "step": 12705, "token_acc": 0.8862779228314477 }, { "epoch": 0.6855878702854368, "grad_norm": 0.42048102617263794, "learning_rate": 4.7519007130377105e-06, "loss": 0.3114202618598938, "step": 12706, "token_acc": 0.8904665314401623 }, { "epoch": 0.6856418280904333, "grad_norm": 0.4470534026622772, "learning_rate": 4.750413234220987e-06, "loss": 0.33659911155700684, "step": 12707, "token_acc": 0.8788216560509554 }, { "epoch": 0.6856957858954298, "grad_norm": 0.31386345624923706, "learning_rate": 4.748925915725267e-06, "loss": 0.32548972964286804, "step": 12708, "token_acc": 0.8825780222408227 }, { "epoch": 0.6857497437004263, "grad_norm": 0.4215186536312103, "learning_rate": 4.747438757595976e-06, "loss": 0.3057764768600464, "step": 12709, "token_acc": 0.8873655346973213 }, { "epoch": 0.6858037015054228, "grad_norm": 0.43545904755592346, "learning_rate": 4.745951759878527e-06, "loss": 0.3231436014175415, "step": 12710, "token_acc": 0.8809867282169648 }, { "epoch": 0.6858576593104192, "grad_norm": 0.34909796714782715, "learning_rate": 4.744464922618337e-06, "loss": 0.32319626212120056, "step": 12711, "token_acc": 0.8849163725640632 }, { "epoch": 0.6859116171154157, "grad_norm": 0.46308109164237976, "learning_rate": 4.74297824586081e-06, "loss": 0.35304707288742065, "step": 12712, "token_acc": 0.877389404696887 }, { "epoch": 0.6859655749204122, "grad_norm": 0.3840459883213043, "learning_rate": 4.741491729651351e-06, "loss": 0.3846483826637268, "step": 12713, "token_acc": 0.8668183643421229 }, { "epoch": 0.6860195327254087, "grad_norm": 0.4043360650539398, "learning_rate": 4.740005374035357e-06, "loss": 0.36786141991615295, "step": 12714, "token_acc": 0.8660686181932641 }, { "epoch": 0.6860734905304052, "grad_norm": 0.37198132276535034, "learning_rate": 4.738519179058221e-06, "loss": 0.3334236443042755, "step": 12715, "token_acc": 0.8784940102681118 }, { "epoch": 0.6861274483354017, "grad_norm": 0.43781355023384094, "learning_rate": 4.737033144765326e-06, "loss": 0.3593496084213257, "step": 12716, "token_acc": 0.8694681549573211 }, { "epoch": 0.6861814061403982, "grad_norm": 0.38873690366744995, "learning_rate": 4.735547271202065e-06, "loss": 0.33360567688941956, "step": 12717, "token_acc": 0.8811509591326105 }, { "epoch": 0.6862353639453947, "grad_norm": 0.4112258553504944, "learning_rate": 4.73406155841381e-06, "loss": 0.3491038680076599, "step": 12718, "token_acc": 0.878453921098866 }, { "epoch": 0.6862893217503911, "grad_norm": 0.37986764311790466, "learning_rate": 4.732576006445937e-06, "loss": 0.3466571271419525, "step": 12719, "token_acc": 0.8759081662307469 }, { "epoch": 0.6863432795553877, "grad_norm": 0.4289359450340271, "learning_rate": 4.731090615343813e-06, "loss": 0.3603735864162445, "step": 12720, "token_acc": 0.8755429335115269 }, { "epoch": 0.6863972373603842, "grad_norm": 0.5031833648681641, "learning_rate": 4.7296053851528005e-06, "loss": 0.3534526526927948, "step": 12721, "token_acc": 0.8732023851280253 }, { "epoch": 0.6864511951653807, "grad_norm": 0.5157005786895752, "learning_rate": 4.728120315918259e-06, "loss": 0.40206998586654663, "step": 12722, "token_acc": 0.8600886162235856 }, { "epoch": 0.6865051529703772, "grad_norm": 0.48943498730659485, "learning_rate": 4.7266354076855385e-06, "loss": 0.3713133633136749, "step": 12723, "token_acc": 0.872371548729201 }, { "epoch": 0.6865591107753737, "grad_norm": 0.3707873225212097, "learning_rate": 4.7251506604999945e-06, "loss": 0.27349913120269775, "step": 12724, "token_acc": 0.8943929493997873 }, { "epoch": 0.6866130685803702, "grad_norm": 0.3905331790447235, "learning_rate": 4.723666074406968e-06, "loss": 0.29747968912124634, "step": 12725, "token_acc": 0.8938385183391842 }, { "epoch": 0.6866670263853667, "grad_norm": 0.37134096026420593, "learning_rate": 4.722181649451797e-06, "loss": 0.31233924627304077, "step": 12726, "token_acc": 0.88499884071412 }, { "epoch": 0.6867209841903631, "grad_norm": 0.4119187593460083, "learning_rate": 4.720697385679816e-06, "loss": 0.33540594577789307, "step": 12727, "token_acc": 0.8782265411478895 }, { "epoch": 0.6867749419953596, "grad_norm": 0.39152950048446655, "learning_rate": 4.7192132831363576e-06, "loss": 0.32113218307495117, "step": 12728, "token_acc": 0.8846442885771543 }, { "epoch": 0.6868288998003561, "grad_norm": 0.4763351082801819, "learning_rate": 4.717729341866735e-06, "loss": 0.3643091320991516, "step": 12729, "token_acc": 0.8716749488453669 }, { "epoch": 0.6868828576053526, "grad_norm": 0.45623844861984253, "learning_rate": 4.716245561916277e-06, "loss": 0.40817990899086, "step": 12730, "token_acc": 0.8592514496573537 }, { "epoch": 0.6869368154103491, "grad_norm": 0.5267603993415833, "learning_rate": 4.714761943330297e-06, "loss": 0.37561172246932983, "step": 12731, "token_acc": 0.8681618427034399 }, { "epoch": 0.6869907732153456, "grad_norm": 0.3704621493816376, "learning_rate": 4.713278486154101e-06, "loss": 0.2704639434814453, "step": 12732, "token_acc": 0.8950892857142857 }, { "epoch": 0.6870447310203421, "grad_norm": 0.40967071056365967, "learning_rate": 4.711795190432997e-06, "loss": 0.35921820998191833, "step": 12733, "token_acc": 0.873693193940687 }, { "epoch": 0.6870986888253385, "grad_norm": 0.36315736174583435, "learning_rate": 4.710312056212282e-06, "loss": 0.3589845299720764, "step": 12734, "token_acc": 0.8687180705714626 }, { "epoch": 0.687152646630335, "grad_norm": 0.29589128494262695, "learning_rate": 4.708829083537246e-06, "loss": 0.3460316061973572, "step": 12735, "token_acc": 0.8792994635531713 }, { "epoch": 0.6872066044353315, "grad_norm": 0.3504601716995239, "learning_rate": 4.70734627245319e-06, "loss": 0.3095727860927582, "step": 12736, "token_acc": 0.8860285779088228 }, { "epoch": 0.687260562240328, "grad_norm": 0.43367236852645874, "learning_rate": 4.70586362300539e-06, "loss": 0.3198225498199463, "step": 12737, "token_acc": 0.8842064566015851 }, { "epoch": 0.6873145200453246, "grad_norm": 0.4656098484992981, "learning_rate": 4.7043811352391285e-06, "loss": 0.2961089611053467, "step": 12738, "token_acc": 0.891914420672409 }, { "epoch": 0.6873684778503211, "grad_norm": 0.47405946254730225, "learning_rate": 4.702898809199681e-06, "loss": 0.36480820178985596, "step": 12739, "token_acc": 0.875599520383693 }, { "epoch": 0.6874224356553176, "grad_norm": 0.3300391733646393, "learning_rate": 4.701416644932315e-06, "loss": 0.31849420070648193, "step": 12740, "token_acc": 0.8881140084899939 }, { "epoch": 0.6874763934603141, "grad_norm": 0.4808851182460785, "learning_rate": 4.699934642482297e-06, "loss": 0.3374675512313843, "step": 12741, "token_acc": 0.8824237560192616 }, { "epoch": 0.6875303512653105, "grad_norm": 0.3982279598712921, "learning_rate": 4.698452801894887e-06, "loss": 0.4064701795578003, "step": 12742, "token_acc": 0.8589777195281783 }, { "epoch": 0.687584309070307, "grad_norm": 0.3945707678794861, "learning_rate": 4.696971123215339e-06, "loss": 0.30007871985435486, "step": 12743, "token_acc": 0.8871057000553404 }, { "epoch": 0.6876382668753035, "grad_norm": 0.30260732769966125, "learning_rate": 4.695489606488904e-06, "loss": 0.3109906315803528, "step": 12744, "token_acc": 0.8871350984385608 }, { "epoch": 0.6876922246803, "grad_norm": 0.42234882712364197, "learning_rate": 4.694008251760827e-06, "loss": 0.3588311970233917, "step": 12745, "token_acc": 0.8714622641509434 }, { "epoch": 0.6877461824852965, "grad_norm": 0.33739447593688965, "learning_rate": 4.692527059076347e-06, "loss": 0.3657580614089966, "step": 12746, "token_acc": 0.8699835422205342 }, { "epoch": 0.687800140290293, "grad_norm": 0.3976746201515198, "learning_rate": 4.6910460284807015e-06, "loss": 0.3553937077522278, "step": 12747, "token_acc": 0.8735452492617682 }, { "epoch": 0.6878540980952895, "grad_norm": 0.4688950777053833, "learning_rate": 4.6895651600191165e-06, "loss": 0.34119558334350586, "step": 12748, "token_acc": 0.8792729698182424 }, { "epoch": 0.687908055900286, "grad_norm": 0.36968526244163513, "learning_rate": 4.6880844537368216e-06, "loss": 0.38715580105781555, "step": 12749, "token_acc": 0.8622833233711895 }, { "epoch": 0.6879620137052824, "grad_norm": 0.36344602704048157, "learning_rate": 4.686603909679038e-06, "loss": 0.2949830889701843, "step": 12750, "token_acc": 0.8918238993710692 }, { "epoch": 0.6880159715102789, "grad_norm": 0.3902125656604767, "learning_rate": 4.685123527890978e-06, "loss": 0.29450541734695435, "step": 12751, "token_acc": 0.8936321593738883 }, { "epoch": 0.6880699293152754, "grad_norm": 0.42864981293678284, "learning_rate": 4.6836433084178535e-06, "loss": 0.3094746172428131, "step": 12752, "token_acc": 0.8879001195377872 }, { "epoch": 0.688123887120272, "grad_norm": 0.445783406496048, "learning_rate": 4.68216325130487e-06, "loss": 0.39226141571998596, "step": 12753, "token_acc": 0.8671405809535482 }, { "epoch": 0.6881778449252685, "grad_norm": 0.3674657642841339, "learning_rate": 4.680683356597228e-06, "loss": 0.3047650456428528, "step": 12754, "token_acc": 0.8881556095866621 }, { "epoch": 0.688231802730265, "grad_norm": 0.3752695620059967, "learning_rate": 4.679203624340118e-06, "loss": 0.3066217303276062, "step": 12755, "token_acc": 0.8860739819707802 }, { "epoch": 0.6882857605352615, "grad_norm": 0.42495474219322205, "learning_rate": 4.67772405457874e-06, "loss": 0.3590207099914551, "step": 12756, "token_acc": 0.8729116945107399 }, { "epoch": 0.6883397183402579, "grad_norm": 0.5074143409729004, "learning_rate": 4.676244647358275e-06, "loss": 0.35650110244750977, "step": 12757, "token_acc": 0.8751657510892215 }, { "epoch": 0.6883936761452544, "grad_norm": 0.3548777103424072, "learning_rate": 4.674765402723903e-06, "loss": 0.33206629753112793, "step": 12758, "token_acc": 0.8833371533348613 }, { "epoch": 0.6884476339502509, "grad_norm": 0.4023151099681854, "learning_rate": 4.6732863207208015e-06, "loss": 0.2991195619106293, "step": 12759, "token_acc": 0.8941587618572142 }, { "epoch": 0.6885015917552474, "grad_norm": 0.46469900012016296, "learning_rate": 4.6718074013941405e-06, "loss": 0.33113667368888855, "step": 12760, "token_acc": 0.8801307664366146 }, { "epoch": 0.6885555495602439, "grad_norm": 0.447957307100296, "learning_rate": 4.670328644789085e-06, "loss": 0.378032922744751, "step": 12761, "token_acc": 0.8648152999738015 }, { "epoch": 0.6886095073652404, "grad_norm": 0.4400540590286255, "learning_rate": 4.668850050950796e-06, "loss": 0.345977246761322, "step": 12762, "token_acc": 0.8766477036214791 }, { "epoch": 0.6886634651702369, "grad_norm": 0.3835453689098358, "learning_rate": 4.6673716199244315e-06, "loss": 0.3083672523498535, "step": 12763, "token_acc": 0.8832419617721143 }, { "epoch": 0.6887174229752334, "grad_norm": 0.406532347202301, "learning_rate": 4.66589335175514e-06, "loss": 0.29005947709083557, "step": 12764, "token_acc": 0.8961538461538462 }, { "epoch": 0.6887713807802298, "grad_norm": 0.46820980310440063, "learning_rate": 4.664415246488067e-06, "loss": 0.34727585315704346, "step": 12765, "token_acc": 0.8831219512195122 }, { "epoch": 0.6888253385852263, "grad_norm": 0.38720256090164185, "learning_rate": 4.662937304168357e-06, "loss": 0.3168891966342926, "step": 12766, "token_acc": 0.8834603316898252 }, { "epoch": 0.6888792963902228, "grad_norm": 0.44284725189208984, "learning_rate": 4.661459524841142e-06, "loss": 0.3172534108161926, "step": 12767, "token_acc": 0.8860182370820668 }, { "epoch": 0.6889332541952193, "grad_norm": 0.336988240480423, "learning_rate": 4.659981908551551e-06, "loss": 0.32535678148269653, "step": 12768, "token_acc": 0.883366562232982 }, { "epoch": 0.6889872120002158, "grad_norm": 0.47247791290283203, "learning_rate": 4.658504455344718e-06, "loss": 0.31236037611961365, "step": 12769, "token_acc": 0.8906420021762785 }, { "epoch": 0.6890411698052124, "grad_norm": 0.3381851315498352, "learning_rate": 4.65702716526576e-06, "loss": 0.33830446004867554, "step": 12770, "token_acc": 0.8734957440563546 }, { "epoch": 0.6890951276102089, "grad_norm": 0.5845190286636353, "learning_rate": 4.655550038359791e-06, "loss": 0.3736085295677185, "step": 12771, "token_acc": 0.8647927415050702 }, { "epoch": 0.6891490854152054, "grad_norm": 0.4324668347835541, "learning_rate": 4.6540730746719245e-06, "loss": 0.3354645073413849, "step": 12772, "token_acc": 0.8809798270893372 }, { "epoch": 0.6892030432202018, "grad_norm": 0.42750388383865356, "learning_rate": 4.652596274247267e-06, "loss": 0.3816113770008087, "step": 12773, "token_acc": 0.8639071809344622 }, { "epoch": 0.6892570010251983, "grad_norm": 0.39001739025115967, "learning_rate": 4.651119637130913e-06, "loss": 0.3736363649368286, "step": 12774, "token_acc": 0.8698079803894866 }, { "epoch": 0.6893109588301948, "grad_norm": 0.38010984659194946, "learning_rate": 4.649643163367968e-06, "loss": 0.34848254919052124, "step": 12775, "token_acc": 0.8772937386188542 }, { "epoch": 0.6893649166351913, "grad_norm": 0.40406334400177, "learning_rate": 4.648166853003524e-06, "loss": 0.33455464243888855, "step": 12776, "token_acc": 0.8819969742813918 }, { "epoch": 0.6894188744401878, "grad_norm": 0.4229799509048462, "learning_rate": 4.6466907060826584e-06, "loss": 0.3084734082221985, "step": 12777, "token_acc": 0.8911063487811411 }, { "epoch": 0.6894728322451843, "grad_norm": 0.39882275462150574, "learning_rate": 4.645214722650456e-06, "loss": 0.3380786180496216, "step": 12778, "token_acc": 0.8831758034026465 }, { "epoch": 0.6895267900501808, "grad_norm": 0.4200773537158966, "learning_rate": 4.643738902751993e-06, "loss": 0.3775704503059387, "step": 12779, "token_acc": 0.8683371298405467 }, { "epoch": 0.6895807478551772, "grad_norm": 0.38559165596961975, "learning_rate": 4.642263246432337e-06, "loss": 0.38369888067245483, "step": 12780, "token_acc": 0.8663594470046083 }, { "epoch": 0.6896347056601737, "grad_norm": 0.41829511523246765, "learning_rate": 4.640787753736562e-06, "loss": 0.39603161811828613, "step": 12781, "token_acc": 0.8631364562118127 }, { "epoch": 0.6896886634651702, "grad_norm": 0.3696974813938141, "learning_rate": 4.6393124247097255e-06, "loss": 0.36580947041511536, "step": 12782, "token_acc": 0.8754317548746519 }, { "epoch": 0.6897426212701667, "grad_norm": 0.35990461707115173, "learning_rate": 4.637837259396882e-06, "loss": 0.3456258177757263, "step": 12783, "token_acc": 0.8738627602158828 }, { "epoch": 0.6897965790751632, "grad_norm": 0.46077996492385864, "learning_rate": 4.636362257843084e-06, "loss": 0.36930373311042786, "step": 12784, "token_acc": 0.8718050065876153 }, { "epoch": 0.6898505368801597, "grad_norm": 0.47128763794898987, "learning_rate": 4.6348874200933794e-06, "loss": 0.33822694420814514, "step": 12785, "token_acc": 0.880941393631749 }, { "epoch": 0.6899044946851562, "grad_norm": 0.4321388900279999, "learning_rate": 4.633412746192806e-06, "loss": 0.35763296484947205, "step": 12786, "token_acc": 0.8750709179621015 }, { "epoch": 0.6899584524901528, "grad_norm": 0.4296913146972656, "learning_rate": 4.631938236186397e-06, "loss": 0.31163257360458374, "step": 12787, "token_acc": 0.8859961190168176 }, { "epoch": 0.6900124102951491, "grad_norm": 0.5131678581237793, "learning_rate": 4.630463890119194e-06, "loss": 0.318653404712677, "step": 12788, "token_acc": 0.8862327691502355 }, { "epoch": 0.6900663681001457, "grad_norm": 0.42446720600128174, "learning_rate": 4.628989708036215e-06, "loss": 0.3135530650615692, "step": 12789, "token_acc": 0.8858350951374208 }, { "epoch": 0.6901203259051422, "grad_norm": 0.4190177023410797, "learning_rate": 4.627515689982484e-06, "loss": 0.3767092227935791, "step": 12790, "token_acc": 0.867773325701202 }, { "epoch": 0.6901742837101387, "grad_norm": 0.4059486985206604, "learning_rate": 4.626041836003017e-06, "loss": 0.3049198389053345, "step": 12791, "token_acc": 0.8863414634146342 }, { "epoch": 0.6902282415151352, "grad_norm": 0.43848204612731934, "learning_rate": 4.624568146142825e-06, "loss": 0.348923921585083, "step": 12792, "token_acc": 0.8763245033112583 }, { "epoch": 0.6902821993201317, "grad_norm": 0.36498668789863586, "learning_rate": 4.623094620446912e-06, "loss": 0.3455258011817932, "step": 12793, "token_acc": 0.8795015955022033 }, { "epoch": 0.6903361571251282, "grad_norm": 0.4167386293411255, "learning_rate": 4.621621258960282e-06, "loss": 0.28817352652549744, "step": 12794, "token_acc": 0.8983908877237389 }, { "epoch": 0.6903901149301246, "grad_norm": 0.4701843559741974, "learning_rate": 4.620148061727929e-06, "loss": 0.3909841775894165, "step": 12795, "token_acc": 0.8601060511540861 }, { "epoch": 0.6904440727351211, "grad_norm": 0.42849990725517273, "learning_rate": 4.6186750287948455e-06, "loss": 0.27908170223236084, "step": 12796, "token_acc": 0.898958047710447 }, { "epoch": 0.6904980305401176, "grad_norm": 0.48574623465538025, "learning_rate": 4.617202160206017e-06, "loss": 0.3784339725971222, "step": 12797, "token_acc": 0.8668304668304668 }, { "epoch": 0.6905519883451141, "grad_norm": 0.2955699563026428, "learning_rate": 4.615729456006424e-06, "loss": 0.3458431363105774, "step": 12798, "token_acc": 0.8800402099854797 }, { "epoch": 0.6906059461501106, "grad_norm": 0.3637695908546448, "learning_rate": 4.614256916241042e-06, "loss": 0.35019657015800476, "step": 12799, "token_acc": 0.8740906547285954 }, { "epoch": 0.6906599039551071, "grad_norm": 0.4246271550655365, "learning_rate": 4.612784540954841e-06, "loss": 0.3627183437347412, "step": 12800, "token_acc": 0.8688038793103449 }, { "epoch": 0.6907138617601036, "grad_norm": 0.3542187511920929, "learning_rate": 4.611312330192792e-06, "loss": 0.2649073898792267, "step": 12801, "token_acc": 0.9031469101581607 }, { "epoch": 0.6907678195651001, "grad_norm": 0.34424012899398804, "learning_rate": 4.609840283999852e-06, "loss": 0.30488067865371704, "step": 12802, "token_acc": 0.8864413216862894 }, { "epoch": 0.6908217773700965, "grad_norm": 0.4122931957244873, "learning_rate": 4.608368402420979e-06, "loss": 0.32837438583374023, "step": 12803, "token_acc": 0.8822674418604651 }, { "epoch": 0.690875735175093, "grad_norm": 0.42162206768989563, "learning_rate": 4.606896685501121e-06, "loss": 0.3396201729774475, "step": 12804, "token_acc": 0.8777608530083778 }, { "epoch": 0.6909296929800895, "grad_norm": 0.3286822736263275, "learning_rate": 4.605425133285226e-06, "loss": 0.341305673122406, "step": 12805, "token_acc": 0.8793103448275862 }, { "epoch": 0.6909836507850861, "grad_norm": 0.3637036085128784, "learning_rate": 4.603953745818232e-06, "loss": 0.2837085425853729, "step": 12806, "token_acc": 0.8961343707546005 }, { "epoch": 0.6910376085900826, "grad_norm": 0.3754900395870209, "learning_rate": 4.602482523145079e-06, "loss": 0.28442323207855225, "step": 12807, "token_acc": 0.8915468351028087 }, { "epoch": 0.6910915663950791, "grad_norm": 0.43717652559280396, "learning_rate": 4.601011465310696e-06, "loss": 0.3628539443016052, "step": 12808, "token_acc": 0.8675952609670189 }, { "epoch": 0.6911455242000756, "grad_norm": 0.5200784206390381, "learning_rate": 4.599540572360009e-06, "loss": 0.3821989595890045, "step": 12809, "token_acc": 0.8670631731232921 }, { "epoch": 0.6911994820050721, "grad_norm": 0.41025686264038086, "learning_rate": 4.598069844337942e-06, "loss": 0.3551419675350189, "step": 12810, "token_acc": 0.8778273335457152 }, { "epoch": 0.6912534398100685, "grad_norm": 0.3230113387107849, "learning_rate": 4.596599281289402e-06, "loss": 0.3515357971191406, "step": 12811, "token_acc": 0.8803991446899501 }, { "epoch": 0.691307397615065, "grad_norm": 0.37642398476600647, "learning_rate": 4.595128883259306e-06, "loss": 0.28613391518592834, "step": 12812, "token_acc": 0.8902248594628357 }, { "epoch": 0.6913613554200615, "grad_norm": 0.4533900320529938, "learning_rate": 4.593658650292554e-06, "loss": 0.3276146352291107, "step": 12813, "token_acc": 0.8808251980106834 }, { "epoch": 0.691415313225058, "grad_norm": 0.34352511167526245, "learning_rate": 4.592188582434054e-06, "loss": 0.3568624258041382, "step": 12814, "token_acc": 0.8743281875357347 }, { "epoch": 0.6914692710300545, "grad_norm": 0.3363932967185974, "learning_rate": 4.590718679728697e-06, "loss": 0.3319411277770996, "step": 12815, "token_acc": 0.8772159428911362 }, { "epoch": 0.691523228835051, "grad_norm": 0.4185357987880707, "learning_rate": 4.589248942221376e-06, "loss": 0.33598265051841736, "step": 12816, "token_acc": 0.878763353836193 }, { "epoch": 0.6915771866400475, "grad_norm": 0.4643368422985077, "learning_rate": 4.587779369956974e-06, "loss": 0.3607144355773926, "step": 12817, "token_acc": 0.8764117457244273 }, { "epoch": 0.6916311444450439, "grad_norm": 0.39706215262413025, "learning_rate": 4.586309962980373e-06, "loss": 0.2730053663253784, "step": 12818, "token_acc": 0.8998147576412473 }, { "epoch": 0.6916851022500404, "grad_norm": 0.4064595401287079, "learning_rate": 4.5848407213364435e-06, "loss": 0.3143187463283539, "step": 12819, "token_acc": 0.8857647814910026 }, { "epoch": 0.6917390600550369, "grad_norm": 0.48627033829689026, "learning_rate": 4.583371645070063e-06, "loss": 0.39319998025894165, "step": 12820, "token_acc": 0.8667760160379078 }, { "epoch": 0.6917930178600334, "grad_norm": 0.3567085564136505, "learning_rate": 4.5819027342260935e-06, "loss": 0.31474220752716064, "step": 12821, "token_acc": 0.8879094855940007 }, { "epoch": 0.69184697566503, "grad_norm": 0.3206787109375, "learning_rate": 4.580433988849396e-06, "loss": 0.3519771993160248, "step": 12822, "token_acc": 0.877396705374021 }, { "epoch": 0.6919009334700265, "grad_norm": 0.33549830317497253, "learning_rate": 4.578965408984824e-06, "loss": 0.3493313193321228, "step": 12823, "token_acc": 0.8805464480874317 }, { "epoch": 0.691954891275023, "grad_norm": 0.43392810225486755, "learning_rate": 4.577496994677229e-06, "loss": 0.33901169896125793, "step": 12824, "token_acc": 0.879008929694378 }, { "epoch": 0.6920088490800195, "grad_norm": 0.3306651711463928, "learning_rate": 4.576028745971455e-06, "loss": 0.338783323764801, "step": 12825, "token_acc": 0.8834025910535321 }, { "epoch": 0.6920628068850159, "grad_norm": 0.4934113025665283, "learning_rate": 4.574560662912342e-06, "loss": 0.31934672594070435, "step": 12826, "token_acc": 0.8820154204769589 }, { "epoch": 0.6921167646900124, "grad_norm": 0.3743484616279602, "learning_rate": 4.573092745544724e-06, "loss": 0.3155273497104645, "step": 12827, "token_acc": 0.8830903790087463 }, { "epoch": 0.6921707224950089, "grad_norm": 0.5163752436637878, "learning_rate": 4.5716249939134335e-06, "loss": 0.36630210280418396, "step": 12828, "token_acc": 0.8754745634016705 }, { "epoch": 0.6922246803000054, "grad_norm": 0.38018473982810974, "learning_rate": 4.570157408063292e-06, "loss": 0.3167366683483124, "step": 12829, "token_acc": 0.8787330316742081 }, { "epoch": 0.6922786381050019, "grad_norm": 0.44209763407707214, "learning_rate": 4.568689988039123e-06, "loss": 0.390577495098114, "step": 12830, "token_acc": 0.862198559348575 }, { "epoch": 0.6923325959099984, "grad_norm": 0.3317655026912689, "learning_rate": 4.5672227338857375e-06, "loss": 0.34598731994628906, "step": 12831, "token_acc": 0.8776988438501184 }, { "epoch": 0.6923865537149949, "grad_norm": 0.3767574429512024, "learning_rate": 4.565755645647943e-06, "loss": 0.33085107803344727, "step": 12832, "token_acc": 0.8762995594713656 }, { "epoch": 0.6924405115199914, "grad_norm": 0.4502560496330261, "learning_rate": 4.564288723370553e-06, "loss": 0.34755462408065796, "step": 12833, "token_acc": 0.8800616505534539 }, { "epoch": 0.6924944693249878, "grad_norm": 0.4778221845626831, "learning_rate": 4.562821967098359e-06, "loss": 0.3487498164176941, "step": 12834, "token_acc": 0.8731656184486373 }, { "epoch": 0.6925484271299843, "grad_norm": 0.3249453604221344, "learning_rate": 4.561355376876159e-06, "loss": 0.33466455340385437, "step": 12835, "token_acc": 0.8778444611152788 }, { "epoch": 0.6926023849349808, "grad_norm": 0.4134264290332794, "learning_rate": 4.55988895274874e-06, "loss": 0.34884166717529297, "step": 12836, "token_acc": 0.8772563176895307 }, { "epoch": 0.6926563427399773, "grad_norm": 0.3300495147705078, "learning_rate": 4.558422694760888e-06, "loss": 0.3138846755027771, "step": 12837, "token_acc": 0.8842046872833171 }, { "epoch": 0.6927103005449738, "grad_norm": 0.35005903244018555, "learning_rate": 4.556956602957377e-06, "loss": 0.339312344789505, "step": 12838, "token_acc": 0.8770794824399261 }, { "epoch": 0.6927642583499704, "grad_norm": 0.31954142451286316, "learning_rate": 4.555490677382991e-06, "loss": 0.32017451524734497, "step": 12839, "token_acc": 0.8838946224877784 }, { "epoch": 0.6928182161549669, "grad_norm": 0.4667803645133972, "learning_rate": 4.5540249180824924e-06, "loss": 0.3314739465713501, "step": 12840, "token_acc": 0.8806797853309482 }, { "epoch": 0.6928721739599633, "grad_norm": 0.4044814705848694, "learning_rate": 4.552559325100646e-06, "loss": 0.3658367991447449, "step": 12841, "token_acc": 0.8750701852891634 }, { "epoch": 0.6929261317649598, "grad_norm": 0.335664302110672, "learning_rate": 4.551093898482211e-06, "loss": 0.28922712802886963, "step": 12842, "token_acc": 0.8923357664233577 }, { "epoch": 0.6929800895699563, "grad_norm": 0.3717261552810669, "learning_rate": 4.54962863827194e-06, "loss": 0.3296103775501251, "step": 12843, "token_acc": 0.8806903741457199 }, { "epoch": 0.6930340473749528, "grad_norm": 0.3990955054759979, "learning_rate": 4.548163544514583e-06, "loss": 0.2812010645866394, "step": 12844, "token_acc": 0.8983611984770733 }, { "epoch": 0.6930880051799493, "grad_norm": 0.4621858596801758, "learning_rate": 4.5466986172548845e-06, "loss": 0.3719688653945923, "step": 12845, "token_acc": 0.8683831990794016 }, { "epoch": 0.6931419629849458, "grad_norm": 0.3510797917842865, "learning_rate": 4.54523385653758e-06, "loss": 0.26137134432792664, "step": 12846, "token_acc": 0.9067976121702291 }, { "epoch": 0.6931959207899423, "grad_norm": 0.4181511402130127, "learning_rate": 4.543769262407406e-06, "loss": 0.3296021521091461, "step": 12847, "token_acc": 0.8779589689637033 }, { "epoch": 0.6932498785949388, "grad_norm": 0.3820086419582367, "learning_rate": 4.542304834909088e-06, "loss": 0.2870076596736908, "step": 12848, "token_acc": 0.8945617402431222 }, { "epoch": 0.6933038363999352, "grad_norm": 0.3592704236507416, "learning_rate": 4.540840574087351e-06, "loss": 0.35459405183792114, "step": 12849, "token_acc": 0.8739946380697051 }, { "epoch": 0.6933577942049317, "grad_norm": 0.3834002614021301, "learning_rate": 4.539376479986914e-06, "loss": 0.3295614421367645, "step": 12850, "token_acc": 0.8838134265397132 }, { "epoch": 0.6934117520099282, "grad_norm": 0.49685391783714294, "learning_rate": 4.537912552652484e-06, "loss": 0.37303197383880615, "step": 12851, "token_acc": 0.8702277294038848 }, { "epoch": 0.6934657098149247, "grad_norm": 0.38079291582107544, "learning_rate": 4.536448792128777e-06, "loss": 0.3485415577888489, "step": 12852, "token_acc": 0.8776978417266187 }, { "epoch": 0.6935196676199212, "grad_norm": 0.3978826403617859, "learning_rate": 4.534985198460493e-06, "loss": 0.321635901927948, "step": 12853, "token_acc": 0.8828764173962969 }, { "epoch": 0.6935736254249177, "grad_norm": 0.5007684826850891, "learning_rate": 4.533521771692329e-06, "loss": 0.33170875906944275, "step": 12854, "token_acc": 0.8785819250551066 }, { "epoch": 0.6936275832299142, "grad_norm": 0.4068934917449951, "learning_rate": 4.53205851186898e-06, "loss": 0.33081066608428955, "step": 12855, "token_acc": 0.8850108225108225 }, { "epoch": 0.6936815410349108, "grad_norm": 0.46343687176704407, "learning_rate": 4.53059541903513e-06, "loss": 0.3936516344547272, "step": 12856, "token_acc": 0.8591322192905306 }, { "epoch": 0.6937354988399071, "grad_norm": 0.3499738872051239, "learning_rate": 4.529132493235463e-06, "loss": 0.3556644320487976, "step": 12857, "token_acc": 0.8761329305135952 }, { "epoch": 0.6937894566449037, "grad_norm": 0.41263291239738464, "learning_rate": 4.527669734514657e-06, "loss": 0.3600495755672455, "step": 12858, "token_acc": 0.8764360479959152 }, { "epoch": 0.6938434144499002, "grad_norm": 0.34207552671432495, "learning_rate": 4.526207142917384e-06, "loss": 0.30991455912590027, "step": 12859, "token_acc": 0.8892768079800498 }, { "epoch": 0.6938973722548967, "grad_norm": 0.4957335293292999, "learning_rate": 4.52474471848831e-06, "loss": 0.3528996706008911, "step": 12860, "token_acc": 0.875 }, { "epoch": 0.6939513300598932, "grad_norm": 0.4267198443412781, "learning_rate": 4.5232824612720985e-06, "loss": 0.3428306579589844, "step": 12861, "token_acc": 0.8809824814881705 }, { "epoch": 0.6940052878648897, "grad_norm": 0.4373832643032074, "learning_rate": 4.5218203713134055e-06, "loss": 0.37699782848358154, "step": 12862, "token_acc": 0.8645506792058516 }, { "epoch": 0.6940592456698862, "grad_norm": 0.36237624287605286, "learning_rate": 4.520358448656884e-06, "loss": 0.3680941164493561, "step": 12863, "token_acc": 0.8699241786015164 }, { "epoch": 0.6941132034748826, "grad_norm": 0.4463076889514923, "learning_rate": 4.518896693347177e-06, "loss": 0.31067201495170593, "step": 12864, "token_acc": 0.8824488317333816 }, { "epoch": 0.6941671612798791, "grad_norm": 0.42264509201049805, "learning_rate": 4.5174351054289315e-06, "loss": 0.368277370929718, "step": 12865, "token_acc": 0.8732602901984009 }, { "epoch": 0.6942211190848756, "grad_norm": 0.5121626853942871, "learning_rate": 4.5159736849467815e-06, "loss": 0.3396850526332855, "step": 12866, "token_acc": 0.8791545659890287 }, { "epoch": 0.6942750768898721, "grad_norm": 0.4631766676902771, "learning_rate": 4.5145124319453595e-06, "loss": 0.3098791837692261, "step": 12867, "token_acc": 0.8896918980600989 }, { "epoch": 0.6943290346948686, "grad_norm": 0.4985593557357788, "learning_rate": 4.5130513464692905e-06, "loss": 0.3100336790084839, "step": 12868, "token_acc": 0.886864336591022 }, { "epoch": 0.6943829924998651, "grad_norm": 0.43897759914398193, "learning_rate": 4.511590428563196e-06, "loss": 0.3630979657173157, "step": 12869, "token_acc": 0.8699021207177814 }, { "epoch": 0.6944369503048616, "grad_norm": 0.37102949619293213, "learning_rate": 4.510129678271692e-06, "loss": 0.345261812210083, "step": 12870, "token_acc": 0.8731787659951856 }, { "epoch": 0.6944909081098581, "grad_norm": 0.4034498631954193, "learning_rate": 4.508669095639385e-06, "loss": 0.3625969886779785, "step": 12871, "token_acc": 0.8705743509047994 }, { "epoch": 0.6945448659148545, "grad_norm": 0.3455018997192383, "learning_rate": 4.507208680710891e-06, "loss": 0.3495044708251953, "step": 12872, "token_acc": 0.8736147428713734 }, { "epoch": 0.694598823719851, "grad_norm": 0.3342286944389343, "learning_rate": 4.505748433530802e-06, "loss": 0.35885921120643616, "step": 12873, "token_acc": 0.8734070690069728 }, { "epoch": 0.6946527815248476, "grad_norm": 0.4055714011192322, "learning_rate": 4.504288354143719e-06, "loss": 0.29721593856811523, "step": 12874, "token_acc": 0.8905357793706719 }, { "epoch": 0.6947067393298441, "grad_norm": 0.42629891633987427, "learning_rate": 4.502828442594229e-06, "loss": 0.3644651770591736, "step": 12875, "token_acc": 0.8741496598639455 }, { "epoch": 0.6947606971348406, "grad_norm": 0.3755702078342438, "learning_rate": 4.501368698926919e-06, "loss": 0.3568006455898285, "step": 12876, "token_acc": 0.8740500985083028 }, { "epoch": 0.6948146549398371, "grad_norm": 0.28133726119995117, "learning_rate": 4.499909123186368e-06, "loss": 0.30241408944129944, "step": 12877, "token_acc": 0.8953667603590534 }, { "epoch": 0.6948686127448336, "grad_norm": 0.3772656321525574, "learning_rate": 4.498449715417151e-06, "loss": 0.40016812086105347, "step": 12878, "token_acc": 0.8594329820394835 }, { "epoch": 0.6949225705498301, "grad_norm": 0.40476757287979126, "learning_rate": 4.49699047566384e-06, "loss": 0.35753825306892395, "step": 12879, "token_acc": 0.8744071954210957 }, { "epoch": 0.6949765283548265, "grad_norm": 0.5090299248695374, "learning_rate": 4.495531403970996e-06, "loss": 0.3685234785079956, "step": 12880, "token_acc": 0.8734035402195832 }, { "epoch": 0.695030486159823, "grad_norm": 0.5099475979804993, "learning_rate": 4.494072500383183e-06, "loss": 0.4197731912136078, "step": 12881, "token_acc": 0.8540392661731574 }, { "epoch": 0.6950844439648195, "grad_norm": 0.3832170069217682, "learning_rate": 4.492613764944952e-06, "loss": 0.3455277383327484, "step": 12882, "token_acc": 0.8735336654346778 }, { "epoch": 0.695138401769816, "grad_norm": 0.42103686928749084, "learning_rate": 4.4911551977008515e-06, "loss": 0.31252437829971313, "step": 12883, "token_acc": 0.8886827458256029 }, { "epoch": 0.6951923595748125, "grad_norm": 0.3796919286251068, "learning_rate": 4.489696798695431e-06, "loss": 0.27719444036483765, "step": 12884, "token_acc": 0.9017100158068688 }, { "epoch": 0.695246317379809, "grad_norm": 0.3876904249191284, "learning_rate": 4.488238567973227e-06, "loss": 0.3384546935558319, "step": 12885, "token_acc": 0.8776180056267584 }, { "epoch": 0.6953002751848055, "grad_norm": 0.42514508962631226, "learning_rate": 4.486780505578774e-06, "loss": 0.314050555229187, "step": 12886, "token_acc": 0.886805436091519 }, { "epoch": 0.6953542329898019, "grad_norm": 0.27509650588035583, "learning_rate": 4.485322611556598e-06, "loss": 0.3262893259525299, "step": 12887, "token_acc": 0.8846913255232328 }, { "epoch": 0.6954081907947984, "grad_norm": 0.3275017738342285, "learning_rate": 4.4838648859512264e-06, "loss": 0.33728599548339844, "step": 12888, "token_acc": 0.877658274260572 }, { "epoch": 0.6954621485997949, "grad_norm": 0.36742764711380005, "learning_rate": 4.482407328807177e-06, "loss": 0.3137134909629822, "step": 12889, "token_acc": 0.8902866035533653 }, { "epoch": 0.6955161064047914, "grad_norm": 0.34635502099990845, "learning_rate": 4.480949940168957e-06, "loss": 0.30348095297813416, "step": 12890, "token_acc": 0.8894566513370805 }, { "epoch": 0.695570064209788, "grad_norm": 0.4844992160797119, "learning_rate": 4.479492720081085e-06, "loss": 0.3381744623184204, "step": 12891, "token_acc": 0.8808634288086343 }, { "epoch": 0.6956240220147845, "grad_norm": 0.4091266095638275, "learning_rate": 4.4780356685880615e-06, "loss": 0.33129310607910156, "step": 12892, "token_acc": 0.8806670140698281 }, { "epoch": 0.695677979819781, "grad_norm": 0.37481409311294556, "learning_rate": 4.476578785734379e-06, "loss": 0.32018381357192993, "step": 12893, "token_acc": 0.8828198573927082 }, { "epoch": 0.6957319376247775, "grad_norm": 0.42837825417518616, "learning_rate": 4.475122071564534e-06, "loss": 0.40880948305130005, "step": 12894, "token_acc": 0.8589904570567554 }, { "epoch": 0.6957858954297739, "grad_norm": 0.368195116519928, "learning_rate": 4.473665526123012e-06, "loss": 0.3108974099159241, "step": 12895, "token_acc": 0.8844914864242982 }, { "epoch": 0.6958398532347704, "grad_norm": 0.3372773826122284, "learning_rate": 4.472209149454295e-06, "loss": 0.29253536462783813, "step": 12896, "token_acc": 0.8939544103072349 }, { "epoch": 0.6958938110397669, "grad_norm": 0.41087114810943604, "learning_rate": 4.470752941602866e-06, "loss": 0.333084374666214, "step": 12897, "token_acc": 0.8819694868238558 }, { "epoch": 0.6959477688447634, "grad_norm": 0.37632277607917786, "learning_rate": 4.469296902613194e-06, "loss": 0.3408195674419403, "step": 12898, "token_acc": 0.8808651026392962 }, { "epoch": 0.6960017266497599, "grad_norm": 0.38280272483825684, "learning_rate": 4.467841032529745e-06, "loss": 0.32957035303115845, "step": 12899, "token_acc": 0.8835793835793836 }, { "epoch": 0.6960556844547564, "grad_norm": 0.4566611051559448, "learning_rate": 4.466385331396982e-06, "loss": 0.3321118950843811, "step": 12900, "token_acc": 0.8823754301006754 }, { "epoch": 0.6961096422597529, "grad_norm": 0.41588959097862244, "learning_rate": 4.464929799259361e-06, "loss": 0.28186923265457153, "step": 12901, "token_acc": 0.8952246625025186 }, { "epoch": 0.6961636000647493, "grad_norm": 0.4779908061027527, "learning_rate": 4.463474436161333e-06, "loss": 0.3270367980003357, "step": 12902, "token_acc": 0.8814872192099148 }, { "epoch": 0.6962175578697458, "grad_norm": 0.39579910039901733, "learning_rate": 4.462019242147343e-06, "loss": 0.32031431794166565, "step": 12903, "token_acc": 0.8846294267981015 }, { "epoch": 0.6962715156747423, "grad_norm": 0.3488208055496216, "learning_rate": 4.460564217261839e-06, "loss": 0.38999176025390625, "step": 12904, "token_acc": 0.8667075225984373 }, { "epoch": 0.6963254734797388, "grad_norm": 0.45218291878700256, "learning_rate": 4.459109361549251e-06, "loss": 0.3033152222633362, "step": 12905, "token_acc": 0.8922258452033637 }, { "epoch": 0.6963794312847353, "grad_norm": 0.5040723085403442, "learning_rate": 4.4576546750540125e-06, "loss": 0.355721652507782, "step": 12906, "token_acc": 0.8758414360508602 }, { "epoch": 0.6964333890897318, "grad_norm": 0.47639063000679016, "learning_rate": 4.456200157820548e-06, "loss": 0.3104434907436371, "step": 12907, "token_acc": 0.8844784620861517 }, { "epoch": 0.6964873468947284, "grad_norm": 0.36068490147590637, "learning_rate": 4.454745809893278e-06, "loss": 0.395680695772171, "step": 12908, "token_acc": 0.8683691236215902 }, { "epoch": 0.6965413046997249, "grad_norm": 0.3409993648529053, "learning_rate": 4.45329163131662e-06, "loss": 0.34339866042137146, "step": 12909, "token_acc": 0.8753447324875896 }, { "epoch": 0.6965952625047213, "grad_norm": 0.4442470967769623, "learning_rate": 4.451837622134981e-06, "loss": 0.3662959933280945, "step": 12910, "token_acc": 0.8683211282885815 }, { "epoch": 0.6966492203097178, "grad_norm": 0.4954547882080078, "learning_rate": 4.450383782392769e-06, "loss": 0.3331325948238373, "step": 12911, "token_acc": 0.878702163061564 }, { "epoch": 0.6967031781147143, "grad_norm": 0.4650729298591614, "learning_rate": 4.448930112134381e-06, "loss": 0.29220378398895264, "step": 12912, "token_acc": 0.8969292389853137 }, { "epoch": 0.6967571359197108, "grad_norm": 0.3726290762424469, "learning_rate": 4.447476611404214e-06, "loss": 0.3591926097869873, "step": 12913, "token_acc": 0.8714737674663855 }, { "epoch": 0.6968110937247073, "grad_norm": 0.2979402542114258, "learning_rate": 4.446023280246655e-06, "loss": 0.351725697517395, "step": 12914, "token_acc": 0.877044909867667 }, { "epoch": 0.6968650515297038, "grad_norm": 0.3627017140388489, "learning_rate": 4.444570118706092e-06, "loss": 0.30329111218452454, "step": 12915, "token_acc": 0.8906267709395898 }, { "epoch": 0.6969190093347003, "grad_norm": 0.38726210594177246, "learning_rate": 4.443117126826898e-06, "loss": 0.33090513944625854, "step": 12916, "token_acc": 0.884503127171647 }, { "epoch": 0.6969729671396968, "grad_norm": 0.44993075728416443, "learning_rate": 4.441664304653456e-06, "loss": 0.3334392011165619, "step": 12917, "token_acc": 0.8826761473820297 }, { "epoch": 0.6970269249446932, "grad_norm": 0.3603866994380951, "learning_rate": 4.4402116522301285e-06, "loss": 0.3768230378627777, "step": 12918, "token_acc": 0.8640776699029126 }, { "epoch": 0.6970808827496897, "grad_norm": 0.4644535779953003, "learning_rate": 4.438759169601281e-06, "loss": 0.376213014125824, "step": 12919, "token_acc": 0.8659255516254001 }, { "epoch": 0.6971348405546862, "grad_norm": 0.3069857954978943, "learning_rate": 4.437306856811272e-06, "loss": 0.30422985553741455, "step": 12920, "token_acc": 0.8960119269474469 }, { "epoch": 0.6971887983596827, "grad_norm": 0.39823952317237854, "learning_rate": 4.435854713904454e-06, "loss": 0.35623323917388916, "step": 12921, "token_acc": 0.8752396603670227 }, { "epoch": 0.6972427561646792, "grad_norm": 0.4181325435638428, "learning_rate": 4.434402740925172e-06, "loss": 0.3513464331626892, "step": 12922, "token_acc": 0.8756174225415357 }, { "epoch": 0.6972967139696757, "grad_norm": 0.43482157588005066, "learning_rate": 4.432950937917776e-06, "loss": 0.3577163815498352, "step": 12923, "token_acc": 0.8698378709085347 }, { "epoch": 0.6973506717746722, "grad_norm": 0.3992227613925934, "learning_rate": 4.4314993049266e-06, "loss": 0.35498368740081787, "step": 12924, "token_acc": 0.8781576782986723 }, { "epoch": 0.6974046295796686, "grad_norm": 0.4761808514595032, "learning_rate": 4.430047841995976e-06, "loss": 0.3647541105747223, "step": 12925, "token_acc": 0.8726937269372693 }, { "epoch": 0.6974585873846652, "grad_norm": 0.48934587836265564, "learning_rate": 4.428596549170232e-06, "loss": 0.3668030798435211, "step": 12926, "token_acc": 0.8731552162849873 }, { "epoch": 0.6975125451896617, "grad_norm": 0.48927292227745056, "learning_rate": 4.427145426493694e-06, "loss": 0.3453979790210724, "step": 12927, "token_acc": 0.876030707989764 }, { "epoch": 0.6975665029946582, "grad_norm": 0.4777621924877167, "learning_rate": 4.425694474010671e-06, "loss": 0.31735625863075256, "step": 12928, "token_acc": 0.8819512195121951 }, { "epoch": 0.6976204607996547, "grad_norm": 0.38968709111213684, "learning_rate": 4.424243691765474e-06, "loss": 0.36965295672416687, "step": 12929, "token_acc": 0.8729092404716206 }, { "epoch": 0.6976744186046512, "grad_norm": 0.39297419786453247, "learning_rate": 4.422793079802419e-06, "loss": 0.3021509349346161, "step": 12930, "token_acc": 0.8866987881442546 }, { "epoch": 0.6977283764096477, "grad_norm": 0.39444512128829956, "learning_rate": 4.421342638165801e-06, "loss": 0.37697285413742065, "step": 12931, "token_acc": 0.8728837080210935 }, { "epoch": 0.6977823342146442, "grad_norm": 0.4184038043022156, "learning_rate": 4.419892366899919e-06, "loss": 0.35312536358833313, "step": 12932, "token_acc": 0.8795695521443266 }, { "epoch": 0.6978362920196406, "grad_norm": 0.44101443886756897, "learning_rate": 4.418442266049061e-06, "loss": 0.3155306577682495, "step": 12933, "token_acc": 0.8811377245508982 }, { "epoch": 0.6978902498246371, "grad_norm": 0.3646237552165985, "learning_rate": 4.416992335657514e-06, "loss": 0.36103200912475586, "step": 12934, "token_acc": 0.873452380952381 }, { "epoch": 0.6979442076296336, "grad_norm": 0.41200876235961914, "learning_rate": 4.415542575769556e-06, "loss": 0.37378817796707153, "step": 12935, "token_acc": 0.8697541966426858 }, { "epoch": 0.6979981654346301, "grad_norm": 0.36590129137039185, "learning_rate": 4.4140929864294664e-06, "loss": 0.33801883459091187, "step": 12936, "token_acc": 0.8827838827838828 }, { "epoch": 0.6980521232396266, "grad_norm": 0.42413780093193054, "learning_rate": 4.412643567681516e-06, "loss": 0.3809150755405426, "step": 12937, "token_acc": 0.8698961225959067 }, { "epoch": 0.6981060810446231, "grad_norm": 0.4228978157043457, "learning_rate": 4.4111943195699656e-06, "loss": 0.33283287286758423, "step": 12938, "token_acc": 0.8817219679633868 }, { "epoch": 0.6981600388496196, "grad_norm": 0.3671632707118988, "learning_rate": 4.409745242139078e-06, "loss": 0.3637654185295105, "step": 12939, "token_acc": 0.8736185578290842 }, { "epoch": 0.6982139966546161, "grad_norm": 0.48896756768226624, "learning_rate": 4.4082963354331055e-06, "loss": 0.2927487790584564, "step": 12940, "token_acc": 0.8951747088186356 }, { "epoch": 0.6982679544596125, "grad_norm": 0.3985111117362976, "learning_rate": 4.406847599496298e-06, "loss": 0.33062952756881714, "step": 12941, "token_acc": 0.8805209513023783 }, { "epoch": 0.698321912264609, "grad_norm": 0.4143427610397339, "learning_rate": 4.4053990343729e-06, "loss": 0.2718980312347412, "step": 12942, "token_acc": 0.9006222222222222 }, { "epoch": 0.6983758700696056, "grad_norm": 0.4693854749202728, "learning_rate": 4.40395064010715e-06, "loss": 0.3950187563896179, "step": 12943, "token_acc": 0.8610579987253028 }, { "epoch": 0.6984298278746021, "grad_norm": 0.4129980504512787, "learning_rate": 4.402502416743281e-06, "loss": 0.39343342185020447, "step": 12944, "token_acc": 0.8638928067700987 }, { "epoch": 0.6984837856795986, "grad_norm": 0.3911221921443939, "learning_rate": 4.4010543643255225e-06, "loss": 0.33096393942832947, "step": 12945, "token_acc": 0.8792773063235697 }, { "epoch": 0.6985377434845951, "grad_norm": 0.4183947443962097, "learning_rate": 4.399606482898097e-06, "loss": 0.3533959984779358, "step": 12946, "token_acc": 0.8755602031670152 }, { "epoch": 0.6985917012895916, "grad_norm": 0.49652552604675293, "learning_rate": 4.398158772505221e-06, "loss": 0.3219086527824402, "step": 12947, "token_acc": 0.8794591953086822 }, { "epoch": 0.698645659094588, "grad_norm": 0.335602730512619, "learning_rate": 4.396711233191107e-06, "loss": 0.3270936608314514, "step": 12948, "token_acc": 0.8826718494271686 }, { "epoch": 0.6986996168995845, "grad_norm": 0.37974244356155396, "learning_rate": 4.395263864999966e-06, "loss": 0.2719334065914154, "step": 12949, "token_acc": 0.8979863784424045 }, { "epoch": 0.698753574704581, "grad_norm": 0.49585896730422974, "learning_rate": 4.3938166679759986e-06, "loss": 0.35092753171920776, "step": 12950, "token_acc": 0.8798840048840049 }, { "epoch": 0.6988075325095775, "grad_norm": 0.3334687352180481, "learning_rate": 4.392369642163402e-06, "loss": 0.33057644963264465, "step": 12951, "token_acc": 0.8855470095281828 }, { "epoch": 0.698861490314574, "grad_norm": 0.49398908019065857, "learning_rate": 4.3909227876063675e-06, "loss": 0.32526832818984985, "step": 12952, "token_acc": 0.8810627674750356 }, { "epoch": 0.6989154481195705, "grad_norm": 0.42126840353012085, "learning_rate": 4.3894761043490815e-06, "loss": 0.385367214679718, "step": 12953, "token_acc": 0.8655417168876389 }, { "epoch": 0.698969405924567, "grad_norm": 0.40236353874206543, "learning_rate": 4.388029592435722e-06, "loss": 0.32243669033050537, "step": 12954, "token_acc": 0.8838390589493383 }, { "epoch": 0.6990233637295635, "grad_norm": 0.3829515874385834, "learning_rate": 4.386583251910472e-06, "loss": 0.3448518216609955, "step": 12955, "token_acc": 0.8771673281804888 }, { "epoch": 0.6990773215345599, "grad_norm": 0.25803324580192566, "learning_rate": 4.3851370828175e-06, "loss": 0.28051361441612244, "step": 12956, "token_acc": 0.9014334794455633 }, { "epoch": 0.6991312793395564, "grad_norm": 0.40991106629371643, "learning_rate": 4.383691085200969e-06, "loss": 0.34893181920051575, "step": 12957, "token_acc": 0.8691036151054966 }, { "epoch": 0.6991852371445529, "grad_norm": 0.40673333406448364, "learning_rate": 4.382245259105042e-06, "loss": 0.3770042657852173, "step": 12958, "token_acc": 0.8646732429099877 }, { "epoch": 0.6992391949495494, "grad_norm": 0.4070673882961273, "learning_rate": 4.380799604573874e-06, "loss": 0.3358188271522522, "step": 12959, "token_acc": 0.8782475019215987 }, { "epoch": 0.699293152754546, "grad_norm": 0.3139205276966095, "learning_rate": 4.379354121651612e-06, "loss": 0.30802902579307556, "step": 12960, "token_acc": 0.8895356528641355 }, { "epoch": 0.6993471105595425, "grad_norm": 0.37189987301826477, "learning_rate": 4.3779088103824045e-06, "loss": 0.3007451295852661, "step": 12961, "token_acc": 0.889627210271427 }, { "epoch": 0.699401068364539, "grad_norm": 0.47464847564697266, "learning_rate": 4.376463670810388e-06, "loss": 0.36619144678115845, "step": 12962, "token_acc": 0.8720072278271345 }, { "epoch": 0.6994550261695355, "grad_norm": 0.4593079090118408, "learning_rate": 4.375018702979698e-06, "loss": 0.3327059745788574, "step": 12963, "token_acc": 0.8834480600750939 }, { "epoch": 0.6995089839745319, "grad_norm": 0.3647128641605377, "learning_rate": 4.373573906934463e-06, "loss": 0.3129993677139282, "step": 12964, "token_acc": 0.8872439893143366 }, { "epoch": 0.6995629417795284, "grad_norm": 0.3911338448524475, "learning_rate": 4.3721292827188076e-06, "loss": 0.3514014184474945, "step": 12965, "token_acc": 0.8771127736215018 }, { "epoch": 0.6996168995845249, "grad_norm": 0.37041357159614563, "learning_rate": 4.370684830376849e-06, "loss": 0.3354784846305847, "step": 12966, "token_acc": 0.8821986094713368 }, { "epoch": 0.6996708573895214, "grad_norm": 0.35916081070899963, "learning_rate": 4.369240549952698e-06, "loss": 0.3423588275909424, "step": 12967, "token_acc": 0.8804000851244945 }, { "epoch": 0.6997248151945179, "grad_norm": 0.339224636554718, "learning_rate": 4.367796441490469e-06, "loss": 0.343973308801651, "step": 12968, "token_acc": 0.8835197785722524 }, { "epoch": 0.6997787729995144, "grad_norm": 0.3976753056049347, "learning_rate": 4.366352505034262e-06, "loss": 0.263450026512146, "step": 12969, "token_acc": 0.903156146179402 }, { "epoch": 0.6998327308045109, "grad_norm": 0.45538488030433655, "learning_rate": 4.364908740628172e-06, "loss": 0.36535733938217163, "step": 12970, "token_acc": 0.8751280925193968 }, { "epoch": 0.6998866886095073, "grad_norm": 0.40394771099090576, "learning_rate": 4.363465148316294e-06, "loss": 0.3900151252746582, "step": 12971, "token_acc": 0.867247294800739 }, { "epoch": 0.6999406464145038, "grad_norm": 0.3695841133594513, "learning_rate": 4.362021728142713e-06, "loss": 0.3242558240890503, "step": 12972, "token_acc": 0.885201793721973 }, { "epoch": 0.6999946042195003, "grad_norm": 0.40982747077941895, "learning_rate": 4.360578480151513e-06, "loss": 0.32311683893203735, "step": 12973, "token_acc": 0.8863125351320967 }, { "epoch": 0.7000485620244968, "grad_norm": 0.44043341279029846, "learning_rate": 4.359135404386764e-06, "loss": 0.3686612844467163, "step": 12974, "token_acc": 0.869615832363213 }, { "epoch": 0.7001025198294933, "grad_norm": 0.3945358693599701, "learning_rate": 4.35769250089255e-06, "loss": 0.3528861105442047, "step": 12975, "token_acc": 0.8751574527641708 }, { "epoch": 0.7001564776344898, "grad_norm": 0.4204568862915039, "learning_rate": 4.356249769712925e-06, "loss": 0.376563161611557, "step": 12976, "token_acc": 0.8631830873554048 }, { "epoch": 0.7002104354394864, "grad_norm": 0.40478515625, "learning_rate": 4.354807210891953e-06, "loss": 0.3705865740776062, "step": 12977, "token_acc": 0.8772125263027603 }, { "epoch": 0.7002643932444829, "grad_norm": 0.38881823420524597, "learning_rate": 4.3533648244736905e-06, "loss": 0.4143206477165222, "step": 12978, "token_acc": 0.8621896350608232 }, { "epoch": 0.7003183510494793, "grad_norm": 0.4546194076538086, "learning_rate": 4.351922610502186e-06, "loss": 0.3537431061267853, "step": 12979, "token_acc": 0.8740409207161125 }, { "epoch": 0.7003723088544758, "grad_norm": 0.41911160945892334, "learning_rate": 4.3504805690214835e-06, "loss": 0.3762211799621582, "step": 12980, "token_acc": 0.8686987104337632 }, { "epoch": 0.7004262666594723, "grad_norm": 0.3938799798488617, "learning_rate": 4.349038700075628e-06, "loss": 0.2948005795478821, "step": 12981, "token_acc": 0.8898829101434022 }, { "epoch": 0.7004802244644688, "grad_norm": 0.5017208456993103, "learning_rate": 4.347597003708649e-06, "loss": 0.3515770733356476, "step": 12982, "token_acc": 0.8696136701337296 }, { "epoch": 0.7005341822694653, "grad_norm": 0.40268418192863464, "learning_rate": 4.3461554799645765e-06, "loss": 0.35160163044929504, "step": 12983, "token_acc": 0.8740823491860836 }, { "epoch": 0.7005881400744618, "grad_norm": 0.3214668929576874, "learning_rate": 4.344714128887436e-06, "loss": 0.3551449775695801, "step": 12984, "token_acc": 0.8743084753263997 }, { "epoch": 0.7006420978794583, "grad_norm": 0.5250051021575928, "learning_rate": 4.343272950521244e-06, "loss": 0.34989455342292786, "step": 12985, "token_acc": 0.8782742681047766 }, { "epoch": 0.7006960556844548, "grad_norm": 0.4226832389831543, "learning_rate": 4.341831944910014e-06, "loss": 0.35068100690841675, "step": 12986, "token_acc": 0.8776680471487734 }, { "epoch": 0.7007500134894512, "grad_norm": 0.4269295334815979, "learning_rate": 4.34039111209775e-06, "loss": 0.31233280897140503, "step": 12987, "token_acc": 0.8881210924646266 }, { "epoch": 0.7008039712944477, "grad_norm": 0.3669895827770233, "learning_rate": 4.338950452128463e-06, "loss": 0.3319542109966278, "step": 12988, "token_acc": 0.8751759076836476 }, { "epoch": 0.7008579290994442, "grad_norm": 0.3638477921485901, "learning_rate": 4.337509965046146e-06, "loss": 0.3951806426048279, "step": 12989, "token_acc": 0.8633161744784611 }, { "epoch": 0.7009118869044407, "grad_norm": 0.4598572850227356, "learning_rate": 4.336069650894792e-06, "loss": 0.3560926914215088, "step": 12990, "token_acc": 0.8779129415213249 }, { "epoch": 0.7009658447094372, "grad_norm": 0.4518909156322479, "learning_rate": 4.334629509718387e-06, "loss": 0.39496833086013794, "step": 12991, "token_acc": 0.861706659094136 }, { "epoch": 0.7010198025144337, "grad_norm": 0.49091634154319763, "learning_rate": 4.333189541560912e-06, "loss": 0.33078986406326294, "step": 12992, "token_acc": 0.8845004269854825 }, { "epoch": 0.7010737603194303, "grad_norm": 0.48007315397262573, "learning_rate": 4.3317497464663435e-06, "loss": 0.32903075218200684, "step": 12993, "token_acc": 0.8893905191873589 }, { "epoch": 0.7011277181244266, "grad_norm": 0.3612505793571472, "learning_rate": 4.330310124478655e-06, "loss": 0.37001654505729675, "step": 12994, "token_acc": 0.8702934476751977 }, { "epoch": 0.7011816759294232, "grad_norm": 0.3656139671802521, "learning_rate": 4.328870675641808e-06, "loss": 0.3206747770309448, "step": 12995, "token_acc": 0.8848002652080226 }, { "epoch": 0.7012356337344197, "grad_norm": 0.36660444736480713, "learning_rate": 4.327431399999765e-06, "loss": 0.31301161646842957, "step": 12996, "token_acc": 0.8872763139354657 }, { "epoch": 0.7012895915394162, "grad_norm": 0.4753868877887726, "learning_rate": 4.3259922975964815e-06, "loss": 0.38082829117774963, "step": 12997, "token_acc": 0.8626766153357676 }, { "epoch": 0.7013435493444127, "grad_norm": 0.4357379972934723, "learning_rate": 4.3245533684759075e-06, "loss": 0.3741036057472229, "step": 12998, "token_acc": 0.8647391159853772 }, { "epoch": 0.7013975071494092, "grad_norm": 0.4057587683200836, "learning_rate": 4.323114612681982e-06, "loss": 0.3594222366809845, "step": 12999, "token_acc": 0.878895402653502 }, { "epoch": 0.7014514649544057, "grad_norm": 0.5066772103309631, "learning_rate": 4.321676030258654e-06, "loss": 0.3477182388305664, "step": 13000, "token_acc": 0.8739909700369407 }, { "epoch": 0.7015054227594022, "grad_norm": 0.3489379286766052, "learning_rate": 4.320237621249852e-06, "loss": 0.2966810166835785, "step": 13001, "token_acc": 0.8918767507002802 }, { "epoch": 0.7015593805643986, "grad_norm": 0.43341758847236633, "learning_rate": 4.318799385699505e-06, "loss": 0.33246394991874695, "step": 13002, "token_acc": 0.8783416039699056 }, { "epoch": 0.7016133383693951, "grad_norm": 0.40796390175819397, "learning_rate": 4.317361323651537e-06, "loss": 0.3820260167121887, "step": 13003, "token_acc": 0.8665391969407266 }, { "epoch": 0.7016672961743916, "grad_norm": 0.37234994769096375, "learning_rate": 4.315923435149865e-06, "loss": 0.3835643529891968, "step": 13004, "token_acc": 0.8686842447071048 }, { "epoch": 0.7017212539793881, "grad_norm": 0.4416543245315552, "learning_rate": 4.3144857202384015e-06, "loss": 0.3986671566963196, "step": 13005, "token_acc": 0.862799690641918 }, { "epoch": 0.7017752117843846, "grad_norm": 0.4161495864391327, "learning_rate": 4.313048178961052e-06, "loss": 0.3197459578514099, "step": 13006, "token_acc": 0.8821451509312781 }, { "epoch": 0.7018291695893811, "grad_norm": 0.4629823863506317, "learning_rate": 4.311610811361725e-06, "loss": 0.32186245918273926, "step": 13007, "token_acc": 0.8869696461512662 }, { "epoch": 0.7018831273943776, "grad_norm": 0.362908273935318, "learning_rate": 4.310173617484313e-06, "loss": 0.3037789463996887, "step": 13008, "token_acc": 0.8911323328785812 }, { "epoch": 0.7019370851993741, "grad_norm": 0.3959549069404602, "learning_rate": 4.3087365973727136e-06, "loss": 0.3489718437194824, "step": 13009, "token_acc": 0.8800505050505051 }, { "epoch": 0.7019910430043705, "grad_norm": 0.5153836607933044, "learning_rate": 4.3072997510708025e-06, "loss": 0.3565692901611328, "step": 13010, "token_acc": 0.8708827404479579 }, { "epoch": 0.702045000809367, "grad_norm": 0.4220113754272461, "learning_rate": 4.305863078622466e-06, "loss": 0.3549983501434326, "step": 13011, "token_acc": 0.8783185840707964 }, { "epoch": 0.7020989586143636, "grad_norm": 0.47004756331443787, "learning_rate": 4.304426580071576e-06, "loss": 0.4024938941001892, "step": 13012, "token_acc": 0.8667995095033721 }, { "epoch": 0.7021529164193601, "grad_norm": 0.4142943322658539, "learning_rate": 4.30299025546201e-06, "loss": 0.3349308967590332, "step": 13013, "token_acc": 0.8757396449704142 }, { "epoch": 0.7022068742243566, "grad_norm": 0.3780541718006134, "learning_rate": 4.3015541048376275e-06, "loss": 0.3301984667778015, "step": 13014, "token_acc": 0.8821454812637767 }, { "epoch": 0.7022608320293531, "grad_norm": 0.35526302456855774, "learning_rate": 4.300118128242291e-06, "loss": 0.30962204933166504, "step": 13015, "token_acc": 0.8861763333851655 }, { "epoch": 0.7023147898343496, "grad_norm": 0.43093162775039673, "learning_rate": 4.298682325719854e-06, "loss": 0.3803926408290863, "step": 13016, "token_acc": 0.8704144234380868 }, { "epoch": 0.702368747639346, "grad_norm": 0.4593546986579895, "learning_rate": 4.297246697314166e-06, "loss": 0.3380877375602722, "step": 13017, "token_acc": 0.8802058961160505 }, { "epoch": 0.7024227054443425, "grad_norm": 0.34832465648651123, "learning_rate": 4.295811243069068e-06, "loss": 0.3623224198818207, "step": 13018, "token_acc": 0.8713470319634703 }, { "epoch": 0.702476663249339, "grad_norm": 0.46238499879837036, "learning_rate": 4.294375963028398e-06, "loss": 0.3808334767818451, "step": 13019, "token_acc": 0.8689354275741711 }, { "epoch": 0.7025306210543355, "grad_norm": 0.37481510639190674, "learning_rate": 4.292940857235995e-06, "loss": 0.29973065853118896, "step": 13020, "token_acc": 0.8917573057266414 }, { "epoch": 0.702584578859332, "grad_norm": 0.38143783807754517, "learning_rate": 4.2915059257356825e-06, "loss": 0.2974398732185364, "step": 13021, "token_acc": 0.8921772516397376 }, { "epoch": 0.7026385366643285, "grad_norm": 0.3995629549026489, "learning_rate": 4.290071168571283e-06, "loss": 0.3775256276130676, "step": 13022, "token_acc": 0.8647984267453294 }, { "epoch": 0.702692494469325, "grad_norm": 0.43858078122138977, "learning_rate": 4.2886365857866166e-06, "loss": 0.3185449540615082, "step": 13023, "token_acc": 0.8892181766864239 }, { "epoch": 0.7027464522743215, "grad_norm": 0.4771212637424469, "learning_rate": 4.287202177425491e-06, "loss": 0.3588426113128662, "step": 13024, "token_acc": 0.8737933500178763 }, { "epoch": 0.7028004100793179, "grad_norm": 0.45802590250968933, "learning_rate": 4.285767943531715e-06, "loss": 0.3385826051235199, "step": 13025, "token_acc": 0.8727430555555555 }, { "epoch": 0.7028543678843144, "grad_norm": 0.4633828401565552, "learning_rate": 4.284333884149089e-06, "loss": 0.3573843836784363, "step": 13026, "token_acc": 0.8714090287277702 }, { "epoch": 0.7029083256893109, "grad_norm": 0.40265417098999023, "learning_rate": 4.282899999321408e-06, "loss": 0.34517794847488403, "step": 13027, "token_acc": 0.8834479487787527 }, { "epoch": 0.7029622834943074, "grad_norm": 0.4534818232059479, "learning_rate": 4.281466289092464e-06, "loss": 0.4401213228702545, "step": 13028, "token_acc": 0.8485017889087657 }, { "epoch": 0.703016241299304, "grad_norm": 0.37674006819725037, "learning_rate": 4.280032753506041e-06, "loss": 0.251709520816803, "step": 13029, "token_acc": 0.9039023551658931 }, { "epoch": 0.7030701991043005, "grad_norm": 0.40389835834503174, "learning_rate": 4.278599392605921e-06, "loss": 0.33334988355636597, "step": 13030, "token_acc": 0.8818400101099456 }, { "epoch": 0.703124156909297, "grad_norm": 0.392691969871521, "learning_rate": 4.2771662064358754e-06, "loss": 0.3108977675437927, "step": 13031, "token_acc": 0.8892333709131905 }, { "epoch": 0.7031781147142934, "grad_norm": 0.37770551443099976, "learning_rate": 4.275733195039671e-06, "loss": 0.31663331389427185, "step": 13032, "token_acc": 0.8882243487506646 }, { "epoch": 0.7032320725192899, "grad_norm": 0.4858543574810028, "learning_rate": 4.274300358461079e-06, "loss": 0.38002148270606995, "step": 13033, "token_acc": 0.874819197896121 }, { "epoch": 0.7032860303242864, "grad_norm": 0.370401531457901, "learning_rate": 4.272867696743854e-06, "loss": 0.39469629526138306, "step": 13034, "token_acc": 0.8617463617463618 }, { "epoch": 0.7033399881292829, "grad_norm": 0.37311065196990967, "learning_rate": 4.271435209931749e-06, "loss": 0.3095532953739166, "step": 13035, "token_acc": 0.8887778055486129 }, { "epoch": 0.7033939459342794, "grad_norm": 0.4305049180984497, "learning_rate": 4.270002898068513e-06, "loss": 0.3266162872314453, "step": 13036, "token_acc": 0.8809059547022648 }, { "epoch": 0.7034479037392759, "grad_norm": 0.440675288438797, "learning_rate": 4.268570761197887e-06, "loss": 0.37193751335144043, "step": 13037, "token_acc": 0.8674136321195145 }, { "epoch": 0.7035018615442724, "grad_norm": 0.45374423265457153, "learning_rate": 4.267138799363605e-06, "loss": 0.3333418071269989, "step": 13038, "token_acc": 0.8850771869639794 }, { "epoch": 0.7035558193492689, "grad_norm": 0.3464607894420624, "learning_rate": 4.265707012609405e-06, "loss": 0.27646440267562866, "step": 13039, "token_acc": 0.8983537730637499 }, { "epoch": 0.7036097771542653, "grad_norm": 0.4480151832103729, "learning_rate": 4.264275400979011e-06, "loss": 0.3200310170650482, "step": 13040, "token_acc": 0.8843719229146153 }, { "epoch": 0.7036637349592618, "grad_norm": 0.4528239369392395, "learning_rate": 4.2628439645161445e-06, "loss": 0.41173630952835083, "step": 13041, "token_acc": 0.8590499098015635 }, { "epoch": 0.7037176927642583, "grad_norm": 0.4585423171520233, "learning_rate": 4.2614127032645195e-06, "loss": 0.35280749201774597, "step": 13042, "token_acc": 0.8780792017461803 }, { "epoch": 0.7037716505692548, "grad_norm": 0.3519632816314697, "learning_rate": 4.2599816172678475e-06, "loss": 0.29286834597587585, "step": 13043, "token_acc": 0.8907222463453467 }, { "epoch": 0.7038256083742513, "grad_norm": 0.42450040578842163, "learning_rate": 4.258550706569834e-06, "loss": 0.31662309169769287, "step": 13044, "token_acc": 0.8886426592797784 }, { "epoch": 0.7038795661792479, "grad_norm": 0.529014527797699, "learning_rate": 4.2571199712141765e-06, "loss": 0.3530108630657196, "step": 13045, "token_acc": 0.866968155266629 }, { "epoch": 0.7039335239842444, "grad_norm": 0.4742780029773712, "learning_rate": 4.255689411244571e-06, "loss": 0.359835684299469, "step": 13046, "token_acc": 0.874075019137535 }, { "epoch": 0.7039874817892409, "grad_norm": 0.3206014335155487, "learning_rate": 4.254259026704707e-06, "loss": 0.33223167061805725, "step": 13047, "token_acc": 0.8836270190895742 }, { "epoch": 0.7040414395942373, "grad_norm": 0.4470093250274658, "learning_rate": 4.252828817638267e-06, "loss": 0.3216612935066223, "step": 13048, "token_acc": 0.8820194862710363 }, { "epoch": 0.7040953973992338, "grad_norm": 0.3720978796482086, "learning_rate": 4.251398784088928e-06, "loss": 0.3209683895111084, "step": 13049, "token_acc": 0.887910428984778 }, { "epoch": 0.7041493552042303, "grad_norm": 0.4722936153411865, "learning_rate": 4.249968926100366e-06, "loss": 0.36526787281036377, "step": 13050, "token_acc": 0.8698511595707857 }, { "epoch": 0.7042033130092268, "grad_norm": 0.4251425266265869, "learning_rate": 4.248539243716242e-06, "loss": 0.33068740367889404, "step": 13051, "token_acc": 0.8789218482601255 }, { "epoch": 0.7042572708142233, "grad_norm": 0.34432339668273926, "learning_rate": 4.2471097369802265e-06, "loss": 0.32931381464004517, "step": 13052, "token_acc": 0.88387690689111 }, { "epoch": 0.7043112286192198, "grad_norm": 0.43084636330604553, "learning_rate": 4.245680405935972e-06, "loss": 0.31213125586509705, "step": 13053, "token_acc": 0.8864168618266979 }, { "epoch": 0.7043651864242163, "grad_norm": 0.45480233430862427, "learning_rate": 4.2442512506271315e-06, "loss": 0.4234507083892822, "step": 13054, "token_acc": 0.8542044767090139 }, { "epoch": 0.7044191442292127, "grad_norm": 0.33880680799484253, "learning_rate": 4.24282227109735e-06, "loss": 0.33226126432418823, "step": 13055, "token_acc": 0.8845169114674573 }, { "epoch": 0.7044731020342092, "grad_norm": 0.47068968415260315, "learning_rate": 4.241393467390268e-06, "loss": 0.35584941506385803, "step": 13056, "token_acc": 0.8730709207560257 }, { "epoch": 0.7045270598392057, "grad_norm": 0.5018187761306763, "learning_rate": 4.23996483954952e-06, "loss": 0.34697699546813965, "step": 13057, "token_acc": 0.8767670048228837 }, { "epoch": 0.7045810176442022, "grad_norm": 0.3586467206478119, "learning_rate": 4.238536387618739e-06, "loss": 0.29924362897872925, "step": 13058, "token_acc": 0.890035472428249 }, { "epoch": 0.7046349754491987, "grad_norm": 0.3855144679546356, "learning_rate": 4.237108111641546e-06, "loss": 0.33219006657600403, "step": 13059, "token_acc": 0.8795625247068125 }, { "epoch": 0.7046889332541952, "grad_norm": 0.3208020329475403, "learning_rate": 4.235680011661562e-06, "loss": 0.3267611861228943, "step": 13060, "token_acc": 0.883138739875881 }, { "epoch": 0.7047428910591917, "grad_norm": 0.4090462923049927, "learning_rate": 4.2342520877224e-06, "loss": 0.3155580461025238, "step": 13061, "token_acc": 0.8882170771272674 }, { "epoch": 0.7047968488641883, "grad_norm": 0.3365843892097473, "learning_rate": 4.232824339867668e-06, "loss": 0.3561525046825409, "step": 13062, "token_acc": 0.8730361476561502 }, { "epoch": 0.7048508066691846, "grad_norm": 0.4333798885345459, "learning_rate": 4.231396768140972e-06, "loss": 0.35103315114974976, "step": 13063, "token_acc": 0.8729818780889621 }, { "epoch": 0.7049047644741812, "grad_norm": 0.3566727936267853, "learning_rate": 4.2299693725859024e-06, "loss": 0.3320489525794983, "step": 13064, "token_acc": 0.8774115145070637 }, { "epoch": 0.7049587222791777, "grad_norm": 0.4442647099494934, "learning_rate": 4.22854215324606e-06, "loss": 0.36438077688217163, "step": 13065, "token_acc": 0.8705803869246164 }, { "epoch": 0.7050126800841742, "grad_norm": 0.41377997398376465, "learning_rate": 4.227115110165029e-06, "loss": 0.34402966499328613, "step": 13066, "token_acc": 0.875 }, { "epoch": 0.7050666378891707, "grad_norm": 0.41090843081474304, "learning_rate": 4.22568824338639e-06, "loss": 0.3223613202571869, "step": 13067, "token_acc": 0.883819747288382 }, { "epoch": 0.7051205956941672, "grad_norm": 0.41352611780166626, "learning_rate": 4.22426155295372e-06, "loss": 0.35824644565582275, "step": 13068, "token_acc": 0.8749460974557999 }, { "epoch": 0.7051745534991637, "grad_norm": 0.3909999430179596, "learning_rate": 4.222835038910589e-06, "loss": 0.3599608540534973, "step": 13069, "token_acc": 0.8722800378429517 }, { "epoch": 0.7052285113041602, "grad_norm": 0.3236851394176483, "learning_rate": 4.221408701300558e-06, "loss": 0.3131633996963501, "step": 13070, "token_acc": 0.8889580738895807 }, { "epoch": 0.7052824691091566, "grad_norm": 0.4025115966796875, "learning_rate": 4.219982540167196e-06, "loss": 0.3151872754096985, "step": 13071, "token_acc": 0.8855325914149443 }, { "epoch": 0.7053364269141531, "grad_norm": 0.502065896987915, "learning_rate": 4.218556555554052e-06, "loss": 0.3462179899215698, "step": 13072, "token_acc": 0.8799351797289334 }, { "epoch": 0.7053903847191496, "grad_norm": 0.43045762181282043, "learning_rate": 4.217130747504677e-06, "loss": 0.3723199665546417, "step": 13073, "token_acc": 0.8645848119233499 }, { "epoch": 0.7054443425241461, "grad_norm": 0.49599266052246094, "learning_rate": 4.215705116062614e-06, "loss": 0.3244926929473877, "step": 13074, "token_acc": 0.885647265796804 }, { "epoch": 0.7054983003291426, "grad_norm": 0.3373919725418091, "learning_rate": 4.214279661271402e-06, "loss": 0.2810870110988617, "step": 13075, "token_acc": 0.8959930144561948 }, { "epoch": 0.7055522581341391, "grad_norm": 0.4626137316226959, "learning_rate": 4.212854383174573e-06, "loss": 0.3577274680137634, "step": 13076, "token_acc": 0.8668217505088689 }, { "epoch": 0.7056062159391356, "grad_norm": 0.46641474962234497, "learning_rate": 4.211429281815655e-06, "loss": 0.36153605580329895, "step": 13077, "token_acc": 0.8686625545358808 }, { "epoch": 0.705660173744132, "grad_norm": 0.37368229031562805, "learning_rate": 4.210004357238171e-06, "loss": 0.3456723093986511, "step": 13078, "token_acc": 0.8782000527843759 }, { "epoch": 0.7057141315491285, "grad_norm": 0.40787243843078613, "learning_rate": 4.208579609485636e-06, "loss": 0.39715802669525146, "step": 13079, "token_acc": 0.8649320083682008 }, { "epoch": 0.705768089354125, "grad_norm": 0.47443893551826477, "learning_rate": 4.2071550386015635e-06, "loss": 0.3777432143688202, "step": 13080, "token_acc": 0.8660214728564948 }, { "epoch": 0.7058220471591216, "grad_norm": 0.3516961634159088, "learning_rate": 4.205730644629457e-06, "loss": 0.28428035974502563, "step": 13081, "token_acc": 0.8984375 }, { "epoch": 0.7058760049641181, "grad_norm": 0.4375705420970917, "learning_rate": 4.204306427612821e-06, "loss": 0.3303990364074707, "step": 13082, "token_acc": 0.8825448613376835 }, { "epoch": 0.7059299627691146, "grad_norm": 0.436811625957489, "learning_rate": 4.202882387595144e-06, "loss": 0.3181019723415375, "step": 13083, "token_acc": 0.885401096163428 }, { "epoch": 0.7059839205741111, "grad_norm": 0.349989116191864, "learning_rate": 4.201458524619923e-06, "loss": 0.31399422883987427, "step": 13084, "token_acc": 0.8829331602855289 }, { "epoch": 0.7060378783791076, "grad_norm": 0.47296422719955444, "learning_rate": 4.200034838730641e-06, "loss": 0.3177507221698761, "step": 13085, "token_acc": 0.8824753711669211 }, { "epoch": 0.706091836184104, "grad_norm": 0.4075208604335785, "learning_rate": 4.198611329970775e-06, "loss": 0.3338709771633148, "step": 13086, "token_acc": 0.88491523326321 }, { "epoch": 0.7061457939891005, "grad_norm": 0.5074401497840881, "learning_rate": 4.197187998383799e-06, "loss": 0.35112324357032776, "step": 13087, "token_acc": 0.873967684021544 }, { "epoch": 0.706199751794097, "grad_norm": 0.41116586327552795, "learning_rate": 4.195764844013181e-06, "loss": 0.3293503224849701, "step": 13088, "token_acc": 0.8786778060297857 }, { "epoch": 0.7062537095990935, "grad_norm": 0.3353692293167114, "learning_rate": 4.194341866902385e-06, "loss": 0.30393946170806885, "step": 13089, "token_acc": 0.8911484659692018 }, { "epoch": 0.70630766740409, "grad_norm": 0.4385901093482971, "learning_rate": 4.192919067094864e-06, "loss": 0.3474630117416382, "step": 13090, "token_acc": 0.8763717182942076 }, { "epoch": 0.7063616252090865, "grad_norm": 0.42993777990341187, "learning_rate": 4.191496444634077e-06, "loss": 0.3013187646865845, "step": 13091, "token_acc": 0.8918763203380066 }, { "epoch": 0.706415583014083, "grad_norm": 0.4526296555995941, "learning_rate": 4.19007399956347e-06, "loss": 0.38416147232055664, "step": 13092, "token_acc": 0.8683422320614663 }, { "epoch": 0.7064695408190795, "grad_norm": 0.40192148089408875, "learning_rate": 4.188651731926478e-06, "loss": 0.40033215284347534, "step": 13093, "token_acc": 0.861624792756026 }, { "epoch": 0.7065234986240759, "grad_norm": 0.4127173125743866, "learning_rate": 4.187229641766539e-06, "loss": 0.32865187525749207, "step": 13094, "token_acc": 0.883931175269758 }, { "epoch": 0.7065774564290724, "grad_norm": 0.37983715534210205, "learning_rate": 4.185807729127085e-06, "loss": 0.33371686935424805, "step": 13095, "token_acc": 0.8808468030324703 }, { "epoch": 0.7066314142340689, "grad_norm": 0.42861950397491455, "learning_rate": 4.184385994051535e-06, "loss": 0.32940149307250977, "step": 13096, "token_acc": 0.8819599109131403 }, { "epoch": 0.7066853720390655, "grad_norm": 0.41208162903785706, "learning_rate": 4.1829644365833175e-06, "loss": 0.3142000436782837, "step": 13097, "token_acc": 0.8871728174280604 }, { "epoch": 0.706739329844062, "grad_norm": 0.420621395111084, "learning_rate": 4.181543056765842e-06, "loss": 0.35792678594589233, "step": 13098, "token_acc": 0.8751496289202777 }, { "epoch": 0.7067932876490585, "grad_norm": 0.37791314721107483, "learning_rate": 4.180121854642518e-06, "loss": 0.3635733723640442, "step": 13099, "token_acc": 0.872940314902966 }, { "epoch": 0.706847245454055, "grad_norm": 0.38682928681373596, "learning_rate": 4.178700830256748e-06, "loss": 0.3378028869628906, "step": 13100, "token_acc": 0.8798478154148257 }, { "epoch": 0.7069012032590514, "grad_norm": 0.3595947027206421, "learning_rate": 4.17727998365193e-06, "loss": 0.2978712320327759, "step": 13101, "token_acc": 0.8942884569234462 }, { "epoch": 0.7069551610640479, "grad_norm": 0.4782157838344574, "learning_rate": 4.175859314871451e-06, "loss": 0.3876968026161194, "step": 13102, "token_acc": 0.8663915765509391 }, { "epoch": 0.7070091188690444, "grad_norm": 0.3971189558506012, "learning_rate": 4.174438823958707e-06, "loss": 0.2820107042789459, "step": 13103, "token_acc": 0.8976059994231324 }, { "epoch": 0.7070630766740409, "grad_norm": 0.32096195220947266, "learning_rate": 4.173018510957075e-06, "loss": 0.2967783510684967, "step": 13104, "token_acc": 0.8889147647880764 }, { "epoch": 0.7071170344790374, "grad_norm": 0.34332600235939026, "learning_rate": 4.171598375909931e-06, "loss": 0.3457786440849304, "step": 13105, "token_acc": 0.880563446969697 }, { "epoch": 0.7071709922840339, "grad_norm": 0.5334781408309937, "learning_rate": 4.170178418860646e-06, "loss": 0.3930039405822754, "step": 13106, "token_acc": 0.8607825295723385 }, { "epoch": 0.7072249500890304, "grad_norm": 0.3855952322483063, "learning_rate": 4.168758639852586e-06, "loss": 0.38072437047958374, "step": 13107, "token_acc": 0.8657099521642749 }, { "epoch": 0.7072789078940269, "grad_norm": 0.4076443314552307, "learning_rate": 4.167339038929109e-06, "loss": 0.3645003139972687, "step": 13108, "token_acc": 0.8710610932475884 }, { "epoch": 0.7073328656990233, "grad_norm": 0.41166558861732483, "learning_rate": 4.165919616133569e-06, "loss": 0.3434422016143799, "step": 13109, "token_acc": 0.8745925215723873 }, { "epoch": 0.7073868235040198, "grad_norm": 0.39633679389953613, "learning_rate": 4.164500371509316e-06, "loss": 0.3351286053657532, "step": 13110, "token_acc": 0.8767587939698492 }, { "epoch": 0.7074407813090163, "grad_norm": 0.45995599031448364, "learning_rate": 4.1630813050996935e-06, "loss": 0.3983767330646515, "step": 13111, "token_acc": 0.8636312408430069 }, { "epoch": 0.7074947391140128, "grad_norm": 0.4818204939365387, "learning_rate": 4.161662416948038e-06, "loss": 0.3426557183265686, "step": 13112, "token_acc": 0.8776495841159109 }, { "epoch": 0.7075486969190093, "grad_norm": 0.3974043130874634, "learning_rate": 4.160243707097683e-06, "loss": 0.3257107436656952, "step": 13113, "token_acc": 0.8865236051502146 }, { "epoch": 0.7076026547240059, "grad_norm": 0.43900057673454285, "learning_rate": 4.158825175591956e-06, "loss": 0.3400689363479614, "step": 13114, "token_acc": 0.8786774628879892 }, { "epoch": 0.7076566125290024, "grad_norm": 0.36978408694267273, "learning_rate": 4.157406822474174e-06, "loss": 0.3118511438369751, "step": 13115, "token_acc": 0.8856889676561808 }, { "epoch": 0.7077105703339989, "grad_norm": 0.43054336309432983, "learning_rate": 4.155988647787661e-06, "loss": 0.340127170085907, "step": 13116, "token_acc": 0.8863277549731607 }, { "epoch": 0.7077645281389953, "grad_norm": 0.38117581605911255, "learning_rate": 4.154570651575724e-06, "loss": 0.29970574378967285, "step": 13117, "token_acc": 0.8946744158462816 }, { "epoch": 0.7078184859439918, "grad_norm": 0.37542322278022766, "learning_rate": 4.153152833881666e-06, "loss": 0.28826892375946045, "step": 13118, "token_acc": 0.8944304953207031 }, { "epoch": 0.7078724437489883, "grad_norm": 0.39962494373321533, "learning_rate": 4.151735194748791e-06, "loss": 0.3134903609752655, "step": 13119, "token_acc": 0.8889291538322159 }, { "epoch": 0.7079264015539848, "grad_norm": 0.38552674651145935, "learning_rate": 4.15031773422039e-06, "loss": 0.32099950313568115, "step": 13120, "token_acc": 0.8848403607116744 }, { "epoch": 0.7079803593589813, "grad_norm": 0.36163702607154846, "learning_rate": 4.148900452339754e-06, "loss": 0.36502450704574585, "step": 13121, "token_acc": 0.8710296056574374 }, { "epoch": 0.7080343171639778, "grad_norm": 0.47804689407348633, "learning_rate": 4.147483349150161e-06, "loss": 0.3516133427619934, "step": 13122, "token_acc": 0.8785683851725608 }, { "epoch": 0.7080882749689743, "grad_norm": 0.49258238077163696, "learning_rate": 4.146066424694897e-06, "loss": 0.3365471363067627, "step": 13123, "token_acc": 0.8776169734438668 }, { "epoch": 0.7081422327739707, "grad_norm": 0.4027293026447296, "learning_rate": 4.144649679017231e-06, "loss": 0.3157370388507843, "step": 13124, "token_acc": 0.886715052066898 }, { "epoch": 0.7081961905789672, "grad_norm": 0.4902019202709198, "learning_rate": 4.14323311216043e-06, "loss": 0.3933265805244446, "step": 13125, "token_acc": 0.8642096642096642 }, { "epoch": 0.7082501483839637, "grad_norm": 0.37690913677215576, "learning_rate": 4.141816724167759e-06, "loss": 0.33311933279037476, "step": 13126, "token_acc": 0.8804780876494024 }, { "epoch": 0.7083041061889602, "grad_norm": 0.4672508239746094, "learning_rate": 4.140400515082467e-06, "loss": 0.3034805655479431, "step": 13127, "token_acc": 0.8874847746650426 }, { "epoch": 0.7083580639939567, "grad_norm": 0.32473063468933105, "learning_rate": 4.138984484947804e-06, "loss": 0.37344419956207275, "step": 13128, "token_acc": 0.8700854700854701 }, { "epoch": 0.7084120217989532, "grad_norm": 0.4234714210033417, "learning_rate": 4.137568633807024e-06, "loss": 0.31658440828323364, "step": 13129, "token_acc": 0.8853876185164529 }, { "epoch": 0.7084659796039497, "grad_norm": 0.47306936979293823, "learning_rate": 4.136152961703363e-06, "loss": 0.3553294837474823, "step": 13130, "token_acc": 0.8736002714625043 }, { "epoch": 0.7085199374089463, "grad_norm": 0.29017478227615356, "learning_rate": 4.134737468680053e-06, "loss": 0.35920944809913635, "step": 13131, "token_acc": 0.8744686503719448 }, { "epoch": 0.7085738952139427, "grad_norm": 0.4080563187599182, "learning_rate": 4.133322154780324e-06, "loss": 0.3766535222530365, "step": 13132, "token_acc": 0.8654677503026111 }, { "epoch": 0.7086278530189392, "grad_norm": 0.32783085107803345, "learning_rate": 4.1319070200474e-06, "loss": 0.3293357789516449, "step": 13133, "token_acc": 0.8820672478206725 }, { "epoch": 0.7086818108239357, "grad_norm": 0.4518289268016815, "learning_rate": 4.130492064524499e-06, "loss": 0.3969801664352417, "step": 13134, "token_acc": 0.8626455288580629 }, { "epoch": 0.7087357686289322, "grad_norm": 0.27745628356933594, "learning_rate": 4.129077288254828e-06, "loss": 0.3231026530265808, "step": 13135, "token_acc": 0.8808219178082192 }, { "epoch": 0.7087897264339287, "grad_norm": 0.4633365273475647, "learning_rate": 4.1276626912816035e-06, "loss": 0.3132419288158417, "step": 13136, "token_acc": 0.8870800138552131 }, { "epoch": 0.7088436842389252, "grad_norm": 0.47139492630958557, "learning_rate": 4.126248273648021e-06, "loss": 0.35757580399513245, "step": 13137, "token_acc": 0.8729163228255488 }, { "epoch": 0.7088976420439217, "grad_norm": 0.43093982338905334, "learning_rate": 4.1248340353972786e-06, "loss": 0.30337095260620117, "step": 13138, "token_acc": 0.8860852884757268 }, { "epoch": 0.7089515998489182, "grad_norm": 0.3326485753059387, "learning_rate": 4.1234199765725644e-06, "loss": 0.2891565263271332, "step": 13139, "token_acc": 0.8959735973597359 }, { "epoch": 0.7090055576539146, "grad_norm": 0.4012024700641632, "learning_rate": 4.122006097217065e-06, "loss": 0.35830938816070557, "step": 13140, "token_acc": 0.8767509931005645 }, { "epoch": 0.7090595154589111, "grad_norm": 0.43653348088264465, "learning_rate": 4.12059239737396e-06, "loss": 0.34935927391052246, "step": 13141, "token_acc": 0.8791889266634288 }, { "epoch": 0.7091134732639076, "grad_norm": 0.34008726477622986, "learning_rate": 4.119178877086423e-06, "loss": 0.32812225818634033, "step": 13142, "token_acc": 0.8851555322220764 }, { "epoch": 0.7091674310689041, "grad_norm": 0.43799883127212524, "learning_rate": 4.117765536397622e-06, "loss": 0.36433500051498413, "step": 13143, "token_acc": 0.8687956204379562 }, { "epoch": 0.7092213888739006, "grad_norm": 0.3934321999549866, "learning_rate": 4.116352375350721e-06, "loss": 0.32366710901260376, "step": 13144, "token_acc": 0.887338097364895 }, { "epoch": 0.7092753466788971, "grad_norm": 0.42699724435806274, "learning_rate": 4.114939393988877e-06, "loss": 0.3718068599700928, "step": 13145, "token_acc": 0.8738105088953247 }, { "epoch": 0.7093293044838936, "grad_norm": 0.31792739033699036, "learning_rate": 4.113526592355243e-06, "loss": 0.3444616198539734, "step": 13146, "token_acc": 0.8762108441313373 }, { "epoch": 0.70938326228889, "grad_norm": 0.4199358820915222, "learning_rate": 4.11211397049296e-06, "loss": 0.31977859139442444, "step": 13147, "token_acc": 0.88276440962507 }, { "epoch": 0.7094372200938865, "grad_norm": 0.3569594621658325, "learning_rate": 4.1107015284451776e-06, "loss": 0.32283926010131836, "step": 13148, "token_acc": 0.8861799217731421 }, { "epoch": 0.709491177898883, "grad_norm": 0.36212536692619324, "learning_rate": 4.1092892662550285e-06, "loss": 0.35309791564941406, "step": 13149, "token_acc": 0.8756868131868132 }, { "epoch": 0.7095451357038796, "grad_norm": 0.404019296169281, "learning_rate": 4.107877183965642e-06, "loss": 0.32991915941238403, "step": 13150, "token_acc": 0.8813496932515338 }, { "epoch": 0.7095990935088761, "grad_norm": 0.301882266998291, "learning_rate": 4.106465281620143e-06, "loss": 0.3007928431034088, "step": 13151, "token_acc": 0.8902810700982052 }, { "epoch": 0.7096530513138726, "grad_norm": 0.40406370162963867, "learning_rate": 4.105053559261649e-06, "loss": 0.35395941138267517, "step": 13152, "token_acc": 0.8747456765005086 }, { "epoch": 0.7097070091188691, "grad_norm": 0.43398231267929077, "learning_rate": 4.103642016933276e-06, "loss": 0.2942303717136383, "step": 13153, "token_acc": 0.8865643886564388 }, { "epoch": 0.7097609669238656, "grad_norm": 0.43595850467681885, "learning_rate": 4.102230654678128e-06, "loss": 0.36583757400512695, "step": 13154, "token_acc": 0.8702993092862624 }, { "epoch": 0.709814924728862, "grad_norm": 0.36198821663856506, "learning_rate": 4.100819472539314e-06, "loss": 0.3537549376487732, "step": 13155, "token_acc": 0.8723139208969168 }, { "epoch": 0.7098688825338585, "grad_norm": 0.4289456307888031, "learning_rate": 4.099408470559928e-06, "loss": 0.34324416518211365, "step": 13156, "token_acc": 0.875796673402767 }, { "epoch": 0.709922840338855, "grad_norm": 0.39887213706970215, "learning_rate": 4.097997648783061e-06, "loss": 0.33269762992858887, "step": 13157, "token_acc": 0.8834909090909091 }, { "epoch": 0.7099767981438515, "grad_norm": 0.35493534803390503, "learning_rate": 4.0965870072518e-06, "loss": 0.32437700033187866, "step": 13158, "token_acc": 0.8819583382370249 }, { "epoch": 0.710030755948848, "grad_norm": 0.4176003336906433, "learning_rate": 4.0951765460092255e-06, "loss": 0.30492132902145386, "step": 13159, "token_acc": 0.8901540522438044 }, { "epoch": 0.7100847137538445, "grad_norm": 0.3974401354789734, "learning_rate": 4.093766265098412e-06, "loss": 0.3234184980392456, "step": 13160, "token_acc": 0.8834391907786403 }, { "epoch": 0.710138671558841, "grad_norm": 0.3090587556362152, "learning_rate": 4.09235616456243e-06, "loss": 0.34447038173675537, "step": 13161, "token_acc": 0.8788357400722022 }, { "epoch": 0.7101926293638374, "grad_norm": 0.3563908636569977, "learning_rate": 4.090946244444344e-06, "loss": 0.24929171800613403, "step": 13162, "token_acc": 0.9064012166459284 }, { "epoch": 0.7102465871688339, "grad_norm": 0.46293750405311584, "learning_rate": 4.08953650478721e-06, "loss": 0.34487801790237427, "step": 13163, "token_acc": 0.8794450957405237 }, { "epoch": 0.7103005449738304, "grad_norm": 0.3749132454395294, "learning_rate": 4.0881269456340835e-06, "loss": 0.3279571533203125, "step": 13164, "token_acc": 0.8815810788919488 }, { "epoch": 0.710354502778827, "grad_norm": 0.5380399823188782, "learning_rate": 4.0867175670280115e-06, "loss": 0.3190971314907074, "step": 13165, "token_acc": 0.8863494701849158 }, { "epoch": 0.7104084605838235, "grad_norm": 0.4763704240322113, "learning_rate": 4.085308369012036e-06, "loss": 0.2793087959289551, "step": 13166, "token_acc": 0.8994252873563219 }, { "epoch": 0.71046241838882, "grad_norm": 0.4615054130554199, "learning_rate": 4.08389935162919e-06, "loss": 0.3252440094947815, "step": 13167, "token_acc": 0.8831309041835358 }, { "epoch": 0.7105163761938165, "grad_norm": 0.38500651717185974, "learning_rate": 4.082490514922512e-06, "loss": 0.3181058466434479, "step": 13168, "token_acc": 0.8872487793800387 }, { "epoch": 0.710570333998813, "grad_norm": 0.3455037474632263, "learning_rate": 4.081081858935022e-06, "loss": 0.31872624158859253, "step": 13169, "token_acc": 0.8846752511673978 }, { "epoch": 0.7106242918038094, "grad_norm": 0.34686803817749023, "learning_rate": 4.079673383709742e-06, "loss": 0.3591689169406891, "step": 13170, "token_acc": 0.8749059160018064 }, { "epoch": 0.7106782496088059, "grad_norm": 0.3578488826751709, "learning_rate": 4.078265089289687e-06, "loss": 0.33016741275787354, "step": 13171, "token_acc": 0.8819706226439621 }, { "epoch": 0.7107322074138024, "grad_norm": 0.41580143570899963, "learning_rate": 4.076856975717865e-06, "loss": 0.27572101354599, "step": 13172, "token_acc": 0.8990812917594655 }, { "epoch": 0.7107861652187989, "grad_norm": 0.37903136014938354, "learning_rate": 4.075449043037274e-06, "loss": 0.3553963005542755, "step": 13173, "token_acc": 0.8740693806431173 }, { "epoch": 0.7108401230237954, "grad_norm": 0.38622093200683594, "learning_rate": 4.074041291290926e-06, "loss": 0.3295416831970215, "step": 13174, "token_acc": 0.8827757810340061 }, { "epoch": 0.7108940808287919, "grad_norm": 0.45735231041908264, "learning_rate": 4.0726337205217995e-06, "loss": 0.35016557574272156, "step": 13175, "token_acc": 0.8779965753424658 }, { "epoch": 0.7109480386337884, "grad_norm": 0.36332327127456665, "learning_rate": 4.071226330772888e-06, "loss": 0.30603379011154175, "step": 13176, "token_acc": 0.8923057292331249 }, { "epoch": 0.7110019964387849, "grad_norm": 0.4553760588169098, "learning_rate": 4.06981912208717e-06, "loss": 0.38572293519973755, "step": 13177, "token_acc": 0.8622771707878091 }, { "epoch": 0.7110559542437813, "grad_norm": 0.3872585594654083, "learning_rate": 4.0684120945076225e-06, "loss": 0.3131190538406372, "step": 13178, "token_acc": 0.8890939855406861 }, { "epoch": 0.7111099120487778, "grad_norm": 0.37212154269218445, "learning_rate": 4.067005248077216e-06, "loss": 0.3366977572441101, "step": 13179, "token_acc": 0.8804960541149943 }, { "epoch": 0.7111638698537743, "grad_norm": 0.4060015380382538, "learning_rate": 4.065598582838912e-06, "loss": 0.30806538462638855, "step": 13180, "token_acc": 0.8904202824570359 }, { "epoch": 0.7112178276587708, "grad_norm": 0.4341847896575928, "learning_rate": 4.064192098835675e-06, "loss": 0.370292067527771, "step": 13181, "token_acc": 0.8758665794637017 }, { "epoch": 0.7112717854637673, "grad_norm": 0.4660889804363251, "learning_rate": 4.062785796110457e-06, "loss": 0.30291351675987244, "step": 13182, "token_acc": 0.8905347355947267 }, { "epoch": 0.7113257432687639, "grad_norm": 0.5522112846374512, "learning_rate": 4.0613796747062065e-06, "loss": 0.43157580494880676, "step": 13183, "token_acc": 0.8507191732998185 }, { "epoch": 0.7113797010737604, "grad_norm": 0.4419151246547699, "learning_rate": 4.059973734665864e-06, "loss": 0.3080475628376007, "step": 13184, "token_acc": 0.8838167435728411 }, { "epoch": 0.7114336588787568, "grad_norm": 0.40987107157707214, "learning_rate": 4.058567976032368e-06, "loss": 0.35860663652420044, "step": 13185, "token_acc": 0.873826714801444 }, { "epoch": 0.7114876166837533, "grad_norm": 0.41686317324638367, "learning_rate": 4.057162398848645e-06, "loss": 0.3606942594051361, "step": 13186, "token_acc": 0.8751507840772015 }, { "epoch": 0.7115415744887498, "grad_norm": 0.40828737616539, "learning_rate": 4.05575700315763e-06, "loss": 0.3446217179298401, "step": 13187, "token_acc": 0.8782017773131208 }, { "epoch": 0.7115955322937463, "grad_norm": 0.3890359103679657, "learning_rate": 4.054351789002239e-06, "loss": 0.35875245928764343, "step": 13188, "token_acc": 0.8756466756083541 }, { "epoch": 0.7116494900987428, "grad_norm": 0.40769723057746887, "learning_rate": 4.052946756425386e-06, "loss": 0.3583510220050812, "step": 13189, "token_acc": 0.8711508443309209 }, { "epoch": 0.7117034479037393, "grad_norm": 0.3006768524646759, "learning_rate": 4.051541905469982e-06, "loss": 0.37103021144866943, "step": 13190, "token_acc": 0.8724820757937862 }, { "epoch": 0.7117574057087358, "grad_norm": 0.40095990896224976, "learning_rate": 4.05013723617893e-06, "loss": 0.35834959149360657, "step": 13191, "token_acc": 0.8804317741489067 }, { "epoch": 0.7118113635137323, "grad_norm": 0.4085708260536194, "learning_rate": 4.0487327485951276e-06, "loss": 0.38352715969085693, "step": 13192, "token_acc": 0.8690152500295544 }, { "epoch": 0.7118653213187287, "grad_norm": 0.38273337483406067, "learning_rate": 4.047328442761469e-06, "loss": 0.342582643032074, "step": 13193, "token_acc": 0.8767998142127265 }, { "epoch": 0.7119192791237252, "grad_norm": 0.4849036633968353, "learning_rate": 4.0459243187208394e-06, "loss": 0.3538799285888672, "step": 13194, "token_acc": 0.8739975323874152 }, { "epoch": 0.7119732369287217, "grad_norm": 0.34264370799064636, "learning_rate": 4.044520376516121e-06, "loss": 0.295626699924469, "step": 13195, "token_acc": 0.8921808579157542 }, { "epoch": 0.7120271947337182, "grad_norm": 0.4377705156803131, "learning_rate": 4.0431166161901915e-06, "loss": 0.33564889430999756, "step": 13196, "token_acc": 0.8828801458301686 }, { "epoch": 0.7120811525387147, "grad_norm": 0.39321592450141907, "learning_rate": 4.041713037785919e-06, "loss": 0.3994697332382202, "step": 13197, "token_acc": 0.863037173741102 }, { "epoch": 0.7121351103437112, "grad_norm": 0.4734969139099121, "learning_rate": 4.0403096413461695e-06, "loss": 0.3814951777458191, "step": 13198, "token_acc": 0.8663853727144867 }, { "epoch": 0.7121890681487077, "grad_norm": 0.41038572788238525, "learning_rate": 4.038906426913799e-06, "loss": 0.33585113286972046, "step": 13199, "token_acc": 0.8781742205078753 }, { "epoch": 0.7122430259537043, "grad_norm": 0.34358978271484375, "learning_rate": 4.037503394531669e-06, "loss": 0.29465001821517944, "step": 13200, "token_acc": 0.8914410368279225 }, { "epoch": 0.7122969837587007, "grad_norm": 0.3625209927558899, "learning_rate": 4.036100544242623e-06, "loss": 0.3124595284461975, "step": 13201, "token_acc": 0.8842443729903537 }, { "epoch": 0.7123509415636972, "grad_norm": 0.4582946300506592, "learning_rate": 4.034697876089503e-06, "loss": 0.31264328956604004, "step": 13202, "token_acc": 0.8874841972187105 }, { "epoch": 0.7124048993686937, "grad_norm": 0.3826253116130829, "learning_rate": 4.033295390115147e-06, "loss": 0.31284716725349426, "step": 13203, "token_acc": 0.8865995847308736 }, { "epoch": 0.7124588571736902, "grad_norm": 0.4887768030166626, "learning_rate": 4.031893086362389e-06, "loss": 0.292869508266449, "step": 13204, "token_acc": 0.8905645784996133 }, { "epoch": 0.7125128149786867, "grad_norm": 0.4173475205898285, "learning_rate": 4.030490964874046e-06, "loss": 0.3488536775112152, "step": 13205, "token_acc": 0.8748394462680177 }, { "epoch": 0.7125667727836832, "grad_norm": 0.29540497064590454, "learning_rate": 4.029089025692951e-06, "loss": 0.29261380434036255, "step": 13206, "token_acc": 0.8937333727460833 }, { "epoch": 0.7126207305886797, "grad_norm": 0.38918471336364746, "learning_rate": 4.027687268861913e-06, "loss": 0.4009591341018677, "step": 13207, "token_acc": 0.8618103229087211 }, { "epoch": 0.7126746883936761, "grad_norm": 0.4740491807460785, "learning_rate": 4.026285694423744e-06, "loss": 0.4080829620361328, "step": 13208, "token_acc": 0.8605297580117724 }, { "epoch": 0.7127286461986726, "grad_norm": 0.37280920147895813, "learning_rate": 4.0248843024212416e-06, "loss": 0.3812906742095947, "step": 13209, "token_acc": 0.8631567377445593 }, { "epoch": 0.7127826040036691, "grad_norm": 0.36605799198150635, "learning_rate": 4.023483092897207e-06, "loss": 0.33248746395111084, "step": 13210, "token_acc": 0.8794998105342933 }, { "epoch": 0.7128365618086656, "grad_norm": 0.41008949279785156, "learning_rate": 4.022082065894433e-06, "loss": 0.3248879313468933, "step": 13211, "token_acc": 0.885800031590586 }, { "epoch": 0.7128905196136621, "grad_norm": 0.3578541576862335, "learning_rate": 4.0206812214557044e-06, "loss": 0.3297792971134186, "step": 13212, "token_acc": 0.8791124713083397 }, { "epoch": 0.7129444774186586, "grad_norm": 0.4359740614891052, "learning_rate": 4.0192805596238085e-06, "loss": 0.3230472803115845, "step": 13213, "token_acc": 0.8825628066002675 }, { "epoch": 0.7129984352236551, "grad_norm": 0.35156258940696716, "learning_rate": 4.017880080441517e-06, "loss": 0.324863076210022, "step": 13214, "token_acc": 0.8847675568743818 }, { "epoch": 0.7130523930286516, "grad_norm": 0.3643287420272827, "learning_rate": 4.016479783951599e-06, "loss": 0.30178600549697876, "step": 13215, "token_acc": 0.8944873208379273 }, { "epoch": 0.713106350833648, "grad_norm": 0.3723515272140503, "learning_rate": 4.015079670196823e-06, "loss": 0.3775133192539215, "step": 13216, "token_acc": 0.8667341918680365 }, { "epoch": 0.7131603086386445, "grad_norm": 0.5031684637069702, "learning_rate": 4.013679739219945e-06, "loss": 0.351553350687027, "step": 13217, "token_acc": 0.8737976144671027 }, { "epoch": 0.713214266443641, "grad_norm": 0.33608517050743103, "learning_rate": 4.012279991063717e-06, "loss": 0.306186318397522, "step": 13218, "token_acc": 0.8913745387453874 }, { "epoch": 0.7132682242486376, "grad_norm": 0.41418758034706116, "learning_rate": 4.010880425770892e-06, "loss": 0.3714161515235901, "step": 13219, "token_acc": 0.8709422011084719 }, { "epoch": 0.7133221820536341, "grad_norm": 0.49061527848243713, "learning_rate": 4.009481043384211e-06, "loss": 0.33547165989875793, "step": 13220, "token_acc": 0.877747502270663 }, { "epoch": 0.7133761398586306, "grad_norm": 0.4461497366428375, "learning_rate": 4.008081843946409e-06, "loss": 0.342684805393219, "step": 13221, "token_acc": 0.8739042481456507 }, { "epoch": 0.7134300976636271, "grad_norm": 0.3661331236362457, "learning_rate": 4.006682827500218e-06, "loss": 0.29081887006759644, "step": 13222, "token_acc": 0.8971905846621109 }, { "epoch": 0.7134840554686236, "grad_norm": 0.5249294638633728, "learning_rate": 4.005283994088364e-06, "loss": 0.3491564393043518, "step": 13223, "token_acc": 0.8794615541505201 }, { "epoch": 0.71353801327362, "grad_norm": 0.4020887017250061, "learning_rate": 4.003885343753565e-06, "loss": 0.3422807455062866, "step": 13224, "token_acc": 0.8801057184046132 }, { "epoch": 0.7135919710786165, "grad_norm": 0.3866213858127594, "learning_rate": 4.002486876538537e-06, "loss": 0.32459402084350586, "step": 13225, "token_acc": 0.8845882186749541 }, { "epoch": 0.713645928883613, "grad_norm": 0.44263899326324463, "learning_rate": 4.00108859248599e-06, "loss": 0.3453439772129059, "step": 13226, "token_acc": 0.8757128045619492 }, { "epoch": 0.7136998866886095, "grad_norm": 0.39784863591194153, "learning_rate": 3.999690491638625e-06, "loss": 0.3256819248199463, "step": 13227, "token_acc": 0.884844361285933 }, { "epoch": 0.713753844493606, "grad_norm": 0.48201414942741394, "learning_rate": 3.9982925740391396e-06, "loss": 0.35268086194992065, "step": 13228, "token_acc": 0.8753159224936815 }, { "epoch": 0.7138078022986025, "grad_norm": 0.4040564298629761, "learning_rate": 3.996894839730229e-06, "loss": 0.29450416564941406, "step": 13229, "token_acc": 0.8895413870246085 }, { "epoch": 0.713861760103599, "grad_norm": 0.41305822134017944, "learning_rate": 3.9954972887545755e-06, "loss": 0.3229774236679077, "step": 13230, "token_acc": 0.886579139314369 }, { "epoch": 0.7139157179085954, "grad_norm": 0.5577483177185059, "learning_rate": 3.994099921154859e-06, "loss": 0.36112910509109497, "step": 13231, "token_acc": 0.8712806181160612 }, { "epoch": 0.7139696757135919, "grad_norm": 0.41401639580726624, "learning_rate": 3.9927027369737614e-06, "loss": 0.33407315611839294, "step": 13232, "token_acc": 0.8796680497925311 }, { "epoch": 0.7140236335185884, "grad_norm": 0.506843090057373, "learning_rate": 3.991305736253948e-06, "loss": 0.3467082679271698, "step": 13233, "token_acc": 0.8775865403210211 }, { "epoch": 0.714077591323585, "grad_norm": 0.45046448707580566, "learning_rate": 3.989908919038084e-06, "loss": 0.3547896146774292, "step": 13234, "token_acc": 0.8737021540368821 }, { "epoch": 0.7141315491285815, "grad_norm": 0.42535296082496643, "learning_rate": 3.9885122853688265e-06, "loss": 0.35998696088790894, "step": 13235, "token_acc": 0.8745704467353952 }, { "epoch": 0.714185506933578, "grad_norm": 0.5442423224449158, "learning_rate": 3.987115835288828e-06, "loss": 0.3488522171974182, "step": 13236, "token_acc": 0.8710601719197708 }, { "epoch": 0.7142394647385745, "grad_norm": 0.4229658842086792, "learning_rate": 3.985719568840739e-06, "loss": 0.3648461103439331, "step": 13237, "token_acc": 0.8699611824425202 }, { "epoch": 0.714293422543571, "grad_norm": 0.41957199573516846, "learning_rate": 3.984323486067194e-06, "loss": 0.3315632939338684, "step": 13238, "token_acc": 0.8811643835616438 }, { "epoch": 0.7143473803485674, "grad_norm": 0.43838948011398315, "learning_rate": 3.982927587010838e-06, "loss": 0.3605881333351135, "step": 13239, "token_acc": 0.8759335467154398 }, { "epoch": 0.7144013381535639, "grad_norm": 0.46338552236557007, "learning_rate": 3.981531871714297e-06, "loss": 0.3016805052757263, "step": 13240, "token_acc": 0.8907666941467436 }, { "epoch": 0.7144552959585604, "grad_norm": 0.5089730620384216, "learning_rate": 3.980136340220195e-06, "loss": 0.4041159152984619, "step": 13241, "token_acc": 0.8624486006101605 }, { "epoch": 0.7145092537635569, "grad_norm": 0.43544986844062805, "learning_rate": 3.978740992571154e-06, "loss": 0.337617963552475, "step": 13242, "token_acc": 0.8789357682619647 }, { "epoch": 0.7145632115685534, "grad_norm": 0.40361282229423523, "learning_rate": 3.977345828809788e-06, "loss": 0.31208938360214233, "step": 13243, "token_acc": 0.8906119027661358 }, { "epoch": 0.7146171693735499, "grad_norm": 0.5901911854743958, "learning_rate": 3.975950848978696e-06, "loss": 0.396295428276062, "step": 13244, "token_acc": 0.8660921445144183 }, { "epoch": 0.7146711271785464, "grad_norm": 0.3243677020072937, "learning_rate": 3.974556053120489e-06, "loss": 0.2873828113079071, "step": 13245, "token_acc": 0.8933066361556065 }, { "epoch": 0.7147250849835429, "grad_norm": 0.3621312379837036, "learning_rate": 3.973161441277764e-06, "loss": 0.3256700038909912, "step": 13246, "token_acc": 0.8860584518167457 }, { "epoch": 0.7147790427885393, "grad_norm": 0.3660629689693451, "learning_rate": 3.97176701349311e-06, "loss": 0.3672333359718323, "step": 13247, "token_acc": 0.8698039215686274 }, { "epoch": 0.7148330005935358, "grad_norm": 0.4104461371898651, "learning_rate": 3.9703727698091095e-06, "loss": 0.3997766375541687, "step": 13248, "token_acc": 0.8629597197898424 }, { "epoch": 0.7148869583985323, "grad_norm": 0.3772922158241272, "learning_rate": 3.968978710268347e-06, "loss": 0.32319116592407227, "step": 13249, "token_acc": 0.8833640978311029 }, { "epoch": 0.7149409162035288, "grad_norm": 0.33146458864212036, "learning_rate": 3.967584834913395e-06, "loss": 0.3155630826950073, "step": 13250, "token_acc": 0.8887760857304005 }, { "epoch": 0.7149948740085253, "grad_norm": 0.45067766308784485, "learning_rate": 3.966191143786817e-06, "loss": 0.33281201124191284, "step": 13251, "token_acc": 0.8823278315310188 }, { "epoch": 0.7150488318135219, "grad_norm": 0.35270804166793823, "learning_rate": 3.964797636931186e-06, "loss": 0.2853766679763794, "step": 13252, "token_acc": 0.899181446111869 }, { "epoch": 0.7151027896185184, "grad_norm": 0.42550045251846313, "learning_rate": 3.9634043143890535e-06, "loss": 0.33814504742622375, "step": 13253, "token_acc": 0.8799129804205946 }, { "epoch": 0.7151567474235148, "grad_norm": 0.537852942943573, "learning_rate": 3.962011176202971e-06, "loss": 0.3611862361431122, "step": 13254, "token_acc": 0.8757172848145557 }, { "epoch": 0.7152107052285113, "grad_norm": 0.39539432525634766, "learning_rate": 3.960618222415487e-06, "loss": 0.34404855966567993, "step": 13255, "token_acc": 0.8769541099344428 }, { "epoch": 0.7152646630335078, "grad_norm": 0.3472656309604645, "learning_rate": 3.959225453069139e-06, "loss": 0.34831345081329346, "step": 13256, "token_acc": 0.8779707495429616 }, { "epoch": 0.7153186208385043, "grad_norm": 0.3948785364627838, "learning_rate": 3.957832868206464e-06, "loss": 0.3453972339630127, "step": 13257, "token_acc": 0.8765938069216758 }, { "epoch": 0.7153725786435008, "grad_norm": 0.4754176735877991, "learning_rate": 3.9564404678699895e-06, "loss": 0.31489789485931396, "step": 13258, "token_acc": 0.8830694275274056 }, { "epoch": 0.7154265364484973, "grad_norm": 0.5370475053787231, "learning_rate": 3.955048252102241e-06, "loss": 0.35820889472961426, "step": 13259, "token_acc": 0.879161302188707 }, { "epoch": 0.7154804942534938, "grad_norm": 0.24718651175498962, "learning_rate": 3.953656220945734e-06, "loss": 0.3514591157436371, "step": 13260, "token_acc": 0.8785874884274567 }, { "epoch": 0.7155344520584903, "grad_norm": 0.44349196553230286, "learning_rate": 3.952264374442983e-06, "loss": 0.3648630976676941, "step": 13261, "token_acc": 0.8752753303964758 }, { "epoch": 0.7155884098634867, "grad_norm": 0.3345611095428467, "learning_rate": 3.950872712636494e-06, "loss": 0.355029821395874, "step": 13262, "token_acc": 0.8784281244248113 }, { "epoch": 0.7156423676684832, "grad_norm": 0.3920590877532959, "learning_rate": 3.949481235568764e-06, "loss": 0.339555948972702, "step": 13263, "token_acc": 0.8789689124903197 }, { "epoch": 0.7156963254734797, "grad_norm": 0.37246593832969666, "learning_rate": 3.948089943282295e-06, "loss": 0.3445137143135071, "step": 13264, "token_acc": 0.8758726612678023 }, { "epoch": 0.7157502832784762, "grad_norm": 0.40339767932891846, "learning_rate": 3.946698835819575e-06, "loss": 0.34452709555625916, "step": 13265, "token_acc": 0.8799377916018662 }, { "epoch": 0.7158042410834727, "grad_norm": 0.3789730966091156, "learning_rate": 3.945307913223085e-06, "loss": 0.3418334126472473, "step": 13266, "token_acc": 0.8753381668650579 }, { "epoch": 0.7158581988884692, "grad_norm": 0.44470909237861633, "learning_rate": 3.943917175535306e-06, "loss": 0.34918782114982605, "step": 13267, "token_acc": 0.8667267808836789 }, { "epoch": 0.7159121566934658, "grad_norm": 0.429400771856308, "learning_rate": 3.94252662279871e-06, "loss": 0.29648250341415405, "step": 13268, "token_acc": 0.8975887995851698 }, { "epoch": 0.7159661144984621, "grad_norm": 0.45256999135017395, "learning_rate": 3.941136255055764e-06, "loss": 0.3233267366886139, "step": 13269, "token_acc": 0.8883888388838884 }, { "epoch": 0.7160200723034587, "grad_norm": 0.43075793981552124, "learning_rate": 3.939746072348927e-06, "loss": 0.2994671165943146, "step": 13270, "token_acc": 0.8919855795895729 }, { "epoch": 0.7160740301084552, "grad_norm": 0.47546324133872986, "learning_rate": 3.93835607472066e-06, "loss": 0.3347112536430359, "step": 13271, "token_acc": 0.8808773132282385 }, { "epoch": 0.7161279879134517, "grad_norm": 0.4821510314941406, "learning_rate": 3.936966262213411e-06, "loss": 0.39322349429130554, "step": 13272, "token_acc": 0.8673823808020208 }, { "epoch": 0.7161819457184482, "grad_norm": 0.48187270760536194, "learning_rate": 3.935576634869623e-06, "loss": 0.3377673029899597, "step": 13273, "token_acc": 0.8739207893654927 }, { "epoch": 0.7162359035234447, "grad_norm": 0.47291526198387146, "learning_rate": 3.934187192731736e-06, "loss": 0.3645968735218048, "step": 13274, "token_acc": 0.8709728867623604 }, { "epoch": 0.7162898613284412, "grad_norm": 0.4597092866897583, "learning_rate": 3.932797935842184e-06, "loss": 0.35430312156677246, "step": 13275, "token_acc": 0.8718617541571568 }, { "epoch": 0.7163438191334377, "grad_norm": 0.48349428176879883, "learning_rate": 3.931408864243393e-06, "loss": 0.3428143262863159, "step": 13276, "token_acc": 0.8725939505041247 }, { "epoch": 0.7163977769384341, "grad_norm": 0.41582924127578735, "learning_rate": 3.930019977977787e-06, "loss": 0.3612017035484314, "step": 13277, "token_acc": 0.8714336661911555 }, { "epoch": 0.7164517347434306, "grad_norm": 0.5678658485412598, "learning_rate": 3.9286312770877795e-06, "loss": 0.35959550738334656, "step": 13278, "token_acc": 0.8721205597416577 }, { "epoch": 0.7165056925484271, "grad_norm": 0.349290668964386, "learning_rate": 3.927242761615783e-06, "loss": 0.3083903193473816, "step": 13279, "token_acc": 0.8885602744587721 }, { "epoch": 0.7165596503534236, "grad_norm": 0.4071010649204254, "learning_rate": 3.9258544316042e-06, "loss": 0.291643351316452, "step": 13280, "token_acc": 0.8938784155899174 }, { "epoch": 0.7166136081584201, "grad_norm": 0.5075812339782715, "learning_rate": 3.924466287095433e-06, "loss": 0.38143882155418396, "step": 13281, "token_acc": 0.8669365369548185 }, { "epoch": 0.7166675659634166, "grad_norm": 0.5007457137107849, "learning_rate": 3.9230783281318745e-06, "loss": 0.38871732354164124, "step": 13282, "token_acc": 0.8664921465968587 }, { "epoch": 0.7167215237684131, "grad_norm": 0.3528461158275604, "learning_rate": 3.921690554755907e-06, "loss": 0.29874950647354126, "step": 13283, "token_acc": 0.8915707736925812 }, { "epoch": 0.7167754815734096, "grad_norm": 0.42883291840553284, "learning_rate": 3.920302967009923e-06, "loss": 0.2563435733318329, "step": 13284, "token_acc": 0.9032147154890837 }, { "epoch": 0.716829439378406, "grad_norm": 0.34838026762008667, "learning_rate": 3.918915564936291e-06, "loss": 0.33198752999305725, "step": 13285, "token_acc": 0.8784947505013566 }, { "epoch": 0.7168833971834025, "grad_norm": 0.45649218559265137, "learning_rate": 3.917528348577386e-06, "loss": 0.34965163469314575, "step": 13286, "token_acc": 0.8763317599460553 }, { "epoch": 0.7169373549883991, "grad_norm": 0.4245811700820923, "learning_rate": 3.916141317975572e-06, "loss": 0.3284933567047119, "step": 13287, "token_acc": 0.8879064072915249 }, { "epoch": 0.7169913127933956, "grad_norm": 0.44847023487091064, "learning_rate": 3.914754473173209e-06, "loss": 0.29644834995269775, "step": 13288, "token_acc": 0.8936139120268888 }, { "epoch": 0.7170452705983921, "grad_norm": 0.3216976225376129, "learning_rate": 3.913367814212646e-06, "loss": 0.25331130623817444, "step": 13289, "token_acc": 0.9048021405435854 }, { "epoch": 0.7170992284033886, "grad_norm": 0.4301765263080597, "learning_rate": 3.9119813411362384e-06, "loss": 0.2881716787815094, "step": 13290, "token_acc": 0.8969963830791005 }, { "epoch": 0.7171531862083851, "grad_norm": 0.43707069754600525, "learning_rate": 3.91059505398633e-06, "loss": 0.3423084616661072, "step": 13291, "token_acc": 0.8785182788741508 }, { "epoch": 0.7172071440133815, "grad_norm": 0.47157973051071167, "learning_rate": 3.9092089528052506e-06, "loss": 0.36937084794044495, "step": 13292, "token_acc": 0.8674551386623165 }, { "epoch": 0.717261101818378, "grad_norm": 0.33142778277397156, "learning_rate": 3.907823037635333e-06, "loss": 0.27798712253570557, "step": 13293, "token_acc": 0.898144844809133 }, { "epoch": 0.7173150596233745, "grad_norm": 0.36695531010627747, "learning_rate": 3.906437308518904e-06, "loss": 0.36740410327911377, "step": 13294, "token_acc": 0.8731375512848197 }, { "epoch": 0.717369017428371, "grad_norm": 0.4224645495414734, "learning_rate": 3.9050517654982844e-06, "loss": 0.3801300525665283, "step": 13295, "token_acc": 0.8635941849675224 }, { "epoch": 0.7174229752333675, "grad_norm": 0.42705225944519043, "learning_rate": 3.9036664086157825e-06, "loss": 0.3184869885444641, "step": 13296, "token_acc": 0.8846445189914511 }, { "epoch": 0.717476933038364, "grad_norm": 0.3454747498035431, "learning_rate": 3.902281237913717e-06, "loss": 0.2881530523300171, "step": 13297, "token_acc": 0.8944657050338534 }, { "epoch": 0.7175308908433605, "grad_norm": 0.3654933571815491, "learning_rate": 3.900896253434383e-06, "loss": 0.28979945182800293, "step": 13298, "token_acc": 0.8947704081632653 }, { "epoch": 0.717584848648357, "grad_norm": 0.3994101583957672, "learning_rate": 3.89951145522008e-06, "loss": 0.32017970085144043, "step": 13299, "token_acc": 0.8842592592592593 }, { "epoch": 0.7176388064533534, "grad_norm": 0.3587645888328552, "learning_rate": 3.898126843313099e-06, "loss": 0.31189560890197754, "step": 13300, "token_acc": 0.8883618695048588 }, { "epoch": 0.7176927642583499, "grad_norm": 0.28505903482437134, "learning_rate": 3.8967424177557265e-06, "loss": 0.2845301926136017, "step": 13301, "token_acc": 0.8987787435361426 }, { "epoch": 0.7177467220633464, "grad_norm": 0.48168936371803284, "learning_rate": 3.895358178590237e-06, "loss": 0.3617619276046753, "step": 13302, "token_acc": 0.8802695502272371 }, { "epoch": 0.717800679868343, "grad_norm": 0.5089925527572632, "learning_rate": 3.893974125858915e-06, "loss": 0.3343767821788788, "step": 13303, "token_acc": 0.8826458036984353 }, { "epoch": 0.7178546376733395, "grad_norm": 0.4367865324020386, "learning_rate": 3.892590259604022e-06, "loss": 0.3762507438659668, "step": 13304, "token_acc": 0.8676381299332119 }, { "epoch": 0.717908595478336, "grad_norm": 0.42767462134361267, "learning_rate": 3.891206579867824e-06, "loss": 0.372722327709198, "step": 13305, "token_acc": 0.8718309859154929 }, { "epoch": 0.7179625532833325, "grad_norm": 0.42528972029685974, "learning_rate": 3.8898230866925745e-06, "loss": 0.33919429779052734, "step": 13306, "token_acc": 0.8797316115964046 }, { "epoch": 0.718016511088329, "grad_norm": 0.4733996093273163, "learning_rate": 3.888439780120528e-06, "loss": 0.29852089285850525, "step": 13307, "token_acc": 0.8892988929889298 }, { "epoch": 0.7180704688933254, "grad_norm": 0.4667308032512665, "learning_rate": 3.887056660193931e-06, "loss": 0.3699584901332855, "step": 13308, "token_acc": 0.8749459732027085 }, { "epoch": 0.7181244266983219, "grad_norm": 0.5153253078460693, "learning_rate": 3.885673726955022e-06, "loss": 0.3834572434425354, "step": 13309, "token_acc": 0.8611578593066308 }, { "epoch": 0.7181783845033184, "grad_norm": 0.4017269015312195, "learning_rate": 3.884290980446035e-06, "loss": 0.3344564735889435, "step": 13310, "token_acc": 0.8820136969842605 }, { "epoch": 0.7182323423083149, "grad_norm": 0.3918636441230774, "learning_rate": 3.8829084207092e-06, "loss": 0.33648407459259033, "step": 13311, "token_acc": 0.8785554490252477 }, { "epoch": 0.7182863001133114, "grad_norm": 0.34627410769462585, "learning_rate": 3.881526047786738e-06, "loss": 0.37355732917785645, "step": 13312, "token_acc": 0.8669789227166277 }, { "epoch": 0.7183402579183079, "grad_norm": 0.40117374062538147, "learning_rate": 3.8801438617208675e-06, "loss": 0.3444117307662964, "step": 13313, "token_acc": 0.8770276533292136 }, { "epoch": 0.7183942157233044, "grad_norm": 0.3812979757785797, "learning_rate": 3.878761862553801e-06, "loss": 0.3601430356502533, "step": 13314, "token_acc": 0.8740740740740741 }, { "epoch": 0.7184481735283008, "grad_norm": 0.25907695293426514, "learning_rate": 3.877380050327741e-06, "loss": 0.27909421920776367, "step": 13315, "token_acc": 0.8988572817894481 }, { "epoch": 0.7185021313332973, "grad_norm": 0.3655286729335785, "learning_rate": 3.875998425084893e-06, "loss": 0.3345678150653839, "step": 13316, "token_acc": 0.8759368154041277 }, { "epoch": 0.7185560891382938, "grad_norm": 0.4424092471599579, "learning_rate": 3.8746169868674495e-06, "loss": 0.3549883961677551, "step": 13317, "token_acc": 0.8744728355246837 }, { "epoch": 0.7186100469432903, "grad_norm": 0.49339693784713745, "learning_rate": 3.873235735717597e-06, "loss": 0.3786439299583435, "step": 13318, "token_acc": 0.86803200868032 }, { "epoch": 0.7186640047482868, "grad_norm": 0.42473798990249634, "learning_rate": 3.871854671677523e-06, "loss": 0.35739079117774963, "step": 13319, "token_acc": 0.8744613616776789 }, { "epoch": 0.7187179625532834, "grad_norm": 0.3761056661605835, "learning_rate": 3.8704737947894e-06, "loss": 0.295920193195343, "step": 13320, "token_acc": 0.8905683192261185 }, { "epoch": 0.7187719203582799, "grad_norm": 0.3956712782382965, "learning_rate": 3.869093105095399e-06, "loss": 0.33584368228912354, "step": 13321, "token_acc": 0.8832126398946676 }, { "epoch": 0.7188258781632764, "grad_norm": 0.4214533567428589, "learning_rate": 3.86771260263769e-06, "loss": 0.3043665289878845, "step": 13322, "token_acc": 0.8844052863436124 }, { "epoch": 0.7188798359682728, "grad_norm": 0.3079968988895416, "learning_rate": 3.866332287458433e-06, "loss": 0.343910813331604, "step": 13323, "token_acc": 0.8809830310122879 }, { "epoch": 0.7189337937732693, "grad_norm": 0.5604569911956787, "learning_rate": 3.8649521595997815e-06, "loss": 0.3437783718109131, "step": 13324, "token_acc": 0.8781979082864039 }, { "epoch": 0.7189877515782658, "grad_norm": 0.38432881236076355, "learning_rate": 3.863572219103887e-06, "loss": 0.31368157267570496, "step": 13325, "token_acc": 0.8843526350032531 }, { "epoch": 0.7190417093832623, "grad_norm": 0.3261500597000122, "learning_rate": 3.862192466012886e-06, "loss": 0.34074103832244873, "step": 13326, "token_acc": 0.8769254232118346 }, { "epoch": 0.7190956671882588, "grad_norm": 0.4416317939758301, "learning_rate": 3.860812900368919e-06, "loss": 0.32657793164253235, "step": 13327, "token_acc": 0.8834 }, { "epoch": 0.7191496249932553, "grad_norm": 0.4944702088832855, "learning_rate": 3.859433522214115e-06, "loss": 0.38409119844436646, "step": 13328, "token_acc": 0.8654715762273901 }, { "epoch": 0.7192035827982518, "grad_norm": 0.5382798314094543, "learning_rate": 3.858054331590605e-06, "loss": 0.304355263710022, "step": 13329, "token_acc": 0.8843347639484979 }, { "epoch": 0.7192575406032483, "grad_norm": 0.4387791156768799, "learning_rate": 3.856675328540508e-06, "loss": 0.31313398480415344, "step": 13330, "token_acc": 0.8906557625321824 }, { "epoch": 0.7193114984082447, "grad_norm": 0.34130457043647766, "learning_rate": 3.855296513105936e-06, "loss": 0.3630065321922302, "step": 13331, "token_acc": 0.8663210516571005 }, { "epoch": 0.7193654562132412, "grad_norm": 0.38438737392425537, "learning_rate": 3.8539178853289995e-06, "loss": 0.28104346990585327, "step": 13332, "token_acc": 0.8963838664812239 }, { "epoch": 0.7194194140182377, "grad_norm": 0.3823124170303345, "learning_rate": 3.8525394452518e-06, "loss": 0.3287944793701172, "step": 13333, "token_acc": 0.8820286659316428 }, { "epoch": 0.7194733718232342, "grad_norm": 0.40654295682907104, "learning_rate": 3.851161192916432e-06, "loss": 0.38054245710372925, "step": 13334, "token_acc": 0.86761980407731 }, { "epoch": 0.7195273296282307, "grad_norm": 0.37040191888809204, "learning_rate": 3.849783128364995e-06, "loss": 0.30535537004470825, "step": 13335, "token_acc": 0.8892112016066049 }, { "epoch": 0.7195812874332272, "grad_norm": 0.4842507541179657, "learning_rate": 3.848405251639568e-06, "loss": 0.32128632068634033, "step": 13336, "token_acc": 0.8800939124601711 }, { "epoch": 0.7196352452382238, "grad_norm": 0.4730783998966217, "learning_rate": 3.847027562782235e-06, "loss": 0.2935832142829895, "step": 13337, "token_acc": 0.8927872308387291 }, { "epoch": 0.7196892030432201, "grad_norm": 0.44510841369628906, "learning_rate": 3.845650061835068e-06, "loss": 0.3421742022037506, "step": 13338, "token_acc": 0.8818316100443131 }, { "epoch": 0.7197431608482167, "grad_norm": 0.45951613783836365, "learning_rate": 3.844272748840137e-06, "loss": 0.3215562105178833, "step": 13339, "token_acc": 0.8838568298027758 }, { "epoch": 0.7197971186532132, "grad_norm": 0.4641905128955841, "learning_rate": 3.842895623839502e-06, "loss": 0.3517412841320038, "step": 13340, "token_acc": 0.8727165586328816 }, { "epoch": 0.7198510764582097, "grad_norm": 0.4937197268009186, "learning_rate": 3.841518686875222e-06, "loss": 0.3371373414993286, "step": 13341, "token_acc": 0.8767149390243902 }, { "epoch": 0.7199050342632062, "grad_norm": 0.3961406648159027, "learning_rate": 3.840141937989348e-06, "loss": 0.3946816623210907, "step": 13342, "token_acc": 0.8686173264486517 }, { "epoch": 0.7199589920682027, "grad_norm": 0.4172351062297821, "learning_rate": 3.8387653772239255e-06, "loss": 0.3806789517402649, "step": 13343, "token_acc": 0.8720785678766783 }, { "epoch": 0.7200129498731992, "grad_norm": 0.435058057308197, "learning_rate": 3.837389004620995e-06, "loss": 0.314802885055542, "step": 13344, "token_acc": 0.882396449704142 }, { "epoch": 0.7200669076781957, "grad_norm": 0.38853517174720764, "learning_rate": 3.836012820222589e-06, "loss": 0.3182488679885864, "step": 13345, "token_acc": 0.8854166666666666 }, { "epoch": 0.7201208654831921, "grad_norm": 0.3850747048854828, "learning_rate": 3.834636824070738e-06, "loss": 0.34018442034721375, "step": 13346, "token_acc": 0.8734742092138076 }, { "epoch": 0.7201748232881886, "grad_norm": 0.3921003043651581, "learning_rate": 3.833261016207458e-06, "loss": 0.31221258640289307, "step": 13347, "token_acc": 0.8892872508246092 }, { "epoch": 0.7202287810931851, "grad_norm": 0.4333342909812927, "learning_rate": 3.831885396674776e-06, "loss": 0.33616918325424194, "step": 13348, "token_acc": 0.877915126800051 }, { "epoch": 0.7202827388981816, "grad_norm": 0.5308000445365906, "learning_rate": 3.8305099655146975e-06, "loss": 0.35926008224487305, "step": 13349, "token_acc": 0.8728995352163031 }, { "epoch": 0.7203366967031781, "grad_norm": 0.4528455436229706, "learning_rate": 3.829134722769229e-06, "loss": 0.28852787613868713, "step": 13350, "token_acc": 0.8999479798855557 }, { "epoch": 0.7203906545081746, "grad_norm": 0.4555613100528717, "learning_rate": 3.827759668480368e-06, "loss": 0.3973638415336609, "step": 13351, "token_acc": 0.8610649980845358 }, { "epoch": 0.7204446123131711, "grad_norm": 0.36813265085220337, "learning_rate": 3.826384802690112e-06, "loss": 0.41340088844299316, "step": 13352, "token_acc": 0.8605108055009824 }, { "epoch": 0.7204985701181676, "grad_norm": 0.43203625082969666, "learning_rate": 3.825010125440446e-06, "loss": 0.35980546474456787, "step": 13353, "token_acc": 0.8749536178107606 }, { "epoch": 0.720552527923164, "grad_norm": 0.3674999177455902, "learning_rate": 3.82363563677335e-06, "loss": 0.32082000374794006, "step": 13354, "token_acc": 0.8837975441749026 }, { "epoch": 0.7206064857281606, "grad_norm": 0.39886826276779175, "learning_rate": 3.822261336730806e-06, "loss": 0.32114654779434204, "step": 13355, "token_acc": 0.8890779176590677 }, { "epoch": 0.7206604435331571, "grad_norm": 0.39288321137428284, "learning_rate": 3.820887225354784e-06, "loss": 0.3466736376285553, "step": 13356, "token_acc": 0.8841234840132304 }, { "epoch": 0.7207144013381536, "grad_norm": 0.3119768798351288, "learning_rate": 3.819513302687246e-06, "loss": 0.3152329921722412, "step": 13357, "token_acc": 0.8840194264569843 }, { "epoch": 0.7207683591431501, "grad_norm": 0.288539856672287, "learning_rate": 3.818139568770153e-06, "loss": 0.33848443627357483, "step": 13358, "token_acc": 0.8829018267840715 }, { "epoch": 0.7208223169481466, "grad_norm": 0.3581247329711914, "learning_rate": 3.816766023645458e-06, "loss": 0.3023475110530853, "step": 13359, "token_acc": 0.8915688141330794 }, { "epoch": 0.7208762747531431, "grad_norm": 0.4151468873023987, "learning_rate": 3.81539266735511e-06, "loss": 0.38363051414489746, "step": 13360, "token_acc": 0.8673212788887559 }, { "epoch": 0.7209302325581395, "grad_norm": 0.4209006726741791, "learning_rate": 3.8140194999410485e-06, "loss": 0.37458887696266174, "step": 13361, "token_acc": 0.8701954397394137 }, { "epoch": 0.720984190363136, "grad_norm": 0.44957441091537476, "learning_rate": 3.8126465214452113e-06, "loss": 0.34999915957450867, "step": 13362, "token_acc": 0.8733373639661427 }, { "epoch": 0.7210381481681325, "grad_norm": 0.49833953380584717, "learning_rate": 3.8112737319095295e-06, "loss": 0.3662468194961548, "step": 13363, "token_acc": 0.873580313683072 }, { "epoch": 0.721092105973129, "grad_norm": 0.3787069618701935, "learning_rate": 3.8099011313759248e-06, "loss": 0.3184371590614319, "step": 13364, "token_acc": 0.882580418351204 }, { "epoch": 0.7211460637781255, "grad_norm": 0.42491039633750916, "learning_rate": 3.8085287198863197e-06, "loss": 0.3020581007003784, "step": 13365, "token_acc": 0.8882435409573403 }, { "epoch": 0.721200021583122, "grad_norm": 0.5215707421302795, "learning_rate": 3.8071564974826214e-06, "loss": 0.31046009063720703, "step": 13366, "token_acc": 0.888354898336414 }, { "epoch": 0.7212539793881185, "grad_norm": 0.48744264245033264, "learning_rate": 3.8057844642067442e-06, "loss": 0.330569326877594, "step": 13367, "token_acc": 0.8856037151702786 }, { "epoch": 0.721307937193115, "grad_norm": 0.4592505097389221, "learning_rate": 3.804412620100588e-06, "loss": 0.32644373178482056, "step": 13368, "token_acc": 0.8885509838998211 }, { "epoch": 0.7213618949981114, "grad_norm": 0.31916823983192444, "learning_rate": 3.8030409652060464e-06, "loss": 0.38325560092926025, "step": 13369, "token_acc": 0.8656977526577849 }, { "epoch": 0.7214158528031079, "grad_norm": 0.40743184089660645, "learning_rate": 3.8016694995650103e-06, "loss": 0.36038607358932495, "step": 13370, "token_acc": 0.8743341073623822 }, { "epoch": 0.7214698106081044, "grad_norm": 0.41558313369750977, "learning_rate": 3.8002982232193653e-06, "loss": 0.3860054016113281, "step": 13371, "token_acc": 0.8667153107138514 }, { "epoch": 0.721523768413101, "grad_norm": 0.4438338875770569, "learning_rate": 3.798927136210987e-06, "loss": 0.3464597165584564, "step": 13372, "token_acc": 0.8789629861002655 }, { "epoch": 0.7215777262180975, "grad_norm": 0.3322874903678894, "learning_rate": 3.79755623858175e-06, "loss": 0.34240129590034485, "step": 13373, "token_acc": 0.8811464276011953 }, { "epoch": 0.721631684023094, "grad_norm": 0.43269869685173035, "learning_rate": 3.7961855303735208e-06, "loss": 0.36750519275665283, "step": 13374, "token_acc": 0.8717368140649974 }, { "epoch": 0.7216856418280905, "grad_norm": 0.4997323453426361, "learning_rate": 3.79481501162816e-06, "loss": 0.31222957372665405, "step": 13375, "token_acc": 0.8927685218007798 }, { "epoch": 0.721739599633087, "grad_norm": 0.3129415214061737, "learning_rate": 3.7934446823875236e-06, "loss": 0.38691991567611694, "step": 13376, "token_acc": 0.8704111175448755 }, { "epoch": 0.7217935574380834, "grad_norm": 0.34590789675712585, "learning_rate": 3.7920745426934602e-06, "loss": 0.28711897134780884, "step": 13377, "token_acc": 0.8953168044077136 }, { "epoch": 0.7218475152430799, "grad_norm": 0.4961603879928589, "learning_rate": 3.790704592587815e-06, "loss": 0.3283877968788147, "step": 13378, "token_acc": 0.8796223446105429 }, { "epoch": 0.7219014730480764, "grad_norm": 0.4973924458026886, "learning_rate": 3.7893348321124203e-06, "loss": 0.35857388377189636, "step": 13379, "token_acc": 0.8768382352941176 }, { "epoch": 0.7219554308530729, "grad_norm": 0.356187105178833, "learning_rate": 3.7879652613091167e-06, "loss": 0.353340744972229, "step": 13380, "token_acc": 0.8769505127061971 }, { "epoch": 0.7220093886580694, "grad_norm": 0.4475288391113281, "learning_rate": 3.786595880219727e-06, "loss": 0.34362754225730896, "step": 13381, "token_acc": 0.8720723631077371 }, { "epoch": 0.7220633464630659, "grad_norm": 0.4500792920589447, "learning_rate": 3.785226688886071e-06, "loss": 0.29660743474960327, "step": 13382, "token_acc": 0.8876028447915214 }, { "epoch": 0.7221173042680624, "grad_norm": 0.46344393491744995, "learning_rate": 3.783857687349964e-06, "loss": 0.36416035890579224, "step": 13383, "token_acc": 0.8672731031631177 }, { "epoch": 0.7221712620730588, "grad_norm": 0.3114002048969269, "learning_rate": 3.782488875653215e-06, "loss": 0.244217649102211, "step": 13384, "token_acc": 0.9106653196709482 }, { "epoch": 0.7222252198780553, "grad_norm": 0.5237839818000793, "learning_rate": 3.781120253837627e-06, "loss": 0.3093670904636383, "step": 13385, "token_acc": 0.8887590103253458 }, { "epoch": 0.7222791776830518, "grad_norm": 0.39373719692230225, "learning_rate": 3.7797518219449934e-06, "loss": 0.35258162021636963, "step": 13386, "token_acc": 0.8740977399124364 }, { "epoch": 0.7223331354880483, "grad_norm": 0.4300439655780792, "learning_rate": 3.778383580017114e-06, "loss": 0.3875078558921814, "step": 13387, "token_acc": 0.8720268495549395 }, { "epoch": 0.7223870932930448, "grad_norm": 0.4262567460536957, "learning_rate": 3.7770155280957698e-06, "loss": 0.31091243028640747, "step": 13388, "token_acc": 0.885334501898919 }, { "epoch": 0.7224410510980414, "grad_norm": 0.4490431547164917, "learning_rate": 3.7756476662227415e-06, "loss": 0.3684465289115906, "step": 13389, "token_acc": 0.8701531443466928 }, { "epoch": 0.7224950089030379, "grad_norm": 0.2760699987411499, "learning_rate": 3.7742799944398025e-06, "loss": 0.2910223603248596, "step": 13390, "token_acc": 0.8940625430500069 }, { "epoch": 0.7225489667080344, "grad_norm": 0.40374651551246643, "learning_rate": 3.772912512788721e-06, "loss": 0.3152284026145935, "step": 13391, "token_acc": 0.8894387105895659 }, { "epoch": 0.7226029245130308, "grad_norm": 0.3799360394477844, "learning_rate": 3.7715452213112603e-06, "loss": 0.37720978260040283, "step": 13392, "token_acc": 0.8709775349119612 }, { "epoch": 0.7226568823180273, "grad_norm": 0.33510908484458923, "learning_rate": 3.770178120049177e-06, "loss": 0.3280397057533264, "step": 13393, "token_acc": 0.8815025171033949 }, { "epoch": 0.7227108401230238, "grad_norm": 0.40192317962646484, "learning_rate": 3.768811209044223e-06, "loss": 0.33269262313842773, "step": 13394, "token_acc": 0.8812217194570136 }, { "epoch": 0.7227647979280203, "grad_norm": 0.45029935240745544, "learning_rate": 3.767444488338141e-06, "loss": 0.329125314950943, "step": 13395, "token_acc": 0.8778123918310834 }, { "epoch": 0.7228187557330168, "grad_norm": 0.3992771506309509, "learning_rate": 3.7660779579726723e-06, "loss": 0.3282471299171448, "step": 13396, "token_acc": 0.8842819994221324 }, { "epoch": 0.7228727135380133, "grad_norm": 0.43887028098106384, "learning_rate": 3.7647116179895495e-06, "loss": 0.3464289605617523, "step": 13397, "token_acc": 0.8795747290942547 }, { "epoch": 0.7229266713430098, "grad_norm": 0.5147687792778015, "learning_rate": 3.7633454684305005e-06, "loss": 0.34762632846832275, "step": 13398, "token_acc": 0.8828112050389524 }, { "epoch": 0.7229806291480062, "grad_norm": 0.3938673436641693, "learning_rate": 3.7619795093372435e-06, "loss": 0.38833221793174744, "step": 13399, "token_acc": 0.8641158591096013 }, { "epoch": 0.7230345869530027, "grad_norm": 0.5005287528038025, "learning_rate": 3.760613740751502e-06, "loss": 0.3642690181732178, "step": 13400, "token_acc": 0.8694817658349329 }, { "epoch": 0.7230885447579992, "grad_norm": 0.41591909527778625, "learning_rate": 3.7592481627149826e-06, "loss": 0.25658735632896423, "step": 13401, "token_acc": 0.9038294168842471 }, { "epoch": 0.7231425025629957, "grad_norm": 0.41861793398857117, "learning_rate": 3.757882775269388e-06, "loss": 0.3392098546028137, "step": 13402, "token_acc": 0.8768203883495146 }, { "epoch": 0.7231964603679922, "grad_norm": 0.41029947996139526, "learning_rate": 3.756517578456419e-06, "loss": 0.3271721303462982, "step": 13403, "token_acc": 0.8838573792688431 }, { "epoch": 0.7232504181729887, "grad_norm": 0.5479055047035217, "learning_rate": 3.7551525723177686e-06, "loss": 0.3194277584552765, "step": 13404, "token_acc": 0.8797008547008547 }, { "epoch": 0.7233043759779852, "grad_norm": 0.39911437034606934, "learning_rate": 3.7537877568951177e-06, "loss": 0.3144160509109497, "step": 13405, "token_acc": 0.8878817459210709 }, { "epoch": 0.7233583337829818, "grad_norm": 0.37942492961883545, "learning_rate": 3.7524231322301574e-06, "loss": 0.2841877341270447, "step": 13406, "token_acc": 0.8965774735532047 }, { "epoch": 0.7234122915879782, "grad_norm": 0.470803439617157, "learning_rate": 3.7510586983645603e-06, "loss": 0.38260623812675476, "step": 13407, "token_acc": 0.8648879767402087 }, { "epoch": 0.7234662493929747, "grad_norm": 0.4579288959503174, "learning_rate": 3.7496944553399907e-06, "loss": 0.376345694065094, "step": 13408, "token_acc": 0.8678867600800686 }, { "epoch": 0.7235202071979712, "grad_norm": 0.4141060411930084, "learning_rate": 3.748330403198115e-06, "loss": 0.3561874330043793, "step": 13409, "token_acc": 0.8755399136138218 }, { "epoch": 0.7235741650029677, "grad_norm": 0.3194204568862915, "learning_rate": 3.74696654198059e-06, "loss": 0.30812591314315796, "step": 13410, "token_acc": 0.8869704236610712 }, { "epoch": 0.7236281228079642, "grad_norm": 0.44469335675239563, "learning_rate": 3.745602871729066e-06, "loss": 0.3372647762298584, "step": 13411, "token_acc": 0.8783170631101334 }, { "epoch": 0.7236820806129607, "grad_norm": 0.3246121406555176, "learning_rate": 3.744239392485196e-06, "loss": 0.3158581256866455, "step": 13412, "token_acc": 0.8851706036745407 }, { "epoch": 0.7237360384179572, "grad_norm": 0.4486202299594879, "learning_rate": 3.742876104290616e-06, "loss": 0.31701380014419556, "step": 13413, "token_acc": 0.8852953859422165 }, { "epoch": 0.7237899962229537, "grad_norm": 0.3965248167514801, "learning_rate": 3.74151300718696e-06, "loss": 0.3164534568786621, "step": 13414, "token_acc": 0.888066750629723 }, { "epoch": 0.7238439540279501, "grad_norm": 0.36172714829444885, "learning_rate": 3.740150101215857e-06, "loss": 0.3014654815196991, "step": 13415, "token_acc": 0.8893798792163794 }, { "epoch": 0.7238979118329466, "grad_norm": 0.5354966521263123, "learning_rate": 3.7387873864189305e-06, "loss": 0.37342798709869385, "step": 13416, "token_acc": 0.8698779060043486 }, { "epoch": 0.7239518696379431, "grad_norm": 0.39657095074653625, "learning_rate": 3.7374248628377975e-06, "loss": 0.32040148973464966, "step": 13417, "token_acc": 0.888101195440645 }, { "epoch": 0.7240058274429396, "grad_norm": 0.3298472762107849, "learning_rate": 3.736062530514065e-06, "loss": 0.33674323558807373, "step": 13418, "token_acc": 0.8774539877300613 }, { "epoch": 0.7240597852479361, "grad_norm": 0.49204397201538086, "learning_rate": 3.7347003894893454e-06, "loss": 0.36923280358314514, "step": 13419, "token_acc": 0.8707534349992451 }, { "epoch": 0.7241137430529326, "grad_norm": 0.37180495262145996, "learning_rate": 3.733338439805234e-06, "loss": 0.34950196743011475, "step": 13420, "token_acc": 0.8768651513038628 }, { "epoch": 0.7241677008579291, "grad_norm": 0.45396319031715393, "learning_rate": 3.731976681503325e-06, "loss": 0.36852002143859863, "step": 13421, "token_acc": 0.8677960471682444 }, { "epoch": 0.7242216586629255, "grad_norm": 0.41502708196640015, "learning_rate": 3.7306151146252068e-06, "loss": 0.3140481114387512, "step": 13422, "token_acc": 0.8905419766206164 }, { "epoch": 0.724275616467922, "grad_norm": 0.38408127427101135, "learning_rate": 3.7292537392124605e-06, "loss": 0.3545609712600708, "step": 13423, "token_acc": 0.8742505266569438 }, { "epoch": 0.7243295742729186, "grad_norm": 0.3932609558105469, "learning_rate": 3.7278925553066625e-06, "loss": 0.3128988742828369, "step": 13424, "token_acc": 0.8921632546616247 }, { "epoch": 0.7243835320779151, "grad_norm": 0.5011353492736816, "learning_rate": 3.7265315629493814e-06, "loss": 0.38514551520347595, "step": 13425, "token_acc": 0.8618287373004354 }, { "epoch": 0.7244374898829116, "grad_norm": 0.3655851483345032, "learning_rate": 3.725170762182184e-06, "loss": 0.2745184302330017, "step": 13426, "token_acc": 0.8966584158415841 }, { "epoch": 0.7244914476879081, "grad_norm": 0.5133529305458069, "learning_rate": 3.7238101530466284e-06, "loss": 0.4173732101917267, "step": 13427, "token_acc": 0.8544115564367654 }, { "epoch": 0.7245454054929046, "grad_norm": 0.40576791763305664, "learning_rate": 3.7224497355842658e-06, "loss": 0.2831317186355591, "step": 13428, "token_acc": 0.8951003669328729 }, { "epoch": 0.7245993632979011, "grad_norm": 0.4981568157672882, "learning_rate": 3.721089509836644e-06, "loss": 0.35665589570999146, "step": 13429, "token_acc": 0.8784436239704629 }, { "epoch": 0.7246533211028975, "grad_norm": 0.36352092027664185, "learning_rate": 3.719729475845303e-06, "loss": 0.3170282542705536, "step": 13430, "token_acc": 0.8851860053144376 }, { "epoch": 0.724707278907894, "grad_norm": 0.4413457214832306, "learning_rate": 3.718369633651776e-06, "loss": 0.34672367572784424, "step": 13431, "token_acc": 0.8769934821800027 }, { "epoch": 0.7247612367128905, "grad_norm": 0.4443356394767761, "learning_rate": 3.7170099832975983e-06, "loss": 0.32502254843711853, "step": 13432, "token_acc": 0.8855325914149443 }, { "epoch": 0.724815194517887, "grad_norm": 0.4320288896560669, "learning_rate": 3.715650524824289e-06, "loss": 0.2976764142513275, "step": 13433, "token_acc": 0.8933939182104159 }, { "epoch": 0.7248691523228835, "grad_norm": 0.4237799644470215, "learning_rate": 3.714291258273367e-06, "loss": 0.3259900212287903, "step": 13434, "token_acc": 0.8845309928688975 }, { "epoch": 0.72492311012788, "grad_norm": 0.3920452892780304, "learning_rate": 3.712932183686343e-06, "loss": 0.3500882089138031, "step": 13435, "token_acc": 0.8788828337874659 }, { "epoch": 0.7249770679328765, "grad_norm": 0.5395898818969727, "learning_rate": 3.711573301104724e-06, "loss": 0.39093542098999023, "step": 13436, "token_acc": 0.8634721131186175 }, { "epoch": 0.725031025737873, "grad_norm": 0.36567825078964233, "learning_rate": 3.7102146105700044e-06, "loss": 0.32262536883354187, "step": 13437, "token_acc": 0.885274776890559 }, { "epoch": 0.7250849835428694, "grad_norm": 0.3612493574619293, "learning_rate": 3.7088561121236878e-06, "loss": 0.29290640354156494, "step": 13438, "token_acc": 0.8943683883011824 }, { "epoch": 0.7251389413478659, "grad_norm": 0.3639563322067261, "learning_rate": 3.7074978058072563e-06, "loss": 0.2807137370109558, "step": 13439, "token_acc": 0.9001452912429005 }, { "epoch": 0.7251928991528624, "grad_norm": 0.38667067885398865, "learning_rate": 3.706139691662194e-06, "loss": 0.3597598671913147, "step": 13440, "token_acc": 0.8734356295570791 }, { "epoch": 0.725246856957859, "grad_norm": 0.36982181668281555, "learning_rate": 3.704781769729976e-06, "loss": 0.2992977499961853, "step": 13441, "token_acc": 0.8985210466439135 }, { "epoch": 0.7253008147628555, "grad_norm": 0.2346966713666916, "learning_rate": 3.7034240400520772e-06, "loss": 0.2546195983886719, "step": 13442, "token_acc": 0.9055655758825045 }, { "epoch": 0.725354772567852, "grad_norm": 0.35670655965805054, "learning_rate": 3.7020665026699555e-06, "loss": 0.2968338131904602, "step": 13443, "token_acc": 0.8913551401869159 }, { "epoch": 0.7254087303728485, "grad_norm": 0.43457189202308655, "learning_rate": 3.7007091576250697e-06, "loss": 0.32570505142211914, "step": 13444, "token_acc": 0.8871298739372618 }, { "epoch": 0.7254626881778449, "grad_norm": 0.5282579064369202, "learning_rate": 3.6993520049588795e-06, "loss": 0.40545225143432617, "step": 13445, "token_acc": 0.8612767014406358 }, { "epoch": 0.7255166459828414, "grad_norm": 0.4677165448665619, "learning_rate": 3.697995044712829e-06, "loss": 0.37802445888519287, "step": 13446, "token_acc": 0.8674089068825911 }, { "epoch": 0.7255706037878379, "grad_norm": 0.44233325123786926, "learning_rate": 3.696638276928358e-06, "loss": 0.3598954379558563, "step": 13447, "token_acc": 0.8666666666666667 }, { "epoch": 0.7256245615928344, "grad_norm": 0.4244402050971985, "learning_rate": 3.6952817016469032e-06, "loss": 0.36575907468795776, "step": 13448, "token_acc": 0.8692102137767221 }, { "epoch": 0.7256785193978309, "grad_norm": 0.31494829058647156, "learning_rate": 3.6939253189098934e-06, "loss": 0.2953861355781555, "step": 13449, "token_acc": 0.8924475668974068 }, { "epoch": 0.7257324772028274, "grad_norm": 0.38752877712249756, "learning_rate": 3.6925691287587495e-06, "loss": 0.37464049458503723, "step": 13450, "token_acc": 0.8688487067978943 }, { "epoch": 0.7257864350078239, "grad_norm": 0.3531007766723633, "learning_rate": 3.6912131312348943e-06, "loss": 0.3201327621936798, "step": 13451, "token_acc": 0.8855633802816901 }, { "epoch": 0.7258403928128204, "grad_norm": 0.4505577087402344, "learning_rate": 3.689857326379739e-06, "loss": 0.35858994722366333, "step": 13452, "token_acc": 0.874814113829931 }, { "epoch": 0.7258943506178168, "grad_norm": 0.34929367899894714, "learning_rate": 3.688501714234687e-06, "loss": 0.3336648643016815, "step": 13453, "token_acc": 0.8830615721586761 }, { "epoch": 0.7259483084228133, "grad_norm": 0.3239114284515381, "learning_rate": 3.6871462948411387e-06, "loss": 0.34007900953292847, "step": 13454, "token_acc": 0.8814814814814815 }, { "epoch": 0.7260022662278098, "grad_norm": 0.3778265714645386, "learning_rate": 3.6857910682404897e-06, "loss": 0.31912779808044434, "step": 13455, "token_acc": 0.8834863571705677 }, { "epoch": 0.7260562240328063, "grad_norm": 0.3651067018508911, "learning_rate": 3.684436034474127e-06, "loss": 0.3157283067703247, "step": 13456, "token_acc": 0.8878908470722001 }, { "epoch": 0.7261101818378028, "grad_norm": 0.26158979535102844, "learning_rate": 3.6830811935834344e-06, "loss": 0.28072699904441833, "step": 13457, "token_acc": 0.896476092996052 }, { "epoch": 0.7261641396427994, "grad_norm": 0.4148240089416504, "learning_rate": 3.681726545609786e-06, "loss": 0.3193403482437134, "step": 13458, "token_acc": 0.8867816091954023 }, { "epoch": 0.7262180974477959, "grad_norm": 0.4119129478931427, "learning_rate": 3.6803720905945537e-06, "loss": 0.3241007924079895, "step": 13459, "token_acc": 0.8785243372141955 }, { "epoch": 0.7262720552527924, "grad_norm": 0.37849703431129456, "learning_rate": 3.6790178285791034e-06, "loss": 0.36785972118377686, "step": 13460, "token_acc": 0.8731932647891522 }, { "epoch": 0.7263260130577888, "grad_norm": 0.4906582534313202, "learning_rate": 3.6776637596047916e-06, "loss": 0.39757901430130005, "step": 13461, "token_acc": 0.8630396963466709 }, { "epoch": 0.7263799708627853, "grad_norm": 0.35746628046035767, "learning_rate": 3.676309883712973e-06, "loss": 0.36061570048332214, "step": 13462, "token_acc": 0.8765593561368209 }, { "epoch": 0.7264339286677818, "grad_norm": 0.3526298701763153, "learning_rate": 3.67495620094499e-06, "loss": 0.34945565462112427, "step": 13463, "token_acc": 0.8746419545071609 }, { "epoch": 0.7264878864727783, "grad_norm": 0.3994464874267578, "learning_rate": 3.6736027113421925e-06, "loss": 0.3305662274360657, "step": 13464, "token_acc": 0.8833922261484098 }, { "epoch": 0.7265418442777748, "grad_norm": 0.38945308327674866, "learning_rate": 3.6722494149459097e-06, "loss": 0.34070348739624023, "step": 13465, "token_acc": 0.8755797217335679 }, { "epoch": 0.7265958020827713, "grad_norm": 0.42973026633262634, "learning_rate": 3.6708963117974715e-06, "loss": 0.26947563886642456, "step": 13466, "token_acc": 0.9000454338936847 }, { "epoch": 0.7266497598877678, "grad_norm": 0.27324607968330383, "learning_rate": 3.6695434019382037e-06, "loss": 0.4031694531440735, "step": 13467, "token_acc": 0.8634043497558811 }, { "epoch": 0.7267037176927642, "grad_norm": 0.3730730712413788, "learning_rate": 3.668190685409421e-06, "loss": 0.3567761182785034, "step": 13468, "token_acc": 0.8686030428769018 }, { "epoch": 0.7267576754977607, "grad_norm": 0.34376946091651917, "learning_rate": 3.666838162252433e-06, "loss": 0.2919228672981262, "step": 13469, "token_acc": 0.8929586359330597 }, { "epoch": 0.7268116333027572, "grad_norm": 0.3926534652709961, "learning_rate": 3.6654858325085507e-06, "loss": 0.38290441036224365, "step": 13470, "token_acc": 0.8681472278304084 }, { "epoch": 0.7268655911077537, "grad_norm": 0.4133564233779907, "learning_rate": 3.6641336962190733e-06, "loss": 0.308663934469223, "step": 13471, "token_acc": 0.8902586903799515 }, { "epoch": 0.7269195489127502, "grad_norm": 0.371198445558548, "learning_rate": 3.662781753425292e-06, "loss": 0.3686285614967346, "step": 13472, "token_acc": 0.8709222932697518 }, { "epoch": 0.7269735067177467, "grad_norm": 0.40743088722229004, "learning_rate": 3.661430004168497e-06, "loss": 0.36126837134361267, "step": 13473, "token_acc": 0.8689829895041622 }, { "epoch": 0.7270274645227432, "grad_norm": 0.45379403233528137, "learning_rate": 3.6600784484899676e-06, "loss": 0.326712965965271, "step": 13474, "token_acc": 0.8859941234084231 }, { "epoch": 0.7270814223277398, "grad_norm": 0.4934387505054474, "learning_rate": 3.6587270864309822e-06, "loss": 0.2881716191768646, "step": 13475, "token_acc": 0.894663167104112 }, { "epoch": 0.7271353801327362, "grad_norm": 0.46567806601524353, "learning_rate": 3.6573759180328107e-06, "loss": 0.3707331418991089, "step": 13476, "token_acc": 0.8703546580688953 }, { "epoch": 0.7271893379377327, "grad_norm": 0.4707600176334381, "learning_rate": 3.656024943336717e-06, "loss": 0.37991970777511597, "step": 13477, "token_acc": 0.8644101742439415 }, { "epoch": 0.7272432957427292, "grad_norm": 0.4748065173625946, "learning_rate": 3.65467416238396e-06, "loss": 0.35525181889533997, "step": 13478, "token_acc": 0.8780163088700282 }, { "epoch": 0.7272972535477257, "grad_norm": 0.45204728841781616, "learning_rate": 3.6533235752157913e-06, "loss": 0.3724963068962097, "step": 13479, "token_acc": 0.8712936889183369 }, { "epoch": 0.7273512113527222, "grad_norm": 0.3578954041004181, "learning_rate": 3.651973181873458e-06, "loss": 0.3292897939682007, "step": 13480, "token_acc": 0.8837631203835687 }, { "epoch": 0.7274051691577187, "grad_norm": 0.40819141268730164, "learning_rate": 3.6506229823982007e-06, "loss": 0.313889741897583, "step": 13481, "token_acc": 0.8899721448467967 }, { "epoch": 0.7274591269627152, "grad_norm": 0.3414784073829651, "learning_rate": 3.6492729768312508e-06, "loss": 0.3681858777999878, "step": 13482, "token_acc": 0.8670936749399519 }, { "epoch": 0.7275130847677117, "grad_norm": 0.4188057482242584, "learning_rate": 3.647923165213845e-06, "loss": 0.31647616624832153, "step": 13483, "token_acc": 0.8884161405435081 }, { "epoch": 0.7275670425727081, "grad_norm": 0.5295367240905762, "learning_rate": 3.6465735475872e-06, "loss": 0.3812655508518219, "step": 13484, "token_acc": 0.8622899705525723 }, { "epoch": 0.7276210003777046, "grad_norm": 0.4185744822025299, "learning_rate": 3.6452241239925356e-06, "loss": 0.34596771001815796, "step": 13485, "token_acc": 0.8765607712976135 }, { "epoch": 0.7276749581827011, "grad_norm": 0.3795110583305359, "learning_rate": 3.643874894471061e-06, "loss": 0.351390540599823, "step": 13486, "token_acc": 0.8762376237623762 }, { "epoch": 0.7277289159876976, "grad_norm": 0.4090180993080139, "learning_rate": 3.642525859063982e-06, "loss": 0.3398829698562622, "step": 13487, "token_acc": 0.8815439912486326 }, { "epoch": 0.7277828737926941, "grad_norm": 0.4002859890460968, "learning_rate": 3.6411770178124993e-06, "loss": 0.3764902651309967, "step": 13488, "token_acc": 0.8684525591140377 }, { "epoch": 0.7278368315976906, "grad_norm": 0.49300068616867065, "learning_rate": 3.6398283707578e-06, "loss": 0.38902515172958374, "step": 13489, "token_acc": 0.8653753026634382 }, { "epoch": 0.7278907894026871, "grad_norm": 0.4639199674129486, "learning_rate": 3.6384799179410835e-06, "loss": 0.3860652446746826, "step": 13490, "token_acc": 0.8678128523111612 }, { "epoch": 0.7279447472076835, "grad_norm": 0.46886616945266724, "learning_rate": 3.6371316594035198e-06, "loss": 0.3658515214920044, "step": 13491, "token_acc": 0.8720337749308488 }, { "epoch": 0.72799870501268, "grad_norm": 0.4018765389919281, "learning_rate": 3.6357835951862887e-06, "loss": 0.3150407075881958, "step": 13492, "token_acc": 0.8859712230215827 }, { "epoch": 0.7280526628176766, "grad_norm": 0.3963439464569092, "learning_rate": 3.6344357253305595e-06, "loss": 0.34837082028388977, "step": 13493, "token_acc": 0.8779787539477462 }, { "epoch": 0.7281066206226731, "grad_norm": 0.350492924451828, "learning_rate": 3.633088049877496e-06, "loss": 0.33706265687942505, "step": 13494, "token_acc": 0.8824212700051626 }, { "epoch": 0.7281605784276696, "grad_norm": 0.4457899034023285, "learning_rate": 3.6317405688682516e-06, "loss": 0.3629903793334961, "step": 13495, "token_acc": 0.8778662420382166 }, { "epoch": 0.7282145362326661, "grad_norm": 0.5329305529594421, "learning_rate": 3.630393282343986e-06, "loss": 0.36389485001564026, "step": 13496, "token_acc": 0.870064874884152 }, { "epoch": 0.7282684940376626, "grad_norm": 0.4456102252006531, "learning_rate": 3.6290461903458397e-06, "loss": 0.36786341667175293, "step": 13497, "token_acc": 0.875 }, { "epoch": 0.7283224518426591, "grad_norm": 0.2865435481071472, "learning_rate": 3.6276992929149544e-06, "loss": 0.3071989119052887, "step": 13498, "token_acc": 0.8906092224840121 }, { "epoch": 0.7283764096476555, "grad_norm": 0.4051245152950287, "learning_rate": 3.6263525900924633e-06, "loss": 0.3146083950996399, "step": 13499, "token_acc": 0.8875174958646138 }, { "epoch": 0.728430367452652, "grad_norm": 0.3481366038322449, "learning_rate": 3.6250060819194953e-06, "loss": 0.3463134169578552, "step": 13500, "token_acc": 0.8749260792430514 }, { "epoch": 0.7284843252576485, "grad_norm": 0.32134997844696045, "learning_rate": 3.623659768437171e-06, "loss": 0.34570255875587463, "step": 13501, "token_acc": 0.8767022062704528 }, { "epoch": 0.728538283062645, "grad_norm": 0.3632810413837433, "learning_rate": 3.622313649686603e-06, "loss": 0.3652091324329376, "step": 13502, "token_acc": 0.8749463288965221 }, { "epoch": 0.7285922408676415, "grad_norm": 0.38174504041671753, "learning_rate": 3.6209677257089093e-06, "loss": 0.28976696729660034, "step": 13503, "token_acc": 0.897069209039548 }, { "epoch": 0.728646198672638, "grad_norm": 0.4661511480808258, "learning_rate": 3.619621996545191e-06, "loss": 0.41388702392578125, "step": 13504, "token_acc": 0.8571942877715109 }, { "epoch": 0.7287001564776345, "grad_norm": 0.3819160461425781, "learning_rate": 3.6182764622365442e-06, "loss": 0.3201228380203247, "step": 13505, "token_acc": 0.8822346689645267 }, { "epoch": 0.728754114282631, "grad_norm": 0.3423864543437958, "learning_rate": 3.616931122824063e-06, "loss": 0.30509424209594727, "step": 13506, "token_acc": 0.8894409937888199 }, { "epoch": 0.7288080720876274, "grad_norm": 0.44478967785835266, "learning_rate": 3.6155859783488335e-06, "loss": 0.3101869821548462, "step": 13507, "token_acc": 0.8934725848563969 }, { "epoch": 0.7288620298926239, "grad_norm": 0.4458260238170624, "learning_rate": 3.6142410288519345e-06, "loss": 0.3440738916397095, "step": 13508, "token_acc": 0.8814663193152215 }, { "epoch": 0.7289159876976204, "grad_norm": 0.5138335227966309, "learning_rate": 3.612896274374443e-06, "loss": 0.32956719398498535, "step": 13509, "token_acc": 0.883289124668435 }, { "epoch": 0.728969945502617, "grad_norm": 0.4433155655860901, "learning_rate": 3.6115517149574253e-06, "loss": 0.36960816383361816, "step": 13510, "token_acc": 0.8668384879725086 }, { "epoch": 0.7290239033076135, "grad_norm": 0.4391997754573822, "learning_rate": 3.6102073506419455e-06, "loss": 0.2951677143573761, "step": 13511, "token_acc": 0.8923741007194245 }, { "epoch": 0.72907786111261, "grad_norm": 0.3606271743774414, "learning_rate": 3.608863181469059e-06, "loss": 0.3195018768310547, "step": 13512, "token_acc": 0.885309852875613 }, { "epoch": 0.7291318189176065, "grad_norm": 0.3804778456687927, "learning_rate": 3.607519207479816e-06, "loss": 0.339813768863678, "step": 13513, "token_acc": 0.8784479588083854 }, { "epoch": 0.7291857767226029, "grad_norm": 0.3959445655345917, "learning_rate": 3.6061754287152584e-06, "loss": 0.3077163100242615, "step": 13514, "token_acc": 0.8867170780425168 }, { "epoch": 0.7292397345275994, "grad_norm": 0.34781786799430847, "learning_rate": 3.604831845216432e-06, "loss": 0.3416324555873871, "step": 13515, "token_acc": 0.8792041207958792 }, { "epoch": 0.7292936923325959, "grad_norm": 0.43150946497917175, "learning_rate": 3.603488457024367e-06, "loss": 0.3282545208930969, "step": 13516, "token_acc": 0.8836945743023091 }, { "epoch": 0.7293476501375924, "grad_norm": 0.3934772312641144, "learning_rate": 3.602145264180087e-06, "loss": 0.3599421977996826, "step": 13517, "token_acc": 0.870610739172018 }, { "epoch": 0.7294016079425889, "grad_norm": 0.43820640444755554, "learning_rate": 3.600802266724616e-06, "loss": 0.31690293550491333, "step": 13518, "token_acc": 0.8803912137245471 }, { "epoch": 0.7294555657475854, "grad_norm": 0.5096561908721924, "learning_rate": 3.599459464698967e-06, "loss": 0.34515923261642456, "step": 13519, "token_acc": 0.8801428085685141 }, { "epoch": 0.7295095235525819, "grad_norm": 0.48671600222587585, "learning_rate": 3.5981168581441484e-06, "loss": 0.32273226976394653, "step": 13520, "token_acc": 0.880163599182004 }, { "epoch": 0.7295634813575784, "grad_norm": 0.3870090842247009, "learning_rate": 3.5967744471011613e-06, "loss": 0.3726552426815033, "step": 13521, "token_acc": 0.8729314420803782 }, { "epoch": 0.7296174391625748, "grad_norm": 0.30888304114341736, "learning_rate": 3.595432231611008e-06, "loss": 0.3230458199977875, "step": 13522, "token_acc": 0.8851540616246498 }, { "epoch": 0.7296713969675713, "grad_norm": 0.4290252923965454, "learning_rate": 3.594090211714677e-06, "loss": 0.34926626086235046, "step": 13523, "token_acc": 0.8741959611069559 }, { "epoch": 0.7297253547725678, "grad_norm": 0.4170243740081787, "learning_rate": 3.5927483874531565e-06, "loss": 0.3617628812789917, "step": 13524, "token_acc": 0.8747396194972921 }, { "epoch": 0.7297793125775643, "grad_norm": 0.41566064953804016, "learning_rate": 3.5914067588674174e-06, "loss": 0.33614951372146606, "step": 13525, "token_acc": 0.8800867244829886 }, { "epoch": 0.7298332703825609, "grad_norm": 0.41142264008522034, "learning_rate": 3.5900653259984376e-06, "loss": 0.36928656697273254, "step": 13526, "token_acc": 0.8681094324213965 }, { "epoch": 0.7298872281875574, "grad_norm": 0.33285096287727356, "learning_rate": 3.58872408888718e-06, "loss": 0.3320103883743286, "step": 13527, "token_acc": 0.8765818073544738 }, { "epoch": 0.7299411859925539, "grad_norm": 0.394145667552948, "learning_rate": 3.587383047574613e-06, "loss": 0.3359348177909851, "step": 13528, "token_acc": 0.8825337431511426 }, { "epoch": 0.7299951437975503, "grad_norm": 0.24529142677783966, "learning_rate": 3.5860422021016885e-06, "loss": 0.31460440158843994, "step": 13529, "token_acc": 0.8847404893225852 }, { "epoch": 0.7300491016025468, "grad_norm": 0.39406275749206543, "learning_rate": 3.584701552509353e-06, "loss": 0.3441148102283478, "step": 13530, "token_acc": 0.8746100133709701 }, { "epoch": 0.7301030594075433, "grad_norm": 0.36038780212402344, "learning_rate": 3.5833610988385537e-06, "loss": 0.31459230184555054, "step": 13531, "token_acc": 0.8837358684480987 }, { "epoch": 0.7301570172125398, "grad_norm": 0.5159715414047241, "learning_rate": 3.5820208411302236e-06, "loss": 0.32502132654190063, "step": 13532, "token_acc": 0.8823294774631033 }, { "epoch": 0.7302109750175363, "grad_norm": 0.38581767678260803, "learning_rate": 3.5806807794252975e-06, "loss": 0.30168068408966064, "step": 13533, "token_acc": 0.8915412558380903 }, { "epoch": 0.7302649328225328, "grad_norm": 0.47201254963874817, "learning_rate": 3.579340913764694e-06, "loss": 0.3462691009044647, "step": 13534, "token_acc": 0.8774551665243382 }, { "epoch": 0.7303188906275293, "grad_norm": 0.37866273522377014, "learning_rate": 3.578001244189341e-06, "loss": 0.342557430267334, "step": 13535, "token_acc": 0.8770196443101244 }, { "epoch": 0.7303728484325258, "grad_norm": 0.360986590385437, "learning_rate": 3.5766617707401476e-06, "loss": 0.3713377118110657, "step": 13536, "token_acc": 0.8711348551076574 }, { "epoch": 0.7304268062375222, "grad_norm": 0.38282009959220886, "learning_rate": 3.5753224934580213e-06, "loss": 0.31115663051605225, "step": 13537, "token_acc": 0.8860925225487344 }, { "epoch": 0.7304807640425187, "grad_norm": 0.27454671263694763, "learning_rate": 3.573983412383862e-06, "loss": 0.291126549243927, "step": 13538, "token_acc": 0.8968465045592705 }, { "epoch": 0.7305347218475152, "grad_norm": 0.38006672263145447, "learning_rate": 3.5726445275585663e-06, "loss": 0.27038776874542236, "step": 13539, "token_acc": 0.8999861668280537 }, { "epoch": 0.7305886796525117, "grad_norm": 0.39012521505355835, "learning_rate": 3.5713058390230237e-06, "loss": 0.31567129492759705, "step": 13540, "token_acc": 0.8858937050084239 }, { "epoch": 0.7306426374575082, "grad_norm": 0.3978780210018158, "learning_rate": 3.569967346818116e-06, "loss": 0.37376776337623596, "step": 13541, "token_acc": 0.869211849000107 }, { "epoch": 0.7306965952625047, "grad_norm": 0.31619518995285034, "learning_rate": 3.568629050984721e-06, "loss": 0.2445347160100937, "step": 13542, "token_acc": 0.9061882317165881 }, { "epoch": 0.7307505530675013, "grad_norm": 0.41512858867645264, "learning_rate": 3.567290951563711e-06, "loss": 0.27567917108535767, "step": 13543, "token_acc": 0.8954473646416774 }, { "epoch": 0.7308045108724978, "grad_norm": 0.47536754608154297, "learning_rate": 3.5659530485959493e-06, "loss": 0.27383172512054443, "step": 13544, "token_acc": 0.8961545606539104 }, { "epoch": 0.7308584686774942, "grad_norm": 0.3571774661540985, "learning_rate": 3.5646153421222964e-06, "loss": 0.3094307482242584, "step": 13545, "token_acc": 0.8918810289389068 }, { "epoch": 0.7309124264824907, "grad_norm": 0.48669207096099854, "learning_rate": 3.563277832183605e-06, "loss": 0.39573150873184204, "step": 13546, "token_acc": 0.861949305974653 }, { "epoch": 0.7309663842874872, "grad_norm": 0.48457443714141846, "learning_rate": 3.561940518820719e-06, "loss": 0.2688562273979187, "step": 13547, "token_acc": 0.8958752515090543 }, { "epoch": 0.7310203420924837, "grad_norm": 0.34680113196372986, "learning_rate": 3.560603402074486e-06, "loss": 0.317444771528244, "step": 13548, "token_acc": 0.8842608398770911 }, { "epoch": 0.7310742998974802, "grad_norm": 0.4264586865901947, "learning_rate": 3.5592664819857382e-06, "loss": 0.35214143991470337, "step": 13549, "token_acc": 0.8764625436863699 }, { "epoch": 0.7311282577024767, "grad_norm": 0.3439183831214905, "learning_rate": 3.5579297585953054e-06, "loss": 0.33703434467315674, "step": 13550, "token_acc": 0.8779440288168467 }, { "epoch": 0.7311822155074732, "grad_norm": 0.4612284302711487, "learning_rate": 3.55659323194401e-06, "loss": 0.3401127755641937, "step": 13551, "token_acc": 0.881752927842841 }, { "epoch": 0.7312361733124696, "grad_norm": 0.43440568447113037, "learning_rate": 3.555256902072669e-06, "loss": 0.3301125168800354, "step": 13552, "token_acc": 0.8805949593719873 }, { "epoch": 0.7312901311174661, "grad_norm": 0.5470329523086548, "learning_rate": 3.5539207690220913e-06, "loss": 0.3521726727485657, "step": 13553, "token_acc": 0.8735809120646527 }, { "epoch": 0.7313440889224626, "grad_norm": 0.3833197057247162, "learning_rate": 3.552584832833087e-06, "loss": 0.30025649070739746, "step": 13554, "token_acc": 0.8970976253298153 }, { "epoch": 0.7313980467274591, "grad_norm": 0.42470163106918335, "learning_rate": 3.551249093546454e-06, "loss": 0.310044527053833, "step": 13555, "token_acc": 0.89 }, { "epoch": 0.7314520045324556, "grad_norm": 0.42312493920326233, "learning_rate": 3.5499135512029836e-06, "loss": 0.33513200283050537, "step": 13556, "token_acc": 0.8808967772069126 }, { "epoch": 0.7315059623374521, "grad_norm": 0.4123179316520691, "learning_rate": 3.548578205843464e-06, "loss": 0.34667932987213135, "step": 13557, "token_acc": 0.8765632458233891 }, { "epoch": 0.7315599201424486, "grad_norm": 0.40912023186683655, "learning_rate": 3.547243057508676e-06, "loss": 0.3104941248893738, "step": 13558, "token_acc": 0.8919170021678539 }, { "epoch": 0.7316138779474451, "grad_norm": 0.4290373623371124, "learning_rate": 3.5459081062393975e-06, "loss": 0.3850218951702118, "step": 13559, "token_acc": 0.8661032028469751 }, { "epoch": 0.7316678357524415, "grad_norm": 0.46424204111099243, "learning_rate": 3.544573352076388e-06, "loss": 0.2934063673019409, "step": 13560, "token_acc": 0.8956377691882937 }, { "epoch": 0.731721793557438, "grad_norm": 0.49191486835479736, "learning_rate": 3.5432387950604206e-06, "loss": 0.3130250573158264, "step": 13561, "token_acc": 0.8890803697321009 }, { "epoch": 0.7317757513624346, "grad_norm": 0.36927086114883423, "learning_rate": 3.5419044352322497e-06, "loss": 0.347236692905426, "step": 13562, "token_acc": 0.8732014388489209 }, { "epoch": 0.7318297091674311, "grad_norm": 0.3224171996116638, "learning_rate": 3.5405702726326254e-06, "loss": 0.3462548851966858, "step": 13563, "token_acc": 0.8789949305708618 }, { "epoch": 0.7318836669724276, "grad_norm": 0.471523642539978, "learning_rate": 3.539236307302293e-06, "loss": 0.393917441368103, "step": 13564, "token_acc": 0.869327211725234 }, { "epoch": 0.7319376247774241, "grad_norm": 0.391852468252182, "learning_rate": 3.53790253928199e-06, "loss": 0.294360876083374, "step": 13565, "token_acc": 0.8905166981726528 }, { "epoch": 0.7319915825824206, "grad_norm": 0.4372503161430359, "learning_rate": 3.5365689686124483e-06, "loss": 0.3112106919288635, "step": 13566, "token_acc": 0.8873838399747992 }, { "epoch": 0.7320455403874171, "grad_norm": 0.4726165533065796, "learning_rate": 3.5352355953344e-06, "loss": 0.2907446026802063, "step": 13567, "token_acc": 0.8953384532952313 }, { "epoch": 0.7320994981924135, "grad_norm": 0.4418235719203949, "learning_rate": 3.533902419488564e-06, "loss": 0.31143462657928467, "step": 13568, "token_acc": 0.8831800973499189 }, { "epoch": 0.73215345599741, "grad_norm": 0.3910291790962219, "learning_rate": 3.532569441115653e-06, "loss": 0.30549097061157227, "step": 13569, "token_acc": 0.8911604135207342 }, { "epoch": 0.7322074138024065, "grad_norm": 0.4755678176879883, "learning_rate": 3.5312366602563776e-06, "loss": 0.33888739347457886, "step": 13570, "token_acc": 0.8806076854334227 }, { "epoch": 0.732261371607403, "grad_norm": 0.3588257133960724, "learning_rate": 3.52990407695144e-06, "loss": 0.3622797727584839, "step": 13571, "token_acc": 0.8718298336514863 }, { "epoch": 0.7323153294123995, "grad_norm": 0.4785357415676117, "learning_rate": 3.528571691241537e-06, "loss": 0.3763849437236786, "step": 13572, "token_acc": 0.8675269326388066 }, { "epoch": 0.732369287217396, "grad_norm": 0.3707003593444824, "learning_rate": 3.527239503167359e-06, "loss": 0.3740127384662628, "step": 13573, "token_acc": 0.870375 }, { "epoch": 0.7324232450223925, "grad_norm": 0.3608646094799042, "learning_rate": 3.525907512769592e-06, "loss": 0.31324344873428345, "step": 13574, "token_acc": 0.8899070727929789 }, { "epoch": 0.7324772028273889, "grad_norm": 0.3728480637073517, "learning_rate": 3.524575720088912e-06, "loss": 0.36575835943222046, "step": 13575, "token_acc": 0.8748830370271354 }, { "epoch": 0.7325311606323854, "grad_norm": 0.3865147531032562, "learning_rate": 3.523244125165994e-06, "loss": 0.3718276619911194, "step": 13576, "token_acc": 0.8667372881355933 }, { "epoch": 0.7325851184373819, "grad_norm": 0.3392735421657562, "learning_rate": 3.5219127280415035e-06, "loss": 0.30308055877685547, "step": 13577, "token_acc": 0.8911825017088175 }, { "epoch": 0.7326390762423785, "grad_norm": 0.4163143038749695, "learning_rate": 3.520581528756102e-06, "loss": 0.36376237869262695, "step": 13578, "token_acc": 0.8678673892253745 }, { "epoch": 0.732693034047375, "grad_norm": 0.4163881540298462, "learning_rate": 3.5192505273504384e-06, "loss": 0.3658231198787689, "step": 13579, "token_acc": 0.8734634036314425 }, { "epoch": 0.7327469918523715, "grad_norm": 0.4152393043041229, "learning_rate": 3.5179197238651708e-06, "loss": 0.307108998298645, "step": 13580, "token_acc": 0.8872823841841251 }, { "epoch": 0.732800949657368, "grad_norm": 0.4303438663482666, "learning_rate": 3.5165891183409363e-06, "loss": 0.39010369777679443, "step": 13581, "token_acc": 0.8618430847104362 }, { "epoch": 0.7328549074623645, "grad_norm": 0.38791728019714355, "learning_rate": 3.515258710818371e-06, "loss": 0.32282668352127075, "step": 13582, "token_acc": 0.8818522301668369 }, { "epoch": 0.7329088652673609, "grad_norm": 0.5014918446540833, "learning_rate": 3.513928501338106e-06, "loss": 0.35398074984550476, "step": 13583, "token_acc": 0.871390194761585 }, { "epoch": 0.7329628230723574, "grad_norm": 0.4512331783771515, "learning_rate": 3.512598489940766e-06, "loss": 0.3575606346130371, "step": 13584, "token_acc": 0.8726817780394466 }, { "epoch": 0.7330167808773539, "grad_norm": 0.6091119647026062, "learning_rate": 3.5112686766669656e-06, "loss": 0.3584575653076172, "step": 13585, "token_acc": 0.8723741577487119 }, { "epoch": 0.7330707386823504, "grad_norm": 0.5444518327713013, "learning_rate": 3.5099390615573237e-06, "loss": 0.36088037490844727, "step": 13586, "token_acc": 0.8703886925795054 }, { "epoch": 0.7331246964873469, "grad_norm": 0.3390856087207794, "learning_rate": 3.508609644652442e-06, "loss": 0.3492647409439087, "step": 13587, "token_acc": 0.875532821824382 }, { "epoch": 0.7331786542923434, "grad_norm": 0.42076700925827026, "learning_rate": 3.5072804259929207e-06, "loss": 0.33514630794525146, "step": 13588, "token_acc": 0.881269651145381 }, { "epoch": 0.7332326120973399, "grad_norm": 0.4607415497303009, "learning_rate": 3.5059514056193545e-06, "loss": 0.3202388286590576, "step": 13589, "token_acc": 0.8823038397328882 }, { "epoch": 0.7332865699023364, "grad_norm": 0.3899393677711487, "learning_rate": 3.5046225835723314e-06, "loss": 0.3509750962257385, "step": 13590, "token_acc": 0.8718840579710145 }, { "epoch": 0.7333405277073328, "grad_norm": 0.48817747831344604, "learning_rate": 3.5032939598924333e-06, "loss": 0.3860971927642822, "step": 13591, "token_acc": 0.8654999189758548 }, { "epoch": 0.7333944855123293, "grad_norm": 0.422062486410141, "learning_rate": 3.5019655346202363e-06, "loss": 0.3033834993839264, "step": 13592, "token_acc": 0.8932668329177057 }, { "epoch": 0.7334484433173258, "grad_norm": 0.42466238141059875, "learning_rate": 3.5006373077963087e-06, "loss": 0.3235306739807129, "step": 13593, "token_acc": 0.8845857120814689 }, { "epoch": 0.7335024011223223, "grad_norm": 0.481111079454422, "learning_rate": 3.499309279461215e-06, "loss": 0.28608256578445435, "step": 13594, "token_acc": 0.8936555891238671 }, { "epoch": 0.7335563589273189, "grad_norm": 0.3526211678981781, "learning_rate": 3.497981449655513e-06, "loss": 0.342157244682312, "step": 13595, "token_acc": 0.8805526667657109 }, { "epoch": 0.7336103167323154, "grad_norm": 0.3779672086238861, "learning_rate": 3.496653818419755e-06, "loss": 0.2924286127090454, "step": 13596, "token_acc": 0.8902391219129753 }, { "epoch": 0.7336642745373119, "grad_norm": 0.4238702356815338, "learning_rate": 3.4953263857944853e-06, "loss": 0.3368656635284424, "step": 13597, "token_acc": 0.8806014150943396 }, { "epoch": 0.7337182323423083, "grad_norm": 0.3536089062690735, "learning_rate": 3.49399915182024e-06, "loss": 0.33282968401908875, "step": 13598, "token_acc": 0.884656900539707 }, { "epoch": 0.7337721901473048, "grad_norm": 0.43518945574760437, "learning_rate": 3.49267211653756e-06, "loss": 0.34913021326065063, "step": 13599, "token_acc": 0.8796603912882983 }, { "epoch": 0.7338261479523013, "grad_norm": 0.5936970710754395, "learning_rate": 3.4913452799869695e-06, "loss": 0.36102139949798584, "step": 13600, "token_acc": 0.8770144189991518 }, { "epoch": 0.7338801057572978, "grad_norm": 0.3828238546848297, "learning_rate": 3.4900186422089877e-06, "loss": 0.33970966935157776, "step": 13601, "token_acc": 0.8796296296296297 }, { "epoch": 0.7339340635622943, "grad_norm": 0.35056379437446594, "learning_rate": 3.4886922032441326e-06, "loss": 0.32623007893562317, "step": 13602, "token_acc": 0.8838941348583588 }, { "epoch": 0.7339880213672908, "grad_norm": 0.4213302433490753, "learning_rate": 3.4873659631329115e-06, "loss": 0.34925323724746704, "step": 13603, "token_acc": 0.8777372262773723 }, { "epoch": 0.7340419791722873, "grad_norm": 0.5173211693763733, "learning_rate": 3.486039921915827e-06, "loss": 0.3160591125488281, "step": 13604, "token_acc": 0.8843853820598007 }, { "epoch": 0.7340959369772838, "grad_norm": 0.43477681279182434, "learning_rate": 3.4847140796333746e-06, "loss": 0.3591558337211609, "step": 13605, "token_acc": 0.8701882551278449 }, { "epoch": 0.7341498947822802, "grad_norm": 0.5519406795501709, "learning_rate": 3.4833884363260496e-06, "loss": 0.3976076543331146, "step": 13606, "token_acc": 0.861875945537065 }, { "epoch": 0.7342038525872767, "grad_norm": 0.45498013496398926, "learning_rate": 3.4820629920343384e-06, "loss": 0.3095492422580719, "step": 13607, "token_acc": 0.886056338028169 }, { "epoch": 0.7342578103922732, "grad_norm": 0.39278629422187805, "learning_rate": 3.4807377467987134e-06, "loss": 0.3463613986968994, "step": 13608, "token_acc": 0.8787417554540842 }, { "epoch": 0.7343117681972697, "grad_norm": 0.5111721754074097, "learning_rate": 3.479412700659649e-06, "loss": 0.35921603441238403, "step": 13609, "token_acc": 0.877348643006263 }, { "epoch": 0.7343657260022662, "grad_norm": 0.3899090886116028, "learning_rate": 3.4780878536576136e-06, "loss": 0.3037976622581482, "step": 13610, "token_acc": 0.8916666666666667 }, { "epoch": 0.7344196838072627, "grad_norm": 0.43865782022476196, "learning_rate": 3.4767632058330613e-06, "loss": 0.34804534912109375, "step": 13611, "token_acc": 0.8760112009956441 }, { "epoch": 0.7344736416122593, "grad_norm": 0.45658838748931885, "learning_rate": 3.475438757226457e-06, "loss": 0.31292325258255005, "step": 13612, "token_acc": 0.8872657518075845 }, { "epoch": 0.7345275994172558, "grad_norm": 0.3308618366718292, "learning_rate": 3.4741145078782433e-06, "loss": 0.3312963843345642, "step": 13613, "token_acc": 0.8806346381969158 }, { "epoch": 0.7345815572222522, "grad_norm": 0.4054332673549652, "learning_rate": 3.472790457828863e-06, "loss": 0.3167171776294708, "step": 13614, "token_acc": 0.8831769695411901 }, { "epoch": 0.7346355150272487, "grad_norm": 0.4574613571166992, "learning_rate": 3.471466607118752e-06, "loss": 0.376995325088501, "step": 13615, "token_acc": 0.8729729729729729 }, { "epoch": 0.7346894728322452, "grad_norm": 0.362637996673584, "learning_rate": 3.4701429557883404e-06, "loss": 0.34932029247283936, "step": 13616, "token_acc": 0.8786701702273506 }, { "epoch": 0.7347434306372417, "grad_norm": 0.3327745199203491, "learning_rate": 3.4688195038780524e-06, "loss": 0.32036083936691284, "step": 13617, "token_acc": 0.8837941418012593 }, { "epoch": 0.7347973884422382, "grad_norm": 0.34912094473838806, "learning_rate": 3.467496251428303e-06, "loss": 0.3492850959300995, "step": 13618, "token_acc": 0.8735224586288416 }, { "epoch": 0.7348513462472347, "grad_norm": 0.41133326292037964, "learning_rate": 3.46617319847951e-06, "loss": 0.3684125542640686, "step": 13619, "token_acc": 0.8716671155399947 }, { "epoch": 0.7349053040522312, "grad_norm": 0.4859949052333832, "learning_rate": 3.464850345072076e-06, "loss": 0.32197582721710205, "step": 13620, "token_acc": 0.8843046739757646 }, { "epoch": 0.7349592618572276, "grad_norm": 0.41210031509399414, "learning_rate": 3.4635276912464e-06, "loss": 0.29263922572135925, "step": 13621, "token_acc": 0.8912102671243098 }, { "epoch": 0.7350132196622241, "grad_norm": 0.4548236131668091, "learning_rate": 3.462205237042876e-06, "loss": 0.3077825903892517, "step": 13622, "token_acc": 0.8896602119108513 }, { "epoch": 0.7350671774672206, "grad_norm": 0.3485255241394043, "learning_rate": 3.460882982501892e-06, "loss": 0.3691462278366089, "step": 13623, "token_acc": 0.8789625360230547 }, { "epoch": 0.7351211352722171, "grad_norm": 0.4189557731151581, "learning_rate": 3.4595609276638287e-06, "loss": 0.3478911519050598, "step": 13624, "token_acc": 0.8804062126642772 }, { "epoch": 0.7351750930772136, "grad_norm": 0.47663185000419617, "learning_rate": 3.4582390725690605e-06, "loss": 0.3397713899612427, "step": 13625, "token_acc": 0.8773175542406312 }, { "epoch": 0.7352290508822101, "grad_norm": 0.3587621748447418, "learning_rate": 3.456917417257958e-06, "loss": 0.3212328553199768, "step": 13626, "token_acc": 0.8851247734560156 }, { "epoch": 0.7352830086872066, "grad_norm": 0.3919249176979065, "learning_rate": 3.4555959617708835e-06, "loss": 0.34493592381477356, "step": 13627, "token_acc": 0.8713639301874596 }, { "epoch": 0.7353369664922031, "grad_norm": 0.4413639307022095, "learning_rate": 3.454274706148193e-06, "loss": 0.28106290102005005, "step": 13628, "token_acc": 0.8963403306706298 }, { "epoch": 0.7353909242971995, "grad_norm": 0.38706669211387634, "learning_rate": 3.45295365043024e-06, "loss": 0.3712502121925354, "step": 13629, "token_acc": 0.8673076923076923 }, { "epoch": 0.735444882102196, "grad_norm": 0.41684526205062866, "learning_rate": 3.4516327946573627e-06, "loss": 0.34359854459762573, "step": 13630, "token_acc": 0.8801081081081081 }, { "epoch": 0.7354988399071926, "grad_norm": 0.46522656083106995, "learning_rate": 3.450312138869908e-06, "loss": 0.3407004475593567, "step": 13631, "token_acc": 0.8780790085205267 }, { "epoch": 0.7355527977121891, "grad_norm": 0.4171842932701111, "learning_rate": 3.448991683108205e-06, "loss": 0.3049553632736206, "step": 13632, "token_acc": 0.8905700586041556 }, { "epoch": 0.7356067555171856, "grad_norm": 0.4001100957393646, "learning_rate": 3.447671427412579e-06, "loss": 0.300114244222641, "step": 13633, "token_acc": 0.8939154830790812 }, { "epoch": 0.7356607133221821, "grad_norm": 0.38936740159988403, "learning_rate": 3.446351371823352e-06, "loss": 0.33795326948165894, "step": 13634, "token_acc": 0.8822572572572572 }, { "epoch": 0.7357146711271786, "grad_norm": 0.4476822018623352, "learning_rate": 3.4450315163808366e-06, "loss": 0.28575584292411804, "step": 13635, "token_acc": 0.8917155903457273 }, { "epoch": 0.735768628932175, "grad_norm": 0.4296591877937317, "learning_rate": 3.443711861125343e-06, "loss": 0.341902494430542, "step": 13636, "token_acc": 0.876439202471216 }, { "epoch": 0.7358225867371715, "grad_norm": 0.46983179450035095, "learning_rate": 3.442392406097167e-06, "loss": 0.3557553291320801, "step": 13637, "token_acc": 0.8690446180993194 }, { "epoch": 0.735876544542168, "grad_norm": 0.4739209711551666, "learning_rate": 3.4410731513366123e-06, "loss": 0.36657145619392395, "step": 13638, "token_acc": 0.8716475095785441 }, { "epoch": 0.7359305023471645, "grad_norm": 0.5269219279289246, "learning_rate": 3.439754096883966e-06, "loss": 0.3688575327396393, "step": 13639, "token_acc": 0.8711175152097342 }, { "epoch": 0.735984460152161, "grad_norm": 0.43226131796836853, "learning_rate": 3.438435242779511e-06, "loss": 0.35428571701049805, "step": 13640, "token_acc": 0.8748923959827833 }, { "epoch": 0.7360384179571575, "grad_norm": 0.469279408454895, "learning_rate": 3.4371165890635283e-06, "loss": 0.36217570304870605, "step": 13641, "token_acc": 0.8731594819939684 }, { "epoch": 0.736092375762154, "grad_norm": 0.37467122077941895, "learning_rate": 3.4357981357762815e-06, "loss": 0.32678359746932983, "step": 13642, "token_acc": 0.8850288559465882 }, { "epoch": 0.7361463335671505, "grad_norm": 0.44350966811180115, "learning_rate": 3.4344798829580374e-06, "loss": 0.30965811014175415, "step": 13643, "token_acc": 0.8837764687151728 }, { "epoch": 0.7362002913721469, "grad_norm": 0.40615344047546387, "learning_rate": 3.433161830649061e-06, "loss": 0.30652081966400146, "step": 13644, "token_acc": 0.8903901833723976 }, { "epoch": 0.7362542491771434, "grad_norm": 0.42428919672966003, "learning_rate": 3.431843978889602e-06, "loss": 0.3333203196525574, "step": 13645, "token_acc": 0.8806968424327267 }, { "epoch": 0.73630820698214, "grad_norm": 0.526839554309845, "learning_rate": 3.4305263277199076e-06, "loss": 0.3431476056575775, "step": 13646, "token_acc": 0.8724287260916637 }, { "epoch": 0.7363621647871365, "grad_norm": 0.3716987371444702, "learning_rate": 3.4292088771802177e-06, "loss": 0.33565422892570496, "step": 13647, "token_acc": 0.8777777777777778 }, { "epoch": 0.736416122592133, "grad_norm": 0.4244036078453064, "learning_rate": 3.4278916273107675e-06, "loss": 0.34312283992767334, "step": 13648, "token_acc": 0.8734753857457752 }, { "epoch": 0.7364700803971295, "grad_norm": 0.4638980031013489, "learning_rate": 3.4265745781517843e-06, "loss": 0.3831786811351776, "step": 13649, "token_acc": 0.8649041255084253 }, { "epoch": 0.736524038202126, "grad_norm": 0.44780367612838745, "learning_rate": 3.42525772974349e-06, "loss": 0.337495893239975, "step": 13650, "token_acc": 0.881224593495935 }, { "epoch": 0.7365779960071225, "grad_norm": 0.4613097608089447, "learning_rate": 3.4239410821261043e-06, "loss": 0.29651278257369995, "step": 13651, "token_acc": 0.8923780487804878 }, { "epoch": 0.7366319538121189, "grad_norm": 0.38981953263282776, "learning_rate": 3.422624635339836e-06, "loss": 0.33695679903030396, "step": 13652, "token_acc": 0.8846696322013671 }, { "epoch": 0.7366859116171154, "grad_norm": 0.44764643907546997, "learning_rate": 3.421308389424888e-06, "loss": 0.328774094581604, "step": 13653, "token_acc": 0.885701411835386 }, { "epoch": 0.7367398694221119, "grad_norm": 0.37309128046035767, "learning_rate": 3.419992344421459e-06, "loss": 0.32706397771835327, "step": 13654, "token_acc": 0.8830892772791686 }, { "epoch": 0.7367938272271084, "grad_norm": 0.35717499256134033, "learning_rate": 3.4186765003697386e-06, "loss": 0.3154395818710327, "step": 13655, "token_acc": 0.8852503019728896 }, { "epoch": 0.7368477850321049, "grad_norm": 0.3903517425060272, "learning_rate": 3.417360857309915e-06, "loss": 0.3372848629951477, "step": 13656, "token_acc": 0.8818617217241803 }, { "epoch": 0.7369017428371014, "grad_norm": 0.31592196226119995, "learning_rate": 3.4160454152821667e-06, "loss": 0.29819977283477783, "step": 13657, "token_acc": 0.8892111368909513 }, { "epoch": 0.7369557006420979, "grad_norm": 0.43741515278816223, "learning_rate": 3.414730174326666e-06, "loss": 0.41327428817749023, "step": 13658, "token_acc": 0.8572930047880416 }, { "epoch": 0.7370096584470943, "grad_norm": 0.39805611968040466, "learning_rate": 3.4134151344835807e-06, "loss": 0.343168705701828, "step": 13659, "token_acc": 0.8819435325016415 }, { "epoch": 0.7370636162520908, "grad_norm": 0.4306681454181671, "learning_rate": 3.4121002957930717e-06, "loss": 0.3456381857395172, "step": 13660, "token_acc": 0.8746863237139272 }, { "epoch": 0.7371175740570873, "grad_norm": 0.4529135227203369, "learning_rate": 3.4107856582952938e-06, "loss": 0.41354426741600037, "step": 13661, "token_acc": 0.8608588957055214 }, { "epoch": 0.7371715318620838, "grad_norm": 0.46329647302627563, "learning_rate": 3.409471222030396e-06, "loss": 0.3442251682281494, "step": 13662, "token_acc": 0.877967010862277 }, { "epoch": 0.7372254896670803, "grad_norm": 0.4357539415359497, "learning_rate": 3.408156987038517e-06, "loss": 0.33926263451576233, "step": 13663, "token_acc": 0.8804832916077066 }, { "epoch": 0.7372794474720769, "grad_norm": 0.46583160758018494, "learning_rate": 3.4068429533598e-06, "loss": 0.35765954852104187, "step": 13664, "token_acc": 0.8762237762237762 }, { "epoch": 0.7373334052770734, "grad_norm": 0.3867229223251343, "learning_rate": 3.4055291210343734e-06, "loss": 0.32276609539985657, "step": 13665, "token_acc": 0.8850814332247557 }, { "epoch": 0.7373873630820699, "grad_norm": 0.48782792687416077, "learning_rate": 3.4042154901023584e-06, "loss": 0.3772416412830353, "step": 13666, "token_acc": 0.8687155005221543 }, { "epoch": 0.7374413208870663, "grad_norm": 0.3727366626262665, "learning_rate": 3.4029020606038755e-06, "loss": 0.33255401253700256, "step": 13667, "token_acc": 0.8800179748352307 }, { "epoch": 0.7374952786920628, "grad_norm": 0.3645118772983551, "learning_rate": 3.4015888325790346e-06, "loss": 0.30862343311309814, "step": 13668, "token_acc": 0.8842357164223751 }, { "epoch": 0.7375492364970593, "grad_norm": 0.4022054076194763, "learning_rate": 3.400275806067939e-06, "loss": 0.3707123398780823, "step": 13669, "token_acc": 0.8731188058240548 }, { "epoch": 0.7376031943020558, "grad_norm": 0.34358537197113037, "learning_rate": 3.398962981110695e-06, "loss": 0.3337589502334595, "step": 13670, "token_acc": 0.8851208826870031 }, { "epoch": 0.7376571521070523, "grad_norm": 0.46653929352760315, "learning_rate": 3.3976503577473917e-06, "loss": 0.2826347053050995, "step": 13671, "token_acc": 0.8969267139479905 }, { "epoch": 0.7377111099120488, "grad_norm": 0.3843007981777191, "learning_rate": 3.396337936018118e-06, "loss": 0.3878041207790375, "step": 13672, "token_acc": 0.8626267228613738 }, { "epoch": 0.7377650677170453, "grad_norm": 0.3862007260322571, "learning_rate": 3.395025715962953e-06, "loss": 0.3120017647743225, "step": 13673, "token_acc": 0.8862981070577911 }, { "epoch": 0.7378190255220418, "grad_norm": 0.44288793206214905, "learning_rate": 3.393713697621972e-06, "loss": 0.3821170926094055, "step": 13674, "token_acc": 0.8683697770080104 }, { "epoch": 0.7378729833270382, "grad_norm": 0.3622482120990753, "learning_rate": 3.3924018810352453e-06, "loss": 0.3169427514076233, "step": 13675, "token_acc": 0.8842788171394086 }, { "epoch": 0.7379269411320347, "grad_norm": 0.4490731954574585, "learning_rate": 3.3910902662428334e-06, "loss": 0.3347669541835785, "step": 13676, "token_acc": 0.8802287581699346 }, { "epoch": 0.7379808989370312, "grad_norm": 0.4378875494003296, "learning_rate": 3.389778853284794e-06, "loss": 0.35917961597442627, "step": 13677, "token_acc": 0.8731563421828908 }, { "epoch": 0.7380348567420277, "grad_norm": 0.400452196598053, "learning_rate": 3.3884676422011762e-06, "loss": 0.3175414800643921, "step": 13678, "token_acc": 0.8852440013909818 }, { "epoch": 0.7380888145470242, "grad_norm": 0.3313206732273102, "learning_rate": 3.387156633032025e-06, "loss": 0.3634549081325531, "step": 13679, "token_acc": 0.8730413325107959 }, { "epoch": 0.7381427723520207, "grad_norm": 0.3849790394306183, "learning_rate": 3.385845825817378e-06, "loss": 0.38373833894729614, "step": 13680, "token_acc": 0.8667366211962224 }, { "epoch": 0.7381967301570173, "grad_norm": 0.4849839210510254, "learning_rate": 3.384535220597266e-06, "loss": 0.35001516342163086, "step": 13681, "token_acc": 0.8761829652996845 }, { "epoch": 0.7382506879620137, "grad_norm": 0.4764595627784729, "learning_rate": 3.383224817411712e-06, "loss": 0.3466450572013855, "step": 13682, "token_acc": 0.8714235868806699 }, { "epoch": 0.7383046457670102, "grad_norm": 0.4131014347076416, "learning_rate": 3.381914616300743e-06, "loss": 0.3571845293045044, "step": 13683, "token_acc": 0.873794702795069 }, { "epoch": 0.7383586035720067, "grad_norm": 0.36260464787483215, "learning_rate": 3.3806046173043673e-06, "loss": 0.3135199546813965, "step": 13684, "token_acc": 0.8882724809701974 }, { "epoch": 0.7384125613770032, "grad_norm": 0.30719128251075745, "learning_rate": 3.379294820462593e-06, "loss": 0.3063487410545349, "step": 13685, "token_acc": 0.8913959367100459 }, { "epoch": 0.7384665191819997, "grad_norm": 0.4105718731880188, "learning_rate": 3.37798522581542e-06, "loss": 0.31436699628829956, "step": 13686, "token_acc": 0.8899333650642551 }, { "epoch": 0.7385204769869962, "grad_norm": 0.4427587389945984, "learning_rate": 3.376675833402844e-06, "loss": 0.3154980540275574, "step": 13687, "token_acc": 0.8852630010430637 }, { "epoch": 0.7385744347919927, "grad_norm": 0.49291661381721497, "learning_rate": 3.375366643264849e-06, "loss": 0.34893274307250977, "step": 13688, "token_acc": 0.880661051962498 }, { "epoch": 0.7386283925969892, "grad_norm": 0.4755456745624542, "learning_rate": 3.3740576554414285e-06, "loss": 0.4104306399822235, "step": 13689, "token_acc": 0.8597116200169636 }, { "epoch": 0.7386823504019856, "grad_norm": 0.49396559596061707, "learning_rate": 3.3727488699725476e-06, "loss": 0.42169612646102905, "step": 13690, "token_acc": 0.8550595238095238 }, { "epoch": 0.7387363082069821, "grad_norm": 0.3995344638824463, "learning_rate": 3.371440286898181e-06, "loss": 0.3257595896720886, "step": 13691, "token_acc": 0.8833957553058677 }, { "epoch": 0.7387902660119786, "grad_norm": 0.42736971378326416, "learning_rate": 3.3701319062582905e-06, "loss": 0.3336164355278015, "step": 13692, "token_acc": 0.8803820497271073 }, { "epoch": 0.7388442238169751, "grad_norm": 0.48657190799713135, "learning_rate": 3.368823728092836e-06, "loss": 0.32267138361930847, "step": 13693, "token_acc": 0.8812165263963274 }, { "epoch": 0.7388981816219716, "grad_norm": 0.5191088318824768, "learning_rate": 3.3675157524417666e-06, "loss": 0.30978354811668396, "step": 13694, "token_acc": 0.8843990835242658 }, { "epoch": 0.7389521394269681, "grad_norm": 0.3210635483264923, "learning_rate": 3.3662079793450265e-06, "loss": 0.26967689394950867, "step": 13695, "token_acc": 0.8977578475336323 }, { "epoch": 0.7390060972319646, "grad_norm": 0.3508317470550537, "learning_rate": 3.3649004088425585e-06, "loss": 0.31552937626838684, "step": 13696, "token_acc": 0.8837334772052873 }, { "epoch": 0.7390600550369611, "grad_norm": 0.44237422943115234, "learning_rate": 3.3635930409742954e-06, "loss": 0.3309597373008728, "step": 13697, "token_acc": 0.8804515135967163 }, { "epoch": 0.7391140128419575, "grad_norm": 0.38637226819992065, "learning_rate": 3.3622858757801615e-06, "loss": 0.3008689284324646, "step": 13698, "token_acc": 0.8885651857246905 }, { "epoch": 0.739167970646954, "grad_norm": 0.4383956491947174, "learning_rate": 3.360978913300078e-06, "loss": 0.33453285694122314, "step": 13699, "token_acc": 0.8791363229845407 }, { "epoch": 0.7392219284519506, "grad_norm": 0.42137542366981506, "learning_rate": 3.3596721535739596e-06, "loss": 0.33511215448379517, "step": 13700, "token_acc": 0.8785283239244696 }, { "epoch": 0.7392758862569471, "grad_norm": 0.486337274312973, "learning_rate": 3.3583655966417118e-06, "loss": 0.36294469237327576, "step": 13701, "token_acc": 0.874775397373877 }, { "epoch": 0.7393298440619436, "grad_norm": 0.3646079897880554, "learning_rate": 3.35705924254324e-06, "loss": 0.2902980148792267, "step": 13702, "token_acc": 0.8898698884758365 }, { "epoch": 0.7393838018669401, "grad_norm": 0.5187240242958069, "learning_rate": 3.355753091318441e-06, "loss": 0.32387274503707886, "step": 13703, "token_acc": 0.8812851697222727 }, { "epoch": 0.7394377596719366, "grad_norm": 0.4157228469848633, "learning_rate": 3.3544471430072002e-06, "loss": 0.32208752632141113, "step": 13704, "token_acc": 0.8842224744608399 }, { "epoch": 0.739491717476933, "grad_norm": 0.34437084197998047, "learning_rate": 3.3531413976494043e-06, "loss": 0.35540199279785156, "step": 13705, "token_acc": 0.8767353807320152 }, { "epoch": 0.7395456752819295, "grad_norm": 0.5019707083702087, "learning_rate": 3.3518358552849275e-06, "loss": 0.3315170407295227, "step": 13706, "token_acc": 0.8748062015503876 }, { "epoch": 0.739599633086926, "grad_norm": 0.44746196269989014, "learning_rate": 3.350530515953643e-06, "loss": 0.3411809802055359, "step": 13707, "token_acc": 0.8798380566801619 }, { "epoch": 0.7396535908919225, "grad_norm": 0.3356739580631256, "learning_rate": 3.3492253796954144e-06, "loss": 0.3247069716453552, "step": 13708, "token_acc": 0.8837477569853883 }, { "epoch": 0.739707548696919, "grad_norm": 0.3761167824268341, "learning_rate": 3.3479204465501005e-06, "loss": 0.343402624130249, "step": 13709, "token_acc": 0.880227416298168 }, { "epoch": 0.7397615065019155, "grad_norm": 0.3929765820503235, "learning_rate": 3.346615716557553e-06, "loss": 0.3143894076347351, "step": 13710, "token_acc": 0.8875192604006163 }, { "epoch": 0.739815464306912, "grad_norm": 0.3814919888973236, "learning_rate": 3.3453111897576187e-06, "loss": 0.3008101284503937, "step": 13711, "token_acc": 0.8902648357037763 }, { "epoch": 0.7398694221119085, "grad_norm": 0.40225163102149963, "learning_rate": 3.344006866190137e-06, "loss": 0.30334728956222534, "step": 13712, "token_acc": 0.8867850868631661 }, { "epoch": 0.7399233799169049, "grad_norm": 0.364839643239975, "learning_rate": 3.3427027458949436e-06, "loss": 0.35573244094848633, "step": 13713, "token_acc": 0.8702118703521818 }, { "epoch": 0.7399773377219014, "grad_norm": 0.3958253562450409, "learning_rate": 3.3413988289118592e-06, "loss": 0.393812894821167, "step": 13714, "token_acc": 0.8602657522503214 }, { "epoch": 0.740031295526898, "grad_norm": 0.353129506111145, "learning_rate": 3.340095115280715e-06, "loss": 0.2782215476036072, "step": 13715, "token_acc": 0.8995639987542822 }, { "epoch": 0.7400852533318945, "grad_norm": 0.459445595741272, "learning_rate": 3.3387916050413204e-06, "loss": 0.2803429961204529, "step": 13716, "token_acc": 0.8954248366013072 }, { "epoch": 0.740139211136891, "grad_norm": 0.36249855160713196, "learning_rate": 3.3374882982334854e-06, "loss": 0.40859177708625793, "step": 13717, "token_acc": 0.86024750980984 }, { "epoch": 0.7401931689418875, "grad_norm": 0.5139562487602234, "learning_rate": 3.3361851948970123e-06, "loss": 0.35953885316848755, "step": 13718, "token_acc": 0.8780068728522337 }, { "epoch": 0.740247126746884, "grad_norm": 0.3904073238372803, "learning_rate": 3.3348822950716987e-06, "loss": 0.3242781162261963, "step": 13719, "token_acc": 0.8864834548513741 }, { "epoch": 0.7403010845518805, "grad_norm": 0.36163845658302307, "learning_rate": 3.333579598797333e-06, "loss": 0.30022549629211426, "step": 13720, "token_acc": 0.8893323657474601 }, { "epoch": 0.7403550423568769, "grad_norm": 0.38567423820495605, "learning_rate": 3.332277106113696e-06, "loss": 0.3245677947998047, "step": 13721, "token_acc": 0.8826370441922241 }, { "epoch": 0.7404090001618734, "grad_norm": 0.48833024501800537, "learning_rate": 3.330974817060574e-06, "loss": 0.3522758483886719, "step": 13722, "token_acc": 0.8740773545910836 }, { "epoch": 0.7404629579668699, "grad_norm": 0.4221661686897278, "learning_rate": 3.329672731677737e-06, "loss": 0.3799472451210022, "step": 13723, "token_acc": 0.8660226261599084 }, { "epoch": 0.7405169157718664, "grad_norm": 0.37271469831466675, "learning_rate": 3.3283708500049425e-06, "loss": 0.2941133975982666, "step": 13724, "token_acc": 0.8937774984286612 }, { "epoch": 0.7405708735768629, "grad_norm": 0.34727585315704346, "learning_rate": 3.327069172081957e-06, "loss": 0.33617496490478516, "step": 13725, "token_acc": 0.8762996941896024 }, { "epoch": 0.7406248313818594, "grad_norm": 0.3453640341758728, "learning_rate": 3.32576769794853e-06, "loss": 0.3038957715034485, "step": 13726, "token_acc": 0.8897343478841276 }, { "epoch": 0.7406787891868559, "grad_norm": 0.40933552384376526, "learning_rate": 3.324466427644405e-06, "loss": 0.2938222587108612, "step": 13727, "token_acc": 0.8916504854368932 }, { "epoch": 0.7407327469918523, "grad_norm": 0.33299481868743896, "learning_rate": 3.3231653612093305e-06, "loss": 0.3098185658454895, "step": 13728, "token_acc": 0.8840680803571429 }, { "epoch": 0.7407867047968488, "grad_norm": 0.449235200881958, "learning_rate": 3.321864498683036e-06, "loss": 0.28708261251449585, "step": 13729, "token_acc": 0.8895579147520203 }, { "epoch": 0.7408406626018453, "grad_norm": 0.4250625669956207, "learning_rate": 3.320563840105252e-06, "loss": 0.37833744287490845, "step": 13730, "token_acc": 0.8715272673820373 }, { "epoch": 0.7408946204068418, "grad_norm": 0.43254029750823975, "learning_rate": 3.3192633855156976e-06, "loss": 0.3285751938819885, "step": 13731, "token_acc": 0.8798509779571562 }, { "epoch": 0.7409485782118383, "grad_norm": 0.32904574275016785, "learning_rate": 3.3179631349540887e-06, "loss": 0.28408151865005493, "step": 13732, "token_acc": 0.8939343163538874 }, { "epoch": 0.7410025360168349, "grad_norm": 0.3611718714237213, "learning_rate": 3.3166630884601323e-06, "loss": 0.30408093333244324, "step": 13733, "token_acc": 0.8918122270742358 }, { "epoch": 0.7410564938218314, "grad_norm": 0.39739829301834106, "learning_rate": 3.3153632460735376e-06, "loss": 0.34063848853111267, "step": 13734, "token_acc": 0.8773559861959118 }, { "epoch": 0.7411104516268279, "grad_norm": 0.5319805145263672, "learning_rate": 3.3140636078339984e-06, "loss": 0.31616437435150146, "step": 13735, "token_acc": 0.8828906519489479 }, { "epoch": 0.7411644094318243, "grad_norm": 0.3693161904811859, "learning_rate": 3.3127641737812056e-06, "loss": 0.351712703704834, "step": 13736, "token_acc": 0.8761632956942549 }, { "epoch": 0.7412183672368208, "grad_norm": 0.5193494558334351, "learning_rate": 3.311464943954843e-06, "loss": 0.3092266619205475, "step": 13737, "token_acc": 0.8902863777089783 }, { "epoch": 0.7412723250418173, "grad_norm": 0.2762826085090637, "learning_rate": 3.3101659183945877e-06, "loss": 0.28165769577026367, "step": 13738, "token_acc": 0.8963468588041967 }, { "epoch": 0.7413262828468138, "grad_norm": 0.3531109392642975, "learning_rate": 3.308867097140114e-06, "loss": 0.35282352566719055, "step": 13739, "token_acc": 0.8726207906295754 }, { "epoch": 0.7413802406518103, "grad_norm": 0.3642132580280304, "learning_rate": 3.3075684802310857e-06, "loss": 0.29997870326042175, "step": 13740, "token_acc": 0.8914737188631625 }, { "epoch": 0.7414341984568068, "grad_norm": 0.4514681398868561, "learning_rate": 3.3062700677071636e-06, "loss": 0.346839964389801, "step": 13741, "token_acc": 0.8735959500079101 }, { "epoch": 0.7414881562618033, "grad_norm": 0.43943238258361816, "learning_rate": 3.304971859607999e-06, "loss": 0.3641234040260315, "step": 13742, "token_acc": 0.8713342545677875 }, { "epoch": 0.7415421140667998, "grad_norm": 0.3885267674922943, "learning_rate": 3.303673855973241e-06, "loss": 0.3194875717163086, "step": 13743, "token_acc": 0.8843144799281775 }, { "epoch": 0.7415960718717962, "grad_norm": 0.545809805393219, "learning_rate": 3.3023760568425287e-06, "loss": 0.38472431898117065, "step": 13744, "token_acc": 0.8682453909726637 }, { "epoch": 0.7416500296767927, "grad_norm": 0.4135603904724121, "learning_rate": 3.301078462255497e-06, "loss": 0.3477320671081543, "step": 13745, "token_acc": 0.8745784486911835 }, { "epoch": 0.7417039874817892, "grad_norm": 0.3969370126724243, "learning_rate": 3.299781072251771e-06, "loss": 0.2914871573448181, "step": 13746, "token_acc": 0.8966421825813221 }, { "epoch": 0.7417579452867857, "grad_norm": 0.3725478947162628, "learning_rate": 3.298483886870979e-06, "loss": 0.3550475537776947, "step": 13747, "token_acc": 0.877477267428305 }, { "epoch": 0.7418119030917822, "grad_norm": 0.40071752667427063, "learning_rate": 3.297186906152734e-06, "loss": 0.30804532766342163, "step": 13748, "token_acc": 0.8834327673244607 }, { "epoch": 0.7418658608967788, "grad_norm": 0.348213255405426, "learning_rate": 3.2958901301366443e-06, "loss": 0.3265553414821625, "step": 13749, "token_acc": 0.8806522493350295 }, { "epoch": 0.7419198187017753, "grad_norm": 0.3586808145046234, "learning_rate": 3.2945935588623146e-06, "loss": 0.3266259431838989, "step": 13750, "token_acc": 0.8822407977709341 }, { "epoch": 0.7419737765067717, "grad_norm": 0.49946820735931396, "learning_rate": 3.29329719236934e-06, "loss": 0.34054428339004517, "step": 13751, "token_acc": 0.8816126279863481 }, { "epoch": 0.7420277343117682, "grad_norm": 0.31964218616485596, "learning_rate": 3.292001030697313e-06, "loss": 0.34269559383392334, "step": 13752, "token_acc": 0.8766101694915254 }, { "epoch": 0.7420816921167647, "grad_norm": 0.28301751613616943, "learning_rate": 3.2907050738858137e-06, "loss": 0.2732842266559601, "step": 13753, "token_acc": 0.9016247778624016 }, { "epoch": 0.7421356499217612, "grad_norm": 0.4623521566390991, "learning_rate": 3.289409321974427e-06, "loss": 0.3555956780910492, "step": 13754, "token_acc": 0.8720871327254306 }, { "epoch": 0.7421896077267577, "grad_norm": 0.4278299808502197, "learning_rate": 3.2881137750027224e-06, "loss": 0.3360047936439514, "step": 13755, "token_acc": 0.8801208459214501 }, { "epoch": 0.7422435655317542, "grad_norm": 0.3863082230091095, "learning_rate": 3.2868184330102646e-06, "loss": 0.3432202935218811, "step": 13756, "token_acc": 0.8780847639484979 }, { "epoch": 0.7422975233367507, "grad_norm": 0.40509673953056335, "learning_rate": 3.2855232960366125e-06, "loss": 0.3515881896018982, "step": 13757, "token_acc": 0.8781362007168458 }, { "epoch": 0.7423514811417472, "grad_norm": 0.37510180473327637, "learning_rate": 3.2842283641213233e-06, "loss": 0.32457730174064636, "step": 13758, "token_acc": 0.8816918429003021 }, { "epoch": 0.7424054389467436, "grad_norm": 0.40782999992370605, "learning_rate": 3.2829336373039356e-06, "loss": 0.34246835112571716, "step": 13759, "token_acc": 0.8731175545066308 }, { "epoch": 0.7424593967517401, "grad_norm": 0.4363919496536255, "learning_rate": 3.2816391156239966e-06, "loss": 0.3403298258781433, "step": 13760, "token_acc": 0.8779505692863093 }, { "epoch": 0.7425133545567366, "grad_norm": 0.4080469012260437, "learning_rate": 3.2803447991210403e-06, "loss": 0.3257023096084595, "step": 13761, "token_acc": 0.8855639976621859 }, { "epoch": 0.7425673123617331, "grad_norm": 0.372834712266922, "learning_rate": 3.2790506878345918e-06, "loss": 0.3378344178199768, "step": 13762, "token_acc": 0.8806500761808025 }, { "epoch": 0.7426212701667296, "grad_norm": 0.366304874420166, "learning_rate": 3.2777567818041756e-06, "loss": 0.3583831191062927, "step": 13763, "token_acc": 0.8748377526423141 }, { "epoch": 0.7426752279717261, "grad_norm": 0.42428457736968994, "learning_rate": 3.2764630810693065e-06, "loss": 0.3270578384399414, "step": 13764, "token_acc": 0.8813586349864455 }, { "epoch": 0.7427291857767226, "grad_norm": 0.5034379363059998, "learning_rate": 3.2751695856694932e-06, "loss": 0.33517560362815857, "step": 13765, "token_acc": 0.88021627616803 }, { "epoch": 0.742783143581719, "grad_norm": 0.5740786790847778, "learning_rate": 3.273876295644236e-06, "loss": 0.35829007625579834, "step": 13766, "token_acc": 0.8717344333136908 }, { "epoch": 0.7428371013867155, "grad_norm": 0.3271522521972656, "learning_rate": 3.272583211033038e-06, "loss": 0.293567955493927, "step": 13767, "token_acc": 0.8922694221201684 }, { "epoch": 0.742891059191712, "grad_norm": 0.2882485091686249, "learning_rate": 3.271290331875386e-06, "loss": 0.3423001766204834, "step": 13768, "token_acc": 0.8811551277304702 }, { "epoch": 0.7429450169967086, "grad_norm": 0.3160785436630249, "learning_rate": 3.269997658210766e-06, "loss": 0.29682743549346924, "step": 13769, "token_acc": 0.8914881933003844 }, { "epoch": 0.7429989748017051, "grad_norm": 0.437663733959198, "learning_rate": 3.268705190078654e-06, "loss": 0.30358394980430603, "step": 13770, "token_acc": 0.8887264219915192 }, { "epoch": 0.7430529326067016, "grad_norm": 0.452332466840744, "learning_rate": 3.2674129275185208e-06, "loss": 0.3439965844154358, "step": 13771, "token_acc": 0.8805928016937191 }, { "epoch": 0.7431068904116981, "grad_norm": 0.4070398211479187, "learning_rate": 3.2661208705698343e-06, "loss": 0.3236271142959595, "step": 13772, "token_acc": 0.8797494780793319 }, { "epoch": 0.7431608482166946, "grad_norm": 0.4310927093029022, "learning_rate": 3.264829019272052e-06, "loss": 0.31668365001678467, "step": 13773, "token_acc": 0.8851754264318402 }, { "epoch": 0.743214806021691, "grad_norm": 0.3892790973186493, "learning_rate": 3.263537373664627e-06, "loss": 0.3574390113353729, "step": 13774, "token_acc": 0.8761891631049221 }, { "epoch": 0.7432687638266875, "grad_norm": 0.3931303322315216, "learning_rate": 3.262245933787006e-06, "loss": 0.36161041259765625, "step": 13775, "token_acc": 0.8804380894432613 }, { "epoch": 0.743322721631684, "grad_norm": 0.44253337383270264, "learning_rate": 3.260954699678629e-06, "loss": 0.34580379724502563, "step": 13776, "token_acc": 0.8785627653123105 }, { "epoch": 0.7433766794366805, "grad_norm": 0.3893544673919678, "learning_rate": 3.25966367137893e-06, "loss": 0.36878204345703125, "step": 13777, "token_acc": 0.8701028063350931 }, { "epoch": 0.743430637241677, "grad_norm": 0.4337727725505829, "learning_rate": 3.2583728489273336e-06, "loss": 0.3920469880104065, "step": 13778, "token_acc": 0.8641832866326026 }, { "epoch": 0.7434845950466735, "grad_norm": 0.4359099566936493, "learning_rate": 3.257082232363268e-06, "loss": 0.3246645927429199, "step": 13779, "token_acc": 0.8783501235506558 }, { "epoch": 0.74353855285167, "grad_norm": 0.34239935874938965, "learning_rate": 3.255791821726144e-06, "loss": 0.3088284730911255, "step": 13780, "token_acc": 0.8883826879271071 }, { "epoch": 0.7435925106566665, "grad_norm": 0.3741961121559143, "learning_rate": 3.254501617055371e-06, "loss": 0.2931862473487854, "step": 13781, "token_acc": 0.8880420054200542 }, { "epoch": 0.7436464684616629, "grad_norm": 0.40434202551841736, "learning_rate": 3.253211618390352e-06, "loss": 0.387039452791214, "step": 13782, "token_acc": 0.8643389456251726 }, { "epoch": 0.7437004262666594, "grad_norm": 0.4257054328918457, "learning_rate": 3.2519218257704832e-06, "loss": 0.3257783055305481, "step": 13783, "token_acc": 0.8854478854478854 }, { "epoch": 0.743754384071656, "grad_norm": 0.5048649907112122, "learning_rate": 3.250632239235153e-06, "loss": 0.30260980129241943, "step": 13784, "token_acc": 0.8874158249158249 }, { "epoch": 0.7438083418766525, "grad_norm": 0.4039320945739746, "learning_rate": 3.249342858823743e-06, "loss": 0.3208444118499756, "step": 13785, "token_acc": 0.8819312362838332 }, { "epoch": 0.743862299681649, "grad_norm": 0.4111829400062561, "learning_rate": 3.2480536845756372e-06, "loss": 0.344191312789917, "step": 13786, "token_acc": 0.8743589743589744 }, { "epoch": 0.7439162574866455, "grad_norm": 0.4897192716598511, "learning_rate": 3.246764716530204e-06, "loss": 0.39156097173690796, "step": 13787, "token_acc": 0.8650282030620468 }, { "epoch": 0.743970215291642, "grad_norm": 0.394513338804245, "learning_rate": 3.2454759547268054e-06, "loss": 0.33560606837272644, "step": 13788, "token_acc": 0.8847339498726712 }, { "epoch": 0.7440241730966384, "grad_norm": 0.32297685742378235, "learning_rate": 3.2441873992048036e-06, "loss": 0.31532275676727295, "step": 13789, "token_acc": 0.8861582207779019 }, { "epoch": 0.7440781309016349, "grad_norm": 0.41191211342811584, "learning_rate": 3.2428990500035474e-06, "loss": 0.34091147780418396, "step": 13790, "token_acc": 0.8804062909567497 }, { "epoch": 0.7441320887066314, "grad_norm": 0.42672479152679443, "learning_rate": 3.2416109071623846e-06, "loss": 0.357754111289978, "step": 13791, "token_acc": 0.8692485128080611 }, { "epoch": 0.7441860465116279, "grad_norm": 0.3784811198711395, "learning_rate": 3.240322970720654e-06, "loss": 0.33438336849212646, "step": 13792, "token_acc": 0.8827939076630176 }, { "epoch": 0.7442400043166244, "grad_norm": 0.25268757343292236, "learning_rate": 3.2390352407176895e-06, "loss": 0.3347395360469818, "step": 13793, "token_acc": 0.8793167934172796 }, { "epoch": 0.7442939621216209, "grad_norm": 0.4574394226074219, "learning_rate": 3.2377477171928183e-06, "loss": 0.31953251361846924, "step": 13794, "token_acc": 0.8806146572104019 }, { "epoch": 0.7443479199266174, "grad_norm": 0.41629451513290405, "learning_rate": 3.2364604001853594e-06, "loss": 0.34122779965400696, "step": 13795, "token_acc": 0.8746487667811427 }, { "epoch": 0.7444018777316139, "grad_norm": 0.3324575126171112, "learning_rate": 3.235173289734628e-06, "loss": 0.32388174533843994, "step": 13796, "token_acc": 0.8807053941908713 }, { "epoch": 0.7444558355366103, "grad_norm": 0.37805071473121643, "learning_rate": 3.233886385879932e-06, "loss": 0.3732202649116516, "step": 13797, "token_acc": 0.8712787550744249 }, { "epoch": 0.7445097933416068, "grad_norm": 0.4257446229457855, "learning_rate": 3.2325996886605704e-06, "loss": 0.3505212068557739, "step": 13798, "token_acc": 0.8789072426937738 }, { "epoch": 0.7445637511466033, "grad_norm": 0.4546241760253906, "learning_rate": 3.2313131981158454e-06, "loss": 0.2652460038661957, "step": 13799, "token_acc": 0.9011357289145411 }, { "epoch": 0.7446177089515998, "grad_norm": 0.38667845726013184, "learning_rate": 3.2300269142850414e-06, "loss": 0.324175089597702, "step": 13800, "token_acc": 0.8854300385109114 }, { "epoch": 0.7446716667565964, "grad_norm": 0.3626594841480255, "learning_rate": 3.2287408372074425e-06, "loss": 0.30582672357559204, "step": 13801, "token_acc": 0.8882657463330458 }, { "epoch": 0.7447256245615929, "grad_norm": 0.41247010231018066, "learning_rate": 3.227454966922323e-06, "loss": 0.35622620582580566, "step": 13802, "token_acc": 0.8706237782121912 }, { "epoch": 0.7447795823665894, "grad_norm": 0.38435259461402893, "learning_rate": 3.2261693034689567e-06, "loss": 0.29541918635368347, "step": 13803, "token_acc": 0.8929373165256432 }, { "epoch": 0.7448335401715859, "grad_norm": 0.4199066758155823, "learning_rate": 3.2248838468866007e-06, "loss": 0.34294164180755615, "step": 13804, "token_acc": 0.8797712296406813 }, { "epoch": 0.7448874979765823, "grad_norm": 0.5203608870506287, "learning_rate": 3.223598597214521e-06, "loss": 0.3589354455471039, "step": 13805, "token_acc": 0.8731598767545361 }, { "epoch": 0.7449414557815788, "grad_norm": 0.3589995503425598, "learning_rate": 3.222313554491968e-06, "loss": 0.32157546281814575, "step": 13806, "token_acc": 0.879007036747459 }, { "epoch": 0.7449954135865753, "grad_norm": 0.415700763463974, "learning_rate": 3.2210287187581812e-06, "loss": 0.36514103412628174, "step": 13807, "token_acc": 0.8720978156340157 }, { "epoch": 0.7450493713915718, "grad_norm": 0.40822169184684753, "learning_rate": 3.2197440900524003e-06, "loss": 0.3333982825279236, "step": 13808, "token_acc": 0.8821359024290253 }, { "epoch": 0.7451033291965683, "grad_norm": 0.5185872316360474, "learning_rate": 3.218459668413859e-06, "loss": 0.364867627620697, "step": 13809, "token_acc": 0.8744096554136785 }, { "epoch": 0.7451572870015648, "grad_norm": 0.4150996506214142, "learning_rate": 3.2171754538817834e-06, "loss": 0.35997140407562256, "step": 13810, "token_acc": 0.8716297786720322 }, { "epoch": 0.7452112448065613, "grad_norm": 0.4380807876586914, "learning_rate": 3.2158914464953894e-06, "loss": 0.3184848427772522, "step": 13811, "token_acc": 0.885070892410342 }, { "epoch": 0.7452652026115577, "grad_norm": 0.43341073393821716, "learning_rate": 3.214607646293896e-06, "loss": 0.3417791724205017, "step": 13812, "token_acc": 0.8743474133839583 }, { "epoch": 0.7453191604165542, "grad_norm": 0.4545212686061859, "learning_rate": 3.213324053316508e-06, "loss": 0.3351076543331146, "step": 13813, "token_acc": 0.8814272358990339 }, { "epoch": 0.7453731182215507, "grad_norm": 0.4317627251148224, "learning_rate": 3.2120406676024252e-06, "loss": 0.38164663314819336, "step": 13814, "token_acc": 0.8685892671324513 }, { "epoch": 0.7454270760265472, "grad_norm": 0.4443451166152954, "learning_rate": 3.210757489190842e-06, "loss": 0.32835832238197327, "step": 13815, "token_acc": 0.8804266185969656 }, { "epoch": 0.7454810338315437, "grad_norm": 0.44812506437301636, "learning_rate": 3.2094745181209474e-06, "loss": 0.36787575483322144, "step": 13816, "token_acc": 0.8683296901118683 }, { "epoch": 0.7455349916365402, "grad_norm": 0.2952682375907898, "learning_rate": 3.2081917544319186e-06, "loss": 0.33249586820602417, "step": 13817, "token_acc": 0.8756286029682325 }, { "epoch": 0.7455889494415368, "grad_norm": 0.4002786874771118, "learning_rate": 3.2069091981629372e-06, "loss": 0.33707720041275024, "step": 13818, "token_acc": 0.876660026560425 }, { "epoch": 0.7456429072465333, "grad_norm": 0.37319356203079224, "learning_rate": 3.2056268493531693e-06, "loss": 0.31937164068222046, "step": 13819, "token_acc": 0.8883495145631068 }, { "epoch": 0.7456968650515297, "grad_norm": 0.5307677984237671, "learning_rate": 3.2043447080417777e-06, "loss": 0.37714338302612305, "step": 13820, "token_acc": 0.8662175168431184 }, { "epoch": 0.7457508228565262, "grad_norm": 0.4235231578350067, "learning_rate": 3.203062774267919e-06, "loss": 0.33488577604293823, "step": 13821, "token_acc": 0.8825388797572385 }, { "epoch": 0.7458047806615227, "grad_norm": 0.37902915477752686, "learning_rate": 3.2017810480707423e-06, "loss": 0.29783540964126587, "step": 13822, "token_acc": 0.8895466164401992 }, { "epoch": 0.7458587384665192, "grad_norm": 0.38215526938438416, "learning_rate": 3.2004995294893915e-06, "loss": 0.28467392921447754, "step": 13823, "token_acc": 0.8964978111319575 }, { "epoch": 0.7459126962715157, "grad_norm": 0.4646129012107849, "learning_rate": 3.1992182185630038e-06, "loss": 0.3142896294593811, "step": 13824, "token_acc": 0.88608435735636 }, { "epoch": 0.7459666540765122, "grad_norm": 0.6019665002822876, "learning_rate": 3.1979371153307104e-06, "loss": 0.34542471170425415, "step": 13825, "token_acc": 0.8780578206078576 }, { "epoch": 0.7460206118815087, "grad_norm": 0.4895264208316803, "learning_rate": 3.1966562198316355e-06, "loss": 0.29882675409317017, "step": 13826, "token_acc": 0.8916048550236008 }, { "epoch": 0.7460745696865052, "grad_norm": 0.41816169023513794, "learning_rate": 3.1953755321048975e-06, "loss": 0.3017803728580475, "step": 13827, "token_acc": 0.8881046904866932 }, { "epoch": 0.7461285274915016, "grad_norm": 0.41515931487083435, "learning_rate": 3.1940950521896074e-06, "loss": 0.30425217747688293, "step": 13828, "token_acc": 0.885264773599386 }, { "epoch": 0.7461824852964981, "grad_norm": 0.39654847979545593, "learning_rate": 3.192814780124872e-06, "loss": 0.32722437381744385, "step": 13829, "token_acc": 0.8819128171763175 }, { "epoch": 0.7462364431014946, "grad_norm": 0.3908562660217285, "learning_rate": 3.1915347159497866e-06, "loss": 0.3212059438228607, "step": 13830, "token_acc": 0.8870269515430805 }, { "epoch": 0.7462904009064911, "grad_norm": 0.35657453536987305, "learning_rate": 3.190254859703451e-06, "loss": 0.32194221019744873, "step": 13831, "token_acc": 0.8879310344827587 }, { "epoch": 0.7463443587114876, "grad_norm": 0.3734757900238037, "learning_rate": 3.1889752114249482e-06, "loss": 0.31735947728157043, "step": 13832, "token_acc": 0.8825301204819277 }, { "epoch": 0.7463983165164841, "grad_norm": 0.4616357982158661, "learning_rate": 3.1876957711533574e-06, "loss": 0.36336278915405273, "step": 13833, "token_acc": 0.8702988702988703 }, { "epoch": 0.7464522743214806, "grad_norm": 0.3858104646205902, "learning_rate": 3.1864165389277537e-06, "loss": 0.3221205472946167, "step": 13834, "token_acc": 0.88604734150113 }, { "epoch": 0.746506232126477, "grad_norm": 0.40544402599334717, "learning_rate": 3.185137514787204e-06, "loss": 0.3403501510620117, "step": 13835, "token_acc": 0.8806270096463023 }, { "epoch": 0.7465601899314736, "grad_norm": 0.43086373805999756, "learning_rate": 3.1838586987707665e-06, "loss": 0.3904761075973511, "step": 13836, "token_acc": 0.8675068119891008 }, { "epoch": 0.7466141477364701, "grad_norm": 0.39929330348968506, "learning_rate": 3.182580090917501e-06, "loss": 0.3655683398246765, "step": 13837, "token_acc": 0.8727796447431589 }, { "epoch": 0.7466681055414666, "grad_norm": 0.5361838936805725, "learning_rate": 3.181301691266454e-06, "loss": 0.3933044672012329, "step": 13838, "token_acc": 0.8650770572840942 }, { "epoch": 0.7467220633464631, "grad_norm": 0.37796491384506226, "learning_rate": 3.180023499856667e-06, "loss": 0.3888462781906128, "step": 13839, "token_acc": 0.8653007846556233 }, { "epoch": 0.7467760211514596, "grad_norm": 0.4027371108531952, "learning_rate": 3.1787455167271796e-06, "loss": 0.3486706018447876, "step": 13840, "token_acc": 0.8815004262574595 }, { "epoch": 0.7468299789564561, "grad_norm": 0.5037593841552734, "learning_rate": 3.1774677419170142e-06, "loss": 0.3403773307800293, "step": 13841, "token_acc": 0.87739170365835 }, { "epoch": 0.7468839367614526, "grad_norm": 0.38014906644821167, "learning_rate": 3.176190175465196e-06, "loss": 0.3099677562713623, "step": 13842, "token_acc": 0.8868940754039497 }, { "epoch": 0.746937894566449, "grad_norm": 0.3293549120426178, "learning_rate": 3.174912817410739e-06, "loss": 0.3142595887184143, "step": 13843, "token_acc": 0.893780573025856 }, { "epoch": 0.7469918523714455, "grad_norm": 0.5298212766647339, "learning_rate": 3.1736356677926592e-06, "loss": 0.34502655267715454, "step": 13844, "token_acc": 0.8747269890795631 }, { "epoch": 0.747045810176442, "grad_norm": 0.3615123927593231, "learning_rate": 3.1723587266499588e-06, "loss": 0.32619422674179077, "step": 13845, "token_acc": 0.881896551724138 }, { "epoch": 0.7470997679814385, "grad_norm": 0.4322259724140167, "learning_rate": 3.171081994021634e-06, "loss": 0.38014280796051025, "step": 13846, "token_acc": 0.8718112244897959 }, { "epoch": 0.747153725786435, "grad_norm": 0.37235140800476074, "learning_rate": 3.169805469946676e-06, "loss": 0.3224002420902252, "step": 13847, "token_acc": 0.8838329764453962 }, { "epoch": 0.7472076835914315, "grad_norm": 0.3790849447250366, "learning_rate": 3.1685291544640706e-06, "loss": 0.30767303705215454, "step": 13848, "token_acc": 0.8817655282604354 }, { "epoch": 0.747261641396428, "grad_norm": 0.34630435705184937, "learning_rate": 3.1672530476127904e-06, "loss": 0.29971981048583984, "step": 13849, "token_acc": 0.8898279197637715 }, { "epoch": 0.7473155992014245, "grad_norm": 0.3849024176597595, "learning_rate": 3.165977149431817e-06, "loss": 0.3360421657562256, "step": 13850, "token_acc": 0.8805851063829787 }, { "epoch": 0.7473695570064209, "grad_norm": 0.3574157953262329, "learning_rate": 3.16470145996011e-06, "loss": 0.33787715435028076, "step": 13851, "token_acc": 0.8792102206736353 }, { "epoch": 0.7474235148114174, "grad_norm": 0.47706642746925354, "learning_rate": 3.1634259792366305e-06, "loss": 0.37749698758125305, "step": 13852, "token_acc": 0.8614371600343544 }, { "epoch": 0.747477472616414, "grad_norm": 0.4229208827018738, "learning_rate": 3.162150707300331e-06, "loss": 0.30684855580329895, "step": 13853, "token_acc": 0.8884615384615384 }, { "epoch": 0.7475314304214105, "grad_norm": 0.4143768548965454, "learning_rate": 3.1608756441901566e-06, "loss": 0.36146748065948486, "step": 13854, "token_acc": 0.8726299030250398 }, { "epoch": 0.747585388226407, "grad_norm": 0.46942105889320374, "learning_rate": 3.1596007899450486e-06, "loss": 0.33762115240097046, "step": 13855, "token_acc": 0.8801421347546808 }, { "epoch": 0.7476393460314035, "grad_norm": 0.40338408946990967, "learning_rate": 3.1583261446039416e-06, "loss": 0.3641631305217743, "step": 13856, "token_acc": 0.8728313360758362 }, { "epoch": 0.7476933038364, "grad_norm": 0.44095054268836975, "learning_rate": 3.1570517082057604e-06, "loss": 0.2917501926422119, "step": 13857, "token_acc": 0.893286505470755 }, { "epoch": 0.7477472616413964, "grad_norm": 0.3152776062488556, "learning_rate": 3.1557774807894283e-06, "loss": 0.32405248284339905, "step": 13858, "token_acc": 0.8896005210594876 }, { "epoch": 0.7478012194463929, "grad_norm": 0.4876892566680908, "learning_rate": 3.15450346239386e-06, "loss": 0.3440587520599365, "step": 13859, "token_acc": 0.8764255319148936 }, { "epoch": 0.7478551772513894, "grad_norm": 0.34552404284477234, "learning_rate": 3.153229653057961e-06, "loss": 0.25514495372772217, "step": 13860, "token_acc": 0.9071878202312985 }, { "epoch": 0.7479091350563859, "grad_norm": 0.5254814624786377, "learning_rate": 3.151956052820635e-06, "loss": 0.34169885516166687, "step": 13861, "token_acc": 0.8773510352457721 }, { "epoch": 0.7479630928613824, "grad_norm": 0.45957642793655396, "learning_rate": 3.150682661720774e-06, "loss": 0.3856073021888733, "step": 13862, "token_acc": 0.8661063099325205 }, { "epoch": 0.7480170506663789, "grad_norm": 0.35899651050567627, "learning_rate": 3.149409479797274e-06, "loss": 0.2911074161529541, "step": 13863, "token_acc": 0.892874337987482 }, { "epoch": 0.7480710084713754, "grad_norm": 0.5621888637542725, "learning_rate": 3.1481365070890134e-06, "loss": 0.3841922879219055, "step": 13864, "token_acc": 0.8693877551020408 }, { "epoch": 0.7481249662763719, "grad_norm": 0.49962952733039856, "learning_rate": 3.146863743634869e-06, "loss": 0.3795890212059021, "step": 13865, "token_acc": 0.8684123301292674 }, { "epoch": 0.7481789240813683, "grad_norm": 0.29230788350105286, "learning_rate": 3.1455911894737113e-06, "loss": 0.3144203722476959, "step": 13866, "token_acc": 0.8909000124828361 }, { "epoch": 0.7482328818863648, "grad_norm": 0.4807251989841461, "learning_rate": 3.144318844644403e-06, "loss": 0.32547712326049805, "step": 13867, "token_acc": 0.8859506203784936 }, { "epoch": 0.7482868396913613, "grad_norm": 0.36904630064964294, "learning_rate": 3.1430467091858e-06, "loss": 0.28187787532806396, "step": 13868, "token_acc": 0.900448053766452 }, { "epoch": 0.7483407974963578, "grad_norm": 0.44574013352394104, "learning_rate": 3.1417747831367516e-06, "loss": 0.3297935426235199, "step": 13869, "token_acc": 0.8836344995514546 }, { "epoch": 0.7483947553013544, "grad_norm": 0.30930548906326294, "learning_rate": 3.140503066536108e-06, "loss": 0.31452158093452454, "step": 13870, "token_acc": 0.8905562742561449 }, { "epoch": 0.7484487131063509, "grad_norm": 0.48667311668395996, "learning_rate": 3.139231559422704e-06, "loss": 0.35094934701919556, "step": 13871, "token_acc": 0.8741643727880456 }, { "epoch": 0.7485026709113474, "grad_norm": 0.4749416708946228, "learning_rate": 3.137960261835371e-06, "loss": 0.327519953250885, "step": 13872, "token_acc": 0.8818028801196933 }, { "epoch": 0.7485566287163438, "grad_norm": 0.4716655910015106, "learning_rate": 3.1366891738129335e-06, "loss": 0.33704879879951477, "step": 13873, "token_acc": 0.8796452056889432 }, { "epoch": 0.7486105865213403, "grad_norm": 0.34019389748573303, "learning_rate": 3.1354182953942113e-06, "loss": 0.3016853332519531, "step": 13874, "token_acc": 0.8890901490873624 }, { "epoch": 0.7486645443263368, "grad_norm": 0.45779553055763245, "learning_rate": 3.134147626618016e-06, "loss": 0.32836633920669556, "step": 13875, "token_acc": 0.8796205886455573 }, { "epoch": 0.7487185021313333, "grad_norm": 0.45114919543266296, "learning_rate": 3.132877167523153e-06, "loss": 0.38157349824905396, "step": 13876, "token_acc": 0.8664673642252118 }, { "epoch": 0.7487724599363298, "grad_norm": 0.41296136379241943, "learning_rate": 3.131606918148423e-06, "loss": 0.31014692783355713, "step": 13877, "token_acc": 0.8884223918575064 }, { "epoch": 0.7488264177413263, "grad_norm": 0.33878448605537415, "learning_rate": 3.130336878532618e-06, "loss": 0.3452972173690796, "step": 13878, "token_acc": 0.8783599653164871 }, { "epoch": 0.7488803755463228, "grad_norm": 0.42196106910705566, "learning_rate": 3.1290670487145245e-06, "loss": 0.32884782552719116, "step": 13879, "token_acc": 0.8833520669414262 }, { "epoch": 0.7489343333513193, "grad_norm": 0.43305841088294983, "learning_rate": 3.127797428732925e-06, "loss": 0.34984642267227173, "step": 13880, "token_acc": 0.8737631662942866 }, { "epoch": 0.7489882911563157, "grad_norm": 0.4799780249595642, "learning_rate": 3.12652801862659e-06, "loss": 0.3825950026512146, "step": 13881, "token_acc": 0.866424352567015 }, { "epoch": 0.7490422489613122, "grad_norm": 0.3535197377204895, "learning_rate": 3.1252588184342868e-06, "loss": 0.34852397441864014, "step": 13882, "token_acc": 0.879079634464752 }, { "epoch": 0.7490962067663087, "grad_norm": 0.4919346868991852, "learning_rate": 3.123989828194781e-06, "loss": 0.37093931436538696, "step": 13883, "token_acc": 0.8672363802838394 }, { "epoch": 0.7491501645713052, "grad_norm": 0.3255261182785034, "learning_rate": 3.1227210479468253e-06, "loss": 0.32353419065475464, "step": 13884, "token_acc": 0.8817107629077832 }, { "epoch": 0.7492041223763017, "grad_norm": 0.41209325194358826, "learning_rate": 3.1214524777291666e-06, "loss": 0.3190949559211731, "step": 13885, "token_acc": 0.8870852232036286 }, { "epoch": 0.7492580801812982, "grad_norm": 0.4628542363643646, "learning_rate": 3.120184117580547e-06, "loss": 0.3781687319278717, "step": 13886, "token_acc": 0.8694375091790277 }, { "epoch": 0.7493120379862948, "grad_norm": 0.42967748641967773, "learning_rate": 3.118915967539703e-06, "loss": 0.34443479776382446, "step": 13887, "token_acc": 0.8783315765697937 }, { "epoch": 0.7493659957912913, "grad_norm": 0.42568662762641907, "learning_rate": 3.1176480276453615e-06, "loss": 0.39049530029296875, "step": 13888, "token_acc": 0.8678556382530543 }, { "epoch": 0.7494199535962877, "grad_norm": 0.42662152647972107, "learning_rate": 3.116380297936248e-06, "loss": 0.423527330160141, "step": 13889, "token_acc": 0.8603137338597802 }, { "epoch": 0.7494739114012842, "grad_norm": 0.3846459686756134, "learning_rate": 3.115112778451076e-06, "loss": 0.3086325228214264, "step": 13890, "token_acc": 0.8889633901032586 }, { "epoch": 0.7495278692062807, "grad_norm": 0.35417938232421875, "learning_rate": 3.113845469228556e-06, "loss": 0.34076738357543945, "step": 13891, "token_acc": 0.8799024390243902 }, { "epoch": 0.7495818270112772, "grad_norm": 0.42757150530815125, "learning_rate": 3.112578370307391e-06, "loss": 0.3390904664993286, "step": 13892, "token_acc": 0.8803339517625232 }, { "epoch": 0.7496357848162737, "grad_norm": 0.4261491596698761, "learning_rate": 3.111311481726279e-06, "loss": 0.3930055797100067, "step": 13893, "token_acc": 0.8638132295719845 }, { "epoch": 0.7496897426212702, "grad_norm": 0.5217298269271851, "learning_rate": 3.1100448035239063e-06, "loss": 0.34823155403137207, "step": 13894, "token_acc": 0.875 }, { "epoch": 0.7497437004262667, "grad_norm": 0.41914430260658264, "learning_rate": 3.1087783357389635e-06, "loss": 0.3206653594970703, "step": 13895, "token_acc": 0.8847148288973384 }, { "epoch": 0.7497976582312631, "grad_norm": 0.384774386882782, "learning_rate": 3.107512078410125e-06, "loss": 0.3137102425098419, "step": 13896, "token_acc": 0.8844677137870856 }, { "epoch": 0.7498516160362596, "grad_norm": 0.4124573767185211, "learning_rate": 3.1062460315760635e-06, "loss": 0.3915990889072418, "step": 13897, "token_acc": 0.86579754601227 }, { "epoch": 0.7499055738412561, "grad_norm": 0.41660869121551514, "learning_rate": 3.104980195275441e-06, "loss": 0.3191491365432739, "step": 13898, "token_acc": 0.8836012861736334 }, { "epoch": 0.7499595316462526, "grad_norm": 0.43263912200927734, "learning_rate": 3.103714569546917e-06, "loss": 0.33374637365341187, "step": 13899, "token_acc": 0.8823529411764706 }, { "epoch": 0.7500134894512491, "grad_norm": 0.3537546396255493, "learning_rate": 3.1024491544291437e-06, "loss": 0.3265988826751709, "step": 13900, "token_acc": 0.8862574010595201 }, { "epoch": 0.7500674472562456, "grad_norm": 0.3252236247062683, "learning_rate": 3.1011839499607634e-06, "loss": 0.3356938362121582, "step": 13901, "token_acc": 0.8781866130855205 }, { "epoch": 0.7501214050612421, "grad_norm": 0.42452317476272583, "learning_rate": 3.09991895618042e-06, "loss": 0.36605581641197205, "step": 13902, "token_acc": 0.872704802259887 }, { "epoch": 0.7501753628662386, "grad_norm": 0.31516021490097046, "learning_rate": 3.098654173126745e-06, "loss": 0.2670263648033142, "step": 13903, "token_acc": 0.8986602052451539 }, { "epoch": 0.750229320671235, "grad_norm": 0.3608608841896057, "learning_rate": 3.097389600838364e-06, "loss": 0.3088047504425049, "step": 13904, "token_acc": 0.8891108891108891 }, { "epoch": 0.7502832784762316, "grad_norm": 0.398372620344162, "learning_rate": 3.0961252393538964e-06, "loss": 0.29858219623565674, "step": 13905, "token_acc": 0.8876718713586577 }, { "epoch": 0.7503372362812281, "grad_norm": 0.45676177740097046, "learning_rate": 3.0948610887119546e-06, "loss": 0.36966371536254883, "step": 13906, "token_acc": 0.8753904146472805 }, { "epoch": 0.7503911940862246, "grad_norm": 0.47588905692100525, "learning_rate": 3.0935971489511474e-06, "loss": 0.3629954755306244, "step": 13907, "token_acc": 0.8689896530736457 }, { "epoch": 0.7504451518912211, "grad_norm": 0.31650495529174805, "learning_rate": 3.092333420110073e-06, "loss": 0.3140590190887451, "step": 13908, "token_acc": 0.8897196261682243 }, { "epoch": 0.7504991096962176, "grad_norm": 0.4810475707054138, "learning_rate": 3.091069902227326e-06, "loss": 0.3536039888858795, "step": 13909, "token_acc": 0.8803196109084592 }, { "epoch": 0.7505530675012141, "grad_norm": 0.3740379512310028, "learning_rate": 3.0898065953414957e-06, "loss": 0.3106934726238251, "step": 13910, "token_acc": 0.8904055390702275 }, { "epoch": 0.7506070253062106, "grad_norm": 0.33699867129325867, "learning_rate": 3.088543499491161e-06, "loss": 0.3363955616950989, "step": 13911, "token_acc": 0.8793981755716147 }, { "epoch": 0.750660983111207, "grad_norm": 0.368749737739563, "learning_rate": 3.087280614714897e-06, "loss": 0.3101630210876465, "step": 13912, "token_acc": 0.8901132075471698 }, { "epoch": 0.7507149409162035, "grad_norm": 0.553572952747345, "learning_rate": 3.0860179410512725e-06, "loss": 0.30425208806991577, "step": 13913, "token_acc": 0.8894492636675408 }, { "epoch": 0.7507688987212, "grad_norm": 0.38054174184799194, "learning_rate": 3.0847554785388457e-06, "loss": 0.3822796642780304, "step": 13914, "token_acc": 0.8652591170825336 }, { "epoch": 0.7508228565261965, "grad_norm": 0.41363489627838135, "learning_rate": 3.0834932272161775e-06, "loss": 0.30199718475341797, "step": 13915, "token_acc": 0.8932894933492751 }, { "epoch": 0.750876814331193, "grad_norm": 0.43238675594329834, "learning_rate": 3.0822311871218157e-06, "loss": 0.2959287166595459, "step": 13916, "token_acc": 0.8941815360744764 }, { "epoch": 0.7509307721361895, "grad_norm": 0.3917394280433655, "learning_rate": 3.0809693582943e-06, "loss": 0.3055342733860016, "step": 13917, "token_acc": 0.8881789137380192 }, { "epoch": 0.750984729941186, "grad_norm": 0.381721556186676, "learning_rate": 3.079707740772169e-06, "loss": 0.33400872349739075, "step": 13918, "token_acc": 0.8778791829639288 }, { "epoch": 0.7510386877461824, "grad_norm": 0.4733453392982483, "learning_rate": 3.07844633459395e-06, "loss": 0.3504182696342468, "step": 13919, "token_acc": 0.8785975928833072 }, { "epoch": 0.7510926455511789, "grad_norm": 0.4838908910751343, "learning_rate": 3.077185139798162e-06, "loss": 0.3215966820716858, "step": 13920, "token_acc": 0.8863985262511513 }, { "epoch": 0.7511466033561754, "grad_norm": 0.33040910959243774, "learning_rate": 3.075924156423331e-06, "loss": 0.3361744284629822, "step": 13921, "token_acc": 0.8835817524841915 }, { "epoch": 0.751200561161172, "grad_norm": 0.41776326298713684, "learning_rate": 3.0746633845079654e-06, "loss": 0.30063480138778687, "step": 13922, "token_acc": 0.8925898752751283 }, { "epoch": 0.7512545189661685, "grad_norm": 0.3747994303703308, "learning_rate": 3.0734028240905632e-06, "loss": 0.30116522312164307, "step": 13923, "token_acc": 0.8840236686390532 }, { "epoch": 0.751308476771165, "grad_norm": 0.36593788862228394, "learning_rate": 3.072142475209623e-06, "loss": 0.3196398615837097, "step": 13924, "token_acc": 0.8807686708400059 }, { "epoch": 0.7513624345761615, "grad_norm": 0.3578358590602875, "learning_rate": 3.070882337903638e-06, "loss": 0.35993415117263794, "step": 13925, "token_acc": 0.8762650114694374 }, { "epoch": 0.751416392381158, "grad_norm": 0.4196822941303253, "learning_rate": 3.0696224122110906e-06, "loss": 0.2777262330055237, "step": 13926, "token_acc": 0.8968664435655613 }, { "epoch": 0.7514703501861544, "grad_norm": 0.39770737290382385, "learning_rate": 3.068362698170456e-06, "loss": 0.32448726892471313, "step": 13927, "token_acc": 0.8862068965517241 }, { "epoch": 0.7515243079911509, "grad_norm": 0.31736651062965393, "learning_rate": 3.0671031958202126e-06, "loss": 0.3042655885219574, "step": 13928, "token_acc": 0.889780631353665 }, { "epoch": 0.7515782657961474, "grad_norm": 0.31533053517341614, "learning_rate": 3.0658439051988197e-06, "loss": 0.32935473322868347, "step": 13929, "token_acc": 0.8848378174503394 }, { "epoch": 0.7516322236011439, "grad_norm": 0.39556562900543213, "learning_rate": 3.0645848263447387e-06, "loss": 0.3554881513118744, "step": 13930, "token_acc": 0.8719799646434885 }, { "epoch": 0.7516861814061404, "grad_norm": 0.43273094296455383, "learning_rate": 3.0633259592964202e-06, "loss": 0.3454665541648865, "step": 13931, "token_acc": 0.8761972837741244 }, { "epoch": 0.7517401392111369, "grad_norm": 0.4073200523853302, "learning_rate": 3.06206730409231e-06, "loss": 0.35383206605911255, "step": 13932, "token_acc": 0.8752256046203827 }, { "epoch": 0.7517940970161334, "grad_norm": 0.38250085711479187, "learning_rate": 3.060808860770843e-06, "loss": 0.32097750902175903, "step": 13933, "token_acc": 0.8828939301042306 }, { "epoch": 0.7518480548211299, "grad_norm": 0.409690797328949, "learning_rate": 3.059550629370459e-06, "loss": 0.29134318232536316, "step": 13934, "token_acc": 0.8881903143585387 }, { "epoch": 0.7519020126261263, "grad_norm": 0.3418865203857422, "learning_rate": 3.0582926099295807e-06, "loss": 0.28186315298080444, "step": 13935, "token_acc": 0.8981381751038622 }, { "epoch": 0.7519559704311228, "grad_norm": 0.3432040512561798, "learning_rate": 3.057034802486628e-06, "loss": 0.3892706036567688, "step": 13936, "token_acc": 0.8638233680746022 }, { "epoch": 0.7520099282361193, "grad_norm": 0.5366454124450684, "learning_rate": 3.055777207080013e-06, "loss": 0.36109521985054016, "step": 13937, "token_acc": 0.873211091234347 }, { "epoch": 0.7520638860411158, "grad_norm": 0.4154526889324188, "learning_rate": 3.0545198237481433e-06, "loss": 0.3095203638076782, "step": 13938, "token_acc": 0.8860646314982756 }, { "epoch": 0.7521178438461124, "grad_norm": 0.4266883432865143, "learning_rate": 3.0532626525294186e-06, "loss": 0.32239559292793274, "step": 13939, "token_acc": 0.8798565451285116 }, { "epoch": 0.7521718016511089, "grad_norm": 0.40126121044158936, "learning_rate": 3.0520056934622335e-06, "loss": 0.34114909172058105, "step": 13940, "token_acc": 0.8806997084548105 }, { "epoch": 0.7522257594561054, "grad_norm": 0.36164647340774536, "learning_rate": 3.0507489465849726e-06, "loss": 0.3699936866760254, "step": 13941, "token_acc": 0.8688295560384973 }, { "epoch": 0.7522797172611018, "grad_norm": 0.38464558124542236, "learning_rate": 3.0494924119360204e-06, "loss": 0.30630984902381897, "step": 13942, "token_acc": 0.8896623222748815 }, { "epoch": 0.7523336750660983, "grad_norm": 0.39206671714782715, "learning_rate": 3.0482360895537476e-06, "loss": 0.35448122024536133, "step": 13943, "token_acc": 0.8766396874127825 }, { "epoch": 0.7523876328710948, "grad_norm": 0.6147851943969727, "learning_rate": 3.0469799794765243e-06, "loss": 0.3156006932258606, "step": 13944, "token_acc": 0.8836785795583884 }, { "epoch": 0.7524415906760913, "grad_norm": 0.3443312644958496, "learning_rate": 3.0457240817427114e-06, "loss": 0.3319934010505676, "step": 13945, "token_acc": 0.877893447642376 }, { "epoch": 0.7524955484810878, "grad_norm": 0.3929790258407593, "learning_rate": 3.0444683963906583e-06, "loss": 0.3126494884490967, "step": 13946, "token_acc": 0.8875564334085779 }, { "epoch": 0.7525495062860843, "grad_norm": 0.37624409794807434, "learning_rate": 3.0432129234587237e-06, "loss": 0.30439430475234985, "step": 13947, "token_acc": 0.8915821367408774 }, { "epoch": 0.7526034640910808, "grad_norm": 0.37206417322158813, "learning_rate": 3.0419576629852423e-06, "loss": 0.2959510385990143, "step": 13948, "token_acc": 0.891552109035366 }, { "epoch": 0.7526574218960773, "grad_norm": 0.47339969873428345, "learning_rate": 3.040702615008552e-06, "loss": 0.3102473318576813, "step": 13949, "token_acc": 0.886498223450454 }, { "epoch": 0.7527113797010737, "grad_norm": 0.4335213899612427, "learning_rate": 3.039447779566981e-06, "loss": 0.32973402738571167, "step": 13950, "token_acc": 0.8826558265582656 }, { "epoch": 0.7527653375060702, "grad_norm": 0.39560991525650024, "learning_rate": 3.0381931566988498e-06, "loss": 0.31932443380355835, "step": 13951, "token_acc": 0.8818135506877228 }, { "epoch": 0.7528192953110667, "grad_norm": 0.43292319774627686, "learning_rate": 3.0369387464424737e-06, "loss": 0.3132774829864502, "step": 13952, "token_acc": 0.8896465349290287 }, { "epoch": 0.7528732531160632, "grad_norm": 0.3617081344127655, "learning_rate": 3.0356845488361675e-06, "loss": 0.3316764235496521, "step": 13953, "token_acc": 0.8813181042405146 }, { "epoch": 0.7529272109210597, "grad_norm": 0.4827636182308197, "learning_rate": 3.0344305639182313e-06, "loss": 0.37880179286003113, "step": 13954, "token_acc": 0.8721936903888481 }, { "epoch": 0.7529811687260562, "grad_norm": 0.4767456650733948, "learning_rate": 3.0331767917269607e-06, "loss": 0.37270134687423706, "step": 13955, "token_acc": 0.8741247860588144 }, { "epoch": 0.7530351265310528, "grad_norm": 0.5105333924293518, "learning_rate": 3.031923232300645e-06, "loss": 0.2706828713417053, "step": 13956, "token_acc": 0.9000555247084953 }, { "epoch": 0.7530890843360493, "grad_norm": 0.4144563376903534, "learning_rate": 3.0306698856775717e-06, "loss": 0.35532745718955994, "step": 13957, "token_acc": 0.8776607919432364 }, { "epoch": 0.7531430421410457, "grad_norm": 0.3386225402355194, "learning_rate": 3.0294167518960114e-06, "loss": 0.30709224939346313, "step": 13958, "token_acc": 0.8869775893397941 }, { "epoch": 0.7531969999460422, "grad_norm": 0.36808091402053833, "learning_rate": 3.0281638309942354e-06, "loss": 0.28459030389785767, "step": 13959, "token_acc": 0.8976515948124781 }, { "epoch": 0.7532509577510387, "grad_norm": 0.43874049186706543, "learning_rate": 3.026911123010512e-06, "loss": 0.334029883146286, "step": 13960, "token_acc": 0.8764137363767222 }, { "epoch": 0.7533049155560352, "grad_norm": 0.4245113134384155, "learning_rate": 3.0256586279830957e-06, "loss": 0.34021368622779846, "step": 13961, "token_acc": 0.8791457885946075 }, { "epoch": 0.7533588733610317, "grad_norm": 0.31579962372779846, "learning_rate": 3.0244063459502382e-06, "loss": 0.314273476600647, "step": 13962, "token_acc": 0.8848678375022175 }, { "epoch": 0.7534128311660282, "grad_norm": 0.3481135070323944, "learning_rate": 3.023154276950183e-06, "loss": 0.31741273403167725, "step": 13963, "token_acc": 0.882631489937426 }, { "epoch": 0.7534667889710247, "grad_norm": 0.3159303665161133, "learning_rate": 3.021902421021169e-06, "loss": 0.29525336623191833, "step": 13964, "token_acc": 0.89036083774918 }, { "epoch": 0.7535207467760211, "grad_norm": 0.4517628848552704, "learning_rate": 3.0206507782014237e-06, "loss": 0.34119102358818054, "step": 13965, "token_acc": 0.881998530492285 }, { "epoch": 0.7535747045810176, "grad_norm": 0.42640432715415955, "learning_rate": 3.0193993485291796e-06, "loss": 0.37133193016052246, "step": 13966, "token_acc": 0.8676685147359207 }, { "epoch": 0.7536286623860141, "grad_norm": 0.4756488502025604, "learning_rate": 3.01814813204265e-06, "loss": 0.3362991213798523, "step": 13967, "token_acc": 0.8791673188292377 }, { "epoch": 0.7536826201910106, "grad_norm": 0.4535262882709503, "learning_rate": 3.0168971287800462e-06, "loss": 0.39816272258758545, "step": 13968, "token_acc": 0.8618573413617415 }, { "epoch": 0.7537365779960071, "grad_norm": 0.34819552302360535, "learning_rate": 3.0156463387795767e-06, "loss": 0.33071839809417725, "step": 13969, "token_acc": 0.8808634772462077 }, { "epoch": 0.7537905358010036, "grad_norm": 0.5012693405151367, "learning_rate": 3.0143957620794373e-06, "loss": 0.34600841999053955, "step": 13970, "token_acc": 0.8730633802816902 }, { "epoch": 0.7538444936060001, "grad_norm": 0.3706343173980713, "learning_rate": 3.0131453987178216e-06, "loss": 0.28888988494873047, "step": 13971, "token_acc": 0.8932038834951457 }, { "epoch": 0.7538984514109967, "grad_norm": 0.38347524404525757, "learning_rate": 3.0118952487329144e-06, "loss": 0.37396514415740967, "step": 13972, "token_acc": 0.8685393258426967 }, { "epoch": 0.753952409215993, "grad_norm": 0.35656341910362244, "learning_rate": 3.0106453121628964e-06, "loss": 0.3707425892353058, "step": 13973, "token_acc": 0.8685871056241427 }, { "epoch": 0.7540063670209896, "grad_norm": 0.46405306458473206, "learning_rate": 3.0093955890459404e-06, "loss": 0.34680408239364624, "step": 13974, "token_acc": 0.8763422818791946 }, { "epoch": 0.7540603248259861, "grad_norm": 0.42014041543006897, "learning_rate": 3.00814607942021e-06, "loss": 0.3420172333717346, "step": 13975, "token_acc": 0.8814352574102964 }, { "epoch": 0.7541142826309826, "grad_norm": 0.35764792561531067, "learning_rate": 3.0068967833238684e-06, "loss": 0.3137083649635315, "step": 13976, "token_acc": 0.8816990869392616 }, { "epoch": 0.7541682404359791, "grad_norm": 0.43509402871131897, "learning_rate": 3.0056477007950657e-06, "loss": 0.3150138854980469, "step": 13977, "token_acc": 0.8840820854132002 }, { "epoch": 0.7542221982409756, "grad_norm": 0.5193960666656494, "learning_rate": 3.0043988318719477e-06, "loss": 0.39272356033325195, "step": 13978, "token_acc": 0.8629228030425635 }, { "epoch": 0.7542761560459721, "grad_norm": 0.33531710505485535, "learning_rate": 3.0031501765926596e-06, "loss": 0.34346964955329895, "step": 13979, "token_acc": 0.8776929765240074 }, { "epoch": 0.7543301138509686, "grad_norm": 0.37768083810806274, "learning_rate": 3.001901734995333e-06, "loss": 0.28906115889549255, "step": 13980, "token_acc": 0.8942041209784312 }, { "epoch": 0.754384071655965, "grad_norm": 0.48124632239341736, "learning_rate": 3.0006535071180933e-06, "loss": 0.35227906703948975, "step": 13981, "token_acc": 0.8764605300655457 }, { "epoch": 0.7544380294609615, "grad_norm": 0.4041840732097626, "learning_rate": 2.9994054929990614e-06, "loss": 0.32985401153564453, "step": 13982, "token_acc": 0.8845791949817041 }, { "epoch": 0.754491987265958, "grad_norm": 0.45439934730529785, "learning_rate": 2.998157692676352e-06, "loss": 0.33722835779190063, "step": 13983, "token_acc": 0.8770555990602976 }, { "epoch": 0.7545459450709545, "grad_norm": 0.5027700662612915, "learning_rate": 2.9969101061880736e-06, "loss": 0.390306681394577, "step": 13984, "token_acc": 0.8618589743589744 }, { "epoch": 0.754599902875951, "grad_norm": 0.3855922818183899, "learning_rate": 2.995662733572322e-06, "loss": 0.3104286789894104, "step": 13985, "token_acc": 0.8833786594144937 }, { "epoch": 0.7546538606809475, "grad_norm": 0.3739514648914337, "learning_rate": 2.9944155748671987e-06, "loss": 0.3021051585674286, "step": 13986, "token_acc": 0.891549513363201 }, { "epoch": 0.754707818485944, "grad_norm": 0.42633357644081116, "learning_rate": 2.993168630110789e-06, "loss": 0.3100297451019287, "step": 13987, "token_acc": 0.8862680288461539 }, { "epoch": 0.7547617762909404, "grad_norm": 0.4592927396297455, "learning_rate": 2.9919218993411735e-06, "loss": 0.32295528054237366, "step": 13988, "token_acc": 0.8802281368821293 }, { "epoch": 0.7548157340959369, "grad_norm": 0.5176997780799866, "learning_rate": 2.9906753825964275e-06, "loss": 0.31891345977783203, "step": 13989, "token_acc": 0.8845082494107563 }, { "epoch": 0.7548696919009334, "grad_norm": 0.3636164665222168, "learning_rate": 2.989429079914619e-06, "loss": 0.3144850730895996, "step": 13990, "token_acc": 0.882136473556934 }, { "epoch": 0.75492364970593, "grad_norm": 0.4271618127822876, "learning_rate": 2.98818299133381e-06, "loss": 0.3333243131637573, "step": 13991, "token_acc": 0.8792977322604243 }, { "epoch": 0.7549776075109265, "grad_norm": 0.4049966037273407, "learning_rate": 2.986937116892056e-06, "loss": 0.3103594183921814, "step": 13992, "token_acc": 0.8947883533153903 }, { "epoch": 0.755031565315923, "grad_norm": 0.36141714453697205, "learning_rate": 2.985691456627404e-06, "loss": 0.27346307039260864, "step": 13993, "token_acc": 0.8996096608929007 }, { "epoch": 0.7550855231209195, "grad_norm": 0.4335704743862152, "learning_rate": 2.9844460105778984e-06, "loss": 0.2863560914993286, "step": 13994, "token_acc": 0.8988970588235294 }, { "epoch": 0.755139480925916, "grad_norm": 0.4175480008125305, "learning_rate": 2.983200778781573e-06, "loss": 0.36372700333595276, "step": 13995, "token_acc": 0.8671337118009421 }, { "epoch": 0.7551934387309124, "grad_norm": 0.4859026372432709, "learning_rate": 2.981955761276459e-06, "loss": 0.35006195306777954, "step": 13996, "token_acc": 0.8752436647173489 }, { "epoch": 0.7552473965359089, "grad_norm": 0.34772419929504395, "learning_rate": 2.980710958100573e-06, "loss": 0.31218165159225464, "step": 13997, "token_acc": 0.8912813272149664 }, { "epoch": 0.7553013543409054, "grad_norm": 0.3474214971065521, "learning_rate": 2.97946636929194e-06, "loss": 0.3847156763076782, "step": 13998, "token_acc": 0.8731528421960953 }, { "epoch": 0.7553553121459019, "grad_norm": 0.38267388939857483, "learning_rate": 2.978221994888564e-06, "loss": 0.3690606355667114, "step": 13999, "token_acc": 0.8709564812659996 }, { "epoch": 0.7554092699508984, "grad_norm": 0.3384856879711151, "learning_rate": 2.9769778349284496e-06, "loss": 0.3326834738254547, "step": 14000, "token_acc": 0.8800246609124538 }, { "epoch": 0.7554632277558949, "grad_norm": 0.4841843545436859, "learning_rate": 2.9757338894495913e-06, "loss": 0.31352922320365906, "step": 14001, "token_acc": 0.8883782982508153 }, { "epoch": 0.7555171855608914, "grad_norm": 0.48374268412590027, "learning_rate": 2.974490158489981e-06, "loss": 0.31940171122550964, "step": 14002, "token_acc": 0.8867689755288262 }, { "epoch": 0.7555711433658878, "grad_norm": 0.40412425994873047, "learning_rate": 2.9732466420876003e-06, "loss": 0.39134031534194946, "step": 14003, "token_acc": 0.8627329192546583 }, { "epoch": 0.7556251011708843, "grad_norm": 0.33397382497787476, "learning_rate": 2.972003340280423e-06, "loss": 0.32156795263290405, "step": 14004, "token_acc": 0.8864087301587301 }, { "epoch": 0.7556790589758808, "grad_norm": 0.33293870091438293, "learning_rate": 2.9707602531064294e-06, "loss": 0.3452516794204712, "step": 14005, "token_acc": 0.8779953014878622 }, { "epoch": 0.7557330167808773, "grad_norm": 0.3143705427646637, "learning_rate": 2.9695173806035724e-06, "loss": 0.3045787811279297, "step": 14006, "token_acc": 0.8911841722432354 }, { "epoch": 0.7557869745858738, "grad_norm": 0.3761165738105774, "learning_rate": 2.968274722809814e-06, "loss": 0.3465856909751892, "step": 14007, "token_acc": 0.8771710968237213 }, { "epoch": 0.7558409323908704, "grad_norm": 0.29109427332878113, "learning_rate": 2.967032279763102e-06, "loss": 0.2514491081237793, "step": 14008, "token_acc": 0.9067559342665855 }, { "epoch": 0.7558948901958669, "grad_norm": 0.49514520168304443, "learning_rate": 2.9657900515013826e-06, "loss": 0.3505650758743286, "step": 14009, "token_acc": 0.8719172633253779 }, { "epoch": 0.7559488480008634, "grad_norm": 0.4387223720550537, "learning_rate": 2.9645480380625904e-06, "loss": 0.29342204332351685, "step": 14010, "token_acc": 0.8920477137176939 }, { "epoch": 0.7560028058058598, "grad_norm": 0.5413414835929871, "learning_rate": 2.963306239484659e-06, "loss": 0.3185734152793884, "step": 14011, "token_acc": 0.8799838089455576 }, { "epoch": 0.7560567636108563, "grad_norm": 0.5050960183143616, "learning_rate": 2.9620646558055133e-06, "loss": 0.38521087169647217, "step": 14012, "token_acc": 0.8696249484819343 }, { "epoch": 0.7561107214158528, "grad_norm": 0.48667508363723755, "learning_rate": 2.9608232870630684e-06, "loss": 0.29799026250839233, "step": 14013, "token_acc": 0.8881069669247009 }, { "epoch": 0.7561646792208493, "grad_norm": 0.41001105308532715, "learning_rate": 2.9595821332952366e-06, "loss": 0.3344084620475769, "step": 14014, "token_acc": 0.8840808591282375 }, { "epoch": 0.7562186370258458, "grad_norm": 0.3527398407459259, "learning_rate": 2.958341194539922e-06, "loss": 0.31660589575767517, "step": 14015, "token_acc": 0.8804573804573804 }, { "epoch": 0.7562725948308423, "grad_norm": 0.34328752756118774, "learning_rate": 2.9571004708350227e-06, "loss": 0.28909704089164734, "step": 14016, "token_acc": 0.8959821428571428 }, { "epoch": 0.7563265526358388, "grad_norm": 0.5059858560562134, "learning_rate": 2.955859962218427e-06, "loss": 0.3667936325073242, "step": 14017, "token_acc": 0.8734693877551021 }, { "epoch": 0.7563805104408353, "grad_norm": 0.30151835083961487, "learning_rate": 2.954619668728026e-06, "loss": 0.3398747146129608, "step": 14018, "token_acc": 0.8805031446540881 }, { "epoch": 0.7564344682458317, "grad_norm": 0.41296982765197754, "learning_rate": 2.953379590401695e-06, "loss": 0.3621333837509155, "step": 14019, "token_acc": 0.8687382024103383 }, { "epoch": 0.7564884260508282, "grad_norm": 0.35473379492759705, "learning_rate": 2.9521397272773045e-06, "loss": 0.2795010209083557, "step": 14020, "token_acc": 0.895867467686146 }, { "epoch": 0.7565423838558247, "grad_norm": 0.45342469215393066, "learning_rate": 2.9509000793927213e-06, "loss": 0.37624871730804443, "step": 14021, "token_acc": 0.8709315375982043 }, { "epoch": 0.7565963416608212, "grad_norm": 0.35839566588401794, "learning_rate": 2.9496606467858023e-06, "loss": 0.357840895652771, "step": 14022, "token_acc": 0.8726207906295754 }, { "epoch": 0.7566502994658177, "grad_norm": 0.35801610350608826, "learning_rate": 2.9484214294944e-06, "loss": 0.2884070575237274, "step": 14023, "token_acc": 0.8927901366925757 }, { "epoch": 0.7567042572708143, "grad_norm": 0.3524664044380188, "learning_rate": 2.947182427556361e-06, "loss": 0.3110964298248291, "step": 14024, "token_acc": 0.8873927392739274 }, { "epoch": 0.7567582150758108, "grad_norm": 0.3424481153488159, "learning_rate": 2.945943641009522e-06, "loss": 0.2770000696182251, "step": 14025, "token_acc": 0.899442082890542 }, { "epoch": 0.7568121728808072, "grad_norm": 0.364892840385437, "learning_rate": 2.944705069891717e-06, "loss": 0.3314048945903778, "step": 14026, "token_acc": 0.8784026323661382 }, { "epoch": 0.7568661306858037, "grad_norm": 0.4343198835849762, "learning_rate": 2.9434667142407713e-06, "loss": 0.355979323387146, "step": 14027, "token_acc": 0.8736535975872469 }, { "epoch": 0.7569200884908002, "grad_norm": 0.3253260850906372, "learning_rate": 2.9422285740945023e-06, "loss": 0.3918269872665405, "step": 14028, "token_acc": 0.8637605281129069 }, { "epoch": 0.7569740462957967, "grad_norm": 0.3876396417617798, "learning_rate": 2.940990649490725e-06, "loss": 0.3680437505245209, "step": 14029, "token_acc": 0.8722352941176471 }, { "epoch": 0.7570280041007932, "grad_norm": 0.5907277464866638, "learning_rate": 2.9397529404672396e-06, "loss": 0.34386515617370605, "step": 14030, "token_acc": 0.8830914723626284 }, { "epoch": 0.7570819619057897, "grad_norm": 0.4969029426574707, "learning_rate": 2.9385154470618547e-06, "loss": 0.2901386022567749, "step": 14031, "token_acc": 0.8916171410697529 }, { "epoch": 0.7571359197107862, "grad_norm": 0.38593292236328125, "learning_rate": 2.9372781693123575e-06, "loss": 0.33587706089019775, "step": 14032, "token_acc": 0.8799915931063472 }, { "epoch": 0.7571898775157827, "grad_norm": 0.47281065583229065, "learning_rate": 2.936041107256534e-06, "loss": 0.2844151258468628, "step": 14033, "token_acc": 0.8912575366063739 }, { "epoch": 0.7572438353207791, "grad_norm": 0.43366941809654236, "learning_rate": 2.9348042609321657e-06, "loss": 0.2772457003593445, "step": 14034, "token_acc": 0.8938578586008535 }, { "epoch": 0.7572977931257756, "grad_norm": 0.4662250280380249, "learning_rate": 2.9335676303770244e-06, "loss": 0.359407514333725, "step": 14035, "token_acc": 0.8705090964684299 }, { "epoch": 0.7573517509307721, "grad_norm": 0.5383169651031494, "learning_rate": 2.932331215628872e-06, "loss": 0.31267285346984863, "step": 14036, "token_acc": 0.8857312018946122 }, { "epoch": 0.7574057087357686, "grad_norm": 0.47825491428375244, "learning_rate": 2.931095016725477e-06, "loss": 0.347256600856781, "step": 14037, "token_acc": 0.8757136583223539 }, { "epoch": 0.7574596665407651, "grad_norm": 0.44210487604141235, "learning_rate": 2.9298590337045883e-06, "loss": 0.349689245223999, "step": 14038, "token_acc": 0.8771770210553678 }, { "epoch": 0.7575136243457616, "grad_norm": 0.4156457781791687, "learning_rate": 2.9286232666039562e-06, "loss": 0.3391231298446655, "step": 14039, "token_acc": 0.878644186863365 }, { "epoch": 0.7575675821507581, "grad_norm": 0.43545395135879517, "learning_rate": 2.927387715461313e-06, "loss": 0.3049950897693634, "step": 14040, "token_acc": 0.8860319666435025 }, { "epoch": 0.7576215399557547, "grad_norm": 0.3740294277667999, "learning_rate": 2.9261523803143967e-06, "loss": 0.30481088161468506, "step": 14041, "token_acc": 0.8888693820224719 }, { "epoch": 0.757675497760751, "grad_norm": 0.34764090180397034, "learning_rate": 2.92491726120093e-06, "loss": 0.32676777243614197, "step": 14042, "token_acc": 0.8854858548585486 }, { "epoch": 0.7577294555657476, "grad_norm": 0.35028064250946045, "learning_rate": 2.9236823581586406e-06, "loss": 0.33540838956832886, "step": 14043, "token_acc": 0.8848181338956247 }, { "epoch": 0.7577834133707441, "grad_norm": 0.34680184721946716, "learning_rate": 2.922447671225238e-06, "loss": 0.3041321039199829, "step": 14044, "token_acc": 0.8947637292464878 }, { "epoch": 0.7578373711757406, "grad_norm": 0.31660398840904236, "learning_rate": 2.92121320043843e-06, "loss": 0.35083484649658203, "step": 14045, "token_acc": 0.8735018522553933 }, { "epoch": 0.7578913289807371, "grad_norm": 0.451499342918396, "learning_rate": 2.919978945835915e-06, "loss": 0.29359591007232666, "step": 14046, "token_acc": 0.8946452476572958 }, { "epoch": 0.7579452867857336, "grad_norm": 0.48412325978279114, "learning_rate": 2.9187449074553895e-06, "loss": 0.32264676690101624, "step": 14047, "token_acc": 0.885879747913473 }, { "epoch": 0.7579992445907301, "grad_norm": 0.46287545561790466, "learning_rate": 2.917511085334539e-06, "loss": 0.36318573355674744, "step": 14048, "token_acc": 0.8674204946996467 }, { "epoch": 0.7580532023957265, "grad_norm": 0.5147858262062073, "learning_rate": 2.9162774795110417e-06, "loss": 0.3503548204898834, "step": 14049, "token_acc": 0.8778625954198473 }, { "epoch": 0.758107160200723, "grad_norm": 0.44108444452285767, "learning_rate": 2.9150440900225773e-06, "loss": 0.36211448907852173, "step": 14050, "token_acc": 0.8701540755467196 }, { "epoch": 0.7581611180057195, "grad_norm": 0.45633262395858765, "learning_rate": 2.9138109169068108e-06, "loss": 0.3266860246658325, "step": 14051, "token_acc": 0.8826307503935631 }, { "epoch": 0.758215075810716, "grad_norm": 0.42258068919181824, "learning_rate": 2.912577960201403e-06, "loss": 0.35621413588523865, "step": 14052, "token_acc": 0.8677248677248677 }, { "epoch": 0.7582690336157125, "grad_norm": 0.39255255460739136, "learning_rate": 2.911345219944006e-06, "loss": 0.343964159488678, "step": 14053, "token_acc": 0.8770438898450946 }, { "epoch": 0.758322991420709, "grad_norm": 0.44540777802467346, "learning_rate": 2.91011269617227e-06, "loss": 0.3325667977333069, "step": 14054, "token_acc": 0.8821409503470369 }, { "epoch": 0.7583769492257055, "grad_norm": 0.35438403487205505, "learning_rate": 2.9088803889238358e-06, "loss": 0.3020409345626831, "step": 14055, "token_acc": 0.892462311557789 }, { "epoch": 0.758430907030702, "grad_norm": 0.48799630999565125, "learning_rate": 2.9076482982363353e-06, "loss": 0.33563464879989624, "step": 14056, "token_acc": 0.879821704097377 }, { "epoch": 0.7584848648356984, "grad_norm": 0.36550173163414, "learning_rate": 2.906416424147399e-06, "loss": 0.2955111265182495, "step": 14057, "token_acc": 0.8936635105608157 }, { "epoch": 0.7585388226406949, "grad_norm": 0.524803876876831, "learning_rate": 2.9051847666946464e-06, "loss": 0.36235666275024414, "step": 14058, "token_acc": 0.8694724592707526 }, { "epoch": 0.7585927804456915, "grad_norm": 0.3851965069770813, "learning_rate": 2.9039533259156926e-06, "loss": 0.30274125933647156, "step": 14059, "token_acc": 0.8912218910864265 }, { "epoch": 0.758646738250688, "grad_norm": 0.4677087962627411, "learning_rate": 2.9027221018481457e-06, "loss": 0.37127685546875, "step": 14060, "token_acc": 0.8701956919611432 }, { "epoch": 0.7587006960556845, "grad_norm": 0.4351901412010193, "learning_rate": 2.901491094529606e-06, "loss": 0.3374762237071991, "step": 14061, "token_acc": 0.875787728026534 }, { "epoch": 0.758754653860681, "grad_norm": 0.36397016048431396, "learning_rate": 2.900260303997665e-06, "loss": 0.37644749879837036, "step": 14062, "token_acc": 0.8711211778029445 }, { "epoch": 0.7588086116656775, "grad_norm": 0.36701735854148865, "learning_rate": 2.8990297302899184e-06, "loss": 0.3517622649669647, "step": 14063, "token_acc": 0.8775006174364041 }, { "epoch": 0.758862569470674, "grad_norm": 0.35936176776885986, "learning_rate": 2.8977993734439437e-06, "loss": 0.36123305559158325, "step": 14064, "token_acc": 0.8718186024988431 }, { "epoch": 0.7589165272756704, "grad_norm": 0.4166901111602783, "learning_rate": 2.896569233497315e-06, "loss": 0.28258079290390015, "step": 14065, "token_acc": 0.8987047492527399 }, { "epoch": 0.7589704850806669, "grad_norm": 0.3812170922756195, "learning_rate": 2.8953393104876014e-06, "loss": 0.32767319679260254, "step": 14066, "token_acc": 0.8855608591885441 }, { "epoch": 0.7590244428856634, "grad_norm": 0.3821243345737457, "learning_rate": 2.8941096044523644e-06, "loss": 0.2874639928340912, "step": 14067, "token_acc": 0.8983433734939759 }, { "epoch": 0.7590784006906599, "grad_norm": 0.4631437361240387, "learning_rate": 2.8928801154291554e-06, "loss": 0.32081732153892517, "step": 14068, "token_acc": 0.8805334321170587 }, { "epoch": 0.7591323584956564, "grad_norm": 0.4132338762283325, "learning_rate": 2.891650843455528e-06, "loss": 0.3433316946029663, "step": 14069, "token_acc": 0.8830699774266366 }, { "epoch": 0.7591863163006529, "grad_norm": 0.4355795085430145, "learning_rate": 2.8904217885690233e-06, "loss": 0.33062857389450073, "step": 14070, "token_acc": 0.881268551788783 }, { "epoch": 0.7592402741056494, "grad_norm": 0.4299183189868927, "learning_rate": 2.8891929508071735e-06, "loss": 0.3345487415790558, "step": 14071, "token_acc": 0.8784904013961605 }, { "epoch": 0.7592942319106458, "grad_norm": 0.5494927167892456, "learning_rate": 2.887964330207509e-06, "loss": 0.28355079889297485, "step": 14072, "token_acc": 0.8922052823706249 }, { "epoch": 0.7593481897156423, "grad_norm": 0.390104204416275, "learning_rate": 2.88673592680755e-06, "loss": 0.3280089497566223, "step": 14073, "token_acc": 0.8814655172413793 }, { "epoch": 0.7594021475206388, "grad_norm": 0.4013877809047699, "learning_rate": 2.8855077406448164e-06, "loss": 0.3159901797771454, "step": 14074, "token_acc": 0.8865089989587982 }, { "epoch": 0.7594561053256353, "grad_norm": 0.4492013156414032, "learning_rate": 2.884279771756805e-06, "loss": 0.3831427991390228, "step": 14075, "token_acc": 0.866580728327425 }, { "epoch": 0.7595100631306319, "grad_norm": 0.48182398080825806, "learning_rate": 2.8830520201810296e-06, "loss": 0.3512135446071625, "step": 14076, "token_acc": 0.8735382559305045 }, { "epoch": 0.7595640209356284, "grad_norm": 0.33894529938697815, "learning_rate": 2.8818244859549804e-06, "loss": 0.23360532522201538, "step": 14077, "token_acc": 0.9119460777283809 }, { "epoch": 0.7596179787406249, "grad_norm": 0.42408156394958496, "learning_rate": 2.880597169116147e-06, "loss": 0.343709260225296, "step": 14078, "token_acc": 0.8775419982316534 }, { "epoch": 0.7596719365456214, "grad_norm": 0.38626083731651306, "learning_rate": 2.8793700697020112e-06, "loss": 0.3479517102241516, "step": 14079, "token_acc": 0.8767649244488481 }, { "epoch": 0.7597258943506178, "grad_norm": 0.39236798882484436, "learning_rate": 2.8781431877500486e-06, "loss": 0.31115859746932983, "step": 14080, "token_acc": 0.8913937143614905 }, { "epoch": 0.7597798521556143, "grad_norm": 0.4169231653213501, "learning_rate": 2.8769165232977234e-06, "loss": 0.34906214475631714, "step": 14081, "token_acc": 0.8762325239146431 }, { "epoch": 0.7598338099606108, "grad_norm": 0.5118816494941711, "learning_rate": 2.8756900763825047e-06, "loss": 0.35252779722213745, "step": 14082, "token_acc": 0.877067042686835 }, { "epoch": 0.7598877677656073, "grad_norm": 0.4108803868293762, "learning_rate": 2.874463847041845e-06, "loss": 0.31323936581611633, "step": 14083, "token_acc": 0.8864450127877238 }, { "epoch": 0.7599417255706038, "grad_norm": 0.4161853790283203, "learning_rate": 2.873237835313193e-06, "loss": 0.3863847851753235, "step": 14084, "token_acc": 0.8674567662353369 }, { "epoch": 0.7599956833756003, "grad_norm": 0.3419956862926483, "learning_rate": 2.87201204123399e-06, "loss": 0.3571103811264038, "step": 14085, "token_acc": 0.8738529690960869 }, { "epoch": 0.7600496411805968, "grad_norm": 0.43592721223831177, "learning_rate": 2.8707864648416726e-06, "loss": 0.2901272475719452, "step": 14086, "token_acc": 0.8979357798165137 }, { "epoch": 0.7601035989855933, "grad_norm": 0.32826268672943115, "learning_rate": 2.8695611061736695e-06, "loss": 0.34995684027671814, "step": 14087, "token_acc": 0.8725146198830409 }, { "epoch": 0.7601575567905897, "grad_norm": 0.47451838850975037, "learning_rate": 2.8683359652674016e-06, "loss": 0.31657248735427856, "step": 14088, "token_acc": 0.8844829784521229 }, { "epoch": 0.7602115145955862, "grad_norm": 0.4477635622024536, "learning_rate": 2.8671110421602845e-06, "loss": 0.36850953102111816, "step": 14089, "token_acc": 0.8707124010554089 }, { "epoch": 0.7602654724005827, "grad_norm": 0.41171377897262573, "learning_rate": 2.8658863368897284e-06, "loss": 0.36133384704589844, "step": 14090, "token_acc": 0.8728700548229368 }, { "epoch": 0.7603194302055792, "grad_norm": 0.4224144220352173, "learning_rate": 2.8646618494931344e-06, "loss": 0.3230486512184143, "step": 14091, "token_acc": 0.8801725515258029 }, { "epoch": 0.7603733880105757, "grad_norm": 0.4219074547290802, "learning_rate": 2.8634375800078985e-06, "loss": 0.33289778232574463, "step": 14092, "token_acc": 0.8779165090175306 }, { "epoch": 0.7604273458155723, "grad_norm": 0.5068389177322388, "learning_rate": 2.8622135284714092e-06, "loss": 0.3726006746292114, "step": 14093, "token_acc": 0.8732970027247956 }, { "epoch": 0.7604813036205688, "grad_norm": 0.4971638023853302, "learning_rate": 2.8609896949210457e-06, "loss": 0.37514302134513855, "step": 14094, "token_acc": 0.8677424721344202 }, { "epoch": 0.7605352614255652, "grad_norm": 0.5042946338653564, "learning_rate": 2.8597660793941897e-06, "loss": 0.3533012568950653, "step": 14095, "token_acc": 0.8758599124452783 }, { "epoch": 0.7605892192305617, "grad_norm": 0.4256226122379303, "learning_rate": 2.858542681928207e-06, "loss": 0.29427453875541687, "step": 14096, "token_acc": 0.8955482145666195 }, { "epoch": 0.7606431770355582, "grad_norm": 0.4065333902835846, "learning_rate": 2.8573195025604605e-06, "loss": 0.31704798340797424, "step": 14097, "token_acc": 0.8819545234639574 }, { "epoch": 0.7606971348405547, "grad_norm": 0.33162885904312134, "learning_rate": 2.856096541328305e-06, "loss": 0.34160473942756653, "step": 14098, "token_acc": 0.8816610657705233 }, { "epoch": 0.7607510926455512, "grad_norm": 0.4321479797363281, "learning_rate": 2.85487379826909e-06, "loss": 0.33806589245796204, "step": 14099, "token_acc": 0.8826719954324864 }, { "epoch": 0.7608050504505477, "grad_norm": 0.42549192905426025, "learning_rate": 2.853651273420154e-06, "loss": 0.3458622694015503, "step": 14100, "token_acc": 0.873645137342242 }, { "epoch": 0.7608590082555442, "grad_norm": 0.34016937017440796, "learning_rate": 2.8524289668188397e-06, "loss": 0.28625622391700745, "step": 14101, "token_acc": 0.8904227782571182 }, { "epoch": 0.7609129660605407, "grad_norm": 0.5396063327789307, "learning_rate": 2.8512068785024715e-06, "loss": 0.33093026280403137, "step": 14102, "token_acc": 0.8806034482758621 }, { "epoch": 0.7609669238655371, "grad_norm": 0.3930473327636719, "learning_rate": 2.8499850085083736e-06, "loss": 0.3883492648601532, "step": 14103, "token_acc": 0.8660971703150027 }, { "epoch": 0.7610208816705336, "grad_norm": 0.4058833718299866, "learning_rate": 2.848763356873859e-06, "loss": 0.3079511225223541, "step": 14104, "token_acc": 0.8894729747944466 }, { "epoch": 0.7610748394755301, "grad_norm": 0.4494810998439789, "learning_rate": 2.847541923636239e-06, "loss": 0.39229831099510193, "step": 14105, "token_acc": 0.8620064183061253 }, { "epoch": 0.7611287972805266, "grad_norm": 0.4910232424736023, "learning_rate": 2.846320708832815e-06, "loss": 0.3059070408344269, "step": 14106, "token_acc": 0.8891472868217054 }, { "epoch": 0.7611827550855231, "grad_norm": 0.41844436526298523, "learning_rate": 2.8450997125008827e-06, "loss": 0.2955370843410492, "step": 14107, "token_acc": 0.8941445114989398 }, { "epoch": 0.7612367128905196, "grad_norm": 0.4218314588069916, "learning_rate": 2.8438789346777306e-06, "loss": 0.3695400059223175, "step": 14108, "token_acc": 0.8704221600745772 }, { "epoch": 0.7612906706955161, "grad_norm": 0.34498515725135803, "learning_rate": 2.842658375400641e-06, "loss": 0.3166126012802124, "step": 14109, "token_acc": 0.889349453080023 }, { "epoch": 0.7613446285005127, "grad_norm": 0.3855814039707184, "learning_rate": 2.84143803470689e-06, "loss": 0.37406474351882935, "step": 14110, "token_acc": 0.8706356311548792 }, { "epoch": 0.761398586305509, "grad_norm": 0.3908936083316803, "learning_rate": 2.840217912633746e-06, "loss": 0.28843650221824646, "step": 14111, "token_acc": 0.8919787590832867 }, { "epoch": 0.7614525441105056, "grad_norm": 0.5462469458580017, "learning_rate": 2.838998009218471e-06, "loss": 0.35584428906440735, "step": 14112, "token_acc": 0.8714476444783521 }, { "epoch": 0.7615065019155021, "grad_norm": 0.40373867750167847, "learning_rate": 2.8377783244983183e-06, "loss": 0.31022968888282776, "step": 14113, "token_acc": 0.8909090909090909 }, { "epoch": 0.7615604597204986, "grad_norm": 0.3718093931674957, "learning_rate": 2.836558858510542e-06, "loss": 0.3068472146987915, "step": 14114, "token_acc": 0.8915207257150056 }, { "epoch": 0.7616144175254951, "grad_norm": 0.4229075312614441, "learning_rate": 2.8353396112923814e-06, "loss": 0.309325248003006, "step": 14115, "token_acc": 0.8916035255914644 }, { "epoch": 0.7616683753304916, "grad_norm": 0.47266873717308044, "learning_rate": 2.8341205828810727e-06, "loss": 0.35769563913345337, "step": 14116, "token_acc": 0.8722334004024145 }, { "epoch": 0.7617223331354881, "grad_norm": 0.4739249348640442, "learning_rate": 2.8329017733138443e-06, "loss": 0.3447389602661133, "step": 14117, "token_acc": 0.8770139634801289 }, { "epoch": 0.7617762909404845, "grad_norm": 0.36854133009910583, "learning_rate": 2.831683182627918e-06, "loss": 0.28996405005455017, "step": 14118, "token_acc": 0.8894770612768688 }, { "epoch": 0.761830248745481, "grad_norm": 0.3729305863380432, "learning_rate": 2.830464810860509e-06, "loss": 0.30535370111465454, "step": 14119, "token_acc": 0.888974914834314 }, { "epoch": 0.7618842065504775, "grad_norm": 0.5119912624359131, "learning_rate": 2.8292466580488243e-06, "loss": 0.36192870140075684, "step": 14120, "token_acc": 0.8684831970935513 }, { "epoch": 0.761938164355474, "grad_norm": 0.3975714147090912, "learning_rate": 2.828028724230071e-06, "loss": 0.30874350666999817, "step": 14121, "token_acc": 0.8923076923076924 }, { "epoch": 0.7619921221604705, "grad_norm": 0.4718972146511078, "learning_rate": 2.8268110094414446e-06, "loss": 0.3924194872379303, "step": 14122, "token_acc": 0.8648052902277737 }, { "epoch": 0.762046079965467, "grad_norm": 0.33234500885009766, "learning_rate": 2.8255935137201273e-06, "loss": 0.2856217324733734, "step": 14123, "token_acc": 0.8945477599323753 }, { "epoch": 0.7621000377704635, "grad_norm": 0.4078131914138794, "learning_rate": 2.8243762371033055e-06, "loss": 0.299155592918396, "step": 14124, "token_acc": 0.889662560257097 }, { "epoch": 0.76215399557546, "grad_norm": 0.37712112069129944, "learning_rate": 2.823159179628153e-06, "loss": 0.33207547664642334, "step": 14125, "token_acc": 0.885222193005522 }, { "epoch": 0.7622079533804564, "grad_norm": 0.4037882685661316, "learning_rate": 2.821942341331836e-06, "loss": 0.3443540632724762, "step": 14126, "token_acc": 0.8744561839651958 }, { "epoch": 0.762261911185453, "grad_norm": 0.5024387240409851, "learning_rate": 2.820725722251523e-06, "loss": 0.32783058285713196, "step": 14127, "token_acc": 0.885316887547566 }, { "epoch": 0.7623158689904495, "grad_norm": 0.42826080322265625, "learning_rate": 2.819509322424365e-06, "loss": 0.3641943335533142, "step": 14128, "token_acc": 0.8718147917961466 }, { "epoch": 0.762369826795446, "grad_norm": 0.5047488808631897, "learning_rate": 2.8182931418875124e-06, "loss": 0.37794196605682373, "step": 14129, "token_acc": 0.8721007289595759 }, { "epoch": 0.7624237846004425, "grad_norm": 0.3136083781719208, "learning_rate": 2.817077180678105e-06, "loss": 0.30961471796035767, "step": 14130, "token_acc": 0.8870282429392652 }, { "epoch": 0.762477742405439, "grad_norm": 0.4518429636955261, "learning_rate": 2.8158614388332793e-06, "loss": 0.3571815490722656, "step": 14131, "token_acc": 0.8738127544097694 }, { "epoch": 0.7625317002104355, "grad_norm": 0.42582571506500244, "learning_rate": 2.814645916390163e-06, "loss": 0.325309157371521, "step": 14132, "token_acc": 0.8816081741448245 }, { "epoch": 0.7625856580154319, "grad_norm": 0.4026895761489868, "learning_rate": 2.8134306133858756e-06, "loss": 0.3312862515449524, "step": 14133, "token_acc": 0.8770406391108023 }, { "epoch": 0.7626396158204284, "grad_norm": 0.4438022971153259, "learning_rate": 2.8122155298575373e-06, "loss": 0.3323138356208801, "step": 14134, "token_acc": 0.8810310641110377 }, { "epoch": 0.7626935736254249, "grad_norm": 0.3572951853275299, "learning_rate": 2.811000665842254e-06, "loss": 0.28056085109710693, "step": 14135, "token_acc": 0.8927340823970037 }, { "epoch": 0.7627475314304214, "grad_norm": 0.4469512104988098, "learning_rate": 2.809786021377127e-06, "loss": 0.32562559843063354, "step": 14136, "token_acc": 0.8769479564833872 }, { "epoch": 0.7628014892354179, "grad_norm": 0.40882182121276855, "learning_rate": 2.808571596499251e-06, "loss": 0.3055247664451599, "step": 14137, "token_acc": 0.888874137015401 }, { "epoch": 0.7628554470404144, "grad_norm": 0.3710276186466217, "learning_rate": 2.8073573912457142e-06, "loss": 0.2983672618865967, "step": 14138, "token_acc": 0.8899008531242795 }, { "epoch": 0.7629094048454109, "grad_norm": 0.44943249225616455, "learning_rate": 2.8061434056535985e-06, "loss": 0.30166682600975037, "step": 14139, "token_acc": 0.886881647813608 }, { "epoch": 0.7629633626504074, "grad_norm": 0.4431428015232086, "learning_rate": 2.80492963975998e-06, "loss": 0.3463370203971863, "step": 14140, "token_acc": 0.878053005369825 }, { "epoch": 0.7630173204554038, "grad_norm": 0.5085577368736267, "learning_rate": 2.803716093601924e-06, "loss": 0.32658255100250244, "step": 14141, "token_acc": 0.8807086614173228 }, { "epoch": 0.7630712782604003, "grad_norm": 0.47469788789749146, "learning_rate": 2.802502767216494e-06, "loss": 0.34391018748283386, "step": 14142, "token_acc": 0.8773199947347637 }, { "epoch": 0.7631252360653968, "grad_norm": 0.4102046489715576, "learning_rate": 2.8012896606407435e-06, "loss": 0.35008475184440613, "step": 14143, "token_acc": 0.8805455850681981 }, { "epoch": 0.7631791938703933, "grad_norm": 0.30501991510391235, "learning_rate": 2.8000767739117217e-06, "loss": 0.3069590926170349, "step": 14144, "token_acc": 0.8914940732056793 }, { "epoch": 0.7632331516753899, "grad_norm": 0.4152448773384094, "learning_rate": 2.7988641070664658e-06, "loss": 0.3291143476963043, "step": 14145, "token_acc": 0.8792687571410436 }, { "epoch": 0.7632871094803864, "grad_norm": 0.45558860898017883, "learning_rate": 2.797651660142017e-06, "loss": 0.3005636930465698, "step": 14146, "token_acc": 0.8844560715064312 }, { "epoch": 0.7633410672853829, "grad_norm": 0.39649468660354614, "learning_rate": 2.7964394331753987e-06, "loss": 0.35395631194114685, "step": 14147, "token_acc": 0.8730631704410012 }, { "epoch": 0.7633950250903794, "grad_norm": 0.41133010387420654, "learning_rate": 2.795227426203634e-06, "loss": 0.3305608034133911, "step": 14148, "token_acc": 0.8793650793650793 }, { "epoch": 0.7634489828953758, "grad_norm": 0.32741445302963257, "learning_rate": 2.7940156392637363e-06, "loss": 0.30004745721817017, "step": 14149, "token_acc": 0.8905852417302799 }, { "epoch": 0.7635029407003723, "grad_norm": 0.451829731464386, "learning_rate": 2.792804072392713e-06, "loss": 0.30973201990127563, "step": 14150, "token_acc": 0.8874477907232359 }, { "epoch": 0.7635568985053688, "grad_norm": 0.42235180735588074, "learning_rate": 2.7915927256275644e-06, "loss": 0.30662551522254944, "step": 14151, "token_acc": 0.8919763235777705 }, { "epoch": 0.7636108563103653, "grad_norm": 0.4671734571456909, "learning_rate": 2.790381599005284e-06, "loss": 0.3475891947746277, "step": 14152, "token_acc": 0.8791812281577633 }, { "epoch": 0.7636648141153618, "grad_norm": 0.4592343270778656, "learning_rate": 2.7891706925628635e-06, "loss": 0.3201921582221985, "step": 14153, "token_acc": 0.8892879450343535 }, { "epoch": 0.7637187719203583, "grad_norm": 0.4858863651752472, "learning_rate": 2.7879600063372803e-06, "loss": 0.32459113001823425, "step": 14154, "token_acc": 0.8857609139126887 }, { "epoch": 0.7637727297253548, "grad_norm": 0.3892483115196228, "learning_rate": 2.78674954036551e-06, "loss": 0.3191955089569092, "step": 14155, "token_acc": 0.8886747534293165 }, { "epoch": 0.7638266875303512, "grad_norm": 0.3869818449020386, "learning_rate": 2.7855392946845215e-06, "loss": 0.3344956040382385, "step": 14156, "token_acc": 0.8832729905865315 }, { "epoch": 0.7638806453353477, "grad_norm": 0.3973047137260437, "learning_rate": 2.7843292693312707e-06, "loss": 0.3411208391189575, "step": 14157, "token_acc": 0.8822280504558512 }, { "epoch": 0.7639346031403442, "grad_norm": 0.4742477238178253, "learning_rate": 2.7831194643427108e-06, "loss": 0.3448389768600464, "step": 14158, "token_acc": 0.878275701659172 }, { "epoch": 0.7639885609453407, "grad_norm": 0.3957575559616089, "learning_rate": 2.7819098797557943e-06, "loss": 0.322221577167511, "step": 14159, "token_acc": 0.8834860960074569 }, { "epoch": 0.7640425187503372, "grad_norm": 0.5138850212097168, "learning_rate": 2.7807005156074596e-06, "loss": 0.35159802436828613, "step": 14160, "token_acc": 0.8755234657039711 }, { "epoch": 0.7640964765553337, "grad_norm": 0.3765629231929779, "learning_rate": 2.7794913719346407e-06, "loss": 0.3389085829257965, "step": 14161, "token_acc": 0.8793023255813953 }, { "epoch": 0.7641504343603303, "grad_norm": 0.47807469964027405, "learning_rate": 2.7782824487742622e-06, "loss": 0.36868560314178467, "step": 14162, "token_acc": 0.8680359435173299 }, { "epoch": 0.7642043921653268, "grad_norm": 0.4465152621269226, "learning_rate": 2.777073746163247e-06, "loss": 0.3562086820602417, "step": 14163, "token_acc": 0.8754512635379061 }, { "epoch": 0.7642583499703232, "grad_norm": 0.38638579845428467, "learning_rate": 2.7758652641385066e-06, "loss": 0.33625495433807373, "step": 14164, "token_acc": 0.8797378170418922 }, { "epoch": 0.7643123077753197, "grad_norm": 0.4097064733505249, "learning_rate": 2.774657002736947e-06, "loss": 0.2928333878517151, "step": 14165, "token_acc": 0.8919018858621964 }, { "epoch": 0.7643662655803162, "grad_norm": 0.405046671628952, "learning_rate": 2.773448961995471e-06, "loss": 0.3893459439277649, "step": 14166, "token_acc": 0.8624768946395563 }, { "epoch": 0.7644202233853127, "grad_norm": 0.3723297715187073, "learning_rate": 2.772241141950972e-06, "loss": 0.2982749342918396, "step": 14167, "token_acc": 0.8933713471133286 }, { "epoch": 0.7644741811903092, "grad_norm": 0.3609076142311096, "learning_rate": 2.7710335426403346e-06, "loss": 0.348855584859848, "step": 14168, "token_acc": 0.8749306403284874 }, { "epoch": 0.7645281389953057, "grad_norm": 0.3911701738834381, "learning_rate": 2.769826164100439e-06, "loss": 0.3471357822418213, "step": 14169, "token_acc": 0.8766316275846137 }, { "epoch": 0.7645820968003022, "grad_norm": 0.40119338035583496, "learning_rate": 2.768619006368157e-06, "loss": 0.33905795216560364, "step": 14170, "token_acc": 0.8791582661290323 }, { "epoch": 0.7646360546052987, "grad_norm": 0.46018773317337036, "learning_rate": 2.7674120694803588e-06, "loss": 0.37902918457984924, "step": 14171, "token_acc": 0.8689987253930038 }, { "epoch": 0.7646900124102951, "grad_norm": 0.3586345314979553, "learning_rate": 2.7662053534739e-06, "loss": 0.2969655990600586, "step": 14172, "token_acc": 0.8929118354845766 }, { "epoch": 0.7647439702152916, "grad_norm": 0.43561574816703796, "learning_rate": 2.7649988583856347e-06, "loss": 0.35163989663124084, "step": 14173, "token_acc": 0.8734923964341899 }, { "epoch": 0.7647979280202881, "grad_norm": 0.47931796312332153, "learning_rate": 2.763792584252408e-06, "loss": 0.3559643626213074, "step": 14174, "token_acc": 0.8792598303777949 }, { "epoch": 0.7648518858252846, "grad_norm": 0.3448129892349243, "learning_rate": 2.7625865311110612e-06, "loss": 0.2750868499279022, "step": 14175, "token_acc": 0.8962344522065088 }, { "epoch": 0.7649058436302811, "grad_norm": 0.34830141067504883, "learning_rate": 2.761380698998426e-06, "loss": 0.360805481672287, "step": 14176, "token_acc": 0.8748795889971102 }, { "epoch": 0.7649598014352776, "grad_norm": 0.39589548110961914, "learning_rate": 2.760175087951328e-06, "loss": 0.3144855797290802, "step": 14177, "token_acc": 0.8928005847240833 }, { "epoch": 0.7650137592402741, "grad_norm": 0.4551777243614197, "learning_rate": 2.758969698006583e-06, "loss": 0.34493786096572876, "step": 14178, "token_acc": 0.8787046407402053 }, { "epoch": 0.7650677170452705, "grad_norm": 0.371823787689209, "learning_rate": 2.757764529201009e-06, "loss": 0.23143897950649261, "step": 14179, "token_acc": 0.9116129032258065 }, { "epoch": 0.765121674850267, "grad_norm": 0.30240246653556824, "learning_rate": 2.7565595815714095e-06, "loss": 0.3286615014076233, "step": 14180, "token_acc": 0.8839690911320986 }, { "epoch": 0.7651756326552636, "grad_norm": 0.35104721784591675, "learning_rate": 2.755354855154583e-06, "loss": 0.323905885219574, "step": 14181, "token_acc": 0.8792735042735043 }, { "epoch": 0.7652295904602601, "grad_norm": 0.45092085003852844, "learning_rate": 2.754150349987321e-06, "loss": 0.3236134648323059, "step": 14182, "token_acc": 0.8827619663648124 }, { "epoch": 0.7652835482652566, "grad_norm": 0.44318726658821106, "learning_rate": 2.75294606610641e-06, "loss": 0.307668000459671, "step": 14183, "token_acc": 0.885936706718565 }, { "epoch": 0.7653375060702531, "grad_norm": 0.43425536155700684, "learning_rate": 2.751742003548623e-06, "loss": 0.3832951784133911, "step": 14184, "token_acc": 0.8699059561128527 }, { "epoch": 0.7653914638752496, "grad_norm": 0.4063674807548523, "learning_rate": 2.750538162350741e-06, "loss": 0.3499133884906769, "step": 14185, "token_acc": 0.8787757284148623 }, { "epoch": 0.7654454216802461, "grad_norm": 0.4249064326286316, "learning_rate": 2.7493345425495243e-06, "loss": 0.30838316679000854, "step": 14186, "token_acc": 0.8825437112239143 }, { "epoch": 0.7654993794852425, "grad_norm": 0.41404271125793457, "learning_rate": 2.7481311441817304e-06, "loss": 0.35041213035583496, "step": 14187, "token_acc": 0.8769106999195495 }, { "epoch": 0.765553337290239, "grad_norm": 0.44091105461120605, "learning_rate": 2.746927967284112e-06, "loss": 0.3504527807235718, "step": 14188, "token_acc": 0.8768500418877408 }, { "epoch": 0.7656072950952355, "grad_norm": 0.3715943396091461, "learning_rate": 2.7457250118934144e-06, "loss": 0.421603262424469, "step": 14189, "token_acc": 0.8556755333157756 }, { "epoch": 0.765661252900232, "grad_norm": 0.3595186471939087, "learning_rate": 2.7445222780463732e-06, "loss": 0.373508095741272, "step": 14190, "token_acc": 0.8693693693693694 }, { "epoch": 0.7657152107052285, "grad_norm": 0.2910284698009491, "learning_rate": 2.743319765779722e-06, "loss": 0.31361210346221924, "step": 14191, "token_acc": 0.8897334332512579 }, { "epoch": 0.765769168510225, "grad_norm": 0.4030703902244568, "learning_rate": 2.7421174751301826e-06, "loss": 0.3132888078689575, "step": 14192, "token_acc": 0.8852845768209967 }, { "epoch": 0.7658231263152215, "grad_norm": 0.4272380769252777, "learning_rate": 2.7409154061344754e-06, "loss": 0.3619685769081116, "step": 14193, "token_acc": 0.8686056708564747 }, { "epoch": 0.765877084120218, "grad_norm": 0.4444126784801483, "learning_rate": 2.73971355882931e-06, "loss": 0.34767934679985046, "step": 14194, "token_acc": 0.8738232217573222 }, { "epoch": 0.7659310419252144, "grad_norm": 0.45760345458984375, "learning_rate": 2.738511933251391e-06, "loss": 0.3030897378921509, "step": 14195, "token_acc": 0.8880377136122569 }, { "epoch": 0.765984999730211, "grad_norm": 0.40789899230003357, "learning_rate": 2.7373105294374146e-06, "loss": 0.3207513093948364, "step": 14196, "token_acc": 0.8852013057671382 }, { "epoch": 0.7660389575352075, "grad_norm": 0.40271204710006714, "learning_rate": 2.7361093474240697e-06, "loss": 0.28575578331947327, "step": 14197, "token_acc": 0.8903843278002094 }, { "epoch": 0.766092915340204, "grad_norm": 0.4410209655761719, "learning_rate": 2.734908387248044e-06, "loss": 0.3497234582901001, "step": 14198, "token_acc": 0.8767178658043654 }, { "epoch": 0.7661468731452005, "grad_norm": 0.34725841879844666, "learning_rate": 2.7337076489460147e-06, "loss": 0.3058905601501465, "step": 14199, "token_acc": 0.8906486709819584 }, { "epoch": 0.766200830950197, "grad_norm": 0.41176024079322815, "learning_rate": 2.732507132554649e-06, "loss": 0.32288265228271484, "step": 14200, "token_acc": 0.881267217630854 }, { "epoch": 0.7662547887551935, "grad_norm": 0.3058687448501587, "learning_rate": 2.7313068381106124e-06, "loss": 0.3097231090068817, "step": 14201, "token_acc": 0.8919990091652217 }, { "epoch": 0.7663087465601899, "grad_norm": 0.3952740430831909, "learning_rate": 2.730106765650561e-06, "loss": 0.29045945405960083, "step": 14202, "token_acc": 0.8919232042370077 }, { "epoch": 0.7663627043651864, "grad_norm": 0.3475514352321625, "learning_rate": 2.728906915211141e-06, "loss": 0.31631746888160706, "step": 14203, "token_acc": 0.886053336735996 }, { "epoch": 0.7664166621701829, "grad_norm": 0.3468674123287201, "learning_rate": 2.7277072868290057e-06, "loss": 0.3066265881061554, "step": 14204, "token_acc": 0.8879839977139591 }, { "epoch": 0.7664706199751794, "grad_norm": 0.46459004282951355, "learning_rate": 2.726507880540781e-06, "loss": 0.40511971712112427, "step": 14205, "token_acc": 0.8628635661093157 }, { "epoch": 0.7665245777801759, "grad_norm": 0.4999389350414276, "learning_rate": 2.7253086963831e-06, "loss": 0.35677510499954224, "step": 14206, "token_acc": 0.8728279255728125 }, { "epoch": 0.7665785355851724, "grad_norm": 0.3791119158267975, "learning_rate": 2.724109734392587e-06, "loss": 0.29286879301071167, "step": 14207, "token_acc": 0.8907492674759313 }, { "epoch": 0.7666324933901689, "grad_norm": 0.39487069845199585, "learning_rate": 2.7229109946058552e-06, "loss": 0.33038651943206787, "step": 14208, "token_acc": 0.8824831249308399 }, { "epoch": 0.7666864511951654, "grad_norm": 0.3804492652416229, "learning_rate": 2.721712477059516e-06, "loss": 0.32182997465133667, "step": 14209, "token_acc": 0.8822751322751323 }, { "epoch": 0.7667404090001618, "grad_norm": 0.36973997950553894, "learning_rate": 2.720514181790168e-06, "loss": 0.36138948798179626, "step": 14210, "token_acc": 0.8699023104357265 }, { "epoch": 0.7667943668051583, "grad_norm": 0.4674481153488159, "learning_rate": 2.7193161088344134e-06, "loss": 0.2906085252761841, "step": 14211, "token_acc": 0.8902735070399741 }, { "epoch": 0.7668483246101548, "grad_norm": 0.35493484139442444, "learning_rate": 2.7181182582288368e-06, "loss": 0.3294375538825989, "step": 14212, "token_acc": 0.8866164737610968 }, { "epoch": 0.7669022824151513, "grad_norm": 0.42325717210769653, "learning_rate": 2.7169206300100215e-06, "loss": 0.3714965283870697, "step": 14213, "token_acc": 0.8721825962910128 }, { "epoch": 0.7669562402201479, "grad_norm": 0.44305962324142456, "learning_rate": 2.7157232242145425e-06, "loss": 0.32176655530929565, "step": 14214, "token_acc": 0.8819162100053032 }, { "epoch": 0.7670101980251444, "grad_norm": 0.4557986855506897, "learning_rate": 2.7145260408789674e-06, "loss": 0.3475154638290405, "step": 14215, "token_acc": 0.8784154297059658 }, { "epoch": 0.7670641558301409, "grad_norm": 0.4089670777320862, "learning_rate": 2.713329080039856e-06, "loss": 0.28714802861213684, "step": 14216, "token_acc": 0.8937142857142857 }, { "epoch": 0.7671181136351374, "grad_norm": 0.32787641882896423, "learning_rate": 2.7121323417337674e-06, "loss": 0.28667131066322327, "step": 14217, "token_acc": 0.8952427902390929 }, { "epoch": 0.7671720714401338, "grad_norm": 0.3928256630897522, "learning_rate": 2.7109358259972496e-06, "loss": 0.3448517620563507, "step": 14218, "token_acc": 0.8785178899731297 }, { "epoch": 0.7672260292451303, "grad_norm": 0.44502153992652893, "learning_rate": 2.709739532866842e-06, "loss": 0.2982473373413086, "step": 14219, "token_acc": 0.8899948515531149 }, { "epoch": 0.7672799870501268, "grad_norm": 0.49362221360206604, "learning_rate": 2.7085434623790785e-06, "loss": 0.3889782726764679, "step": 14220, "token_acc": 0.8608600810221253 }, { "epoch": 0.7673339448551233, "grad_norm": 0.39054587483406067, "learning_rate": 2.707347614570488e-06, "loss": 0.31486445665359497, "step": 14221, "token_acc": 0.8836402774455872 }, { "epoch": 0.7673879026601198, "grad_norm": 0.3084068298339844, "learning_rate": 2.70615198947759e-06, "loss": 0.2877257466316223, "step": 14222, "token_acc": 0.8923346254845567 }, { "epoch": 0.7674418604651163, "grad_norm": 0.3904392719268799, "learning_rate": 2.704956587136901e-06, "loss": 0.29516077041625977, "step": 14223, "token_acc": 0.8967265353418308 }, { "epoch": 0.7674958182701128, "grad_norm": 0.3406095504760742, "learning_rate": 2.703761407584925e-06, "loss": 0.3684600591659546, "step": 14224, "token_acc": 0.8726132534631225 }, { "epoch": 0.7675497760751092, "grad_norm": 0.3306782841682434, "learning_rate": 2.702566450858166e-06, "loss": 0.2992333173751831, "step": 14225, "token_acc": 0.8925862243349854 }, { "epoch": 0.7676037338801057, "grad_norm": 0.42959079146385193, "learning_rate": 2.701371716993115e-06, "loss": 0.34893298149108887, "step": 14226, "token_acc": 0.8783355947535052 }, { "epoch": 0.7676576916851022, "grad_norm": 0.42660096287727356, "learning_rate": 2.70017720602626e-06, "loss": 0.34649741649627686, "step": 14227, "token_acc": 0.8766744321490972 }, { "epoch": 0.7677116494900987, "grad_norm": 0.4735077917575836, "learning_rate": 2.698982917994081e-06, "loss": 0.36411726474761963, "step": 14228, "token_acc": 0.8725165562913907 }, { "epoch": 0.7677656072950952, "grad_norm": 0.49828484654426575, "learning_rate": 2.6977888529330475e-06, "loss": 0.34670886397361755, "step": 14229, "token_acc": 0.8783029947152085 }, { "epoch": 0.7678195651000917, "grad_norm": 0.3445858359336853, "learning_rate": 2.6965950108796336e-06, "loss": 0.2900106906890869, "step": 14230, "token_acc": 0.8916886047386235 }, { "epoch": 0.7678735229050883, "grad_norm": 0.3425487279891968, "learning_rate": 2.695401391870294e-06, "loss": 0.350189208984375, "step": 14231, "token_acc": 0.8720542591102426 }, { "epoch": 0.7679274807100848, "grad_norm": 0.3431534171104431, "learning_rate": 2.6942079959414825e-06, "loss": 0.3351333737373352, "step": 14232, "token_acc": 0.8794990409567867 }, { "epoch": 0.7679814385150812, "grad_norm": 0.391748309135437, "learning_rate": 2.6930148231296447e-06, "loss": 0.3199557065963745, "step": 14233, "token_acc": 0.8884322678843227 }, { "epoch": 0.7680353963200777, "grad_norm": 0.453043669462204, "learning_rate": 2.6918218734712208e-06, "loss": 0.34303122758865356, "step": 14234, "token_acc": 0.8780384993677112 }, { "epoch": 0.7680893541250742, "grad_norm": 0.4342775046825409, "learning_rate": 2.690629147002641e-06, "loss": 0.3317222595214844, "step": 14235, "token_acc": 0.8825983313468415 }, { "epoch": 0.7681433119300707, "grad_norm": 0.4693937599658966, "learning_rate": 2.6894366437603303e-06, "loss": 0.36159980297088623, "step": 14236, "token_acc": 0.8719668855266914 }, { "epoch": 0.7681972697350672, "grad_norm": 0.3049469292163849, "learning_rate": 2.688244363780712e-06, "loss": 0.3283137381076813, "step": 14237, "token_acc": 0.8839190628328009 }, { "epoch": 0.7682512275400637, "grad_norm": 0.3539692759513855, "learning_rate": 2.6870523071001997e-06, "loss": 0.36115899682044983, "step": 14238, "token_acc": 0.8689458689458689 }, { "epoch": 0.7683051853450602, "grad_norm": 0.41271188855171204, "learning_rate": 2.6858604737551897e-06, "loss": 0.3277983069419861, "step": 14239, "token_acc": 0.8840416549394877 }, { "epoch": 0.7683591431500566, "grad_norm": 0.5773195028305054, "learning_rate": 2.684668863782085e-06, "loss": 0.34631142020225525, "step": 14240, "token_acc": 0.8756176520701994 }, { "epoch": 0.7684131009550531, "grad_norm": 0.4498451352119446, "learning_rate": 2.6834774772172777e-06, "loss": 0.31402212381362915, "step": 14241, "token_acc": 0.8847549909255898 }, { "epoch": 0.7684670587600496, "grad_norm": 0.35928401350975037, "learning_rate": 2.6822863140971477e-06, "loss": 0.3243580162525177, "step": 14242, "token_acc": 0.8847598604775959 }, { "epoch": 0.7685210165650461, "grad_norm": 0.3595856726169586, "learning_rate": 2.6810953744580805e-06, "loss": 0.3138457238674164, "step": 14243, "token_acc": 0.8836429668948177 }, { "epoch": 0.7685749743700426, "grad_norm": 0.42308181524276733, "learning_rate": 2.679904658336442e-06, "loss": 0.35690194368362427, "step": 14244, "token_acc": 0.8737471952131638 }, { "epoch": 0.7686289321750391, "grad_norm": 0.5293096303939819, "learning_rate": 2.678714165768599e-06, "loss": 0.34204500913619995, "step": 14245, "token_acc": 0.8814988290398127 }, { "epoch": 0.7686828899800356, "grad_norm": 0.44937610626220703, "learning_rate": 2.677523896790907e-06, "loss": 0.3009684383869171, "step": 14246, "token_acc": 0.8896053649729172 }, { "epoch": 0.7687368477850322, "grad_norm": 0.4210708439350128, "learning_rate": 2.676333851439716e-06, "loss": 0.3596373498439789, "step": 14247, "token_acc": 0.8816500711237554 }, { "epoch": 0.7687908055900285, "grad_norm": 0.3269832134246826, "learning_rate": 2.675144029751372e-06, "loss": 0.35363078117370605, "step": 14248, "token_acc": 0.8751344375134438 }, { "epoch": 0.768844763395025, "grad_norm": 0.4366319179534912, "learning_rate": 2.673954431762207e-06, "loss": 0.30588027834892273, "step": 14249, "token_acc": 0.8906557625321824 }, { "epoch": 0.7688987212000216, "grad_norm": 0.5346997380256653, "learning_rate": 2.672765057508557e-06, "loss": 0.3643040955066681, "step": 14250, "token_acc": 0.872696502444528 }, { "epoch": 0.7689526790050181, "grad_norm": 0.4726316034793854, "learning_rate": 2.6715759070267433e-06, "loss": 0.33727872371673584, "step": 14251, "token_acc": 0.8759888817618131 }, { "epoch": 0.7690066368100146, "grad_norm": 0.47686779499053955, "learning_rate": 2.670386980353081e-06, "loss": 0.31917524337768555, "step": 14252, "token_acc": 0.8841554559043349 }, { "epoch": 0.7690605946150111, "grad_norm": 0.38324442505836487, "learning_rate": 2.669198277523881e-06, "loss": 0.31527942419052124, "step": 14253, "token_acc": 0.8875150454097822 }, { "epoch": 0.7691145524200076, "grad_norm": 0.48945024609565735, "learning_rate": 2.668009798575445e-06, "loss": 0.33260491490364075, "step": 14254, "token_acc": 0.8857005450464892 }, { "epoch": 0.7691685102250041, "grad_norm": 0.3656449019908905, "learning_rate": 2.6668215435440683e-06, "loss": 0.34333688020706177, "step": 14255, "token_acc": 0.8773822366055376 }, { "epoch": 0.7692224680300005, "grad_norm": 0.43597063422203064, "learning_rate": 2.6656335124660417e-06, "loss": 0.3362729847431183, "step": 14256, "token_acc": 0.8774752475247525 }, { "epoch": 0.769276425834997, "grad_norm": 0.4218626320362091, "learning_rate": 2.664445705377645e-06, "loss": 0.3011029362678528, "step": 14257, "token_acc": 0.8902585688514733 }, { "epoch": 0.7693303836399935, "grad_norm": 0.5507554411888123, "learning_rate": 2.6632581223151554e-06, "loss": 0.3466010093688965, "step": 14258, "token_acc": 0.8739264857437307 }, { "epoch": 0.76938434144499, "grad_norm": 0.39699381589889526, "learning_rate": 2.662070763314841e-06, "loss": 0.35434696078300476, "step": 14259, "token_acc": 0.8735023041474654 }, { "epoch": 0.7694382992499865, "grad_norm": 0.46720388531684875, "learning_rate": 2.6608836284129634e-06, "loss": 0.29562878608703613, "step": 14260, "token_acc": 0.8876603272888103 }, { "epoch": 0.769492257054983, "grad_norm": 0.39545387029647827, "learning_rate": 2.6596967176457745e-06, "loss": 0.3233201503753662, "step": 14261, "token_acc": 0.8853391232423491 }, { "epoch": 0.7695462148599795, "grad_norm": 0.3919356167316437, "learning_rate": 2.658510031049527e-06, "loss": 0.3375961184501648, "step": 14262, "token_acc": 0.8845600575401582 }, { "epoch": 0.7696001726649759, "grad_norm": 0.4885466992855072, "learning_rate": 2.6573235686604603e-06, "loss": 0.358834445476532, "step": 14263, "token_acc": 0.8734143410335158 }, { "epoch": 0.7696541304699724, "grad_norm": 0.38288000226020813, "learning_rate": 2.656137330514809e-06, "loss": 0.3327171802520752, "step": 14264, "token_acc": 0.8848179025888547 }, { "epoch": 0.769708088274969, "grad_norm": 0.4979287087917328, "learning_rate": 2.6549513166487984e-06, "loss": 0.3631646931171417, "step": 14265, "token_acc": 0.8719194724054148 }, { "epoch": 0.7697620460799655, "grad_norm": 0.4867575466632843, "learning_rate": 2.653765527098651e-06, "loss": 0.3146611452102661, "step": 14266, "token_acc": 0.8860182370820668 }, { "epoch": 0.769816003884962, "grad_norm": 0.5353308916091919, "learning_rate": 2.6525799619005797e-06, "loss": 0.35925203561782837, "step": 14267, "token_acc": 0.8766672440672094 }, { "epoch": 0.7698699616899585, "grad_norm": 0.4032905101776123, "learning_rate": 2.6513946210907892e-06, "loss": 0.3152939975261688, "step": 14268, "token_acc": 0.8876834716017868 }, { "epoch": 0.769923919494955, "grad_norm": 0.46470096707344055, "learning_rate": 2.6502095047054855e-06, "loss": 0.3284599781036377, "step": 14269, "token_acc": 0.8818458417849898 }, { "epoch": 0.7699778772999515, "grad_norm": 0.5004424452781677, "learning_rate": 2.649024612780857e-06, "loss": 0.36781078577041626, "step": 14270, "token_acc": 0.8709677419354839 }, { "epoch": 0.7700318351049479, "grad_norm": 0.27415502071380615, "learning_rate": 2.647839945353091e-06, "loss": 0.33002009987831116, "step": 14271, "token_acc": 0.8843083652268999 }, { "epoch": 0.7700857929099444, "grad_norm": 0.3884436786174774, "learning_rate": 2.6466555024583686e-06, "loss": 0.3526049256324768, "step": 14272, "token_acc": 0.8701807609657439 }, { "epoch": 0.7701397507149409, "grad_norm": 0.5426346659660339, "learning_rate": 2.645471284132862e-06, "loss": 0.30992305278778076, "step": 14273, "token_acc": 0.8871453138435081 }, { "epoch": 0.7701937085199374, "grad_norm": 0.4229804575443268, "learning_rate": 2.6442872904127304e-06, "loss": 0.3604999780654907, "step": 14274, "token_acc": 0.8733802295446131 }, { "epoch": 0.7702476663249339, "grad_norm": 0.36173319816589355, "learning_rate": 2.6431035213341415e-06, "loss": 0.27899038791656494, "step": 14275, "token_acc": 0.8995289001630731 }, { "epoch": 0.7703016241299304, "grad_norm": 0.37957221269607544, "learning_rate": 2.6419199769332436e-06, "loss": 0.3156644105911255, "step": 14276, "token_acc": 0.8845118322233244 }, { "epoch": 0.7703555819349269, "grad_norm": 0.4084877073764801, "learning_rate": 2.6407366572461824e-06, "loss": 0.3514667749404907, "step": 14277, "token_acc": 0.8749139316043149 }, { "epoch": 0.7704095397399234, "grad_norm": 0.45880481600761414, "learning_rate": 2.639553562309095e-06, "loss": 0.3758355975151062, "step": 14278, "token_acc": 0.8652365930599369 }, { "epoch": 0.7704634975449198, "grad_norm": 0.34925130009651184, "learning_rate": 2.6383706921581144e-06, "loss": 0.3387618958950043, "step": 14279, "token_acc": 0.8803196803196803 }, { "epoch": 0.7705174553499163, "grad_norm": 0.4165271818637848, "learning_rate": 2.637188046829363e-06, "loss": 0.3086785078048706, "step": 14280, "token_acc": 0.8854762496891321 }, { "epoch": 0.7705714131549128, "grad_norm": 0.4466930329799652, "learning_rate": 2.6360056263589583e-06, "loss": 0.38472580909729004, "step": 14281, "token_acc": 0.8671707804251676 }, { "epoch": 0.7706253709599094, "grad_norm": 0.46688947081565857, "learning_rate": 2.6348234307830157e-06, "loss": 0.39640945196151733, "step": 14282, "token_acc": 0.8632357122547626 }, { "epoch": 0.7706793287649059, "grad_norm": 0.3331109285354614, "learning_rate": 2.633641460137635e-06, "loss": 0.3589601516723633, "step": 14283, "token_acc": 0.8726580796252927 }, { "epoch": 0.7707332865699024, "grad_norm": 0.4481683075428009, "learning_rate": 2.632459714458915e-06, "loss": 0.33190369606018066, "step": 14284, "token_acc": 0.8819705781731099 }, { "epoch": 0.7707872443748989, "grad_norm": 0.4455937147140503, "learning_rate": 2.6312781937829457e-06, "loss": 0.33018630743026733, "step": 14285, "token_acc": 0.882491186839013 }, { "epoch": 0.7708412021798953, "grad_norm": 0.48636355996131897, "learning_rate": 2.6300968981458095e-06, "loss": 0.37012264132499695, "step": 14286, "token_acc": 0.8700393820054529 }, { "epoch": 0.7708951599848918, "grad_norm": 0.41372814774513245, "learning_rate": 2.6289158275835835e-06, "loss": 0.37488529086112976, "step": 14287, "token_acc": 0.8735974754558204 }, { "epoch": 0.7709491177898883, "grad_norm": 0.3833746016025543, "learning_rate": 2.6277349821323374e-06, "loss": 0.3210602402687073, "step": 14288, "token_acc": 0.8808820975863865 }, { "epoch": 0.7710030755948848, "grad_norm": 0.44433337450027466, "learning_rate": 2.626554361828133e-06, "loss": 0.3283804655075073, "step": 14289, "token_acc": 0.883004767426878 }, { "epoch": 0.7710570333998813, "grad_norm": 0.27882814407348633, "learning_rate": 2.6253739667070266e-06, "loss": 0.2707461416721344, "step": 14290, "token_acc": 0.9005291005291005 }, { "epoch": 0.7711109912048778, "grad_norm": 0.40957674384117126, "learning_rate": 2.6241937968050677e-06, "loss": 0.3746308386325836, "step": 14291, "token_acc": 0.8699026386341188 }, { "epoch": 0.7711649490098743, "grad_norm": 0.43961116671562195, "learning_rate": 2.6230138521582994e-06, "loss": 0.34832584857940674, "step": 14292, "token_acc": 0.8722678654472175 }, { "epoch": 0.7712189068148708, "grad_norm": 0.45887547731399536, "learning_rate": 2.621834132802754e-06, "loss": 0.37778520584106445, "step": 14293, "token_acc": 0.8695470674180755 }, { "epoch": 0.7712728646198672, "grad_norm": 0.4873475134372711, "learning_rate": 2.6206546387744593e-06, "loss": 0.35318031907081604, "step": 14294, "token_acc": 0.8762469668374225 }, { "epoch": 0.7713268224248637, "grad_norm": 0.5291270613670349, "learning_rate": 2.619475370109441e-06, "loss": 0.3539794683456421, "step": 14295, "token_acc": 0.8741965105601469 }, { "epoch": 0.7713807802298602, "grad_norm": 0.39068520069122314, "learning_rate": 2.618296326843712e-06, "loss": 0.3713294267654419, "step": 14296, "token_acc": 0.872041166380789 }, { "epoch": 0.7714347380348567, "grad_norm": 0.34361112117767334, "learning_rate": 2.6171175090132796e-06, "loss": 0.3268124461174011, "step": 14297, "token_acc": 0.8805013115709706 }, { "epoch": 0.7714886958398532, "grad_norm": 0.4380626380443573, "learning_rate": 2.6159389166541436e-06, "loss": 0.3914903402328491, "step": 14298, "token_acc": 0.8653694451748619 }, { "epoch": 0.7715426536448498, "grad_norm": 0.4873502552509308, "learning_rate": 2.614760549802299e-06, "loss": 0.35051101446151733, "step": 14299, "token_acc": 0.8752252252252253 }, { "epoch": 0.7715966114498463, "grad_norm": 0.4180881381034851, "learning_rate": 2.61358240849373e-06, "loss": 0.29068875312805176, "step": 14300, "token_acc": 0.8944698122780315 }, { "epoch": 0.7716505692548428, "grad_norm": 0.36709311604499817, "learning_rate": 2.612404492764422e-06, "loss": 0.3080393075942993, "step": 14301, "token_acc": 0.8891981444665341 }, { "epoch": 0.7717045270598392, "grad_norm": 0.4367679953575134, "learning_rate": 2.6112268026503452e-06, "loss": 0.27509814500808716, "step": 14302, "token_acc": 0.8943728387299591 }, { "epoch": 0.7717584848648357, "grad_norm": 0.31658825278282166, "learning_rate": 2.610049338187467e-06, "loss": 0.2593034505844116, "step": 14303, "token_acc": 0.9020278524309797 }, { "epoch": 0.7718124426698322, "grad_norm": 0.4077647030353546, "learning_rate": 2.6088720994117455e-06, "loss": 0.2824985086917877, "step": 14304, "token_acc": 0.8969680708344513 }, { "epoch": 0.7718664004748287, "grad_norm": 0.3509485125541687, "learning_rate": 2.6076950863591346e-06, "loss": 0.31809723377227783, "step": 14305, "token_acc": 0.8857464647728694 }, { "epoch": 0.7719203582798252, "grad_norm": 0.5239410400390625, "learning_rate": 2.606518299065578e-06, "loss": 0.34447231888771057, "step": 14306, "token_acc": 0.8731601731601731 }, { "epoch": 0.7719743160848217, "grad_norm": 0.38818109035491943, "learning_rate": 2.6053417375670165e-06, "loss": 0.30372971296310425, "step": 14307, "token_acc": 0.8901841527285015 }, { "epoch": 0.7720282738898182, "grad_norm": 0.4342329800128937, "learning_rate": 2.604165401899381e-06, "loss": 0.36319929361343384, "step": 14308, "token_acc": 0.8720229555236729 }, { "epoch": 0.7720822316948146, "grad_norm": 0.3911558985710144, "learning_rate": 2.6029892920985968e-06, "loss": 0.2679945230484009, "step": 14309, "token_acc": 0.8985957132298595 }, { "epoch": 0.7721361894998111, "grad_norm": 0.3825035095214844, "learning_rate": 2.601813408200582e-06, "loss": 0.34296268224716187, "step": 14310, "token_acc": 0.8866605894864783 }, { "epoch": 0.7721901473048076, "grad_norm": 0.3779812455177307, "learning_rate": 2.6006377502412473e-06, "loss": 0.4014235734939575, "step": 14311, "token_acc": 0.8605105496222974 }, { "epoch": 0.7722441051098041, "grad_norm": 0.37593114376068115, "learning_rate": 2.5994623182564983e-06, "loss": 0.3226654529571533, "step": 14312, "token_acc": 0.8872017353579176 }, { "epoch": 0.7722980629148006, "grad_norm": 0.3808937966823578, "learning_rate": 2.598287112282227e-06, "loss": 0.3547900319099426, "step": 14313, "token_acc": 0.8732252795577334 }, { "epoch": 0.7723520207197971, "grad_norm": 0.45876017212867737, "learning_rate": 2.5971121323543326e-06, "loss": 0.35787010192871094, "step": 14314, "token_acc": 0.8689751988769303 }, { "epoch": 0.7724059785247936, "grad_norm": 0.3787635266780853, "learning_rate": 2.5959373785086927e-06, "loss": 0.38135823607444763, "step": 14315, "token_acc": 0.866176774994825 }, { "epoch": 0.7724599363297902, "grad_norm": 0.2738839387893677, "learning_rate": 2.5947628507811874e-06, "loss": 0.250416100025177, "step": 14316, "token_acc": 0.9095049742298933 }, { "epoch": 0.7725138941347865, "grad_norm": 0.436133474111557, "learning_rate": 2.593588549207684e-06, "loss": 0.337421715259552, "step": 14317, "token_acc": 0.8749494131930392 }, { "epoch": 0.7725678519397831, "grad_norm": 0.35452768206596375, "learning_rate": 2.5924144738240464e-06, "loss": 0.33978497982025146, "step": 14318, "token_acc": 0.8762159875934019 }, { "epoch": 0.7726218097447796, "grad_norm": 0.3652404844760895, "learning_rate": 2.591240624666127e-06, "loss": 0.3230898976325989, "step": 14319, "token_acc": 0.883384805435454 }, { "epoch": 0.7726757675497761, "grad_norm": 0.38178491592407227, "learning_rate": 2.5900670017697806e-06, "loss": 0.37971246242523193, "step": 14320, "token_acc": 0.8676748582230623 }, { "epoch": 0.7727297253547726, "grad_norm": 0.4276241660118103, "learning_rate": 2.5888936051708502e-06, "loss": 0.35635870695114136, "step": 14321, "token_acc": 0.8729101938156532 }, { "epoch": 0.7727836831597691, "grad_norm": 0.45714038610458374, "learning_rate": 2.5877204349051644e-06, "loss": 0.3276684880256653, "step": 14322, "token_acc": 0.8856329537843268 }, { "epoch": 0.7728376409647656, "grad_norm": 0.44362181425094604, "learning_rate": 2.586547491008555e-06, "loss": 0.3609612286090851, "step": 14323, "token_acc": 0.8734405144694534 }, { "epoch": 0.7728915987697621, "grad_norm": 0.4338797926902771, "learning_rate": 2.585374773516843e-06, "loss": 0.3084612786769867, "step": 14324, "token_acc": 0.8935705892227631 }, { "epoch": 0.7729455565747585, "grad_norm": 0.4681992530822754, "learning_rate": 2.584202282465844e-06, "loss": 0.33268827199935913, "step": 14325, "token_acc": 0.8819993672888327 }, { "epoch": 0.772999514379755, "grad_norm": 0.5122849941253662, "learning_rate": 2.5830300178913616e-06, "loss": 0.316802442073822, "step": 14326, "token_acc": 0.8790548658390068 }, { "epoch": 0.7730534721847515, "grad_norm": 0.444153755903244, "learning_rate": 2.581857979829201e-06, "loss": 0.2879030704498291, "step": 14327, "token_acc": 0.8902460003440564 }, { "epoch": 0.773107429989748, "grad_norm": 0.4151051342487335, "learning_rate": 2.580686168315155e-06, "loss": 0.3440268635749817, "step": 14328, "token_acc": 0.878223693283685 }, { "epoch": 0.7731613877947445, "grad_norm": 0.3887268006801605, "learning_rate": 2.579514583385011e-06, "loss": 0.32123318314552307, "step": 14329, "token_acc": 0.8871817383669885 }, { "epoch": 0.773215345599741, "grad_norm": 0.5024527311325073, "learning_rate": 2.5783432250745465e-06, "loss": 0.3198311924934387, "step": 14330, "token_acc": 0.8874225154969007 }, { "epoch": 0.7732693034047375, "grad_norm": 0.4559696912765503, "learning_rate": 2.5771720934195354e-06, "loss": 0.3605862855911255, "step": 14331, "token_acc": 0.8788282290279628 }, { "epoch": 0.7733232612097339, "grad_norm": 0.38494575023651123, "learning_rate": 2.5760011884557423e-06, "loss": 0.33546555042266846, "step": 14332, "token_acc": 0.8830041241588886 }, { "epoch": 0.7733772190147304, "grad_norm": 0.3958515226840973, "learning_rate": 2.57483051021893e-06, "loss": 0.3377631604671478, "step": 14333, "token_acc": 0.876979293544458 }, { "epoch": 0.773431176819727, "grad_norm": 0.4847088158130646, "learning_rate": 2.573660058744849e-06, "loss": 0.35707351565361023, "step": 14334, "token_acc": 0.8721804511278195 }, { "epoch": 0.7734851346247235, "grad_norm": 0.5416447520256042, "learning_rate": 2.572489834069245e-06, "loss": 0.34683874249458313, "step": 14335, "token_acc": 0.8773997979117548 }, { "epoch": 0.77353909242972, "grad_norm": 0.4104628264904022, "learning_rate": 2.5713198362278546e-06, "loss": 0.3130112588405609, "step": 14336, "token_acc": 0.8866954213669542 }, { "epoch": 0.7735930502347165, "grad_norm": 0.5097423195838928, "learning_rate": 2.5701500652564117e-06, "loss": 0.37452059984207153, "step": 14337, "token_acc": 0.8681398297409889 }, { "epoch": 0.773647008039713, "grad_norm": 0.35463935136795044, "learning_rate": 2.568980521190638e-06, "loss": 0.2684982120990753, "step": 14338, "token_acc": 0.8995076744859543 }, { "epoch": 0.7737009658447095, "grad_norm": 0.3543567657470703, "learning_rate": 2.567811204066254e-06, "loss": 0.30332809686660767, "step": 14339, "token_acc": 0.8824619948090471 }, { "epoch": 0.7737549236497059, "grad_norm": 0.39348679780960083, "learning_rate": 2.566642113918968e-06, "loss": 0.3159085512161255, "step": 14340, "token_acc": 0.8862113402061855 }, { "epoch": 0.7738088814547024, "grad_norm": 0.3904566168785095, "learning_rate": 2.5654732507844846e-06, "loss": 0.3189998269081116, "step": 14341, "token_acc": 0.889028827315981 }, { "epoch": 0.7738628392596989, "grad_norm": 0.4340403974056244, "learning_rate": 2.564304614698501e-06, "loss": 0.33211374282836914, "step": 14342, "token_acc": 0.8869701726844584 }, { "epoch": 0.7739167970646954, "grad_norm": 0.45352715253829956, "learning_rate": 2.5631362056967058e-06, "loss": 0.31737175583839417, "step": 14343, "token_acc": 0.885375 }, { "epoch": 0.7739707548696919, "grad_norm": 0.32760217785835266, "learning_rate": 2.561968023814784e-06, "loss": 0.36837273836135864, "step": 14344, "token_acc": 0.8728212986018005 }, { "epoch": 0.7740247126746884, "grad_norm": 0.404623419046402, "learning_rate": 2.560800069088406e-06, "loss": 0.3112613558769226, "step": 14345, "token_acc": 0.8855228256317186 }, { "epoch": 0.7740786704796849, "grad_norm": 0.4099234938621521, "learning_rate": 2.559632341553249e-06, "loss": 0.3259122967720032, "step": 14346, "token_acc": 0.8862738643844634 }, { "epoch": 0.7741326282846814, "grad_norm": 0.33464041352272034, "learning_rate": 2.5584648412449695e-06, "loss": 0.3322222828865051, "step": 14347, "token_acc": 0.8839930024054231 }, { "epoch": 0.7741865860896778, "grad_norm": 0.3455009162425995, "learning_rate": 2.557297568199226e-06, "loss": 0.3621766269207001, "step": 14348, "token_acc": 0.8756293119522656 }, { "epoch": 0.7742405438946743, "grad_norm": 0.5132077932357788, "learning_rate": 2.556130522451664e-06, "loss": 0.33187437057495117, "step": 14349, "token_acc": 0.8855989232839838 }, { "epoch": 0.7742945016996708, "grad_norm": 0.30960649251937866, "learning_rate": 2.5549637040379273e-06, "loss": 0.3138725161552429, "step": 14350, "token_acc": 0.888273439610916 }, { "epoch": 0.7743484595046674, "grad_norm": 0.41487863659858704, "learning_rate": 2.5537971129936477e-06, "loss": 0.35761895775794983, "step": 14351, "token_acc": 0.8765840220385674 }, { "epoch": 0.7744024173096639, "grad_norm": 0.44822829961776733, "learning_rate": 2.55263074935445e-06, "loss": 0.3084385097026825, "step": 14352, "token_acc": 0.8855989232839838 }, { "epoch": 0.7744563751146604, "grad_norm": 0.3887440264225006, "learning_rate": 2.551464613155963e-06, "loss": 0.37975654006004333, "step": 14353, "token_acc": 0.8717549946165809 }, { "epoch": 0.7745103329196569, "grad_norm": 0.32042035460472107, "learning_rate": 2.5502987044337934e-06, "loss": 0.3290417790412903, "step": 14354, "token_acc": 0.8828580190125729 }, { "epoch": 0.7745642907246533, "grad_norm": 0.36062729358673096, "learning_rate": 2.5491330232235544e-06, "loss": 0.31088972091674805, "step": 14355, "token_acc": 0.890185676392573 }, { "epoch": 0.7746182485296498, "grad_norm": 0.3703612685203552, "learning_rate": 2.5479675695608374e-06, "loss": 0.33985623717308044, "step": 14356, "token_acc": 0.8806952586650211 }, { "epoch": 0.7746722063346463, "grad_norm": 0.4432811141014099, "learning_rate": 2.5468023434812384e-06, "loss": 0.3509226441383362, "step": 14357, "token_acc": 0.8772353794103431 }, { "epoch": 0.7747261641396428, "grad_norm": 0.36346957087516785, "learning_rate": 2.5456373450203407e-06, "loss": 0.29751455783843994, "step": 14358, "token_acc": 0.8926033779848573 }, { "epoch": 0.7747801219446393, "grad_norm": 0.31581753492355347, "learning_rate": 2.5444725742137286e-06, "loss": 0.3067241907119751, "step": 14359, "token_acc": 0.8874821965500871 }, { "epoch": 0.7748340797496358, "grad_norm": 0.464657723903656, "learning_rate": 2.5433080310969716e-06, "loss": 0.3086494207382202, "step": 14360, "token_acc": 0.8914779208082125 }, { "epoch": 0.7748880375546323, "grad_norm": 0.3366697132587433, "learning_rate": 2.5421437157056327e-06, "loss": 0.3334072530269623, "step": 14361, "token_acc": 0.8765344997883449 }, { "epoch": 0.7749419953596288, "grad_norm": 0.3728972375392914, "learning_rate": 2.5409796280752717e-06, "loss": 0.2722240686416626, "step": 14362, "token_acc": 0.8987651574146178 }, { "epoch": 0.7749959531646252, "grad_norm": 0.31812480092048645, "learning_rate": 2.5398157682414394e-06, "loss": 0.3071240186691284, "step": 14363, "token_acc": 0.8871468926553673 }, { "epoch": 0.7750499109696217, "grad_norm": 0.32712212204933167, "learning_rate": 2.5386521362396755e-06, "loss": 0.3136710524559021, "step": 14364, "token_acc": 0.8866252302025782 }, { "epoch": 0.7751038687746182, "grad_norm": 0.4599809944629669, "learning_rate": 2.5374887321055243e-06, "loss": 0.3513822555541992, "step": 14365, "token_acc": 0.8802395209580839 }, { "epoch": 0.7751578265796147, "grad_norm": 0.3499342203140259, "learning_rate": 2.5363255558745126e-06, "loss": 0.30719345808029175, "step": 14366, "token_acc": 0.8875346928924822 }, { "epoch": 0.7752117843846112, "grad_norm": 0.31959906220436096, "learning_rate": 2.5351626075821622e-06, "loss": 0.3334234952926636, "step": 14367, "token_acc": 0.8816483962647178 }, { "epoch": 0.7752657421896078, "grad_norm": 0.4334001839160919, "learning_rate": 2.5339998872639904e-06, "loss": 0.32721424102783203, "step": 14368, "token_acc": 0.8838709677419355 }, { "epoch": 0.7753196999946043, "grad_norm": 0.42392781376838684, "learning_rate": 2.532837394955506e-06, "loss": 0.360637903213501, "step": 14369, "token_acc": 0.8662430254184749 }, { "epoch": 0.7753736577996007, "grad_norm": 0.39636048674583435, "learning_rate": 2.5316751306922127e-06, "loss": 0.35613691806793213, "step": 14370, "token_acc": 0.8741576124329529 }, { "epoch": 0.7754276156045972, "grad_norm": 0.3774097263813019, "learning_rate": 2.5305130945096034e-06, "loss": 0.3570614457130432, "step": 14371, "token_acc": 0.8767522709431423 }, { "epoch": 0.7754815734095937, "grad_norm": 0.43373963236808777, "learning_rate": 2.529351286443168e-06, "loss": 0.30760306119918823, "step": 14372, "token_acc": 0.8937510641920654 }, { "epoch": 0.7755355312145902, "grad_norm": 0.4055477976799011, "learning_rate": 2.5281897065283867e-06, "loss": 0.2636207342147827, "step": 14373, "token_acc": 0.9013781993914445 }, { "epoch": 0.7755894890195867, "grad_norm": 0.41276857256889343, "learning_rate": 2.527028354800735e-06, "loss": 0.310219943523407, "step": 14374, "token_acc": 0.8856747191875673 }, { "epoch": 0.7756434468245832, "grad_norm": 0.4663633704185486, "learning_rate": 2.525867231295679e-06, "loss": 0.320954829454422, "step": 14375, "token_acc": 0.8875878220140515 }, { "epoch": 0.7756974046295797, "grad_norm": 0.4425722360610962, "learning_rate": 2.52470633604868e-06, "loss": 0.3941609263420105, "step": 14376, "token_acc": 0.8600312913708027 }, { "epoch": 0.7757513624345762, "grad_norm": 0.3433087468147278, "learning_rate": 2.5235456690951887e-06, "loss": 0.31102755665779114, "step": 14377, "token_acc": 0.8859986428409862 }, { "epoch": 0.7758053202395726, "grad_norm": 0.43349286913871765, "learning_rate": 2.5223852304706574e-06, "loss": 0.328805148601532, "step": 14378, "token_acc": 0.8812976528629093 }, { "epoch": 0.7758592780445691, "grad_norm": 0.3906337022781372, "learning_rate": 2.5212250202105214e-06, "loss": 0.34573036432266235, "step": 14379, "token_acc": 0.8818328738473239 }, { "epoch": 0.7759132358495656, "grad_norm": 0.40253087878227234, "learning_rate": 2.5200650383502145e-06, "loss": 0.30548644065856934, "step": 14380, "token_acc": 0.8880586493726209 }, { "epoch": 0.7759671936545621, "grad_norm": 0.48267099261283875, "learning_rate": 2.518905284925162e-06, "loss": 0.3229129910469055, "step": 14381, "token_acc": 0.880712259691144 }, { "epoch": 0.7760211514595586, "grad_norm": 0.4715297818183899, "learning_rate": 2.517745759970782e-06, "loss": 0.3557022213935852, "step": 14382, "token_acc": 0.8759529860228716 }, { "epoch": 0.7760751092645551, "grad_norm": 0.3631853461265564, "learning_rate": 2.5165864635224867e-06, "loss": 0.31206685304641724, "step": 14383, "token_acc": 0.8864120403237257 }, { "epoch": 0.7761290670695516, "grad_norm": 0.3787713348865509, "learning_rate": 2.5154273956156774e-06, "loss": 0.36111077666282654, "step": 14384, "token_acc": 0.8799573813188114 }, { "epoch": 0.7761830248745482, "grad_norm": 0.5044459700584412, "learning_rate": 2.5142685562857572e-06, "loss": 0.3552234172821045, "step": 14385, "token_acc": 0.8762622215098573 }, { "epoch": 0.7762369826795446, "grad_norm": 0.36905139684677124, "learning_rate": 2.5131099455681153e-06, "loss": 0.3047846555709839, "step": 14386, "token_acc": 0.8891815946610467 }, { "epoch": 0.7762909404845411, "grad_norm": 0.37867316603660583, "learning_rate": 2.511951563498133e-06, "loss": 0.3748816251754761, "step": 14387, "token_acc": 0.8682160477797974 }, { "epoch": 0.7763448982895376, "grad_norm": 0.3810494840145111, "learning_rate": 2.5107934101111898e-06, "loss": 0.3091040253639221, "step": 14388, "token_acc": 0.8867592755407069 }, { "epoch": 0.7763988560945341, "grad_norm": 0.42014724016189575, "learning_rate": 2.509635485442653e-06, "loss": 0.3073400557041168, "step": 14389, "token_acc": 0.8891410048622367 }, { "epoch": 0.7764528138995306, "grad_norm": 0.2914681136608124, "learning_rate": 2.5084777895278866e-06, "loss": 0.3159980773925781, "step": 14390, "token_acc": 0.8852225856919357 }, { "epoch": 0.7765067717045271, "grad_norm": 0.5096002817153931, "learning_rate": 2.5073203224022457e-06, "loss": 0.34494855999946594, "step": 14391, "token_acc": 0.8759016393442624 }, { "epoch": 0.7765607295095236, "grad_norm": 0.4438508152961731, "learning_rate": 2.5061630841010796e-06, "loss": 0.314913809299469, "step": 14392, "token_acc": 0.8820960698689956 }, { "epoch": 0.77661468731452, "grad_norm": 0.46176877617836, "learning_rate": 2.5050060746597294e-06, "loss": 0.3591712713241577, "step": 14393, "token_acc": 0.8756231960115455 }, { "epoch": 0.7766686451195165, "grad_norm": 0.4732775092124939, "learning_rate": 2.5038492941135307e-06, "loss": 0.3517797887325287, "step": 14394, "token_acc": 0.876643915431996 }, { "epoch": 0.776722602924513, "grad_norm": 0.4528786838054657, "learning_rate": 2.502692742497811e-06, "loss": 0.3366071581840515, "step": 14395, "token_acc": 0.8769123783031989 }, { "epoch": 0.7767765607295095, "grad_norm": 0.5437977313995361, "learning_rate": 2.5015364198478908e-06, "loss": 0.33944830298423767, "step": 14396, "token_acc": 0.8809481055844856 }, { "epoch": 0.776830518534506, "grad_norm": 0.23871873319149017, "learning_rate": 2.500380326199081e-06, "loss": 0.27627965807914734, "step": 14397, "token_acc": 0.8980231765507839 }, { "epoch": 0.7768844763395025, "grad_norm": 0.4446015954017639, "learning_rate": 2.4992244615866934e-06, "loss": 0.3722440004348755, "step": 14398, "token_acc": 0.8744829553558694 }, { "epoch": 0.776938434144499, "grad_norm": 0.4317628741264343, "learning_rate": 2.4980688260460274e-06, "loss": 0.35252851247787476, "step": 14399, "token_acc": 0.8733312635827383 }, { "epoch": 0.7769923919494955, "grad_norm": 0.42603620886802673, "learning_rate": 2.4969134196123733e-06, "loss": 0.3493340015411377, "step": 14400, "token_acc": 0.8779709682565002 }, { "epoch": 0.7770463497544919, "grad_norm": 0.5746450424194336, "learning_rate": 2.495758242321018e-06, "loss": 0.38711047172546387, "step": 14401, "token_acc": 0.8665669968230237 }, { "epoch": 0.7771003075594884, "grad_norm": 0.4272972643375397, "learning_rate": 2.4946032942072407e-06, "loss": 0.32382649183273315, "step": 14402, "token_acc": 0.8810130376142664 }, { "epoch": 0.777154265364485, "grad_norm": 0.32899990677833557, "learning_rate": 2.493448575306312e-06, "loss": 0.3138693571090698, "step": 14403, "token_acc": 0.8919324577861163 }, { "epoch": 0.7772082231694815, "grad_norm": 0.3461562693119049, "learning_rate": 2.4922940856534972e-06, "loss": 0.33571362495422363, "step": 14404, "token_acc": 0.8778376844494892 }, { "epoch": 0.777262180974478, "grad_norm": 0.449207067489624, "learning_rate": 2.4911398252840547e-06, "loss": 0.3556930422782898, "step": 14405, "token_acc": 0.871336405529954 }, { "epoch": 0.7773161387794745, "grad_norm": 0.38633665442466736, "learning_rate": 2.489985794233234e-06, "loss": 0.29349344968795776, "step": 14406, "token_acc": 0.8963414634146342 }, { "epoch": 0.777370096584471, "grad_norm": 0.4444744288921356, "learning_rate": 2.4888319925362813e-06, "loss": 0.3779013752937317, "step": 14407, "token_acc": 0.8662575266327003 }, { "epoch": 0.7774240543894675, "grad_norm": 0.4735009968280792, "learning_rate": 2.487678420228431e-06, "loss": 0.3442322611808777, "step": 14408, "token_acc": 0.8812207386856251 }, { "epoch": 0.7774780121944639, "grad_norm": 0.4399774968624115, "learning_rate": 2.48652507734491e-06, "loss": 0.41984039545059204, "step": 14409, "token_acc": 0.8526834611171961 }, { "epoch": 0.7775319699994604, "grad_norm": 0.36518043279647827, "learning_rate": 2.4853719639209493e-06, "loss": 0.2876417636871338, "step": 14410, "token_acc": 0.8959172563962983 }, { "epoch": 0.7775859278044569, "grad_norm": 0.5145568251609802, "learning_rate": 2.4842190799917597e-06, "loss": 0.34301960468292236, "step": 14411, "token_acc": 0.8829052258635961 }, { "epoch": 0.7776398856094534, "grad_norm": 0.4147549271583557, "learning_rate": 2.483066425592551e-06, "loss": 0.3598257303237915, "step": 14412, "token_acc": 0.8735763097949886 }, { "epoch": 0.7776938434144499, "grad_norm": 0.48981088399887085, "learning_rate": 2.4819140007585242e-06, "loss": 0.36181825399398804, "step": 14413, "token_acc": 0.8720538720538721 }, { "epoch": 0.7777478012194464, "grad_norm": 0.37906861305236816, "learning_rate": 2.480761805524874e-06, "loss": 0.3471408784389496, "step": 14414, "token_acc": 0.8773584905660378 }, { "epoch": 0.7778017590244429, "grad_norm": 0.5425268411636353, "learning_rate": 2.479609839926789e-06, "loss": 0.3704543113708496, "step": 14415, "token_acc": 0.868943606036537 }, { "epoch": 0.7778557168294393, "grad_norm": 0.44735294580459595, "learning_rate": 2.478458103999445e-06, "loss": 0.31068283319473267, "step": 14416, "token_acc": 0.8906385187748745 }, { "epoch": 0.7779096746344358, "grad_norm": 0.3912666141986847, "learning_rate": 2.477306597778024e-06, "loss": 0.31420013308525085, "step": 14417, "token_acc": 0.8864301157319089 }, { "epoch": 0.7779636324394323, "grad_norm": 0.35996291041374207, "learning_rate": 2.4761553212976894e-06, "loss": 0.3055209219455719, "step": 14418, "token_acc": 0.8862892832358481 }, { "epoch": 0.7780175902444288, "grad_norm": 0.3930627703666687, "learning_rate": 2.4750042745936e-06, "loss": 0.38817518949508667, "step": 14419, "token_acc": 0.8634621055735772 }, { "epoch": 0.7780715480494254, "grad_norm": 0.2910691499710083, "learning_rate": 2.473853457700909e-06, "loss": 0.28548333048820496, "step": 14420, "token_acc": 0.8947064262598243 }, { "epoch": 0.7781255058544219, "grad_norm": 0.35682007670402527, "learning_rate": 2.4727028706547616e-06, "loss": 0.30326783657073975, "step": 14421, "token_acc": 0.8899432809773123 }, { "epoch": 0.7781794636594184, "grad_norm": 0.3788512349128723, "learning_rate": 2.4715525134902963e-06, "loss": 0.32000914216041565, "step": 14422, "token_acc": 0.8833673700930338 }, { "epoch": 0.7782334214644149, "grad_norm": 0.3442995250225067, "learning_rate": 2.4704023862426454e-06, "loss": 0.32725948095321655, "step": 14423, "token_acc": 0.8838937621832359 }, { "epoch": 0.7782873792694113, "grad_norm": 0.4757748544216156, "learning_rate": 2.4692524889469327e-06, "loss": 0.39204955101013184, "step": 14424, "token_acc": 0.862268881228455 }, { "epoch": 0.7783413370744078, "grad_norm": 0.39670026302337646, "learning_rate": 2.4681028216382775e-06, "loss": 0.36913174390792847, "step": 14425, "token_acc": 0.870986173008799 }, { "epoch": 0.7783952948794043, "grad_norm": 0.47754815220832825, "learning_rate": 2.4669533843517878e-06, "loss": 0.32448437809944153, "step": 14426, "token_acc": 0.8829891838741396 }, { "epoch": 0.7784492526844008, "grad_norm": 0.3978113830089569, "learning_rate": 2.465804177122568e-06, "loss": 0.3621819019317627, "step": 14427, "token_acc": 0.8769476628046344 }, { "epoch": 0.7785032104893973, "grad_norm": 0.4214344024658203, "learning_rate": 2.464655199985716e-06, "loss": 0.30956801772117615, "step": 14428, "token_acc": 0.8838690993339126 }, { "epoch": 0.7785571682943938, "grad_norm": 0.4396165907382965, "learning_rate": 2.4635064529763163e-06, "loss": 0.33484578132629395, "step": 14429, "token_acc": 0.8795180722891566 }, { "epoch": 0.7786111260993903, "grad_norm": 0.3761829435825348, "learning_rate": 2.462357936129459e-06, "loss": 0.27144354581832886, "step": 14430, "token_acc": 0.9015299479166666 }, { "epoch": 0.7786650839043868, "grad_norm": 0.5266681909561157, "learning_rate": 2.4612096494802164e-06, "loss": 0.3513198792934418, "step": 14431, "token_acc": 0.8780158596254429 }, { "epoch": 0.7787190417093832, "grad_norm": 0.582619845867157, "learning_rate": 2.4600615930636552e-06, "loss": 0.36578381061553955, "step": 14432, "token_acc": 0.8704765317090648 }, { "epoch": 0.7787729995143797, "grad_norm": 0.4890044033527374, "learning_rate": 2.4589137669148376e-06, "loss": 0.34170448780059814, "step": 14433, "token_acc": 0.876730564430245 }, { "epoch": 0.7788269573193762, "grad_norm": 0.3018175959587097, "learning_rate": 2.4577661710688182e-06, "loss": 0.32325756549835205, "step": 14434, "token_acc": 0.8815284744762467 }, { "epoch": 0.7788809151243727, "grad_norm": 0.44185882806777954, "learning_rate": 2.456618805560641e-06, "loss": 0.3193856477737427, "step": 14435, "token_acc": 0.888118214716526 }, { "epoch": 0.7789348729293692, "grad_norm": 0.3183109760284424, "learning_rate": 2.4554716704253524e-06, "loss": 0.33962976932525635, "step": 14436, "token_acc": 0.879710893133712 }, { "epoch": 0.7789888307343658, "grad_norm": 0.4329979419708252, "learning_rate": 2.4543247656979864e-06, "loss": 0.3472442626953125, "step": 14437, "token_acc": 0.8796959508843736 }, { "epoch": 0.7790427885393623, "grad_norm": 0.3555033206939697, "learning_rate": 2.4531780914135616e-06, "loss": 0.31221920251846313, "step": 14438, "token_acc": 0.8860229574611749 }, { "epoch": 0.7790967463443587, "grad_norm": 0.29908716678619385, "learning_rate": 2.4520316476071004e-06, "loss": 0.27640318870544434, "step": 14439, "token_acc": 0.8967646050074312 }, { "epoch": 0.7791507041493552, "grad_norm": 0.49364638328552246, "learning_rate": 2.450885434313617e-06, "loss": 0.3468625843524933, "step": 14440, "token_acc": 0.8738425925925926 }, { "epoch": 0.7792046619543517, "grad_norm": 0.5081966519355774, "learning_rate": 2.4497394515681127e-06, "loss": 0.31935685873031616, "step": 14441, "token_acc": 0.8822077235055545 }, { "epoch": 0.7792586197593482, "grad_norm": 0.5767034888267517, "learning_rate": 2.448593699405587e-06, "loss": 0.3336331248283386, "step": 14442, "token_acc": 0.8828828828828829 }, { "epoch": 0.7793125775643447, "grad_norm": 0.5552159547805786, "learning_rate": 2.4474481778610325e-06, "loss": 0.33830350637435913, "step": 14443, "token_acc": 0.8758936755270395 }, { "epoch": 0.7793665353693412, "grad_norm": 0.4152919054031372, "learning_rate": 2.446302886969433e-06, "loss": 0.3106415271759033, "step": 14444, "token_acc": 0.8866330562746968 }, { "epoch": 0.7794204931743377, "grad_norm": 0.44011762738227844, "learning_rate": 2.445157826765765e-06, "loss": 0.3549994230270386, "step": 14445, "token_acc": 0.8743252595155709 }, { "epoch": 0.7794744509793342, "grad_norm": 0.4294120967388153, "learning_rate": 2.4440129972849968e-06, "loss": 0.3561129570007324, "step": 14446, "token_acc": 0.8748902546093064 }, { "epoch": 0.7795284087843306, "grad_norm": 0.2837430536746979, "learning_rate": 2.4428683985620926e-06, "loss": 0.35907530784606934, "step": 14447, "token_acc": 0.8706723532363272 }, { "epoch": 0.7795823665893271, "grad_norm": 0.3727751672267914, "learning_rate": 2.4417240306320056e-06, "loss": 0.3589562475681305, "step": 14448, "token_acc": 0.8736439055520102 }, { "epoch": 0.7796363243943236, "grad_norm": 0.4516449272632599, "learning_rate": 2.4405798935296897e-06, "loss": 0.2902998924255371, "step": 14449, "token_acc": 0.8972859499175289 }, { "epoch": 0.7796902821993201, "grad_norm": 0.3541761338710785, "learning_rate": 2.439435987290083e-06, "loss": 0.3235369920730591, "step": 14450, "token_acc": 0.8791741720936678 }, { "epoch": 0.7797442400043166, "grad_norm": 0.3337602913379669, "learning_rate": 2.4382923119481206e-06, "loss": 0.3147256076335907, "step": 14451, "token_acc": 0.8844839371155161 }, { "epoch": 0.7797981978093131, "grad_norm": 0.4318658709526062, "learning_rate": 2.43714886753873e-06, "loss": 0.33597418665885925, "step": 14452, "token_acc": 0.8795220166966771 }, { "epoch": 0.7798521556143097, "grad_norm": 0.42176124453544617, "learning_rate": 2.436005654096831e-06, "loss": 0.3254307508468628, "step": 14453, "token_acc": 0.884180790960452 }, { "epoch": 0.7799061134193062, "grad_norm": 0.5742604732513428, "learning_rate": 2.43486267165734e-06, "loss": 0.340770959854126, "step": 14454, "token_acc": 0.87745713090757 }, { "epoch": 0.7799600712243026, "grad_norm": 0.3605666160583496, "learning_rate": 2.4337199202551597e-06, "loss": 0.3255115747451782, "step": 14455, "token_acc": 0.883702635374328 }, { "epoch": 0.7800140290292991, "grad_norm": 0.4226224720478058, "learning_rate": 2.4325773999251913e-06, "loss": 0.3083166480064392, "step": 14456, "token_acc": 0.8867579908675799 }, { "epoch": 0.7800679868342956, "grad_norm": 0.42882591485977173, "learning_rate": 2.431435110702326e-06, "loss": 0.355840802192688, "step": 14457, "token_acc": 0.8755108803711477 }, { "epoch": 0.7801219446392921, "grad_norm": 0.41206926107406616, "learning_rate": 2.4302930526214508e-06, "loss": 0.31264084577560425, "step": 14458, "token_acc": 0.8851975887474883 }, { "epoch": 0.7801759024442886, "grad_norm": 0.4084286689758301, "learning_rate": 2.429151225717442e-06, "loss": 0.30353134870529175, "step": 14459, "token_acc": 0.8893235958963659 }, { "epoch": 0.7802298602492851, "grad_norm": 0.4026033580303192, "learning_rate": 2.428009630025172e-06, "loss": 0.3112567365169525, "step": 14460, "token_acc": 0.8835522201375859 }, { "epoch": 0.7802838180542816, "grad_norm": 0.4029964208602905, "learning_rate": 2.426868265579502e-06, "loss": 0.3185472786426544, "step": 14461, "token_acc": 0.8915917503966155 }, { "epoch": 0.780337775859278, "grad_norm": 0.3205423355102539, "learning_rate": 2.425727132415293e-06, "loss": 0.3107566237449646, "step": 14462, "token_acc": 0.888402625820569 }, { "epoch": 0.7803917336642745, "grad_norm": 0.3975409269332886, "learning_rate": 2.4245862305673938e-06, "loss": 0.3207990527153015, "step": 14463, "token_acc": 0.8843360995850622 }, { "epoch": 0.780445691469271, "grad_norm": 0.5163609981536865, "learning_rate": 2.4234455600706464e-06, "loss": 0.3569478988647461, "step": 14464, "token_acc": 0.8754578754578755 }, { "epoch": 0.7804996492742675, "grad_norm": 0.5037471652030945, "learning_rate": 2.422305120959887e-06, "loss": 0.3320693075656891, "step": 14465, "token_acc": 0.8786973409023006 }, { "epoch": 0.780553607079264, "grad_norm": 0.36000820994377136, "learning_rate": 2.4211649132699434e-06, "loss": 0.32386794686317444, "step": 14466, "token_acc": 0.8847699287103046 }, { "epoch": 0.7806075648842605, "grad_norm": 0.49770286679267883, "learning_rate": 2.4200249370356354e-06, "loss": 0.29708099365234375, "step": 14467, "token_acc": 0.8941040843214756 }, { "epoch": 0.780661522689257, "grad_norm": 0.3698303699493408, "learning_rate": 2.4188851922917833e-06, "loss": 0.3201773166656494, "step": 14468, "token_acc": 0.8833227647431833 }, { "epoch": 0.7807154804942535, "grad_norm": 0.5280194282531738, "learning_rate": 2.4177456790731912e-06, "loss": 0.37156394124031067, "step": 14469, "token_acc": 0.8676867686768677 }, { "epoch": 0.7807694382992499, "grad_norm": 0.4120246171951294, "learning_rate": 2.4166063974146604e-06, "loss": 0.3287732005119324, "step": 14470, "token_acc": 0.8807314410480349 }, { "epoch": 0.7808233961042464, "grad_norm": 0.3333197832107544, "learning_rate": 2.4154673473509836e-06, "loss": 0.3594757914543152, "step": 14471, "token_acc": 0.8748614190687362 }, { "epoch": 0.780877353909243, "grad_norm": 0.39004722237586975, "learning_rate": 2.4143285289169492e-06, "loss": 0.374052494764328, "step": 14472, "token_acc": 0.8656564184758851 }, { "epoch": 0.7809313117142395, "grad_norm": 0.4785487651824951, "learning_rate": 2.4131899421473314e-06, "loss": 0.36302119493484497, "step": 14473, "token_acc": 0.8703196347031964 }, { "epoch": 0.780985269519236, "grad_norm": 0.5492933392524719, "learning_rate": 2.4120515870769034e-06, "loss": 0.3652088940143585, "step": 14474, "token_acc": 0.8694975890496189 }, { "epoch": 0.7810392273242325, "grad_norm": 0.48497554659843445, "learning_rate": 2.410913463740434e-06, "loss": 0.32479774951934814, "step": 14475, "token_acc": 0.8865728678130971 }, { "epoch": 0.781093185129229, "grad_norm": 0.3440565764904022, "learning_rate": 2.40977557217268e-06, "loss": 0.34450703859329224, "step": 14476, "token_acc": 0.8733643501017738 }, { "epoch": 0.7811471429342254, "grad_norm": 0.41557884216308594, "learning_rate": 2.4086379124083927e-06, "loss": 0.39631450176239014, "step": 14477, "token_acc": 0.8597836505545666 }, { "epoch": 0.7812011007392219, "grad_norm": 0.2932972013950348, "learning_rate": 2.4075004844823136e-06, "loss": 0.32666832208633423, "step": 14478, "token_acc": 0.8830423940149625 }, { "epoch": 0.7812550585442184, "grad_norm": 0.4493683874607086, "learning_rate": 2.406363288429181e-06, "loss": 0.35737061500549316, "step": 14479, "token_acc": 0.8762956669498726 }, { "epoch": 0.7813090163492149, "grad_norm": 0.4564087390899658, "learning_rate": 2.4052263242837205e-06, "loss": 0.308574914932251, "step": 14480, "token_acc": 0.8874193040466068 }, { "epoch": 0.7813629741542114, "grad_norm": 0.38005876541137695, "learning_rate": 2.404089592080663e-06, "loss": 0.29651838541030884, "step": 14481, "token_acc": 0.8902002515236529 }, { "epoch": 0.7814169319592079, "grad_norm": 0.5319619178771973, "learning_rate": 2.4029530918547184e-06, "loss": 0.40513917803764343, "step": 14482, "token_acc": 0.8616622987645077 }, { "epoch": 0.7814708897642044, "grad_norm": 0.4299226403236389, "learning_rate": 2.401816823640596e-06, "loss": 0.31971120834350586, "step": 14483, "token_acc": 0.8884419000464181 }, { "epoch": 0.7815248475692009, "grad_norm": 0.4325454831123352, "learning_rate": 2.4006807874729976e-06, "loss": 0.35726314783096313, "step": 14484, "token_acc": 0.8756901954932099 }, { "epoch": 0.7815788053741973, "grad_norm": 0.4872875511646271, "learning_rate": 2.399544983386618e-06, "loss": 0.4021042585372925, "step": 14485, "token_acc": 0.8573527251358896 }, { "epoch": 0.7816327631791938, "grad_norm": 0.46122026443481445, "learning_rate": 2.3984094114161428e-06, "loss": 0.3543669581413269, "step": 14486, "token_acc": 0.8750146010980026 }, { "epoch": 0.7816867209841903, "grad_norm": 0.3811216950416565, "learning_rate": 2.3972740715962527e-06, "loss": 0.3509523868560791, "step": 14487, "token_acc": 0.872724726967236 }, { "epoch": 0.7817406787891868, "grad_norm": 0.3494052290916443, "learning_rate": 2.396138963961622e-06, "loss": 0.33813291788101196, "step": 14488, "token_acc": 0.8843176605504587 }, { "epoch": 0.7817946365941834, "grad_norm": 0.3909507095813751, "learning_rate": 2.395004088546914e-06, "loss": 0.3044785261154175, "step": 14489, "token_acc": 0.8913952793546459 }, { "epoch": 0.7818485943991799, "grad_norm": 0.3972790837287903, "learning_rate": 2.3938694453867896e-06, "loss": 0.3133038878440857, "step": 14490, "token_acc": 0.8867313915857605 }, { "epoch": 0.7819025522041764, "grad_norm": 0.36415567994117737, "learning_rate": 2.3927350345158996e-06, "loss": 0.35266828536987305, "step": 14491, "token_acc": 0.879983036471586 }, { "epoch": 0.7819565100091729, "grad_norm": 0.32450196146965027, "learning_rate": 2.3916008559688887e-06, "loss": 0.3045681118965149, "step": 14492, "token_acc": 0.8911359159553512 }, { "epoch": 0.7820104678141693, "grad_norm": 0.3743920624256134, "learning_rate": 2.390466909780392e-06, "loss": 0.32255426049232483, "step": 14493, "token_acc": 0.8891752577319587 }, { "epoch": 0.7820644256191658, "grad_norm": 0.3885275423526764, "learning_rate": 2.389333195985045e-06, "loss": 0.3458244502544403, "step": 14494, "token_acc": 0.8790164576802508 }, { "epoch": 0.7821183834241623, "grad_norm": 0.4420320987701416, "learning_rate": 2.3881997146174695e-06, "loss": 0.33745723962783813, "step": 14495, "token_acc": 0.8805719091673675 }, { "epoch": 0.7821723412291588, "grad_norm": 0.45745015144348145, "learning_rate": 2.3870664657122787e-06, "loss": 0.3161490261554718, "step": 14496, "token_acc": 0.8835900473933649 }, { "epoch": 0.7822262990341553, "grad_norm": 0.3978107273578644, "learning_rate": 2.3859334493040852e-06, "loss": 0.35544121265411377, "step": 14497, "token_acc": 0.8761430721148281 }, { "epoch": 0.7822802568391518, "grad_norm": 0.47433653473854065, "learning_rate": 2.3848006654274893e-06, "loss": 0.31515854597091675, "step": 14498, "token_acc": 0.8877882429360182 }, { "epoch": 0.7823342146441483, "grad_norm": 0.5080125331878662, "learning_rate": 2.3836681141170857e-06, "loss": 0.3563942313194275, "step": 14499, "token_acc": 0.8769230769230769 }, { "epoch": 0.7823881724491447, "grad_norm": 0.4025593400001526, "learning_rate": 2.38253579540746e-06, "loss": 0.284991055727005, "step": 14500, "token_acc": 0.8961280182883269 }, { "epoch": 0.7824421302541412, "grad_norm": 0.38082239031791687, "learning_rate": 2.3814037093332e-06, "loss": 0.3468596339225769, "step": 14501, "token_acc": 0.8726675427069646 }, { "epoch": 0.7824960880591377, "grad_norm": 0.43079835176467896, "learning_rate": 2.380271855928873e-06, "loss": 0.35083526372909546, "step": 14502, "token_acc": 0.8782097260077464 }, { "epoch": 0.7825500458641342, "grad_norm": 0.5691008567810059, "learning_rate": 2.379140235229048e-06, "loss": 0.34250855445861816, "step": 14503, "token_acc": 0.88277428371768 }, { "epoch": 0.7826040036691307, "grad_norm": 0.4668373465538025, "learning_rate": 2.3780088472682837e-06, "loss": 0.31639593839645386, "step": 14504, "token_acc": 0.8894356005788712 }, { "epoch": 0.7826579614741273, "grad_norm": 0.43080419301986694, "learning_rate": 2.376877692081133e-06, "loss": 0.3403378129005432, "step": 14505, "token_acc": 0.8757062146892656 }, { "epoch": 0.7827119192791238, "grad_norm": 0.4365609586238861, "learning_rate": 2.3757467697021396e-06, "loss": 0.3117982745170593, "step": 14506, "token_acc": 0.888939325162657 }, { "epoch": 0.7827658770841203, "grad_norm": 0.44155439734458923, "learning_rate": 2.3746160801658435e-06, "loss": 0.33619725704193115, "step": 14507, "token_acc": 0.8822021319659965 }, { "epoch": 0.7828198348891167, "grad_norm": 0.4258062541484833, "learning_rate": 2.3734856235067737e-06, "loss": 0.35808688402175903, "step": 14508, "token_acc": 0.8753627861980007 }, { "epoch": 0.7828737926941132, "grad_norm": 0.4140509366989136, "learning_rate": 2.372355399759456e-06, "loss": 0.29102522134780884, "step": 14509, "token_acc": 0.8949518913931725 }, { "epoch": 0.7829277504991097, "grad_norm": 0.38899892568588257, "learning_rate": 2.3712254089584065e-06, "loss": 0.3402274549007416, "step": 14510, "token_acc": 0.879298439442066 }, { "epoch": 0.7829817083041062, "grad_norm": 0.48242348432540894, "learning_rate": 2.3700956511381345e-06, "loss": 0.3428635001182556, "step": 14511, "token_acc": 0.8788214794867734 }, { "epoch": 0.7830356661091027, "grad_norm": 0.42628273367881775, "learning_rate": 2.3689661263331387e-06, "loss": 0.29913997650146484, "step": 14512, "token_acc": 0.8916863362182009 }, { "epoch": 0.7830896239140992, "grad_norm": 0.34099555015563965, "learning_rate": 2.3678368345779225e-06, "loss": 0.27301937341690063, "step": 14513, "token_acc": 0.9004360465116279 }, { "epoch": 0.7831435817190957, "grad_norm": 0.3781115710735321, "learning_rate": 2.366707775906969e-06, "loss": 0.3316645324230194, "step": 14514, "token_acc": 0.8799879536214426 }, { "epoch": 0.7831975395240922, "grad_norm": 0.42935943603515625, "learning_rate": 2.3655789503547598e-06, "loss": 0.32353734970092773, "step": 14515, "token_acc": 0.8844487241213288 }, { "epoch": 0.7832514973290886, "grad_norm": 0.39140331745147705, "learning_rate": 2.364450357955771e-06, "loss": 0.3579120635986328, "step": 14516, "token_acc": 0.8755208333333333 }, { "epoch": 0.7833054551340851, "grad_norm": 0.3367909789085388, "learning_rate": 2.3633219987444677e-06, "loss": 0.29037097096443176, "step": 14517, "token_acc": 0.8927572706935123 }, { "epoch": 0.7833594129390816, "grad_norm": 0.33952203392982483, "learning_rate": 2.3621938727553097e-06, "loss": 0.348536878824234, "step": 14518, "token_acc": 0.8731470549652368 }, { "epoch": 0.7834133707440781, "grad_norm": 0.41760721802711487, "learning_rate": 2.3610659800227466e-06, "loss": 0.3233221173286438, "step": 14519, "token_acc": 0.8819681456200228 }, { "epoch": 0.7834673285490746, "grad_norm": 0.5050580501556396, "learning_rate": 2.3599383205812333e-06, "loss": 0.32155144214630127, "step": 14520, "token_acc": 0.8825579012092641 }, { "epoch": 0.7835212863540711, "grad_norm": 0.46869999170303345, "learning_rate": 2.3588108944652e-06, "loss": 0.295559823513031, "step": 14521, "token_acc": 0.8889547520252914 }, { "epoch": 0.7835752441590677, "grad_norm": 0.35624244809150696, "learning_rate": 2.3576837017090805e-06, "loss": 0.24912160634994507, "step": 14522, "token_acc": 0.9085224227264606 }, { "epoch": 0.783629201964064, "grad_norm": 0.5262181758880615, "learning_rate": 2.356556742347298e-06, "loss": 0.37349581718444824, "step": 14523, "token_acc": 0.8705555555555555 }, { "epoch": 0.7836831597690606, "grad_norm": 0.443731427192688, "learning_rate": 2.3554300164142707e-06, "loss": 0.34298229217529297, "step": 14524, "token_acc": 0.8812893081761006 }, { "epoch": 0.7837371175740571, "grad_norm": 0.3774012327194214, "learning_rate": 2.3543035239444047e-06, "loss": 0.3283058702945709, "step": 14525, "token_acc": 0.8796694589087616 }, { "epoch": 0.7837910753790536, "grad_norm": 0.39494484663009644, "learning_rate": 2.3531772649721095e-06, "loss": 0.2831914722919464, "step": 14526, "token_acc": 0.8949534067529953 }, { "epoch": 0.7838450331840501, "grad_norm": 0.41515353322029114, "learning_rate": 2.3520512395317776e-06, "loss": 0.33874502778053284, "step": 14527, "token_acc": 0.8773374506776462 }, { "epoch": 0.7838989909890466, "grad_norm": 0.4175039827823639, "learning_rate": 2.3509254476577968e-06, "loss": 0.36507469415664673, "step": 14528, "token_acc": 0.8745173745173745 }, { "epoch": 0.7839529487940431, "grad_norm": 0.46640610694885254, "learning_rate": 2.3497998893845486e-06, "loss": 0.30083680152893066, "step": 14529, "token_acc": 0.8865463641236376 }, { "epoch": 0.7840069065990396, "grad_norm": 0.40798184275627136, "learning_rate": 2.348674564746408e-06, "loss": 0.3306421637535095, "step": 14530, "token_acc": 0.8816855753646677 }, { "epoch": 0.784060864404036, "grad_norm": 0.45407259464263916, "learning_rate": 2.347549473777743e-06, "loss": 0.27715030312538147, "step": 14531, "token_acc": 0.8965721040189125 }, { "epoch": 0.7841148222090325, "grad_norm": 0.4208575487136841, "learning_rate": 2.346424616512909e-06, "loss": 0.29354727268218994, "step": 14532, "token_acc": 0.8899345335515548 }, { "epoch": 0.784168780014029, "grad_norm": 0.44006866216659546, "learning_rate": 2.345299992986264e-06, "loss": 0.357158899307251, "step": 14533, "token_acc": 0.872050595962053 }, { "epoch": 0.7842227378190255, "grad_norm": 0.3678974509239197, "learning_rate": 2.344175603232154e-06, "loss": 0.2782335877418518, "step": 14534, "token_acc": 0.8996459694989106 }, { "epoch": 0.784276695624022, "grad_norm": 0.41069042682647705, "learning_rate": 2.343051447284914e-06, "loss": 0.30456486344337463, "step": 14535, "token_acc": 0.889180136886559 }, { "epoch": 0.7843306534290185, "grad_norm": 0.49224576354026794, "learning_rate": 2.3419275251788766e-06, "loss": 0.3167158365249634, "step": 14536, "token_acc": 0.8842401500938086 }, { "epoch": 0.784384611234015, "grad_norm": 0.3053933382034302, "learning_rate": 2.3408038369483678e-06, "loss": 0.32507407665252686, "step": 14537, "token_acc": 0.8825557590776856 }, { "epoch": 0.7844385690390115, "grad_norm": 0.5026171803474426, "learning_rate": 2.3396803826277027e-06, "loss": 0.3491574227809906, "step": 14538, "token_acc": 0.8741095553917956 }, { "epoch": 0.7844925268440079, "grad_norm": 0.3353937268257141, "learning_rate": 2.3385571622511916e-06, "loss": 0.356337308883667, "step": 14539, "token_acc": 0.8722210679409932 }, { "epoch": 0.7845464846490044, "grad_norm": 0.39418384432792664, "learning_rate": 2.337434175853138e-06, "loss": 0.31192469596862793, "step": 14540, "token_acc": 0.8922697368421053 }, { "epoch": 0.784600442454001, "grad_norm": 0.46429067850112915, "learning_rate": 2.3363114234678375e-06, "loss": 0.36469146609306335, "step": 14541, "token_acc": 0.8717984801576133 }, { "epoch": 0.7846544002589975, "grad_norm": 0.3710826635360718, "learning_rate": 2.3351889051295782e-06, "loss": 0.3150064945220947, "step": 14542, "token_acc": 0.8838567350116792 }, { "epoch": 0.784708358063994, "grad_norm": 0.41204383969306946, "learning_rate": 2.3340666208726424e-06, "loss": 0.36209315061569214, "step": 14543, "token_acc": 0.8691759243906483 }, { "epoch": 0.7847623158689905, "grad_norm": 0.40671616792678833, "learning_rate": 2.3329445707313035e-06, "loss": 0.30358606576919556, "step": 14544, "token_acc": 0.8899021721107611 }, { "epoch": 0.784816273673987, "grad_norm": 0.48704808950424194, "learning_rate": 2.3318227547398265e-06, "loss": 0.35395365953445435, "step": 14545, "token_acc": 0.8730828760377093 }, { "epoch": 0.7848702314789834, "grad_norm": 0.3521181643009186, "learning_rate": 2.3307011729324757e-06, "loss": 0.2859266996383667, "step": 14546, "token_acc": 0.8978562421185372 }, { "epoch": 0.7849241892839799, "grad_norm": 0.37811079621315, "learning_rate": 2.3295798253435033e-06, "loss": 0.32337242364883423, "step": 14547, "token_acc": 0.8819921491658489 }, { "epoch": 0.7849781470889764, "grad_norm": 0.4109843075275421, "learning_rate": 2.3284587120071523e-06, "loss": 0.339327871799469, "step": 14548, "token_acc": 0.8831069739050816 }, { "epoch": 0.7850321048939729, "grad_norm": 0.47901642322540283, "learning_rate": 2.3273378329576637e-06, "loss": 0.40530097484588623, "step": 14549, "token_acc": 0.8544705136334813 }, { "epoch": 0.7850860626989694, "grad_norm": 0.4952617883682251, "learning_rate": 2.326217188229267e-06, "loss": 0.33784329891204834, "step": 14550, "token_acc": 0.8721454326923077 }, { "epoch": 0.7851400205039659, "grad_norm": 0.3961244225502014, "learning_rate": 2.325096777856184e-06, "loss": 0.27703770995140076, "step": 14551, "token_acc": 0.8963671748297113 }, { "epoch": 0.7851939783089624, "grad_norm": 0.3263343274593353, "learning_rate": 2.3239766018726386e-06, "loss": 0.3512698709964752, "step": 14552, "token_acc": 0.8752950836497999 }, { "epoch": 0.7852479361139589, "grad_norm": 0.4179384112358093, "learning_rate": 2.3228566603128365e-06, "loss": 0.2760627865791321, "step": 14553, "token_acc": 0.897196261682243 }, { "epoch": 0.7853018939189553, "grad_norm": 0.47274014353752136, "learning_rate": 2.3217369532109835e-06, "loss": 0.3323827087879181, "step": 14554, "token_acc": 0.8787660111852788 }, { "epoch": 0.7853558517239518, "grad_norm": 0.43177464604377747, "learning_rate": 2.3206174806012695e-06, "loss": 0.3371262550354004, "step": 14555, "token_acc": 0.8825704225352112 }, { "epoch": 0.7854098095289483, "grad_norm": 0.3340685963630676, "learning_rate": 2.319498242517886e-06, "loss": 0.3420247435569763, "step": 14556, "token_acc": 0.8779020584641414 }, { "epoch": 0.7854637673339449, "grad_norm": 0.47749650478363037, "learning_rate": 2.3183792389950142e-06, "loss": 0.33883967995643616, "step": 14557, "token_acc": 0.879539783753812 }, { "epoch": 0.7855177251389414, "grad_norm": 0.5065462589263916, "learning_rate": 2.3172604700668256e-06, "loss": 0.3181529641151428, "step": 14558, "token_acc": 0.8853598014888338 }, { "epoch": 0.7855716829439379, "grad_norm": 0.3964071571826935, "learning_rate": 2.316141935767493e-06, "loss": 0.3705359697341919, "step": 14559, "token_acc": 0.8701774470520893 }, { "epoch": 0.7856256407489344, "grad_norm": 0.5079876184463501, "learning_rate": 2.3150236361311716e-06, "loss": 0.2672777771949768, "step": 14560, "token_acc": 0.8987373737373737 }, { "epoch": 0.7856795985539309, "grad_norm": 0.40015560388565063, "learning_rate": 2.3139055711920156e-06, "loss": 0.31236496567726135, "step": 14561, "token_acc": 0.8879851745083908 }, { "epoch": 0.7857335563589273, "grad_norm": 0.35667213797569275, "learning_rate": 2.3127877409841702e-06, "loss": 0.297091007232666, "step": 14562, "token_acc": 0.8892692110217354 }, { "epoch": 0.7857875141639238, "grad_norm": 0.364261656999588, "learning_rate": 2.3116701455417735e-06, "loss": 0.3354061543941498, "step": 14563, "token_acc": 0.8834053794428435 }, { "epoch": 0.7858414719689203, "grad_norm": 0.4034000337123871, "learning_rate": 2.310552784898954e-06, "loss": 0.3193049430847168, "step": 14564, "token_acc": 0.8815322481948354 }, { "epoch": 0.7858954297739168, "grad_norm": 0.3067288100719452, "learning_rate": 2.3094356590898403e-06, "loss": 0.30411410331726074, "step": 14565, "token_acc": 0.891321521272994 }, { "epoch": 0.7859493875789133, "grad_norm": 0.4472777247428894, "learning_rate": 2.3083187681485466e-06, "loss": 0.31457075476646423, "step": 14566, "token_acc": 0.8830479224744457 }, { "epoch": 0.7860033453839098, "grad_norm": 0.3934713900089264, "learning_rate": 2.3072021121091836e-06, "loss": 0.3379782736301422, "step": 14567, "token_acc": 0.883136316025737 }, { "epoch": 0.7860573031889063, "grad_norm": 0.3946041166782379, "learning_rate": 2.3060856910058514e-06, "loss": 0.3381497859954834, "step": 14568, "token_acc": 0.8782714546561169 }, { "epoch": 0.7861112609939027, "grad_norm": 0.606837809085846, "learning_rate": 2.3049695048726484e-06, "loss": 0.41346079111099243, "step": 14569, "token_acc": 0.8655339805825243 }, { "epoch": 0.7861652187988992, "grad_norm": 0.437997967004776, "learning_rate": 2.3038535537436602e-06, "loss": 0.36295074224472046, "step": 14570, "token_acc": 0.8750219106047327 }, { "epoch": 0.7862191766038957, "grad_norm": 0.3518064022064209, "learning_rate": 2.3027378376529687e-06, "loss": 0.2967320680618286, "step": 14571, "token_acc": 0.8919521912350598 }, { "epoch": 0.7862731344088922, "grad_norm": 0.4148901402950287, "learning_rate": 2.301622356634646e-06, "loss": 0.3213055729866028, "step": 14572, "token_acc": 0.8820779220779221 }, { "epoch": 0.7863270922138887, "grad_norm": 0.4420830011367798, "learning_rate": 2.3005071107227618e-06, "loss": 0.284434974193573, "step": 14573, "token_acc": 0.8966979703120267 }, { "epoch": 0.7863810500188853, "grad_norm": 0.4906255304813385, "learning_rate": 2.299392099951372e-06, "loss": 0.3416113257408142, "step": 14574, "token_acc": 0.8821481849825957 }, { "epoch": 0.7864350078238818, "grad_norm": 0.2692105770111084, "learning_rate": 2.29827732435453e-06, "loss": 0.3043120801448822, "step": 14575, "token_acc": 0.8915071183112421 }, { "epoch": 0.7864889656288783, "grad_norm": 0.4558485746383667, "learning_rate": 2.297162783966281e-06, "loss": 0.37790486216545105, "step": 14576, "token_acc": 0.8676380555895677 }, { "epoch": 0.7865429234338747, "grad_norm": 0.3659096360206604, "learning_rate": 2.2960484788206606e-06, "loss": 0.3030264973640442, "step": 14577, "token_acc": 0.8917787742899851 }, { "epoch": 0.7865968812388712, "grad_norm": 0.5135789513587952, "learning_rate": 2.2949344089517043e-06, "loss": 0.35081276297569275, "step": 14578, "token_acc": 0.8744292237442922 }, { "epoch": 0.7866508390438677, "grad_norm": 0.41045722365379333, "learning_rate": 2.2938205743934327e-06, "loss": 0.3231986165046692, "step": 14579, "token_acc": 0.8830171109300431 }, { "epoch": 0.7867047968488642, "grad_norm": 0.4325140416622162, "learning_rate": 2.2927069751798616e-06, "loss": 0.3373381495475769, "step": 14580, "token_acc": 0.8801778609875965 }, { "epoch": 0.7867587546538607, "grad_norm": 0.472888708114624, "learning_rate": 2.291593611345001e-06, "loss": 0.3482673764228821, "step": 14581, "token_acc": 0.8774348216961343 }, { "epoch": 0.7868127124588572, "grad_norm": 0.3553957939147949, "learning_rate": 2.290480482922851e-06, "loss": 0.3042593002319336, "step": 14582, "token_acc": 0.8895284480850122 }, { "epoch": 0.7868666702638537, "grad_norm": 0.4798050820827484, "learning_rate": 2.2893675899474056e-06, "loss": 0.3862329125404358, "step": 14583, "token_acc": 0.8634615384615385 }, { "epoch": 0.7869206280688502, "grad_norm": 0.5364287495613098, "learning_rate": 2.288254932452657e-06, "loss": 0.31952226161956787, "step": 14584, "token_acc": 0.8847255895966498 }, { "epoch": 0.7869745858738466, "grad_norm": 0.3744204640388489, "learning_rate": 2.287142510472582e-06, "loss": 0.32269302010536194, "step": 14585, "token_acc": 0.8848325673013788 }, { "epoch": 0.7870285436788431, "grad_norm": 0.3708668351173401, "learning_rate": 2.2860303240411553e-06, "loss": 0.3088788390159607, "step": 14586, "token_acc": 0.890090976149496 }, { "epoch": 0.7870825014838396, "grad_norm": 0.3800235390663147, "learning_rate": 2.284918373192341e-06, "loss": 0.3546071946620941, "step": 14587, "token_acc": 0.8714528256124181 }, { "epoch": 0.7871364592888361, "grad_norm": 0.3977046608924866, "learning_rate": 2.2838066579600983e-06, "loss": 0.3151683509349823, "step": 14588, "token_acc": 0.8872654347514682 }, { "epoch": 0.7871904170938326, "grad_norm": 0.3355603516101837, "learning_rate": 2.2826951783783813e-06, "loss": 0.30150002241134644, "step": 14589, "token_acc": 0.8898214285714285 }, { "epoch": 0.7872443748988291, "grad_norm": 0.3476375639438629, "learning_rate": 2.2815839344811273e-06, "loss": 0.3206966519355774, "step": 14590, "token_acc": 0.8821165438713998 }, { "epoch": 0.7872983327038257, "grad_norm": 0.33887505531311035, "learning_rate": 2.2804729263022805e-06, "loss": 0.2646072506904602, "step": 14591, "token_acc": 0.8996255460786353 }, { "epoch": 0.787352290508822, "grad_norm": 0.4856445789337158, "learning_rate": 2.279362153875768e-06, "loss": 0.3412761092185974, "step": 14592, "token_acc": 0.8752896279133161 }, { "epoch": 0.7874062483138186, "grad_norm": 0.4304342269897461, "learning_rate": 2.278251617235513e-06, "loss": 0.30673468112945557, "step": 14593, "token_acc": 0.8862139178344712 }, { "epoch": 0.7874602061188151, "grad_norm": 0.37697258591651917, "learning_rate": 2.27714131641543e-06, "loss": 0.36969006061553955, "step": 14594, "token_acc": 0.8705250596658711 }, { "epoch": 0.7875141639238116, "grad_norm": 0.47142696380615234, "learning_rate": 2.2760312514494288e-06, "loss": 0.36118340492248535, "step": 14595, "token_acc": 0.871842553850922 }, { "epoch": 0.7875681217288081, "grad_norm": 0.4136752486228943, "learning_rate": 2.274921422371408e-06, "loss": 0.3153477609157562, "step": 14596, "token_acc": 0.8903199002908184 }, { "epoch": 0.7876220795338046, "grad_norm": 0.4405168294906616, "learning_rate": 2.2738118292152643e-06, "loss": 0.2959568500518799, "step": 14597, "token_acc": 0.8937588652482269 }, { "epoch": 0.7876760373388011, "grad_norm": 0.41695043444633484, "learning_rate": 2.272702472014884e-06, "loss": 0.28223058581352234, "step": 14598, "token_acc": 0.8947796059313426 }, { "epoch": 0.7877299951437976, "grad_norm": 0.4090307950973511, "learning_rate": 2.2715933508041466e-06, "loss": 0.30535632371902466, "step": 14599, "token_acc": 0.8879216539717084 }, { "epoch": 0.787783952948794, "grad_norm": 0.4048667252063751, "learning_rate": 2.2704844656169225e-06, "loss": 0.39424073696136475, "step": 14600, "token_acc": 0.8600237247924081 }, { "epoch": 0.7878379107537905, "grad_norm": 0.4285338819026947, "learning_rate": 2.2693758164870784e-06, "loss": 0.35064709186553955, "step": 14601, "token_acc": 0.8755561547795605 }, { "epoch": 0.787891868558787, "grad_norm": 0.45447883009910583, "learning_rate": 2.268267403448472e-06, "loss": 0.3309801518917084, "step": 14602, "token_acc": 0.8834273834273835 }, { "epoch": 0.7879458263637835, "grad_norm": 0.46018025279045105, "learning_rate": 2.267159226534954e-06, "loss": 0.3188437819480896, "step": 14603, "token_acc": 0.8896780303030303 }, { "epoch": 0.78799978416878, "grad_norm": 0.47583404183387756, "learning_rate": 2.2660512857803664e-06, "loss": 0.3217594027519226, "step": 14604, "token_acc": 0.8838575872796405 }, { "epoch": 0.7880537419737765, "grad_norm": 0.3085714876651764, "learning_rate": 2.2649435812185473e-06, "loss": 0.31559300422668457, "step": 14605, "token_acc": 0.8874122900276419 }, { "epoch": 0.788107699778773, "grad_norm": 0.4420821964740753, "learning_rate": 2.2638361128833253e-06, "loss": 0.3691972494125366, "step": 14606, "token_acc": 0.8688865764828304 }, { "epoch": 0.7881616575837694, "grad_norm": 0.502936840057373, "learning_rate": 2.2627288808085214e-06, "loss": 0.32769113779067993, "step": 14607, "token_acc": 0.887294727744166 }, { "epoch": 0.7882156153887659, "grad_norm": 0.45674511790275574, "learning_rate": 2.26162188502795e-06, "loss": 0.36540743708610535, "step": 14608, "token_acc": 0.8777697320782042 }, { "epoch": 0.7882695731937625, "grad_norm": 0.37333089113235474, "learning_rate": 2.2605151255754165e-06, "loss": 0.27500975131988525, "step": 14609, "token_acc": 0.8986690865093769 }, { "epoch": 0.788323530998759, "grad_norm": 0.40700626373291016, "learning_rate": 2.2594086024847273e-06, "loss": 0.295050710439682, "step": 14610, "token_acc": 0.8921681359396959 }, { "epoch": 0.7883774888037555, "grad_norm": 0.38376620411872864, "learning_rate": 2.25830231578967e-06, "loss": 0.3339287042617798, "step": 14611, "token_acc": 0.8803009575923393 }, { "epoch": 0.788431446608752, "grad_norm": 0.4388352632522583, "learning_rate": 2.257196265524033e-06, "loss": 0.3636399805545807, "step": 14612, "token_acc": 0.8696262995223377 }, { "epoch": 0.7884854044137485, "grad_norm": 0.4745554029941559, "learning_rate": 2.2560904517215942e-06, "loss": 0.35065412521362305, "step": 14613, "token_acc": 0.8766973622600281 }, { "epoch": 0.788539362218745, "grad_norm": 0.37545737624168396, "learning_rate": 2.2549848744161227e-06, "loss": 0.314835786819458, "step": 14614, "token_acc": 0.8874332011722117 }, { "epoch": 0.7885933200237414, "grad_norm": 0.35126402974128723, "learning_rate": 2.2538795336413855e-06, "loss": 0.3318940997123718, "step": 14615, "token_acc": 0.8833859759949463 }, { "epoch": 0.7886472778287379, "grad_norm": 0.37930014729499817, "learning_rate": 2.252774429431136e-06, "loss": 0.36093688011169434, "step": 14616, "token_acc": 0.8758288125376733 }, { "epoch": 0.7887012356337344, "grad_norm": 0.4388602674007416, "learning_rate": 2.2516695618191274e-06, "loss": 0.35501253604888916, "step": 14617, "token_acc": 0.8744460856720827 }, { "epoch": 0.7887551934387309, "grad_norm": 0.4700588583946228, "learning_rate": 2.250564930839101e-06, "loss": 0.3139938414096832, "step": 14618, "token_acc": 0.8882924706223705 }, { "epoch": 0.7888091512437274, "grad_norm": 0.4269379675388336, "learning_rate": 2.2494605365247913e-06, "loss": 0.3691245913505554, "step": 14619, "token_acc": 0.8720807416969839 }, { "epoch": 0.7888631090487239, "grad_norm": 0.5197539925575256, "learning_rate": 2.248356378909927e-06, "loss": 0.3774871826171875, "step": 14620, "token_acc": 0.8693877551020408 }, { "epoch": 0.7889170668537204, "grad_norm": 0.4081960916519165, "learning_rate": 2.2472524580282284e-06, "loss": 0.2990056276321411, "step": 14621, "token_acc": 0.8949506674405108 }, { "epoch": 0.7889710246587169, "grad_norm": 0.4711463451385498, "learning_rate": 2.246148773913408e-06, "loss": 0.37710052728652954, "step": 14622, "token_acc": 0.8671649619722004 }, { "epoch": 0.7890249824637133, "grad_norm": 0.4798179268836975, "learning_rate": 2.2450453265991733e-06, "loss": 0.2954842448234558, "step": 14623, "token_acc": 0.8961852343898273 }, { "epoch": 0.7890789402687098, "grad_norm": 0.3123452067375183, "learning_rate": 2.243942116119222e-06, "loss": 0.26318174600601196, "step": 14624, "token_acc": 0.90573600552868 }, { "epoch": 0.7891328980737063, "grad_norm": 0.4612341523170471, "learning_rate": 2.242839142507247e-06, "loss": 0.31336572766304016, "step": 14625, "token_acc": 0.8861356035269079 }, { "epoch": 0.7891868558787029, "grad_norm": 0.44317522644996643, "learning_rate": 2.241736405796934e-06, "loss": 0.32039424777030945, "step": 14626, "token_acc": 0.8866096866096866 }, { "epoch": 0.7892408136836994, "grad_norm": 0.40303555130958557, "learning_rate": 2.2406339060219574e-06, "loss": 0.3612438440322876, "step": 14627, "token_acc": 0.875028875028875 }, { "epoch": 0.7892947714886959, "grad_norm": 0.3562021851539612, "learning_rate": 2.2395316432159854e-06, "loss": 0.32294541597366333, "step": 14628, "token_acc": 0.8815426997245179 }, { "epoch": 0.7893487292936924, "grad_norm": 0.4689313471317291, "learning_rate": 2.2384296174126874e-06, "loss": 0.33430683612823486, "step": 14629, "token_acc": 0.876746096959737 }, { "epoch": 0.7894026870986888, "grad_norm": 0.5156679153442383, "learning_rate": 2.2373278286457155e-06, "loss": 0.3467685282230377, "step": 14630, "token_acc": 0.8790665985351729 }, { "epoch": 0.7894566449036853, "grad_norm": 0.44317975640296936, "learning_rate": 2.2362262769487187e-06, "loss": 0.32422304153442383, "step": 14631, "token_acc": 0.8856694434383677 }, { "epoch": 0.7895106027086818, "grad_norm": 0.37126797437667847, "learning_rate": 2.2351249623553372e-06, "loss": 0.37733927369117737, "step": 14632, "token_acc": 0.8649980029290374 }, { "epoch": 0.7895645605136783, "grad_norm": 0.4678276777267456, "learning_rate": 2.234023884899206e-06, "loss": 0.37487369775772095, "step": 14633, "token_acc": 0.8698741672834938 }, { "epoch": 0.7896185183186748, "grad_norm": 0.4973512291908264, "learning_rate": 2.2329230446139494e-06, "loss": 0.36805304884910583, "step": 14634, "token_acc": 0.8709269218601708 }, { "epoch": 0.7896724761236713, "grad_norm": 0.4565451741218567, "learning_rate": 2.231822441533187e-06, "loss": 0.30787238478660583, "step": 14635, "token_acc": 0.8922908693275013 }, { "epoch": 0.7897264339286678, "grad_norm": 0.4180563688278198, "learning_rate": 2.2307220756905332e-06, "loss": 0.374412477016449, "step": 14636, "token_acc": 0.8686854591124756 }, { "epoch": 0.7897803917336643, "grad_norm": 0.48948928713798523, "learning_rate": 2.2296219471195967e-06, "loss": 0.37036454677581787, "step": 14637, "token_acc": 0.8688238713427631 }, { "epoch": 0.7898343495386607, "grad_norm": 0.4767017066478729, "learning_rate": 2.2285220558539667e-06, "loss": 0.3482973575592041, "step": 14638, "token_acc": 0.8756780212627469 }, { "epoch": 0.7898883073436572, "grad_norm": 0.4490700960159302, "learning_rate": 2.227422401927237e-06, "loss": 0.3792197108268738, "step": 14639, "token_acc": 0.8627914836093274 }, { "epoch": 0.7899422651486537, "grad_norm": 0.42667579650878906, "learning_rate": 2.226322985372992e-06, "loss": 0.3690987229347229, "step": 14640, "token_acc": 0.8696430703115674 }, { "epoch": 0.7899962229536502, "grad_norm": 0.3226411044597626, "learning_rate": 2.2252238062248034e-06, "loss": 0.2915267050266266, "step": 14641, "token_acc": 0.8956070480328264 }, { "epoch": 0.7900501807586467, "grad_norm": 0.42995625734329224, "learning_rate": 2.224124864516245e-06, "loss": 0.3322831988334656, "step": 14642, "token_acc": 0.8766408616627398 }, { "epoch": 0.7901041385636433, "grad_norm": 0.491545706987381, "learning_rate": 2.2230261602808767e-06, "loss": 0.3597393035888672, "step": 14643, "token_acc": 0.8732032854209446 }, { "epoch": 0.7901580963686398, "grad_norm": 0.4450588822364807, "learning_rate": 2.2219276935522526e-06, "loss": 0.3575363755226135, "step": 14644, "token_acc": 0.8743295019157088 }, { "epoch": 0.7902120541736363, "grad_norm": 0.3480698764324188, "learning_rate": 2.2208294643639185e-06, "loss": 0.25960174202919006, "step": 14645, "token_acc": 0.9039270687237027 }, { "epoch": 0.7902660119786327, "grad_norm": 0.4286426901817322, "learning_rate": 2.2197314727494157e-06, "loss": 0.3163812756538391, "step": 14646, "token_acc": 0.8885005242922055 }, { "epoch": 0.7903199697836292, "grad_norm": 0.3956032991409302, "learning_rate": 2.218633718742275e-06, "loss": 0.2997390329837799, "step": 14647, "token_acc": 0.8932644975387202 }, { "epoch": 0.7903739275886257, "grad_norm": 0.43482479453086853, "learning_rate": 2.2175362023760184e-06, "loss": 0.35152900218963623, "step": 14648, "token_acc": 0.8725138121546961 }, { "epoch": 0.7904278853936222, "grad_norm": 0.4618896543979645, "learning_rate": 2.216438923684171e-06, "loss": 0.3047506809234619, "step": 14649, "token_acc": 0.8919001684274996 }, { "epoch": 0.7904818431986187, "grad_norm": 0.36554452776908875, "learning_rate": 2.2153418827002403e-06, "loss": 0.3186991214752197, "step": 14650, "token_acc": 0.8841305998481397 }, { "epoch": 0.7905358010036152, "grad_norm": 0.45542216300964355, "learning_rate": 2.2142450794577286e-06, "loss": 0.29635125398635864, "step": 14651, "token_acc": 0.8942881500426257 }, { "epoch": 0.7905897588086117, "grad_norm": 0.3772621750831604, "learning_rate": 2.2131485139901334e-06, "loss": 0.3222542405128479, "step": 14652, "token_acc": 0.8875722260307511 }, { "epoch": 0.7906437166136081, "grad_norm": 0.5444985032081604, "learning_rate": 2.212052186330942e-06, "loss": 0.40366098284721375, "step": 14653, "token_acc": 0.8634558093346574 }, { "epoch": 0.7906976744186046, "grad_norm": 0.4924011826515198, "learning_rate": 2.2109560965136366e-06, "loss": 0.34838756918907166, "step": 14654, "token_acc": 0.8785714285714286 }, { "epoch": 0.7907516322236011, "grad_norm": 0.38124924898147583, "learning_rate": 2.209860244571692e-06, "loss": 0.31586605310440063, "step": 14655, "token_acc": 0.8871559633027523 }, { "epoch": 0.7908055900285976, "grad_norm": 0.35206300020217896, "learning_rate": 2.2087646305385746e-06, "loss": 0.29055124521255493, "step": 14656, "token_acc": 0.8951412696536234 }, { "epoch": 0.7908595478335941, "grad_norm": 0.4031687080860138, "learning_rate": 2.207669254447744e-06, "loss": 0.2997779846191406, "step": 14657, "token_acc": 0.8904729266620973 }, { "epoch": 0.7909135056385906, "grad_norm": 0.42499250173568726, "learning_rate": 2.206574116332654e-06, "loss": 0.3981530964374542, "step": 14658, "token_acc": 0.8639256619144603 }, { "epoch": 0.7909674634435871, "grad_norm": 0.4106556177139282, "learning_rate": 2.205479216226748e-06, "loss": 0.3203488886356354, "step": 14659, "token_acc": 0.8823875330785064 }, { "epoch": 0.7910214212485837, "grad_norm": 0.4648157060146332, "learning_rate": 2.2043845541634643e-06, "loss": 0.33054500818252563, "step": 14660, "token_acc": 0.8840372226198998 }, { "epoch": 0.79107537905358, "grad_norm": 0.37136366963386536, "learning_rate": 2.203290130176232e-06, "loss": 0.3246299624443054, "step": 14661, "token_acc": 0.8856655290102389 }, { "epoch": 0.7911293368585766, "grad_norm": 0.3334704339504242, "learning_rate": 2.2021959442984796e-06, "loss": 0.33005720376968384, "step": 14662, "token_acc": 0.8855276239170572 }, { "epoch": 0.7911832946635731, "grad_norm": 0.4661419689655304, "learning_rate": 2.2011019965636204e-06, "loss": 0.38146457076072693, "step": 14663, "token_acc": 0.8672531512605042 }, { "epoch": 0.7912372524685696, "grad_norm": 0.40541091561317444, "learning_rate": 2.2000082870050633e-06, "loss": 0.33823931217193604, "step": 14664, "token_acc": 0.8837604198826798 }, { "epoch": 0.7912912102735661, "grad_norm": 0.40167269110679626, "learning_rate": 2.1989148156562103e-06, "loss": 0.3187217712402344, "step": 14665, "token_acc": 0.8877635143092686 }, { "epoch": 0.7913451680785626, "grad_norm": 0.399048775434494, "learning_rate": 2.1978215825504534e-06, "loss": 0.31108278036117554, "step": 14666, "token_acc": 0.8947170756134202 }, { "epoch": 0.7913991258835591, "grad_norm": 0.3909914195537567, "learning_rate": 2.196728587721181e-06, "loss": 0.33859783411026, "step": 14667, "token_acc": 0.8784604996623903 }, { "epoch": 0.7914530836885556, "grad_norm": 0.335599809885025, "learning_rate": 2.1956358312017745e-06, "loss": 0.319259911775589, "step": 14668, "token_acc": 0.8848790583365671 }, { "epoch": 0.791507041493552, "grad_norm": 0.4327020049095154, "learning_rate": 2.1945433130256056e-06, "loss": 0.3515051305294037, "step": 14669, "token_acc": 0.8750865531089876 }, { "epoch": 0.7915609992985485, "grad_norm": 0.4690242409706116, "learning_rate": 2.1934510332260396e-06, "loss": 0.37013739347457886, "step": 14670, "token_acc": 0.8668465322964609 }, { "epoch": 0.791614957103545, "grad_norm": 0.37440675497055054, "learning_rate": 2.192358991836436e-06, "loss": 0.2896987199783325, "step": 14671, "token_acc": 0.8955823293172691 }, { "epoch": 0.7916689149085415, "grad_norm": 0.4723476469516754, "learning_rate": 2.1912671888901406e-06, "loss": 0.32524043321609497, "step": 14672, "token_acc": 0.8821625121398511 }, { "epoch": 0.791722872713538, "grad_norm": 0.35937976837158203, "learning_rate": 2.1901756244204975e-06, "loss": 0.3467048406600952, "step": 14673, "token_acc": 0.8764754779717373 }, { "epoch": 0.7917768305185345, "grad_norm": 0.4389179050922394, "learning_rate": 2.1890842984608473e-06, "loss": 0.27584904432296753, "step": 14674, "token_acc": 0.899528531946025 }, { "epoch": 0.791830788323531, "grad_norm": 0.3761065900325775, "learning_rate": 2.187993211044518e-06, "loss": 0.32642418146133423, "step": 14675, "token_acc": 0.8801724137931034 }, { "epoch": 0.7918847461285274, "grad_norm": 0.3516489863395691, "learning_rate": 2.186902362204828e-06, "loss": 0.31427839398384094, "step": 14676, "token_acc": 0.8853729415563448 }, { "epoch": 0.791938703933524, "grad_norm": 0.34354937076568604, "learning_rate": 2.185811751975093e-06, "loss": 0.2662566304206848, "step": 14677, "token_acc": 0.8997229185908431 }, { "epoch": 0.7919926617385205, "grad_norm": 0.45647311210632324, "learning_rate": 2.18472138038862e-06, "loss": 0.32701075077056885, "step": 14678, "token_acc": 0.8839045051983058 }, { "epoch": 0.792046619543517, "grad_norm": 0.44652286171913147, "learning_rate": 2.18363124747871e-06, "loss": 0.264760285615921, "step": 14679, "token_acc": 0.901538921261019 }, { "epoch": 0.7921005773485135, "grad_norm": 0.4982622563838959, "learning_rate": 2.18254135327865e-06, "loss": 0.3402256369590759, "step": 14680, "token_acc": 0.8744548286604361 }, { "epoch": 0.79215453515351, "grad_norm": 0.48639699816703796, "learning_rate": 2.1814516978217327e-06, "loss": 0.3675565719604492, "step": 14681, "token_acc": 0.8696468820435762 }, { "epoch": 0.7922084929585065, "grad_norm": 0.46583276987075806, "learning_rate": 2.1803622811412315e-06, "loss": 0.31898096203804016, "step": 14682, "token_acc": 0.8817312531454454 }, { "epoch": 0.792262450763503, "grad_norm": 0.4410425126552582, "learning_rate": 2.1792731032704195e-06, "loss": 0.3003619909286499, "step": 14683, "token_acc": 0.8927667510323698 }, { "epoch": 0.7923164085684994, "grad_norm": 0.439048707485199, "learning_rate": 2.1781841642425563e-06, "loss": 0.3380695879459381, "step": 14684, "token_acc": 0.8822307920498369 }, { "epoch": 0.7923703663734959, "grad_norm": 0.39803841710090637, "learning_rate": 2.1770954640909e-06, "loss": 0.3200451731681824, "step": 14685, "token_acc": 0.8808860202435089 }, { "epoch": 0.7924243241784924, "grad_norm": 0.3975375294685364, "learning_rate": 2.1760070028487e-06, "loss": 0.29829126596450806, "step": 14686, "token_acc": 0.8937811535722763 }, { "epoch": 0.7924782819834889, "grad_norm": 0.4662286937236786, "learning_rate": 2.1749187805491955e-06, "loss": 0.2824631631374359, "step": 14687, "token_acc": 0.8934938290415968 }, { "epoch": 0.7925322397884854, "grad_norm": 0.3800852298736572, "learning_rate": 2.173830797225622e-06, "loss": 0.3442395329475403, "step": 14688, "token_acc": 0.8809299403078856 }, { "epoch": 0.7925861975934819, "grad_norm": 0.4059423506259918, "learning_rate": 2.1727430529112057e-06, "loss": 0.3301849961280823, "step": 14689, "token_acc": 0.8846582984658299 }, { "epoch": 0.7926401553984784, "grad_norm": 0.41546517610549927, "learning_rate": 2.1716555476391665e-06, "loss": 0.3524075746536255, "step": 14690, "token_acc": 0.8794875047570722 }, { "epoch": 0.7926941132034749, "grad_norm": 0.4094836711883545, "learning_rate": 2.1705682814427154e-06, "loss": 0.3181185722351074, "step": 14691, "token_acc": 0.8844626967830254 }, { "epoch": 0.7927480710084713, "grad_norm": 0.37299585342407227, "learning_rate": 2.1694812543550594e-06, "loss": 0.3538193702697754, "step": 14692, "token_acc": 0.87264084999301 }, { "epoch": 0.7928020288134678, "grad_norm": 0.3891313970088959, "learning_rate": 2.1683944664093904e-06, "loss": 0.3129233121871948, "step": 14693, "token_acc": 0.8868954758190327 }, { "epoch": 0.7928559866184643, "grad_norm": 0.37937164306640625, "learning_rate": 2.1673079176389057e-06, "loss": 0.36737504601478577, "step": 14694, "token_acc": 0.8694648193425699 }, { "epoch": 0.7929099444234609, "grad_norm": 0.4746962785720825, "learning_rate": 2.166221608076785e-06, "loss": 0.32714682817459106, "step": 14695, "token_acc": 0.8803837517860787 }, { "epoch": 0.7929639022284574, "grad_norm": 0.48957398533821106, "learning_rate": 2.1651355377562046e-06, "loss": 0.33559978008270264, "step": 14696, "token_acc": 0.8768517133024091 }, { "epoch": 0.7930178600334539, "grad_norm": 0.4863241910934448, "learning_rate": 2.164049706710333e-06, "loss": 0.3607625961303711, "step": 14697, "token_acc": 0.8732937208928858 }, { "epoch": 0.7930718178384504, "grad_norm": 0.5222783088684082, "learning_rate": 2.1629641149723302e-06, "loss": 0.3297405242919922, "step": 14698, "token_acc": 0.8807803468208093 }, { "epoch": 0.7931257756434468, "grad_norm": 0.43193522095680237, "learning_rate": 2.161878762575347e-06, "loss": 0.3242797255516052, "step": 14699, "token_acc": 0.8829225352112676 }, { "epoch": 0.7931797334484433, "grad_norm": 0.4057218134403229, "learning_rate": 2.160793649552536e-06, "loss": 0.33088141679763794, "step": 14700, "token_acc": 0.8842504743833017 }, { "epoch": 0.7932336912534398, "grad_norm": 0.3792957067489624, "learning_rate": 2.159708775937034e-06, "loss": 0.30406689643859863, "step": 14701, "token_acc": 0.8851879145173176 }, { "epoch": 0.7932876490584363, "grad_norm": 0.39672479033470154, "learning_rate": 2.1586241417619724e-06, "loss": 0.2906043529510498, "step": 14702, "token_acc": 0.893671917436176 }, { "epoch": 0.7933416068634328, "grad_norm": 0.4570431113243103, "learning_rate": 2.1575397470604743e-06, "loss": 0.23829606175422668, "step": 14703, "token_acc": 0.9073453215483085 }, { "epoch": 0.7933955646684293, "grad_norm": 0.4537872076034546, "learning_rate": 2.1564555918656583e-06, "loss": 0.36523738503456116, "step": 14704, "token_acc": 0.8751847373370952 }, { "epoch": 0.7934495224734258, "grad_norm": 0.2890579104423523, "learning_rate": 2.1553716762106337e-06, "loss": 0.30112484097480774, "step": 14705, "token_acc": 0.8908602952139556 }, { "epoch": 0.7935034802784223, "grad_norm": 0.47872039675712585, "learning_rate": 2.154288000128503e-06, "loss": 0.2748931646347046, "step": 14706, "token_acc": 0.8944591029023746 }, { "epoch": 0.7935574380834187, "grad_norm": 0.417752742767334, "learning_rate": 2.1532045636523625e-06, "loss": 0.364223450422287, "step": 14707, "token_acc": 0.871575772807238 }, { "epoch": 0.7936113958884152, "grad_norm": 0.3584311604499817, "learning_rate": 2.1521213668152975e-06, "loss": 0.31674420833587646, "step": 14708, "token_acc": 0.8837153772683859 }, { "epoch": 0.7936653536934117, "grad_norm": 0.41144412755966187, "learning_rate": 2.151038409650391e-06, "loss": 0.34533876180648804, "step": 14709, "token_acc": 0.8832997987927566 }, { "epoch": 0.7937193114984082, "grad_norm": 0.3827110528945923, "learning_rate": 2.149955692190715e-06, "loss": 0.30415111780166626, "step": 14710, "token_acc": 0.886993312406169 }, { "epoch": 0.7937732693034047, "grad_norm": 0.3905777633190155, "learning_rate": 2.1488732144693358e-06, "loss": 0.331161230802536, "step": 14711, "token_acc": 0.8807002682479176 }, { "epoch": 0.7938272271084013, "grad_norm": 0.41585153341293335, "learning_rate": 2.1477909765193094e-06, "loss": 0.39220473170280457, "step": 14712, "token_acc": 0.863681345868382 }, { "epoch": 0.7938811849133978, "grad_norm": 0.4700179100036621, "learning_rate": 2.1467089783736918e-06, "loss": 0.3293952941894531, "step": 14713, "token_acc": 0.8834980091431942 }, { "epoch": 0.7939351427183943, "grad_norm": 0.3695555627346039, "learning_rate": 2.1456272200655247e-06, "loss": 0.30682432651519775, "step": 14714, "token_acc": 0.8889036016949152 }, { "epoch": 0.7939891005233907, "grad_norm": 0.385009229183197, "learning_rate": 2.144545701627846e-06, "loss": 0.3555351793766022, "step": 14715, "token_acc": 0.8734764175940647 }, { "epoch": 0.7940430583283872, "grad_norm": 0.4958915412425995, "learning_rate": 2.143464423093683e-06, "loss": 0.33355987071990967, "step": 14716, "token_acc": 0.8815020862308762 }, { "epoch": 0.7940970161333837, "grad_norm": 0.33277949690818787, "learning_rate": 2.142383384496057e-06, "loss": 0.32197123765945435, "step": 14717, "token_acc": 0.8862571577303487 }, { "epoch": 0.7941509739383802, "grad_norm": 0.4188440144062042, "learning_rate": 2.141302585867986e-06, "loss": 0.3492254316806793, "step": 14718, "token_acc": 0.8787591438634517 }, { "epoch": 0.7942049317433767, "grad_norm": 0.38027241826057434, "learning_rate": 2.1402220272424744e-06, "loss": 0.3260079324245453, "step": 14719, "token_acc": 0.8860212671688081 }, { "epoch": 0.7942588895483732, "grad_norm": 0.3012998402118683, "learning_rate": 2.1391417086525233e-06, "loss": 0.24916933476924896, "step": 14720, "token_acc": 0.9052401746724891 }, { "epoch": 0.7943128473533697, "grad_norm": 0.4130361080169678, "learning_rate": 2.138061630131125e-06, "loss": 0.26759207248687744, "step": 14721, "token_acc": 0.8996628299034996 }, { "epoch": 0.7943668051583661, "grad_norm": 0.35496222972869873, "learning_rate": 2.1369817917112656e-06, "loss": 0.31631582975387573, "step": 14722, "token_acc": 0.8889002557544757 }, { "epoch": 0.7944207629633626, "grad_norm": 0.2619379162788391, "learning_rate": 2.1359021934259216e-06, "loss": 0.3780982792377472, "step": 14723, "token_acc": 0.8709348441926346 }, { "epoch": 0.7944747207683591, "grad_norm": 0.41702398657798767, "learning_rate": 2.134822835308065e-06, "loss": 0.28173285722732544, "step": 14724, "token_acc": 0.8984755108660396 }, { "epoch": 0.7945286785733556, "grad_norm": 0.48697662353515625, "learning_rate": 2.133743717390656e-06, "loss": 0.34391528367996216, "step": 14725, "token_acc": 0.8811708169506335 }, { "epoch": 0.7945826363783521, "grad_norm": 0.450139582157135, "learning_rate": 2.132664839706655e-06, "loss": 0.3050794303417206, "step": 14726, "token_acc": 0.8938154868387427 }, { "epoch": 0.7946365941833486, "grad_norm": 0.43971192836761475, "learning_rate": 2.13158620228901e-06, "loss": 0.3763355016708374, "step": 14727, "token_acc": 0.8717647058823529 }, { "epoch": 0.7946905519883452, "grad_norm": 0.4661397635936737, "learning_rate": 2.1305078051706606e-06, "loss": 0.3053492307662964, "step": 14728, "token_acc": 0.8872809346787042 }, { "epoch": 0.7947445097933417, "grad_norm": 0.3927309811115265, "learning_rate": 2.129429648384541e-06, "loss": 0.29795756936073303, "step": 14729, "token_acc": 0.8913879846108316 }, { "epoch": 0.794798467598338, "grad_norm": 0.373585969209671, "learning_rate": 2.128351731963578e-06, "loss": 0.3342844545841217, "step": 14730, "token_acc": 0.8799709899673637 }, { "epoch": 0.7948524254033346, "grad_norm": 0.36366745829582214, "learning_rate": 2.127274055940689e-06, "loss": 0.36169999837875366, "step": 14731, "token_acc": 0.8739135757130616 }, { "epoch": 0.7949063832083311, "grad_norm": 0.38318338990211487, "learning_rate": 2.1261966203487905e-06, "loss": 0.3143044710159302, "step": 14732, "token_acc": 0.8884871275672549 }, { "epoch": 0.7949603410133276, "grad_norm": 0.48058855533599854, "learning_rate": 2.125119425220784e-06, "loss": 0.3116752505302429, "step": 14733, "token_acc": 0.8859717868338558 }, { "epoch": 0.7950142988183241, "grad_norm": 0.5338441729545593, "learning_rate": 2.124042470589568e-06, "loss": 0.39798101782798767, "step": 14734, "token_acc": 0.8624875290987696 }, { "epoch": 0.7950682566233206, "grad_norm": 0.5708645582199097, "learning_rate": 2.1229657564880326e-06, "loss": 0.39857998490333557, "step": 14735, "token_acc": 0.8606903014212088 }, { "epoch": 0.7951222144283171, "grad_norm": 0.40972402691841125, "learning_rate": 2.121889282949058e-06, "loss": 0.3473590016365051, "step": 14736, "token_acc": 0.879593318809005 }, { "epoch": 0.7951761722333135, "grad_norm": 0.39155542850494385, "learning_rate": 2.120813050005521e-06, "loss": 0.3413269519805908, "step": 14737, "token_acc": 0.8757790984200609 }, { "epoch": 0.79523013003831, "grad_norm": 0.37265366315841675, "learning_rate": 2.1197370576902898e-06, "loss": 0.33708009123802185, "step": 14738, "token_acc": 0.8796046720575023 }, { "epoch": 0.7952840878433065, "grad_norm": 0.3980378806591034, "learning_rate": 2.118661306036225e-06, "loss": 0.3357927203178406, "step": 14739, "token_acc": 0.8832537057454367 }, { "epoch": 0.795338045648303, "grad_norm": 0.4031502902507782, "learning_rate": 2.117585795076178e-06, "loss": 0.35740408301353455, "step": 14740, "token_acc": 0.8733818271483171 }, { "epoch": 0.7953920034532995, "grad_norm": 0.3467835485935211, "learning_rate": 2.1165105248429965e-06, "loss": 0.3744107186794281, "step": 14741, "token_acc": 0.8717588253670728 }, { "epoch": 0.795445961258296, "grad_norm": 0.4527396559715271, "learning_rate": 2.1154354953695188e-06, "loss": 0.30378448963165283, "step": 14742, "token_acc": 0.8910171730515192 }, { "epoch": 0.7954999190632925, "grad_norm": 0.45955950021743774, "learning_rate": 2.114360706688575e-06, "loss": 0.35690999031066895, "step": 14743, "token_acc": 0.8712969525159462 }, { "epoch": 0.795553876868289, "grad_norm": 0.4853562116622925, "learning_rate": 2.1132861588329867e-06, "loss": 0.3327428698539734, "step": 14744, "token_acc": 0.8796673455532926 }, { "epoch": 0.7956078346732854, "grad_norm": 0.44381022453308105, "learning_rate": 2.112211851835577e-06, "loss": 0.3795318901538849, "step": 14745, "token_acc": 0.8664337543671056 }, { "epoch": 0.795661792478282, "grad_norm": 0.3573128283023834, "learning_rate": 2.1111377857291494e-06, "loss": 0.3454817831516266, "step": 14746, "token_acc": 0.87856977014163 }, { "epoch": 0.7957157502832785, "grad_norm": 0.3881824016571045, "learning_rate": 2.110063960546508e-06, "loss": 0.3341617286205292, "step": 14747, "token_acc": 0.8843582887700535 }, { "epoch": 0.795769708088275, "grad_norm": 0.48472511768341064, "learning_rate": 2.108990376320447e-06, "loss": 0.3167578876018524, "step": 14748, "token_acc": 0.8904871499808209 }, { "epoch": 0.7958236658932715, "grad_norm": 0.4750305712223053, "learning_rate": 2.1079170330837517e-06, "loss": 0.33030280470848083, "step": 14749, "token_acc": 0.8793558938231224 }, { "epoch": 0.795877623698268, "grad_norm": 0.45712172985076904, "learning_rate": 2.1068439308692034e-06, "loss": 0.30139291286468506, "step": 14750, "token_acc": 0.8924770642201835 }, { "epoch": 0.7959315815032645, "grad_norm": 0.5345292091369629, "learning_rate": 2.1057710697095703e-06, "loss": 0.357742041349411, "step": 14751, "token_acc": 0.8734137055837563 }, { "epoch": 0.795985539308261, "grad_norm": 0.3155309557914734, "learning_rate": 2.1046984496376245e-06, "loss": 0.334953635931015, "step": 14752, "token_acc": 0.8804597701149425 }, { "epoch": 0.7960394971132574, "grad_norm": 0.3794238567352295, "learning_rate": 2.1036260706861212e-06, "loss": 0.34435051679611206, "step": 14753, "token_acc": 0.8750694058856191 }, { "epoch": 0.7960934549182539, "grad_norm": 0.39110076427459717, "learning_rate": 2.1025539328878076e-06, "loss": 0.2902657091617584, "step": 14754, "token_acc": 0.8943522606180545 }, { "epoch": 0.7961474127232504, "grad_norm": 0.3991633355617523, "learning_rate": 2.1014820362754273e-06, "loss": 0.30383628606796265, "step": 14755, "token_acc": 0.8871851040525739 }, { "epoch": 0.7962013705282469, "grad_norm": 0.4032036364078522, "learning_rate": 2.1004103808817166e-06, "loss": 0.2968962490558624, "step": 14756, "token_acc": 0.8965069284064665 }, { "epoch": 0.7962553283332434, "grad_norm": 0.4778705835342407, "learning_rate": 2.0993389667394014e-06, "loss": 0.32790863513946533, "step": 14757, "token_acc": 0.8854976303317535 }, { "epoch": 0.7963092861382399, "grad_norm": 0.4396741986274719, "learning_rate": 2.098267793881206e-06, "loss": 0.33214035630226135, "step": 14758, "token_acc": 0.8822373591826042 }, { "epoch": 0.7963632439432364, "grad_norm": 0.4490913450717926, "learning_rate": 2.097196862339843e-06, "loss": 0.3391202688217163, "step": 14759, "token_acc": 0.880294014700735 }, { "epoch": 0.7964172017482328, "grad_norm": 0.37841111421585083, "learning_rate": 2.0961261721480175e-06, "loss": 0.31750184297561646, "step": 14760, "token_acc": 0.8820414058738565 }, { "epoch": 0.7964711595532293, "grad_norm": 0.446813702583313, "learning_rate": 2.0950557233384283e-06, "loss": 0.3172748386859894, "step": 14761, "token_acc": 0.8870618779749027 }, { "epoch": 0.7965251173582258, "grad_norm": 0.48059940338134766, "learning_rate": 2.093985515943766e-06, "loss": 0.38505011796951294, "step": 14762, "token_acc": 0.8743967039434962 }, { "epoch": 0.7965790751632223, "grad_norm": 0.4866516888141632, "learning_rate": 2.0929155499967146e-06, "loss": 0.3065944015979767, "step": 14763, "token_acc": 0.8887251289609432 }, { "epoch": 0.7966330329682189, "grad_norm": 0.4429859519004822, "learning_rate": 2.091845825529949e-06, "loss": 0.3511539101600647, "step": 14764, "token_acc": 0.8730595084087969 }, { "epoch": 0.7966869907732154, "grad_norm": 0.5162789821624756, "learning_rate": 2.090776342576143e-06, "loss": 0.3204088807106018, "step": 14765, "token_acc": 0.8805337852149968 }, { "epoch": 0.7967409485782119, "grad_norm": 0.4460352659225464, "learning_rate": 2.089707101167956e-06, "loss": 0.356832891702652, "step": 14766, "token_acc": 0.8706934467086938 }, { "epoch": 0.7967949063832084, "grad_norm": 0.4187224805355072, "learning_rate": 2.088638101338041e-06, "loss": 0.2714574337005615, "step": 14767, "token_acc": 0.9005617124639441 }, { "epoch": 0.7968488641882048, "grad_norm": 0.38724157214164734, "learning_rate": 2.087569343119046e-06, "loss": 0.3518226742744446, "step": 14768, "token_acc": 0.8719488817891374 }, { "epoch": 0.7969028219932013, "grad_norm": 0.4102611839771271, "learning_rate": 2.0865008265436106e-06, "loss": 0.30071115493774414, "step": 14769, "token_acc": 0.8897387790445952 }, { "epoch": 0.7969567797981978, "grad_norm": 0.4167468547821045, "learning_rate": 2.0854325516443676e-06, "loss": 0.3482699394226074, "step": 14770, "token_acc": 0.8730654761904761 }, { "epoch": 0.7970107376031943, "grad_norm": 0.40434059500694275, "learning_rate": 2.0843645184539395e-06, "loss": 0.26020580530166626, "step": 14771, "token_acc": 0.8987960216367126 }, { "epoch": 0.7970646954081908, "grad_norm": 0.36733904480934143, "learning_rate": 2.0832967270049464e-06, "loss": 0.37041589617729187, "step": 14772, "token_acc": 0.8730953417501088 }, { "epoch": 0.7971186532131873, "grad_norm": 0.35298585891723633, "learning_rate": 2.0822291773299973e-06, "loss": 0.3057948648929596, "step": 14773, "token_acc": 0.8855095541401274 }, { "epoch": 0.7971726110181838, "grad_norm": 0.36119407415390015, "learning_rate": 2.0811618694616954e-06, "loss": 0.3393886387348175, "step": 14774, "token_acc": 0.875516355922742 }, { "epoch": 0.7972265688231803, "grad_norm": 0.5456655025482178, "learning_rate": 2.0800948034326353e-06, "loss": 0.33538317680358887, "step": 14775, "token_acc": 0.8783431180691454 }, { "epoch": 0.7972805266281767, "grad_norm": 0.43153977394104004, "learning_rate": 2.079027979275402e-06, "loss": 0.33785897493362427, "step": 14776, "token_acc": 0.8780611597513006 }, { "epoch": 0.7973344844331732, "grad_norm": 0.43101564049720764, "learning_rate": 2.077961397022582e-06, "loss": 0.38085538148880005, "step": 14777, "token_acc": 0.8635214827295703 }, { "epoch": 0.7973884422381697, "grad_norm": 0.372008740901947, "learning_rate": 2.076895056706746e-06, "loss": 0.3204188346862793, "step": 14778, "token_acc": 0.8802233124682795 }, { "epoch": 0.7974424000431662, "grad_norm": 0.4463326632976532, "learning_rate": 2.0758289583604594e-06, "loss": 0.3044421672821045, "step": 14779, "token_acc": 0.8893475154786474 }, { "epoch": 0.7974963578481628, "grad_norm": 0.4110531508922577, "learning_rate": 2.0747631020162806e-06, "loss": 0.3801524043083191, "step": 14780, "token_acc": 0.8648237733241189 }, { "epoch": 0.7975503156531593, "grad_norm": 0.41364938020706177, "learning_rate": 2.0736974877067597e-06, "loss": 0.35406041145324707, "step": 14781, "token_acc": 0.8760897527511791 }, { "epoch": 0.7976042734581558, "grad_norm": 0.409957617521286, "learning_rate": 2.0726321154644426e-06, "loss": 0.33210116624832153, "step": 14782, "token_acc": 0.8837608935804591 }, { "epoch": 0.7976582312631522, "grad_norm": 0.4120716154575348, "learning_rate": 2.07156698532186e-06, "loss": 0.3102988600730896, "step": 14783, "token_acc": 0.8883181684766657 }, { "epoch": 0.7977121890681487, "grad_norm": 0.4135507643222809, "learning_rate": 2.0705020973115474e-06, "loss": 0.3386549949645996, "step": 14784, "token_acc": 0.8769475357710652 }, { "epoch": 0.7977661468731452, "grad_norm": 0.4314744770526886, "learning_rate": 2.0694374514660245e-06, "loss": 0.34280067682266235, "step": 14785, "token_acc": 0.8819258089976322 }, { "epoch": 0.7978201046781417, "grad_norm": 0.47617119550704956, "learning_rate": 2.0683730478178032e-06, "loss": 0.3207036554813385, "step": 14786, "token_acc": 0.8875160275090337 }, { "epoch": 0.7978740624831382, "grad_norm": 0.4284614026546478, "learning_rate": 2.0673088863993916e-06, "loss": 0.3149285614490509, "step": 14787, "token_acc": 0.8886943471735868 }, { "epoch": 0.7979280202881347, "grad_norm": 0.3867895305156708, "learning_rate": 2.0662449672432915e-06, "loss": 0.3848522901535034, "step": 14788, "token_acc": 0.8626408406127358 }, { "epoch": 0.7979819780931312, "grad_norm": 0.49272218346595764, "learning_rate": 2.0651812903819867e-06, "loss": 0.31392112374305725, "step": 14789, "token_acc": 0.8892168779301962 }, { "epoch": 0.7980359358981277, "grad_norm": 0.43432700634002686, "learning_rate": 2.0641178558479693e-06, "loss": 0.32436662912368774, "step": 14790, "token_acc": 0.8834465498748659 }, { "epoch": 0.7980898937031241, "grad_norm": 0.442140132188797, "learning_rate": 2.0630546636737124e-06, "loss": 0.3341276943683624, "step": 14791, "token_acc": 0.8817379941195688 }, { "epoch": 0.7981438515081206, "grad_norm": 0.537137508392334, "learning_rate": 2.061991713891688e-06, "loss": 0.38729792833328247, "step": 14792, "token_acc": 0.8673376029277219 }, { "epoch": 0.7981978093131171, "grad_norm": 0.47423869371414185, "learning_rate": 2.0609290065343567e-06, "loss": 0.3715191185474396, "step": 14793, "token_acc": 0.8720569210866753 }, { "epoch": 0.7982517671181136, "grad_norm": 0.48192909359931946, "learning_rate": 2.0598665416341744e-06, "loss": 0.3268049359321594, "step": 14794, "token_acc": 0.878204054280449 }, { "epoch": 0.7983057249231101, "grad_norm": 0.4501858353614807, "learning_rate": 2.0588043192235872e-06, "loss": 0.32283127307891846, "step": 14795, "token_acc": 0.888315285822211 }, { "epoch": 0.7983596827281066, "grad_norm": 0.3739301860332489, "learning_rate": 2.057742339335034e-06, "loss": 0.3208025395870209, "step": 14796, "token_acc": 0.8857074967328027 }, { "epoch": 0.7984136405331032, "grad_norm": 0.4110874533653259, "learning_rate": 2.056680602000951e-06, "loss": 0.32321205735206604, "step": 14797, "token_acc": 0.8874222424518282 }, { "epoch": 0.7984675983380997, "grad_norm": 0.46096938848495483, "learning_rate": 2.055619107253761e-06, "loss": 0.3412878215312958, "step": 14798, "token_acc": 0.881767775603392 }, { "epoch": 0.7985215561430961, "grad_norm": 0.46920299530029297, "learning_rate": 2.054557855125884e-06, "loss": 0.4170529842376709, "step": 14799, "token_acc": 0.8532834580216127 }, { "epoch": 0.7985755139480926, "grad_norm": 0.5013335943222046, "learning_rate": 2.0534968456497274e-06, "loss": 0.36671289801597595, "step": 14800, "token_acc": 0.8695173930977589 }, { "epoch": 0.7986294717530891, "grad_norm": 0.3726334273815155, "learning_rate": 2.0524360788576957e-06, "loss": 0.32622140645980835, "step": 14801, "token_acc": 0.8805631786183034 }, { "epoch": 0.7986834295580856, "grad_norm": 0.43452778458595276, "learning_rate": 2.0513755547821845e-06, "loss": 0.3236793875694275, "step": 14802, "token_acc": 0.8851990984222389 }, { "epoch": 0.7987373873630821, "grad_norm": 0.489880234003067, "learning_rate": 2.050315273455582e-06, "loss": 0.3433872163295746, "step": 14803, "token_acc": 0.8767103557539968 }, { "epoch": 0.7987913451680786, "grad_norm": 0.31926488876342773, "learning_rate": 2.049255234910268e-06, "loss": 0.31186872720718384, "step": 14804, "token_acc": 0.8900997414111562 }, { "epoch": 0.7988453029730751, "grad_norm": 0.36305633187294006, "learning_rate": 2.0481954391786153e-06, "loss": 0.3856915235519409, "step": 14805, "token_acc": 0.8628177966101694 }, { "epoch": 0.7988992607780715, "grad_norm": 0.3770224153995514, "learning_rate": 2.0471358862929923e-06, "loss": 0.30773985385894775, "step": 14806, "token_acc": 0.8845931909982689 }, { "epoch": 0.798953218583068, "grad_norm": 0.4581064283847809, "learning_rate": 2.046076576285755e-06, "loss": 0.312669038772583, "step": 14807, "token_acc": 0.8858033858033858 }, { "epoch": 0.7990071763880645, "grad_norm": 0.4133933186531067, "learning_rate": 2.0450175091892566e-06, "loss": 0.3849313259124756, "step": 14808, "token_acc": 0.8700594844679445 }, { "epoch": 0.799061134193061, "grad_norm": 0.4258820414543152, "learning_rate": 2.043958685035835e-06, "loss": 0.3212732672691345, "step": 14809, "token_acc": 0.8845200726912275 }, { "epoch": 0.7991150919980575, "grad_norm": 0.4127119779586792, "learning_rate": 2.0429001038578356e-06, "loss": 0.32864996790885925, "step": 14810, "token_acc": 0.8824119163563449 }, { "epoch": 0.799169049803054, "grad_norm": 0.511441171169281, "learning_rate": 2.041841765687581e-06, "loss": 0.29222196340560913, "step": 14811, "token_acc": 0.8898561695685087 }, { "epoch": 0.7992230076080505, "grad_norm": 0.37051650881767273, "learning_rate": 2.0407836705573945e-06, "loss": 0.2983730435371399, "step": 14812, "token_acc": 0.8905579399141631 }, { "epoch": 0.799276965413047, "grad_norm": 0.3385825455188751, "learning_rate": 2.0397258184995906e-06, "loss": 0.35769009590148926, "step": 14813, "token_acc": 0.8713516870213206 }, { "epoch": 0.7993309232180434, "grad_norm": 0.42147859930992126, "learning_rate": 2.0386682095464737e-06, "loss": 0.29622453451156616, "step": 14814, "token_acc": 0.8913043478260869 }, { "epoch": 0.79938488102304, "grad_norm": 0.45831966400146484, "learning_rate": 2.037610843730342e-06, "loss": 0.34934741258621216, "step": 14815, "token_acc": 0.8766079891672309 }, { "epoch": 0.7994388388280365, "grad_norm": 0.4152085781097412, "learning_rate": 2.0365537210834927e-06, "loss": 0.34465497732162476, "step": 14816, "token_acc": 0.8728089236563813 }, { "epoch": 0.799492796633033, "grad_norm": 0.5183832049369812, "learning_rate": 2.035496841638206e-06, "loss": 0.34752771258354187, "step": 14817, "token_acc": 0.8780419186946125 }, { "epoch": 0.7995467544380295, "grad_norm": 0.4059840440750122, "learning_rate": 2.0344402054267585e-06, "loss": 0.30467137694358826, "step": 14818, "token_acc": 0.8888139252462556 }, { "epoch": 0.799600712243026, "grad_norm": 0.44170817732810974, "learning_rate": 2.033383812481421e-06, "loss": 0.32874923944473267, "step": 14819, "token_acc": 0.8809307162355388 }, { "epoch": 0.7996546700480225, "grad_norm": 0.44610631465911865, "learning_rate": 2.0323276628344555e-06, "loss": 0.32528042793273926, "step": 14820, "token_acc": 0.88298041713103 }, { "epoch": 0.799708627853019, "grad_norm": 0.4575169086456299, "learning_rate": 2.031271756518115e-06, "loss": 0.3154187798500061, "step": 14821, "token_acc": 0.8877816291161178 }, { "epoch": 0.7997625856580154, "grad_norm": 0.42596903443336487, "learning_rate": 2.0302160935646486e-06, "loss": 0.3480583131313324, "step": 14822, "token_acc": 0.8748221906116643 }, { "epoch": 0.7998165434630119, "grad_norm": 0.4509059190750122, "learning_rate": 2.029160674006294e-06, "loss": 0.30418121814727783, "step": 14823, "token_acc": 0.8867609059127527 }, { "epoch": 0.7998705012680084, "grad_norm": 0.5222554802894592, "learning_rate": 2.0281054978752845e-06, "loss": 0.339300274848938, "step": 14824, "token_acc": 0.8765868552145267 }, { "epoch": 0.7999244590730049, "grad_norm": 0.5240700244903564, "learning_rate": 2.0270505652038454e-06, "loss": 0.3568987250328064, "step": 14825, "token_acc": 0.8701226309921962 }, { "epoch": 0.7999784168780014, "grad_norm": 0.5129287242889404, "learning_rate": 2.0259958760241926e-06, "loss": 0.35483795404434204, "step": 14826, "token_acc": 0.8793005402331533 }, { "epoch": 0.8000323746829979, "grad_norm": 0.31293627619743347, "learning_rate": 2.0249414303685364e-06, "loss": 0.303474485874176, "step": 14827, "token_acc": 0.8912616876370772 }, { "epoch": 0.8000863324879944, "grad_norm": 0.4263627827167511, "learning_rate": 2.023887228269078e-06, "loss": 0.2519059479236603, "step": 14828, "token_acc": 0.908029197080292 }, { "epoch": 0.8001402902929908, "grad_norm": 0.4229590594768524, "learning_rate": 2.0228332697580168e-06, "loss": 0.36108049750328064, "step": 14829, "token_acc": 0.8759219605044016 }, { "epoch": 0.8001942480979873, "grad_norm": 0.4695374071598053, "learning_rate": 2.021779554867538e-06, "loss": 0.366197407245636, "step": 14830, "token_acc": 0.8730628346012962 }, { "epoch": 0.8002482059029838, "grad_norm": 0.4939727187156677, "learning_rate": 2.0207260836298205e-06, "loss": 0.34634557366371155, "step": 14831, "token_acc": 0.876238162621095 }, { "epoch": 0.8003021637079804, "grad_norm": 0.39958587288856506, "learning_rate": 2.0196728560770395e-06, "loss": 0.3112468421459198, "step": 14832, "token_acc": 0.8882151331399947 }, { "epoch": 0.8003561215129769, "grad_norm": 0.42066317796707153, "learning_rate": 2.0186198722413575e-06, "loss": 0.3502270579338074, "step": 14833, "token_acc": 0.8761792452830188 }, { "epoch": 0.8004100793179734, "grad_norm": 0.3223743736743927, "learning_rate": 2.0175671321549327e-06, "loss": 0.3712943196296692, "step": 14834, "token_acc": 0.871671699149053 }, { "epoch": 0.8004640371229699, "grad_norm": 0.4910675585269928, "learning_rate": 2.016514635849918e-06, "loss": 0.3251246213912964, "step": 14835, "token_acc": 0.8822100789313904 }, { "epoch": 0.8005179949279664, "grad_norm": 0.49765801429748535, "learning_rate": 2.0154623833584585e-06, "loss": 0.37945687770843506, "step": 14836, "token_acc": 0.8700014755791649 }, { "epoch": 0.8005719527329628, "grad_norm": 0.37915652990341187, "learning_rate": 2.0144103747126832e-06, "loss": 0.33721983432769775, "step": 14837, "token_acc": 0.8757733234347934 }, { "epoch": 0.8006259105379593, "grad_norm": 0.43098002672195435, "learning_rate": 2.0133586099447223e-06, "loss": 0.35072606801986694, "step": 14838, "token_acc": 0.8747326203208556 }, { "epoch": 0.8006798683429558, "grad_norm": 0.33638128638267517, "learning_rate": 2.012307089086698e-06, "loss": 0.36941102147102356, "step": 14839, "token_acc": 0.8682801695293331 }, { "epoch": 0.8007338261479523, "grad_norm": 0.40518221259117126, "learning_rate": 2.011255812170723e-06, "loss": 0.3495575189590454, "step": 14840, "token_acc": 0.8777072138088259 }, { "epoch": 0.8007877839529488, "grad_norm": 0.4427282512187958, "learning_rate": 2.010204779228899e-06, "loss": 0.324346125125885, "step": 14841, "token_acc": 0.8841292134831461 }, { "epoch": 0.8008417417579453, "grad_norm": 0.41058778762817383, "learning_rate": 2.009153990293331e-06, "loss": 0.2788431644439697, "step": 14842, "token_acc": 0.9008485931219294 }, { "epoch": 0.8008956995629418, "grad_norm": 0.36301368474960327, "learning_rate": 2.008103445396107e-06, "loss": 0.2782543897628784, "step": 14843, "token_acc": 0.8943779360509168 }, { "epoch": 0.8009496573679382, "grad_norm": 0.38182997703552246, "learning_rate": 2.007053144569311e-06, "loss": 0.3078688979148865, "step": 14844, "token_acc": 0.8914041994750657 }, { "epoch": 0.8010036151729347, "grad_norm": 0.3932867646217346, "learning_rate": 2.0060030878450164e-06, "loss": 0.36061781644821167, "step": 14845, "token_acc": 0.8710280373831776 }, { "epoch": 0.8010575729779312, "grad_norm": 0.399198979139328, "learning_rate": 2.0049532752552948e-06, "loss": 0.3228704035282135, "step": 14846, "token_acc": 0.8892485223754574 }, { "epoch": 0.8011115307829277, "grad_norm": 0.4435458481311798, "learning_rate": 2.003903706832203e-06, "loss": 0.3388463854789734, "step": 14847, "token_acc": 0.8811723938781404 }, { "epoch": 0.8011654885879242, "grad_norm": 0.5316239595413208, "learning_rate": 2.0028543826077993e-06, "loss": 0.3635460436344147, "step": 14848, "token_acc": 0.8715368711233632 }, { "epoch": 0.8012194463929208, "grad_norm": 0.41749241948127747, "learning_rate": 2.001805302614128e-06, "loss": 0.3024013042449951, "step": 14849, "token_acc": 0.8928380545163015 }, { "epoch": 0.8012734041979173, "grad_norm": 0.4408234655857086, "learning_rate": 2.000756466883228e-06, "loss": 0.36779600381851196, "step": 14850, "token_acc": 0.8723945902943516 }, { "epoch": 0.8013273620029138, "grad_norm": 0.2948266565799713, "learning_rate": 1.9997078754471288e-06, "loss": 0.3299853801727295, "step": 14851, "token_acc": 0.8819075237634928 }, { "epoch": 0.8013813198079102, "grad_norm": 0.4566614329814911, "learning_rate": 1.998659528337856e-06, "loss": 0.3147226870059967, "step": 14852, "token_acc": 0.889493071390984 }, { "epoch": 0.8014352776129067, "grad_norm": 0.4335005581378937, "learning_rate": 1.997611425587426e-06, "loss": 0.2890278100967407, "step": 14853, "token_acc": 0.8907273354015857 }, { "epoch": 0.8014892354179032, "grad_norm": 0.40785956382751465, "learning_rate": 1.996563567227846e-06, "loss": 0.31787213683128357, "step": 14854, "token_acc": 0.8883564305141821 }, { "epoch": 0.8015431932228997, "grad_norm": 0.3587907552719116, "learning_rate": 1.995515953291117e-06, "loss": 0.30251264572143555, "step": 14855, "token_acc": 0.8930675692616272 }, { "epoch": 0.8015971510278962, "grad_norm": 0.4836280345916748, "learning_rate": 1.994468583809236e-06, "loss": 0.33421456813812256, "step": 14856, "token_acc": 0.8817572598659718 }, { "epoch": 0.8016511088328927, "grad_norm": 0.37022706866264343, "learning_rate": 1.9934214588141854e-06, "loss": 0.3466840386390686, "step": 14857, "token_acc": 0.8783259794090119 }, { "epoch": 0.8017050666378892, "grad_norm": 0.36712613701820374, "learning_rate": 1.9923745783379467e-06, "loss": 0.295539915561676, "step": 14858, "token_acc": 0.8937982005141388 }, { "epoch": 0.8017590244428857, "grad_norm": 0.35231444239616394, "learning_rate": 1.991327942412491e-06, "loss": 0.31024158000946045, "step": 14859, "token_acc": 0.8896977718949923 }, { "epoch": 0.8018129822478821, "grad_norm": 0.47182995080947876, "learning_rate": 1.9902815510697793e-06, "loss": 0.3294377326965332, "step": 14860, "token_acc": 0.8809931506849316 }, { "epoch": 0.8018669400528786, "grad_norm": 0.40518856048583984, "learning_rate": 1.9892354043417728e-06, "loss": 0.3600415587425232, "step": 14861, "token_acc": 0.8699908628116434 }, { "epoch": 0.8019208978578751, "grad_norm": 0.4314590394496918, "learning_rate": 1.988189502260419e-06, "loss": 0.3400196135044098, "step": 14862, "token_acc": 0.877602844083291 }, { "epoch": 0.8019748556628716, "grad_norm": 0.38263359665870667, "learning_rate": 1.9871438448576584e-06, "loss": 0.2668796479701996, "step": 14863, "token_acc": 0.8992459489812289 }, { "epoch": 0.8020288134678681, "grad_norm": 0.43879470229148865, "learning_rate": 1.9860984321654265e-06, "loss": 0.38326048851013184, "step": 14864, "token_acc": 0.8683414441667763 }, { "epoch": 0.8020827712728646, "grad_norm": 0.43372946977615356, "learning_rate": 1.9850532642156484e-06, "loss": 0.2564898133277893, "step": 14865, "token_acc": 0.9075172303253727 }, { "epoch": 0.8021367290778612, "grad_norm": 0.32066357135772705, "learning_rate": 1.9840083410402443e-06, "loss": 0.2877976894378662, "step": 14866, "token_acc": 0.8946376993595379 }, { "epoch": 0.8021906868828576, "grad_norm": 0.49713796377182007, "learning_rate": 1.9829636626711237e-06, "loss": 0.31264370679855347, "step": 14867, "token_acc": 0.8818617871735654 }, { "epoch": 0.8022446446878541, "grad_norm": 0.5185738205909729, "learning_rate": 1.9819192291401943e-06, "loss": 0.35968223214149475, "step": 14868, "token_acc": 0.8684660061496413 }, { "epoch": 0.8022986024928506, "grad_norm": 0.48168492317199707, "learning_rate": 1.980875040479351e-06, "loss": 0.3572049140930176, "step": 14869, "token_acc": 0.8787066809574848 }, { "epoch": 0.8023525602978471, "grad_norm": 0.3310542404651642, "learning_rate": 1.979831096720487e-06, "loss": 0.2836768627166748, "step": 14870, "token_acc": 0.8967912669533576 }, { "epoch": 0.8024065181028436, "grad_norm": 0.35119643807411194, "learning_rate": 1.9787873978954764e-06, "loss": 0.2882757782936096, "step": 14871, "token_acc": 0.8885448916408669 }, { "epoch": 0.8024604759078401, "grad_norm": 0.4515225291252136, "learning_rate": 1.977743944036198e-06, "loss": 0.3365689516067505, "step": 14872, "token_acc": 0.8782281511886437 }, { "epoch": 0.8025144337128366, "grad_norm": 0.48962271213531494, "learning_rate": 1.976700735174516e-06, "loss": 0.3344951272010803, "step": 14873, "token_acc": 0.87961841308298 }, { "epoch": 0.8025683915178331, "grad_norm": 0.4681064188480377, "learning_rate": 1.975657771342293e-06, "loss": 0.26929521560668945, "step": 14874, "token_acc": 0.90026395173454 }, { "epoch": 0.8026223493228295, "grad_norm": 0.5701745748519897, "learning_rate": 1.97461505257138e-06, "loss": 0.3746910095214844, "step": 14875, "token_acc": 0.8694228407695457 }, { "epoch": 0.802676307127826, "grad_norm": 0.4017907679080963, "learning_rate": 1.973572578893622e-06, "loss": 0.2426413893699646, "step": 14876, "token_acc": 0.9031598513011152 }, { "epoch": 0.8027302649328225, "grad_norm": 0.41186803579330444, "learning_rate": 1.9725303503408545e-06, "loss": 0.3144713044166565, "step": 14877, "token_acc": 0.8929330123212009 }, { "epoch": 0.802784222737819, "grad_norm": 0.4091362953186035, "learning_rate": 1.971488366944906e-06, "loss": 0.29124632477760315, "step": 14878, "token_acc": 0.8912670101730744 }, { "epoch": 0.8028381805428155, "grad_norm": 0.5021199584007263, "learning_rate": 1.9704466287376013e-06, "loss": 0.34349173307418823, "step": 14879, "token_acc": 0.8748649899706835 }, { "epoch": 0.802892138347812, "grad_norm": 0.5040676593780518, "learning_rate": 1.9694051357507493e-06, "loss": 0.3304847776889801, "step": 14880, "token_acc": 0.8817608173076923 }, { "epoch": 0.8029460961528085, "grad_norm": 0.33166518807411194, "learning_rate": 1.9683638880161626e-06, "loss": 0.29881811141967773, "step": 14881, "token_acc": 0.8940265718171356 }, { "epoch": 0.803000053957805, "grad_norm": 0.41095930337905884, "learning_rate": 1.9673228855656402e-06, "loss": 0.3642093539237976, "step": 14882, "token_acc": 0.8761333948055939 }, { "epoch": 0.8030540117628014, "grad_norm": 0.4356830418109894, "learning_rate": 1.9662821284309717e-06, "loss": 0.28753477334976196, "step": 14883, "token_acc": 0.891924606605568 }, { "epoch": 0.803107969567798, "grad_norm": 0.37159091234207153, "learning_rate": 1.9652416166439437e-06, "loss": 0.2690703868865967, "step": 14884, "token_acc": 0.8960826094082938 }, { "epoch": 0.8031619273727945, "grad_norm": 0.3563636839389801, "learning_rate": 1.964201350236331e-06, "loss": 0.32541516423225403, "step": 14885, "token_acc": 0.8843918191603876 }, { "epoch": 0.803215885177791, "grad_norm": 0.3044188320636749, "learning_rate": 1.9631613292399034e-06, "loss": 0.3140847980976105, "step": 14886, "token_acc": 0.8859237889169371 }, { "epoch": 0.8032698429827875, "grad_norm": 0.4884548485279083, "learning_rate": 1.9621215536864245e-06, "loss": 0.34250056743621826, "step": 14887, "token_acc": 0.8800870875900184 }, { "epoch": 0.803323800787784, "grad_norm": 0.3840981423854828, "learning_rate": 1.9610820236076465e-06, "loss": 0.3408389687538147, "step": 14888, "token_acc": 0.876001526135063 }, { "epoch": 0.8033777585927805, "grad_norm": 0.4857804775238037, "learning_rate": 1.9600427390353196e-06, "loss": 0.3645539879798889, "step": 14889, "token_acc": 0.8751445086705202 }, { "epoch": 0.8034317163977769, "grad_norm": 0.4166593849658966, "learning_rate": 1.9590037000011796e-06, "loss": 0.33146384358406067, "step": 14890, "token_acc": 0.8845957011258956 }, { "epoch": 0.8034856742027734, "grad_norm": 0.407906711101532, "learning_rate": 1.95796490653696e-06, "loss": 0.3479660749435425, "step": 14891, "token_acc": 0.8752807306483007 }, { "epoch": 0.8035396320077699, "grad_norm": 0.49003294110298157, "learning_rate": 1.9569263586743835e-06, "loss": 0.3121822476387024, "step": 14892, "token_acc": 0.8910256410256411 }, { "epoch": 0.8035935898127664, "grad_norm": 0.34574615955352783, "learning_rate": 1.955888056445172e-06, "loss": 0.35149309039115906, "step": 14893, "token_acc": 0.8735967184801382 }, { "epoch": 0.8036475476177629, "grad_norm": 0.48156487941741943, "learning_rate": 1.9548499998810313e-06, "loss": 0.29586195945739746, "step": 14894, "token_acc": 0.8923604112074178 }, { "epoch": 0.8037015054227594, "grad_norm": 0.31594613194465637, "learning_rate": 1.9538121890136642e-06, "loss": 0.3446184992790222, "step": 14895, "token_acc": 0.8758952129664531 }, { "epoch": 0.8037554632277559, "grad_norm": 0.511445164680481, "learning_rate": 1.9527746238747645e-06, "loss": 0.3175617754459381, "step": 14896, "token_acc": 0.8861361771944216 }, { "epoch": 0.8038094210327524, "grad_norm": 0.40356191992759705, "learning_rate": 1.951737304496021e-06, "loss": 0.35612279176712036, "step": 14897, "token_acc": 0.8770822555613037 }, { "epoch": 0.8038633788377488, "grad_norm": 0.24726493656635284, "learning_rate": 1.9507002309091116e-06, "loss": 0.27405282855033875, "step": 14898, "token_acc": 0.8971825474466836 }, { "epoch": 0.8039173366427453, "grad_norm": 0.40099385380744934, "learning_rate": 1.949663403145705e-06, "loss": 0.24686215817928314, "step": 14899, "token_acc": 0.9085233047771163 }, { "epoch": 0.8039712944477418, "grad_norm": 0.46723487973213196, "learning_rate": 1.9486268212374725e-06, "loss": 0.3231518864631653, "step": 14900, "token_acc": 0.880449362843729 }, { "epoch": 0.8040252522527384, "grad_norm": 0.4420681893825531, "learning_rate": 1.947590485216068e-06, "loss": 0.3429282307624817, "step": 14901, "token_acc": 0.874874749498998 }, { "epoch": 0.8040792100577349, "grad_norm": 0.3884854018688202, "learning_rate": 1.9465543951131394e-06, "loss": 0.37363579869270325, "step": 14902, "token_acc": 0.8689490987416393 }, { "epoch": 0.8041331678627314, "grad_norm": 0.4271909296512604, "learning_rate": 1.9455185509603304e-06, "loss": 0.31494849920272827, "step": 14903, "token_acc": 0.8853427895981087 }, { "epoch": 0.8041871256677279, "grad_norm": 0.3733815550804138, "learning_rate": 1.9444829527892752e-06, "loss": 0.32184505462646484, "step": 14904, "token_acc": 0.8853006681514477 }, { "epoch": 0.8042410834727244, "grad_norm": 0.4094294309616089, "learning_rate": 1.9434476006316005e-06, "loss": 0.37800896167755127, "step": 14905, "token_acc": 0.8670274449168921 }, { "epoch": 0.8042950412777208, "grad_norm": 0.4121353030204773, "learning_rate": 1.942412494518925e-06, "loss": 0.35529792308807373, "step": 14906, "token_acc": 0.8742236024844721 }, { "epoch": 0.8043489990827173, "grad_norm": 0.36723509430885315, "learning_rate": 1.941377634482862e-06, "loss": 0.3086538314819336, "step": 14907, "token_acc": 0.8919947792038286 }, { "epoch": 0.8044029568877138, "grad_norm": 0.3043724000453949, "learning_rate": 1.9403430205550133e-06, "loss": 0.3201393187046051, "step": 14908, "token_acc": 0.8857758620689655 }, { "epoch": 0.8044569146927103, "grad_norm": 0.4539596140384674, "learning_rate": 1.9393086527669793e-06, "loss": 0.3427223563194275, "step": 14909, "token_acc": 0.8754295532646048 }, { "epoch": 0.8045108724977068, "grad_norm": 0.41372647881507874, "learning_rate": 1.938274531150346e-06, "loss": 0.3429340720176697, "step": 14910, "token_acc": 0.8761307265830172 }, { "epoch": 0.8045648303027033, "grad_norm": 0.48199793696403503, "learning_rate": 1.9372406557366972e-06, "loss": 0.3421911895275116, "step": 14911, "token_acc": 0.8786293958521191 }, { "epoch": 0.8046187881076998, "grad_norm": 0.3385014832019806, "learning_rate": 1.936207026557604e-06, "loss": 0.3293512463569641, "step": 14912, "token_acc": 0.8847587974831042 }, { "epoch": 0.8046727459126962, "grad_norm": 0.39906957745552063, "learning_rate": 1.9351736436446377e-06, "loss": 0.2719764709472656, "step": 14913, "token_acc": 0.8990335707019329 }, { "epoch": 0.8047267037176927, "grad_norm": 0.38210976123809814, "learning_rate": 1.9341405070293572e-06, "loss": 0.31081515550613403, "step": 14914, "token_acc": 0.8839023287361519 }, { "epoch": 0.8047806615226892, "grad_norm": 0.48206794261932373, "learning_rate": 1.9331076167433115e-06, "loss": 0.3182489573955536, "step": 14915, "token_acc": 0.8895037486611924 }, { "epoch": 0.8048346193276857, "grad_norm": 0.4313517212867737, "learning_rate": 1.9320749728180466e-06, "loss": 0.322187602519989, "step": 14916, "token_acc": 0.8826276463262764 }, { "epoch": 0.8048885771326822, "grad_norm": 0.43794965744018555, "learning_rate": 1.931042575285098e-06, "loss": 0.3354625105857849, "step": 14917, "token_acc": 0.8746798390047567 }, { "epoch": 0.8049425349376788, "grad_norm": 0.4403890371322632, "learning_rate": 1.9300104241759955e-06, "loss": 0.3402627408504486, "step": 14918, "token_acc": 0.8787351778656126 }, { "epoch": 0.8049964927426753, "grad_norm": 0.4887220859527588, "learning_rate": 1.92897851952226e-06, "loss": 0.3619915843009949, "step": 14919, "token_acc": 0.8711522287636669 }, { "epoch": 0.8050504505476718, "grad_norm": 0.3646475076675415, "learning_rate": 1.9279468613554077e-06, "loss": 0.334913432598114, "step": 14920, "token_acc": 0.8836223026069602 }, { "epoch": 0.8051044083526682, "grad_norm": 0.3701416850090027, "learning_rate": 1.926915449706942e-06, "loss": 0.3438810706138611, "step": 14921, "token_acc": 0.8775771136327314 }, { "epoch": 0.8051583661576647, "grad_norm": 0.46705901622772217, "learning_rate": 1.9258842846083647e-06, "loss": 0.3920503258705139, "step": 14922, "token_acc": 0.8680096696212731 }, { "epoch": 0.8052123239626612, "grad_norm": 0.48948636651039124, "learning_rate": 1.924853366091166e-06, "loss": 0.32902535796165466, "step": 14923, "token_acc": 0.8848837209302326 }, { "epoch": 0.8052662817676577, "grad_norm": 0.5250768065452576, "learning_rate": 1.9238226941868297e-06, "loss": 0.357693076133728, "step": 14924, "token_acc": 0.8746011486917677 }, { "epoch": 0.8053202395726542, "grad_norm": 0.47261613607406616, "learning_rate": 1.9227922689268306e-06, "loss": 0.3145560324192047, "step": 14925, "token_acc": 0.8875491480996068 }, { "epoch": 0.8053741973776507, "grad_norm": 0.3365309536457062, "learning_rate": 1.9217620903426416e-06, "loss": 0.30107632279396057, "step": 14926, "token_acc": 0.885495347564313 }, { "epoch": 0.8054281551826472, "grad_norm": 0.444763720035553, "learning_rate": 1.9207321584657225e-06, "loss": 0.2708508372306824, "step": 14927, "token_acc": 0.9039957567185289 }, { "epoch": 0.8054821129876437, "grad_norm": 0.49507999420166016, "learning_rate": 1.9197024733275273e-06, "loss": 0.34579789638519287, "step": 14928, "token_acc": 0.8768760017485064 }, { "epoch": 0.8055360707926401, "grad_norm": 0.4036414921283722, "learning_rate": 1.9186730349595005e-06, "loss": 0.3201526999473572, "step": 14929, "token_acc": 0.8874279123414072 }, { "epoch": 0.8055900285976366, "grad_norm": 0.44527706503868103, "learning_rate": 1.9176438433930834e-06, "loss": 0.3643597364425659, "step": 14930, "token_acc": 0.8684016242155776 }, { "epoch": 0.8056439864026331, "grad_norm": 0.35620492696762085, "learning_rate": 1.916614898659703e-06, "loss": 0.29260149598121643, "step": 14931, "token_acc": 0.8930624760444614 }, { "epoch": 0.8056979442076296, "grad_norm": 0.44490349292755127, "learning_rate": 1.915586200790789e-06, "loss": 0.35269874334335327, "step": 14932, "token_acc": 0.877318619394224 }, { "epoch": 0.8057519020126261, "grad_norm": 0.4711901843547821, "learning_rate": 1.9145577498177546e-06, "loss": 0.3226243257522583, "step": 14933, "token_acc": 0.8845714285714286 }, { "epoch": 0.8058058598176226, "grad_norm": 0.462465763092041, "learning_rate": 1.913529545772008e-06, "loss": 0.3055524230003357, "step": 14934, "token_acc": 0.8876369327073552 }, { "epoch": 0.8058598176226192, "grad_norm": 0.3668990433216095, "learning_rate": 1.912501588684951e-06, "loss": 0.3213540017604828, "step": 14935, "token_acc": 0.8841496475691307 }, { "epoch": 0.8059137754276156, "grad_norm": 0.46949824690818787, "learning_rate": 1.911473878587976e-06, "loss": 0.3427826762199402, "step": 14936, "token_acc": 0.8789818181818182 }, { "epoch": 0.8059677332326121, "grad_norm": 0.44998878240585327, "learning_rate": 1.9104464155124713e-06, "loss": 0.33227622509002686, "step": 14937, "token_acc": 0.8762636473918318 }, { "epoch": 0.8060216910376086, "grad_norm": 0.4330138862133026, "learning_rate": 1.9094191994898126e-06, "loss": 0.3452596664428711, "step": 14938, "token_acc": 0.8782305005820722 }, { "epoch": 0.8060756488426051, "grad_norm": 0.527779221534729, "learning_rate": 1.9083922305513725e-06, "loss": 0.3237922787666321, "step": 14939, "token_acc": 0.8765571913929785 }, { "epoch": 0.8061296066476016, "grad_norm": 0.41285377740859985, "learning_rate": 1.907365508728515e-06, "loss": 0.3690347969532013, "step": 14940, "token_acc": 0.8709148092438124 }, { "epoch": 0.8061835644525981, "grad_norm": 0.5513404607772827, "learning_rate": 1.9063390340525933e-06, "loss": 0.34814366698265076, "step": 14941, "token_acc": 0.8781566820276497 }, { "epoch": 0.8062375222575946, "grad_norm": 0.42753294110298157, "learning_rate": 1.9053128065549576e-06, "loss": 0.316793829202652, "step": 14942, "token_acc": 0.8853448275862069 }, { "epoch": 0.8062914800625911, "grad_norm": 0.5200305581092834, "learning_rate": 1.904286826266949e-06, "loss": 0.32334843277931213, "step": 14943, "token_acc": 0.8851536355339215 }, { "epoch": 0.8063454378675875, "grad_norm": 0.5000528693199158, "learning_rate": 1.903261093219897e-06, "loss": 0.32196998596191406, "step": 14944, "token_acc": 0.8841030195381883 }, { "epoch": 0.806399395672584, "grad_norm": 0.5605104565620422, "learning_rate": 1.902235607445133e-06, "loss": 0.36583325266838074, "step": 14945, "token_acc": 0.8749846794950361 }, { "epoch": 0.8064533534775805, "grad_norm": 0.3747802674770355, "learning_rate": 1.9012103689739714e-06, "loss": 0.32439738512039185, "step": 14946, "token_acc": 0.8855544946119219 }, { "epoch": 0.806507311282577, "grad_norm": 0.48143720626831055, "learning_rate": 1.9001853778377243e-06, "loss": 0.2949677109718323, "step": 14947, "token_acc": 0.8967060508413892 }, { "epoch": 0.8065612690875735, "grad_norm": 0.41954734921455383, "learning_rate": 1.8991606340676937e-06, "loss": 0.308408260345459, "step": 14948, "token_acc": 0.8880452582998363 }, { "epoch": 0.80661522689257, "grad_norm": 0.39516881108283997, "learning_rate": 1.8981361376951745e-06, "loss": 0.3818962574005127, "step": 14949, "token_acc": 0.8730434782608696 }, { "epoch": 0.8066691846975665, "grad_norm": 0.5337769389152527, "learning_rate": 1.897111888751454e-06, "loss": 0.33448874950408936, "step": 14950, "token_acc": 0.8825527097778406 }, { "epoch": 0.806723142502563, "grad_norm": 0.351645827293396, "learning_rate": 1.8960878872678157e-06, "loss": 0.3341350257396698, "step": 14951, "token_acc": 0.8822096756337873 }, { "epoch": 0.8067771003075594, "grad_norm": 0.3590644299983978, "learning_rate": 1.8950641332755327e-06, "loss": 0.31501734256744385, "step": 14952, "token_acc": 0.8858151854031784 }, { "epoch": 0.806831058112556, "grad_norm": 0.3742680251598358, "learning_rate": 1.8940406268058652e-06, "loss": 0.3178250193595886, "step": 14953, "token_acc": 0.887246566383257 }, { "epoch": 0.8068850159175525, "grad_norm": 0.47070950269699097, "learning_rate": 1.8930173678900742e-06, "loss": 0.40028858184814453, "step": 14954, "token_acc": 0.8599851705388037 }, { "epoch": 0.806938973722549, "grad_norm": 0.34671497344970703, "learning_rate": 1.891994356559409e-06, "loss": 0.34074872732162476, "step": 14955, "token_acc": 0.8812199036918138 }, { "epoch": 0.8069929315275455, "grad_norm": 0.3464333415031433, "learning_rate": 1.890971592845111e-06, "loss": 0.3533543348312378, "step": 14956, "token_acc": 0.8761788004526594 }, { "epoch": 0.807046889332542, "grad_norm": 0.4135311245918274, "learning_rate": 1.8899490767784146e-06, "loss": 0.3034108877182007, "step": 14957, "token_acc": 0.8904741921947126 }, { "epoch": 0.8071008471375385, "grad_norm": 0.49965766072273254, "learning_rate": 1.8889268083905522e-06, "loss": 0.36665305495262146, "step": 14958, "token_acc": 0.8712215320910973 }, { "epoch": 0.8071548049425349, "grad_norm": 0.44890889525413513, "learning_rate": 1.8879047877127387e-06, "loss": 0.3395710587501526, "step": 14959, "token_acc": 0.8782903309877509 }, { "epoch": 0.8072087627475314, "grad_norm": 0.43339425325393677, "learning_rate": 1.8868830147761885e-06, "loss": 0.27020397782325745, "step": 14960, "token_acc": 0.8977293369663942 }, { "epoch": 0.8072627205525279, "grad_norm": 0.5169813632965088, "learning_rate": 1.8858614896121064e-06, "loss": 0.3287794589996338, "step": 14961, "token_acc": 0.8787383559405132 }, { "epoch": 0.8073166783575244, "grad_norm": 0.3225589692592621, "learning_rate": 1.8848402122516873e-06, "loss": 0.32173827290534973, "step": 14962, "token_acc": 0.8848080133555927 }, { "epoch": 0.8073706361625209, "grad_norm": 0.38884973526000977, "learning_rate": 1.8838191827261209e-06, "loss": 0.3419981896877289, "step": 14963, "token_acc": 0.8761061946902655 }, { "epoch": 0.8074245939675174, "grad_norm": 0.41754162311553955, "learning_rate": 1.8827984010665923e-06, "loss": 0.3484744429588318, "step": 14964, "token_acc": 0.8739156268568034 }, { "epoch": 0.8074785517725139, "grad_norm": 0.37710365653038025, "learning_rate": 1.8817778673042752e-06, "loss": 0.2899667024612427, "step": 14965, "token_acc": 0.9024032494640641 }, { "epoch": 0.8075325095775104, "grad_norm": 0.38759663701057434, "learning_rate": 1.880757581470334e-06, "loss": 0.31273382902145386, "step": 14966, "token_acc": 0.889031355635855 }, { "epoch": 0.8075864673825068, "grad_norm": 0.39573994278907776, "learning_rate": 1.8797375435959297e-06, "loss": 0.30373615026474, "step": 14967, "token_acc": 0.891705668226729 }, { "epoch": 0.8076404251875033, "grad_norm": 0.41054487228393555, "learning_rate": 1.8787177537122148e-06, "loss": 0.32592618465423584, "step": 14968, "token_acc": 0.881950774840474 }, { "epoch": 0.8076943829924998, "grad_norm": 0.43898263573646545, "learning_rate": 1.8776982118503307e-06, "loss": 0.37456244230270386, "step": 14969, "token_acc": 0.871506727783528 }, { "epoch": 0.8077483407974964, "grad_norm": 0.31001922488212585, "learning_rate": 1.8766789180414158e-06, "loss": 0.33606794476509094, "step": 14970, "token_acc": 0.8780096308186196 }, { "epoch": 0.8078022986024929, "grad_norm": 0.47010084986686707, "learning_rate": 1.8756598723165998e-06, "loss": 0.32120126485824585, "step": 14971, "token_acc": 0.882899345210903 }, { "epoch": 0.8078562564074894, "grad_norm": 0.4363594949245453, "learning_rate": 1.874641074707002e-06, "loss": 0.35349714756011963, "step": 14972, "token_acc": 0.8764278296988577 }, { "epoch": 0.8079102142124859, "grad_norm": 0.4286065399646759, "learning_rate": 1.8736225252437367e-06, "loss": 0.39529329538345337, "step": 14973, "token_acc": 0.8635739774910788 }, { "epoch": 0.8079641720174823, "grad_norm": 0.38534015417099, "learning_rate": 1.8726042239579113e-06, "loss": 0.27025285363197327, "step": 14974, "token_acc": 0.9010526315789473 }, { "epoch": 0.8080181298224788, "grad_norm": 0.3858306109905243, "learning_rate": 1.8715861708806238e-06, "loss": 0.32638126611709595, "step": 14975, "token_acc": 0.8849615447553592 }, { "epoch": 0.8080720876274753, "grad_norm": 0.45087042450904846, "learning_rate": 1.870568366042963e-06, "loss": 0.2820211350917816, "step": 14976, "token_acc": 0.8947368421052632 }, { "epoch": 0.8081260454324718, "grad_norm": 0.3365829586982727, "learning_rate": 1.8695508094760173e-06, "loss": 0.3267924189567566, "step": 14977, "token_acc": 0.8817404647084612 }, { "epoch": 0.8081800032374683, "grad_norm": 0.34019574522972107, "learning_rate": 1.8685335012108596e-06, "loss": 0.3307141363620758, "step": 14978, "token_acc": 0.8778131846245768 }, { "epoch": 0.8082339610424648, "grad_norm": 0.48411825299263, "learning_rate": 1.8675164412785584e-06, "loss": 0.39640337228775024, "step": 14979, "token_acc": 0.867118741884288 }, { "epoch": 0.8082879188474613, "grad_norm": 0.45616650581359863, "learning_rate": 1.8664996297101756e-06, "loss": 0.31634148955345154, "step": 14980, "token_acc": 0.8853216899961475 }, { "epoch": 0.8083418766524578, "grad_norm": 0.39484846591949463, "learning_rate": 1.8654830665367628e-06, "loss": 0.35458314418792725, "step": 14981, "token_acc": 0.8744800587227796 }, { "epoch": 0.8083958344574542, "grad_norm": 0.37324321269989014, "learning_rate": 1.8644667517893656e-06, "loss": 0.37274420261383057, "step": 14982, "token_acc": 0.8684997011356844 }, { "epoch": 0.8084497922624507, "grad_norm": 0.49875932931900024, "learning_rate": 1.863450685499021e-06, "loss": 0.3414073884487152, "step": 14983, "token_acc": 0.8779098160507895 }, { "epoch": 0.8085037500674472, "grad_norm": 0.41669008135795593, "learning_rate": 1.862434867696763e-06, "loss": 0.3373568654060364, "step": 14984, "token_acc": 0.881269933314004 }, { "epoch": 0.8085577078724437, "grad_norm": 0.4307732880115509, "learning_rate": 1.8614192984136125e-06, "loss": 0.3123270869255066, "step": 14985, "token_acc": 0.8837433008107737 }, { "epoch": 0.8086116656774403, "grad_norm": 0.39218440651893616, "learning_rate": 1.860403977680585e-06, "loss": 0.3145942687988281, "step": 14986, "token_acc": 0.8888248017303533 }, { "epoch": 0.8086656234824368, "grad_norm": 0.3450241684913635, "learning_rate": 1.8593889055286896e-06, "loss": 0.2782137095928192, "step": 14987, "token_acc": 0.8990411731528483 }, { "epoch": 0.8087195812874333, "grad_norm": 0.4300311505794525, "learning_rate": 1.8583740819889228e-06, "loss": 0.35900020599365234, "step": 14988, "token_acc": 0.8721262724465286 }, { "epoch": 0.8087735390924298, "grad_norm": 0.32792386412620544, "learning_rate": 1.8573595070922757e-06, "loss": 0.2699641287326813, "step": 14989, "token_acc": 0.9022205864311127 }, { "epoch": 0.8088274968974262, "grad_norm": 0.5648345351219177, "learning_rate": 1.8563451808697386e-06, "loss": 0.3309454321861267, "step": 14990, "token_acc": 0.8802876480541455 }, { "epoch": 0.8088814547024227, "grad_norm": 0.47148752212524414, "learning_rate": 1.8553311033522858e-06, "loss": 0.33349448442459106, "step": 14991, "token_acc": 0.8847000550357732 }, { "epoch": 0.8089354125074192, "grad_norm": 0.46327900886535645, "learning_rate": 1.854317274570888e-06, "loss": 0.34429433941841125, "step": 14992, "token_acc": 0.8795870133101132 }, { "epoch": 0.8089893703124157, "grad_norm": 0.48630520701408386, "learning_rate": 1.8533036945565076e-06, "loss": 0.33644139766693115, "step": 14993, "token_acc": 0.8801130695044895 }, { "epoch": 0.8090433281174122, "grad_norm": 0.5379194021224976, "learning_rate": 1.8522903633400968e-06, "loss": 0.37621068954467773, "step": 14994, "token_acc": 0.8702757916241062 }, { "epoch": 0.8090972859224087, "grad_norm": 0.40657177567481995, "learning_rate": 1.8512772809526026e-06, "loss": 0.31828975677490234, "step": 14995, "token_acc": 0.88371467025572 }, { "epoch": 0.8091512437274052, "grad_norm": 0.43340063095092773, "learning_rate": 1.8502644474249676e-06, "loss": 0.3262813687324524, "step": 14996, "token_acc": 0.8819576333089847 }, { "epoch": 0.8092052015324016, "grad_norm": 0.4656771123409271, "learning_rate": 1.8492518627881206e-06, "loss": 0.381774365901947, "step": 14997, "token_acc": 0.8653445954217163 }, { "epoch": 0.8092591593373981, "grad_norm": 0.5243822932243347, "learning_rate": 1.848239527072988e-06, "loss": 0.29092636704444885, "step": 14998, "token_acc": 0.8960555693376333 }, { "epoch": 0.8093131171423946, "grad_norm": 0.5202552676200867, "learning_rate": 1.847227440310484e-06, "loss": 0.34815090894699097, "step": 14999, "token_acc": 0.8796419098143236 }, { "epoch": 0.8093670749473911, "grad_norm": 0.43978792428970337, "learning_rate": 1.846215602531518e-06, "loss": 0.3728378713130951, "step": 15000, "token_acc": 0.8697295725501599 }, { "epoch": 0.8094210327523876, "grad_norm": 0.4207962453365326, "learning_rate": 1.8452040137669923e-06, "loss": 0.339633047580719, "step": 15001, "token_acc": 0.8778078484438431 }, { "epoch": 0.8094749905573841, "grad_norm": 0.38944846391677856, "learning_rate": 1.8441926740477989e-06, "loss": 0.33436840772628784, "step": 15002, "token_acc": 0.8808440215142739 }, { "epoch": 0.8095289483623807, "grad_norm": 0.3194177448749542, "learning_rate": 1.843181583404825e-06, "loss": 0.2959032952785492, "step": 15003, "token_acc": 0.8939579090291921 }, { "epoch": 0.8095829061673772, "grad_norm": 0.45959538221359253, "learning_rate": 1.8421707418689482e-06, "loss": 0.29154497385025024, "step": 15004, "token_acc": 0.8929465469003093 }, { "epoch": 0.8096368639723736, "grad_norm": 0.4882124960422516, "learning_rate": 1.8411601494710408e-06, "loss": 0.3511502146720886, "step": 15005, "token_acc": 0.8749535488665924 }, { "epoch": 0.8096908217773701, "grad_norm": 0.3712087571620941, "learning_rate": 1.8401498062419653e-06, "loss": 0.3179534673690796, "step": 15006, "token_acc": 0.8813579799444243 }, { "epoch": 0.8097447795823666, "grad_norm": 0.39802220463752747, "learning_rate": 1.8391397122125764e-06, "loss": 0.31082814931869507, "step": 15007, "token_acc": 0.8864659538548115 }, { "epoch": 0.8097987373873631, "grad_norm": 0.38546109199523926, "learning_rate": 1.8381298674137204e-06, "loss": 0.3210514783859253, "step": 15008, "token_acc": 0.8849977905435263 }, { "epoch": 0.8098526951923596, "grad_norm": 0.3736862540245056, "learning_rate": 1.8371202718762426e-06, "loss": 0.30479782819747925, "step": 15009, "token_acc": 0.8838748495788207 }, { "epoch": 0.8099066529973561, "grad_norm": 0.47779327630996704, "learning_rate": 1.836110925630974e-06, "loss": 0.3912699222564697, "step": 15010, "token_acc": 0.8645915626481276 }, { "epoch": 0.8099606108023526, "grad_norm": 0.3938095271587372, "learning_rate": 1.8351018287087386e-06, "loss": 0.315888911485672, "step": 15011, "token_acc": 0.8875962898915265 }, { "epoch": 0.8100145686073491, "grad_norm": 0.4605746865272522, "learning_rate": 1.834092981140354e-06, "loss": 0.28436920046806335, "step": 15012, "token_acc": 0.8971460864651032 }, { "epoch": 0.8100685264123455, "grad_norm": 0.386554092168808, "learning_rate": 1.8330843829566313e-06, "loss": 0.3721495270729065, "step": 15013, "token_acc": 0.8713456817674626 }, { "epoch": 0.810122484217342, "grad_norm": 0.46845224499702454, "learning_rate": 1.8320760341883713e-06, "loss": 0.3614124059677124, "step": 15014, "token_acc": 0.8768831168831169 }, { "epoch": 0.8101764420223385, "grad_norm": 0.4140157401561737, "learning_rate": 1.8310679348663674e-06, "loss": 0.3511534333229065, "step": 15015, "token_acc": 0.8753963221306278 }, { "epoch": 0.810230399827335, "grad_norm": 0.40854012966156006, "learning_rate": 1.830060085021411e-06, "loss": 0.35187044739723206, "step": 15016, "token_acc": 0.8769288671433948 }, { "epoch": 0.8102843576323315, "grad_norm": 0.40610581636428833, "learning_rate": 1.8290524846842806e-06, "loss": 0.3371838927268982, "step": 15017, "token_acc": 0.8787230247904405 }, { "epoch": 0.810338315437328, "grad_norm": 0.38451215624809265, "learning_rate": 1.8280451338857451e-06, "loss": 0.2916732430458069, "step": 15018, "token_acc": 0.8957194899817851 }, { "epoch": 0.8103922732423245, "grad_norm": 0.3078954219818115, "learning_rate": 1.8270380326565718e-06, "loss": 0.26896828413009644, "step": 15019, "token_acc": 0.8965592098122218 }, { "epoch": 0.8104462310473209, "grad_norm": 0.3270847201347351, "learning_rate": 1.8260311810275155e-06, "loss": 0.3282301127910614, "step": 15020, "token_acc": 0.8813443851481776 }, { "epoch": 0.8105001888523174, "grad_norm": 0.35095304250717163, "learning_rate": 1.8250245790293253e-06, "loss": 0.28192877769470215, "step": 15021, "token_acc": 0.8962543788736189 }, { "epoch": 0.810554146657314, "grad_norm": 0.5227013230323792, "learning_rate": 1.8240182266927443e-06, "loss": 0.32895058393478394, "step": 15022, "token_acc": 0.8873686297838588 }, { "epoch": 0.8106081044623105, "grad_norm": 0.41225528717041016, "learning_rate": 1.8230121240485033e-06, "loss": 0.34919071197509766, "step": 15023, "token_acc": 0.8728813559322034 }, { "epoch": 0.810662062267307, "grad_norm": 0.4583573043346405, "learning_rate": 1.82200627112733e-06, "loss": 0.29315194487571716, "step": 15024, "token_acc": 0.8940684777366983 }, { "epoch": 0.8107160200723035, "grad_norm": 0.39916127920150757, "learning_rate": 1.8210006679599435e-06, "loss": 0.34185028076171875, "step": 15025, "token_acc": 0.882625036137612 }, { "epoch": 0.8107699778773, "grad_norm": 0.4769599437713623, "learning_rate": 1.8199953145770544e-06, "loss": 0.34953200817108154, "step": 15026, "token_acc": 0.8761878903068151 }, { "epoch": 0.8108239356822965, "grad_norm": 0.45355790853500366, "learning_rate": 1.8189902110093638e-06, "loss": 0.3530588746070862, "step": 15027, "token_acc": 0.8774027546541547 }, { "epoch": 0.8108778934872929, "grad_norm": 0.29665958881378174, "learning_rate": 1.8179853572875682e-06, "loss": 0.3199889361858368, "step": 15028, "token_acc": 0.8854007376090672 }, { "epoch": 0.8109318512922894, "grad_norm": 0.3998667895793915, "learning_rate": 1.8169807534423578e-06, "loss": 0.3102881908416748, "step": 15029, "token_acc": 0.8874315548198141 }, { "epoch": 0.8109858090972859, "grad_norm": 0.40830710530281067, "learning_rate": 1.8159763995044122e-06, "loss": 0.34548893570899963, "step": 15030, "token_acc": 0.8799332119004251 }, { "epoch": 0.8110397669022824, "grad_norm": 0.5198281407356262, "learning_rate": 1.8149722955044036e-06, "loss": 0.35329321026802063, "step": 15031, "token_acc": 0.876984126984127 }, { "epoch": 0.8110937247072789, "grad_norm": 0.4111658036708832, "learning_rate": 1.8139684414729964e-06, "loss": 0.32678207755088806, "step": 15032, "token_acc": 0.8833617262918796 }, { "epoch": 0.8111476825122754, "grad_norm": 0.3559251129627228, "learning_rate": 1.8129648374408492e-06, "loss": 0.3164266347885132, "step": 15033, "token_acc": 0.8842661607263856 }, { "epoch": 0.8112016403172719, "grad_norm": 0.4265315532684326, "learning_rate": 1.811961483438608e-06, "loss": 0.36131489276885986, "step": 15034, "token_acc": 0.8729617911900706 }, { "epoch": 0.8112555981222684, "grad_norm": 0.5011081695556641, "learning_rate": 1.8109583794969231e-06, "loss": 0.33165591955184937, "step": 15035, "token_acc": 0.8817974601107131 }, { "epoch": 0.8113095559272648, "grad_norm": 0.35482117533683777, "learning_rate": 1.8099555256464218e-06, "loss": 0.3880322575569153, "step": 15036, "token_acc": 0.8624749793947957 }, { "epoch": 0.8113635137322613, "grad_norm": 0.4582938253879547, "learning_rate": 1.8089529219177326e-06, "loss": 0.3373921513557434, "step": 15037, "token_acc": 0.8777203983769827 }, { "epoch": 0.8114174715372579, "grad_norm": 0.46350276470184326, "learning_rate": 1.8079505683414766e-06, "loss": 0.3334270417690277, "step": 15038, "token_acc": 0.8815311004784689 }, { "epoch": 0.8114714293422544, "grad_norm": 0.3741062879562378, "learning_rate": 1.8069484649482638e-06, "loss": 0.3475363254547119, "step": 15039, "token_acc": 0.874251497005988 }, { "epoch": 0.8115253871472509, "grad_norm": 0.38429850339889526, "learning_rate": 1.8059466117686952e-06, "loss": 0.24714425206184387, "step": 15040, "token_acc": 0.9011278195488722 }, { "epoch": 0.8115793449522474, "grad_norm": 0.37313079833984375, "learning_rate": 1.8049450088333742e-06, "loss": 0.34248703718185425, "step": 15041, "token_acc": 0.87934730824202 }, { "epoch": 0.8116333027572439, "grad_norm": 0.4296267330646515, "learning_rate": 1.8039436561728851e-06, "loss": 0.3690101206302643, "step": 15042, "token_acc": 0.8734698975768174 }, { "epoch": 0.8116872605622403, "grad_norm": 0.5177781581878662, "learning_rate": 1.8029425538178103e-06, "loss": 0.3197921812534332, "step": 15043, "token_acc": 0.887906536154089 }, { "epoch": 0.8117412183672368, "grad_norm": 0.48014897108078003, "learning_rate": 1.8019417017987218e-06, "loss": 0.3354017436504364, "step": 15044, "token_acc": 0.8773248407643313 }, { "epoch": 0.8117951761722333, "grad_norm": 0.47517910599708557, "learning_rate": 1.8009411001461862e-06, "loss": 0.271557092666626, "step": 15045, "token_acc": 0.9049247606019152 }, { "epoch": 0.8118491339772298, "grad_norm": 0.5051583051681519, "learning_rate": 1.7999407488907628e-06, "loss": 0.3539753556251526, "step": 15046, "token_acc": 0.875947622329428 }, { "epoch": 0.8119030917822263, "grad_norm": 0.5520848631858826, "learning_rate": 1.7989406480629967e-06, "loss": 0.371192991733551, "step": 15047, "token_acc": 0.8693810629867468 }, { "epoch": 0.8119570495872228, "grad_norm": 0.3957218527793884, "learning_rate": 1.7979407976934382e-06, "loss": 0.25807487964630127, "step": 15048, "token_acc": 0.9029345372460497 }, { "epoch": 0.8120110073922193, "grad_norm": 0.4049794375896454, "learning_rate": 1.7969411978126194e-06, "loss": 0.3587720990180969, "step": 15049, "token_acc": 0.8723320675657981 }, { "epoch": 0.8120649651972158, "grad_norm": 0.5320023894309998, "learning_rate": 1.795941848451066e-06, "loss": 0.3063696622848511, "step": 15050, "token_acc": 0.8853445133065986 }, { "epoch": 0.8121189230022122, "grad_norm": 0.4896192252635956, "learning_rate": 1.7949427496393012e-06, "loss": 0.254694402217865, "step": 15051, "token_acc": 0.9079572446555819 }, { "epoch": 0.8121728808072087, "grad_norm": 0.4098595976829529, "learning_rate": 1.793943901407834e-06, "loss": 0.3066481351852417, "step": 15052, "token_acc": 0.8876615256356816 }, { "epoch": 0.8122268386122052, "grad_norm": 0.3595585823059082, "learning_rate": 1.7929453037871714e-06, "loss": 0.3281751573085785, "step": 15053, "token_acc": 0.8803507728894173 }, { "epoch": 0.8122807964172017, "grad_norm": 0.401297926902771, "learning_rate": 1.7919469568078086e-06, "loss": 0.35361772775650024, "step": 15054, "token_acc": 0.8733586227020718 }, { "epoch": 0.8123347542221983, "grad_norm": 0.49435746669769287, "learning_rate": 1.7909488605002357e-06, "loss": 0.3344741463661194, "step": 15055, "token_acc": 0.882824427480916 }, { "epoch": 0.8123887120271948, "grad_norm": 0.43932202458381653, "learning_rate": 1.789951014894934e-06, "loss": 0.3552628755569458, "step": 15056, "token_acc": 0.8703143893591294 }, { "epoch": 0.8124426698321913, "grad_norm": 0.44713181257247925, "learning_rate": 1.788953420022378e-06, "loss": 0.32790011167526245, "step": 15057, "token_acc": 0.8829179053281501 }, { "epoch": 0.8124966276371878, "grad_norm": 0.41813698410987854, "learning_rate": 1.7879560759130332e-06, "loss": 0.31242433190345764, "step": 15058, "token_acc": 0.8858777476663655 }, { "epoch": 0.8125505854421842, "grad_norm": 0.4531853199005127, "learning_rate": 1.7869589825973588e-06, "loss": 0.33903056383132935, "step": 15059, "token_acc": 0.877115728636301 }, { "epoch": 0.8126045432471807, "grad_norm": 0.5221651792526245, "learning_rate": 1.7859621401058025e-06, "loss": 0.35493573546409607, "step": 15060, "token_acc": 0.879901583884361 }, { "epoch": 0.8126585010521772, "grad_norm": 0.40787050127983093, "learning_rate": 1.7849655484688133e-06, "loss": 0.34640341997146606, "step": 15061, "token_acc": 0.882446134347275 }, { "epoch": 0.8127124588571737, "grad_norm": 0.5708783864974976, "learning_rate": 1.7839692077168247e-06, "loss": 0.3277203440666199, "step": 15062, "token_acc": 0.8776404942208051 }, { "epoch": 0.8127664166621702, "grad_norm": 0.44902387261390686, "learning_rate": 1.7829731178802634e-06, "loss": 0.2927883565425873, "step": 15063, "token_acc": 0.8998522410113282 }, { "epoch": 0.8128203744671667, "grad_norm": 0.4451005756855011, "learning_rate": 1.7819772789895517e-06, "loss": 0.36225855350494385, "step": 15064, "token_acc": 0.8719520691096558 }, { "epoch": 0.8128743322721632, "grad_norm": 0.5311927199363708, "learning_rate": 1.7809816910750998e-06, "loss": 0.36125513911247253, "step": 15065, "token_acc": 0.871496475987543 }, { "epoch": 0.8129282900771596, "grad_norm": 0.34449994564056396, "learning_rate": 1.7799863541673125e-06, "loss": 0.29625099897384644, "step": 15066, "token_acc": 0.8921578371931219 }, { "epoch": 0.8129822478821561, "grad_norm": 0.43725189566612244, "learning_rate": 1.7789912682965904e-06, "loss": 0.2903676927089691, "step": 15067, "token_acc": 0.8902104300091491 }, { "epoch": 0.8130362056871526, "grad_norm": 0.38177159428596497, "learning_rate": 1.777996433493322e-06, "loss": 0.3466167151927948, "step": 15068, "token_acc": 0.878196848359597 }, { "epoch": 0.8130901634921491, "grad_norm": 0.3779712915420532, "learning_rate": 1.7770018497878905e-06, "loss": 0.3907998502254486, "step": 15069, "token_acc": 0.8640570285142571 }, { "epoch": 0.8131441212971456, "grad_norm": 0.40069347620010376, "learning_rate": 1.776007517210666e-06, "loss": 0.36136242747306824, "step": 15070, "token_acc": 0.8772640341995726 }, { "epoch": 0.8131980791021421, "grad_norm": 0.5192062854766846, "learning_rate": 1.7750134357920178e-06, "loss": 0.33453142642974854, "step": 15071, "token_acc": 0.8848096348096348 }, { "epoch": 0.8132520369071387, "grad_norm": 0.4733791649341583, "learning_rate": 1.7740196055623049e-06, "loss": 0.3320811688899994, "step": 15072, "token_acc": 0.8772921760391198 }, { "epoch": 0.8133059947121352, "grad_norm": 0.33181601762771606, "learning_rate": 1.773026026551875e-06, "loss": 0.32232359051704407, "step": 15073, "token_acc": 0.8820929821460138 }, { "epoch": 0.8133599525171316, "grad_norm": 0.3736130893230438, "learning_rate": 1.7720326987910775e-06, "loss": 0.3085000813007355, "step": 15074, "token_acc": 0.8905401912638925 }, { "epoch": 0.8134139103221281, "grad_norm": 0.48040780425071716, "learning_rate": 1.7710396223102466e-06, "loss": 0.35201436281204224, "step": 15075, "token_acc": 0.8772759856630824 }, { "epoch": 0.8134678681271246, "grad_norm": 0.421411395072937, "learning_rate": 1.7700467971397094e-06, "loss": 0.27480313181877136, "step": 15076, "token_acc": 0.9024484040507628 }, { "epoch": 0.8135218259321211, "grad_norm": 0.41756823658943176, "learning_rate": 1.7690542233097885e-06, "loss": 0.3679145872592926, "step": 15077, "token_acc": 0.8662153449387492 }, { "epoch": 0.8135757837371176, "grad_norm": 0.4440782070159912, "learning_rate": 1.7680619008507937e-06, "loss": 0.3469414710998535, "step": 15078, "token_acc": 0.8769625603864735 }, { "epoch": 0.8136297415421141, "grad_norm": 0.4378874897956848, "learning_rate": 1.76706982979303e-06, "loss": 0.3583737909793854, "step": 15079, "token_acc": 0.8704354469060351 }, { "epoch": 0.8136836993471106, "grad_norm": 0.3746214807033539, "learning_rate": 1.7660780101667996e-06, "loss": 0.29585951566696167, "step": 15080, "token_acc": 0.8906602708803611 }, { "epoch": 0.813737657152107, "grad_norm": 0.2908294200897217, "learning_rate": 1.7650864420023905e-06, "loss": 0.29026398062705994, "step": 15081, "token_acc": 0.896077841308055 }, { "epoch": 0.8137916149571035, "grad_norm": 0.38006171584129333, "learning_rate": 1.764095125330083e-06, "loss": 0.3095407485961914, "step": 15082, "token_acc": 0.8893051771117166 }, { "epoch": 0.8138455727621, "grad_norm": 0.413901686668396, "learning_rate": 1.763104060180153e-06, "loss": 0.33390405774116516, "step": 15083, "token_acc": 0.8834591822196894 }, { "epoch": 0.8138995305670965, "grad_norm": 0.39208540320396423, "learning_rate": 1.7621132465828684e-06, "loss": 0.29867660999298096, "step": 15084, "token_acc": 0.888920414243155 }, { "epoch": 0.813953488372093, "grad_norm": 0.4351080656051636, "learning_rate": 1.7611226845684858e-06, "loss": 0.3410267233848572, "step": 15085, "token_acc": 0.8808758229980095 }, { "epoch": 0.8140074461770895, "grad_norm": 0.3602202534675598, "learning_rate": 1.7601323741672583e-06, "loss": 0.31530478596687317, "step": 15086, "token_acc": 0.8837105419892793 }, { "epoch": 0.814061403982086, "grad_norm": 0.4166705012321472, "learning_rate": 1.75914231540943e-06, "loss": 0.2933443784713745, "step": 15087, "token_acc": 0.891973125884017 }, { "epoch": 0.8141153617870825, "grad_norm": 0.3881146013736725, "learning_rate": 1.7581525083252372e-06, "loss": 0.3089265823364258, "step": 15088, "token_acc": 0.8894709271870089 }, { "epoch": 0.8141693195920789, "grad_norm": 0.37376904487609863, "learning_rate": 1.7571629529449063e-06, "loss": 0.32594865560531616, "step": 15089, "token_acc": 0.8822123220688906 }, { "epoch": 0.8142232773970755, "grad_norm": 0.2909126877784729, "learning_rate": 1.7561736492986603e-06, "loss": 0.28839758038520813, "step": 15090, "token_acc": 0.8942693409742121 }, { "epoch": 0.814277235202072, "grad_norm": 0.2979818284511566, "learning_rate": 1.7551845974167115e-06, "loss": 0.38509058952331543, "step": 15091, "token_acc": 0.8648064250411862 }, { "epoch": 0.8143311930070685, "grad_norm": 0.41814425587654114, "learning_rate": 1.7541957973292622e-06, "loss": 0.30396971106529236, "step": 15092, "token_acc": 0.8893099636822991 }, { "epoch": 0.814385150812065, "grad_norm": 0.39513128995895386, "learning_rate": 1.7532072490665164e-06, "loss": 0.3241158723831177, "step": 15093, "token_acc": 0.8846896884810432 }, { "epoch": 0.8144391086170615, "grad_norm": 0.4851687550544739, "learning_rate": 1.7522189526586607e-06, "loss": 0.3752078413963318, "step": 15094, "token_acc": 0.8733055265901981 }, { "epoch": 0.814493066422058, "grad_norm": 0.4223136305809021, "learning_rate": 1.751230908135877e-06, "loss": 0.3222200572490692, "step": 15095, "token_acc": 0.884420735877445 }, { "epoch": 0.8145470242270545, "grad_norm": 0.4447920024394989, "learning_rate": 1.7502431155283417e-06, "loss": 0.29941320419311523, "step": 15096, "token_acc": 0.8906742281033396 }, { "epoch": 0.8146009820320509, "grad_norm": 0.4587325155735016, "learning_rate": 1.7492555748662188e-06, "loss": 0.3078853189945221, "step": 15097, "token_acc": 0.8887453874538745 }, { "epoch": 0.8146549398370474, "grad_norm": 0.4317178726196289, "learning_rate": 1.7482682861796684e-06, "loss": 0.3084243834018707, "step": 15098, "token_acc": 0.8880024085503537 }, { "epoch": 0.8147088976420439, "grad_norm": 0.4784744381904602, "learning_rate": 1.7472812494988444e-06, "loss": 0.3687150478363037, "step": 15099, "token_acc": 0.8690441813456541 }, { "epoch": 0.8147628554470404, "grad_norm": 0.4423992931842804, "learning_rate": 1.7462944648538894e-06, "loss": 0.2975993752479553, "step": 15100, "token_acc": 0.8947661469933185 }, { "epoch": 0.8148168132520369, "grad_norm": 0.35143932700157166, "learning_rate": 1.7453079322749389e-06, "loss": 0.3448697328567505, "step": 15101, "token_acc": 0.8769109119662625 }, { "epoch": 0.8148707710570334, "grad_norm": 0.43467485904693604, "learning_rate": 1.7443216517921224e-06, "loss": 0.3305180072784424, "step": 15102, "token_acc": 0.8809368900455433 }, { "epoch": 0.8149247288620299, "grad_norm": 0.5507727265357971, "learning_rate": 1.7433356234355592e-06, "loss": 0.31866562366485596, "step": 15103, "token_acc": 0.8909090909090909 }, { "epoch": 0.8149786866670263, "grad_norm": 0.4301398694515228, "learning_rate": 1.7423498472353662e-06, "loss": 0.36203670501708984, "step": 15104, "token_acc": 0.8720992984349704 }, { "epoch": 0.8150326444720228, "grad_norm": 0.506088376045227, "learning_rate": 1.7413643232216404e-06, "loss": 0.3502799868583679, "step": 15105, "token_acc": 0.8747199724280544 }, { "epoch": 0.8150866022770193, "grad_norm": 0.42918264865875244, "learning_rate": 1.7403790514244868e-06, "loss": 0.32734233140945435, "step": 15106, "token_acc": 0.8811819980217607 }, { "epoch": 0.8151405600820159, "grad_norm": 0.5515410900115967, "learning_rate": 1.7393940318739932e-06, "loss": 0.34207913279533386, "step": 15107, "token_acc": 0.881274734430327 }, { "epoch": 0.8151945178870124, "grad_norm": 0.43478459119796753, "learning_rate": 1.7384092646002426e-06, "loss": 0.29622727632522583, "step": 15108, "token_acc": 0.8927378448595819 }, { "epoch": 0.8152484756920089, "grad_norm": 0.5028344988822937, "learning_rate": 1.7374247496333085e-06, "loss": 0.2983908951282501, "step": 15109, "token_acc": 0.8936537857367264 }, { "epoch": 0.8153024334970054, "grad_norm": 0.3844664394855499, "learning_rate": 1.7364404870032581e-06, "loss": 0.31663084030151367, "step": 15110, "token_acc": 0.8873164218958611 }, { "epoch": 0.8153563913020019, "grad_norm": 0.4684995412826538, "learning_rate": 1.7354564767401482e-06, "loss": 0.30962494015693665, "step": 15111, "token_acc": 0.8883921919237073 }, { "epoch": 0.8154103491069983, "grad_norm": 0.4212116599082947, "learning_rate": 1.734472718874035e-06, "loss": 0.35154297947883606, "step": 15112, "token_acc": 0.876979293544458 }, { "epoch": 0.8154643069119948, "grad_norm": 0.48067793250083923, "learning_rate": 1.733489213434959e-06, "loss": 0.3432508707046509, "step": 15113, "token_acc": 0.8760022117777163 }, { "epoch": 0.8155182647169913, "grad_norm": 0.43480151891708374, "learning_rate": 1.7325059604529582e-06, "loss": 0.350358784198761, "step": 15114, "token_acc": 0.8767709563164109 }, { "epoch": 0.8155722225219878, "grad_norm": 0.46737492084503174, "learning_rate": 1.7315229599580584e-06, "loss": 0.3343919515609741, "step": 15115, "token_acc": 0.8766614263255681 }, { "epoch": 0.8156261803269843, "grad_norm": 0.4688881039619446, "learning_rate": 1.7305402119802817e-06, "loss": 0.3470912277698517, "step": 15116, "token_acc": 0.8808490858382398 }, { "epoch": 0.8156801381319808, "grad_norm": 0.44144317507743835, "learning_rate": 1.7295577165496402e-06, "loss": 0.34490135312080383, "step": 15117, "token_acc": 0.8782106427430257 }, { "epoch": 0.8157340959369773, "grad_norm": 0.3983107805252075, "learning_rate": 1.7285754736961403e-06, "loss": 0.27587419748306274, "step": 15118, "token_acc": 0.8994746059544658 }, { "epoch": 0.8157880537419738, "grad_norm": 0.40650254487991333, "learning_rate": 1.7275934834497776e-06, "loss": 0.3012048900127411, "step": 15119, "token_acc": 0.8894412067192321 }, { "epoch": 0.8158420115469702, "grad_norm": 0.5185282230377197, "learning_rate": 1.726611745840544e-06, "loss": 0.34449848532676697, "step": 15120, "token_acc": 0.8802518133296838 }, { "epoch": 0.8158959693519667, "grad_norm": 0.48486533761024475, "learning_rate": 1.7256302608984198e-06, "loss": 0.40180426836013794, "step": 15121, "token_acc": 0.8584610027855153 }, { "epoch": 0.8159499271569632, "grad_norm": 0.38750576972961426, "learning_rate": 1.724649028653379e-06, "loss": 0.3114277720451355, "step": 15122, "token_acc": 0.8907461888205402 }, { "epoch": 0.8160038849619597, "grad_norm": 0.3634931147098541, "learning_rate": 1.7236680491353907e-06, "loss": 0.2784522771835327, "step": 15123, "token_acc": 0.8949808740002319 }, { "epoch": 0.8160578427669563, "grad_norm": 0.4377123713493347, "learning_rate": 1.7226873223744078e-06, "loss": 0.3676655888557434, "step": 15124, "token_acc": 0.8681534929485077 }, { "epoch": 0.8161118005719528, "grad_norm": 0.40605291724205017, "learning_rate": 1.7217068484003895e-06, "loss": 0.32521146535873413, "step": 15125, "token_acc": 0.887952176578786 }, { "epoch": 0.8161657583769493, "grad_norm": 0.4102359712123871, "learning_rate": 1.7207266272432755e-06, "loss": 0.2978876829147339, "step": 15126, "token_acc": 0.8911018310311597 }, { "epoch": 0.8162197161819457, "grad_norm": 0.4725226163864136, "learning_rate": 1.7197466589330003e-06, "loss": 0.3347766101360321, "step": 15127, "token_acc": 0.8818158436213992 }, { "epoch": 0.8162736739869422, "grad_norm": 0.37719717621803284, "learning_rate": 1.7187669434994936e-06, "loss": 0.2750759720802307, "step": 15128, "token_acc": 0.896066716455066 }, { "epoch": 0.8163276317919387, "grad_norm": 0.3453541398048401, "learning_rate": 1.7177874809726759e-06, "loss": 0.29842305183410645, "step": 15129, "token_acc": 0.8941125415883119 }, { "epoch": 0.8163815895969352, "grad_norm": 0.47910425066947937, "learning_rate": 1.7168082713824575e-06, "loss": 0.3610529601573944, "step": 15130, "token_acc": 0.8747393506106643 }, { "epoch": 0.8164355474019317, "grad_norm": 0.5434199571609497, "learning_rate": 1.7158293147587435e-06, "loss": 0.3546009063720703, "step": 15131, "token_acc": 0.8729987684729064 }, { "epoch": 0.8164895052069282, "grad_norm": 0.32746994495391846, "learning_rate": 1.7148506111314334e-06, "loss": 0.2830302119255066, "step": 15132, "token_acc": 0.8939879046602632 }, { "epoch": 0.8165434630119247, "grad_norm": 0.45649030804634094, "learning_rate": 1.7138721605304155e-06, "loss": 0.360058069229126, "step": 15133, "token_acc": 0.8755249650023332 }, { "epoch": 0.8165974208169212, "grad_norm": 0.3866068124771118, "learning_rate": 1.7128939629855712e-06, "loss": 0.31121277809143066, "step": 15134, "token_acc": 0.8875336607001426 }, { "epoch": 0.8166513786219176, "grad_norm": 0.42024633288383484, "learning_rate": 1.7119160185267746e-06, "loss": 0.33849090337753296, "step": 15135, "token_acc": 0.8805952930318413 }, { "epoch": 0.8167053364269141, "grad_norm": 0.5669316649436951, "learning_rate": 1.7109383271838909e-06, "loss": 0.29948878288269043, "step": 15136, "token_acc": 0.8899003034243607 }, { "epoch": 0.8167592942319106, "grad_norm": 0.3481961786746979, "learning_rate": 1.7099608889867802e-06, "loss": 0.3077578842639923, "step": 15137, "token_acc": 0.8885370487650411 }, { "epoch": 0.8168132520369071, "grad_norm": 0.4152372181415558, "learning_rate": 1.7089837039652924e-06, "loss": 0.3383462727069855, "step": 15138, "token_acc": 0.8875164690382081 }, { "epoch": 0.8168672098419036, "grad_norm": 0.47248518466949463, "learning_rate": 1.7080067721492688e-06, "loss": 0.3298812806606293, "step": 15139, "token_acc": 0.878946602648043 }, { "epoch": 0.8169211676469001, "grad_norm": 0.3226708471775055, "learning_rate": 1.707030093568548e-06, "loss": 0.360129714012146, "step": 15140, "token_acc": 0.8729435585927613 }, { "epoch": 0.8169751254518967, "grad_norm": 0.542320966720581, "learning_rate": 1.7060536682529538e-06, "loss": 0.3498753309249878, "step": 15141, "token_acc": 0.8773065271275131 }, { "epoch": 0.8170290832568932, "grad_norm": 0.3713115155696869, "learning_rate": 1.705077496232309e-06, "loss": 0.31926801800727844, "step": 15142, "token_acc": 0.8882844129554656 }, { "epoch": 0.8170830410618896, "grad_norm": 0.5452268719673157, "learning_rate": 1.7041015775364222e-06, "loss": 0.32915815711021423, "step": 15143, "token_acc": 0.8812751923781605 }, { "epoch": 0.8171369988668861, "grad_norm": 0.449065625667572, "learning_rate": 1.7031259121951026e-06, "loss": 0.34043818712234497, "step": 15144, "token_acc": 0.8809063988670014 }, { "epoch": 0.8171909566718826, "grad_norm": 0.44678738713264465, "learning_rate": 1.7021505002381433e-06, "loss": 0.2735878825187683, "step": 15145, "token_acc": 0.8978723404255319 }, { "epoch": 0.8172449144768791, "grad_norm": 0.4396001696586609, "learning_rate": 1.7011753416953348e-06, "loss": 0.34366869926452637, "step": 15146, "token_acc": 0.8779564806054873 }, { "epoch": 0.8172988722818756, "grad_norm": 0.4442690312862396, "learning_rate": 1.7002004365964564e-06, "loss": 0.3038567304611206, "step": 15147, "token_acc": 0.8865374112506826 }, { "epoch": 0.8173528300868721, "grad_norm": 0.4973912239074707, "learning_rate": 1.6992257849712835e-06, "loss": 0.3356924057006836, "step": 15148, "token_acc": 0.8808172531214529 }, { "epoch": 0.8174067878918686, "grad_norm": 0.4556697905063629, "learning_rate": 1.6982513868495797e-06, "loss": 0.3345433473587036, "step": 15149, "token_acc": 0.8775360188179947 }, { "epoch": 0.817460745696865, "grad_norm": 0.4029374122619629, "learning_rate": 1.6972772422611018e-06, "loss": 0.31576254963874817, "step": 15150, "token_acc": 0.8900695762175838 }, { "epoch": 0.8175147035018615, "grad_norm": 0.46760427951812744, "learning_rate": 1.6963033512356043e-06, "loss": 0.38298460841178894, "step": 15151, "token_acc": 0.8679923027581783 }, { "epoch": 0.817568661306858, "grad_norm": 0.4240373969078064, "learning_rate": 1.695329713802829e-06, "loss": 0.26134753227233887, "step": 15152, "token_acc": 0.9032527105921602 }, { "epoch": 0.8176226191118545, "grad_norm": 0.40219560265541077, "learning_rate": 1.694356329992507e-06, "loss": 0.296009361743927, "step": 15153, "token_acc": 0.887722980062959 }, { "epoch": 0.817676576916851, "grad_norm": 0.42842310667037964, "learning_rate": 1.6933831998343676e-06, "loss": 0.3356049656867981, "step": 15154, "token_acc": 0.8813866545825308 }, { "epoch": 0.8177305347218475, "grad_norm": 0.40381407737731934, "learning_rate": 1.692410323358128e-06, "loss": 0.3588969111442566, "step": 15155, "token_acc": 0.8741369390103567 }, { "epoch": 0.817784492526844, "grad_norm": 0.4132941961288452, "learning_rate": 1.691437700593499e-06, "loss": 0.354250431060791, "step": 15156, "token_acc": 0.8759665456840776 }, { "epoch": 0.8178384503318405, "grad_norm": 0.3511106073856354, "learning_rate": 1.690465331570188e-06, "loss": 0.3383881449699402, "step": 15157, "token_acc": 0.8816746051642016 }, { "epoch": 0.817892408136837, "grad_norm": 0.3977945148944855, "learning_rate": 1.689493216317889e-06, "loss": 0.3350721597671509, "step": 15158, "token_acc": 0.8822510822510823 }, { "epoch": 0.8179463659418335, "grad_norm": 0.4712142050266266, "learning_rate": 1.6885213548662904e-06, "loss": 0.32341575622558594, "step": 15159, "token_acc": 0.8873517786561265 }, { "epoch": 0.81800032374683, "grad_norm": 0.5120502710342407, "learning_rate": 1.687549747245072e-06, "loss": 0.3788108825683594, "step": 15160, "token_acc": 0.8746645619573796 }, { "epoch": 0.8180542815518265, "grad_norm": 0.5305731892585754, "learning_rate": 1.6865783934839075e-06, "loss": 0.3581434190273285, "step": 15161, "token_acc": 0.8665760341813944 }, { "epoch": 0.818108239356823, "grad_norm": 0.3679811954498291, "learning_rate": 1.6856072936124602e-06, "loss": 0.34731876850128174, "step": 15162, "token_acc": 0.8732373009855952 }, { "epoch": 0.8181621971618195, "grad_norm": 0.38457438349723816, "learning_rate": 1.684636447660386e-06, "loss": 0.2960563898086548, "step": 15163, "token_acc": 0.890703337630712 }, { "epoch": 0.818216154966816, "grad_norm": 0.46313178539276123, "learning_rate": 1.6836658556573393e-06, "loss": 0.37361574172973633, "step": 15164, "token_acc": 0.8673256299596269 }, { "epoch": 0.8182701127718125, "grad_norm": 0.37212130427360535, "learning_rate": 1.6826955176329584e-06, "loss": 0.3364686667919159, "step": 15165, "token_acc": 0.8826434619002822 }, { "epoch": 0.8183240705768089, "grad_norm": 0.4123382568359375, "learning_rate": 1.6817254336168775e-06, "loss": 0.3648900091648102, "step": 15166, "token_acc": 0.8714720531267294 }, { "epoch": 0.8183780283818054, "grad_norm": 0.4291805624961853, "learning_rate": 1.6807556036387228e-06, "loss": 0.3306542634963989, "step": 15167, "token_acc": 0.8832044975404076 }, { "epoch": 0.8184319861868019, "grad_norm": 0.3961135447025299, "learning_rate": 1.6797860277281119e-06, "loss": 0.3270632028579712, "step": 15168, "token_acc": 0.8780982751123351 }, { "epoch": 0.8184859439917984, "grad_norm": 0.38615933060646057, "learning_rate": 1.6788167059146576e-06, "loss": 0.31546637415885925, "step": 15169, "token_acc": 0.8833952557873678 }, { "epoch": 0.8185399017967949, "grad_norm": 0.41305291652679443, "learning_rate": 1.6778476382279595e-06, "loss": 0.3840208649635315, "step": 15170, "token_acc": 0.8669157803966657 }, { "epoch": 0.8185938596017914, "grad_norm": 0.3696914613246918, "learning_rate": 1.6768788246976164e-06, "loss": 0.35457247495651245, "step": 15171, "token_acc": 0.8733323109952461 }, { "epoch": 0.8186478174067879, "grad_norm": 0.49037209153175354, "learning_rate": 1.6759102653532123e-06, "loss": 0.29735273122787476, "step": 15172, "token_acc": 0.8920953575909661 }, { "epoch": 0.8187017752117843, "grad_norm": 0.3627795875072479, "learning_rate": 1.6749419602243278e-06, "loss": 0.35109806060791016, "step": 15173, "token_acc": 0.8757720381807973 }, { "epoch": 0.8187557330167808, "grad_norm": 0.5156713724136353, "learning_rate": 1.6739739093405361e-06, "loss": 0.3882097005844116, "step": 15174, "token_acc": 0.8682374621561143 }, { "epoch": 0.8188096908217773, "grad_norm": 0.41431957483291626, "learning_rate": 1.673006112731399e-06, "loss": 0.3545999825000763, "step": 15175, "token_acc": 0.8762886597938144 }, { "epoch": 0.8188636486267739, "grad_norm": 0.4303756356239319, "learning_rate": 1.672038570426472e-06, "loss": 0.3633609414100647, "step": 15176, "token_acc": 0.8693144579892516 }, { "epoch": 0.8189176064317704, "grad_norm": 0.36206191778182983, "learning_rate": 1.6710712824553077e-06, "loss": 0.335630863904953, "step": 15177, "token_acc": 0.876854404621242 }, { "epoch": 0.8189715642367669, "grad_norm": 0.5000070333480835, "learning_rate": 1.670104248847445e-06, "loss": 0.30189675092697144, "step": 15178, "token_acc": 0.8945551128818061 }, { "epoch": 0.8190255220417634, "grad_norm": 0.41157421469688416, "learning_rate": 1.6691374696324159e-06, "loss": 0.34595489501953125, "step": 15179, "token_acc": 0.8818246614397719 }, { "epoch": 0.8190794798467599, "grad_norm": 0.4286287724971771, "learning_rate": 1.6681709448397466e-06, "loss": 0.33821189403533936, "step": 15180, "token_acc": 0.8848167539267016 }, { "epoch": 0.8191334376517563, "grad_norm": 0.4136136770248413, "learning_rate": 1.6672046744989533e-06, "loss": 0.3319990634918213, "step": 15181, "token_acc": 0.8823230784851254 }, { "epoch": 0.8191873954567528, "grad_norm": 0.4639761447906494, "learning_rate": 1.666238658639544e-06, "loss": 0.38654008507728577, "step": 15182, "token_acc": 0.8595877576514679 }, { "epoch": 0.8192413532617493, "grad_norm": 0.35584622621536255, "learning_rate": 1.665272897291026e-06, "loss": 0.29824429750442505, "step": 15183, "token_acc": 0.8899095946340041 }, { "epoch": 0.8192953110667458, "grad_norm": 0.4476619362831116, "learning_rate": 1.66430739048289e-06, "loss": 0.3444398045539856, "step": 15184, "token_acc": 0.8815846286701209 }, { "epoch": 0.8193492688717423, "grad_norm": 0.34689459204673767, "learning_rate": 1.6633421382446225e-06, "loss": 0.30787619948387146, "step": 15185, "token_acc": 0.8855076419213974 }, { "epoch": 0.8194032266767388, "grad_norm": 0.38345474004745483, "learning_rate": 1.6623771406057044e-06, "loss": 0.3500707745552063, "step": 15186, "token_acc": 0.8799544419134396 }, { "epoch": 0.8194571844817353, "grad_norm": 0.5397818684577942, "learning_rate": 1.661412397595602e-06, "loss": 0.35877999663352966, "step": 15187, "token_acc": 0.8685913389746143 }, { "epoch": 0.8195111422867318, "grad_norm": 0.5327430367469788, "learning_rate": 1.6604479092437808e-06, "loss": 0.34292298555374146, "step": 15188, "token_acc": 0.8773820681037175 }, { "epoch": 0.8195651000917282, "grad_norm": 0.4145326316356659, "learning_rate": 1.6594836755796928e-06, "loss": 0.3089783787727356, "step": 15189, "token_acc": 0.8853854642913448 }, { "epoch": 0.8196190578967247, "grad_norm": 0.3577879071235657, "learning_rate": 1.658519696632792e-06, "loss": 0.2941957116127014, "step": 15190, "token_acc": 0.8927018451051828 }, { "epoch": 0.8196730157017212, "grad_norm": 0.41236671805381775, "learning_rate": 1.657555972432513e-06, "loss": 0.26003438234329224, "step": 15191, "token_acc": 0.9053349378259126 }, { "epoch": 0.8197269735067177, "grad_norm": 0.45640966296195984, "learning_rate": 1.6565925030082907e-06, "loss": 0.40054214000701904, "step": 15192, "token_acc": 0.8606347581264397 }, { "epoch": 0.8197809313117143, "grad_norm": 0.3823879659175873, "learning_rate": 1.6556292883895464e-06, "loss": 0.31588807702064514, "step": 15193, "token_acc": 0.8856663104869485 }, { "epoch": 0.8198348891167108, "grad_norm": 0.4293555021286011, "learning_rate": 1.6546663286056985e-06, "loss": 0.24851743876934052, "step": 15194, "token_acc": 0.9070060131450147 }, { "epoch": 0.8198888469217073, "grad_norm": 0.4394071400165558, "learning_rate": 1.6537036236861526e-06, "loss": 0.345639169216156, "step": 15195, "token_acc": 0.8802395209580839 }, { "epoch": 0.8199428047267037, "grad_norm": 0.4121911823749542, "learning_rate": 1.6527411736603128e-06, "loss": 0.25688639283180237, "step": 15196, "token_acc": 0.9039345203905801 }, { "epoch": 0.8199967625317002, "grad_norm": 0.47095927596092224, "learning_rate": 1.6517789785575721e-06, "loss": 0.37236905097961426, "step": 15197, "token_acc": 0.8715489683231619 }, { "epoch": 0.8200507203366967, "grad_norm": 0.4329926371574402, "learning_rate": 1.650817038407313e-06, "loss": 0.33712929487228394, "step": 15198, "token_acc": 0.8770685579196218 }, { "epoch": 0.8201046781416932, "grad_norm": 0.5002341270446777, "learning_rate": 1.649855353238916e-06, "loss": 0.33644261956214905, "step": 15199, "token_acc": 0.8837135109267826 }, { "epoch": 0.8201586359466897, "grad_norm": 0.48232510685920715, "learning_rate": 1.648893923081748e-06, "loss": 0.31676268577575684, "step": 15200, "token_acc": 0.886205136928049 }, { "epoch": 0.8202125937516862, "grad_norm": 0.3133716583251953, "learning_rate": 1.6479327479651718e-06, "loss": 0.3099448084831238, "step": 15201, "token_acc": 0.8919317463661259 }, { "epoch": 0.8202665515566827, "grad_norm": 0.48294779658317566, "learning_rate": 1.646971827918542e-06, "loss": 0.3071194589138031, "step": 15202, "token_acc": 0.8896091868864604 }, { "epoch": 0.8203205093616792, "grad_norm": 0.4223519563674927, "learning_rate": 1.646011162971205e-06, "loss": 0.2983606457710266, "step": 15203, "token_acc": 0.892896174863388 }, { "epoch": 0.8203744671666756, "grad_norm": 0.3727792203426361, "learning_rate": 1.6450507531524984e-06, "loss": 0.28547850251197815, "step": 15204, "token_acc": 0.8950143266475644 }, { "epoch": 0.8204284249716721, "grad_norm": 0.3918156325817108, "learning_rate": 1.644090598491752e-06, "loss": 0.2915797233581543, "step": 15205, "token_acc": 0.8928098808998677 }, { "epoch": 0.8204823827766686, "grad_norm": 0.4696793854236603, "learning_rate": 1.6431306990182905e-06, "loss": 0.3779603838920593, "step": 15206, "token_acc": 0.874710259851165 }, { "epoch": 0.8205363405816651, "grad_norm": 0.3922264575958252, "learning_rate": 1.6421710547614277e-06, "loss": 0.30222123861312866, "step": 15207, "token_acc": 0.8928353472476505 }, { "epoch": 0.8205902983866616, "grad_norm": 0.3951173722743988, "learning_rate": 1.6412116657504707e-06, "loss": 0.3266294002532959, "step": 15208, "token_acc": 0.8809268397339627 }, { "epoch": 0.8206442561916582, "grad_norm": 0.5093547105789185, "learning_rate": 1.64025253201472e-06, "loss": 0.36632075905799866, "step": 15209, "token_acc": 0.8771623551817171 }, { "epoch": 0.8206982139966547, "grad_norm": 0.5104402303695679, "learning_rate": 1.6392936535834681e-06, "loss": 0.31377649307250977, "step": 15210, "token_acc": 0.8867081538216053 }, { "epoch": 0.820752171801651, "grad_norm": 0.33274614810943604, "learning_rate": 1.638335030485998e-06, "loss": 0.30827808380126953, "step": 15211, "token_acc": 0.8880242109814094 }, { "epoch": 0.8208061296066476, "grad_norm": 0.5921668410301208, "learning_rate": 1.6373766627515853e-06, "loss": 0.33726173639297485, "step": 15212, "token_acc": 0.8836805555555556 }, { "epoch": 0.8208600874116441, "grad_norm": 0.3681165277957916, "learning_rate": 1.6364185504094988e-06, "loss": 0.34310275316238403, "step": 15213, "token_acc": 0.8778084825183228 }, { "epoch": 0.8209140452166406, "grad_norm": 0.4438087046146393, "learning_rate": 1.635460693488996e-06, "loss": 0.335237979888916, "step": 15214, "token_acc": 0.8850040096230954 }, { "epoch": 0.8209680030216371, "grad_norm": 0.4155295193195343, "learning_rate": 1.634503092019335e-06, "loss": 0.35524988174438477, "step": 15215, "token_acc": 0.8741544532130778 }, { "epoch": 0.8210219608266336, "grad_norm": 0.5018314719200134, "learning_rate": 1.6335457460297578e-06, "loss": 0.3760565519332886, "step": 15216, "token_acc": 0.8642101236771643 }, { "epoch": 0.8210759186316301, "grad_norm": 0.4268190562725067, "learning_rate": 1.632588655549503e-06, "loss": 0.28166377544403076, "step": 15217, "token_acc": 0.8973724033567203 }, { "epoch": 0.8211298764366266, "grad_norm": 0.45772144198417664, "learning_rate": 1.6316318206077974e-06, "loss": 0.3167582154273987, "step": 15218, "token_acc": 0.8827864537638183 }, { "epoch": 0.821183834241623, "grad_norm": 0.4605303704738617, "learning_rate": 1.6306752412338645e-06, "loss": 0.28259918093681335, "step": 15219, "token_acc": 0.895764802631579 }, { "epoch": 0.8212377920466195, "grad_norm": 0.48942238092422485, "learning_rate": 1.6297189174569173e-06, "loss": 0.34564751386642456, "step": 15220, "token_acc": 0.8778614680254024 }, { "epoch": 0.821291749851616, "grad_norm": 0.49080002307891846, "learning_rate": 1.6287628493061603e-06, "loss": 0.3889245390892029, "step": 15221, "token_acc": 0.8655410590943975 }, { "epoch": 0.8213457076566125, "grad_norm": 0.497127890586853, "learning_rate": 1.6278070368107946e-06, "loss": 0.34976571798324585, "step": 15222, "token_acc": 0.8768813559322034 }, { "epoch": 0.821399665461609, "grad_norm": 0.4124804139137268, "learning_rate": 1.6268514800000078e-06, "loss": 0.2995378077030182, "step": 15223, "token_acc": 0.8967478669469519 }, { "epoch": 0.8214536232666055, "grad_norm": 0.33199822902679443, "learning_rate": 1.6258961789029836e-06, "loss": 0.32798516750335693, "step": 15224, "token_acc": 0.8858927483241925 }, { "epoch": 0.821507581071602, "grad_norm": 0.5395990014076233, "learning_rate": 1.6249411335488963e-06, "loss": 0.34357863664627075, "step": 15225, "token_acc": 0.8764206819273251 }, { "epoch": 0.8215615388765986, "grad_norm": 0.45381680130958557, "learning_rate": 1.6239863439669135e-06, "loss": 0.3292614221572876, "step": 15226, "token_acc": 0.880087023291528 }, { "epoch": 0.821615496681595, "grad_norm": 0.465954452753067, "learning_rate": 1.623031810186191e-06, "loss": 0.37276512384414673, "step": 15227, "token_acc": 0.8709005726184279 }, { "epoch": 0.8216694544865915, "grad_norm": 0.41848519444465637, "learning_rate": 1.622077532235884e-06, "loss": 0.30865785479545593, "step": 15228, "token_acc": 0.8870252287219296 }, { "epoch": 0.821723412291588, "grad_norm": 0.3314739763736725, "learning_rate": 1.621123510145135e-06, "loss": 0.2893245220184326, "step": 15229, "token_acc": 0.8888475836431227 }, { "epoch": 0.8217773700965845, "grad_norm": 0.4644095003604889, "learning_rate": 1.6201697439430787e-06, "loss": 0.3345986306667328, "step": 15230, "token_acc": 0.882825278810409 }, { "epoch": 0.821831327901581, "grad_norm": 0.43523064255714417, "learning_rate": 1.6192162336588434e-06, "loss": 0.3179287910461426, "step": 15231, "token_acc": 0.8791605181177242 }, { "epoch": 0.8218852857065775, "grad_norm": 0.4287846088409424, "learning_rate": 1.61826297932155e-06, "loss": 0.3310773968696594, "step": 15232, "token_acc": 0.88119911176906 }, { "epoch": 0.821939243511574, "grad_norm": 0.27924177050590515, "learning_rate": 1.6173099809603077e-06, "loss": 0.34985995292663574, "step": 15233, "token_acc": 0.8755989852485202 }, { "epoch": 0.8219932013165704, "grad_norm": 0.37541598081588745, "learning_rate": 1.6163572386042237e-06, "loss": 0.34427833557128906, "step": 15234, "token_acc": 0.8776660572821451 }, { "epoch": 0.8220471591215669, "grad_norm": 0.45960360765457153, "learning_rate": 1.6154047522823935e-06, "loss": 0.35372138023376465, "step": 15235, "token_acc": 0.8808290155440415 }, { "epoch": 0.8221011169265634, "grad_norm": 0.3783097267150879, "learning_rate": 1.6144525220239049e-06, "loss": 0.30990615487098694, "step": 15236, "token_acc": 0.8877098723975823 }, { "epoch": 0.8221550747315599, "grad_norm": 0.3845241069793701, "learning_rate": 1.6135005478578403e-06, "loss": 0.3400711715221405, "step": 15237, "token_acc": 0.8822238478419897 }, { "epoch": 0.8222090325365564, "grad_norm": 0.43211808800697327, "learning_rate": 1.612548829813272e-06, "loss": 0.31413620710372925, "step": 15238, "token_acc": 0.8847744023009168 }, { "epoch": 0.8222629903415529, "grad_norm": 0.40188372135162354, "learning_rate": 1.6115973679192641e-06, "loss": 0.35821232199668884, "step": 15239, "token_acc": 0.874874731567645 }, { "epoch": 0.8223169481465494, "grad_norm": 0.4452056884765625, "learning_rate": 1.6106461622048742e-06, "loss": 0.32841965556144714, "step": 15240, "token_acc": 0.8810065396644868 }, { "epoch": 0.8223709059515459, "grad_norm": 0.5182388424873352, "learning_rate": 1.609695212699154e-06, "loss": 0.33541232347488403, "step": 15241, "token_acc": 0.8821508331901735 }, { "epoch": 0.8224248637565423, "grad_norm": 0.3410206139087677, "learning_rate": 1.6087445194311446e-06, "loss": 0.31256332993507385, "step": 15242, "token_acc": 0.885024154589372 }, { "epoch": 0.8224788215615388, "grad_norm": 0.4009990692138672, "learning_rate": 1.6077940824298788e-06, "loss": 0.3087789714336395, "step": 15243, "token_acc": 0.8893050891242573 }, { "epoch": 0.8225327793665353, "grad_norm": 0.3945476710796356, "learning_rate": 1.6068439017243842e-06, "loss": 0.2941419780254364, "step": 15244, "token_acc": 0.8942696629213484 }, { "epoch": 0.8225867371715319, "grad_norm": 0.4864152669906616, "learning_rate": 1.6058939773436777e-06, "loss": 0.3851694166660309, "step": 15245, "token_acc": 0.8670824670824671 }, { "epoch": 0.8226406949765284, "grad_norm": 0.4108901917934418, "learning_rate": 1.6049443093167704e-06, "loss": 0.33852142095565796, "step": 15246, "token_acc": 0.8803970816887932 }, { "epoch": 0.8226946527815249, "grad_norm": 0.40535834431648254, "learning_rate": 1.6039948976726615e-06, "loss": 0.3496491611003876, "step": 15247, "token_acc": 0.8764104606398513 }, { "epoch": 0.8227486105865214, "grad_norm": 0.4291006624698639, "learning_rate": 1.6030457424403524e-06, "loss": 0.32550251483917236, "step": 15248, "token_acc": 0.8823008849557522 }, { "epoch": 0.8228025683915179, "grad_norm": 0.35926833748817444, "learning_rate": 1.602096843648826e-06, "loss": 0.33242088556289673, "step": 15249, "token_acc": 0.8815238095238095 }, { "epoch": 0.8228565261965143, "grad_norm": 0.4024658799171448, "learning_rate": 1.601148201327063e-06, "loss": 0.38044318556785583, "step": 15250, "token_acc": 0.873255482768442 }, { "epoch": 0.8229104840015108, "grad_norm": 0.45644280314445496, "learning_rate": 1.600199815504032e-06, "loss": 0.3630900979042053, "step": 15251, "token_acc": 0.8708870755461964 }, { "epoch": 0.8229644418065073, "grad_norm": 0.5195263028144836, "learning_rate": 1.5992516862086993e-06, "loss": 0.364568293094635, "step": 15252, "token_acc": 0.8707520891364903 }, { "epoch": 0.8230183996115038, "grad_norm": 0.4595589339733124, "learning_rate": 1.5983038134700202e-06, "loss": 0.3184053599834442, "step": 15253, "token_acc": 0.8821386756862235 }, { "epoch": 0.8230723574165003, "grad_norm": 0.4073545038700104, "learning_rate": 1.5973561973169404e-06, "loss": 0.2958289682865143, "step": 15254, "token_acc": 0.8886968085106383 }, { "epoch": 0.8231263152214968, "grad_norm": 0.3813410997390747, "learning_rate": 1.5964088377784015e-06, "loss": 0.35037142038345337, "step": 15255, "token_acc": 0.8762115286515899 }, { "epoch": 0.8231802730264933, "grad_norm": 0.4045281708240509, "learning_rate": 1.5954617348833346e-06, "loss": 0.28209757804870605, "step": 15256, "token_acc": 0.892871419053964 }, { "epoch": 0.8232342308314897, "grad_norm": 0.423374742269516, "learning_rate": 1.5945148886606653e-06, "loss": 0.31574371457099915, "step": 15257, "token_acc": 0.8834716459197787 }, { "epoch": 0.8232881886364862, "grad_norm": 0.38443779945373535, "learning_rate": 1.5935682991393098e-06, "loss": 0.3456834554672241, "step": 15258, "token_acc": 0.8753105590062111 }, { "epoch": 0.8233421464414827, "grad_norm": 0.2748902440071106, "learning_rate": 1.5926219663481734e-06, "loss": 0.2716016173362732, "step": 15259, "token_acc": 0.8976719715901618 }, { "epoch": 0.8233961042464792, "grad_norm": 0.4322762191295624, "learning_rate": 1.5916758903161622e-06, "loss": 0.3644671142101288, "step": 15260, "token_acc": 0.8688029020556227 }, { "epoch": 0.8234500620514758, "grad_norm": 0.373745858669281, "learning_rate": 1.5907300710721664e-06, "loss": 0.3075595498085022, "step": 15261, "token_acc": 0.8912150523386776 }, { "epoch": 0.8235040198564723, "grad_norm": 0.34635594487190247, "learning_rate": 1.5897845086450714e-06, "loss": 0.2988561689853668, "step": 15262, "token_acc": 0.8924313834211256 }, { "epoch": 0.8235579776614688, "grad_norm": 0.2761770486831665, "learning_rate": 1.5888392030637544e-06, "loss": 0.305510938167572, "step": 15263, "token_acc": 0.891695847923962 }, { "epoch": 0.8236119354664653, "grad_norm": 0.529651403427124, "learning_rate": 1.5878941543570848e-06, "loss": 0.3408593535423279, "step": 15264, "token_acc": 0.8775210364632029 }, { "epoch": 0.8236658932714617, "grad_norm": 0.35750988125801086, "learning_rate": 1.5869493625539235e-06, "loss": 0.3108227849006653, "step": 15265, "token_acc": 0.8901748337847821 }, { "epoch": 0.8237198510764582, "grad_norm": 0.43016645312309265, "learning_rate": 1.586004827683123e-06, "loss": 0.35615408420562744, "step": 15266, "token_acc": 0.8724524227291133 }, { "epoch": 0.8237738088814547, "grad_norm": 0.41936999559402466, "learning_rate": 1.5850605497735327e-06, "loss": 0.28944215178489685, "step": 15267, "token_acc": 0.894874022589053 }, { "epoch": 0.8238277666864512, "grad_norm": 0.40136393904685974, "learning_rate": 1.5841165288539918e-06, "loss": 0.3585107922554016, "step": 15268, "token_acc": 0.8742785836842927 }, { "epoch": 0.8238817244914477, "grad_norm": 0.458412766456604, "learning_rate": 1.5831727649533246e-06, "loss": 0.3248051702976227, "step": 15269, "token_acc": 0.8817234581807941 }, { "epoch": 0.8239356822964442, "grad_norm": 0.2927313446998596, "learning_rate": 1.5822292581003574e-06, "loss": 0.33692675828933716, "step": 15270, "token_acc": 0.883267653637235 }, { "epoch": 0.8239896401014407, "grad_norm": 0.45711013674736023, "learning_rate": 1.5812860083239023e-06, "loss": 0.3247460722923279, "step": 15271, "token_acc": 0.8866304036644718 }, { "epoch": 0.8240435979064372, "grad_norm": 0.4396096467971802, "learning_rate": 1.5803430156527645e-06, "loss": 0.38828691840171814, "step": 15272, "token_acc": 0.8695306284805091 }, { "epoch": 0.8240975557114336, "grad_norm": 0.3880338668823242, "learning_rate": 1.5794002801157481e-06, "loss": 0.25626879930496216, "step": 15273, "token_acc": 0.9017760617760617 }, { "epoch": 0.8241515135164301, "grad_norm": 0.40105703473091125, "learning_rate": 1.5784578017416419e-06, "loss": 0.35789549350738525, "step": 15274, "token_acc": 0.8743795792956748 }, { "epoch": 0.8242054713214266, "grad_norm": 0.36400991678237915, "learning_rate": 1.577515580559228e-06, "loss": 0.3266565203666687, "step": 15275, "token_acc": 0.882713581234173 }, { "epoch": 0.8242594291264231, "grad_norm": 0.5797821879386902, "learning_rate": 1.5765736165972811e-06, "loss": 0.37412774562835693, "step": 15276, "token_acc": 0.8674140508221225 }, { "epoch": 0.8243133869314196, "grad_norm": 0.43046844005584717, "learning_rate": 1.5756319098845697e-06, "loss": 0.33937200903892517, "step": 15277, "token_acc": 0.8819846611363281 }, { "epoch": 0.8243673447364162, "grad_norm": 0.39315134286880493, "learning_rate": 1.5746904604498535e-06, "loss": 0.3212484121322632, "step": 15278, "token_acc": 0.8831417624521073 }, { "epoch": 0.8244213025414127, "grad_norm": 0.38378140330314636, "learning_rate": 1.5737492683218802e-06, "loss": 0.3294610381126404, "step": 15279, "token_acc": 0.8813302217036173 }, { "epoch": 0.8244752603464091, "grad_norm": 0.54562908411026, "learning_rate": 1.5728083335293997e-06, "loss": 0.31889626383781433, "step": 15280, "token_acc": 0.8870805706248744 }, { "epoch": 0.8245292181514056, "grad_norm": 0.38333606719970703, "learning_rate": 1.5718676561011458e-06, "loss": 0.3305585980415344, "step": 15281, "token_acc": 0.8788112697800077 }, { "epoch": 0.8245831759564021, "grad_norm": 0.48197054862976074, "learning_rate": 1.5709272360658445e-06, "loss": 0.30912524461746216, "step": 15282, "token_acc": 0.8851063829787233 }, { "epoch": 0.8246371337613986, "grad_norm": 0.34798288345336914, "learning_rate": 1.5699870734522182e-06, "loss": 0.3300171494483948, "step": 15283, "token_acc": 0.883061049011178 }, { "epoch": 0.8246910915663951, "grad_norm": 0.4881526529788971, "learning_rate": 1.5690471682889775e-06, "loss": 0.35560518503189087, "step": 15284, "token_acc": 0.8722873900293255 }, { "epoch": 0.8247450493713916, "grad_norm": 0.46391287446022034, "learning_rate": 1.568107520604829e-06, "loss": 0.3631622791290283, "step": 15285, "token_acc": 0.8707974437343706 }, { "epoch": 0.8247990071763881, "grad_norm": 0.4493395984172821, "learning_rate": 1.567168130428467e-06, "loss": 0.33477920293807983, "step": 15286, "token_acc": 0.8855121402154688 }, { "epoch": 0.8248529649813846, "grad_norm": 0.36895596981048584, "learning_rate": 1.5662289977885813e-06, "loss": 0.3107058107852936, "step": 15287, "token_acc": 0.8871244144864617 }, { "epoch": 0.824906922786381, "grad_norm": 0.38392600417137146, "learning_rate": 1.565290122713853e-06, "loss": 0.33166739344596863, "step": 15288, "token_acc": 0.8814950980392157 }, { "epoch": 0.8249608805913775, "grad_norm": 0.4044758677482605, "learning_rate": 1.564351505232954e-06, "loss": 0.3343248963356018, "step": 15289, "token_acc": 0.8859126984126984 }, { "epoch": 0.825014838396374, "grad_norm": 0.47062918543815613, "learning_rate": 1.5634131453745504e-06, "loss": 0.3670775294303894, "step": 15290, "token_acc": 0.8696965536180146 }, { "epoch": 0.8250687962013705, "grad_norm": 0.41077080368995667, "learning_rate": 1.5624750431672997e-06, "loss": 0.3455169200897217, "step": 15291, "token_acc": 0.878599517950019 }, { "epoch": 0.825122754006367, "grad_norm": 0.5639786720275879, "learning_rate": 1.5615371986398487e-06, "loss": 0.37008213996887207, "step": 15292, "token_acc": 0.8716625532869643 }, { "epoch": 0.8251767118113635, "grad_norm": 0.3666249215602875, "learning_rate": 1.5605996118208422e-06, "loss": 0.29745811223983765, "step": 15293, "token_acc": 0.891473827496597 }, { "epoch": 0.82523066961636, "grad_norm": 0.3764856159687042, "learning_rate": 1.5596622827389141e-06, "loss": 0.32684317231178284, "step": 15294, "token_acc": 0.8818052360609048 }, { "epoch": 0.8252846274213566, "grad_norm": 0.41455623507499695, "learning_rate": 1.5587252114226882e-06, "loss": 0.3009111285209656, "step": 15295, "token_acc": 0.8922211808809747 }, { "epoch": 0.825338585226353, "grad_norm": 0.4280634820461273, "learning_rate": 1.5577883979007834e-06, "loss": 0.35439759492874146, "step": 15296, "token_acc": 0.8729908603844941 }, { "epoch": 0.8253925430313495, "grad_norm": 0.43447375297546387, "learning_rate": 1.5568518422018086e-06, "loss": 0.323575884103775, "step": 15297, "token_acc": 0.8875702685821362 }, { "epoch": 0.825446500836346, "grad_norm": 0.512380063533783, "learning_rate": 1.555915544354365e-06, "loss": 0.3733372092247009, "step": 15298, "token_acc": 0.8703062924516212 }, { "epoch": 0.8255004586413425, "grad_norm": 0.38865718245506287, "learning_rate": 1.5549795043870508e-06, "loss": 0.2513467073440552, "step": 15299, "token_acc": 0.9050317705650008 }, { "epoch": 0.825554416446339, "grad_norm": 0.39887288212776184, "learning_rate": 1.55404372232845e-06, "loss": 0.38118839263916016, "step": 15300, "token_acc": 0.8670564872021183 }, { "epoch": 0.8256083742513355, "grad_norm": 0.35206422209739685, "learning_rate": 1.5531081982071416e-06, "loss": 0.3276939392089844, "step": 15301, "token_acc": 0.8806573627233598 }, { "epoch": 0.825662332056332, "grad_norm": 0.5070869326591492, "learning_rate": 1.5521729320516965e-06, "loss": 0.3339800238609314, "step": 15302, "token_acc": 0.8843537414965986 }, { "epoch": 0.8257162898613284, "grad_norm": 0.3085331320762634, "learning_rate": 1.5512379238906794e-06, "loss": 0.2738139033317566, "step": 15303, "token_acc": 0.8998756218905473 }, { "epoch": 0.8257702476663249, "grad_norm": 0.36449068784713745, "learning_rate": 1.5503031737526375e-06, "loss": 0.32387563586235046, "step": 15304, "token_acc": 0.8843171387877828 }, { "epoch": 0.8258242054713214, "grad_norm": 0.3230573832988739, "learning_rate": 1.5493686816661257e-06, "loss": 0.2921571433544159, "step": 15305, "token_acc": 0.895455227238638 }, { "epoch": 0.8258781632763179, "grad_norm": 0.48132699728012085, "learning_rate": 1.5484344476596803e-06, "loss": 0.3248075544834137, "step": 15306, "token_acc": 0.88376821416469 }, { "epoch": 0.8259321210813144, "grad_norm": 0.43938931822776794, "learning_rate": 1.5475004717618336e-06, "loss": 0.30889007449150085, "step": 15307, "token_acc": 0.8877146631439894 }, { "epoch": 0.8259860788863109, "grad_norm": 0.4502955377101898, "learning_rate": 1.5465667540011075e-06, "loss": 0.32071906328201294, "step": 15308, "token_acc": 0.8863519313304721 }, { "epoch": 0.8260400366913074, "grad_norm": 0.4671328067779541, "learning_rate": 1.5456332944060182e-06, "loss": 0.3645642399787903, "step": 15309, "token_acc": 0.8696261027867246 }, { "epoch": 0.8260939944963039, "grad_norm": 0.5134448409080505, "learning_rate": 1.5447000930050738e-06, "loss": 0.2971799969673157, "step": 15310, "token_acc": 0.8941252420916721 }, { "epoch": 0.8261479523013003, "grad_norm": 0.46448102593421936, "learning_rate": 1.5437671498267714e-06, "loss": 0.2873581647872925, "step": 15311, "token_acc": 0.8957173447537473 }, { "epoch": 0.8262019101062968, "grad_norm": 0.520076334476471, "learning_rate": 1.5428344648996063e-06, "loss": 0.3053692877292633, "step": 15312, "token_acc": 0.8919472913616399 }, { "epoch": 0.8262558679112934, "grad_norm": 0.3852822482585907, "learning_rate": 1.5419020382520622e-06, "loss": 0.344593346118927, "step": 15313, "token_acc": 0.8851626016260162 }, { "epoch": 0.8263098257162899, "grad_norm": 0.43039917945861816, "learning_rate": 1.540969869912614e-06, "loss": 0.27020466327667236, "step": 15314, "token_acc": 0.9026099141706078 }, { "epoch": 0.8263637835212864, "grad_norm": 0.5004794597625732, "learning_rate": 1.5400379599097304e-06, "loss": 0.3461950123310089, "step": 15315, "token_acc": 0.8814661543009163 }, { "epoch": 0.8264177413262829, "grad_norm": 0.40530693531036377, "learning_rate": 1.5391063082718705e-06, "loss": 0.3085777163505554, "step": 15316, "token_acc": 0.8883522727272727 }, { "epoch": 0.8264716991312794, "grad_norm": 0.38595473766326904, "learning_rate": 1.5381749150274872e-06, "loss": 0.37514400482177734, "step": 15317, "token_acc": 0.8687089715536105 }, { "epoch": 0.8265256569362759, "grad_norm": 0.32653099298477173, "learning_rate": 1.5372437802050267e-06, "loss": 0.3391563892364502, "step": 15318, "token_acc": 0.8797951176983435 }, { "epoch": 0.8265796147412723, "grad_norm": 0.4691976010799408, "learning_rate": 1.5363129038329228e-06, "loss": 0.39930346608161926, "step": 15319, "token_acc": 0.8663120996011121 }, { "epoch": 0.8266335725462688, "grad_norm": 0.4379845857620239, "learning_rate": 1.5353822859396062e-06, "loss": 0.343420147895813, "step": 15320, "token_acc": 0.8783114632334736 }, { "epoch": 0.8266875303512653, "grad_norm": 0.3880748450756073, "learning_rate": 1.5344519265534975e-06, "loss": 0.28652963042259216, "step": 15321, "token_acc": 0.8956690189566902 }, { "epoch": 0.8267414881562618, "grad_norm": 0.4627867043018341, "learning_rate": 1.5335218257030093e-06, "loss": 0.3741224408149719, "step": 15322, "token_acc": 0.8743688743688743 }, { "epoch": 0.8267954459612583, "grad_norm": 0.48134076595306396, "learning_rate": 1.532591983416546e-06, "loss": 0.30645421147346497, "step": 15323, "token_acc": 0.8847615027437737 }, { "epoch": 0.8268494037662548, "grad_norm": 0.5174725651741028, "learning_rate": 1.5316623997225034e-06, "loss": 0.3321385979652405, "step": 15324, "token_acc": 0.884006334125099 }, { "epoch": 0.8269033615712513, "grad_norm": 0.3744303286075592, "learning_rate": 1.5307330746492744e-06, "loss": 0.3389831483364105, "step": 15325, "token_acc": 0.8753331359194552 }, { "epoch": 0.8269573193762477, "grad_norm": 0.43970754742622375, "learning_rate": 1.5298040082252397e-06, "loss": 0.3170124888420105, "step": 15326, "token_acc": 0.8842962962962962 }, { "epoch": 0.8270112771812442, "grad_norm": 0.3285658061504364, "learning_rate": 1.528875200478771e-06, "loss": 0.3756166398525238, "step": 15327, "token_acc": 0.8672338231502965 }, { "epoch": 0.8270652349862407, "grad_norm": 0.35148757696151733, "learning_rate": 1.5279466514382347e-06, "loss": 0.30166304111480713, "step": 15328, "token_acc": 0.8911098198721673 }, { "epoch": 0.8271191927912372, "grad_norm": 0.32489484548568726, "learning_rate": 1.5270183611319889e-06, "loss": 0.26878806948661804, "step": 15329, "token_acc": 0.9029422536930778 }, { "epoch": 0.8271731505962338, "grad_norm": 0.447932630777359, "learning_rate": 1.5260903295883788e-06, "loss": 0.32280510663986206, "step": 15330, "token_acc": 0.8835799859055673 }, { "epoch": 0.8272271084012303, "grad_norm": 0.3262759745121002, "learning_rate": 1.5251625568357542e-06, "loss": 0.2817685604095459, "step": 15331, "token_acc": 0.8972618239420683 }, { "epoch": 0.8272810662062268, "grad_norm": 0.49020397663116455, "learning_rate": 1.5242350429024444e-06, "loss": 0.3484916687011719, "step": 15332, "token_acc": 0.8812618665108807 }, { "epoch": 0.8273350240112233, "grad_norm": 0.37047266960144043, "learning_rate": 1.5233077878167757e-06, "loss": 0.320688396692276, "step": 15333, "token_acc": 0.8878650411114262 }, { "epoch": 0.8273889818162197, "grad_norm": 0.32990947365760803, "learning_rate": 1.5223807916070665e-06, "loss": 0.37525326013565063, "step": 15334, "token_acc": 0.8677033492822966 }, { "epoch": 0.8274429396212162, "grad_norm": 0.3814805746078491, "learning_rate": 1.5214540543016275e-06, "loss": 0.3768310248851776, "step": 15335, "token_acc": 0.8691507798960139 }, { "epoch": 0.8274968974262127, "grad_norm": 0.4333685636520386, "learning_rate": 1.5205275759287607e-06, "loss": 0.30164334177970886, "step": 15336, "token_acc": 0.8893673401788672 }, { "epoch": 0.8275508552312092, "grad_norm": 0.42798295617103577, "learning_rate": 1.5196013565167611e-06, "loss": 0.3941514790058136, "step": 15337, "token_acc": 0.8658365976496922 }, { "epoch": 0.8276048130362057, "grad_norm": 0.431643009185791, "learning_rate": 1.5186753960939127e-06, "loss": 0.3020138740539551, "step": 15338, "token_acc": 0.8905109489051095 }, { "epoch": 0.8276587708412022, "grad_norm": 0.43697622418403625, "learning_rate": 1.5177496946884973e-06, "loss": 0.36683154106140137, "step": 15339, "token_acc": 0.8743835219031042 }, { "epoch": 0.8277127286461987, "grad_norm": 0.4036672115325928, "learning_rate": 1.5168242523287836e-06, "loss": 0.33624544739723206, "step": 15340, "token_acc": 0.881104611617467 }, { "epoch": 0.8277666864511951, "grad_norm": 0.5343257784843445, "learning_rate": 1.5158990690430353e-06, "loss": 0.3715643584728241, "step": 15341, "token_acc": 0.8722213337597953 }, { "epoch": 0.8278206442561916, "grad_norm": 0.48273083567619324, "learning_rate": 1.5149741448595067e-06, "loss": 0.34716176986694336, "step": 15342, "token_acc": 0.8811357074109721 }, { "epoch": 0.8278746020611881, "grad_norm": 0.4481613039970398, "learning_rate": 1.5140494798064432e-06, "loss": 0.3232031464576721, "step": 15343, "token_acc": 0.8819810326659642 }, { "epoch": 0.8279285598661846, "grad_norm": 0.36061832308769226, "learning_rate": 1.5131250739120874e-06, "loss": 0.3056720197200775, "step": 15344, "token_acc": 0.8865690916917779 }, { "epoch": 0.8279825176711811, "grad_norm": 0.4083496630191803, "learning_rate": 1.512200927204668e-06, "loss": 0.3298061192035675, "step": 15345, "token_acc": 0.8832512937018806 }, { "epoch": 0.8280364754761776, "grad_norm": 0.4812358617782593, "learning_rate": 1.5112770397124088e-06, "loss": 0.31732258200645447, "step": 15346, "token_acc": 0.882344470046083 }, { "epoch": 0.8280904332811742, "grad_norm": 0.4148944318294525, "learning_rate": 1.5103534114635255e-06, "loss": 0.3358634412288666, "step": 15347, "token_acc": 0.8817790167257983 }, { "epoch": 0.8281443910861707, "grad_norm": 0.48562002182006836, "learning_rate": 1.5094300424862251e-06, "loss": 0.3325645625591278, "step": 15348, "token_acc": 0.8795362144255383 }, { "epoch": 0.8281983488911671, "grad_norm": 0.29382312297821045, "learning_rate": 1.5085069328087077e-06, "loss": 0.3435141444206238, "step": 15349, "token_acc": 0.8809714743956926 }, { "epoch": 0.8282523066961636, "grad_norm": 0.35190120339393616, "learning_rate": 1.5075840824591603e-06, "loss": 0.3264368772506714, "step": 15350, "token_acc": 0.8858988159311088 }, { "epoch": 0.8283062645011601, "grad_norm": 0.44009360671043396, "learning_rate": 1.5066614914657763e-06, "loss": 0.3188849985599518, "step": 15351, "token_acc": 0.8799937333542222 }, { "epoch": 0.8283602223061566, "grad_norm": 0.42334532737731934, "learning_rate": 1.5057391598567216e-06, "loss": 0.3262556195259094, "step": 15352, "token_acc": 0.8864381520119226 }, { "epoch": 0.8284141801111531, "grad_norm": 0.3860495686531067, "learning_rate": 1.5048170876601697e-06, "loss": 0.3072788417339325, "step": 15353, "token_acc": 0.8912763214439192 }, { "epoch": 0.8284681379161496, "grad_norm": 0.42772814631462097, "learning_rate": 1.5038952749042768e-06, "loss": 0.2900511622428894, "step": 15354, "token_acc": 0.8938660839706571 }, { "epoch": 0.8285220957211461, "grad_norm": 0.5089605450630188, "learning_rate": 1.5029737216171968e-06, "loss": 0.3650929033756256, "step": 15355, "token_acc": 0.8660381064623845 }, { "epoch": 0.8285760535261426, "grad_norm": 0.3506298065185547, "learning_rate": 1.502052427827072e-06, "loss": 0.2979377508163452, "step": 15356, "token_acc": 0.8923448373810539 }, { "epoch": 0.828630011331139, "grad_norm": 0.3852553963661194, "learning_rate": 1.501131393562042e-06, "loss": 0.30449801683425903, "step": 15357, "token_acc": 0.8892369020501139 }, { "epoch": 0.8286839691361355, "grad_norm": 0.37292319536209106, "learning_rate": 1.5002106188502318e-06, "loss": 0.3003730773925781, "step": 15358, "token_acc": 0.8980638543403 }, { "epoch": 0.828737926941132, "grad_norm": 0.42396992444992065, "learning_rate": 1.499290103719764e-06, "loss": 0.2775087356567383, "step": 15359, "token_acc": 0.8977014397575145 }, { "epoch": 0.8287918847461285, "grad_norm": 0.34290987253189087, "learning_rate": 1.4983698481987485e-06, "loss": 0.28245681524276733, "step": 15360, "token_acc": 0.8893280632411067 }, { "epoch": 0.828845842551125, "grad_norm": 0.3156200349330902, "learning_rate": 1.4974498523152913e-06, "loss": 0.3050474524497986, "step": 15361, "token_acc": 0.8916338151554971 }, { "epoch": 0.8288998003561215, "grad_norm": 0.45634207129478455, "learning_rate": 1.4965301160974854e-06, "loss": 0.316943496465683, "step": 15362, "token_acc": 0.8833742833742834 }, { "epoch": 0.828953758161118, "grad_norm": 0.3160744607448578, "learning_rate": 1.4956106395734249e-06, "loss": 0.30340102314949036, "step": 15363, "token_acc": 0.89124916499666 }, { "epoch": 0.8290077159661144, "grad_norm": 0.35854053497314453, "learning_rate": 1.4946914227711884e-06, "loss": 0.24743574857711792, "step": 15364, "token_acc": 0.9056323546212727 }, { "epoch": 0.829061673771111, "grad_norm": 0.35287413001060486, "learning_rate": 1.4937724657188467e-06, "loss": 0.2907882332801819, "step": 15365, "token_acc": 0.896491588018055 }, { "epoch": 0.8291156315761075, "grad_norm": 0.3419196307659149, "learning_rate": 1.4928537684444654e-06, "loss": 0.30062973499298096, "step": 15366, "token_acc": 0.8879164747428221 }, { "epoch": 0.829169589381104, "grad_norm": 0.3635404407978058, "learning_rate": 1.491935330976102e-06, "loss": 0.2730027437210083, "step": 15367, "token_acc": 0.9006315246810156 }, { "epoch": 0.8292235471861005, "grad_norm": 0.40208184719085693, "learning_rate": 1.491017153341805e-06, "loss": 0.3477795422077179, "step": 15368, "token_acc": 0.8782473827064754 }, { "epoch": 0.829277504991097, "grad_norm": 0.38042232394218445, "learning_rate": 1.4900992355696142e-06, "loss": 0.34705495834350586, "step": 15369, "token_acc": 0.8739082297945627 }, { "epoch": 0.8293314627960935, "grad_norm": 0.4840090870857239, "learning_rate": 1.489181577687564e-06, "loss": 0.3192800283432007, "step": 15370, "token_acc": 0.8899413622011727 }, { "epoch": 0.82938542060109, "grad_norm": 0.4605904221534729, "learning_rate": 1.4882641797236785e-06, "loss": 0.32424890995025635, "step": 15371, "token_acc": 0.8855283307810107 }, { "epoch": 0.8294393784060864, "grad_norm": 0.3924119174480438, "learning_rate": 1.4873470417059755e-06, "loss": 0.34972715377807617, "step": 15372, "token_acc": 0.8742560465999747 }, { "epoch": 0.8294933362110829, "grad_norm": 0.6152989864349365, "learning_rate": 1.4864301636624646e-06, "loss": 0.3494676351547241, "step": 15373, "token_acc": 0.8744588744588745 }, { "epoch": 0.8295472940160794, "grad_norm": 0.35393059253692627, "learning_rate": 1.4855135456211457e-06, "loss": 0.3008863925933838, "step": 15374, "token_acc": 0.8875652523089278 }, { "epoch": 0.8296012518210759, "grad_norm": 0.4201100766658783, "learning_rate": 1.4845971876100096e-06, "loss": 0.34323757886886597, "step": 15375, "token_acc": 0.8784766795036372 }, { "epoch": 0.8296552096260724, "grad_norm": 0.46810322999954224, "learning_rate": 1.4836810896570475e-06, "loss": 0.3045438528060913, "step": 15376, "token_acc": 0.8861316790074044 }, { "epoch": 0.8297091674310689, "grad_norm": 0.4289572238922119, "learning_rate": 1.4827652517902335e-06, "loss": 0.33989477157592773, "step": 15377, "token_acc": 0.8785330530249942 }, { "epoch": 0.8297631252360654, "grad_norm": 0.44429463148117065, "learning_rate": 1.4818496740375387e-06, "loss": 0.3077557682991028, "step": 15378, "token_acc": 0.8881337501699063 }, { "epoch": 0.8298170830410619, "grad_norm": 0.3336176574230194, "learning_rate": 1.4809343564269217e-06, "loss": 0.3100303113460541, "step": 15379, "token_acc": 0.8860898138006572 }, { "epoch": 0.8298710408460583, "grad_norm": 0.4295986294746399, "learning_rate": 1.4800192989863393e-06, "loss": 0.2687360942363739, "step": 15380, "token_acc": 0.8996234067207416 }, { "epoch": 0.8299249986510548, "grad_norm": 0.4625679552555084, "learning_rate": 1.4791045017437344e-06, "loss": 0.3638632297515869, "step": 15381, "token_acc": 0.8685691844482262 }, { "epoch": 0.8299789564560514, "grad_norm": 0.4656680226325989, "learning_rate": 1.4781899647270447e-06, "loss": 0.3071686625480652, "step": 15382, "token_acc": 0.8911845730027548 }, { "epoch": 0.8300329142610479, "grad_norm": 0.33243677020072937, "learning_rate": 1.4772756879642024e-06, "loss": 0.3236602544784546, "step": 15383, "token_acc": 0.8841139042453322 }, { "epoch": 0.8300868720660444, "grad_norm": 0.4764406383037567, "learning_rate": 1.4763616714831287e-06, "loss": 0.37006449699401855, "step": 15384, "token_acc": 0.8671164564388384 }, { "epoch": 0.8301408298710409, "grad_norm": 0.40672484040260315, "learning_rate": 1.4754479153117386e-06, "loss": 0.35321900248527527, "step": 15385, "token_acc": 0.8737880032867708 }, { "epoch": 0.8301947876760374, "grad_norm": 0.4719785153865814, "learning_rate": 1.474534419477933e-06, "loss": 0.3003695607185364, "step": 15386, "token_acc": 0.8908698001080497 }, { "epoch": 0.8302487454810338, "grad_norm": 0.4820893108844757, "learning_rate": 1.4736211840096138e-06, "loss": 0.3485947847366333, "step": 15387, "token_acc": 0.8772058823529412 }, { "epoch": 0.8303027032860303, "grad_norm": 0.41165629029273987, "learning_rate": 1.4727082089346678e-06, "loss": 0.339588463306427, "step": 15388, "token_acc": 0.8762618537779138 }, { "epoch": 0.8303566610910268, "grad_norm": 0.34855180978775024, "learning_rate": 1.471795494280981e-06, "loss": 0.2848043739795685, "step": 15389, "token_acc": 0.8945153597980081 }, { "epoch": 0.8304106188960233, "grad_norm": 0.38674622774124146, "learning_rate": 1.4708830400764262e-06, "loss": 0.29978325963020325, "step": 15390, "token_acc": 0.8881350034794712 }, { "epoch": 0.8304645767010198, "grad_norm": 0.33117440342903137, "learning_rate": 1.4699708463488694e-06, "loss": 0.27961254119873047, "step": 15391, "token_acc": 0.897407304153889 }, { "epoch": 0.8305185345060163, "grad_norm": 0.3767758905887604, "learning_rate": 1.4690589131261679e-06, "loss": 0.333016574382782, "step": 15392, "token_acc": 0.8856079085890358 }, { "epoch": 0.8305724923110128, "grad_norm": 0.4116731286048889, "learning_rate": 1.4681472404361718e-06, "loss": 0.29001426696777344, "step": 15393, "token_acc": 0.8945872801082544 }, { "epoch": 0.8306264501160093, "grad_norm": 0.4585071802139282, "learning_rate": 1.4672358283067245e-06, "loss": 0.37640705704689026, "step": 15394, "token_acc": 0.8703981968444778 }, { "epoch": 0.8306804079210057, "grad_norm": 0.37850767374038696, "learning_rate": 1.466324676765657e-06, "loss": 0.3389410376548767, "step": 15395, "token_acc": 0.8834681161341641 }, { "epoch": 0.8307343657260022, "grad_norm": 0.41456303000450134, "learning_rate": 1.4654137858408012e-06, "loss": 0.25741344690322876, "step": 15396, "token_acc": 0.9019197871127163 }, { "epoch": 0.8307883235309987, "grad_norm": 0.4591689705848694, "learning_rate": 1.464503155559972e-06, "loss": 0.3771243393421173, "step": 15397, "token_acc": 0.8739185931073606 }, { "epoch": 0.8308422813359952, "grad_norm": 0.41065025329589844, "learning_rate": 1.4635927859509813e-06, "loss": 0.38725870847702026, "step": 15398, "token_acc": 0.865847140566542 }, { "epoch": 0.8308962391409918, "grad_norm": 0.3273545801639557, "learning_rate": 1.4626826770416303e-06, "loss": 0.26294827461242676, "step": 15399, "token_acc": 0.9026613733274519 }, { "epoch": 0.8309501969459883, "grad_norm": 0.5537134408950806, "learning_rate": 1.4617728288597133e-06, "loss": 0.3274551033973694, "step": 15400, "token_acc": 0.8828748890860693 }, { "epoch": 0.8310041547509848, "grad_norm": 0.4231303632259369, "learning_rate": 1.460863241433018e-06, "loss": 0.2915574312210083, "step": 15401, "token_acc": 0.8936335403726708 }, { "epoch": 0.8310581125559813, "grad_norm": 0.4294753074645996, "learning_rate": 1.4599539147893226e-06, "loss": 0.34428510069847107, "step": 15402, "token_acc": 0.8799552429667519 }, { "epoch": 0.8311120703609777, "grad_norm": 0.5243898630142212, "learning_rate": 1.459044848956397e-06, "loss": 0.29870179295539856, "step": 15403, "token_acc": 0.8932296270107831 }, { "epoch": 0.8311660281659742, "grad_norm": 0.34282997250556946, "learning_rate": 1.4581360439620052e-06, "loss": 0.3064941167831421, "step": 15404, "token_acc": 0.8889442231075697 }, { "epoch": 0.8312199859709707, "grad_norm": 0.3922971785068512, "learning_rate": 1.4572274998338998e-06, "loss": 0.3086306154727936, "step": 15405, "token_acc": 0.8866111467008329 }, { "epoch": 0.8312739437759672, "grad_norm": 0.3512316644191742, "learning_rate": 1.4563192165998285e-06, "loss": 0.384196937084198, "step": 15406, "token_acc": 0.8659118659118659 }, { "epoch": 0.8313279015809637, "grad_norm": 0.3821714520454407, "learning_rate": 1.4554111942875281e-06, "loss": 0.33136653900146484, "step": 15407, "token_acc": 0.8822897070679925 }, { "epoch": 0.8313818593859602, "grad_norm": 0.367227703332901, "learning_rate": 1.4545034329247342e-06, "loss": 0.32469648122787476, "step": 15408, "token_acc": 0.8832555036691128 }, { "epoch": 0.8314358171909567, "grad_norm": 0.37672898173332214, "learning_rate": 1.453595932539167e-06, "loss": 0.31203240156173706, "step": 15409, "token_acc": 0.8875677302829621 }, { "epoch": 0.8314897749959531, "grad_norm": 0.5045934319496155, "learning_rate": 1.4526886931585415e-06, "loss": 0.3901708126068115, "step": 15410, "token_acc": 0.8691129184792888 }, { "epoch": 0.8315437328009496, "grad_norm": 0.30965280532836914, "learning_rate": 1.4517817148105628e-06, "loss": 0.348914235830307, "step": 15411, "token_acc": 0.8733179779367196 }, { "epoch": 0.8315976906059461, "grad_norm": 0.4741402864456177, "learning_rate": 1.4508749975229319e-06, "loss": 0.3339289426803589, "step": 15412, "token_acc": 0.8819454901370276 }, { "epoch": 0.8316516484109426, "grad_norm": 0.5016571283340454, "learning_rate": 1.44996854132334e-06, "loss": 0.32588714361190796, "step": 15413, "token_acc": 0.8815319701395651 }, { "epoch": 0.8317056062159391, "grad_norm": 0.37135642766952515, "learning_rate": 1.449062346239465e-06, "loss": 0.3303431570529938, "step": 15414, "token_acc": 0.8782582030052132 }, { "epoch": 0.8317595640209356, "grad_norm": 0.41028541326522827, "learning_rate": 1.4481564122989889e-06, "loss": 0.3149871230125427, "step": 15415, "token_acc": 0.8872435810351854 }, { "epoch": 0.8318135218259322, "grad_norm": 0.3964429795742035, "learning_rate": 1.4472507395295765e-06, "loss": 0.3567121624946594, "step": 15416, "token_acc": 0.8706896551724138 }, { "epoch": 0.8318674796309287, "grad_norm": 0.5399904847145081, "learning_rate": 1.4463453279588858e-06, "loss": 0.3786505460739136, "step": 15417, "token_acc": 0.8643835616438356 }, { "epoch": 0.8319214374359251, "grad_norm": 0.4584849774837494, "learning_rate": 1.4454401776145666e-06, "loss": 0.33970996737480164, "step": 15418, "token_acc": 0.8803761755485894 }, { "epoch": 0.8319753952409216, "grad_norm": 0.376168429851532, "learning_rate": 1.4445352885242647e-06, "loss": 0.28239887952804565, "step": 15419, "token_acc": 0.8952702702702703 }, { "epoch": 0.8320293530459181, "grad_norm": 0.3608142137527466, "learning_rate": 1.4436306607156135e-06, "loss": 0.30649858713150024, "step": 15420, "token_acc": 0.8900258630383447 }, { "epoch": 0.8320833108509146, "grad_norm": 0.3576514422893524, "learning_rate": 1.4427262942162402e-06, "loss": 0.24515552818775177, "step": 15421, "token_acc": 0.9080641965524073 }, { "epoch": 0.8321372686559111, "grad_norm": 0.3456185758113861, "learning_rate": 1.4418221890537642e-06, "loss": 0.27070313692092896, "step": 15422, "token_acc": 0.8970524802300504 }, { "epoch": 0.8321912264609076, "grad_norm": 0.32609978318214417, "learning_rate": 1.4409183452557974e-06, "loss": 0.36536699533462524, "step": 15423, "token_acc": 0.8741267312170609 }, { "epoch": 0.8322451842659041, "grad_norm": 0.4528340995311737, "learning_rate": 1.4400147628499416e-06, "loss": 0.36047685146331787, "step": 15424, "token_acc": 0.871952088977613 }, { "epoch": 0.8322991420709006, "grad_norm": 0.3985712230205536, "learning_rate": 1.439111441863793e-06, "loss": 0.34473422169685364, "step": 15425, "token_acc": 0.8780858978694623 }, { "epoch": 0.832353099875897, "grad_norm": 0.3767652213573456, "learning_rate": 1.438208382324937e-06, "loss": 0.321911096572876, "step": 15426, "token_acc": 0.8869823248613082 }, { "epoch": 0.8324070576808935, "grad_norm": 0.5488942861557007, "learning_rate": 1.437305584260953e-06, "loss": 0.3569641709327698, "step": 15427, "token_acc": 0.8733909702209414 }, { "epoch": 0.83246101548589, "grad_norm": 0.5305565595626831, "learning_rate": 1.436403047699415e-06, "loss": 0.38234472274780273, "step": 15428, "token_acc": 0.869534355479841 }, { "epoch": 0.8325149732908865, "grad_norm": 0.48174047470092773, "learning_rate": 1.435500772667885e-06, "loss": 0.3559199571609497, "step": 15429, "token_acc": 0.8732604945370903 }, { "epoch": 0.832568931095883, "grad_norm": 0.3530150055885315, "learning_rate": 1.434598759193918e-06, "loss": 0.3530951142311096, "step": 15430, "token_acc": 0.8739805314390949 }, { "epoch": 0.8326228889008795, "grad_norm": 0.3778645992279053, "learning_rate": 1.4336970073050604e-06, "loss": 0.3477073311805725, "step": 15431, "token_acc": 0.8775687409551375 }, { "epoch": 0.832676846705876, "grad_norm": 0.40429818630218506, "learning_rate": 1.4327955170288531e-06, "loss": 0.3209958076477051, "step": 15432, "token_acc": 0.8867403314917127 }, { "epoch": 0.8327308045108724, "grad_norm": 0.4506179094314575, "learning_rate": 1.4318942883928255e-06, "loss": 0.299694687128067, "step": 15433, "token_acc": 0.8889278820845763 }, { "epoch": 0.832784762315869, "grad_norm": 0.4032646119594574, "learning_rate": 1.430993321424502e-06, "loss": 0.3047538995742798, "step": 15434, "token_acc": 0.8887403942532576 }, { "epoch": 0.8328387201208655, "grad_norm": 0.4431539475917816, "learning_rate": 1.4300926161513984e-06, "loss": 0.3116539418697357, "step": 15435, "token_acc": 0.8857252054270973 }, { "epoch": 0.832892677925862, "grad_norm": 0.3588865101337433, "learning_rate": 1.4291921726010205e-06, "loss": 0.3075237274169922, "step": 15436, "token_acc": 0.8851374869474417 }, { "epoch": 0.8329466357308585, "grad_norm": 0.4504926800727844, "learning_rate": 1.4282919908008685e-06, "loss": 0.3488140106201172, "step": 15437, "token_acc": 0.8699913519746325 }, { "epoch": 0.833000593535855, "grad_norm": 0.35544219613075256, "learning_rate": 1.427392070778434e-06, "loss": 0.30743688344955444, "step": 15438, "token_acc": 0.890482532527405 }, { "epoch": 0.8330545513408515, "grad_norm": 0.4559914469718933, "learning_rate": 1.426492412561199e-06, "loss": 0.3577745258808136, "step": 15439, "token_acc": 0.8740751925109467 }, { "epoch": 0.833108509145848, "grad_norm": 0.42917075753211975, "learning_rate": 1.4255930161766384e-06, "loss": 0.3092133104801178, "step": 15440, "token_acc": 0.8859677949392047 }, { "epoch": 0.8331624669508444, "grad_norm": 0.37956228852272034, "learning_rate": 1.4246938816522226e-06, "loss": 0.2978828549385071, "step": 15441, "token_acc": 0.8906272635086194 }, { "epoch": 0.8332164247558409, "grad_norm": 0.443745493888855, "learning_rate": 1.4237950090154095e-06, "loss": 0.3330601453781128, "step": 15442, "token_acc": 0.878936877076412 }, { "epoch": 0.8332703825608374, "grad_norm": 0.449047327041626, "learning_rate": 1.4228963982936505e-06, "loss": 0.3573272228240967, "step": 15443, "token_acc": 0.8742865957118618 }, { "epoch": 0.8333243403658339, "grad_norm": 0.4237031638622284, "learning_rate": 1.4219980495143882e-06, "loss": 0.3372129201889038, "step": 15444, "token_acc": 0.8816749903956973 }, { "epoch": 0.8333782981708304, "grad_norm": 0.3963760733604431, "learning_rate": 1.421099962705058e-06, "loss": 0.3148840069770813, "step": 15445, "token_acc": 0.8875206296813508 }, { "epoch": 0.8334322559758269, "grad_norm": 0.3862374722957611, "learning_rate": 1.4202021378930863e-06, "loss": 0.28978848457336426, "step": 15446, "token_acc": 0.8914928203738824 }, { "epoch": 0.8334862137808234, "grad_norm": 0.4284055233001709, "learning_rate": 1.4193045751058953e-06, "loss": 0.31082049012184143, "step": 15447, "token_acc": 0.886404116366515 }, { "epoch": 0.8335401715858198, "grad_norm": 0.4002220034599304, "learning_rate": 1.4184072743708943e-06, "loss": 0.280717670917511, "step": 15448, "token_acc": 0.894282259457592 }, { "epoch": 0.8335941293908163, "grad_norm": 0.4805808961391449, "learning_rate": 1.417510235715488e-06, "loss": 0.3375900983810425, "step": 15449, "token_acc": 0.8797872340425532 }, { "epoch": 0.8336480871958128, "grad_norm": 0.33478352427482605, "learning_rate": 1.41661345916707e-06, "loss": 0.33741384744644165, "step": 15450, "token_acc": 0.8782749484839565 }, { "epoch": 0.8337020450008094, "grad_norm": 0.42352718114852905, "learning_rate": 1.4157169447530296e-06, "loss": 0.3504910171031952, "step": 15451, "token_acc": 0.8742550655542313 }, { "epoch": 0.8337560028058059, "grad_norm": 0.4808865785598755, "learning_rate": 1.4148206925007435e-06, "loss": 0.35216331481933594, "step": 15452, "token_acc": 0.8793926987376323 }, { "epoch": 0.8338099606108024, "grad_norm": 0.3407326340675354, "learning_rate": 1.4139247024375856e-06, "loss": 0.30096638202667236, "step": 15453, "token_acc": 0.8931358637814828 }, { "epoch": 0.8338639184157989, "grad_norm": 0.4433135688304901, "learning_rate": 1.4130289745909176e-06, "loss": 0.23156708478927612, "step": 15454, "token_acc": 0.9128819157720892 }, { "epoch": 0.8339178762207954, "grad_norm": 0.5183353424072266, "learning_rate": 1.412133508988095e-06, "loss": 0.34789466857910156, "step": 15455, "token_acc": 0.8749160510409671 }, { "epoch": 0.8339718340257918, "grad_norm": 0.3786374628543854, "learning_rate": 1.411238305656465e-06, "loss": 0.3522183895111084, "step": 15456, "token_acc": 0.8764932743862289 }, { "epoch": 0.8340257918307883, "grad_norm": 0.35394370555877686, "learning_rate": 1.4103433646233678e-06, "loss": 0.31466925144195557, "step": 15457, "token_acc": 0.8874697499327776 }, { "epoch": 0.8340797496357848, "grad_norm": 0.463776558637619, "learning_rate": 1.4094486859161338e-06, "loss": 0.31001490354537964, "step": 15458, "token_acc": 0.8942127019917324 }, { "epoch": 0.8341337074407813, "grad_norm": 0.44874197244644165, "learning_rate": 1.4085542695620847e-06, "loss": 0.3461196720600128, "step": 15459, "token_acc": 0.8743190661478599 }, { "epoch": 0.8341876652457778, "grad_norm": 0.4350855052471161, "learning_rate": 1.4076601155885394e-06, "loss": 0.30710116028785706, "step": 15460, "token_acc": 0.8823892385536222 }, { "epoch": 0.8342416230507743, "grad_norm": 0.4011426568031311, "learning_rate": 1.406766224022803e-06, "loss": 0.2846545875072479, "step": 15461, "token_acc": 0.8965562053281352 }, { "epoch": 0.8342955808557708, "grad_norm": 0.48677361011505127, "learning_rate": 1.4058725948921758e-06, "loss": 0.358298659324646, "step": 15462, "token_acc": 0.8681571592963998 }, { "epoch": 0.8343495386607673, "grad_norm": 0.397438108921051, "learning_rate": 1.4049792282239482e-06, "loss": 0.32883816957473755, "step": 15463, "token_acc": 0.8880925436942034 }, { "epoch": 0.8344034964657637, "grad_norm": 0.4495176374912262, "learning_rate": 1.404086124045403e-06, "loss": 0.27987754344940186, "step": 15464, "token_acc": 0.8958904109589041 }, { "epoch": 0.8344574542707602, "grad_norm": 0.44558215141296387, "learning_rate": 1.4031932823838134e-06, "loss": 0.36477163434028625, "step": 15465, "token_acc": 0.8741278258442646 }, { "epoch": 0.8345114120757567, "grad_norm": 0.47081467509269714, "learning_rate": 1.4023007032664516e-06, "loss": 0.33211174607276917, "step": 15466, "token_acc": 0.8833760527279385 }, { "epoch": 0.8345653698807532, "grad_norm": 0.48193907737731934, "learning_rate": 1.4014083867205751e-06, "loss": 0.3215133249759674, "step": 15467, "token_acc": 0.8844293137430568 }, { "epoch": 0.8346193276857498, "grad_norm": 0.37861961126327515, "learning_rate": 1.4005163327734327e-06, "loss": 0.3173553943634033, "step": 15468, "token_acc": 0.8836637244971784 }, { "epoch": 0.8346732854907463, "grad_norm": 0.38869720697402954, "learning_rate": 1.399624541452269e-06, "loss": 0.32571977376937866, "step": 15469, "token_acc": 0.8838509316770187 }, { "epoch": 0.8347272432957428, "grad_norm": 0.5021839141845703, "learning_rate": 1.398733012784318e-06, "loss": 0.3852137327194214, "step": 15470, "token_acc": 0.8627319846179569 }, { "epoch": 0.8347812011007392, "grad_norm": 0.37897995114326477, "learning_rate": 1.3978417467968075e-06, "loss": 0.3032470941543579, "step": 15471, "token_acc": 0.8881100726895119 }, { "epoch": 0.8348351589057357, "grad_norm": 0.34457629919052124, "learning_rate": 1.3969507435169549e-06, "loss": 0.30428382754325867, "step": 15472, "token_acc": 0.8893685051958433 }, { "epoch": 0.8348891167107322, "grad_norm": 0.5517059564590454, "learning_rate": 1.396060002971975e-06, "loss": 0.33220359683036804, "step": 15473, "token_acc": 0.8793589292004227 }, { "epoch": 0.8349430745157287, "grad_norm": 0.4950014650821686, "learning_rate": 1.3951695251890684e-06, "loss": 0.37081485986709595, "step": 15474, "token_acc": 0.8688325225851286 }, { "epoch": 0.8349970323207252, "grad_norm": 0.39505112171173096, "learning_rate": 1.394279310195431e-06, "loss": 0.3019316494464874, "step": 15475, "token_acc": 0.8915377328702242 }, { "epoch": 0.8350509901257217, "grad_norm": 0.38469821214675903, "learning_rate": 1.393389358018249e-06, "loss": 0.37890005111694336, "step": 15476, "token_acc": 0.8701704832038737 }, { "epoch": 0.8351049479307182, "grad_norm": 0.4031146764755249, "learning_rate": 1.3924996686847015e-06, "loss": 0.3041501045227051, "step": 15477, "token_acc": 0.8887784090909091 }, { "epoch": 0.8351589057357147, "grad_norm": 0.44550034403800964, "learning_rate": 1.3916102422219557e-06, "loss": 0.29280465841293335, "step": 15478, "token_acc": 0.8916521037773895 }, { "epoch": 0.8352128635407111, "grad_norm": 0.4713577628135681, "learning_rate": 1.3907210786571812e-06, "loss": 0.3701879680156708, "step": 15479, "token_acc": 0.8712284482758621 }, { "epoch": 0.8352668213457076, "grad_norm": 0.2977820038795471, "learning_rate": 1.3898321780175294e-06, "loss": 0.3013693392276764, "step": 15480, "token_acc": 0.8905702992659514 }, { "epoch": 0.8353207791507041, "grad_norm": 0.42925310134887695, "learning_rate": 1.3889435403301477e-06, "loss": 0.39967772364616394, "step": 15481, "token_acc": 0.8622742407426093 }, { "epoch": 0.8353747369557006, "grad_norm": 0.46503978967666626, "learning_rate": 1.388055165622174e-06, "loss": 0.32361823320388794, "step": 15482, "token_acc": 0.8844802914010668 }, { "epoch": 0.8354286947606971, "grad_norm": 0.5158418416976929, "learning_rate": 1.3871670539207405e-06, "loss": 0.3399890661239624, "step": 15483, "token_acc": 0.8795439524124257 }, { "epoch": 0.8354826525656937, "grad_norm": 0.5137079954147339, "learning_rate": 1.3862792052529683e-06, "loss": 0.35909923911094666, "step": 15484, "token_acc": 0.8715251690458302 }, { "epoch": 0.8355366103706902, "grad_norm": 0.41418710350990295, "learning_rate": 1.3853916196459726e-06, "loss": 0.3546680212020874, "step": 15485, "token_acc": 0.8690063547082612 }, { "epoch": 0.8355905681756867, "grad_norm": 0.4027576744556427, "learning_rate": 1.3845042971268596e-06, "loss": 0.33742737770080566, "step": 15486, "token_acc": 0.8821064394468501 }, { "epoch": 0.8356445259806831, "grad_norm": 0.46633613109588623, "learning_rate": 1.3836172377227297e-06, "loss": 0.282534658908844, "step": 15487, "token_acc": 0.8968994581577363 }, { "epoch": 0.8356984837856796, "grad_norm": 0.3137279450893402, "learning_rate": 1.3827304414606713e-06, "loss": 0.34972327947616577, "step": 15488, "token_acc": 0.8776737967914439 }, { "epoch": 0.8357524415906761, "grad_norm": 0.4052373766899109, "learning_rate": 1.3818439083677682e-06, "loss": 0.36659130454063416, "step": 15489, "token_acc": 0.8699064292182312 }, { "epoch": 0.8358063993956726, "grad_norm": 0.3972187042236328, "learning_rate": 1.380957638471093e-06, "loss": 0.36163902282714844, "step": 15490, "token_acc": 0.8738822078322541 }, { "epoch": 0.8358603572006691, "grad_norm": 0.5142831206321716, "learning_rate": 1.3800716317977136e-06, "loss": 0.37405872344970703, "step": 15491, "token_acc": 0.8737905520774046 }, { "epoch": 0.8359143150056656, "grad_norm": 0.3204132616519928, "learning_rate": 1.379185888374689e-06, "loss": 0.33369725942611694, "step": 15492, "token_acc": 0.8827603264902301 }, { "epoch": 0.8359682728106621, "grad_norm": 0.567013680934906, "learning_rate": 1.3783004082290697e-06, "loss": 0.30580100417137146, "step": 15493, "token_acc": 0.8906474820143885 }, { "epoch": 0.8360222306156585, "grad_norm": 0.3473554253578186, "learning_rate": 1.3774151913878976e-06, "loss": 0.2933994233608246, "step": 15494, "token_acc": 0.8927068723702665 }, { "epoch": 0.836076188420655, "grad_norm": 0.4062904119491577, "learning_rate": 1.3765302378782065e-06, "loss": 0.35519659519195557, "step": 15495, "token_acc": 0.8753315649867374 }, { "epoch": 0.8361301462256515, "grad_norm": 0.4620784521102905, "learning_rate": 1.3756455477270236e-06, "loss": 0.3143214285373688, "step": 15496, "token_acc": 0.885289697779262 }, { "epoch": 0.836184104030648, "grad_norm": 0.3651883006095886, "learning_rate": 1.3747611209613654e-06, "loss": 0.33059927821159363, "step": 15497, "token_acc": 0.8852344296710987 }, { "epoch": 0.8362380618356445, "grad_norm": 0.4882161021232605, "learning_rate": 1.373876957608241e-06, "loss": 0.3285594582557678, "step": 15498, "token_acc": 0.8847356452529846 }, { "epoch": 0.836292019640641, "grad_norm": 0.3337237536907196, "learning_rate": 1.3729930576946572e-06, "loss": 0.3425653576850891, "step": 15499, "token_acc": 0.8804872165943077 }, { "epoch": 0.8363459774456375, "grad_norm": 0.29003700613975525, "learning_rate": 1.372109421247605e-06, "loss": 0.305586576461792, "step": 15500, "token_acc": 0.8910916916655798 }, { "epoch": 0.836399935250634, "grad_norm": 0.37640294432640076, "learning_rate": 1.371226048294071e-06, "loss": 0.317443311214447, "step": 15501, "token_acc": 0.8846412556053812 }, { "epoch": 0.8364538930556304, "grad_norm": 0.4174579977989197, "learning_rate": 1.3703429388610356e-06, "loss": 0.3828151822090149, "step": 15502, "token_acc": 0.8706340378197998 }, { "epoch": 0.836507850860627, "grad_norm": 0.4295545518398285, "learning_rate": 1.369460092975463e-06, "loss": 0.2560965120792389, "step": 15503, "token_acc": 0.903569525839105 }, { "epoch": 0.8365618086656235, "grad_norm": 0.3917251229286194, "learning_rate": 1.3685775106643174e-06, "loss": 0.35301533341407776, "step": 15504, "token_acc": 0.8719768664563617 }, { "epoch": 0.83661576647062, "grad_norm": 0.49667197465896606, "learning_rate": 1.3676951919545556e-06, "loss": 0.29941433668136597, "step": 15505, "token_acc": 0.8923100030039051 }, { "epoch": 0.8366697242756165, "grad_norm": 0.4583528935909271, "learning_rate": 1.3668131368731207e-06, "loss": 0.30605679750442505, "step": 15506, "token_acc": 0.8951691493804816 }, { "epoch": 0.836723682080613, "grad_norm": 0.4028710126876831, "learning_rate": 1.3659313454469503e-06, "loss": 0.28930044174194336, "step": 15507, "token_acc": 0.893587123147675 }, { "epoch": 0.8367776398856095, "grad_norm": 0.4481460154056549, "learning_rate": 1.3650498177029758e-06, "loss": 0.27854692935943604, "step": 15508, "token_acc": 0.8990775908844275 }, { "epoch": 0.836831597690606, "grad_norm": 0.35408106446266174, "learning_rate": 1.3641685536681182e-06, "loss": 0.3238462209701538, "step": 15509, "token_acc": 0.8855421686746988 }, { "epoch": 0.8368855554956024, "grad_norm": 0.4282309412956238, "learning_rate": 1.363287553369289e-06, "loss": 0.35788360238075256, "step": 15510, "token_acc": 0.8742100145843461 }, { "epoch": 0.8369395133005989, "grad_norm": 0.397284597158432, "learning_rate": 1.362406816833395e-06, "loss": 0.3079816997051239, "step": 15511, "token_acc": 0.8895233911706567 }, { "epoch": 0.8369934711055954, "grad_norm": 0.4747506380081177, "learning_rate": 1.3615263440873349e-06, "loss": 0.3435949683189392, "step": 15512, "token_acc": 0.8811583301993231 }, { "epoch": 0.8370474289105919, "grad_norm": 0.38647013902664185, "learning_rate": 1.360646135157998e-06, "loss": 0.26911604404449463, "step": 15513, "token_acc": 0.9025133282559025 }, { "epoch": 0.8371013867155884, "grad_norm": 0.4991515874862671, "learning_rate": 1.359766190072266e-06, "loss": 0.32519376277923584, "step": 15514, "token_acc": 0.8822393822393823 }, { "epoch": 0.8371553445205849, "grad_norm": 0.3614369034767151, "learning_rate": 1.358886508857009e-06, "loss": 0.3163360357284546, "step": 15515, "token_acc": 0.8870836718115354 }, { "epoch": 0.8372093023255814, "grad_norm": 0.5273265242576599, "learning_rate": 1.3580070915390953e-06, "loss": 0.3057713806629181, "step": 15516, "token_acc": 0.8849176172370089 }, { "epoch": 0.8372632601305778, "grad_norm": 0.5231784582138062, "learning_rate": 1.3571279381453818e-06, "loss": 0.387969046831131, "step": 15517, "token_acc": 0.8616701712225894 }, { "epoch": 0.8373172179355743, "grad_norm": 0.4271685779094696, "learning_rate": 1.3562490487027158e-06, "loss": 0.32085120677948, "step": 15518, "token_acc": 0.8907763373565788 }, { "epoch": 0.8373711757405709, "grad_norm": 0.4077621102333069, "learning_rate": 1.3553704232379406e-06, "loss": 0.32655712962150574, "step": 15519, "token_acc": 0.8852142963072817 }, { "epoch": 0.8374251335455674, "grad_norm": 0.39566171169281006, "learning_rate": 1.3544920617778867e-06, "loss": 0.3045876622200012, "step": 15520, "token_acc": 0.8875376279349789 }, { "epoch": 0.8374790913505639, "grad_norm": 0.4104672968387604, "learning_rate": 1.3536139643493817e-06, "loss": 0.33330124616622925, "step": 15521, "token_acc": 0.879376875803916 }, { "epoch": 0.8375330491555604, "grad_norm": 0.43803104758262634, "learning_rate": 1.3527361309792408e-06, "loss": 0.3264388144016266, "step": 15522, "token_acc": 0.8862147753236862 }, { "epoch": 0.8375870069605569, "grad_norm": 0.5114563703536987, "learning_rate": 1.3518585616942704e-06, "loss": 0.3360174000263214, "step": 15523, "token_acc": 0.8786157707040021 }, { "epoch": 0.8376409647655534, "grad_norm": 0.5712090134620667, "learning_rate": 1.3509812565212777e-06, "loss": 0.3588721454143524, "step": 15524, "token_acc": 0.8763215061549602 }, { "epoch": 0.8376949225705498, "grad_norm": 0.43612271547317505, "learning_rate": 1.3501042154870502e-06, "loss": 0.3196537494659424, "step": 15525, "token_acc": 0.8872930866601753 }, { "epoch": 0.8377488803755463, "grad_norm": 0.4870341122150421, "learning_rate": 1.3492274386183746e-06, "loss": 0.3613702952861786, "step": 15526, "token_acc": 0.8717136150234742 }, { "epoch": 0.8378028381805428, "grad_norm": 0.4843992590904236, "learning_rate": 1.3483509259420269e-06, "loss": 0.33256256580352783, "step": 15527, "token_acc": 0.8814685314685314 }, { "epoch": 0.8378567959855393, "grad_norm": 0.40830469131469727, "learning_rate": 1.3474746774847747e-06, "loss": 0.3290152847766876, "step": 15528, "token_acc": 0.882710531020919 }, { "epoch": 0.8379107537905358, "grad_norm": 0.42604711651802063, "learning_rate": 1.3465986932733799e-06, "loss": 0.3239789307117462, "step": 15529, "token_acc": 0.8850795392210642 }, { "epoch": 0.8379647115955323, "grad_norm": 0.4189983308315277, "learning_rate": 1.3457229733345923e-06, "loss": 0.33570778369903564, "step": 15530, "token_acc": 0.8812537673297167 }, { "epoch": 0.8380186694005288, "grad_norm": 0.40914756059646606, "learning_rate": 1.3448475176951592e-06, "loss": 0.25432026386260986, "step": 15531, "token_acc": 0.9039877300613497 }, { "epoch": 0.8380726272055253, "grad_norm": 0.35403189063072205, "learning_rate": 1.3439723263818161e-06, "loss": 0.2698547840118408, "step": 15532, "token_acc": 0.9018608208004079 }, { "epoch": 0.8381265850105217, "grad_norm": 0.3959309756755829, "learning_rate": 1.3430973994212904e-06, "loss": 0.27511417865753174, "step": 15533, "token_acc": 0.8910103420843277 }, { "epoch": 0.8381805428155182, "grad_norm": 0.46502485871315, "learning_rate": 1.342222736840303e-06, "loss": 0.3438926637172699, "step": 15534, "token_acc": 0.8798598364758885 }, { "epoch": 0.8382345006205147, "grad_norm": 0.5589246153831482, "learning_rate": 1.3413483386655646e-06, "loss": 0.3763214945793152, "step": 15535, "token_acc": 0.8659868900218499 }, { "epoch": 0.8382884584255113, "grad_norm": 0.5098888278007507, "learning_rate": 1.3404742049237796e-06, "loss": 0.323486864566803, "step": 15536, "token_acc": 0.8781431334622823 }, { "epoch": 0.8383424162305078, "grad_norm": 0.4122265577316284, "learning_rate": 1.3396003356416443e-06, "loss": 0.2953486740589142, "step": 15537, "token_acc": 0.8936723163841808 }, { "epoch": 0.8383963740355043, "grad_norm": 0.40112432837486267, "learning_rate": 1.3387267308458462e-06, "loss": 0.3748043179512024, "step": 15538, "token_acc": 0.8699949740325013 }, { "epoch": 0.8384503318405008, "grad_norm": 0.3652191460132599, "learning_rate": 1.3378533905630641e-06, "loss": 0.34307861328125, "step": 15539, "token_acc": 0.881322718922229 }, { "epoch": 0.8385042896454972, "grad_norm": 0.4799826145172119, "learning_rate": 1.33698031481997e-06, "loss": 0.3677169680595398, "step": 15540, "token_acc": 0.8673536146548905 }, { "epoch": 0.8385582474504937, "grad_norm": 0.4444623589515686, "learning_rate": 1.3361075036432292e-06, "loss": 0.35625404119491577, "step": 15541, "token_acc": 0.8747534516765286 }, { "epoch": 0.8386122052554902, "grad_norm": 0.3356722593307495, "learning_rate": 1.3352349570594948e-06, "loss": 0.28404903411865234, "step": 15542, "token_acc": 0.8959232613908873 }, { "epoch": 0.8386661630604867, "grad_norm": 0.34253260493278503, "learning_rate": 1.334362675095412e-06, "loss": 0.28883665800094604, "step": 15543, "token_acc": 0.8951482814244943 }, { "epoch": 0.8387201208654832, "grad_norm": 0.4332761764526367, "learning_rate": 1.3334906577776263e-06, "loss": 0.3498937785625458, "step": 15544, "token_acc": 0.8805370985603543 }, { "epoch": 0.8387740786704797, "grad_norm": 0.4853440523147583, "learning_rate": 1.3326189051327642e-06, "loss": 0.3169337809085846, "step": 15545, "token_acc": 0.8828911253430924 }, { "epoch": 0.8388280364754762, "grad_norm": 0.3572973906993866, "learning_rate": 1.3317474171874512e-06, "loss": 0.31945914030075073, "step": 15546, "token_acc": 0.8854875283446711 }, { "epoch": 0.8388819942804727, "grad_norm": 0.4808759391307831, "learning_rate": 1.3308761939683014e-06, "loss": 0.3204321265220642, "step": 15547, "token_acc": 0.8858267716535433 }, { "epoch": 0.8389359520854691, "grad_norm": 0.40240776538848877, "learning_rate": 1.330005235501922e-06, "loss": 0.349359929561615, "step": 15548, "token_acc": 0.8742660199131989 }, { "epoch": 0.8389899098904656, "grad_norm": 0.4151713252067566, "learning_rate": 1.3291345418149081e-06, "loss": 0.3170737326145172, "step": 15549, "token_acc": 0.8859270874166993 }, { "epoch": 0.8390438676954621, "grad_norm": 0.38771358132362366, "learning_rate": 1.3282641129338592e-06, "loss": 0.3116275668144226, "step": 15550, "token_acc": 0.8866569408596117 }, { "epoch": 0.8390978255004586, "grad_norm": 0.3446125388145447, "learning_rate": 1.3273939488853504e-06, "loss": 0.32848185300827026, "step": 15551, "token_acc": 0.8817765334690761 }, { "epoch": 0.8391517833054551, "grad_norm": 0.4427220821380615, "learning_rate": 1.3265240496959574e-06, "loss": 0.32112830877304077, "step": 15552, "token_acc": 0.8839471855455178 }, { "epoch": 0.8392057411104517, "grad_norm": 0.36546990275382996, "learning_rate": 1.325654415392249e-06, "loss": 0.28970757126808167, "step": 15553, "token_acc": 0.8929111735465745 }, { "epoch": 0.8392596989154482, "grad_norm": 0.3864865303039551, "learning_rate": 1.3247850460007816e-06, "loss": 0.2485308200120926, "step": 15554, "token_acc": 0.9045689019896831 }, { "epoch": 0.8393136567204447, "grad_norm": 0.325977087020874, "learning_rate": 1.3239159415481072e-06, "loss": 0.31852301955223083, "step": 15555, "token_acc": 0.8826312752452394 }, { "epoch": 0.8393676145254411, "grad_norm": 0.5514612793922424, "learning_rate": 1.3230471020607639e-06, "loss": 0.33058080077171326, "step": 15556, "token_acc": 0.8815594670176016 }, { "epoch": 0.8394215723304376, "grad_norm": 0.53351891040802, "learning_rate": 1.3221785275652922e-06, "loss": 0.3460591435432434, "step": 15557, "token_acc": 0.8771515824541921 }, { "epoch": 0.8394755301354341, "grad_norm": 0.584334135055542, "learning_rate": 1.3213102180882142e-06, "loss": 0.3341935873031616, "step": 15558, "token_acc": 0.8719449225473321 }, { "epoch": 0.8395294879404306, "grad_norm": 0.5288856625556946, "learning_rate": 1.3204421736560491e-06, "loss": 0.33353185653686523, "step": 15559, "token_acc": 0.8790397045244691 }, { "epoch": 0.8395834457454271, "grad_norm": 0.509726345539093, "learning_rate": 1.3195743942953054e-06, "loss": 0.346010684967041, "step": 15560, "token_acc": 0.8801324503311259 }, { "epoch": 0.8396374035504236, "grad_norm": 0.5226176977157593, "learning_rate": 1.3187068800324864e-06, "loss": 0.32069313526153564, "step": 15561, "token_acc": 0.8820186014467792 }, { "epoch": 0.8396913613554201, "grad_norm": 0.4238581359386444, "learning_rate": 1.3178396308940834e-06, "loss": 0.3461744487285614, "step": 15562, "token_acc": 0.8793544048419637 }, { "epoch": 0.8397453191604165, "grad_norm": 0.3197568655014038, "learning_rate": 1.3169726469065859e-06, "loss": 0.2769184708595276, "step": 15563, "token_acc": 0.8936936936936937 }, { "epoch": 0.839799276965413, "grad_norm": 0.40678834915161133, "learning_rate": 1.3161059280964683e-06, "loss": 0.3577202260494232, "step": 15564, "token_acc": 0.8685110351258937 }, { "epoch": 0.8398532347704095, "grad_norm": 0.3824602961540222, "learning_rate": 1.3152394744902008e-06, "loss": 0.2930135130882263, "step": 15565, "token_acc": 0.8935597780393476 }, { "epoch": 0.839907192575406, "grad_norm": 0.36671918630599976, "learning_rate": 1.3143732861142456e-06, "loss": 0.3036137819290161, "step": 15566, "token_acc": 0.8861412324781803 }, { "epoch": 0.8399611503804025, "grad_norm": 0.3823612332344055, "learning_rate": 1.3135073629950535e-06, "loss": 0.29607337713241577, "step": 15567, "token_acc": 0.8952304394426581 }, { "epoch": 0.840015108185399, "grad_norm": 0.3647333085536957, "learning_rate": 1.3126417051590723e-06, "loss": 0.29249274730682373, "step": 15568, "token_acc": 0.8908208020050126 }, { "epoch": 0.8400690659903955, "grad_norm": 0.4854024648666382, "learning_rate": 1.3117763126327365e-06, "loss": 0.3365001380443573, "step": 15569, "token_acc": 0.8789349259674007 }, { "epoch": 0.840123023795392, "grad_norm": 0.4668424129486084, "learning_rate": 1.3109111854424771e-06, "loss": 0.37020114064216614, "step": 15570, "token_acc": 0.8708942699592424 }, { "epoch": 0.8401769816003885, "grad_norm": 0.38723573088645935, "learning_rate": 1.3100463236147132e-06, "loss": 0.368720144033432, "step": 15571, "token_acc": 0.8718499427262314 }, { "epoch": 0.840230939405385, "grad_norm": 0.4252732992172241, "learning_rate": 1.3091817271758589e-06, "loss": 0.3417842984199524, "step": 15572, "token_acc": 0.8798026083891435 }, { "epoch": 0.8402848972103815, "grad_norm": 0.3064972460269928, "learning_rate": 1.3083173961523165e-06, "loss": 0.31834936141967773, "step": 15573, "token_acc": 0.8845273931366646 }, { "epoch": 0.840338855015378, "grad_norm": 0.4510189890861511, "learning_rate": 1.307453330570485e-06, "loss": 0.41173839569091797, "step": 15574, "token_acc": 0.8568689220654705 }, { "epoch": 0.8403928128203745, "grad_norm": 0.31162527203559875, "learning_rate": 1.30658953045675e-06, "loss": 0.2888966500759125, "step": 15575, "token_acc": 0.8960816093049861 }, { "epoch": 0.840446770625371, "grad_norm": 0.3419360816478729, "learning_rate": 1.3057259958374945e-06, "loss": 0.2993631362915039, "step": 15576, "token_acc": 0.8921020656136087 }, { "epoch": 0.8405007284303675, "grad_norm": 0.4077843725681305, "learning_rate": 1.3048627267390901e-06, "loss": 0.3235793113708496, "step": 15577, "token_acc": 0.8842975206611571 }, { "epoch": 0.8405546862353639, "grad_norm": 0.5199643969535828, "learning_rate": 1.3039997231878997e-06, "loss": 0.31766295433044434, "step": 15578, "token_acc": 0.8886992794842624 }, { "epoch": 0.8406086440403604, "grad_norm": 0.4720143675804138, "learning_rate": 1.3031369852102805e-06, "loss": 0.3215601146221161, "step": 15579, "token_acc": 0.8841463414634146 }, { "epoch": 0.8406626018453569, "grad_norm": 0.4896141588687897, "learning_rate": 1.3022745128325786e-06, "loss": 0.2748352587223053, "step": 15580, "token_acc": 0.8959788559562016 }, { "epoch": 0.8407165596503534, "grad_norm": 0.33055174350738525, "learning_rate": 1.3014123060811323e-06, "loss": 0.28736016154289246, "step": 15581, "token_acc": 0.8936471193415638 }, { "epoch": 0.8407705174553499, "grad_norm": 0.43810391426086426, "learning_rate": 1.300550364982278e-06, "loss": 0.27310582995414734, "step": 15582, "token_acc": 0.8999191265669227 }, { "epoch": 0.8408244752603464, "grad_norm": 0.3903316259384155, "learning_rate": 1.2996886895623373e-06, "loss": 0.272609144449234, "step": 15583, "token_acc": 0.9017826534110387 }, { "epoch": 0.8408784330653429, "grad_norm": 0.4300435185432434, "learning_rate": 1.2988272798476264e-06, "loss": 0.3111330270767212, "step": 15584, "token_acc": 0.8853768091831642 }, { "epoch": 0.8409323908703394, "grad_norm": 0.4919532835483551, "learning_rate": 1.297966135864449e-06, "loss": 0.3519285321235657, "step": 15585, "token_acc": 0.8773199947347637 }, { "epoch": 0.8409863486753358, "grad_norm": 0.4126531779766083, "learning_rate": 1.2971052576391064e-06, "loss": 0.25554952025413513, "step": 15586, "token_acc": 0.900725467735777 }, { "epoch": 0.8410403064803323, "grad_norm": 0.4555693566799164, "learning_rate": 1.2962446451978883e-06, "loss": 0.2772330045700073, "step": 15587, "token_acc": 0.8968034727703236 }, { "epoch": 0.8410942642853289, "grad_norm": 0.3535146415233612, "learning_rate": 1.2953842985670773e-06, "loss": 0.3165220618247986, "step": 15588, "token_acc": 0.8878266411727215 }, { "epoch": 0.8411482220903254, "grad_norm": 0.5299661159515381, "learning_rate": 1.2945242177729522e-06, "loss": 0.3304973840713501, "step": 15589, "token_acc": 0.8797256607561057 }, { "epoch": 0.8412021798953219, "grad_norm": 0.4702545404434204, "learning_rate": 1.2936644028417755e-06, "loss": 0.3319327235221863, "step": 15590, "token_acc": 0.8802731411229135 }, { "epoch": 0.8412561377003184, "grad_norm": 0.447325199842453, "learning_rate": 1.2928048537998084e-06, "loss": 0.30919888615608215, "step": 15591, "token_acc": 0.8918766718288047 }, { "epoch": 0.8413100955053149, "grad_norm": 0.4096375107765198, "learning_rate": 1.2919455706732997e-06, "loss": 0.3359823226928711, "step": 15592, "token_acc": 0.8765880217785844 }, { "epoch": 0.8413640533103114, "grad_norm": 0.42453888058662415, "learning_rate": 1.291086553488492e-06, "loss": 0.3452960252761841, "step": 15593, "token_acc": 0.8778138308659501 }, { "epoch": 0.8414180111153078, "grad_norm": 0.40759095549583435, "learning_rate": 1.2902278022716174e-06, "loss": 0.31836098432540894, "step": 15594, "token_acc": 0.8883973011834974 }, { "epoch": 0.8414719689203043, "grad_norm": 0.3278256058692932, "learning_rate": 1.2893693170489063e-06, "loss": 0.28759679198265076, "step": 15595, "token_acc": 0.8937715411127524 }, { "epoch": 0.8415259267253008, "grad_norm": 0.5022730231285095, "learning_rate": 1.2885110978465753e-06, "loss": 0.3541359305381775, "step": 15596, "token_acc": 0.8747194719471947 }, { "epoch": 0.8415798845302973, "grad_norm": 0.39358508586883545, "learning_rate": 1.2876531446908325e-06, "loss": 0.25874221324920654, "step": 15597, "token_acc": 0.9060761660248181 }, { "epoch": 0.8416338423352938, "grad_norm": 0.4464957118034363, "learning_rate": 1.28679545760788e-06, "loss": 0.3119669556617737, "step": 15598, "token_acc": 0.8876316816230979 }, { "epoch": 0.8416878001402903, "grad_norm": 0.41236481070518494, "learning_rate": 1.2859380366239115e-06, "loss": 0.34970545768737793, "step": 15599, "token_acc": 0.8732345849121599 }, { "epoch": 0.8417417579452868, "grad_norm": 0.29861682653427124, "learning_rate": 1.2850808817651128e-06, "loss": 0.32856255769729614, "step": 15600, "token_acc": 0.8834355828220859 }, { "epoch": 0.8417957157502832, "grad_norm": 0.44496408104896545, "learning_rate": 1.2842239930576616e-06, "loss": 0.37792688608169556, "step": 15601, "token_acc": 0.8652931854199684 }, { "epoch": 0.8418496735552797, "grad_norm": 0.4317677915096283, "learning_rate": 1.2833673705277262e-06, "loss": 0.3057774305343628, "step": 15602, "token_acc": 0.8876367252237322 }, { "epoch": 0.8419036313602762, "grad_norm": 0.47990167140960693, "learning_rate": 1.2825110142014674e-06, "loss": 0.3655206561088562, "step": 15603, "token_acc": 0.8727595911716782 }, { "epoch": 0.8419575891652727, "grad_norm": 0.44598299264907837, "learning_rate": 1.2816549241050392e-06, "loss": 0.34092119336128235, "step": 15604, "token_acc": 0.8776554504574353 }, { "epoch": 0.8420115469702693, "grad_norm": 0.3536531329154968, "learning_rate": 1.2807991002645847e-06, "loss": 0.3153521716594696, "step": 15605, "token_acc": 0.8859691651313876 }, { "epoch": 0.8420655047752658, "grad_norm": 0.5781804323196411, "learning_rate": 1.2799435427062423e-06, "loss": 0.36124223470687866, "step": 15606, "token_acc": 0.872626582278481 }, { "epoch": 0.8421194625802623, "grad_norm": 0.35479021072387695, "learning_rate": 1.2790882514561376e-06, "loss": 0.3782375752925873, "step": 15607, "token_acc": 0.8740950226244344 }, { "epoch": 0.8421734203852588, "grad_norm": 0.36040377616882324, "learning_rate": 1.278233226540395e-06, "loss": 0.2958337962627411, "step": 15608, "token_acc": 0.8935816822873414 }, { "epoch": 0.8422273781902552, "grad_norm": 0.4014572203159332, "learning_rate": 1.2773784679851253e-06, "loss": 0.30659371614456177, "step": 15609, "token_acc": 0.8873125906819281 }, { "epoch": 0.8422813359952517, "grad_norm": 0.4754887819290161, "learning_rate": 1.2765239758164316e-06, "loss": 0.36390385031700134, "step": 15610, "token_acc": 0.8683713497021648 }, { "epoch": 0.8423352938002482, "grad_norm": 0.3611176311969757, "learning_rate": 1.275669750060411e-06, "loss": 0.29888916015625, "step": 15611, "token_acc": 0.8904334639201315 }, { "epoch": 0.8423892516052447, "grad_norm": 0.33261919021606445, "learning_rate": 1.2748157907431502e-06, "loss": 0.3255007863044739, "step": 15612, "token_acc": 0.8833568866181106 }, { "epoch": 0.8424432094102412, "grad_norm": 0.3835720717906952, "learning_rate": 1.2739620978907297e-06, "loss": 0.27875369787216187, "step": 15613, "token_acc": 0.899205298013245 }, { "epoch": 0.8424971672152377, "grad_norm": 0.5623670816421509, "learning_rate": 1.2731086715292196e-06, "loss": 0.34620529413223267, "step": 15614, "token_acc": 0.8803660565723793 }, { "epoch": 0.8425511250202342, "grad_norm": 0.48910170793533325, "learning_rate": 1.2722555116846857e-06, "loss": 0.3407549560070038, "step": 15615, "token_acc": 0.8814891723524176 }, { "epoch": 0.8426050828252307, "grad_norm": 0.3863896131515503, "learning_rate": 1.2714026183831818e-06, "loss": 0.33619141578674316, "step": 15616, "token_acc": 0.8853783982365907 }, { "epoch": 0.8426590406302271, "grad_norm": 0.3803393542766571, "learning_rate": 1.2705499916507559e-06, "loss": 0.30908048152923584, "step": 15617, "token_acc": 0.8904548366431775 }, { "epoch": 0.8427129984352236, "grad_norm": 0.5493834614753723, "learning_rate": 1.2696976315134469e-06, "loss": 0.35441339015960693, "step": 15618, "token_acc": 0.871418208197316 }, { "epoch": 0.8427669562402201, "grad_norm": 0.39196592569351196, "learning_rate": 1.2688455379972864e-06, "loss": 0.2911088466644287, "step": 15619, "token_acc": 0.8952858840617985 }, { "epoch": 0.8428209140452166, "grad_norm": 0.37843453884124756, "learning_rate": 1.2679937111282913e-06, "loss": 0.3363325893878937, "step": 15620, "token_acc": 0.8800522823245526 }, { "epoch": 0.8428748718502131, "grad_norm": 0.3940262496471405, "learning_rate": 1.2671421509324843e-06, "loss": 0.34803056716918945, "step": 15621, "token_acc": 0.8783830078794107 }, { "epoch": 0.8429288296552097, "grad_norm": 0.3643517792224884, "learning_rate": 1.266290857435868e-06, "loss": 0.30484557151794434, "step": 15622, "token_acc": 0.8911174785100286 }, { "epoch": 0.8429827874602062, "grad_norm": 0.4381439685821533, "learning_rate": 1.26543983066444e-06, "loss": 0.3292773962020874, "step": 15623, "token_acc": 0.881400498200941 }, { "epoch": 0.8430367452652026, "grad_norm": 0.5170519948005676, "learning_rate": 1.2645890706441922e-06, "loss": 0.33429619669914246, "step": 15624, "token_acc": 0.8826036419992251 }, { "epoch": 0.8430907030701991, "grad_norm": 0.48581159114837646, "learning_rate": 1.2637385774011058e-06, "loss": 0.3404451012611389, "step": 15625, "token_acc": 0.8790586833482276 }, { "epoch": 0.8431446608751956, "grad_norm": 0.4963992238044739, "learning_rate": 1.2628883509611523e-06, "loss": 0.34320616722106934, "step": 15626, "token_acc": 0.8723517892113227 }, { "epoch": 0.8431986186801921, "grad_norm": 0.4091140031814575, "learning_rate": 1.262038391350302e-06, "loss": 0.3486233651638031, "step": 15627, "token_acc": 0.8752246469833119 }, { "epoch": 0.8432525764851886, "grad_norm": 0.3427182734012604, "learning_rate": 1.261188698594511e-06, "loss": 0.36625272035598755, "step": 15628, "token_acc": 0.8706179066834805 }, { "epoch": 0.8433065342901851, "grad_norm": 0.3793894946575165, "learning_rate": 1.2603392727197262e-06, "loss": 0.2904973328113556, "step": 15629, "token_acc": 0.8970227670753065 }, { "epoch": 0.8433604920951816, "grad_norm": 0.46586331725120544, "learning_rate": 1.2594901137518923e-06, "loss": 0.3425713777542114, "step": 15630, "token_acc": 0.8743273737198403 }, { "epoch": 0.8434144499001781, "grad_norm": 0.39752712845802307, "learning_rate": 1.2586412217169397e-06, "loss": 0.298844575881958, "step": 15631, "token_acc": 0.8887760284408329 }, { "epoch": 0.8434684077051745, "grad_norm": 0.4415588080883026, "learning_rate": 1.2577925966407945e-06, "loss": 0.34726303815841675, "step": 15632, "token_acc": 0.8773468548718651 }, { "epoch": 0.843522365510171, "grad_norm": 0.4955398142337799, "learning_rate": 1.2569442385493724e-06, "loss": 0.3844306766986847, "step": 15633, "token_acc": 0.8634594426280554 }, { "epoch": 0.8435763233151675, "grad_norm": 0.508470356464386, "learning_rate": 1.256096147468584e-06, "loss": 0.3661589026451111, "step": 15634, "token_acc": 0.8699576349235587 }, { "epoch": 0.843630281120164, "grad_norm": 0.3936828076839447, "learning_rate": 1.2552483234243285e-06, "loss": 0.34029561281204224, "step": 15635, "token_acc": 0.8825859386051775 }, { "epoch": 0.8436842389251605, "grad_norm": 0.4794315695762634, "learning_rate": 1.2544007664424972e-06, "loss": 0.3419405519962311, "step": 15636, "token_acc": 0.8825443305677566 }, { "epoch": 0.843738196730157, "grad_norm": 0.34923264384269714, "learning_rate": 1.2535534765489766e-06, "loss": 0.3327467143535614, "step": 15637, "token_acc": 0.8787760923116815 }, { "epoch": 0.8437921545351535, "grad_norm": 0.4578474760055542, "learning_rate": 1.252706453769642e-06, "loss": 0.33701080083847046, "step": 15638, "token_acc": 0.8801192530990114 }, { "epoch": 0.8438461123401501, "grad_norm": 0.4004373550415039, "learning_rate": 1.2518596981303577e-06, "loss": 0.32245779037475586, "step": 15639, "token_acc": 0.8828375925150118 }, { "epoch": 0.8439000701451465, "grad_norm": 0.3850846588611603, "learning_rate": 1.2510132096569894e-06, "loss": 0.32160496711730957, "step": 15640, "token_acc": 0.8821871476888388 }, { "epoch": 0.843954027950143, "grad_norm": 0.3963530361652374, "learning_rate": 1.2501669883753853e-06, "loss": 0.35321155190467834, "step": 15641, "token_acc": 0.8749818075971474 }, { "epoch": 0.8440079857551395, "grad_norm": 0.3658847510814667, "learning_rate": 1.2493210343113883e-06, "loss": 0.32283663749694824, "step": 15642, "token_acc": 0.8851894374282434 }, { "epoch": 0.844061943560136, "grad_norm": 0.5000734925270081, "learning_rate": 1.2484753474908361e-06, "loss": 0.36035647988319397, "step": 15643, "token_acc": 0.8713040181956028 }, { "epoch": 0.8441159013651325, "grad_norm": 0.43771451711654663, "learning_rate": 1.2476299279395533e-06, "loss": 0.3473831117153168, "step": 15644, "token_acc": 0.8754566589173032 }, { "epoch": 0.844169859170129, "grad_norm": 0.5176702737808228, "learning_rate": 1.2467847756833606e-06, "loss": 0.2952635586261749, "step": 15645, "token_acc": 0.8864176203843662 }, { "epoch": 0.8442238169751255, "grad_norm": 0.3994635045528412, "learning_rate": 1.2459398907480646e-06, "loss": 0.3447521924972534, "step": 15646, "token_acc": 0.8793778801843318 }, { "epoch": 0.8442777747801219, "grad_norm": 0.4777470529079437, "learning_rate": 1.2450952731594735e-06, "loss": 0.30604639649391174, "step": 15647, "token_acc": 0.8883637548891786 }, { "epoch": 0.8443317325851184, "grad_norm": 0.4320744574069977, "learning_rate": 1.2442509229433797e-06, "loss": 0.3396183252334595, "step": 15648, "token_acc": 0.8750480584390619 }, { "epoch": 0.8443856903901149, "grad_norm": 0.40786248445510864, "learning_rate": 1.2434068401255683e-06, "loss": 0.35561296343803406, "step": 15649, "token_acc": 0.8792264655801508 }, { "epoch": 0.8444396481951114, "grad_norm": 0.3736838400363922, "learning_rate": 1.2425630247318188e-06, "loss": 0.2860105633735657, "step": 15650, "token_acc": 0.8945730644759771 }, { "epoch": 0.8444936060001079, "grad_norm": 0.488076776266098, "learning_rate": 1.2417194767879003e-06, "loss": 0.3165719509124756, "step": 15651, "token_acc": 0.8779417630634224 }, { "epoch": 0.8445475638051044, "grad_norm": 0.3325483202934265, "learning_rate": 1.2408761963195748e-06, "loss": 0.26526308059692383, "step": 15652, "token_acc": 0.9000571755288737 }, { "epoch": 0.8446015216101009, "grad_norm": 0.4761490225791931, "learning_rate": 1.2400331833525959e-06, "loss": 0.2851923108100891, "step": 15653, "token_acc": 0.8934834379735873 }, { "epoch": 0.8446554794150974, "grad_norm": 0.5461338758468628, "learning_rate": 1.2391904379127073e-06, "loss": 0.33756309747695923, "step": 15654, "token_acc": 0.8767367296045601 }, { "epoch": 0.8447094372200938, "grad_norm": 0.4774147570133209, "learning_rate": 1.2383479600256488e-06, "loss": 0.30678820610046387, "step": 15655, "token_acc": 0.8888488560619708 }, { "epoch": 0.8447633950250903, "grad_norm": 0.41613566875457764, "learning_rate": 1.2375057497171483e-06, "loss": 0.31277161836624146, "step": 15656, "token_acc": 0.8855670103092783 }, { "epoch": 0.8448173528300869, "grad_norm": 0.39926260709762573, "learning_rate": 1.2366638070129266e-06, "loss": 0.33710604906082153, "step": 15657, "token_acc": 0.8808290155440415 }, { "epoch": 0.8448713106350834, "grad_norm": 0.5061070322990417, "learning_rate": 1.2358221319386965e-06, "loss": 0.33769506216049194, "step": 15658, "token_acc": 0.8814965788324356 }, { "epoch": 0.8449252684400799, "grad_norm": 0.3937469720840454, "learning_rate": 1.2349807245201606e-06, "loss": 0.30931952595710754, "step": 15659, "token_acc": 0.8868624420401855 }, { "epoch": 0.8449792262450764, "grad_norm": 0.43013960123062134, "learning_rate": 1.2341395847830195e-06, "loss": 0.32921090722084045, "step": 15660, "token_acc": 0.8810469048669314 }, { "epoch": 0.8450331840500729, "grad_norm": 0.43689921498298645, "learning_rate": 1.2332987127529595e-06, "loss": 0.2820621430873871, "step": 15661, "token_acc": 0.8980044345898004 }, { "epoch": 0.8450871418550694, "grad_norm": 0.3455735445022583, "learning_rate": 1.2324581084556598e-06, "loss": 0.29979389905929565, "step": 15662, "token_acc": 0.8906951352662865 }, { "epoch": 0.8451410996600658, "grad_norm": 0.4327402412891388, "learning_rate": 1.231617771916792e-06, "loss": 0.2980267405509949, "step": 15663, "token_acc": 0.891867415323311 }, { "epoch": 0.8451950574650623, "grad_norm": 0.40978381037712097, "learning_rate": 1.2307777031620216e-06, "loss": 0.3406123220920563, "step": 15664, "token_acc": 0.8780487804878049 }, { "epoch": 0.8452490152700588, "grad_norm": 0.5000965595245361, "learning_rate": 1.229937902217e-06, "loss": 0.2731216549873352, "step": 15665, "token_acc": 0.8990071362085014 }, { "epoch": 0.8453029730750553, "grad_norm": 0.37935057282447815, "learning_rate": 1.22909836910738e-06, "loss": 0.32908743619918823, "step": 15666, "token_acc": 0.8793303882817949 }, { "epoch": 0.8453569308800518, "grad_norm": 0.4685353934764862, "learning_rate": 1.2282591038587999e-06, "loss": 0.3123832643032074, "step": 15667, "token_acc": 0.8900904443570586 }, { "epoch": 0.8454108886850483, "grad_norm": 0.45295315980911255, "learning_rate": 1.227420106496887e-06, "loss": 0.3843637704849243, "step": 15668, "token_acc": 0.8632304558984804 }, { "epoch": 0.8454648464900448, "grad_norm": 0.42488715052604675, "learning_rate": 1.2265813770472667e-06, "loss": 0.29002994298934937, "step": 15669, "token_acc": 0.8978743806936231 }, { "epoch": 0.8455188042950412, "grad_norm": 0.3625524342060089, "learning_rate": 1.2257429155355515e-06, "loss": 0.33594679832458496, "step": 15670, "token_acc": 0.8772707133362871 }, { "epoch": 0.8455727621000377, "grad_norm": 0.42598456144332886, "learning_rate": 1.2249047219873478e-06, "loss": 0.3433072566986084, "step": 15671, "token_acc": 0.874038864811967 }, { "epoch": 0.8456267199050342, "grad_norm": 0.32218649983406067, "learning_rate": 1.224066796428257e-06, "loss": 0.2818712890148163, "step": 15672, "token_acc": 0.8941078376876043 }, { "epoch": 0.8456806777100307, "grad_norm": 0.4683911204338074, "learning_rate": 1.2232291388838668e-06, "loss": 0.33879929780960083, "step": 15673, "token_acc": 0.8754786742374224 }, { "epoch": 0.8457346355150273, "grad_norm": 0.3729490041732788, "learning_rate": 1.22239174937976e-06, "loss": 0.34374505281448364, "step": 15674, "token_acc": 0.876862863164233 }, { "epoch": 0.8457885933200238, "grad_norm": 0.4495607912540436, "learning_rate": 1.2215546279415103e-06, "loss": 0.3260086476802826, "step": 15675, "token_acc": 0.8842782799233087 }, { "epoch": 0.8458425511250203, "grad_norm": 0.4337056875228882, "learning_rate": 1.2207177745946818e-06, "loss": 0.3253241777420044, "step": 15676, "token_acc": 0.8767561799751022 }, { "epoch": 0.8458965089300168, "grad_norm": 0.48677074909210205, "learning_rate": 1.2198811893648321e-06, "loss": 0.3391559422016144, "step": 15677, "token_acc": 0.8852936141071734 }, { "epoch": 0.8459504667350132, "grad_norm": 0.3978676497936249, "learning_rate": 1.2190448722775094e-06, "loss": 0.29241806268692017, "step": 15678, "token_acc": 0.89392575928009 }, { "epoch": 0.8460044245400097, "grad_norm": 0.35728317499160767, "learning_rate": 1.218208823358259e-06, "loss": 0.3853572607040405, "step": 15679, "token_acc": 0.8683326026596522 }, { "epoch": 0.8460583823450062, "grad_norm": 0.49554869532585144, "learning_rate": 1.2173730426326091e-06, "loss": 0.3283122181892395, "step": 15680, "token_acc": 0.8813667571451381 }, { "epoch": 0.8461123401500027, "grad_norm": 0.3627340793609619, "learning_rate": 1.2165375301260863e-06, "loss": 0.3214898109436035, "step": 15681, "token_acc": 0.8850574712643678 }, { "epoch": 0.8461662979549992, "grad_norm": 0.4634668827056885, "learning_rate": 1.2157022858642065e-06, "loss": 0.34827226400375366, "step": 15682, "token_acc": 0.8781922303049063 }, { "epoch": 0.8462202557599957, "grad_norm": 0.42368513345718384, "learning_rate": 1.2148673098724784e-06, "loss": 0.3162476122379303, "step": 15683, "token_acc": 0.8859120625560826 }, { "epoch": 0.8462742135649922, "grad_norm": 0.46409687399864197, "learning_rate": 1.2140326021764004e-06, "loss": 0.3134271800518036, "step": 15684, "token_acc": 0.8839985080193957 }, { "epoch": 0.8463281713699887, "grad_norm": 0.4206107258796692, "learning_rate": 1.2131981628014645e-06, "loss": 0.2387845367193222, "step": 15685, "token_acc": 0.9101609832092782 }, { "epoch": 0.8463821291749851, "grad_norm": 0.38512328267097473, "learning_rate": 1.2123639917731555e-06, "loss": 0.3120719790458679, "step": 15686, "token_acc": 0.8882875918051798 }, { "epoch": 0.8464360869799816, "grad_norm": 0.4093988239765167, "learning_rate": 1.211530089116949e-06, "loss": 0.33184197545051575, "step": 15687, "token_acc": 0.8884310618066561 }, { "epoch": 0.8464900447849781, "grad_norm": 0.45348069071769714, "learning_rate": 1.21069645485831e-06, "loss": 0.37520015239715576, "step": 15688, "token_acc": 0.870549305095963 }, { "epoch": 0.8465440025899746, "grad_norm": 0.42434144020080566, "learning_rate": 1.2098630890226993e-06, "loss": 0.2802585959434509, "step": 15689, "token_acc": 0.8973265073947668 }, { "epoch": 0.8465979603949711, "grad_norm": 0.46437978744506836, "learning_rate": 1.2090299916355674e-06, "loss": 0.33737242221832275, "step": 15690, "token_acc": 0.8801153376954641 }, { "epoch": 0.8466519181999677, "grad_norm": 0.43192997574806213, "learning_rate": 1.2081971627223544e-06, "loss": 0.35719412565231323, "step": 15691, "token_acc": 0.8716568819308546 }, { "epoch": 0.8467058760049642, "grad_norm": 0.4779960811138153, "learning_rate": 1.2073646023084996e-06, "loss": 0.33988291025161743, "step": 15692, "token_acc": 0.8825154371140721 }, { "epoch": 0.8467598338099606, "grad_norm": 0.4273067116737366, "learning_rate": 1.2065323104194248e-06, "loss": 0.35074329376220703, "step": 15693, "token_acc": 0.8763536447389688 }, { "epoch": 0.8468137916149571, "grad_norm": 0.5672670602798462, "learning_rate": 1.2057002870805512e-06, "loss": 0.2940653860569, "step": 15694, "token_acc": 0.8926428095132252 }, { "epoch": 0.8468677494199536, "grad_norm": 0.3689206540584564, "learning_rate": 1.204868532317287e-06, "loss": 0.31170570850372314, "step": 15695, "token_acc": 0.8913889358822534 }, { "epoch": 0.8469217072249501, "grad_norm": 0.5018559694290161, "learning_rate": 1.2040370461550332e-06, "loss": 0.33706703782081604, "step": 15696, "token_acc": 0.8824072364777552 }, { "epoch": 0.8469756650299466, "grad_norm": 0.5011491179466248, "learning_rate": 1.2032058286191817e-06, "loss": 0.36484652757644653, "step": 15697, "token_acc": 0.8703780639800581 }, { "epoch": 0.8470296228349431, "grad_norm": 0.46052712202072144, "learning_rate": 1.202374879735122e-06, "loss": 0.3150031268596649, "step": 15698, "token_acc": 0.8918706636994371 }, { "epoch": 0.8470835806399396, "grad_norm": 0.3353431820869446, "learning_rate": 1.2015441995282284e-06, "loss": 0.3580227494239807, "step": 15699, "token_acc": 0.8778581427904807 }, { "epoch": 0.8471375384449361, "grad_norm": 0.4587981700897217, "learning_rate": 1.2007137880238707e-06, "loss": 0.3211672902107239, "step": 15700, "token_acc": 0.8852459016393442 }, { "epoch": 0.8471914962499325, "grad_norm": 0.34862470626831055, "learning_rate": 1.1998836452474105e-06, "loss": 0.33937501907348633, "step": 15701, "token_acc": 0.8808120497707924 }, { "epoch": 0.847245454054929, "grad_norm": 0.308012455701828, "learning_rate": 1.1990537712241967e-06, "loss": 0.2877975106239319, "step": 15702, "token_acc": 0.8945141728952193 }, { "epoch": 0.8472994118599255, "grad_norm": 0.3647390604019165, "learning_rate": 1.1982241659795757e-06, "loss": 0.28699374198913574, "step": 15703, "token_acc": 0.8911008413319114 }, { "epoch": 0.847353369664922, "grad_norm": 0.4228472411632538, "learning_rate": 1.1973948295388793e-06, "loss": 0.32370662689208984, "step": 15704, "token_acc": 0.8814661134163209 }, { "epoch": 0.8474073274699185, "grad_norm": 0.2673467695713043, "learning_rate": 1.1965657619274417e-06, "loss": 0.2646147608757019, "step": 15705, "token_acc": 0.8954572803850782 }, { "epoch": 0.847461285274915, "grad_norm": 0.32335400581359863, "learning_rate": 1.1957369631705806e-06, "loss": 0.306045800447464, "step": 15706, "token_acc": 0.8894569996932201 }, { "epoch": 0.8475152430799116, "grad_norm": 0.416324645280838, "learning_rate": 1.1949084332936045e-06, "loss": 0.30697327852249146, "step": 15707, "token_acc": 0.8918808649530804 }, { "epoch": 0.847569200884908, "grad_norm": 0.43822821974754333, "learning_rate": 1.1940801723218198e-06, "loss": 0.312740683555603, "step": 15708, "token_acc": 0.8877412031782066 }, { "epoch": 0.8476231586899045, "grad_norm": 0.502977192401886, "learning_rate": 1.1932521802805185e-06, "loss": 0.37601789832115173, "step": 15709, "token_acc": 0.8689446366782007 }, { "epoch": 0.847677116494901, "grad_norm": 0.3288362920284271, "learning_rate": 1.1924244571949873e-06, "loss": 0.37762022018432617, "step": 15710, "token_acc": 0.8683335970881468 }, { "epoch": 0.8477310742998975, "grad_norm": 0.4767155349254608, "learning_rate": 1.1915970030905078e-06, "loss": 0.3690759837627411, "step": 15711, "token_acc": 0.8714830011723329 }, { "epoch": 0.847785032104894, "grad_norm": 0.4796004891395569, "learning_rate": 1.1907698179923478e-06, "loss": 0.341974139213562, "step": 15712, "token_acc": 0.8765660592255126 }, { "epoch": 0.8478389899098905, "grad_norm": 0.4795040488243103, "learning_rate": 1.1899429019257703e-06, "loss": 0.41678738594055176, "step": 15713, "token_acc": 0.8568318606735817 }, { "epoch": 0.847892947714887, "grad_norm": 0.4851629137992859, "learning_rate": 1.1891162549160285e-06, "loss": 0.34342727065086365, "step": 15714, "token_acc": 0.869812855980472 }, { "epoch": 0.8479469055198835, "grad_norm": 0.3238915205001831, "learning_rate": 1.188289876988369e-06, "loss": 0.3068313002586365, "step": 15715, "token_acc": 0.8892287234042553 }, { "epoch": 0.8480008633248799, "grad_norm": 0.6003554463386536, "learning_rate": 1.1874637681680267e-06, "loss": 0.3047187030315399, "step": 15716, "token_acc": 0.8871978513876455 }, { "epoch": 0.8480548211298764, "grad_norm": 0.5187963247299194, "learning_rate": 1.186637928480233e-06, "loss": 0.3739728629589081, "step": 15717, "token_acc": 0.8713794796269023 }, { "epoch": 0.8481087789348729, "grad_norm": 0.3723873198032379, "learning_rate": 1.185812357950209e-06, "loss": 0.37427645921707153, "step": 15718, "token_acc": 0.872785622593068 }, { "epoch": 0.8481627367398694, "grad_norm": 0.4776165187358856, "learning_rate": 1.1849870566031662e-06, "loss": 0.3183136582374573, "step": 15719, "token_acc": 0.8883510306058713 }, { "epoch": 0.8482166945448659, "grad_norm": 0.42951807379722595, "learning_rate": 1.1841620244643093e-06, "loss": 0.3337063789367676, "step": 15720, "token_acc": 0.8801392794964511 }, { "epoch": 0.8482706523498624, "grad_norm": 0.43955421447753906, "learning_rate": 1.1833372615588346e-06, "loss": 0.3150843381881714, "step": 15721, "token_acc": 0.8855555555555555 }, { "epoch": 0.8483246101548589, "grad_norm": 0.4060700237751007, "learning_rate": 1.1825127679119298e-06, "loss": 0.31392425298690796, "step": 15722, "token_acc": 0.8871655221406582 }, { "epoch": 0.8483785679598554, "grad_norm": 0.39620184898376465, "learning_rate": 1.1816885435487735e-06, "loss": 0.2984784245491028, "step": 15723, "token_acc": 0.888927255985267 }, { "epoch": 0.8484325257648518, "grad_norm": 0.3308030962944031, "learning_rate": 1.1808645884945413e-06, "loss": 0.2904761731624603, "step": 15724, "token_acc": 0.892468841156192 }, { "epoch": 0.8484864835698483, "grad_norm": 0.4336133599281311, "learning_rate": 1.1800409027743942e-06, "loss": 0.3103407621383667, "step": 15725, "token_acc": 0.8955856225228919 }, { "epoch": 0.8485404413748449, "grad_norm": 0.4588630795478821, "learning_rate": 1.1792174864134876e-06, "loss": 0.2730479836463928, "step": 15726, "token_acc": 0.8971238938053098 }, { "epoch": 0.8485943991798414, "grad_norm": 0.4227932095527649, "learning_rate": 1.178394339436968e-06, "loss": 0.330318808555603, "step": 15727, "token_acc": 0.8818865896264445 }, { "epoch": 0.8486483569848379, "grad_norm": 0.3762088716030121, "learning_rate": 1.177571461869974e-06, "loss": 0.3399274945259094, "step": 15728, "token_acc": 0.875387309713054 }, { "epoch": 0.8487023147898344, "grad_norm": 0.39422139525413513, "learning_rate": 1.1767488537376348e-06, "loss": 0.3566248416900635, "step": 15729, "token_acc": 0.8727166447464563 }, { "epoch": 0.8487562725948309, "grad_norm": 0.45839923620224, "learning_rate": 1.1759265150650767e-06, "loss": 0.32859691977500916, "step": 15730, "token_acc": 0.8825279863964858 }, { "epoch": 0.8488102303998273, "grad_norm": 0.4643842875957489, "learning_rate": 1.1751044458774108e-06, "loss": 0.3585360646247864, "step": 15731, "token_acc": 0.8691350622095638 }, { "epoch": 0.8488641882048238, "grad_norm": 0.5318546891212463, "learning_rate": 1.1742826461997436e-06, "loss": 0.32802480459213257, "step": 15732, "token_acc": 0.8826578699340245 }, { "epoch": 0.8489181460098203, "grad_norm": 0.36830759048461914, "learning_rate": 1.1734611160571718e-06, "loss": 0.35397180914878845, "step": 15733, "token_acc": 0.8714938030006523 }, { "epoch": 0.8489721038148168, "grad_norm": 0.37584221363067627, "learning_rate": 1.1726398554747853e-06, "loss": 0.3046063482761383, "step": 15734, "token_acc": 0.8895784846133954 }, { "epoch": 0.8490260616198133, "grad_norm": 0.4361528754234314, "learning_rate": 1.1718188644776651e-06, "loss": 0.36765098571777344, "step": 15735, "token_acc": 0.8689492325855962 }, { "epoch": 0.8490800194248098, "grad_norm": 0.4190532863140106, "learning_rate": 1.1709981430908845e-06, "loss": 0.333099365234375, "step": 15736, "token_acc": 0.879249559304961 }, { "epoch": 0.8491339772298063, "grad_norm": 0.5018508434295654, "learning_rate": 1.170177691339508e-06, "loss": 0.39146390557289124, "step": 15737, "token_acc": 0.8599120153973054 }, { "epoch": 0.8491879350348028, "grad_norm": 0.31524786353111267, "learning_rate": 1.169357509248591e-06, "loss": 0.3179129362106323, "step": 15738, "token_acc": 0.8842324268093328 }, { "epoch": 0.8492418928397992, "grad_norm": 0.46415120363235474, "learning_rate": 1.1685375968431833e-06, "loss": 0.3120220899581909, "step": 15739, "token_acc": 0.8875484704868591 }, { "epoch": 0.8492958506447957, "grad_norm": 0.5467674136161804, "learning_rate": 1.167717954148323e-06, "loss": 0.3640674948692322, "step": 15740, "token_acc": 0.8724913233740758 }, { "epoch": 0.8493498084497922, "grad_norm": 0.49539363384246826, "learning_rate": 1.1668985811890421e-06, "loss": 0.3356040120124817, "step": 15741, "token_acc": 0.8801364300795842 }, { "epoch": 0.8494037662547888, "grad_norm": 0.3959762156009674, "learning_rate": 1.166079477990364e-06, "loss": 0.3339654505252838, "step": 15742, "token_acc": 0.8812360874689014 }, { "epoch": 0.8494577240597853, "grad_norm": 0.4674740731716156, "learning_rate": 1.165260644577305e-06, "loss": 0.3557642102241516, "step": 15743, "token_acc": 0.8763744427934621 }, { "epoch": 0.8495116818647818, "grad_norm": 0.3926110565662384, "learning_rate": 1.1644420809748724e-06, "loss": 0.29032570123672485, "step": 15744, "token_acc": 0.8946527481891777 }, { "epoch": 0.8495656396697783, "grad_norm": 0.4499165713787079, "learning_rate": 1.1636237872080647e-06, "loss": 0.3976288437843323, "step": 15745, "token_acc": 0.8649823417549579 }, { "epoch": 0.8496195974747748, "grad_norm": 0.3843984603881836, "learning_rate": 1.1628057633018708e-06, "loss": 0.30046239495277405, "step": 15746, "token_acc": 0.8889018147975802 }, { "epoch": 0.8496735552797712, "grad_norm": 0.41955533623695374, "learning_rate": 1.161988009281274e-06, "loss": 0.3141714930534363, "step": 15747, "token_acc": 0.8850620744893872 }, { "epoch": 0.8497275130847677, "grad_norm": 0.40517371892929077, "learning_rate": 1.161170525171248e-06, "loss": 0.34684914350509644, "step": 15748, "token_acc": 0.8771929824561403 }, { "epoch": 0.8497814708897642, "grad_norm": 0.43941718339920044, "learning_rate": 1.1603533109967591e-06, "loss": 0.31190943717956543, "step": 15749, "token_acc": 0.8881932021466905 }, { "epoch": 0.8498354286947607, "grad_norm": 0.4303135871887207, "learning_rate": 1.1595363667827642e-06, "loss": 0.3443385362625122, "step": 15750, "token_acc": 0.8771354380210469 }, { "epoch": 0.8498893864997572, "grad_norm": 0.4560657739639282, "learning_rate": 1.1587196925542133e-06, "loss": 0.2769354581832886, "step": 15751, "token_acc": 0.9014869888475836 }, { "epoch": 0.8499433443047537, "grad_norm": 0.3936648964881897, "learning_rate": 1.1579032883360464e-06, "loss": 0.3335445523262024, "step": 15752, "token_acc": 0.8778382684902404 }, { "epoch": 0.8499973021097502, "grad_norm": 0.4760504961013794, "learning_rate": 1.157087154153197e-06, "loss": 0.3910282254219055, "step": 15753, "token_acc": 0.8667487684729064 }, { "epoch": 0.8500512599147466, "grad_norm": 0.4549010992050171, "learning_rate": 1.1562712900305884e-06, "loss": 0.3025854229927063, "step": 15754, "token_acc": 0.8891799314930485 }, { "epoch": 0.8501052177197431, "grad_norm": 0.4079090356826782, "learning_rate": 1.1554556959931363e-06, "loss": 0.30474358797073364, "step": 15755, "token_acc": 0.895603462909523 }, { "epoch": 0.8501591755247396, "grad_norm": 0.3598255217075348, "learning_rate": 1.1546403720657529e-06, "loss": 0.36611267924308777, "step": 15756, "token_acc": 0.874188222344524 }, { "epoch": 0.8502131333297361, "grad_norm": 0.31673720479011536, "learning_rate": 1.153825318273335e-06, "loss": 0.3177526593208313, "step": 15757, "token_acc": 0.8866659088325566 }, { "epoch": 0.8502670911347326, "grad_norm": 0.3832271099090576, "learning_rate": 1.153010534640775e-06, "loss": 0.3415542244911194, "step": 15758, "token_acc": 0.8788681349316978 }, { "epoch": 0.8503210489397292, "grad_norm": 0.4084645211696625, "learning_rate": 1.152196021192955e-06, "loss": 0.3025296628475189, "step": 15759, "token_acc": 0.8900652902770425 }, { "epoch": 0.8503750067447257, "grad_norm": 0.40855085849761963, "learning_rate": 1.151381777954751e-06, "loss": 0.3252681493759155, "step": 15760, "token_acc": 0.882916368834882 }, { "epoch": 0.8504289645497222, "grad_norm": 0.37596914172172546, "learning_rate": 1.1505678049510295e-06, "loss": 0.36790961027145386, "step": 15761, "token_acc": 0.8727678571428571 }, { "epoch": 0.8504829223547186, "grad_norm": 0.4227997660636902, "learning_rate": 1.1497541022066472e-06, "loss": 0.39092308282852173, "step": 15762, "token_acc": 0.867283589082816 }, { "epoch": 0.8505368801597151, "grad_norm": 0.3888850808143616, "learning_rate": 1.1489406697464577e-06, "loss": 0.31358763575553894, "step": 15763, "token_acc": 0.8901438752185021 }, { "epoch": 0.8505908379647116, "grad_norm": 0.3533932864665985, "learning_rate": 1.1481275075953013e-06, "loss": 0.3208739757537842, "step": 15764, "token_acc": 0.8856394587516369 }, { "epoch": 0.8506447957697081, "grad_norm": 0.45823580026626587, "learning_rate": 1.1473146157780124e-06, "loss": 0.3394540846347809, "step": 15765, "token_acc": 0.8814306712395884 }, { "epoch": 0.8506987535747046, "grad_norm": 0.575310468673706, "learning_rate": 1.1465019943194155e-06, "loss": 0.35285842418670654, "step": 15766, "token_acc": 0.8755261575466026 }, { "epoch": 0.8507527113797011, "grad_norm": 0.47320857644081116, "learning_rate": 1.1456896432443287e-06, "loss": 0.35216495394706726, "step": 15767, "token_acc": 0.8726321036889332 }, { "epoch": 0.8508066691846976, "grad_norm": 0.46079912781715393, "learning_rate": 1.144877562577561e-06, "loss": 0.2927951216697693, "step": 15768, "token_acc": 0.8907801418439716 }, { "epoch": 0.8508606269896941, "grad_norm": 0.5622753500938416, "learning_rate": 1.1440657523439126e-06, "loss": 0.3582175374031067, "step": 15769, "token_acc": 0.8759029449898129 }, { "epoch": 0.8509145847946905, "grad_norm": 0.35934871435165405, "learning_rate": 1.1432542125681766e-06, "loss": 0.32910528779029846, "step": 15770, "token_acc": 0.8850224502886466 }, { "epoch": 0.850968542599687, "grad_norm": 0.4791560173034668, "learning_rate": 1.142442943275136e-06, "loss": 0.3288135528564453, "step": 15771, "token_acc": 0.8848501288485013 }, { "epoch": 0.8510225004046835, "grad_norm": 0.4413406550884247, "learning_rate": 1.1416319444895686e-06, "loss": 0.3224642872810364, "step": 15772, "token_acc": 0.8811927317906507 }, { "epoch": 0.85107645820968, "grad_norm": 0.374620258808136, "learning_rate": 1.1408212162362399e-06, "loss": 0.31022709608078003, "step": 15773, "token_acc": 0.8875694795351188 }, { "epoch": 0.8511304160146765, "grad_norm": 0.43561050295829773, "learning_rate": 1.140010758539909e-06, "loss": 0.35947874188423157, "step": 15774, "token_acc": 0.8716437459070072 }, { "epoch": 0.851184373819673, "grad_norm": 0.43735527992248535, "learning_rate": 1.1392005714253307e-06, "loss": 0.3743307888507843, "step": 15775, "token_acc": 0.8740086978766948 }, { "epoch": 0.8512383316246696, "grad_norm": 0.4089871346950531, "learning_rate": 1.1383906549172452e-06, "loss": 0.34116169810295105, "step": 15776, "token_acc": 0.8803573740774311 }, { "epoch": 0.851292289429666, "grad_norm": 0.38284415006637573, "learning_rate": 1.1375810090403882e-06, "loss": 0.2887735068798065, "step": 15777, "token_acc": 0.8961389961389962 }, { "epoch": 0.8513462472346625, "grad_norm": 0.4662444293498993, "learning_rate": 1.1367716338194857e-06, "loss": 0.3867928981781006, "step": 15778, "token_acc": 0.8661803546723682 }, { "epoch": 0.851400205039659, "grad_norm": 0.34535250067710876, "learning_rate": 1.1359625292792553e-06, "loss": 0.30994492769241333, "step": 15779, "token_acc": 0.892914753691525 }, { "epoch": 0.8514541628446555, "grad_norm": 0.4426482915878296, "learning_rate": 1.1351536954444064e-06, "loss": 0.37858614325523376, "step": 15780, "token_acc": 0.8685249971786481 }, { "epoch": 0.851508120649652, "grad_norm": 0.5074166059494019, "learning_rate": 1.1343451323396404e-06, "loss": 0.344093918800354, "step": 15781, "token_acc": 0.8822585819698427 }, { "epoch": 0.8515620784546485, "grad_norm": 0.5712068676948547, "learning_rate": 1.133536839989654e-06, "loss": 0.386374831199646, "step": 15782, "token_acc": 0.8695652173913043 }, { "epoch": 0.851616036259645, "grad_norm": 0.4081270694732666, "learning_rate": 1.1327288184191309e-06, "loss": 0.3287442922592163, "step": 15783, "token_acc": 0.88283745167255 }, { "epoch": 0.8516699940646415, "grad_norm": 0.4233401119709015, "learning_rate": 1.1319210676527448e-06, "loss": 0.2977706789970398, "step": 15784, "token_acc": 0.8931319464234825 }, { "epoch": 0.8517239518696379, "grad_norm": 0.3474583029747009, "learning_rate": 1.131113587715167e-06, "loss": 0.3028363585472107, "step": 15785, "token_acc": 0.8916638157144486 }, { "epoch": 0.8517779096746344, "grad_norm": 0.41817161440849304, "learning_rate": 1.1303063786310563e-06, "loss": 0.27705955505371094, "step": 15786, "token_acc": 0.8943454954176033 }, { "epoch": 0.8518318674796309, "grad_norm": 0.41669604182243347, "learning_rate": 1.1294994404250625e-06, "loss": 0.2840915024280548, "step": 15787, "token_acc": 0.893840579710145 }, { "epoch": 0.8518858252846274, "grad_norm": 0.5322192907333374, "learning_rate": 1.1286927731218356e-06, "loss": 0.310322642326355, "step": 15788, "token_acc": 0.8905717531238168 }, { "epoch": 0.8519397830896239, "grad_norm": 0.4022575914859772, "learning_rate": 1.1278863767460058e-06, "loss": 0.30115020275115967, "step": 15789, "token_acc": 0.8935257967700443 }, { "epoch": 0.8519937408946204, "grad_norm": 0.3693179488182068, "learning_rate": 1.1270802513222035e-06, "loss": 0.3324471116065979, "step": 15790, "token_acc": 0.8835745752045312 }, { "epoch": 0.8520476986996169, "grad_norm": 0.2596522569656372, "learning_rate": 1.1262743968750445e-06, "loss": 0.32252660393714905, "step": 15791, "token_acc": 0.8848097002357697 }, { "epoch": 0.8521016565046134, "grad_norm": 0.47970470786094666, "learning_rate": 1.1254688134291413e-06, "loss": 0.3967150151729584, "step": 15792, "token_acc": 0.8654690618762475 }, { "epoch": 0.8521556143096098, "grad_norm": 0.4086666703224182, "learning_rate": 1.1246635010090968e-06, "loss": 0.3574584722518921, "step": 15793, "token_acc": 0.8740035429583702 }, { "epoch": 0.8522095721146064, "grad_norm": 0.46507155895233154, "learning_rate": 1.123858459639501e-06, "loss": 0.32652372121810913, "step": 15794, "token_acc": 0.8822348611575778 }, { "epoch": 0.8522635299196029, "grad_norm": 0.4242047667503357, "learning_rate": 1.123053689344944e-06, "loss": 0.33064669370651245, "step": 15795, "token_acc": 0.8798586572438163 }, { "epoch": 0.8523174877245994, "grad_norm": 0.35411450266838074, "learning_rate": 1.122249190150002e-06, "loss": 0.3395766615867615, "step": 15796, "token_acc": 0.8791377983063895 }, { "epoch": 0.8523714455295959, "grad_norm": 0.46896716952323914, "learning_rate": 1.1214449620792457e-06, "loss": 0.317801296710968, "step": 15797, "token_acc": 0.8858867490658235 }, { "epoch": 0.8524254033345924, "grad_norm": 0.4242614209651947, "learning_rate": 1.1206410051572325e-06, "loss": 0.3763929009437561, "step": 15798, "token_acc": 0.8699174326219037 }, { "epoch": 0.8524793611395889, "grad_norm": 0.3576166331768036, "learning_rate": 1.1198373194085177e-06, "loss": 0.30869418382644653, "step": 15799, "token_acc": 0.8877870563674322 }, { "epoch": 0.8525333189445853, "grad_norm": 0.4747467339038849, "learning_rate": 1.1190339048576447e-06, "loss": 0.3541415333747864, "step": 15800, "token_acc": 0.8778470912519948 }, { "epoch": 0.8525872767495818, "grad_norm": 0.6022900938987732, "learning_rate": 1.118230761529151e-06, "loss": 0.37214338779449463, "step": 15801, "token_acc": 0.8690305063088963 }, { "epoch": 0.8526412345545783, "grad_norm": 0.40855729579925537, "learning_rate": 1.1174278894475622e-06, "loss": 0.3631322383880615, "step": 15802, "token_acc": 0.8702673361378749 }, { "epoch": 0.8526951923595748, "grad_norm": 0.4517936110496521, "learning_rate": 1.1166252886373985e-06, "loss": 0.3204715847969055, "step": 15803, "token_acc": 0.8874965574221978 }, { "epoch": 0.8527491501645713, "grad_norm": 0.39228546619415283, "learning_rate": 1.1158229591231706e-06, "loss": 0.28319698572158813, "step": 15804, "token_acc": 0.9036559784237339 }, { "epoch": 0.8528031079695678, "grad_norm": 0.398110568523407, "learning_rate": 1.1150209009293833e-06, "loss": 0.29491040110588074, "step": 15805, "token_acc": 0.894615849969752 }, { "epoch": 0.8528570657745643, "grad_norm": 0.4449279308319092, "learning_rate": 1.1142191140805303e-06, "loss": 0.3283509314060211, "step": 15806, "token_acc": 0.8820431806213797 }, { "epoch": 0.8529110235795608, "grad_norm": 0.4814535081386566, "learning_rate": 1.1134175986010953e-06, "loss": 0.3092171549797058, "step": 15807, "token_acc": 0.8919779286926995 }, { "epoch": 0.8529649813845572, "grad_norm": 0.5145282745361328, "learning_rate": 1.112616354515561e-06, "loss": 0.33628782629966736, "step": 15808, "token_acc": 0.874095513748191 }, { "epoch": 0.8530189391895537, "grad_norm": 0.3862193524837494, "learning_rate": 1.1118153818483946e-06, "loss": 0.28837254643440247, "step": 15809, "token_acc": 0.8973087818696884 }, { "epoch": 0.8530728969945502, "grad_norm": 0.43088915944099426, "learning_rate": 1.1110146806240584e-06, "loss": 0.31010735034942627, "step": 15810, "token_acc": 0.8868702290076336 }, { "epoch": 0.8531268547995468, "grad_norm": 0.5018097758293152, "learning_rate": 1.1102142508670056e-06, "loss": 0.31257352232933044, "step": 15811, "token_acc": 0.8881658595641646 }, { "epoch": 0.8531808126045433, "grad_norm": 0.44906648993492126, "learning_rate": 1.1094140926016805e-06, "loss": 0.2926095128059387, "step": 15812, "token_acc": 0.8912391738840773 }, { "epoch": 0.8532347704095398, "grad_norm": 0.3822282552719116, "learning_rate": 1.1086142058525184e-06, "loss": 0.29613903164863586, "step": 15813, "token_acc": 0.8902575587905935 }, { "epoch": 0.8532887282145363, "grad_norm": 0.38403427600860596, "learning_rate": 1.1078145906439509e-06, "loss": 0.36964529752731323, "step": 15814, "token_acc": 0.8691121895518127 }, { "epoch": 0.8533426860195327, "grad_norm": 0.3782002627849579, "learning_rate": 1.1070152470003959e-06, "loss": 0.3361290693283081, "step": 15815, "token_acc": 0.8797906943855183 }, { "epoch": 0.8533966438245292, "grad_norm": 0.4399968087673187, "learning_rate": 1.1062161749462664e-06, "loss": 0.3028686046600342, "step": 15816, "token_acc": 0.8938132064247472 }, { "epoch": 0.8534506016295257, "grad_norm": 0.37850964069366455, "learning_rate": 1.105417374505965e-06, "loss": 0.33036115765571594, "step": 15817, "token_acc": 0.884875611533257 }, { "epoch": 0.8535045594345222, "grad_norm": 0.45090970396995544, "learning_rate": 1.1046188457038887e-06, "loss": 0.33042800426483154, "step": 15818, "token_acc": 0.8807553327893693 }, { "epoch": 0.8535585172395187, "grad_norm": 0.3963156044483185, "learning_rate": 1.1038205885644205e-06, "loss": 0.3044865131378174, "step": 15819, "token_acc": 0.8898699439207732 }, { "epoch": 0.8536124750445152, "grad_norm": 0.4441504180431366, "learning_rate": 1.1030226031119384e-06, "loss": 0.2621876001358032, "step": 15820, "token_acc": 0.902502801643631 }, { "epoch": 0.8536664328495117, "grad_norm": 0.48580291867256165, "learning_rate": 1.1022248893708177e-06, "loss": 0.3103564977645874, "step": 15821, "token_acc": 0.8864164323266903 }, { "epoch": 0.8537203906545082, "grad_norm": 0.4900679290294647, "learning_rate": 1.1014274473654186e-06, "loss": 0.3210740089416504, "step": 15822, "token_acc": 0.8862094951017332 }, { "epoch": 0.8537743484595046, "grad_norm": 0.42808809876441956, "learning_rate": 1.100630277120094e-06, "loss": 0.24741072952747345, "step": 15823, "token_acc": 0.9063870352716873 }, { "epoch": 0.8538283062645011, "grad_norm": 0.4136331081390381, "learning_rate": 1.0998333786591886e-06, "loss": 0.298598051071167, "step": 15824, "token_acc": 0.8890863320005923 }, { "epoch": 0.8538822640694976, "grad_norm": 0.43037572503089905, "learning_rate": 1.099036752007041e-06, "loss": 0.3763602077960968, "step": 15825, "token_acc": 0.8680258988841438 }, { "epoch": 0.8539362218744941, "grad_norm": 0.4494134187698364, "learning_rate": 1.0982403971879774e-06, "loss": 0.31377801299095154, "step": 15826, "token_acc": 0.8883652430044182 }, { "epoch": 0.8539901796794906, "grad_norm": 0.4260534346103668, "learning_rate": 1.0974443142263214e-06, "loss": 0.3443455100059509, "step": 15827, "token_acc": 0.8814229249011858 }, { "epoch": 0.8540441374844872, "grad_norm": 0.4141964912414551, "learning_rate": 1.0966485031463847e-06, "loss": 0.31756964325904846, "step": 15828, "token_acc": 0.8878687019396304 }, { "epoch": 0.8540980952894837, "grad_norm": 0.35488203167915344, "learning_rate": 1.0958529639724702e-06, "loss": 0.33696383237838745, "step": 15829, "token_acc": 0.8792873051224944 }, { "epoch": 0.8541520530944802, "grad_norm": 0.5261135697364807, "learning_rate": 1.0950576967288729e-06, "loss": 0.34574592113494873, "step": 15830, "token_acc": 0.8812592047128129 }, { "epoch": 0.8542060108994766, "grad_norm": 0.44203776121139526, "learning_rate": 1.0942627014398822e-06, "loss": 0.351368248462677, "step": 15831, "token_acc": 0.8772405482753427 }, { "epoch": 0.8542599687044731, "grad_norm": 0.4661758244037628, "learning_rate": 1.0934679781297742e-06, "loss": 0.2889643907546997, "step": 15832, "token_acc": 0.8981125160344512 }, { "epoch": 0.8543139265094696, "grad_norm": 0.4043564796447754, "learning_rate": 1.0926735268228216e-06, "loss": 0.3480323851108551, "step": 15833, "token_acc": 0.876592977893368 }, { "epoch": 0.8543678843144661, "grad_norm": 0.4751329720020294, "learning_rate": 1.0918793475432855e-06, "loss": 0.37746864557266235, "step": 15834, "token_acc": 0.8694773706896551 }, { "epoch": 0.8544218421194626, "grad_norm": 0.5564302802085876, "learning_rate": 1.0910854403154215e-06, "loss": 0.4168533682823181, "step": 15835, "token_acc": 0.8538721573448064 }, { "epoch": 0.8544757999244591, "grad_norm": 0.39302897453308105, "learning_rate": 1.0902918051634737e-06, "loss": 0.29591652750968933, "step": 15836, "token_acc": 0.8928413935028664 }, { "epoch": 0.8545297577294556, "grad_norm": 0.3513730466365814, "learning_rate": 1.0894984421116806e-06, "loss": 0.3209007978439331, "step": 15837, "token_acc": 0.8854909955827387 }, { "epoch": 0.854583715534452, "grad_norm": 0.4980480670928955, "learning_rate": 1.0887053511842694e-06, "loss": 0.38853299617767334, "step": 15838, "token_acc": 0.8678370786516854 }, { "epoch": 0.8546376733394485, "grad_norm": 0.3920546770095825, "learning_rate": 1.0879125324054619e-06, "loss": 0.3816182017326355, "step": 15839, "token_acc": 0.8692149739377666 }, { "epoch": 0.854691631144445, "grad_norm": 0.42668694257736206, "learning_rate": 1.087119985799473e-06, "loss": 0.3099069595336914, "step": 15840, "token_acc": 0.886993006993007 }, { "epoch": 0.8547455889494415, "grad_norm": 0.41622576117515564, "learning_rate": 1.0863277113905035e-06, "loss": 0.2720930576324463, "step": 15841, "token_acc": 0.9015962441314554 }, { "epoch": 0.854799546754438, "grad_norm": 0.4498242735862732, "learning_rate": 1.0855357092027518e-06, "loss": 0.3608081638813019, "step": 15842, "token_acc": 0.8754541491062345 }, { "epoch": 0.8548535045594345, "grad_norm": 0.4489472806453705, "learning_rate": 1.0847439792604041e-06, "loss": 0.3791406750679016, "step": 15843, "token_acc": 0.8686199125016572 }, { "epoch": 0.854907462364431, "grad_norm": 0.4542714059352875, "learning_rate": 1.08395252158764e-06, "loss": 0.3080781102180481, "step": 15844, "token_acc": 0.8841652323580035 }, { "epoch": 0.8549614201694276, "grad_norm": 0.304189532995224, "learning_rate": 1.083161336208628e-06, "loss": 0.2672394812107086, "step": 15845, "token_acc": 0.9026836158192091 }, { "epoch": 0.855015377974424, "grad_norm": 0.36089029908180237, "learning_rate": 1.082370423147534e-06, "loss": 0.32644858956336975, "step": 15846, "token_acc": 0.8816951147733961 }, { "epoch": 0.8550693357794205, "grad_norm": 0.4675131142139435, "learning_rate": 1.0815797824285124e-06, "loss": 0.3682655990123749, "step": 15847, "token_acc": 0.866375236294896 }, { "epoch": 0.855123293584417, "grad_norm": 0.4329116940498352, "learning_rate": 1.080789414075707e-06, "loss": 0.34457385540008545, "step": 15848, "token_acc": 0.8761384335154827 }, { "epoch": 0.8551772513894135, "grad_norm": 0.3747456669807434, "learning_rate": 1.0799993181132573e-06, "loss": 0.333315908908844, "step": 15849, "token_acc": 0.8815068493150685 }, { "epoch": 0.85523120919441, "grad_norm": 0.3576413094997406, "learning_rate": 1.0792094945652908e-06, "loss": 0.31218764185905457, "step": 15850, "token_acc": 0.8844635809113742 }, { "epoch": 0.8552851669994065, "grad_norm": 0.4823828637599945, "learning_rate": 1.0784199434559307e-06, "loss": 0.35391271114349365, "step": 15851, "token_acc": 0.8746778350515464 }, { "epoch": 0.855339124804403, "grad_norm": 0.4223642647266388, "learning_rate": 1.0776306648092872e-06, "loss": 0.33789658546447754, "step": 15852, "token_acc": 0.8792908944399678 }, { "epoch": 0.8553930826093995, "grad_norm": 0.4700292944908142, "learning_rate": 1.0768416586494667e-06, "loss": 0.32430726289749146, "step": 15853, "token_acc": 0.8878731343283582 }, { "epoch": 0.8554470404143959, "grad_norm": 0.5063563585281372, "learning_rate": 1.0760529250005637e-06, "loss": 0.26193374395370483, "step": 15854, "token_acc": 0.9039637152435417 }, { "epoch": 0.8555009982193924, "grad_norm": 0.5077484846115112, "learning_rate": 1.0752644638866672e-06, "loss": 0.31311464309692383, "step": 15855, "token_acc": 0.8863013698630137 }, { "epoch": 0.8555549560243889, "grad_norm": 0.4180629551410675, "learning_rate": 1.0744762753318549e-06, "loss": 0.2980826497077942, "step": 15856, "token_acc": 0.8897982220967969 }, { "epoch": 0.8556089138293854, "grad_norm": 0.38437148928642273, "learning_rate": 1.0736883593602e-06, "loss": 0.3073385953903198, "step": 15857, "token_acc": 0.8902898892293188 }, { "epoch": 0.8556628716343819, "grad_norm": 0.4100891053676605, "learning_rate": 1.0729007159957616e-06, "loss": 0.3258984088897705, "step": 15858, "token_acc": 0.8868813825608798 }, { "epoch": 0.8557168294393784, "grad_norm": 0.4207606315612793, "learning_rate": 1.0721133452625998e-06, "loss": 0.3639580309391022, "step": 15859, "token_acc": 0.8745699920613919 }, { "epoch": 0.8557707872443749, "grad_norm": 0.41478195786476135, "learning_rate": 1.0713262471847564e-06, "loss": 0.28906577825546265, "step": 15860, "token_acc": 0.8946103238866396 }, { "epoch": 0.8558247450493713, "grad_norm": 0.4293939769268036, "learning_rate": 1.0705394217862707e-06, "loss": 0.36990880966186523, "step": 15861, "token_acc": 0.8706358576488447 }, { "epoch": 0.8558787028543678, "grad_norm": 0.45792916417121887, "learning_rate": 1.0697528690911729e-06, "loss": 0.3370441198348999, "step": 15862, "token_acc": 0.8785714285714286 }, { "epoch": 0.8559326606593644, "grad_norm": 0.3362613618373871, "learning_rate": 1.0689665891234825e-06, "loss": 0.27770301699638367, "step": 15863, "token_acc": 0.8942198177676538 }, { "epoch": 0.8559866184643609, "grad_norm": 0.4954085350036621, "learning_rate": 1.0681805819072123e-06, "loss": 0.3199004530906677, "step": 15864, "token_acc": 0.885077435229411 }, { "epoch": 0.8560405762693574, "grad_norm": 0.5448052287101746, "learning_rate": 1.0673948474663653e-06, "loss": 0.3522051274776459, "step": 15865, "token_acc": 0.8771818470326881 }, { "epoch": 0.8560945340743539, "grad_norm": 0.4419694244861603, "learning_rate": 1.0666093858249437e-06, "loss": 0.3129556179046631, "step": 15866, "token_acc": 0.8913502109704642 }, { "epoch": 0.8561484918793504, "grad_norm": 0.4797450304031372, "learning_rate": 1.0658241970069305e-06, "loss": 0.336551696062088, "step": 15867, "token_acc": 0.8768043447191654 }, { "epoch": 0.8562024496843469, "grad_norm": 0.429425984621048, "learning_rate": 1.065039281036304e-06, "loss": 0.37403279542922974, "step": 15868, "token_acc": 0.8722963346716747 }, { "epoch": 0.8562564074893433, "grad_norm": 0.38199883699417114, "learning_rate": 1.0642546379370389e-06, "loss": 0.3627800941467285, "step": 15869, "token_acc": 0.871948608137045 }, { "epoch": 0.8563103652943398, "grad_norm": 0.46505558490753174, "learning_rate": 1.0634702677330955e-06, "loss": 0.33077454566955566, "step": 15870, "token_acc": 0.8825824760553388 }, { "epoch": 0.8563643230993363, "grad_norm": 0.3913361132144928, "learning_rate": 1.062686170448427e-06, "loss": 0.3023405075073242, "step": 15871, "token_acc": 0.8940108058397517 }, { "epoch": 0.8564182809043328, "grad_norm": 0.511883020401001, "learning_rate": 1.0619023461069832e-06, "loss": 0.36074578762054443, "step": 15872, "token_acc": 0.8702841547599935 }, { "epoch": 0.8564722387093293, "grad_norm": 0.43406569957733154, "learning_rate": 1.0611187947327018e-06, "loss": 0.33135491609573364, "step": 15873, "token_acc": 0.8776500179662236 }, { "epoch": 0.8565261965143258, "grad_norm": 0.3528671860694885, "learning_rate": 1.0603355163495088e-06, "loss": 0.3224371671676636, "step": 15874, "token_acc": 0.8863160487744797 }, { "epoch": 0.8565801543193223, "grad_norm": 0.5524618029594421, "learning_rate": 1.0595525109813286e-06, "loss": 0.30675727128982544, "step": 15875, "token_acc": 0.8911598537720172 }, { "epoch": 0.8566341121243188, "grad_norm": 0.4991365969181061, "learning_rate": 1.0587697786520722e-06, "loss": 0.33908289670944214, "step": 15876, "token_acc": 0.8780526735833999 }, { "epoch": 0.8566880699293152, "grad_norm": 0.537617564201355, "learning_rate": 1.057987319385645e-06, "loss": 0.3110894560813904, "step": 15877, "token_acc": 0.8855698529411765 }, { "epoch": 0.8567420277343117, "grad_norm": 0.5310665965080261, "learning_rate": 1.0572051332059397e-06, "loss": 0.35531729459762573, "step": 15878, "token_acc": 0.8723835246455098 }, { "epoch": 0.8567959855393082, "grad_norm": 0.494630366563797, "learning_rate": 1.0564232201368495e-06, "loss": 0.34132853150367737, "step": 15879, "token_acc": 0.8798201389835127 }, { "epoch": 0.8568499433443048, "grad_norm": 0.3456089198589325, "learning_rate": 1.0556415802022513e-06, "loss": 0.30819472670555115, "step": 15880, "token_acc": 0.8875160875160876 }, { "epoch": 0.8569039011493013, "grad_norm": 0.3434503972530365, "learning_rate": 1.0548602134260155e-06, "loss": 0.3210735619068146, "step": 15881, "token_acc": 0.8845254216422505 }, { "epoch": 0.8569578589542978, "grad_norm": 0.4848181903362274, "learning_rate": 1.0540791198320055e-06, "loss": 0.38601401448249817, "step": 15882, "token_acc": 0.8632188498402555 }, { "epoch": 0.8570118167592943, "grad_norm": 0.6301392316818237, "learning_rate": 1.0532982994440765e-06, "loss": 0.32681724429130554, "step": 15883, "token_acc": 0.879409509202454 }, { "epoch": 0.8570657745642907, "grad_norm": 0.42475375533103943, "learning_rate": 1.052517752286073e-06, "loss": 0.3535611629486084, "step": 15884, "token_acc": 0.8759866060751017 }, { "epoch": 0.8571197323692872, "grad_norm": 0.4708222448825836, "learning_rate": 1.0517374783818336e-06, "loss": 0.3333591818809509, "step": 15885, "token_acc": 0.8800575723652647 }, { "epoch": 0.8571736901742837, "grad_norm": 0.4248671531677246, "learning_rate": 1.0509574777551879e-06, "loss": 0.31961265206336975, "step": 15886, "token_acc": 0.8839539155117716 }, { "epoch": 0.8572276479792802, "grad_norm": 0.5159901976585388, "learning_rate": 1.050177750429956e-06, "loss": 0.3857061266899109, "step": 15887, "token_acc": 0.8649497487437185 }, { "epoch": 0.8572816057842767, "grad_norm": 0.380585253238678, "learning_rate": 1.049398296429951e-06, "loss": 0.30243203043937683, "step": 15888, "token_acc": 0.8912992495309568 }, { "epoch": 0.8573355635892732, "grad_norm": 0.4763023853302002, "learning_rate": 1.0486191157789783e-06, "loss": 0.30329132080078125, "step": 15889, "token_acc": 0.890961999315303 }, { "epoch": 0.8573895213942697, "grad_norm": 0.4881799817085266, "learning_rate": 1.04784020850083e-06, "loss": 0.2733708322048187, "step": 15890, "token_acc": 0.9016721452050879 }, { "epoch": 0.8574434791992662, "grad_norm": 0.3948671221733093, "learning_rate": 1.047061574619298e-06, "loss": 0.3846637010574341, "step": 15891, "token_acc": 0.8706221198156682 }, { "epoch": 0.8574974370042626, "grad_norm": 0.34917184710502625, "learning_rate": 1.0462832141581613e-06, "loss": 0.30144965648651123, "step": 15892, "token_acc": 0.8908981314044605 }, { "epoch": 0.8575513948092591, "grad_norm": 0.4520261585712433, "learning_rate": 1.0455051271411886e-06, "loss": 0.34294602274894714, "step": 15893, "token_acc": 0.8768898488120951 }, { "epoch": 0.8576053526142556, "grad_norm": 0.32106176018714905, "learning_rate": 1.0447273135921442e-06, "loss": 0.319771945476532, "step": 15894, "token_acc": 0.8854045231988809 }, { "epoch": 0.8576593104192521, "grad_norm": 0.39591020345687866, "learning_rate": 1.0439497735347826e-06, "loss": 0.30044761300086975, "step": 15895, "token_acc": 0.8911419423692636 }, { "epoch": 0.8577132682242486, "grad_norm": 0.5070382356643677, "learning_rate": 1.0431725069928478e-06, "loss": 0.30061137676239014, "step": 15896, "token_acc": 0.8931827459518302 }, { "epoch": 0.8577672260292452, "grad_norm": 0.31647172570228577, "learning_rate": 1.0423955139900766e-06, "loss": 0.2777930200099945, "step": 15897, "token_acc": 0.8960596936771829 }, { "epoch": 0.8578211838342417, "grad_norm": 0.31575295329093933, "learning_rate": 1.041618794550201e-06, "loss": 0.28751808404922485, "step": 15898, "token_acc": 0.8931371366542034 }, { "epoch": 0.8578751416392382, "grad_norm": 0.45000943541526794, "learning_rate": 1.040842348696941e-06, "loss": 0.32070639729499817, "step": 15899, "token_acc": 0.8848256590936703 }, { "epoch": 0.8579290994442346, "grad_norm": 0.4221407175064087, "learning_rate": 1.040066176454011e-06, "loss": 0.34432071447372437, "step": 15900, "token_acc": 0.8796925048046125 }, { "epoch": 0.8579830572492311, "grad_norm": 0.3876434862613678, "learning_rate": 1.0392902778451107e-06, "loss": 0.33076733350753784, "step": 15901, "token_acc": 0.8828259620907525 }, { "epoch": 0.8580370150542276, "grad_norm": 0.45962440967559814, "learning_rate": 1.0385146528939383e-06, "loss": 0.3518775999546051, "step": 15902, "token_acc": 0.877914592611999 }, { "epoch": 0.8580909728592241, "grad_norm": 0.5096837878227234, "learning_rate": 1.0377393016241777e-06, "loss": 0.3124806880950928, "step": 15903, "token_acc": 0.8862522063149637 }, { "epoch": 0.8581449306642206, "grad_norm": 0.49769556522369385, "learning_rate": 1.0369642240595134e-06, "loss": 0.2891978621482849, "step": 15904, "token_acc": 0.8936103733791595 }, { "epoch": 0.8581988884692171, "grad_norm": 0.421070396900177, "learning_rate": 1.0361894202236145e-06, "loss": 0.31890615820884705, "step": 15905, "token_acc": 0.8834779528953427 }, { "epoch": 0.8582528462742136, "grad_norm": 0.37169599533081055, "learning_rate": 1.0354148901401417e-06, "loss": 0.3359023630619049, "step": 15906, "token_acc": 0.87856463878327 }, { "epoch": 0.85830680407921, "grad_norm": 0.48736536502838135, "learning_rate": 1.0346406338327508e-06, "loss": 0.37162336707115173, "step": 15907, "token_acc": 0.8724879539311318 }, { "epoch": 0.8583607618842065, "grad_norm": 0.41577455401420593, "learning_rate": 1.0338666513250851e-06, "loss": 0.3377411365509033, "step": 15908, "token_acc": 0.8790464240903387 }, { "epoch": 0.858414719689203, "grad_norm": 0.3311651349067688, "learning_rate": 1.033092942640783e-06, "loss": 0.28867679834365845, "step": 15909, "token_acc": 0.8959571076655557 }, { "epoch": 0.8584686774941995, "grad_norm": 0.31948092579841614, "learning_rate": 1.0323195078034732e-06, "loss": 0.29071372747421265, "step": 15910, "token_acc": 0.8938448864305049 }, { "epoch": 0.858522635299196, "grad_norm": 0.5033389925956726, "learning_rate": 1.031546346836777e-06, "loss": 0.28504568338394165, "step": 15911, "token_acc": 0.895446017397237 }, { "epoch": 0.8585765931041925, "grad_norm": 0.47429391741752625, "learning_rate": 1.030773459764306e-06, "loss": 0.32465916872024536, "step": 15912, "token_acc": 0.8814107749555088 }, { "epoch": 0.858630550909189, "grad_norm": 0.32401373982429504, "learning_rate": 1.0300008466096634e-06, "loss": 0.31954216957092285, "step": 15913, "token_acc": 0.8883551862910555 }, { "epoch": 0.8586845087141856, "grad_norm": 0.4152218997478485, "learning_rate": 1.029228507396446e-06, "loss": 0.33475857973098755, "step": 15914, "token_acc": 0.8809927360774819 }, { "epoch": 0.858738466519182, "grad_norm": 0.39580845832824707, "learning_rate": 1.0284564421482402e-06, "loss": 0.3123388886451721, "step": 15915, "token_acc": 0.8870611003594139 }, { "epoch": 0.8587924243241785, "grad_norm": 0.34591880440711975, "learning_rate": 1.027684650888624e-06, "loss": 0.3185754418373108, "step": 15916, "token_acc": 0.8859894377135756 }, { "epoch": 0.858846382129175, "grad_norm": 0.507998526096344, "learning_rate": 1.0269131336411675e-06, "loss": 0.3875172734260559, "step": 15917, "token_acc": 0.8681088625261689 }, { "epoch": 0.8589003399341715, "grad_norm": 0.3804112672805786, "learning_rate": 1.026141890429434e-06, "loss": 0.2919136881828308, "step": 15918, "token_acc": 0.8929073482428115 }, { "epoch": 0.858954297739168, "grad_norm": 0.3935631215572357, "learning_rate": 1.0253709212769768e-06, "loss": 0.32324713468551636, "step": 15919, "token_acc": 0.8833150384193195 }, { "epoch": 0.8590082555441645, "grad_norm": 0.5508943796157837, "learning_rate": 1.0246002262073396e-06, "loss": 0.3084852695465088, "step": 15920, "token_acc": 0.8854787793756577 }, { "epoch": 0.859062213349161, "grad_norm": 0.3752005994319916, "learning_rate": 1.023829805244061e-06, "loss": 0.28371378779411316, "step": 15921, "token_acc": 0.8961362692147902 }, { "epoch": 0.8591161711541575, "grad_norm": 0.6101595759391785, "learning_rate": 1.0230596584106689e-06, "loss": 0.3484020233154297, "step": 15922, "token_acc": 0.8764395861799726 }, { "epoch": 0.8591701289591539, "grad_norm": 0.49385708570480347, "learning_rate": 1.02228978573068e-06, "loss": 0.34925252199172974, "step": 15923, "token_acc": 0.8749351323300467 }, { "epoch": 0.8592240867641504, "grad_norm": 0.3257160484790802, "learning_rate": 1.0215201872276125e-06, "loss": 0.27785372734069824, "step": 15924, "token_acc": 0.8970116711555727 }, { "epoch": 0.8592780445691469, "grad_norm": 0.3328840732574463, "learning_rate": 1.0207508629249651e-06, "loss": 0.25759804248809814, "step": 15925, "token_acc": 0.9060087277609936 }, { "epoch": 0.8593320023741434, "grad_norm": 0.3570511043071747, "learning_rate": 1.0199818128462357e-06, "loss": 0.27653026580810547, "step": 15926, "token_acc": 0.9004648945047086 }, { "epoch": 0.8593859601791399, "grad_norm": 0.4873100817203522, "learning_rate": 1.0192130370149079e-06, "loss": 0.35289809107780457, "step": 15927, "token_acc": 0.8759934023091918 }, { "epoch": 0.8594399179841364, "grad_norm": 0.3413749635219574, "learning_rate": 1.0184445354544626e-06, "loss": 0.34061816334724426, "step": 15928, "token_acc": 0.8774054054054055 }, { "epoch": 0.8594938757891329, "grad_norm": 0.4491676986217499, "learning_rate": 1.0176763081883656e-06, "loss": 0.3480781316757202, "step": 15929, "token_acc": 0.8763040238450075 }, { "epoch": 0.8595478335941293, "grad_norm": 0.39886268973350525, "learning_rate": 1.0169083552400827e-06, "loss": 0.3400978446006775, "step": 15930, "token_acc": 0.881867678600891 }, { "epoch": 0.8596017913991258, "grad_norm": 0.5048508644104004, "learning_rate": 1.0161406766330661e-06, "loss": 0.30377551913261414, "step": 15931, "token_acc": 0.8905742145178764 }, { "epoch": 0.8596557492041224, "grad_norm": 0.4638582468032837, "learning_rate": 1.0153732723907595e-06, "loss": 0.3265573978424072, "step": 15932, "token_acc": 0.8829079659706109 }, { "epoch": 0.8597097070091189, "grad_norm": 0.3785114288330078, "learning_rate": 1.0146061425365994e-06, "loss": 0.3377155065536499, "step": 15933, "token_acc": 0.8837874201143626 }, { "epoch": 0.8597636648141154, "grad_norm": 0.4802705645561218, "learning_rate": 1.013839287094014e-06, "loss": 0.3421269953250885, "step": 15934, "token_acc": 0.8776904240237144 }, { "epoch": 0.8598176226191119, "grad_norm": 0.4792534410953522, "learning_rate": 1.0130727060864221e-06, "loss": 0.3306182324886322, "step": 15935, "token_acc": 0.8830599008012209 }, { "epoch": 0.8598715804241084, "grad_norm": 0.49532151222229004, "learning_rate": 1.0123063995372362e-06, "loss": 0.3222326338291168, "step": 15936, "token_acc": 0.883687740830598 }, { "epoch": 0.8599255382291049, "grad_norm": 0.5777586102485657, "learning_rate": 1.0115403674698587e-06, "loss": 0.3452701270580292, "step": 15937, "token_acc": 0.8802563075690829 }, { "epoch": 0.8599794960341013, "grad_norm": 0.5131258368492126, "learning_rate": 1.010774609907683e-06, "loss": 0.3570740222930908, "step": 15938, "token_acc": 0.8753812032159689 }, { "epoch": 0.8600334538390978, "grad_norm": 0.3438851237297058, "learning_rate": 1.0100091268740964e-06, "loss": 0.2684720754623413, "step": 15939, "token_acc": 0.902018420536939 }, { "epoch": 0.8600874116440943, "grad_norm": 0.39836758375167847, "learning_rate": 1.0092439183924751e-06, "loss": 0.26887714862823486, "step": 15940, "token_acc": 0.8989299987108418 }, { "epoch": 0.8601413694490908, "grad_norm": 0.27510982751846313, "learning_rate": 1.008478984486191e-06, "loss": 0.3320081830024719, "step": 15941, "token_acc": 0.8786004056795131 }, { "epoch": 0.8601953272540873, "grad_norm": 0.40179598331451416, "learning_rate": 1.0077143251785993e-06, "loss": 0.3441333472728729, "step": 15942, "token_acc": 0.8845299777942265 }, { "epoch": 0.8602492850590838, "grad_norm": 0.4509982168674469, "learning_rate": 1.0069499404930593e-06, "loss": 0.3067970275878906, "step": 15943, "token_acc": 0.8885317562355362 }, { "epoch": 0.8603032428640803, "grad_norm": 0.36918941140174866, "learning_rate": 1.0061858304529126e-06, "loss": 0.3034188449382782, "step": 15944, "token_acc": 0.891761294998792 }, { "epoch": 0.8603572006690767, "grad_norm": 0.42519810795783997, "learning_rate": 1.0054219950814947e-06, "loss": 0.3313659727573395, "step": 15945, "token_acc": 0.8854260764829871 }, { "epoch": 0.8604111584740732, "grad_norm": 0.38141289353370667, "learning_rate": 1.0046584344021315e-06, "loss": 0.31726670265197754, "step": 15946, "token_acc": 0.8865220759101472 }, { "epoch": 0.8604651162790697, "grad_norm": 0.4306334853172302, "learning_rate": 1.0038951484381443e-06, "loss": 0.34630170464515686, "step": 15947, "token_acc": 0.8792914034605878 }, { "epoch": 0.8605190740840662, "grad_norm": 0.5301271677017212, "learning_rate": 1.0031321372128422e-06, "loss": 0.33084338903427124, "step": 15948, "token_acc": 0.8805510534846029 }, { "epoch": 0.8605730318890628, "grad_norm": 0.42438653111457825, "learning_rate": 1.0023694007495288e-06, "loss": 0.29101240634918213, "step": 15949, "token_acc": 0.8966233766233767 }, { "epoch": 0.8606269896940593, "grad_norm": 0.4053691029548645, "learning_rate": 1.0016069390714955e-06, "loss": 0.3193684220314026, "step": 15950, "token_acc": 0.8854023110357256 }, { "epoch": 0.8606809474990558, "grad_norm": 0.3388477861881256, "learning_rate": 1.0008447522020304e-06, "loss": 0.3353806734085083, "step": 15951, "token_acc": 0.8781163434903048 }, { "epoch": 0.8607349053040523, "grad_norm": 0.3799341320991516, "learning_rate": 1.000082840164408e-06, "loss": 0.325467586517334, "step": 15952, "token_acc": 0.8834892846362813 }, { "epoch": 0.8607888631090487, "grad_norm": 0.4517793357372284, "learning_rate": 9.993212029818977e-07, "loss": 0.34372031688690186, "step": 15953, "token_acc": 0.8785061211839454 }, { "epoch": 0.8608428209140452, "grad_norm": 0.33805298805236816, "learning_rate": 9.98559840677761e-07, "loss": 0.3054817020893097, "step": 15954, "token_acc": 0.8902029297137481 }, { "epoch": 0.8608967787190417, "grad_norm": 0.38827911019325256, "learning_rate": 9.977987532752464e-07, "loss": 0.3245239853858948, "step": 15955, "token_acc": 0.8870096645821489 }, { "epoch": 0.8609507365240382, "grad_norm": 0.4674002230167389, "learning_rate": 9.970379407976017e-07, "loss": 0.34079670906066895, "step": 15956, "token_acc": 0.8775165206700476 }, { "epoch": 0.8610046943290347, "grad_norm": 0.447189062833786, "learning_rate": 9.962774032680588e-07, "loss": 0.38480162620544434, "step": 15957, "token_acc": 0.8664040114613181 }, { "epoch": 0.8610586521340312, "grad_norm": 0.43029001355171204, "learning_rate": 9.955171407098464e-07, "loss": 0.2752493917942047, "step": 15958, "token_acc": 0.9010595433517385 }, { "epoch": 0.8611126099390277, "grad_norm": 0.477942556142807, "learning_rate": 9.947571531461819e-07, "loss": 0.31719714403152466, "step": 15959, "token_acc": 0.8834771886559802 }, { "epoch": 0.8611665677440242, "grad_norm": 0.4393865168094635, "learning_rate": 9.93997440600274e-07, "loss": 0.33826524019241333, "step": 15960, "token_acc": 0.8850189449140192 }, { "epoch": 0.8612205255490206, "grad_norm": 0.5247389078140259, "learning_rate": 9.932380030953238e-07, "loss": 0.3868328928947449, "step": 15961, "token_acc": 0.866615969581749 }, { "epoch": 0.8612744833540171, "grad_norm": 0.39601871371269226, "learning_rate": 9.92478840654527e-07, "loss": 0.31646257638931274, "step": 15962, "token_acc": 0.8847653429602889 }, { "epoch": 0.8613284411590136, "grad_norm": 0.44440770149230957, "learning_rate": 9.917199533010669e-07, "loss": 0.32927513122558594, "step": 15963, "token_acc": 0.8818113491168733 }, { "epoch": 0.8613823989640101, "grad_norm": 0.4461519420146942, "learning_rate": 9.909613410581198e-07, "loss": 0.3235113024711609, "step": 15964, "token_acc": 0.884438002371854 }, { "epoch": 0.8614363567690067, "grad_norm": 0.33536213636398315, "learning_rate": 9.902030039488531e-07, "loss": 0.2736995220184326, "step": 15965, "token_acc": 0.8984105044920525 }, { "epoch": 0.8614903145740032, "grad_norm": 0.45510560274124146, "learning_rate": 9.89444941996427e-07, "loss": 0.2980996370315552, "step": 15966, "token_acc": 0.890360435875943 }, { "epoch": 0.8615442723789997, "grad_norm": 0.2901309132575989, "learning_rate": 9.886871552239907e-07, "loss": 0.30969521403312683, "step": 15967, "token_acc": 0.8888604680905486 }, { "epoch": 0.8615982301839961, "grad_norm": 0.32065433263778687, "learning_rate": 9.879296436546893e-07, "loss": 0.3202367424964905, "step": 15968, "token_acc": 0.8904025715597734 }, { "epoch": 0.8616521879889926, "grad_norm": 0.449649453163147, "learning_rate": 9.87172407311655e-07, "loss": 0.331256628036499, "step": 15969, "token_acc": 0.8800146323619071 }, { "epoch": 0.8617061457939891, "grad_norm": 0.4033803343772888, "learning_rate": 9.864154462180141e-07, "loss": 0.32987886667251587, "step": 15970, "token_acc": 0.8865227103499628 }, { "epoch": 0.8617601035989856, "grad_norm": 0.4324418604373932, "learning_rate": 9.856587603968848e-07, "loss": 0.32482191920280457, "step": 15971, "token_acc": 0.8804561541486433 }, { "epoch": 0.8618140614039821, "grad_norm": 0.4106515944004059, "learning_rate": 9.84902349871376e-07, "loss": 0.27010881900787354, "step": 15972, "token_acc": 0.9013431403901503 }, { "epoch": 0.8618680192089786, "grad_norm": 0.37729939818382263, "learning_rate": 9.841462146645865e-07, "loss": 0.360263854265213, "step": 15973, "token_acc": 0.8777599605279388 }, { "epoch": 0.8619219770139751, "grad_norm": 0.46681782603263855, "learning_rate": 9.833903547996094e-07, "loss": 0.3045826852321625, "step": 15974, "token_acc": 0.8910476759826633 }, { "epoch": 0.8619759348189716, "grad_norm": 0.3839639127254486, "learning_rate": 9.826347702995287e-07, "loss": 0.3427521288394928, "step": 15975, "token_acc": 0.8813664596273292 }, { "epoch": 0.862029892623968, "grad_norm": 0.42994239926338196, "learning_rate": 9.818794611874217e-07, "loss": 0.32662469148635864, "step": 15976, "token_acc": 0.8819865028551652 }, { "epoch": 0.8620838504289645, "grad_norm": 0.33830955624580383, "learning_rate": 9.81124427486353e-07, "loss": 0.33997541666030884, "step": 15977, "token_acc": 0.8764951722150166 }, { "epoch": 0.862137808233961, "grad_norm": 0.43342217803001404, "learning_rate": 9.803696692193808e-07, "loss": 0.3471744656562805, "step": 15978, "token_acc": 0.8757686033379345 }, { "epoch": 0.8621917660389575, "grad_norm": 0.4594916105270386, "learning_rate": 9.79615186409557e-07, "loss": 0.3155199885368347, "step": 15979, "token_acc": 0.8910908453174324 }, { "epoch": 0.862245723843954, "grad_norm": 0.5564594268798828, "learning_rate": 9.788609790799231e-07, "loss": 0.34941229224205017, "step": 15980, "token_acc": 0.8783703995276521 }, { "epoch": 0.8622996816489505, "grad_norm": 0.40825286507606506, "learning_rate": 9.781070472535081e-07, "loss": 0.382936954498291, "step": 15981, "token_acc": 0.8657042869641295 }, { "epoch": 0.862353639453947, "grad_norm": 0.5342081785202026, "learning_rate": 9.77353390953345e-07, "loss": 0.38721299171447754, "step": 15982, "token_acc": 0.8616929202007962 }, { "epoch": 0.8624075972589436, "grad_norm": 0.3245905339717865, "learning_rate": 9.766000102024442e-07, "loss": 0.26135414838790894, "step": 15983, "token_acc": 0.8987981579242952 }, { "epoch": 0.86246155506394, "grad_norm": 0.34364011883735657, "learning_rate": 9.75846905023815e-07, "loss": 0.3762577176094055, "step": 15984, "token_acc": 0.8699571449682282 }, { "epoch": 0.8625155128689365, "grad_norm": 0.6510552167892456, "learning_rate": 9.75094075440457e-07, "loss": 0.3421485126018524, "step": 15985, "token_acc": 0.8881422924901186 }, { "epoch": 0.862569470673933, "grad_norm": 0.41787636280059814, "learning_rate": 9.743415214753626e-07, "loss": 0.3451794981956482, "step": 15986, "token_acc": 0.8731847591660675 }, { "epoch": 0.8626234284789295, "grad_norm": 0.3399946391582489, "learning_rate": 9.735892431515104e-07, "loss": 0.29966896772384644, "step": 15987, "token_acc": 0.892974825359167 }, { "epoch": 0.862677386283926, "grad_norm": 0.3913205564022064, "learning_rate": 9.728372404918807e-07, "loss": 0.36217397451400757, "step": 15988, "token_acc": 0.8718328397499178 }, { "epoch": 0.8627313440889225, "grad_norm": 0.49098795652389526, "learning_rate": 9.720855135194373e-07, "loss": 0.3543071746826172, "step": 15989, "token_acc": 0.8738375153535708 }, { "epoch": 0.862785301893919, "grad_norm": 0.4595772325992584, "learning_rate": 9.713340622571376e-07, "loss": 0.34245941042900085, "step": 15990, "token_acc": 0.8816131237183868 }, { "epoch": 0.8628392596989154, "grad_norm": 0.2865552306175232, "learning_rate": 9.7058288672793e-07, "loss": 0.27787554264068604, "step": 15991, "token_acc": 0.8996572140853849 }, { "epoch": 0.8628932175039119, "grad_norm": 0.3720431923866272, "learning_rate": 9.698319869547556e-07, "loss": 0.30915266275405884, "step": 15992, "token_acc": 0.8882303132938189 }, { "epoch": 0.8629471753089084, "grad_norm": 0.44736289978027344, "learning_rate": 9.690813629605444e-07, "loss": 0.3089503049850464, "step": 15993, "token_acc": 0.8871260948603537 }, { "epoch": 0.8630011331139049, "grad_norm": 0.36887145042419434, "learning_rate": 9.683310147682245e-07, "loss": 0.273632287979126, "step": 15994, "token_acc": 0.9003967711041182 }, { "epoch": 0.8630550909189014, "grad_norm": 0.44025343656539917, "learning_rate": 9.675809424007099e-07, "loss": 0.3080958127975464, "step": 15995, "token_acc": 0.8881360619469026 }, { "epoch": 0.8631090487238979, "grad_norm": 0.3846757709980011, "learning_rate": 9.668311458809077e-07, "loss": 0.3751599192619324, "step": 15996, "token_acc": 0.8661072144288577 }, { "epoch": 0.8631630065288944, "grad_norm": 0.340579092502594, "learning_rate": 9.66081625231715e-07, "loss": 0.3425573706626892, "step": 15997, "token_acc": 0.8751262626262626 }, { "epoch": 0.863216964333891, "grad_norm": 0.43824949860572815, "learning_rate": 9.653323804760228e-07, "loss": 0.3559722900390625, "step": 15998, "token_acc": 0.8772935779816514 }, { "epoch": 0.8632709221388873, "grad_norm": 0.2553206980228424, "learning_rate": 9.645834116367137e-07, "loss": 0.32744187116622925, "step": 15999, "token_acc": 0.8778405813819357 }, { "epoch": 0.8633248799438838, "grad_norm": 0.43975260853767395, "learning_rate": 9.638347187366603e-07, "loss": 0.35439980030059814, "step": 16000, "token_acc": 0.8814222524668255 }, { "epoch": 0.8633788377488804, "grad_norm": 0.39715510606765747, "learning_rate": 9.630863017987268e-07, "loss": 0.3501386046409607, "step": 16001, "token_acc": 0.8768672951414068 }, { "epoch": 0.8634327955538769, "grad_norm": 0.45828014612197876, "learning_rate": 9.623381608457705e-07, "loss": 0.36902493238449097, "step": 16002, "token_acc": 0.8718554551772064 }, { "epoch": 0.8634867533588734, "grad_norm": 0.4230254888534546, "learning_rate": 9.615902959006396e-07, "loss": 0.35302087664604187, "step": 16003, "token_acc": 0.8797484599589322 }, { "epoch": 0.8635407111638699, "grad_norm": 0.35182514786720276, "learning_rate": 9.608427069861726e-07, "loss": 0.31755372881889343, "step": 16004, "token_acc": 0.8837962962962963 }, { "epoch": 0.8635946689688664, "grad_norm": 0.4500591456890106, "learning_rate": 9.600953941252012e-07, "loss": 0.35768264532089233, "step": 16005, "token_acc": 0.8794304126390188 }, { "epoch": 0.8636486267738629, "grad_norm": 0.3839055895805359, "learning_rate": 9.59348357340547e-07, "loss": 0.30040243268013, "step": 16006, "token_acc": 0.8888888888888888 }, { "epoch": 0.8637025845788593, "grad_norm": 0.4523911476135254, "learning_rate": 9.586015966550278e-07, "loss": 0.3612360656261444, "step": 16007, "token_acc": 0.8732961586121437 }, { "epoch": 0.8637565423838558, "grad_norm": 0.4519207179546356, "learning_rate": 9.57855112091447e-07, "loss": 0.28994497656822205, "step": 16008, "token_acc": 0.8921366163621922 }, { "epoch": 0.8638105001888523, "grad_norm": 0.35175707936286926, "learning_rate": 9.571089036726023e-07, "loss": 0.29250627756118774, "step": 16009, "token_acc": 0.895325562227717 }, { "epoch": 0.8638644579938488, "grad_norm": 0.4482670724391937, "learning_rate": 9.56362971421283e-07, "loss": 0.37784844636917114, "step": 16010, "token_acc": 0.8663141993957704 }, { "epoch": 0.8639184157988453, "grad_norm": 0.3811132609844208, "learning_rate": 9.556173153602698e-07, "loss": 0.3145204484462738, "step": 16011, "token_acc": 0.8874877399467563 }, { "epoch": 0.8639723736038418, "grad_norm": 0.4004030227661133, "learning_rate": 9.548719355123337e-07, "loss": 0.3481769263744354, "step": 16012, "token_acc": 0.8782367447595562 }, { "epoch": 0.8640263314088383, "grad_norm": 0.37489551305770874, "learning_rate": 9.541268319002373e-07, "loss": 0.2986036539077759, "step": 16013, "token_acc": 0.8889024241685954 }, { "epoch": 0.8640802892138347, "grad_norm": 0.4547189474105835, "learning_rate": 9.533820045467402e-07, "loss": 0.3359971046447754, "step": 16014, "token_acc": 0.877603746165025 }, { "epoch": 0.8641342470188312, "grad_norm": 0.36891111731529236, "learning_rate": 9.526374534745863e-07, "loss": 0.28908562660217285, "step": 16015, "token_acc": 0.8998275591785546 }, { "epoch": 0.8641882048238277, "grad_norm": 0.40143266320228577, "learning_rate": 9.518931787065156e-07, "loss": 0.29700469970703125, "step": 16016, "token_acc": 0.8913934426229508 }, { "epoch": 0.8642421626288243, "grad_norm": 0.32147979736328125, "learning_rate": 9.511491802652572e-07, "loss": 0.26914650201797485, "step": 16017, "token_acc": 0.8989597034556978 }, { "epoch": 0.8642961204338208, "grad_norm": 0.5146663188934326, "learning_rate": 9.504054581735311e-07, "loss": 0.38326314091682434, "step": 16018, "token_acc": 0.8642406114988368 }, { "epoch": 0.8643500782388173, "grad_norm": 0.4194552004337311, "learning_rate": 9.496620124540501e-07, "loss": 0.3275405168533325, "step": 16019, "token_acc": 0.8834757834757835 }, { "epoch": 0.8644040360438138, "grad_norm": 0.4824523627758026, "learning_rate": 9.489188431295215e-07, "loss": 0.2923412621021271, "step": 16020, "token_acc": 0.8943414322250639 }, { "epoch": 0.8644579938488103, "grad_norm": 0.42796772718429565, "learning_rate": 9.481759502226417e-07, "loss": 0.3083598017692566, "step": 16021, "token_acc": 0.890534871525957 }, { "epoch": 0.8645119516538067, "grad_norm": 0.4302554130554199, "learning_rate": 9.474333337560959e-07, "loss": 0.3246794641017914, "step": 16022, "token_acc": 0.8812592959841349 }, { "epoch": 0.8645659094588032, "grad_norm": 0.41469213366508484, "learning_rate": 9.466909937525648e-07, "loss": 0.2907199263572693, "step": 16023, "token_acc": 0.8906876349059513 }, { "epoch": 0.8646198672637997, "grad_norm": 0.34180864691734314, "learning_rate": 9.459489302347202e-07, "loss": 0.34625065326690674, "step": 16024, "token_acc": 0.8826979472140762 }, { "epoch": 0.8646738250687962, "grad_norm": 0.3882744610309601, "learning_rate": 9.452071432252219e-07, "loss": 0.2782500386238098, "step": 16025, "token_acc": 0.8994405785236731 }, { "epoch": 0.8647277828737927, "grad_norm": 0.4843563735485077, "learning_rate": 9.444656327467249e-07, "loss": 0.37908127903938293, "step": 16026, "token_acc": 0.8740054780226947 }, { "epoch": 0.8647817406787892, "grad_norm": 0.3932279050350189, "learning_rate": 9.437243988218758e-07, "loss": 0.3077171742916107, "step": 16027, "token_acc": 0.884838666126637 }, { "epoch": 0.8648356984837857, "grad_norm": 0.40467703342437744, "learning_rate": 9.429834414733119e-07, "loss": 0.3015980124473572, "step": 16028, "token_acc": 0.8880949170823783 }, { "epoch": 0.8648896562887822, "grad_norm": 0.4394701421260834, "learning_rate": 9.422427607236606e-07, "loss": 0.31952279806137085, "step": 16029, "token_acc": 0.8845231296402056 }, { "epoch": 0.8649436140937786, "grad_norm": 0.3794134557247162, "learning_rate": 9.415023565955439e-07, "loss": 0.24477547407150269, "step": 16030, "token_acc": 0.9068361817423927 }, { "epoch": 0.8649975718987751, "grad_norm": 0.4523009657859802, "learning_rate": 9.407622291115704e-07, "loss": 0.37320950627326965, "step": 16031, "token_acc": 0.8688651794374393 }, { "epoch": 0.8650515297037716, "grad_norm": 0.4344432055950165, "learning_rate": 9.400223782943462e-07, "loss": 0.3568252921104431, "step": 16032, "token_acc": 0.8715623171445289 }, { "epoch": 0.8651054875087681, "grad_norm": 0.5322993993759155, "learning_rate": 9.392828041664659e-07, "loss": 0.3512829542160034, "step": 16033, "token_acc": 0.8794538794538794 }, { "epoch": 0.8651594453137647, "grad_norm": 0.2860689163208008, "learning_rate": 9.385435067505133e-07, "loss": 0.27253180742263794, "step": 16034, "token_acc": 0.8975895410595125 }, { "epoch": 0.8652134031187612, "grad_norm": 0.40853434801101685, "learning_rate": 9.378044860690694e-07, "loss": 0.3618072271347046, "step": 16035, "token_acc": 0.8730124623979373 }, { "epoch": 0.8652673609237577, "grad_norm": 0.40384548902511597, "learning_rate": 9.370657421447027e-07, "loss": 0.27718687057495117, "step": 16036, "token_acc": 0.8980340375586855 }, { "epoch": 0.8653213187287541, "grad_norm": 0.48025113344192505, "learning_rate": 9.363272749999741e-07, "loss": 0.31958210468292236, "step": 16037, "token_acc": 0.8871538008249853 }, { "epoch": 0.8653752765337506, "grad_norm": 0.5390043258666992, "learning_rate": 9.355890846574334e-07, "loss": 0.2900591492652893, "step": 16038, "token_acc": 0.8953534551231136 }, { "epoch": 0.8654292343387471, "grad_norm": 0.3648056387901306, "learning_rate": 9.348511711396301e-07, "loss": 0.35543811321258545, "step": 16039, "token_acc": 0.876838765503317 }, { "epoch": 0.8654831921437436, "grad_norm": 0.3273186683654785, "learning_rate": 9.341135344690977e-07, "loss": 0.30170291662216187, "step": 16040, "token_acc": 0.8924485125858124 }, { "epoch": 0.8655371499487401, "grad_norm": 0.33130812644958496, "learning_rate": 9.333761746683623e-07, "loss": 0.2817099094390869, "step": 16041, "token_acc": 0.8986398580721466 }, { "epoch": 0.8655911077537366, "grad_norm": 0.39727315306663513, "learning_rate": 9.326390917599437e-07, "loss": 0.28997594118118286, "step": 16042, "token_acc": 0.8953621528771829 }, { "epoch": 0.8656450655587331, "grad_norm": 0.460016667842865, "learning_rate": 9.319022857663517e-07, "loss": 0.4111512303352356, "step": 16043, "token_acc": 0.8570147915732855 }, { "epoch": 0.8656990233637296, "grad_norm": 0.39096906781196594, "learning_rate": 9.311657567100884e-07, "loss": 0.3054584562778473, "step": 16044, "token_acc": 0.8908331451794987 }, { "epoch": 0.865752981168726, "grad_norm": 0.5122201442718506, "learning_rate": 9.304295046136447e-07, "loss": 0.30549156665802, "step": 16045, "token_acc": 0.8878954607977991 }, { "epoch": 0.8658069389737225, "grad_norm": 0.3643818497657776, "learning_rate": 9.296935294995113e-07, "loss": 0.31344059109687805, "step": 16046, "token_acc": 0.8853673919899085 }, { "epoch": 0.865860896778719, "grad_norm": 0.3991837203502655, "learning_rate": 9.289578313901604e-07, "loss": 0.30980825424194336, "step": 16047, "token_acc": 0.8921248530756171 }, { "epoch": 0.8659148545837155, "grad_norm": 0.4151628613471985, "learning_rate": 9.282224103080606e-07, "loss": 0.2906130254268646, "step": 16048, "token_acc": 0.8919798093101514 }, { "epoch": 0.865968812388712, "grad_norm": 0.456785649061203, "learning_rate": 9.274872662756729e-07, "loss": 0.35628843307495117, "step": 16049, "token_acc": 0.8728478913283841 }, { "epoch": 0.8660227701937085, "grad_norm": 0.4274034798145294, "learning_rate": 9.26752399315447e-07, "loss": 0.35214123129844666, "step": 16050, "token_acc": 0.8764949674363529 }, { "epoch": 0.866076727998705, "grad_norm": 0.4880818724632263, "learning_rate": 9.260178094498262e-07, "loss": 0.31329473853111267, "step": 16051, "token_acc": 0.885336812917958 }, { "epoch": 0.8661306858037014, "grad_norm": 0.45091983675956726, "learning_rate": 9.252834967012447e-07, "loss": 0.3207528591156006, "step": 16052, "token_acc": 0.882901866345575 }, { "epoch": 0.866184643608698, "grad_norm": 0.331753134727478, "learning_rate": 9.245494610921269e-07, "loss": 0.32590287923812866, "step": 16053, "token_acc": 0.8845646437994723 }, { "epoch": 0.8662386014136945, "grad_norm": 0.5032505989074707, "learning_rate": 9.238157026448924e-07, "loss": 0.3030129075050354, "step": 16054, "token_acc": 0.8903971845148315 }, { "epoch": 0.866292559218691, "grad_norm": 0.4796103835105896, "learning_rate": 9.230822213819479e-07, "loss": 0.36296749114990234, "step": 16055, "token_acc": 0.8693207719191612 }, { "epoch": 0.8663465170236875, "grad_norm": 0.38148441910743713, "learning_rate": 9.223490173256944e-07, "loss": 0.32482361793518066, "step": 16056, "token_acc": 0.886579139314369 }, { "epoch": 0.866400474828684, "grad_norm": 0.416749507188797, "learning_rate": 9.216160904985238e-07, "loss": 0.3224312663078308, "step": 16057, "token_acc": 0.8830311170808148 }, { "epoch": 0.8664544326336805, "grad_norm": 0.4663524627685547, "learning_rate": 9.208834409228185e-07, "loss": 0.29235804080963135, "step": 16058, "token_acc": 0.8907424381301559 }, { "epoch": 0.866508390438677, "grad_norm": 0.43768924474716187, "learning_rate": 9.201510686209558e-07, "loss": 0.37275075912475586, "step": 16059, "token_acc": 0.8680485338725986 }, { "epoch": 0.8665623482436734, "grad_norm": 0.36023783683776855, "learning_rate": 9.194189736153014e-07, "loss": 0.3019542694091797, "step": 16060, "token_acc": 0.890087498501738 }, { "epoch": 0.8666163060486699, "grad_norm": 0.44976431131362915, "learning_rate": 9.186871559282118e-07, "loss": 0.3176111578941345, "step": 16061, "token_acc": 0.8873031496062992 }, { "epoch": 0.8666702638536664, "grad_norm": 0.3812606930732727, "learning_rate": 9.179556155820391e-07, "loss": 0.2916066646575928, "step": 16062, "token_acc": 0.8926575203252033 }, { "epoch": 0.8667242216586629, "grad_norm": 0.43844103813171387, "learning_rate": 9.172243525991209e-07, "loss": 0.3478091359138489, "step": 16063, "token_acc": 0.8725438130642592 }, { "epoch": 0.8667781794636594, "grad_norm": 0.47885462641716003, "learning_rate": 9.164933670017906e-07, "loss": 0.3274538516998291, "step": 16064, "token_acc": 0.8821157877761652 }, { "epoch": 0.8668321372686559, "grad_norm": 0.41122251749038696, "learning_rate": 9.15762658812377e-07, "loss": 0.329237699508667, "step": 16065, "token_acc": 0.8839810212381383 }, { "epoch": 0.8668860950736524, "grad_norm": 0.4403059780597687, "learning_rate": 9.15032228053191e-07, "loss": 0.33722740411758423, "step": 16066, "token_acc": 0.8839167035888348 }, { "epoch": 0.866940052878649, "grad_norm": 0.5306956768035889, "learning_rate": 9.143020747465392e-07, "loss": 0.34122610092163086, "step": 16067, "token_acc": 0.8828437337041544 }, { "epoch": 0.8669940106836453, "grad_norm": 0.3735337257385254, "learning_rate": 9.135721989147228e-07, "loss": 0.32642698287963867, "step": 16068, "token_acc": 0.8872680742162508 }, { "epoch": 0.8670479684886419, "grad_norm": 0.46755048632621765, "learning_rate": 9.128426005800317e-07, "loss": 0.31691378355026245, "step": 16069, "token_acc": 0.8841756420878211 }, { "epoch": 0.8671019262936384, "grad_norm": 0.481950581073761, "learning_rate": 9.121132797647458e-07, "loss": 0.33697590231895447, "step": 16070, "token_acc": 0.8758465011286681 }, { "epoch": 0.8671558840986349, "grad_norm": 0.42894405126571655, "learning_rate": 9.113842364911385e-07, "loss": 0.3684171438217163, "step": 16071, "token_acc": 0.8744477172312224 }, { "epoch": 0.8672098419036314, "grad_norm": 0.4351609945297241, "learning_rate": 9.106554707814774e-07, "loss": 0.28731048107147217, "step": 16072, "token_acc": 0.897172619047619 }, { "epoch": 0.8672637997086279, "grad_norm": 0.4095969796180725, "learning_rate": 9.09926982658017e-07, "loss": 0.313640296459198, "step": 16073, "token_acc": 0.8883333333333333 }, { "epoch": 0.8673177575136244, "grad_norm": 0.4331711232662201, "learning_rate": 9.091987721430063e-07, "loss": 0.3101089596748352, "step": 16074, "token_acc": 0.8863896301944338 }, { "epoch": 0.8673717153186208, "grad_norm": 0.4332737624645233, "learning_rate": 9.084708392586828e-07, "loss": 0.2849196493625641, "step": 16075, "token_acc": 0.8967627856365615 }, { "epoch": 0.8674256731236173, "grad_norm": 0.3547153174877167, "learning_rate": 9.077431840272777e-07, "loss": 0.34037041664123535, "step": 16076, "token_acc": 0.8798088703749081 }, { "epoch": 0.8674796309286138, "grad_norm": 0.43028056621551514, "learning_rate": 9.070158064710122e-07, "loss": 0.33217114210128784, "step": 16077, "token_acc": 0.8809422449269193 }, { "epoch": 0.8675335887336103, "grad_norm": 0.47236141562461853, "learning_rate": 9.06288706612104e-07, "loss": 0.3469713628292084, "step": 16078, "token_acc": 0.875968992248062 }, { "epoch": 0.8675875465386068, "grad_norm": 0.49021995067596436, "learning_rate": 9.055618844727576e-07, "loss": 0.34379082918167114, "step": 16079, "token_acc": 0.8782197587218781 }, { "epoch": 0.8676415043436033, "grad_norm": 0.41887831687927246, "learning_rate": 9.048353400751675e-07, "loss": 0.26898282766342163, "step": 16080, "token_acc": 0.9018424396442185 }, { "epoch": 0.8676954621485998, "grad_norm": 0.3807700574398041, "learning_rate": 9.041090734415237e-07, "loss": 0.3096553683280945, "step": 16081, "token_acc": 0.8898419864559819 }, { "epoch": 0.8677494199535963, "grad_norm": 0.4756790101528168, "learning_rate": 9.033830845940062e-07, "loss": 0.31790706515312195, "step": 16082, "token_acc": 0.883678541839271 }, { "epoch": 0.8678033777585927, "grad_norm": 0.4055323004722595, "learning_rate": 9.026573735547862e-07, "loss": 0.37767159938812256, "step": 16083, "token_acc": 0.872311462020147 }, { "epoch": 0.8678573355635892, "grad_norm": 0.3852301836013794, "learning_rate": 9.01931940346027e-07, "loss": 0.31876689195632935, "step": 16084, "token_acc": 0.8843386243386243 }, { "epoch": 0.8679112933685857, "grad_norm": 0.35936516523361206, "learning_rate": 9.012067849898831e-07, "loss": 0.2929467260837555, "step": 16085, "token_acc": 0.8915310805173133 }, { "epoch": 0.8679652511735823, "grad_norm": 0.40382009744644165, "learning_rate": 9.004819075085003e-07, "loss": 0.27402037382125854, "step": 16086, "token_acc": 0.8989220710372857 }, { "epoch": 0.8680192089785788, "grad_norm": 0.43702977895736694, "learning_rate": 8.997573079240163e-07, "loss": 0.32282528281211853, "step": 16087, "token_acc": 0.8833998158944462 }, { "epoch": 0.8680731667835753, "grad_norm": 0.45086488127708435, "learning_rate": 8.990329862585601e-07, "loss": 0.34478628635406494, "step": 16088, "token_acc": 0.8725435925823416 }, { "epoch": 0.8681271245885718, "grad_norm": 0.46721407771110535, "learning_rate": 8.983089425342517e-07, "loss": 0.32319051027297974, "step": 16089, "token_acc": 0.8828434438190536 }, { "epoch": 0.8681810823935683, "grad_norm": 0.36426520347595215, "learning_rate": 8.975851767732025e-07, "loss": 0.31321418285369873, "step": 16090, "token_acc": 0.8885298869143781 }, { "epoch": 0.8682350401985647, "grad_norm": 0.4281066060066223, "learning_rate": 8.968616889975201e-07, "loss": 0.3329256772994995, "step": 16091, "token_acc": 0.8860369609856262 }, { "epoch": 0.8682889980035612, "grad_norm": 0.3922450542449951, "learning_rate": 8.96138479229296e-07, "loss": 0.27908873558044434, "step": 16092, "token_acc": 0.8982569922983381 }, { "epoch": 0.8683429558085577, "grad_norm": 0.384555846452713, "learning_rate": 8.95415547490619e-07, "loss": 0.2785685062408447, "step": 16093, "token_acc": 0.896 }, { "epoch": 0.8683969136135542, "grad_norm": 0.39375096559524536, "learning_rate": 8.946928938035648e-07, "loss": 0.3302805423736572, "step": 16094, "token_acc": 0.8857504979177983 }, { "epoch": 0.8684508714185507, "grad_norm": 0.47785693407058716, "learning_rate": 8.939705181902048e-07, "loss": 0.36268362402915955, "step": 16095, "token_acc": 0.870066889632107 }, { "epoch": 0.8685048292235472, "grad_norm": 0.404062956571579, "learning_rate": 8.932484206725978e-07, "loss": 0.31025028228759766, "step": 16096, "token_acc": 0.8913099205331966 }, { "epoch": 0.8685587870285437, "grad_norm": 0.510764479637146, "learning_rate": 8.925266012728007e-07, "loss": 0.318313866853714, "step": 16097, "token_acc": 0.8795417934490782 }, { "epoch": 0.8686127448335401, "grad_norm": 0.3748336732387543, "learning_rate": 8.918050600128559e-07, "loss": 0.3595028519630432, "step": 16098, "token_acc": 0.8758314855875832 }, { "epoch": 0.8686667026385366, "grad_norm": 0.5692804455757141, "learning_rate": 8.91083796914799e-07, "loss": 0.3124171495437622, "step": 16099, "token_acc": 0.8852908007088771 }, { "epoch": 0.8687206604435331, "grad_norm": 0.42635828256607056, "learning_rate": 8.903628120006558e-07, "loss": 0.3294891119003296, "step": 16100, "token_acc": 0.8767544445929688 }, { "epoch": 0.8687746182485296, "grad_norm": 0.45143213868141174, "learning_rate": 8.896421052924453e-07, "loss": 0.3267592787742615, "step": 16101, "token_acc": 0.8818994757940178 }, { "epoch": 0.8688285760535261, "grad_norm": 0.4080891013145447, "learning_rate": 8.889216768121789e-07, "loss": 0.31605064868927, "step": 16102, "token_acc": 0.8880403238124331 }, { "epoch": 0.8688825338585227, "grad_norm": 0.45084112882614136, "learning_rate": 8.882015265818556e-07, "loss": 0.30755946040153503, "step": 16103, "token_acc": 0.8899570815450644 }, { "epoch": 0.8689364916635192, "grad_norm": 0.43893906474113464, "learning_rate": 8.874816546234722e-07, "loss": 0.32015669345855713, "step": 16104, "token_acc": 0.8838751069289992 }, { "epoch": 0.8689904494685157, "grad_norm": 0.3753988742828369, "learning_rate": 8.867620609590122e-07, "loss": 0.2831759452819824, "step": 16105, "token_acc": 0.8978605100930739 }, { "epoch": 0.8690444072735121, "grad_norm": 0.469699501991272, "learning_rate": 8.860427456104515e-07, "loss": 0.3236845135688782, "step": 16106, "token_acc": 0.8835064935064935 }, { "epoch": 0.8690983650785086, "grad_norm": 0.3978128731250763, "learning_rate": 8.853237085997579e-07, "loss": 0.3294637203216553, "step": 16107, "token_acc": 0.8845602605863192 }, { "epoch": 0.8691523228835051, "grad_norm": 0.3696526288986206, "learning_rate": 8.846049499488907e-07, "loss": 0.35862571001052856, "step": 16108, "token_acc": 0.8775924256086565 }, { "epoch": 0.8692062806885016, "grad_norm": 0.46696141362190247, "learning_rate": 8.838864696797989e-07, "loss": 0.31857946515083313, "step": 16109, "token_acc": 0.8867542972699697 }, { "epoch": 0.8692602384934981, "grad_norm": 0.39351433515548706, "learning_rate": 8.831682678144271e-07, "loss": 0.31820595264434814, "step": 16110, "token_acc": 0.8829646989893072 }, { "epoch": 0.8693141962984946, "grad_norm": 0.3704184889793396, "learning_rate": 8.824503443747091e-07, "loss": 0.2940340042114258, "step": 16111, "token_acc": 0.8944 }, { "epoch": 0.8693681541034911, "grad_norm": 0.5072246193885803, "learning_rate": 8.817326993825681e-07, "loss": 0.3143848180770874, "step": 16112, "token_acc": 0.887237599851384 }, { "epoch": 0.8694221119084876, "grad_norm": 0.42367157340049744, "learning_rate": 8.810153328599225e-07, "loss": 0.32494980096817017, "step": 16113, "token_acc": 0.884239538962666 }, { "epoch": 0.869476069713484, "grad_norm": 0.33243507146835327, "learning_rate": 8.80298244828679e-07, "loss": 0.3482746183872223, "step": 16114, "token_acc": 0.8770250368188512 }, { "epoch": 0.8695300275184805, "grad_norm": 0.4663800001144409, "learning_rate": 8.79581435310739e-07, "loss": 0.30374014377593994, "step": 16115, "token_acc": 0.8890162055982976 }, { "epoch": 0.869583985323477, "grad_norm": 0.40579211711883545, "learning_rate": 8.788649043279929e-07, "loss": 0.27498823404312134, "step": 16116, "token_acc": 0.8977364591754244 }, { "epoch": 0.8696379431284735, "grad_norm": 0.37648844718933105, "learning_rate": 8.78148651902322e-07, "loss": 0.3052859604358673, "step": 16117, "token_acc": 0.8872727272727273 }, { "epoch": 0.86969190093347, "grad_norm": 0.525397539138794, "learning_rate": 8.774326780556031e-07, "loss": 0.40176138281822205, "step": 16118, "token_acc": 0.8671264367816092 }, { "epoch": 0.8697458587384665, "grad_norm": 0.416191428899765, "learning_rate": 8.767169828096989e-07, "loss": 0.28480759263038635, "step": 16119, "token_acc": 0.8929850152499669 }, { "epoch": 0.8697998165434631, "grad_norm": 0.656731367111206, "learning_rate": 8.760015661864696e-07, "loss": 0.40859803557395935, "step": 16120, "token_acc": 0.8567765567765567 }, { "epoch": 0.8698537743484595, "grad_norm": 0.2739502489566803, "learning_rate": 8.752864282077611e-07, "loss": 0.3160998225212097, "step": 16121, "token_acc": 0.8854227067146004 }, { "epoch": 0.869907732153456, "grad_norm": 0.4856751561164856, "learning_rate": 8.745715688954137e-07, "loss": 0.3534437119960785, "step": 16122, "token_acc": 0.8702870442203259 }, { "epoch": 0.8699616899584525, "grad_norm": 0.39662083983421326, "learning_rate": 8.738569882712611e-07, "loss": 0.2861703634262085, "step": 16123, "token_acc": 0.8969934640522876 }, { "epoch": 0.870015647763449, "grad_norm": 0.34101560711860657, "learning_rate": 8.731426863571257e-07, "loss": 0.26765578985214233, "step": 16124, "token_acc": 0.8988483418898293 }, { "epoch": 0.8700696055684455, "grad_norm": 0.48548802733421326, "learning_rate": 8.724286631748224e-07, "loss": 0.28540509939193726, "step": 16125, "token_acc": 0.8985868661679135 }, { "epoch": 0.870123563373442, "grad_norm": 0.5089617371559143, "learning_rate": 8.717149187461559e-07, "loss": 0.3408062160015106, "step": 16126, "token_acc": 0.8787171103845555 }, { "epoch": 0.8701775211784385, "grad_norm": 0.38325127959251404, "learning_rate": 8.710014530929245e-07, "loss": 0.3297450542449951, "step": 16127, "token_acc": 0.8800314465408805 }, { "epoch": 0.870231478983435, "grad_norm": 0.3209648132324219, "learning_rate": 8.702882662369172e-07, "loss": 0.27259814739227295, "step": 16128, "token_acc": 0.8996521963158572 }, { "epoch": 0.8702854367884314, "grad_norm": 0.5018836259841919, "learning_rate": 8.695753581999133e-07, "loss": 0.3303435444831848, "step": 16129, "token_acc": 0.8836641718190693 }, { "epoch": 0.8703393945934279, "grad_norm": 0.35797548294067383, "learning_rate": 8.688627290036877e-07, "loss": 0.32442909479141235, "step": 16130, "token_acc": 0.8875666803446861 }, { "epoch": 0.8703933523984244, "grad_norm": 0.515906810760498, "learning_rate": 8.681503786700019e-07, "loss": 0.2908158302307129, "step": 16131, "token_acc": 0.893237660360948 }, { "epoch": 0.8704473102034209, "grad_norm": 0.5030524730682373, "learning_rate": 8.674383072206116e-07, "loss": 0.3194662630558014, "step": 16132, "token_acc": 0.8822802682668549 }, { "epoch": 0.8705012680084174, "grad_norm": 0.381924569606781, "learning_rate": 8.66726514677263e-07, "loss": 0.30774909257888794, "step": 16133, "token_acc": 0.8859107493685097 }, { "epoch": 0.8705552258134139, "grad_norm": 0.33628252148628235, "learning_rate": 8.660150010616963e-07, "loss": 0.31556206941604614, "step": 16134, "token_acc": 0.8815639630520056 }, { "epoch": 0.8706091836184104, "grad_norm": 0.27947893738746643, "learning_rate": 8.653037663956343e-07, "loss": 0.339100182056427, "step": 16135, "token_acc": 0.8745753499116727 }, { "epoch": 0.870663141423407, "grad_norm": 0.4884181618690491, "learning_rate": 8.64592810700805e-07, "loss": 0.32278501987457275, "step": 16136, "token_acc": 0.8866585563665855 }, { "epoch": 0.8707170992284033, "grad_norm": 0.49066033959388733, "learning_rate": 8.638821339989167e-07, "loss": 0.3920782804489136, "step": 16137, "token_acc": 0.867410035478966 }, { "epoch": 0.8707710570333999, "grad_norm": 0.33290156722068787, "learning_rate": 8.631717363116765e-07, "loss": 0.23007521033287048, "step": 16138, "token_acc": 0.9125014429181577 }, { "epoch": 0.8708250148383964, "grad_norm": 0.5486291646957397, "learning_rate": 8.62461617660777e-07, "loss": 0.40984147787094116, "step": 16139, "token_acc": 0.8552179293276502 }, { "epoch": 0.8708789726433929, "grad_norm": 0.4710683822631836, "learning_rate": 8.617517780679052e-07, "loss": 0.3475325405597687, "step": 16140, "token_acc": 0.880844382414515 }, { "epoch": 0.8709329304483894, "grad_norm": 0.455891489982605, "learning_rate": 8.610422175547417e-07, "loss": 0.2934015095233917, "step": 16141, "token_acc": 0.8899244737264985 }, { "epoch": 0.8709868882533859, "grad_norm": 0.4186576008796692, "learning_rate": 8.603329361429525e-07, "loss": 0.35283327102661133, "step": 16142, "token_acc": 0.8752899197145406 }, { "epoch": 0.8710408460583824, "grad_norm": 0.47877031564712524, "learning_rate": 8.596239338542023e-07, "loss": 0.3067810535430908, "step": 16143, "token_acc": 0.8904051172707889 }, { "epoch": 0.8710948038633788, "grad_norm": 0.41589781641960144, "learning_rate": 8.589152107101428e-07, "loss": 0.349941611289978, "step": 16144, "token_acc": 0.8731019522776573 }, { "epoch": 0.8711487616683753, "grad_norm": 0.40220731496810913, "learning_rate": 8.58206766732419e-07, "loss": 0.29235923290252686, "step": 16145, "token_acc": 0.8944789284547533 }, { "epoch": 0.8712027194733718, "grad_norm": 0.49103888869285583, "learning_rate": 8.574986019426657e-07, "loss": 0.29315441846847534, "step": 16146, "token_acc": 0.8932004894249257 }, { "epoch": 0.8712566772783683, "grad_norm": 0.3481645882129669, "learning_rate": 8.567907163625089e-07, "loss": 0.37201717495918274, "step": 16147, "token_acc": 0.8678168052625047 }, { "epoch": 0.8713106350833648, "grad_norm": 0.43364545702934265, "learning_rate": 8.560831100135691e-07, "loss": 0.3347175717353821, "step": 16148, "token_acc": 0.8790117167600612 }, { "epoch": 0.8713645928883613, "grad_norm": 0.4080173373222351, "learning_rate": 8.553757829174558e-07, "loss": 0.3370625376701355, "step": 16149, "token_acc": 0.8851095993953136 }, { "epoch": 0.8714185506933578, "grad_norm": 0.42258375883102417, "learning_rate": 8.546687350957706e-07, "loss": 0.3433248996734619, "step": 16150, "token_acc": 0.8753672316384181 }, { "epoch": 0.8714725084983543, "grad_norm": 0.39657196402549744, "learning_rate": 8.539619665701071e-07, "loss": 0.31152474880218506, "step": 16151, "token_acc": 0.8851987865240301 }, { "epoch": 0.8715264663033507, "grad_norm": 0.5157091021537781, "learning_rate": 8.532554773620483e-07, "loss": 0.35164371132850647, "step": 16152, "token_acc": 0.8736008265885999 }, { "epoch": 0.8715804241083472, "grad_norm": 0.45063942670822144, "learning_rate": 8.525492674931713e-07, "loss": 0.33844253420829773, "step": 16153, "token_acc": 0.8780746145698481 }, { "epoch": 0.8716343819133437, "grad_norm": 0.3452378809452057, "learning_rate": 8.51843336985041e-07, "loss": 0.27543020248413086, "step": 16154, "token_acc": 0.9000205718987863 }, { "epoch": 0.8716883397183403, "grad_norm": 0.4698544442653656, "learning_rate": 8.511376858592213e-07, "loss": 0.3286939263343811, "step": 16155, "token_acc": 0.8838843635202044 }, { "epoch": 0.8717422975233368, "grad_norm": 0.3536754846572876, "learning_rate": 8.504323141372594e-07, "loss": 0.296230673789978, "step": 16156, "token_acc": 0.8923484180007564 }, { "epoch": 0.8717962553283333, "grad_norm": 0.43499186635017395, "learning_rate": 8.497272218406983e-07, "loss": 0.34133923053741455, "step": 16157, "token_acc": 0.877948034643571 }, { "epoch": 0.8718502131333298, "grad_norm": 0.4710143208503723, "learning_rate": 8.490224089910714e-07, "loss": 0.30773425102233887, "step": 16158, "token_acc": 0.8878114277189318 }, { "epoch": 0.8719041709383263, "grad_norm": 0.4343683421611786, "learning_rate": 8.48317875609902e-07, "loss": 0.29505789279937744, "step": 16159, "token_acc": 0.892432770481551 }, { "epoch": 0.8719581287433227, "grad_norm": 0.36562520265579224, "learning_rate": 8.476136217187069e-07, "loss": 0.31281906366348267, "step": 16160, "token_acc": 0.885039033258475 }, { "epoch": 0.8720120865483192, "grad_norm": 0.5032394528388977, "learning_rate": 8.469096473389926e-07, "loss": 0.31540748476982117, "step": 16161, "token_acc": 0.8866092690969042 }, { "epoch": 0.8720660443533157, "grad_norm": 0.35531729459762573, "learning_rate": 8.462059524922628e-07, "loss": 0.33800145983695984, "step": 16162, "token_acc": 0.8845021413276232 }, { "epoch": 0.8721200021583122, "grad_norm": 0.39186012744903564, "learning_rate": 8.455025372000036e-07, "loss": 0.33772221207618713, "step": 16163, "token_acc": 0.8805841924398625 }, { "epoch": 0.8721739599633087, "grad_norm": 0.4634539484977722, "learning_rate": 8.447994014837002e-07, "loss": 0.27003249526023865, "step": 16164, "token_acc": 0.8997799221262909 }, { "epoch": 0.8722279177683052, "grad_norm": 0.33669668436050415, "learning_rate": 8.440965453648242e-07, "loss": 0.2749870717525482, "step": 16165, "token_acc": 0.9009632342965677 }, { "epoch": 0.8722818755733017, "grad_norm": 0.3693980276584625, "learning_rate": 8.433939688648407e-07, "loss": 0.30553698539733887, "step": 16166, "token_acc": 0.8902260359983256 }, { "epoch": 0.8723358333782981, "grad_norm": 0.4177297055721283, "learning_rate": 8.42691672005207e-07, "loss": 0.3438510000705719, "step": 16167, "token_acc": 0.8777357625759138 }, { "epoch": 0.8723897911832946, "grad_norm": 0.47340747714042664, "learning_rate": 8.419896548073714e-07, "loss": 0.34310007095336914, "step": 16168, "token_acc": 0.8694749332542272 }, { "epoch": 0.8724437489882911, "grad_norm": 0.3567858934402466, "learning_rate": 8.412879172927724e-07, "loss": 0.3073117136955261, "step": 16169, "token_acc": 0.8865592049448552 }, { "epoch": 0.8724977067932876, "grad_norm": 0.3230751156806946, "learning_rate": 8.405864594828406e-07, "loss": 0.2778162360191345, "step": 16170, "token_acc": 0.8992926045016078 }, { "epoch": 0.8725516645982841, "grad_norm": 0.48121532797813416, "learning_rate": 8.398852813989989e-07, "loss": 0.37016424536705017, "step": 16171, "token_acc": 0.8708231458842706 }, { "epoch": 0.8726056224032807, "grad_norm": 0.40799105167388916, "learning_rate": 8.391843830626611e-07, "loss": 0.27619415521621704, "step": 16172, "token_acc": 0.8975349766822118 }, { "epoch": 0.8726595802082772, "grad_norm": 0.4275863468647003, "learning_rate": 8.384837644952326e-07, "loss": 0.33358800411224365, "step": 16173, "token_acc": 0.8818885448916408 }, { "epoch": 0.8727135380132737, "grad_norm": 0.49266770482063293, "learning_rate": 8.377834257181084e-07, "loss": 0.33173859119415283, "step": 16174, "token_acc": 0.8855086686293752 }, { "epoch": 0.8727674958182701, "grad_norm": 0.4753849506378174, "learning_rate": 8.3708336675268e-07, "loss": 0.3449745178222656, "step": 16175, "token_acc": 0.8759527228543025 }, { "epoch": 0.8728214536232666, "grad_norm": 0.3469218909740448, "learning_rate": 8.363835876203252e-07, "loss": 0.2707803249359131, "step": 16176, "token_acc": 0.8998242530755711 }, { "epoch": 0.8728754114282631, "grad_norm": 0.648496150970459, "learning_rate": 8.356840883424144e-07, "loss": 0.3295004367828369, "step": 16177, "token_acc": 0.8822448979591837 }, { "epoch": 0.8729293692332596, "grad_norm": 0.562649130821228, "learning_rate": 8.349848689403117e-07, "loss": 0.3130779266357422, "step": 16178, "token_acc": 0.8843560350640594 }, { "epoch": 0.8729833270382561, "grad_norm": 0.3026840388774872, "learning_rate": 8.342859294353701e-07, "loss": 0.32670891284942627, "step": 16179, "token_acc": 0.8823996033713436 }, { "epoch": 0.8730372848432526, "grad_norm": 0.3372929096221924, "learning_rate": 8.335872698489322e-07, "loss": 0.3284454047679901, "step": 16180, "token_acc": 0.8813868613138686 }, { "epoch": 0.8730912426482491, "grad_norm": 0.46595194935798645, "learning_rate": 8.328888902023413e-07, "loss": 0.30465540289878845, "step": 16181, "token_acc": 0.8909144369303146 }, { "epoch": 0.8731452004532455, "grad_norm": 0.2957824766635895, "learning_rate": 8.321907905169224e-07, "loss": 0.2590996026992798, "step": 16182, "token_acc": 0.9072054735941844 }, { "epoch": 0.873199158258242, "grad_norm": 0.42460137605667114, "learning_rate": 8.31492970813994e-07, "loss": 0.3220001459121704, "step": 16183, "token_acc": 0.8836016526570737 }, { "epoch": 0.8732531160632385, "grad_norm": 0.4669743478298187, "learning_rate": 8.307954311148692e-07, "loss": 0.3838598132133484, "step": 16184, "token_acc": 0.8704072913699801 }, { "epoch": 0.873307073868235, "grad_norm": 0.39129185676574707, "learning_rate": 8.300981714408496e-07, "loss": 0.33691635727882385, "step": 16185, "token_acc": 0.8778979907264297 }, { "epoch": 0.8733610316732315, "grad_norm": 0.3807254433631897, "learning_rate": 8.294011918132294e-07, "loss": 0.3608733117580414, "step": 16186, "token_acc": 0.8680993955674949 }, { "epoch": 0.873414989478228, "grad_norm": 0.4272884726524353, "learning_rate": 8.287044922532916e-07, "loss": 0.35547691583633423, "step": 16187, "token_acc": 0.871872422326093 }, { "epoch": 0.8734689472832246, "grad_norm": 0.5039498209953308, "learning_rate": 8.280080727823192e-07, "loss": 0.34145990014076233, "step": 16188, "token_acc": 0.8809285571342805 }, { "epoch": 0.8735229050882211, "grad_norm": 0.40533483028411865, "learning_rate": 8.273119334215774e-07, "loss": 0.2839779555797577, "step": 16189, "token_acc": 0.8941411451398136 }, { "epoch": 0.8735768628932175, "grad_norm": 0.44096142053604126, "learning_rate": 8.266160741923257e-07, "loss": 0.2831285893917084, "step": 16190, "token_acc": 0.8957017408719766 }, { "epoch": 0.873630820698214, "grad_norm": 0.4007022976875305, "learning_rate": 8.259204951158162e-07, "loss": 0.3121455907821655, "step": 16191, "token_acc": 0.8853277143174004 }, { "epoch": 0.8736847785032105, "grad_norm": 0.3846385180950165, "learning_rate": 8.252251962132918e-07, "loss": 0.27861446142196655, "step": 16192, "token_acc": 0.8970820189274448 }, { "epoch": 0.873738736308207, "grad_norm": 0.5126304626464844, "learning_rate": 8.245301775059844e-07, "loss": 0.31357693672180176, "step": 16193, "token_acc": 0.8882097791798107 }, { "epoch": 0.8737926941132035, "grad_norm": 0.49789372086524963, "learning_rate": 8.238354390151237e-07, "loss": 0.3207285404205322, "step": 16194, "token_acc": 0.8837494256394547 }, { "epoch": 0.8738466519182, "grad_norm": 0.3407335877418518, "learning_rate": 8.231409807619251e-07, "loss": 0.2894046902656555, "step": 16195, "token_acc": 0.8970782280867107 }, { "epoch": 0.8739006097231965, "grad_norm": 0.46409526467323303, "learning_rate": 8.224468027675958e-07, "loss": 0.33142077922821045, "step": 16196, "token_acc": 0.8741815607857016 }, { "epoch": 0.873954567528193, "grad_norm": 0.36695489287376404, "learning_rate": 8.217529050533379e-07, "loss": 0.32648685574531555, "step": 16197, "token_acc": 0.8828263318847578 }, { "epoch": 0.8740085253331894, "grad_norm": 0.39250701665878296, "learning_rate": 8.210592876403423e-07, "loss": 0.28940650820732117, "step": 16198, "token_acc": 0.8956265508684863 }, { "epoch": 0.8740624831381859, "grad_norm": 0.4174294173717499, "learning_rate": 8.203659505497907e-07, "loss": 0.2481735348701477, "step": 16199, "token_acc": 0.9020582953446609 }, { "epoch": 0.8741164409431824, "grad_norm": 0.5299342274665833, "learning_rate": 8.196728938028587e-07, "loss": 0.36234116554260254, "step": 16200, "token_acc": 0.8747256457876076 }, { "epoch": 0.8741703987481789, "grad_norm": 0.4553922414779663, "learning_rate": 8.189801174207113e-07, "loss": 0.3025962710380554, "step": 16201, "token_acc": 0.8816968731328421 }, { "epoch": 0.8742243565531754, "grad_norm": 0.4338630735874176, "learning_rate": 8.182876214245062e-07, "loss": 0.29806554317474365, "step": 16202, "token_acc": 0.8940561364887176 }, { "epoch": 0.8742783143581719, "grad_norm": 0.40298742055892944, "learning_rate": 8.175954058353918e-07, "loss": 0.3486034870147705, "step": 16203, "token_acc": 0.8790812452543659 }, { "epoch": 0.8743322721631684, "grad_norm": 0.4812110960483551, "learning_rate": 8.169034706745083e-07, "loss": 0.2916814684867859, "step": 16204, "token_acc": 0.8942782457159454 }, { "epoch": 0.8743862299681648, "grad_norm": 0.4899318516254425, "learning_rate": 8.162118159629861e-07, "loss": 0.31169483065605164, "step": 16205, "token_acc": 0.8922651933701657 }, { "epoch": 0.8744401877731613, "grad_norm": 0.3372509181499481, "learning_rate": 8.155204417219486e-07, "loss": 0.33601656556129456, "step": 16206, "token_acc": 0.8818764357393302 }, { "epoch": 0.8744941455781579, "grad_norm": 0.4296872019767761, "learning_rate": 8.148293479725123e-07, "loss": 0.3292871117591858, "step": 16207, "token_acc": 0.8819736473226801 }, { "epoch": 0.8745481033831544, "grad_norm": 0.4700941741466522, "learning_rate": 8.141385347357821e-07, "loss": 0.29843682050704956, "step": 16208, "token_acc": 0.8905807365439093 }, { "epoch": 0.8746020611881509, "grad_norm": 0.27714234590530396, "learning_rate": 8.134480020328539e-07, "loss": 0.3404688239097595, "step": 16209, "token_acc": 0.8797235023041474 }, { "epoch": 0.8746560189931474, "grad_norm": 0.4062497913837433, "learning_rate": 8.127577498848172e-07, "loss": 0.3066994547843933, "step": 16210, "token_acc": 0.8889339826839827 }, { "epoch": 0.8747099767981439, "grad_norm": 0.3785325288772583, "learning_rate": 8.120677783127529e-07, "loss": 0.30098143219947815, "step": 16211, "token_acc": 0.8940633245382585 }, { "epoch": 0.8747639346031404, "grad_norm": 0.48667991161346436, "learning_rate": 8.113780873377286e-07, "loss": 0.3138068914413452, "step": 16212, "token_acc": 0.8901236500234778 }, { "epoch": 0.8748178924081368, "grad_norm": 0.5400204658508301, "learning_rate": 8.106886769808131e-07, "loss": 0.3067740797996521, "step": 16213, "token_acc": 0.8900293255131965 }, { "epoch": 0.8748718502131333, "grad_norm": 0.40360820293426514, "learning_rate": 8.099995472630584e-07, "loss": 0.3246811330318451, "step": 16214, "token_acc": 0.8837295233403129 }, { "epoch": 0.8749258080181298, "grad_norm": 0.4788861870765686, "learning_rate": 8.093106982055099e-07, "loss": 0.3479420244693756, "step": 16215, "token_acc": 0.8764662212323683 }, { "epoch": 0.8749797658231263, "grad_norm": 0.5037645697593689, "learning_rate": 8.086221298292063e-07, "loss": 0.33404555916786194, "step": 16216, "token_acc": 0.8826483414827541 }, { "epoch": 0.8750337236281228, "grad_norm": 0.5159388184547424, "learning_rate": 8.079338421551741e-07, "loss": 0.31728723645210266, "step": 16217, "token_acc": 0.8851696187331132 }, { "epoch": 0.8750876814331193, "grad_norm": 0.34071844816207886, "learning_rate": 8.072458352044332e-07, "loss": 0.28734272718429565, "step": 16218, "token_acc": 0.894566098945661 }, { "epoch": 0.8751416392381158, "grad_norm": 0.3461417555809021, "learning_rate": 8.065581089979957e-07, "loss": 0.35146188735961914, "step": 16219, "token_acc": 0.872370486656201 }, { "epoch": 0.8751955970431123, "grad_norm": 0.45308977365493774, "learning_rate": 8.058706635568658e-07, "loss": 0.35184985399246216, "step": 16220, "token_acc": 0.875921568627451 }, { "epoch": 0.8752495548481087, "grad_norm": 0.388814777135849, "learning_rate": 8.05183498902038e-07, "loss": 0.2867147922515869, "step": 16221, "token_acc": 0.8979520595764486 }, { "epoch": 0.8753035126531052, "grad_norm": 0.5467677712440491, "learning_rate": 8.044966150544975e-07, "loss": 0.34273919463157654, "step": 16222, "token_acc": 0.878733407079646 }, { "epoch": 0.8753574704581017, "grad_norm": 0.4112023115158081, "learning_rate": 8.03810012035221e-07, "loss": 0.29385533928871155, "step": 16223, "token_acc": 0.8945719082260772 }, { "epoch": 0.8754114282630983, "grad_norm": 0.4775362014770508, "learning_rate": 8.031236898651785e-07, "loss": 0.3431466221809387, "step": 16224, "token_acc": 0.881157535360104 }, { "epoch": 0.8754653860680948, "grad_norm": 0.40895897150039673, "learning_rate": 8.024376485653274e-07, "loss": 0.27831757068634033, "step": 16225, "token_acc": 0.8929308565531475 }, { "epoch": 0.8755193438730913, "grad_norm": 0.39640384912490845, "learning_rate": 8.017518881566222e-07, "loss": 0.3081732392311096, "step": 16226, "token_acc": 0.8874030047878487 }, { "epoch": 0.8755733016780878, "grad_norm": 0.4560408294200897, "learning_rate": 8.010664086600051e-07, "loss": 0.32574084401130676, "step": 16227, "token_acc": 0.8850339063239236 }, { "epoch": 0.8756272594830842, "grad_norm": 0.4501272141933441, "learning_rate": 8.003812100964104e-07, "loss": 0.33759137988090515, "step": 16228, "token_acc": 0.8810023799524009 }, { "epoch": 0.8756812172880807, "grad_norm": 0.41324520111083984, "learning_rate": 7.996962924867623e-07, "loss": 0.32772165536880493, "step": 16229, "token_acc": 0.8824600182030945 }, { "epoch": 0.8757351750930772, "grad_norm": 0.4199908673763275, "learning_rate": 7.990116558519801e-07, "loss": 0.3049893379211426, "step": 16230, "token_acc": 0.8842716010663322 }, { "epoch": 0.8757891328980737, "grad_norm": 0.4199954569339752, "learning_rate": 7.983273002129721e-07, "loss": 0.3336080312728882, "step": 16231, "token_acc": 0.8808923619863877 }, { "epoch": 0.8758430907030702, "grad_norm": 0.39683058857917786, "learning_rate": 7.976432255906374e-07, "loss": 0.2953703701496124, "step": 16232, "token_acc": 0.89484787919165 }, { "epoch": 0.8758970485080667, "grad_norm": 0.46221038699150085, "learning_rate": 7.969594320058671e-07, "loss": 0.3495330512523651, "step": 16233, "token_acc": 0.8712621359223301 }, { "epoch": 0.8759510063130632, "grad_norm": 0.38566166162490845, "learning_rate": 7.962759194795466e-07, "loss": 0.29732203483581543, "step": 16234, "token_acc": 0.8954455445544555 }, { "epoch": 0.8760049641180597, "grad_norm": 0.37530526518821716, "learning_rate": 7.95592688032547e-07, "loss": 0.38819223642349243, "step": 16235, "token_acc": 0.8673118553419076 }, { "epoch": 0.8760589219230561, "grad_norm": 0.4614931643009186, "learning_rate": 7.949097376857362e-07, "loss": 0.32902199029922485, "step": 16236, "token_acc": 0.882742630009937 }, { "epoch": 0.8761128797280526, "grad_norm": 0.4046517312526703, "learning_rate": 7.942270684599706e-07, "loss": 0.2632983326911926, "step": 16237, "token_acc": 0.9051646028390214 }, { "epoch": 0.8761668375330491, "grad_norm": 0.3298569321632385, "learning_rate": 7.93544680376096e-07, "loss": 0.3223990201950073, "step": 16238, "token_acc": 0.8849363197189284 }, { "epoch": 0.8762207953380456, "grad_norm": 0.39003247022628784, "learning_rate": 7.928625734549577e-07, "loss": 0.30584225058555603, "step": 16239, "token_acc": 0.8928656079639725 }, { "epoch": 0.8762747531430422, "grad_norm": 0.37441402673721313, "learning_rate": 7.921807477173849e-07, "loss": 0.3308318257331848, "step": 16240, "token_acc": 0.8814159292035398 }, { "epoch": 0.8763287109480387, "grad_norm": 0.42530378699302673, "learning_rate": 7.914992031841995e-07, "loss": 0.28924065828323364, "step": 16241, "token_acc": 0.8865375223832004 }, { "epoch": 0.8763826687530352, "grad_norm": 0.38809382915496826, "learning_rate": 7.908179398762162e-07, "loss": 0.3063397705554962, "step": 16242, "token_acc": 0.8901616393025328 }, { "epoch": 0.8764366265580317, "grad_norm": 0.4662252366542816, "learning_rate": 7.901369578142404e-07, "loss": 0.3384951055049896, "step": 16243, "token_acc": 0.879372738238842 }, { "epoch": 0.8764905843630281, "grad_norm": 0.4695243239402771, "learning_rate": 7.894562570190689e-07, "loss": 0.2964450716972351, "step": 16244, "token_acc": 0.8911796998902037 }, { "epoch": 0.8765445421680246, "grad_norm": 0.3508574962615967, "learning_rate": 7.887758375114895e-07, "loss": 0.3115042448043823, "step": 16245, "token_acc": 0.8878839081602227 }, { "epoch": 0.8765984999730211, "grad_norm": 0.49188023805618286, "learning_rate": 7.880956993122846e-07, "loss": 0.310804545879364, "step": 16246, "token_acc": 0.887331167720978 }, { "epoch": 0.8766524577780176, "grad_norm": 0.3999861776828766, "learning_rate": 7.874158424422229e-07, "loss": 0.3452305495738983, "step": 16247, "token_acc": 0.8793346129238644 }, { "epoch": 0.8767064155830141, "grad_norm": 0.4797798693180084, "learning_rate": 7.86736266922069e-07, "loss": 0.2762838900089264, "step": 16248, "token_acc": 0.8927405565055184 }, { "epoch": 0.8767603733880106, "grad_norm": 0.5563676357269287, "learning_rate": 7.860569727725753e-07, "loss": 0.3138861358165741, "step": 16249, "token_acc": 0.8836516558612231 }, { "epoch": 0.8768143311930071, "grad_norm": 0.42938703298568726, "learning_rate": 7.853779600144884e-07, "loss": 0.2918722331523895, "step": 16250, "token_acc": 0.8887079261672095 }, { "epoch": 0.8768682889980035, "grad_norm": 0.5212602019309998, "learning_rate": 7.84699228668545e-07, "loss": 0.37445613741874695, "step": 16251, "token_acc": 0.8655667144906743 }, { "epoch": 0.876922246803, "grad_norm": 0.4317285120487213, "learning_rate": 7.840207787554721e-07, "loss": 0.3036630153656006, "step": 16252, "token_acc": 0.8853591160220995 }, { "epoch": 0.8769762046079965, "grad_norm": 0.3596383333206177, "learning_rate": 7.833426102959918e-07, "loss": 0.3363954722881317, "step": 16253, "token_acc": 0.8850216684723727 }, { "epoch": 0.877030162412993, "grad_norm": 0.38424167037010193, "learning_rate": 7.82664723310812e-07, "loss": 0.31428223848342896, "step": 16254, "token_acc": 0.8865491183879093 }, { "epoch": 0.8770841202179895, "grad_norm": 0.3799595236778259, "learning_rate": 7.819871178206384e-07, "loss": 0.2900046408176422, "step": 16255, "token_acc": 0.8989469794938112 }, { "epoch": 0.877138078022986, "grad_norm": 0.40117064118385315, "learning_rate": 7.813097938461622e-07, "loss": 0.36385202407836914, "step": 16256, "token_acc": 0.8701298701298701 }, { "epoch": 0.8771920358279826, "grad_norm": 0.46186935901641846, "learning_rate": 7.806327514080691e-07, "loss": 0.33873283863067627, "step": 16257, "token_acc": 0.8781732530751112 }, { "epoch": 0.8772459936329791, "grad_norm": 0.4933195114135742, "learning_rate": 7.799559905270382e-07, "loss": 0.34514474868774414, "step": 16258, "token_acc": 0.872319845236176 }, { "epoch": 0.8772999514379755, "grad_norm": 0.45380955934524536, "learning_rate": 7.792795112237361e-07, "loss": 0.3159106373786926, "step": 16259, "token_acc": 0.8895067264573991 }, { "epoch": 0.877353909242972, "grad_norm": 0.3777941167354584, "learning_rate": 7.78603313518822e-07, "loss": 0.32203322649002075, "step": 16260, "token_acc": 0.8834879406307977 }, { "epoch": 0.8774078670479685, "grad_norm": 0.47611570358276367, "learning_rate": 7.779273974329471e-07, "loss": 0.28980547189712524, "step": 16261, "token_acc": 0.892390898918314 }, { "epoch": 0.877461824852965, "grad_norm": 0.2821115553379059, "learning_rate": 7.772517629867538e-07, "loss": 0.3068311810493469, "step": 16262, "token_acc": 0.8885366472229267 }, { "epoch": 0.8775157826579615, "grad_norm": 0.40459150075912476, "learning_rate": 7.765764102008754e-07, "loss": 0.3262214660644531, "step": 16263, "token_acc": 0.8848153579522731 }, { "epoch": 0.877569740462958, "grad_norm": 0.528274655342102, "learning_rate": 7.759013390959369e-07, "loss": 0.33188939094543457, "step": 16264, "token_acc": 0.8833237382460181 }, { "epoch": 0.8776236982679545, "grad_norm": 0.3461991548538208, "learning_rate": 7.752265496925559e-07, "loss": 0.33024677634239197, "step": 16265, "token_acc": 0.8839285714285714 }, { "epoch": 0.877677656072951, "grad_norm": 0.5233228206634521, "learning_rate": 7.745520420113383e-07, "loss": 0.34474581480026245, "step": 16266, "token_acc": 0.8772770853307766 }, { "epoch": 0.8777316138779474, "grad_norm": 0.34718555212020874, "learning_rate": 7.738778160728855e-07, "loss": 0.3529456555843353, "step": 16267, "token_acc": 0.8699329250510353 }, { "epoch": 0.8777855716829439, "grad_norm": 0.5003931522369385, "learning_rate": 7.732038718977874e-07, "loss": 0.3040299117565155, "step": 16268, "token_acc": 0.8909839618552232 }, { "epoch": 0.8778395294879404, "grad_norm": 0.28907179832458496, "learning_rate": 7.725302095066256e-07, "loss": 0.29956483840942383, "step": 16269, "token_acc": 0.8910645959936979 }, { "epoch": 0.8778934872929369, "grad_norm": 0.3512788414955139, "learning_rate": 7.718568289199724e-07, "loss": 0.31831029057502747, "step": 16270, "token_acc": 0.8876149846687108 }, { "epoch": 0.8779474450979334, "grad_norm": 0.4672274887561798, "learning_rate": 7.711837301583958e-07, "loss": 0.30915725231170654, "step": 16271, "token_acc": 0.8895397489539749 }, { "epoch": 0.8780014029029299, "grad_norm": 0.3894360065460205, "learning_rate": 7.705109132424504e-07, "loss": 0.3187870383262634, "step": 16272, "token_acc": 0.8783165599268069 }, { "epoch": 0.8780553607079264, "grad_norm": 0.46666038036346436, "learning_rate": 7.698383781926843e-07, "loss": 0.28169113397598267, "step": 16273, "token_acc": 0.8939288206559665 }, { "epoch": 0.8781093185129228, "grad_norm": 0.4372430741786957, "learning_rate": 7.691661250296356e-07, "loss": 0.32267647981643677, "step": 16274, "token_acc": 0.8815749924857229 }, { "epoch": 0.8781632763179194, "grad_norm": 0.3929781913757324, "learning_rate": 7.684941537738355e-07, "loss": 0.3741779923439026, "step": 16275, "token_acc": 0.8723976608187135 }, { "epoch": 0.8782172341229159, "grad_norm": 0.4690772294998169, "learning_rate": 7.678224644458065e-07, "loss": 0.35318422317504883, "step": 16276, "token_acc": 0.8743506493506493 }, { "epoch": 0.8782711919279124, "grad_norm": 0.4493480324745178, "learning_rate": 7.671510570660578e-07, "loss": 0.33535996079444885, "step": 16277, "token_acc": 0.8831201404564836 }, { "epoch": 0.8783251497329089, "grad_norm": 0.41964632272720337, "learning_rate": 7.664799316550997e-07, "loss": 0.2961432933807373, "step": 16278, "token_acc": 0.8928571428571429 }, { "epoch": 0.8783791075379054, "grad_norm": 0.36112546920776367, "learning_rate": 7.658090882334257e-07, "loss": 0.29985570907592773, "step": 16279, "token_acc": 0.8931188047694297 }, { "epoch": 0.8784330653429019, "grad_norm": 0.35828864574432373, "learning_rate": 7.651385268215228e-07, "loss": 0.2711564004421234, "step": 16280, "token_acc": 0.8983495463985135 }, { "epoch": 0.8784870231478984, "grad_norm": 0.41860175132751465, "learning_rate": 7.644682474398701e-07, "loss": 0.3012378215789795, "step": 16281, "token_acc": 0.8950099800399202 }, { "epoch": 0.8785409809528948, "grad_norm": 0.3930303752422333, "learning_rate": 7.637982501089381e-07, "loss": 0.2928306460380554, "step": 16282, "token_acc": 0.896600749644565 }, { "epoch": 0.8785949387578913, "grad_norm": 0.3521110415458679, "learning_rate": 7.631285348491869e-07, "loss": 0.33902958035469055, "step": 16283, "token_acc": 0.8758519961051606 }, { "epoch": 0.8786488965628878, "grad_norm": 0.5018827319145203, "learning_rate": 7.624591016810712e-07, "loss": 0.36727333068847656, "step": 16284, "token_acc": 0.8716671155399947 }, { "epoch": 0.8787028543678843, "grad_norm": 0.3290505111217499, "learning_rate": 7.617899506250348e-07, "loss": 0.33143943548202515, "step": 16285, "token_acc": 0.8791158185586 }, { "epoch": 0.8787568121728808, "grad_norm": 0.37749552726745605, "learning_rate": 7.611210817015136e-07, "loss": 0.3112350106239319, "step": 16286, "token_acc": 0.8908195502694667 }, { "epoch": 0.8788107699778773, "grad_norm": 0.4628068208694458, "learning_rate": 7.604524949309334e-07, "loss": 0.33535903692245483, "step": 16287, "token_acc": 0.8863911697402073 }, { "epoch": 0.8788647277828738, "grad_norm": 0.3859406113624573, "learning_rate": 7.597841903337144e-07, "loss": 0.31917038559913635, "step": 16288, "token_acc": 0.8907511622216785 }, { "epoch": 0.8789186855878703, "grad_norm": 0.42190665006637573, "learning_rate": 7.59116167930265e-07, "loss": 0.31488800048828125, "step": 16289, "token_acc": 0.8868335146898803 }, { "epoch": 0.8789726433928667, "grad_norm": 0.422299861907959, "learning_rate": 7.584484277409854e-07, "loss": 0.33674851059913635, "step": 16290, "token_acc": 0.8814764646122587 }, { "epoch": 0.8790266011978632, "grad_norm": 0.4018295407295227, "learning_rate": 7.577809697862714e-07, "loss": 0.3155983090400696, "step": 16291, "token_acc": 0.890302816071639 }, { "epoch": 0.8790805590028598, "grad_norm": 0.5937114953994751, "learning_rate": 7.571137940865059e-07, "loss": 0.3909783363342285, "step": 16292, "token_acc": 0.861973986228003 }, { "epoch": 0.8791345168078563, "grad_norm": 0.4197466969490051, "learning_rate": 7.564469006620645e-07, "loss": 0.3598625659942627, "step": 16293, "token_acc": 0.8697049371895997 }, { "epoch": 0.8791884746128528, "grad_norm": 0.40620389580726624, "learning_rate": 7.557802895333122e-07, "loss": 0.3165324330329895, "step": 16294, "token_acc": 0.8855057876195269 }, { "epoch": 0.8792424324178493, "grad_norm": 0.470014363527298, "learning_rate": 7.551139607206082e-07, "loss": 0.3251476287841797, "step": 16295, "token_acc": 0.8773873185637892 }, { "epoch": 0.8792963902228458, "grad_norm": 0.38796570897102356, "learning_rate": 7.544479142443006e-07, "loss": 0.35831010341644287, "step": 16296, "token_acc": 0.8753552578156719 }, { "epoch": 0.8793503480278422, "grad_norm": 0.48976168036460876, "learning_rate": 7.537821501247334e-07, "loss": 0.29302048683166504, "step": 16297, "token_acc": 0.8951769449007018 }, { "epoch": 0.8794043058328387, "grad_norm": 0.378902405500412, "learning_rate": 7.531166683822388e-07, "loss": 0.3091413676738739, "step": 16298, "token_acc": 0.8885722855428915 }, { "epoch": 0.8794582636378352, "grad_norm": 0.46707144379615784, "learning_rate": 7.524514690371365e-07, "loss": 0.3026055097579956, "step": 16299, "token_acc": 0.8892895780742878 }, { "epoch": 0.8795122214428317, "grad_norm": 0.5148692727088928, "learning_rate": 7.517865521097445e-07, "loss": 0.36437785625457764, "step": 16300, "token_acc": 0.8682262363044122 }, { "epoch": 0.8795661792478282, "grad_norm": 0.42683055996894836, "learning_rate": 7.511219176203688e-07, "loss": 0.30169546604156494, "step": 16301, "token_acc": 0.8890984534138061 }, { "epoch": 0.8796201370528247, "grad_norm": 0.3534916341304779, "learning_rate": 7.504575655893054e-07, "loss": 0.31661221385002136, "step": 16302, "token_acc": 0.8893922234805587 }, { "epoch": 0.8796740948578212, "grad_norm": 0.3527926206588745, "learning_rate": 7.497934960368458e-07, "loss": 0.3281330466270447, "step": 16303, "token_acc": 0.8838928614713368 }, { "epoch": 0.8797280526628177, "grad_norm": 0.3796766698360443, "learning_rate": 7.491297089832694e-07, "loss": 0.337377667427063, "step": 16304, "token_acc": 0.8787783375314862 }, { "epoch": 0.8797820104678141, "grad_norm": 0.5463980436325073, "learning_rate": 7.484662044488489e-07, "loss": 0.2760181427001953, "step": 16305, "token_acc": 0.8940316686967114 }, { "epoch": 0.8798359682728106, "grad_norm": 0.41843196749687195, "learning_rate": 7.478029824538469e-07, "loss": 0.2774902880191803, "step": 16306, "token_acc": 0.8949325254750757 }, { "epoch": 0.8798899260778071, "grad_norm": 0.4292159974575043, "learning_rate": 7.471400430185172e-07, "loss": 0.3238441050052643, "step": 16307, "token_acc": 0.8825806451612903 }, { "epoch": 0.8799438838828036, "grad_norm": 0.38795098662376404, "learning_rate": 7.46477386163108e-07, "loss": 0.29852333664894104, "step": 16308, "token_acc": 0.893164398560926 }, { "epoch": 0.8799978416878002, "grad_norm": 0.31472501158714294, "learning_rate": 7.458150119078522e-07, "loss": 0.33551114797592163, "step": 16309, "token_acc": 0.8811372745490982 }, { "epoch": 0.8800517994927967, "grad_norm": 0.39137426018714905, "learning_rate": 7.451529202729846e-07, "loss": 0.32660239934921265, "step": 16310, "token_acc": 0.8823872926886241 }, { "epoch": 0.8801057572977932, "grad_norm": 0.4047405421733856, "learning_rate": 7.444911112787223e-07, "loss": 0.37000787258148193, "step": 16311, "token_acc": 0.8728230052652896 }, { "epoch": 0.8801597151027896, "grad_norm": 0.4012647271156311, "learning_rate": 7.438295849452759e-07, "loss": 0.33902525901794434, "step": 16312, "token_acc": 0.8803566675538994 }, { "epoch": 0.8802136729077861, "grad_norm": 0.3349333703517914, "learning_rate": 7.431683412928503e-07, "loss": 0.26800012588500977, "step": 16313, "token_acc": 0.9002654531664771 }, { "epoch": 0.8802676307127826, "grad_norm": 0.44816869497299194, "learning_rate": 7.425073803416372e-07, "loss": 0.29870861768722534, "step": 16314, "token_acc": 0.8968697596422582 }, { "epoch": 0.8803215885177791, "grad_norm": 0.3508358895778656, "learning_rate": 7.418467021118247e-07, "loss": 0.3327455222606659, "step": 16315, "token_acc": 0.8850696547546941 }, { "epoch": 0.8803755463227756, "grad_norm": 0.4238929748535156, "learning_rate": 7.411863066235881e-07, "loss": 0.3021669089794159, "step": 16316, "token_acc": 0.8842033667743581 }, { "epoch": 0.8804295041277721, "grad_norm": 0.3673006296157837, "learning_rate": 7.405261938970965e-07, "loss": 0.32635849714279175, "step": 16317, "token_acc": 0.8826807228915663 }, { "epoch": 0.8804834619327686, "grad_norm": 0.495357871055603, "learning_rate": 7.398663639525095e-07, "loss": 0.352992981672287, "step": 16318, "token_acc": 0.8769998532217819 }, { "epoch": 0.8805374197377651, "grad_norm": 0.40551063418388367, "learning_rate": 7.392068168099775e-07, "loss": 0.3624243140220642, "step": 16319, "token_acc": 0.8718929254302104 }, { "epoch": 0.8805913775427615, "grad_norm": 0.5773985385894775, "learning_rate": 7.385475524896435e-07, "loss": 0.3120906352996826, "step": 16320, "token_acc": 0.8885376809183164 }, { "epoch": 0.880645335347758, "grad_norm": 0.4604826271533966, "learning_rate": 7.378885710116412e-07, "loss": 0.28736209869384766, "step": 16321, "token_acc": 0.8905200687177885 }, { "epoch": 0.8806992931527545, "grad_norm": 0.5168046355247498, "learning_rate": 7.372298723960935e-07, "loss": 0.3565792441368103, "step": 16322, "token_acc": 0.8775790921595599 }, { "epoch": 0.880753250957751, "grad_norm": 0.3643665313720703, "learning_rate": 7.36571456663121e-07, "loss": 0.29084503650665283, "step": 16323, "token_acc": 0.8933789653810714 }, { "epoch": 0.8808072087627475, "grad_norm": 0.3938544690608978, "learning_rate": 7.359133238328298e-07, "loss": 0.3075316548347473, "step": 16324, "token_acc": 0.8884032634032634 }, { "epoch": 0.880861166567744, "grad_norm": 0.30293288826942444, "learning_rate": 7.352554739253182e-07, "loss": 0.3214215040206909, "step": 16325, "token_acc": 0.885305448541552 }, { "epoch": 0.8809151243727406, "grad_norm": 0.4119625389575958, "learning_rate": 7.345979069606779e-07, "loss": 0.29726365208625793, "step": 16326, "token_acc": 0.8899170389278876 }, { "epoch": 0.8809690821777371, "grad_norm": 0.4045994281768799, "learning_rate": 7.339406229589907e-07, "loss": 0.32543492317199707, "step": 16327, "token_acc": 0.8811469092030214 }, { "epoch": 0.8810230399827335, "grad_norm": 0.4432964622974396, "learning_rate": 7.332836219403272e-07, "loss": 0.3343217372894287, "step": 16328, "token_acc": 0.8782197301559488 }, { "epoch": 0.88107699778773, "grad_norm": 0.4986250698566437, "learning_rate": 7.326269039247558e-07, "loss": 0.277496874332428, "step": 16329, "token_acc": 0.8988168094655242 }, { "epoch": 0.8811309555927265, "grad_norm": 0.3996575176715851, "learning_rate": 7.319704689323315e-07, "loss": 0.3156035542488098, "step": 16330, "token_acc": 0.8854473092230489 }, { "epoch": 0.881184913397723, "grad_norm": 0.3815858066082001, "learning_rate": 7.313143169831005e-07, "loss": 0.3339308500289917, "step": 16331, "token_acc": 0.8850195136595617 }, { "epoch": 0.8812388712027195, "grad_norm": 0.4693031907081604, "learning_rate": 7.306584480971024e-07, "loss": 0.2928920388221741, "step": 16332, "token_acc": 0.8886923190328763 }, { "epoch": 0.881292829007716, "grad_norm": 0.45667892694473267, "learning_rate": 7.300028622943689e-07, "loss": 0.30713897943496704, "step": 16333, "token_acc": 0.8929692404268675 }, { "epoch": 0.8813467868127125, "grad_norm": 0.38163134455680847, "learning_rate": 7.293475595949184e-07, "loss": 0.3056465685367584, "step": 16334, "token_acc": 0.8900421179302046 }, { "epoch": 0.8814007446177089, "grad_norm": 0.3810461461544037, "learning_rate": 7.286925400187628e-07, "loss": 0.29460713267326355, "step": 16335, "token_acc": 0.8924389186063322 }, { "epoch": 0.8814547024227054, "grad_norm": 0.38109201192855835, "learning_rate": 7.280378035859093e-07, "loss": 0.30406081676483154, "step": 16336, "token_acc": 0.8891010342084328 }, { "epoch": 0.8815086602277019, "grad_norm": 0.3702011704444885, "learning_rate": 7.27383350316353e-07, "loss": 0.3089127540588379, "step": 16337, "token_acc": 0.8861100894936864 }, { "epoch": 0.8815626180326984, "grad_norm": 0.44891563057899475, "learning_rate": 7.267291802300802e-07, "loss": 0.32112765312194824, "step": 16338, "token_acc": 0.8870627261761158 }, { "epoch": 0.8816165758376949, "grad_norm": 0.32910481095314026, "learning_rate": 7.260752933470694e-07, "loss": 0.3192625641822815, "step": 16339, "token_acc": 0.8857589984350548 }, { "epoch": 0.8816705336426914, "grad_norm": 0.5425517559051514, "learning_rate": 7.25421689687289e-07, "loss": 0.34463614225387573, "step": 16340, "token_acc": 0.8772504091653028 }, { "epoch": 0.8817244914476879, "grad_norm": 0.3798951208591461, "learning_rate": 7.247683692706986e-07, "loss": 0.33610421419143677, "step": 16341, "token_acc": 0.8825131101250504 }, { "epoch": 0.8817784492526844, "grad_norm": 0.40756192803382874, "learning_rate": 7.241153321172545e-07, "loss": 0.3223087191581726, "step": 16342, "token_acc": 0.8797225960045544 }, { "epoch": 0.8818324070576808, "grad_norm": 0.39235061407089233, "learning_rate": 7.234625782468985e-07, "loss": 0.3145417869091034, "step": 16343, "token_acc": 0.886238258535858 }, { "epoch": 0.8818863648626774, "grad_norm": 0.5248064398765564, "learning_rate": 7.228101076795646e-07, "loss": 0.3666430115699768, "step": 16344, "token_acc": 0.8719704952581665 }, { "epoch": 0.8819403226676739, "grad_norm": 0.34626510739326477, "learning_rate": 7.221579204351803e-07, "loss": 0.29162538051605225, "step": 16345, "token_acc": 0.8930078360458107 }, { "epoch": 0.8819942804726704, "grad_norm": 0.5152768492698669, "learning_rate": 7.21506016533663e-07, "loss": 0.4066739082336426, "step": 16346, "token_acc": 0.8624760218385716 }, { "epoch": 0.8820482382776669, "grad_norm": 0.3523222804069519, "learning_rate": 7.208543959949199e-07, "loss": 0.3626648187637329, "step": 16347, "token_acc": 0.8749281526612255 }, { "epoch": 0.8821021960826634, "grad_norm": 0.3810189664363861, "learning_rate": 7.202030588388542e-07, "loss": 0.32009994983673096, "step": 16348, "token_acc": 0.8831442891057768 }, { "epoch": 0.8821561538876599, "grad_norm": 0.4377889335155487, "learning_rate": 7.195520050853555e-07, "loss": 0.28484249114990234, "step": 16349, "token_acc": 0.896235368554255 }, { "epoch": 0.8822101116926564, "grad_norm": 0.4815262258052826, "learning_rate": 7.189012347543067e-07, "loss": 0.30791911482810974, "step": 16350, "token_acc": 0.8868628263572317 }, { "epoch": 0.8822640694976528, "grad_norm": 0.5420560836791992, "learning_rate": 7.182507478655843e-07, "loss": 0.34300869703292847, "step": 16351, "token_acc": 0.8789182544560541 }, { "epoch": 0.8823180273026493, "grad_norm": 0.3821500539779663, "learning_rate": 7.176005444390521e-07, "loss": 0.32391437888145447, "step": 16352, "token_acc": 0.8848611595213198 }, { "epoch": 0.8823719851076458, "grad_norm": 0.4568435549736023, "learning_rate": 7.169506244945668e-07, "loss": 0.3604252338409424, "step": 16353, "token_acc": 0.8708811689279857 }, { "epoch": 0.8824259429126423, "grad_norm": 0.3607598543167114, "learning_rate": 7.163009880519767e-07, "loss": 0.2675324082374573, "step": 16354, "token_acc": 0.9018597637597386 }, { "epoch": 0.8824799007176388, "grad_norm": 0.41586238145828247, "learning_rate": 7.156516351311238e-07, "loss": 0.3143446445465088, "step": 16355, "token_acc": 0.8893331201023509 }, { "epoch": 0.8825338585226353, "grad_norm": 0.46627482771873474, "learning_rate": 7.150025657518378e-07, "loss": 0.3709896504878998, "step": 16356, "token_acc": 0.8704675572519084 }, { "epoch": 0.8825878163276318, "grad_norm": 0.42789217829704285, "learning_rate": 7.143537799339417e-07, "loss": 0.3135884404182434, "step": 16357, "token_acc": 0.8826788333061757 }, { "epoch": 0.8826417741326282, "grad_norm": 0.41460686922073364, "learning_rate": 7.137052776972475e-07, "loss": 0.32903289794921875, "step": 16358, "token_acc": 0.8775510204081632 }, { "epoch": 0.8826957319376247, "grad_norm": 0.40232962369918823, "learning_rate": 7.130570590615627e-07, "loss": 0.3055962026119232, "step": 16359, "token_acc": 0.8908344733242134 }, { "epoch": 0.8827496897426212, "grad_norm": 0.4835112690925598, "learning_rate": 7.124091240466802e-07, "loss": 0.37353044748306274, "step": 16360, "token_acc": 0.8743643805035347 }, { "epoch": 0.8828036475476178, "grad_norm": 0.4756337106227875, "learning_rate": 7.11761472672392e-07, "loss": 0.4096362590789795, "step": 16361, "token_acc": 0.8561992569148205 }, { "epoch": 0.8828576053526143, "grad_norm": 0.3564186096191406, "learning_rate": 7.111141049584746e-07, "loss": 0.3218252658843994, "step": 16362, "token_acc": 0.8891612903225806 }, { "epoch": 0.8829115631576108, "grad_norm": 0.4216158986091614, "learning_rate": 7.104670209246989e-07, "loss": 0.32185667753219604, "step": 16363, "token_acc": 0.8850115295926211 }, { "epoch": 0.8829655209626073, "grad_norm": 0.41615986824035645, "learning_rate": 7.098202205908278e-07, "loss": 0.3069802224636078, "step": 16364, "token_acc": 0.8910913140311804 }, { "epoch": 0.8830194787676038, "grad_norm": 0.5280004143714905, "learning_rate": 7.091737039766133e-07, "loss": 0.33444762229919434, "step": 16365, "token_acc": 0.8821564885496184 }, { "epoch": 0.8830734365726002, "grad_norm": 0.407674103975296, "learning_rate": 7.085274711017998e-07, "loss": 0.34038078784942627, "step": 16366, "token_acc": 0.8757129990993695 }, { "epoch": 0.8831273943775967, "grad_norm": 0.49703913927078247, "learning_rate": 7.078815219861223e-07, "loss": 0.28409332036972046, "step": 16367, "token_acc": 0.8921952048971263 }, { "epoch": 0.8831813521825932, "grad_norm": 0.3396717607975006, "learning_rate": 7.072358566493109e-07, "loss": 0.26799583435058594, "step": 16368, "token_acc": 0.9006653992395437 }, { "epoch": 0.8832353099875897, "grad_norm": 0.45028573274612427, "learning_rate": 7.065904751110809e-07, "loss": 0.2851514220237732, "step": 16369, "token_acc": 0.8926010145181039 }, { "epoch": 0.8832892677925862, "grad_norm": 0.4678761065006256, "learning_rate": 7.059453773911429e-07, "loss": 0.33061516284942627, "step": 16370, "token_acc": 0.8847845206684257 }, { "epoch": 0.8833432255975827, "grad_norm": 0.4734524190425873, "learning_rate": 7.053005635091991e-07, "loss": 0.333696186542511, "step": 16371, "token_acc": 0.8796715267303502 }, { "epoch": 0.8833971834025792, "grad_norm": 0.40423649549484253, "learning_rate": 7.046560334849406e-07, "loss": 0.36236703395843506, "step": 16372, "token_acc": 0.8749540947484392 }, { "epoch": 0.8834511412075757, "grad_norm": 0.4353712499141693, "learning_rate": 7.040117873380504e-07, "loss": 0.27559757232666016, "step": 16373, "token_acc": 0.8979963570127505 }, { "epoch": 0.8835050990125721, "grad_norm": 0.4908227324485779, "learning_rate": 7.03367825088207e-07, "loss": 0.37175536155700684, "step": 16374, "token_acc": 0.8691629955947137 }, { "epoch": 0.8835590568175686, "grad_norm": 0.48418113589286804, "learning_rate": 7.027241467550749e-07, "loss": 0.32666707038879395, "step": 16375, "token_acc": 0.8827741678822437 }, { "epoch": 0.8836130146225651, "grad_norm": 0.3367047607898712, "learning_rate": 7.020807523583107e-07, "loss": 0.3076861500740051, "step": 16376, "token_acc": 0.8888576252110298 }, { "epoch": 0.8836669724275616, "grad_norm": 0.42811843752861023, "learning_rate": 7.014376419175662e-07, "loss": 0.3024038076400757, "step": 16377, "token_acc": 0.8955859576664946 }, { "epoch": 0.8837209302325582, "grad_norm": 0.4963510036468506, "learning_rate": 7.00794815452479e-07, "loss": 0.3176099359989166, "step": 16378, "token_acc": 0.8895883777239709 }, { "epoch": 0.8837748880375547, "grad_norm": 0.42882028222084045, "learning_rate": 7.001522729826826e-07, "loss": 0.3738951086997986, "step": 16379, "token_acc": 0.8703974284044419 }, { "epoch": 0.8838288458425512, "grad_norm": 0.3221603035926819, "learning_rate": 6.995100145277978e-07, "loss": 0.30947500467300415, "step": 16380, "token_acc": 0.8908 }, { "epoch": 0.8838828036475476, "grad_norm": 0.5168519616127014, "learning_rate": 6.988680401074443e-07, "loss": 0.35943445563316345, "step": 16381, "token_acc": 0.8735581836764491 }, { "epoch": 0.8839367614525441, "grad_norm": 0.3464585542678833, "learning_rate": 6.982263497412222e-07, "loss": 0.2995702028274536, "step": 16382, "token_acc": 0.8944910015111966 }, { "epoch": 0.8839907192575406, "grad_norm": 0.5345620512962341, "learning_rate": 6.975849434487303e-07, "loss": 0.3059733510017395, "step": 16383, "token_acc": 0.890990639625585 }, { "epoch": 0.8840446770625371, "grad_norm": 0.2652396857738495, "learning_rate": 6.969438212495583e-07, "loss": 0.29500919580459595, "step": 16384, "token_acc": 0.8916558861578266 }, { "epoch": 0.8840986348675336, "grad_norm": 0.29461783170700073, "learning_rate": 6.963029831632839e-07, "loss": 0.33941391110420227, "step": 16385, "token_acc": 0.8827136834036029 }, { "epoch": 0.8841525926725301, "grad_norm": 0.38912683725357056, "learning_rate": 6.956624292094783e-07, "loss": 0.3629923164844513, "step": 16386, "token_acc": 0.8723636761009725 }, { "epoch": 0.8842065504775266, "grad_norm": 0.4166630208492279, "learning_rate": 6.950221594077066e-07, "loss": 0.2618273198604584, "step": 16387, "token_acc": 0.9000580214679431 }, { "epoch": 0.8842605082825231, "grad_norm": 0.4545499086380005, "learning_rate": 6.943821737775202e-07, "loss": 0.36414867639541626, "step": 16388, "token_acc": 0.869020035436827 }, { "epoch": 0.8843144660875195, "grad_norm": 0.4515618681907654, "learning_rate": 6.937424723384645e-07, "loss": 0.356671541929245, "step": 16389, "token_acc": 0.8762074602466934 }, { "epoch": 0.884368423892516, "grad_norm": 0.42861396074295044, "learning_rate": 6.931030551100748e-07, "loss": 0.28615403175354004, "step": 16390, "token_acc": 0.8988580750407831 }, { "epoch": 0.8844223816975125, "grad_norm": 0.3901495635509491, "learning_rate": 6.924639221118812e-07, "loss": 0.3483962416648865, "step": 16391, "token_acc": 0.8735460285809239 }, { "epoch": 0.884476339502509, "grad_norm": 0.3796776235103607, "learning_rate": 6.918250733634002e-07, "loss": 0.32564717531204224, "step": 16392, "token_acc": 0.8840196078431373 }, { "epoch": 0.8845302973075055, "grad_norm": 0.4828561842441559, "learning_rate": 6.911865088841418e-07, "loss": 0.31928592920303345, "step": 16393, "token_acc": 0.8850422832980972 }, { "epoch": 0.884584255112502, "grad_norm": 0.4079953730106354, "learning_rate": 6.905482286936105e-07, "loss": 0.3539743423461914, "step": 16394, "token_acc": 0.8771655437921078 }, { "epoch": 0.8846382129174986, "grad_norm": 0.3807491958141327, "learning_rate": 6.899102328112972e-07, "loss": 0.28083157539367676, "step": 16395, "token_acc": 0.8944759938048529 }, { "epoch": 0.8846921707224951, "grad_norm": 0.33438679575920105, "learning_rate": 6.892725212566876e-07, "loss": 0.3383462131023407, "step": 16396, "token_acc": 0.8791318864774624 }, { "epoch": 0.8847461285274915, "grad_norm": 0.3662494421005249, "learning_rate": 6.886350940492547e-07, "loss": 0.3411516547203064, "step": 16397, "token_acc": 0.8783164100884376 }, { "epoch": 0.884800086332488, "grad_norm": 0.4127553403377533, "learning_rate": 6.879979512084678e-07, "loss": 0.34020206332206726, "step": 16398, "token_acc": 0.8774104683195593 }, { "epoch": 0.8848540441374845, "grad_norm": 0.503031313419342, "learning_rate": 6.873610927537833e-07, "loss": 0.3343096077442169, "step": 16399, "token_acc": 0.8778860028860029 }, { "epoch": 0.884908001942481, "grad_norm": 0.41430217027664185, "learning_rate": 6.867245187046522e-07, "loss": 0.3171311020851135, "step": 16400, "token_acc": 0.8859649122807017 }, { "epoch": 0.8849619597474775, "grad_norm": 0.37857064604759216, "learning_rate": 6.860882290805137e-07, "loss": 0.3144493103027344, "step": 16401, "token_acc": 0.8833414121829052 }, { "epoch": 0.885015917552474, "grad_norm": 0.4590851664543152, "learning_rate": 6.854522239008011e-07, "loss": 0.31306225061416626, "step": 16402, "token_acc": 0.8864906043676993 }, { "epoch": 0.8850698753574705, "grad_norm": 0.3254408836364746, "learning_rate": 6.848165031849386e-07, "loss": 0.26741325855255127, "step": 16403, "token_acc": 0.8991379310344828 }, { "epoch": 0.8851238331624669, "grad_norm": 0.45022329688072205, "learning_rate": 6.841810669523385e-07, "loss": 0.2975202798843384, "step": 16404, "token_acc": 0.8891960100949405 }, { "epoch": 0.8851777909674634, "grad_norm": 0.2576458156108856, "learning_rate": 6.835459152224066e-07, "loss": 0.2389475703239441, "step": 16405, "token_acc": 0.9091454632741238 }, { "epoch": 0.8852317487724599, "grad_norm": 0.38732782006263733, "learning_rate": 6.82911048014544e-07, "loss": 0.2927679419517517, "step": 16406, "token_acc": 0.8943951238902875 }, { "epoch": 0.8852857065774564, "grad_norm": 0.40193137526512146, "learning_rate": 6.822764653481373e-07, "loss": 0.3198748230934143, "step": 16407, "token_acc": 0.8849357861853523 }, { "epoch": 0.8853396643824529, "grad_norm": 0.43129733204841614, "learning_rate": 6.816421672425666e-07, "loss": 0.3378866910934448, "step": 16408, "token_acc": 0.8793950361944157 }, { "epoch": 0.8853936221874494, "grad_norm": 0.45977088809013367, "learning_rate": 6.81008153717202e-07, "loss": 0.3030741214752197, "step": 16409, "token_acc": 0.8917329093799682 }, { "epoch": 0.8854475799924459, "grad_norm": 0.42254355549812317, "learning_rate": 6.803744247914079e-07, "loss": 0.3120524287223816, "step": 16410, "token_acc": 0.8857860520094563 }, { "epoch": 0.8855015377974425, "grad_norm": 0.40895819664001465, "learning_rate": 6.797409804845378e-07, "loss": 0.2895078659057617, "step": 16411, "token_acc": 0.8934878587196468 }, { "epoch": 0.8855554956024388, "grad_norm": 0.45310303568840027, "learning_rate": 6.791078208159341e-07, "loss": 0.30215615034103394, "step": 16412, "token_acc": 0.8864502085978596 }, { "epoch": 0.8856094534074354, "grad_norm": 0.3311258554458618, "learning_rate": 6.784749458049377e-07, "loss": 0.30361276865005493, "step": 16413, "token_acc": 0.8899034698669449 }, { "epoch": 0.8856634112124319, "grad_norm": 0.33754509687423706, "learning_rate": 6.778423554708746e-07, "loss": 0.34353190660476685, "step": 16414, "token_acc": 0.8812527185732927 }, { "epoch": 0.8857173690174284, "grad_norm": 0.4699462354183197, "learning_rate": 6.772100498330658e-07, "loss": 0.35941964387893677, "step": 16415, "token_acc": 0.8679719777258846 }, { "epoch": 0.8857713268224249, "grad_norm": 0.43385857343673706, "learning_rate": 6.765780289108182e-07, "loss": 0.3441890478134155, "step": 16416, "token_acc": 0.8778950253528848 }, { "epoch": 0.8858252846274214, "grad_norm": 0.42906954884529114, "learning_rate": 6.75946292723435e-07, "loss": 0.23339387774467468, "step": 16417, "token_acc": 0.9129221732745961 }, { "epoch": 0.8858792424324179, "grad_norm": 0.42854517698287964, "learning_rate": 6.753148412902077e-07, "loss": 0.3336327373981476, "step": 16418, "token_acc": 0.8801767676767677 }, { "epoch": 0.8859332002374143, "grad_norm": 0.38668394088745117, "learning_rate": 6.746836746304242e-07, "loss": 0.29165253043174744, "step": 16419, "token_acc": 0.8939759036144578 }, { "epoch": 0.8859871580424108, "grad_norm": 0.3871472477912903, "learning_rate": 6.740527927633589e-07, "loss": 0.26877379417419434, "step": 16420, "token_acc": 0.8989335520918786 }, { "epoch": 0.8860411158474073, "grad_norm": 0.4512234926223755, "learning_rate": 6.734221957082776e-07, "loss": 0.3240302503108978, "step": 16421, "token_acc": 0.8867924528301887 }, { "epoch": 0.8860950736524038, "grad_norm": 0.5609233975410461, "learning_rate": 6.727918834844394e-07, "loss": 0.331786572933197, "step": 16422, "token_acc": 0.8779433681073026 }, { "epoch": 0.8861490314574003, "grad_norm": 0.5011217594146729, "learning_rate": 6.721618561110931e-07, "loss": 0.2737969756126404, "step": 16423, "token_acc": 0.8981636060100167 }, { "epoch": 0.8862029892623968, "grad_norm": 0.4365347623825073, "learning_rate": 6.715321136074804e-07, "loss": 0.33093172311782837, "step": 16424, "token_acc": 0.8824898196625945 }, { "epoch": 0.8862569470673933, "grad_norm": 0.3939569294452667, "learning_rate": 6.709026559928322e-07, "loss": 0.306387335062027, "step": 16425, "token_acc": 0.888432489731019 }, { "epoch": 0.8863109048723898, "grad_norm": 0.4214189946651459, "learning_rate": 6.702734832863744e-07, "loss": 0.3126441240310669, "step": 16426, "token_acc": 0.8899435960929977 }, { "epoch": 0.8863648626773862, "grad_norm": 0.3801861107349396, "learning_rate": 6.696445955073205e-07, "loss": 0.3266797363758087, "step": 16427, "token_acc": 0.8834009923319801 }, { "epoch": 0.8864188204823827, "grad_norm": 0.4606502950191498, "learning_rate": 6.690159926748763e-07, "loss": 0.36964643001556396, "step": 16428, "token_acc": 0.8734635787294605 }, { "epoch": 0.8864727782873792, "grad_norm": 0.4612785577774048, "learning_rate": 6.683876748082385e-07, "loss": 0.31894922256469727, "step": 16429, "token_acc": 0.8824130879345603 }, { "epoch": 0.8865267360923758, "grad_norm": 0.3343099057674408, "learning_rate": 6.677596419265975e-07, "loss": 0.33636122941970825, "step": 16430, "token_acc": 0.8815770609318997 }, { "epoch": 0.8865806938973723, "grad_norm": 0.4378776252269745, "learning_rate": 6.671318940491323e-07, "loss": 0.33483996987342834, "step": 16431, "token_acc": 0.8784828592268418 }, { "epoch": 0.8866346517023688, "grad_norm": 0.3969678580760956, "learning_rate": 6.665044311950131e-07, "loss": 0.32118070125579834, "step": 16432, "token_acc": 0.8834042553191489 }, { "epoch": 0.8866886095073653, "grad_norm": 0.41658294200897217, "learning_rate": 6.658772533834046e-07, "loss": 0.3186286687850952, "step": 16433, "token_acc": 0.8875296912114015 }, { "epoch": 0.8867425673123618, "grad_norm": 0.4203338623046875, "learning_rate": 6.652503606334592e-07, "loss": 0.3402945399284363, "step": 16434, "token_acc": 0.8806091922763122 }, { "epoch": 0.8867965251173582, "grad_norm": 0.5622534155845642, "learning_rate": 6.646237529643229e-07, "loss": 0.35614871978759766, "step": 16435, "token_acc": 0.8748960931005819 }, { "epoch": 0.8868504829223547, "grad_norm": 0.4706994891166687, "learning_rate": 6.639974303951314e-07, "loss": 0.34336745738983154, "step": 16436, "token_acc": 0.8796182685753238 }, { "epoch": 0.8869044407273512, "grad_norm": 0.42629632353782654, "learning_rate": 6.633713929450125e-07, "loss": 0.3444157540798187, "step": 16437, "token_acc": 0.8771808602522024 }, { "epoch": 0.8869583985323477, "grad_norm": 0.3621237874031067, "learning_rate": 6.627456406330834e-07, "loss": 0.33874112367630005, "step": 16438, "token_acc": 0.8775778850372971 }, { "epoch": 0.8870123563373442, "grad_norm": 0.36839160323143005, "learning_rate": 6.621201734784588e-07, "loss": 0.29199129343032837, "step": 16439, "token_acc": 0.8914853612677948 }, { "epoch": 0.8870663141423407, "grad_norm": 0.38850170373916626, "learning_rate": 6.614949915002377e-07, "loss": 0.3604068160057068, "step": 16440, "token_acc": 0.8731265766434189 }, { "epoch": 0.8871202719473372, "grad_norm": 0.44440963864326477, "learning_rate": 6.608700947175129e-07, "loss": 0.34055930376052856, "step": 16441, "token_acc": 0.88196450680974 }, { "epoch": 0.8871742297523336, "grad_norm": 0.44453996419906616, "learning_rate": 6.602454831493687e-07, "loss": 0.3396643400192261, "step": 16442, "token_acc": 0.8847608754390706 }, { "epoch": 0.8872281875573301, "grad_norm": 0.3661440908908844, "learning_rate": 6.596211568148814e-07, "loss": 0.3189041018486023, "step": 16443, "token_acc": 0.8817831718987046 }, { "epoch": 0.8872821453623266, "grad_norm": 0.38593482971191406, "learning_rate": 6.589971157331155e-07, "loss": 0.280006468296051, "step": 16444, "token_acc": 0.8991228070175439 }, { "epoch": 0.8873361031673231, "grad_norm": 0.3297567069530487, "learning_rate": 6.583733599231323e-07, "loss": 0.29649585485458374, "step": 16445, "token_acc": 0.8949680511182109 }, { "epoch": 0.8873900609723196, "grad_norm": 0.4330660104751587, "learning_rate": 6.577498894039791e-07, "loss": 0.3525918126106262, "step": 16446, "token_acc": 0.8793169364088187 }, { "epoch": 0.8874440187773162, "grad_norm": 0.5235536694526672, "learning_rate": 6.571267041946971e-07, "loss": 0.3001154065132141, "step": 16447, "token_acc": 0.8971428571428571 }, { "epoch": 0.8874979765823127, "grad_norm": 0.41618412733078003, "learning_rate": 6.56503804314318e-07, "loss": 0.3336068093776703, "step": 16448, "token_acc": 0.877197093977033 }, { "epoch": 0.8875519343873092, "grad_norm": 0.4736928343772888, "learning_rate": 6.558811897818651e-07, "loss": 0.36204081773757935, "step": 16449, "token_acc": 0.8733665953908292 }, { "epoch": 0.8876058921923056, "grad_norm": 0.43629804253578186, "learning_rate": 6.552588606163546e-07, "loss": 0.31205350160598755, "step": 16450, "token_acc": 0.8861594609769792 }, { "epoch": 0.8876598499973021, "grad_norm": 0.364950031042099, "learning_rate": 6.546368168367867e-07, "loss": 0.3423726260662079, "step": 16451, "token_acc": 0.8763821944995747 }, { "epoch": 0.8877138078022986, "grad_norm": 0.4276285171508789, "learning_rate": 6.54015058462164e-07, "loss": 0.30412784218788147, "step": 16452, "token_acc": 0.889168765743073 }, { "epoch": 0.8877677656072951, "grad_norm": 0.43745148181915283, "learning_rate": 6.533935855114737e-07, "loss": 0.32681867480278015, "step": 16453, "token_acc": 0.8845231752797017 }, { "epoch": 0.8878217234122916, "grad_norm": 0.4834791421890259, "learning_rate": 6.527723980036948e-07, "loss": 0.3102281093597412, "step": 16454, "token_acc": 0.8869485294117647 }, { "epoch": 0.8878756812172881, "grad_norm": 0.3303143382072449, "learning_rate": 6.52151495957798e-07, "loss": 0.3023380637168884, "step": 16455, "token_acc": 0.8935632183908045 }, { "epoch": 0.8879296390222846, "grad_norm": 0.6775092482566833, "learning_rate": 6.515308793927455e-07, "loss": 0.3193075358867645, "step": 16456, "token_acc": 0.8832712022367195 }, { "epoch": 0.8879835968272811, "grad_norm": 0.45545294880867004, "learning_rate": 6.509105483274891e-07, "loss": 0.2958261966705322, "step": 16457, "token_acc": 0.8936576061039581 }, { "epoch": 0.8880375546322775, "grad_norm": 0.4650438725948334, "learning_rate": 6.50290502780978e-07, "loss": 0.3377598822116852, "step": 16458, "token_acc": 0.8797370997063347 }, { "epoch": 0.888091512437274, "grad_norm": 0.38764381408691406, "learning_rate": 6.496707427721449e-07, "loss": 0.36046141386032104, "step": 16459, "token_acc": 0.8724893487522825 }, { "epoch": 0.8881454702422705, "grad_norm": 0.502848744392395, "learning_rate": 6.49051268319918e-07, "loss": 0.30011463165283203, "step": 16460, "token_acc": 0.8859781696053737 }, { "epoch": 0.888199428047267, "grad_norm": 0.4027478098869324, "learning_rate": 6.484320794432164e-07, "loss": 0.2963593602180481, "step": 16461, "token_acc": 0.8920826161790018 }, { "epoch": 0.8882533858522635, "grad_norm": 0.3334181606769562, "learning_rate": 6.478131761609486e-07, "loss": 0.35329991579055786, "step": 16462, "token_acc": 0.8717914966434118 }, { "epoch": 0.88830734365726, "grad_norm": 0.48571711778640747, "learning_rate": 6.47194558492017e-07, "loss": 0.3415990471839905, "step": 16463, "token_acc": 0.8780363078496548 }, { "epoch": 0.8883613014622566, "grad_norm": 0.41712257266044617, "learning_rate": 6.465762264553121e-07, "loss": 0.2886391282081604, "step": 16464, "token_acc": 0.8931392462198149 }, { "epoch": 0.888415259267253, "grad_norm": 0.38353651762008667, "learning_rate": 6.459581800697201e-07, "loss": 0.2958928048610687, "step": 16465, "token_acc": 0.8895989774724397 }, { "epoch": 0.8884692170722495, "grad_norm": 0.4141266345977783, "learning_rate": 6.453404193541146e-07, "loss": 0.32532766461372375, "step": 16466, "token_acc": 0.8806913213157406 }, { "epoch": 0.888523174877246, "grad_norm": 0.4629911184310913, "learning_rate": 6.447229443273628e-07, "loss": 0.37541663646698, "step": 16467, "token_acc": 0.8729606846750468 }, { "epoch": 0.8885771326822425, "grad_norm": 0.40044939517974854, "learning_rate": 6.441057550083207e-07, "loss": 0.28242623805999756, "step": 16468, "token_acc": 0.8958246055020248 }, { "epoch": 0.888631090487239, "grad_norm": 0.34562650322914124, "learning_rate": 6.434888514158378e-07, "loss": 0.315898597240448, "step": 16469, "token_acc": 0.8819178780923234 }, { "epoch": 0.8886850482922355, "grad_norm": 0.39835524559020996, "learning_rate": 6.428722335687532e-07, "loss": 0.2871493995189667, "step": 16470, "token_acc": 0.892817149256119 }, { "epoch": 0.888739006097232, "grad_norm": 0.3281759023666382, "learning_rate": 6.422559014859009e-07, "loss": 0.2883473038673401, "step": 16471, "token_acc": 0.8969937077604287 }, { "epoch": 0.8887929639022285, "grad_norm": 0.5375303626060486, "learning_rate": 6.416398551861025e-07, "loss": 0.353546679019928, "step": 16472, "token_acc": 0.8735275080906149 }, { "epoch": 0.8888469217072249, "grad_norm": 0.30848994851112366, "learning_rate": 6.410240946881707e-07, "loss": 0.26847776770591736, "step": 16473, "token_acc": 0.9041287613715885 }, { "epoch": 0.8889008795122214, "grad_norm": 0.5270199775695801, "learning_rate": 6.404086200109117e-07, "loss": 0.3176937699317932, "step": 16474, "token_acc": 0.8825231481481481 }, { "epoch": 0.8889548373172179, "grad_norm": 0.3666958808898926, "learning_rate": 6.397934311731214e-07, "loss": 0.29766684770584106, "step": 16475, "token_acc": 0.8897338403041825 }, { "epoch": 0.8890087951222144, "grad_norm": 0.3574253022670746, "learning_rate": 6.391785281935859e-07, "loss": 0.33721059560775757, "step": 16476, "token_acc": 0.8778928136419001 }, { "epoch": 0.8890627529272109, "grad_norm": 0.48522982001304626, "learning_rate": 6.385639110910868e-07, "loss": 0.4015870690345764, "step": 16477, "token_acc": 0.8620641947980078 }, { "epoch": 0.8891167107322074, "grad_norm": 0.44939061999320984, "learning_rate": 6.379495798843949e-07, "loss": 0.3376340866088867, "step": 16478, "token_acc": 0.882210568777733 }, { "epoch": 0.889170668537204, "grad_norm": 0.44092509150505066, "learning_rate": 6.373355345922693e-07, "loss": 0.2833743095397949, "step": 16479, "token_acc": 0.8973214285714286 }, { "epoch": 0.8892246263422005, "grad_norm": 0.4432162344455719, "learning_rate": 6.367217752334643e-07, "loss": 0.3318954408168793, "step": 16480, "token_acc": 0.8792906178489702 }, { "epoch": 0.8892785841471968, "grad_norm": 0.49925920367240906, "learning_rate": 6.361083018267233e-07, "loss": 0.37651240825653076, "step": 16481, "token_acc": 0.8672787979966611 }, { "epoch": 0.8893325419521934, "grad_norm": 0.4855669438838959, "learning_rate": 6.354951143907817e-07, "loss": 0.3358905613422394, "step": 16482, "token_acc": 0.8783394985614468 }, { "epoch": 0.8893864997571899, "grad_norm": 0.4131946563720703, "learning_rate": 6.348822129443666e-07, "loss": 0.33033791184425354, "step": 16483, "token_acc": 0.8799030918320259 }, { "epoch": 0.8894404575621864, "grad_norm": 0.48766928911209106, "learning_rate": 6.342695975061952e-07, "loss": 0.3135778605937958, "step": 16484, "token_acc": 0.8859464339452302 }, { "epoch": 0.8894944153671829, "grad_norm": 0.4906582236289978, "learning_rate": 6.33657268094976e-07, "loss": 0.3512561321258545, "step": 16485, "token_acc": 0.8773006134969326 }, { "epoch": 0.8895483731721794, "grad_norm": 0.3517424464225769, "learning_rate": 6.330452247294105e-07, "loss": 0.31811994314193726, "step": 16486, "token_acc": 0.8904574453644631 }, { "epoch": 0.8896023309771759, "grad_norm": 0.4289294183254242, "learning_rate": 6.324334674281906e-07, "loss": 0.29718947410583496, "step": 16487, "token_acc": 0.894032416502947 }, { "epoch": 0.8896562887821723, "grad_norm": 0.4345591068267822, "learning_rate": 6.318219962099981e-07, "loss": 0.3191264867782593, "step": 16488, "token_acc": 0.8896274540114392 }, { "epoch": 0.8897102465871688, "grad_norm": 0.4142795503139496, "learning_rate": 6.312108110935066e-07, "loss": 0.3050515651702881, "step": 16489, "token_acc": 0.8888728323699422 }, { "epoch": 0.8897642043921653, "grad_norm": 0.4656883180141449, "learning_rate": 6.305999120973838e-07, "loss": 0.3483537435531616, "step": 16490, "token_acc": 0.8809076093769588 }, { "epoch": 0.8898181621971618, "grad_norm": 0.391848623752594, "learning_rate": 6.299892992402856e-07, "loss": 0.31099873781204224, "step": 16491, "token_acc": 0.889794227316669 }, { "epoch": 0.8898721200021583, "grad_norm": 0.4004015028476715, "learning_rate": 6.293789725408595e-07, "loss": 0.3312334418296814, "step": 16492, "token_acc": 0.877470873150936 }, { "epoch": 0.8899260778071548, "grad_norm": 0.37151038646698, "learning_rate": 6.287689320177448e-07, "loss": 0.3105975389480591, "step": 16493, "token_acc": 0.8882225953466059 }, { "epoch": 0.8899800356121513, "grad_norm": 0.5422109365463257, "learning_rate": 6.281591776895724e-07, "loss": 0.3658989369869232, "step": 16494, "token_acc": 0.8661275831087152 }, { "epoch": 0.8900339934171478, "grad_norm": 0.3885033130645752, "learning_rate": 6.275497095749627e-07, "loss": 0.32726362347602844, "step": 16495, "token_acc": 0.8817104036232849 }, { "epoch": 0.8900879512221442, "grad_norm": 0.41110166907310486, "learning_rate": 6.269405276925289e-07, "loss": 0.3606310486793518, "step": 16496, "token_acc": 0.8791005698444478 }, { "epoch": 0.8901419090271407, "grad_norm": 0.5007141828536987, "learning_rate": 6.263316320608781e-07, "loss": 0.3041059374809265, "step": 16497, "token_acc": 0.889601250977326 }, { "epoch": 0.8901958668321373, "grad_norm": 0.4287733733654022, "learning_rate": 6.257230226986033e-07, "loss": 0.3575083911418915, "step": 16498, "token_acc": 0.8764293139293139 }, { "epoch": 0.8902498246371338, "grad_norm": 0.4789850115776062, "learning_rate": 6.251146996242907e-07, "loss": 0.3605582118034363, "step": 16499, "token_acc": 0.870208033494701 }, { "epoch": 0.8903037824421303, "grad_norm": 0.4366079270839691, "learning_rate": 6.2450666285652e-07, "loss": 0.28572267293930054, "step": 16500, "token_acc": 0.8934933433339584 }, { "epoch": 0.8903577402471268, "grad_norm": 0.4974617660045624, "learning_rate": 6.238989124138595e-07, "loss": 0.3660353422164917, "step": 16501, "token_acc": 0.870496746548167 }, { "epoch": 0.8904116980521233, "grad_norm": 0.4281633198261261, "learning_rate": 6.232914483148678e-07, "loss": 0.37524139881134033, "step": 16502, "token_acc": 0.8689445880633921 }, { "epoch": 0.8904656558571198, "grad_norm": 0.3501213788986206, "learning_rate": 6.226842705781011e-07, "loss": 0.3240126371383667, "step": 16503, "token_acc": 0.8887015177065767 }, { "epoch": 0.8905196136621162, "grad_norm": 0.4567139446735382, "learning_rate": 6.220773792221002e-07, "loss": 0.3404853940010071, "step": 16504, "token_acc": 0.8778676349232547 }, { "epoch": 0.8905735714671127, "grad_norm": 0.5292244553565979, "learning_rate": 6.214707742654003e-07, "loss": 0.368249773979187, "step": 16505, "token_acc": 0.873688227437512 }, { "epoch": 0.8906275292721092, "grad_norm": 0.4674157202243805, "learning_rate": 6.208644557265253e-07, "loss": 0.30814722180366516, "step": 16506, "token_acc": 0.8882923191648023 }, { "epoch": 0.8906814870771057, "grad_norm": 0.4582553207874298, "learning_rate": 6.202584236239928e-07, "loss": 0.31715142726898193, "step": 16507, "token_acc": 0.8823874468397126 }, { "epoch": 0.8907354448821022, "grad_norm": 0.37279126048088074, "learning_rate": 6.196526779763101e-07, "loss": 0.3103989064693451, "step": 16508, "token_acc": 0.8852881812065485 }, { "epoch": 0.8907894026870987, "grad_norm": 0.4014725387096405, "learning_rate": 6.190472188019769e-07, "loss": 0.36296504735946655, "step": 16509, "token_acc": 0.8714816781731279 }, { "epoch": 0.8908433604920952, "grad_norm": 0.38754481077194214, "learning_rate": 6.184420461194851e-07, "loss": 0.32907286286354065, "step": 16510, "token_acc": 0.8797160510056526 }, { "epoch": 0.8908973182970916, "grad_norm": 0.4727477729320526, "learning_rate": 6.178371599473165e-07, "loss": 0.32319438457489014, "step": 16511, "token_acc": 0.884117951370926 }, { "epoch": 0.8909512761020881, "grad_norm": 0.515177845954895, "learning_rate": 6.172325603039419e-07, "loss": 0.3165167570114136, "step": 16512, "token_acc": 0.8877928483353884 }, { "epoch": 0.8910052339070846, "grad_norm": 0.45005282759666443, "learning_rate": 6.166282472078277e-07, "loss": 0.3055742681026459, "step": 16513, "token_acc": 0.8901286223461956 }, { "epoch": 0.8910591917120811, "grad_norm": 0.3842181861400604, "learning_rate": 6.160242206774292e-07, "loss": 0.2925668954849243, "step": 16514, "token_acc": 0.8860720830788027 }, { "epoch": 0.8911131495170777, "grad_norm": 0.3788926601409912, "learning_rate": 6.154204807311925e-07, "loss": 0.30588358640670776, "step": 16515, "token_acc": 0.8901972778712341 }, { "epoch": 0.8911671073220742, "grad_norm": 0.49681714177131653, "learning_rate": 6.148170273875553e-07, "loss": 0.36514705419540405, "step": 16516, "token_acc": 0.8709390862944163 }, { "epoch": 0.8912210651270707, "grad_norm": 0.5044819116592407, "learning_rate": 6.142138606649473e-07, "loss": 0.35333162546157837, "step": 16517, "token_acc": 0.8787054664128365 }, { "epoch": 0.8912750229320672, "grad_norm": 0.3474838435649872, "learning_rate": 6.136109805817903e-07, "loss": 0.2815954387187958, "step": 16518, "token_acc": 0.896908428353121 }, { "epoch": 0.8913289807370636, "grad_norm": 0.3087596893310547, "learning_rate": 6.13008387156494e-07, "loss": 0.3286634385585785, "step": 16519, "token_acc": 0.884016761448668 }, { "epoch": 0.8913829385420601, "grad_norm": 0.35881906747817993, "learning_rate": 6.124060804074638e-07, "loss": 0.3305830955505371, "step": 16520, "token_acc": 0.8838090990187333 }, { "epoch": 0.8914368963470566, "grad_norm": 0.48397642374038696, "learning_rate": 6.118040603530895e-07, "loss": 0.34175747632980347, "step": 16521, "token_acc": 0.8745779064158224 }, { "epoch": 0.8914908541520531, "grad_norm": 0.45206665992736816, "learning_rate": 6.112023270117629e-07, "loss": 0.30563390254974365, "step": 16522, "token_acc": 0.8928685132123527 }, { "epoch": 0.8915448119570496, "grad_norm": 0.4802337884902954, "learning_rate": 6.10600880401857e-07, "loss": 0.3734917640686035, "step": 16523, "token_acc": 0.8696025515210991 }, { "epoch": 0.8915987697620461, "grad_norm": 0.4089221656322479, "learning_rate": 6.099997205417407e-07, "loss": 0.33392155170440674, "step": 16524, "token_acc": 0.8805239481344271 }, { "epoch": 0.8916527275670426, "grad_norm": 0.4507081210613251, "learning_rate": 6.093988474497725e-07, "loss": 0.33547282218933105, "step": 16525, "token_acc": 0.8773709167544784 }, { "epoch": 0.8917066853720391, "grad_norm": 0.3996492028236389, "learning_rate": 6.087982611443044e-07, "loss": 0.3058136999607086, "step": 16526, "token_acc": 0.888465204957102 }, { "epoch": 0.8917606431770355, "grad_norm": 0.3032274842262268, "learning_rate": 6.081979616436762e-07, "loss": 0.3146618604660034, "step": 16527, "token_acc": 0.8875760411792232 }, { "epoch": 0.891814600982032, "grad_norm": 0.4491896331310272, "learning_rate": 6.07597948966221e-07, "loss": 0.31901562213897705, "step": 16528, "token_acc": 0.8840414949552367 }, { "epoch": 0.8918685587870285, "grad_norm": 0.4720461666584015, "learning_rate": 6.069982231302652e-07, "loss": 0.4074776768684387, "step": 16529, "token_acc": 0.858253030774013 }, { "epoch": 0.891922516592025, "grad_norm": 0.44079193472862244, "learning_rate": 6.063987841541242e-07, "loss": 0.31989550590515137, "step": 16530, "token_acc": 0.8820271066588097 }, { "epoch": 0.8919764743970215, "grad_norm": 0.3668016195297241, "learning_rate": 6.057996320561022e-07, "loss": 0.32799747586250305, "step": 16531, "token_acc": 0.879335053023789 }, { "epoch": 0.892030432202018, "grad_norm": 0.39315780997276306, "learning_rate": 6.052007668545001e-07, "loss": 0.34729254245758057, "step": 16532, "token_acc": 0.8765670202507232 }, { "epoch": 0.8920843900070146, "grad_norm": 0.39334169030189514, "learning_rate": 6.046021885676045e-07, "loss": 0.34445124864578247, "step": 16533, "token_acc": 0.8757434843733103 }, { "epoch": 0.892138347812011, "grad_norm": 0.3446628451347351, "learning_rate": 6.040038972136952e-07, "loss": 0.29225191473960876, "step": 16534, "token_acc": 0.8899052948773138 }, { "epoch": 0.8921923056170075, "grad_norm": 0.39261582493782043, "learning_rate": 6.034058928110464e-07, "loss": 0.29568636417388916, "step": 16535, "token_acc": 0.8941210880374378 }, { "epoch": 0.892246263422004, "grad_norm": 0.4153291583061218, "learning_rate": 6.028081753779213e-07, "loss": 0.33197224140167236, "step": 16536, "token_acc": 0.8784813452443511 }, { "epoch": 0.8923002212270005, "grad_norm": 0.42100444436073303, "learning_rate": 6.02210744932572e-07, "loss": 0.30524346232414246, "step": 16537, "token_acc": 0.8919964819700967 }, { "epoch": 0.892354179031997, "grad_norm": 0.48409953713417053, "learning_rate": 6.01613601493245e-07, "loss": 0.37415221333503723, "step": 16538, "token_acc": 0.872294729886007 }, { "epoch": 0.8924081368369935, "grad_norm": 0.46095985174179077, "learning_rate": 6.010167450781757e-07, "loss": 0.3422222137451172, "step": 16539, "token_acc": 0.881377151799687 }, { "epoch": 0.89246209464199, "grad_norm": 0.3677821457386017, "learning_rate": 6.004201757055938e-07, "loss": 0.2983986437320709, "step": 16540, "token_acc": 0.895291020766306 }, { "epoch": 0.8925160524469865, "grad_norm": 0.27697163820266724, "learning_rate": 5.99823893393715e-07, "loss": 0.35707762837409973, "step": 16541, "token_acc": 0.8783724015922159 }, { "epoch": 0.8925700102519829, "grad_norm": 0.3995025157928467, "learning_rate": 5.992278981607535e-07, "loss": 0.3171095848083496, "step": 16542, "token_acc": 0.8843342036553525 }, { "epoch": 0.8926239680569794, "grad_norm": 0.36550724506378174, "learning_rate": 5.986321900249103e-07, "loss": 0.35713839530944824, "step": 16543, "token_acc": 0.8726788094180364 }, { "epoch": 0.8926779258619759, "grad_norm": 0.3704940974712372, "learning_rate": 5.980367690043764e-07, "loss": 0.3270684480667114, "step": 16544, "token_acc": 0.8839724680432645 }, { "epoch": 0.8927318836669724, "grad_norm": 0.39564621448516846, "learning_rate": 5.974416351173373e-07, "loss": 0.3480524718761444, "step": 16545, "token_acc": 0.8787006578947368 }, { "epoch": 0.8927858414719689, "grad_norm": 0.3237491548061371, "learning_rate": 5.968467883819673e-07, "loss": 0.33110493421554565, "step": 16546, "token_acc": 0.8837482080020852 }, { "epoch": 0.8928397992769654, "grad_norm": 0.4225417375564575, "learning_rate": 5.962522288164318e-07, "loss": 0.3018781244754791, "step": 16547, "token_acc": 0.8902144123811059 }, { "epoch": 0.892893757081962, "grad_norm": 0.41274213790893555, "learning_rate": 5.956579564388909e-07, "loss": 0.36411333084106445, "step": 16548, "token_acc": 0.8717476681394207 }, { "epoch": 0.8929477148869583, "grad_norm": 0.4107341170310974, "learning_rate": 5.950639712674933e-07, "loss": 0.32266658544540405, "step": 16549, "token_acc": 0.8858294157152451 }, { "epoch": 0.8930016726919549, "grad_norm": 0.4562514126300812, "learning_rate": 5.944702733203767e-07, "loss": 0.29866811633110046, "step": 16550, "token_acc": 0.8882625036137612 }, { "epoch": 0.8930556304969514, "grad_norm": 0.4373142421245575, "learning_rate": 5.938768626156743e-07, "loss": 0.3085041344165802, "step": 16551, "token_acc": 0.8901364411597499 }, { "epoch": 0.8931095883019479, "grad_norm": 0.5134566426277161, "learning_rate": 5.932837391715097e-07, "loss": 0.3367633819580078, "step": 16552, "token_acc": 0.8815848716794237 }, { "epoch": 0.8931635461069444, "grad_norm": 0.3730657696723938, "learning_rate": 5.926909030059947e-07, "loss": 0.28618210554122925, "step": 16553, "token_acc": 0.8923956262425448 }, { "epoch": 0.8932175039119409, "grad_norm": 0.3510579466819763, "learning_rate": 5.920983541372328e-07, "loss": 0.31704285740852356, "step": 16554, "token_acc": 0.8873854677178777 }, { "epoch": 0.8932714617169374, "grad_norm": 0.37678074836730957, "learning_rate": 5.91506092583325e-07, "loss": 0.30279114842414856, "step": 16555, "token_acc": 0.8925792938360263 }, { "epoch": 0.8933254195219339, "grad_norm": 0.4247622489929199, "learning_rate": 5.909141183623568e-07, "loss": 0.3490854501724243, "step": 16556, "token_acc": 0.8749625636418089 }, { "epoch": 0.8933793773269303, "grad_norm": 0.4853339195251465, "learning_rate": 5.903224314924061e-07, "loss": 0.3549007773399353, "step": 16557, "token_acc": 0.8740024554941682 }, { "epoch": 0.8934333351319268, "grad_norm": 0.41689226031303406, "learning_rate": 5.897310319915428e-07, "loss": 0.2926366329193115, "step": 16558, "token_acc": 0.8902691511387164 }, { "epoch": 0.8934872929369233, "grad_norm": 0.39285165071487427, "learning_rate": 5.891399198778291e-07, "loss": 0.3063551187515259, "step": 16559, "token_acc": 0.8911764705882353 }, { "epoch": 0.8935412507419198, "grad_norm": 0.5214961767196655, "learning_rate": 5.885490951693152e-07, "loss": 0.3216971755027771, "step": 16560, "token_acc": 0.8823223570190641 }, { "epoch": 0.8935952085469163, "grad_norm": 0.39149320125579834, "learning_rate": 5.879585578840485e-07, "loss": 0.3107718825340271, "step": 16561, "token_acc": 0.8890592487131749 }, { "epoch": 0.8936491663519128, "grad_norm": 0.45166119933128357, "learning_rate": 5.873683080400616e-07, "loss": 0.3316206932067871, "step": 16562, "token_acc": 0.883502860775588 }, { "epoch": 0.8937031241569093, "grad_norm": 0.5241718888282776, "learning_rate": 5.867783456553811e-07, "loss": 0.3350106179714203, "step": 16563, "token_acc": 0.881408065618592 }, { "epoch": 0.8937570819619058, "grad_norm": 0.46460622549057007, "learning_rate": 5.861886707480236e-07, "loss": 0.34986722469329834, "step": 16564, "token_acc": 0.8783632286995515 }, { "epoch": 0.8938110397669022, "grad_norm": 0.3674635887145996, "learning_rate": 5.855992833359981e-07, "loss": 0.30707815289497375, "step": 16565, "token_acc": 0.8906210069000766 }, { "epoch": 0.8938649975718987, "grad_norm": 0.27746522426605225, "learning_rate": 5.850101834373046e-07, "loss": 0.2583780288696289, "step": 16566, "token_acc": 0.9026570960836167 }, { "epoch": 0.8939189553768953, "grad_norm": 0.4812004566192627, "learning_rate": 5.844213710699331e-07, "loss": 0.3465445041656494, "step": 16567, "token_acc": 0.8748151798915722 }, { "epoch": 0.8939729131818918, "grad_norm": 0.44675981998443604, "learning_rate": 5.83832846251866e-07, "loss": 0.31032100319862366, "step": 16568, "token_acc": 0.8880022798518097 }, { "epoch": 0.8940268709868883, "grad_norm": 0.37562841176986694, "learning_rate": 5.83244609001078e-07, "loss": 0.312026709318161, "step": 16569, "token_acc": 0.8881772768028692 }, { "epoch": 0.8940808287918848, "grad_norm": 0.3466484248638153, "learning_rate": 5.82656659335532e-07, "loss": 0.31742995977401733, "step": 16570, "token_acc": 0.8883349950149552 }, { "epoch": 0.8941347865968813, "grad_norm": 0.5966461300849915, "learning_rate": 5.820689972731853e-07, "loss": 0.37098246812820435, "step": 16571, "token_acc": 0.8708376885172465 }, { "epoch": 0.8941887444018777, "grad_norm": 0.3896975517272949, "learning_rate": 5.814816228319841e-07, "loss": 0.3459121286869049, "step": 16572, "token_acc": 0.878517195176418 }, { "epoch": 0.8942427022068742, "grad_norm": 0.3934081792831421, "learning_rate": 5.808945360298656e-07, "loss": 0.3158642649650574, "step": 16573, "token_acc": 0.8877551020408163 }, { "epoch": 0.8942966600118707, "grad_norm": 0.4901430010795593, "learning_rate": 5.803077368847621e-07, "loss": 0.29794058203697205, "step": 16574, "token_acc": 0.8941279579316389 }, { "epoch": 0.8943506178168672, "grad_norm": 0.47816407680511475, "learning_rate": 5.797212254145923e-07, "loss": 0.2904626131057739, "step": 16575, "token_acc": 0.8945131375579598 }, { "epoch": 0.8944045756218637, "grad_norm": 0.5202435851097107, "learning_rate": 5.791350016372677e-07, "loss": 0.3584326505661011, "step": 16576, "token_acc": 0.8772441979273099 }, { "epoch": 0.8944585334268602, "grad_norm": 0.44526171684265137, "learning_rate": 5.785490655706938e-07, "loss": 0.33261072635650635, "step": 16577, "token_acc": 0.8814133591481123 }, { "epoch": 0.8945124912318567, "grad_norm": 0.3867904245853424, "learning_rate": 5.77963417232762e-07, "loss": 0.28912144899368286, "step": 16578, "token_acc": 0.8949591280653951 }, { "epoch": 0.8945664490368532, "grad_norm": 0.4111761748790741, "learning_rate": 5.773780566413578e-07, "loss": 0.33224332332611084, "step": 16579, "token_acc": 0.8845604002859185 }, { "epoch": 0.8946204068418496, "grad_norm": 0.5140597224235535, "learning_rate": 5.767929838143637e-07, "loss": 0.40793538093566895, "step": 16580, "token_acc": 0.8605377276669558 }, { "epoch": 0.8946743646468461, "grad_norm": 0.4601820409297943, "learning_rate": 5.762081987696399e-07, "loss": 0.3693377375602722, "step": 16581, "token_acc": 0.8678713527851459 }, { "epoch": 0.8947283224518426, "grad_norm": 0.43642887473106384, "learning_rate": 5.75623701525051e-07, "loss": 0.3200175166130066, "step": 16582, "token_acc": 0.887159048449371 }, { "epoch": 0.8947822802568391, "grad_norm": 0.38059288263320923, "learning_rate": 5.750394920984437e-07, "loss": 0.32251203060150146, "step": 16583, "token_acc": 0.8890299892546645 }, { "epoch": 0.8948362380618357, "grad_norm": 0.4102557301521301, "learning_rate": 5.744555705076627e-07, "loss": 0.3113095462322235, "step": 16584, "token_acc": 0.8855337078651685 }, { "epoch": 0.8948901958668322, "grad_norm": 0.47194910049438477, "learning_rate": 5.738719367705381e-07, "loss": 0.35622096061706543, "step": 16585, "token_acc": 0.8707668944570994 }, { "epoch": 0.8949441536718287, "grad_norm": 0.4600776135921478, "learning_rate": 5.732885909048946e-07, "loss": 0.3387919068336487, "step": 16586, "token_acc": 0.8782784129119031 }, { "epoch": 0.8949981114768252, "grad_norm": 0.3703644871711731, "learning_rate": 5.727055329285492e-07, "loss": 0.30205056071281433, "step": 16587, "token_acc": 0.8876762722256285 }, { "epoch": 0.8950520692818216, "grad_norm": 0.47300252318382263, "learning_rate": 5.721227628593074e-07, "loss": 0.3402002453804016, "step": 16588, "token_acc": 0.873446847676024 }, { "epoch": 0.8951060270868181, "grad_norm": 0.49409130215644836, "learning_rate": 5.715402807149672e-07, "loss": 0.34867405891418457, "step": 16589, "token_acc": 0.8783669320684397 }, { "epoch": 0.8951599848918146, "grad_norm": 0.40802687406539917, "learning_rate": 5.709580865133157e-07, "loss": 0.3453456461429596, "step": 16590, "token_acc": 0.8788927335640139 }, { "epoch": 0.8952139426968111, "grad_norm": 0.476589173078537, "learning_rate": 5.703761802721352e-07, "loss": 0.276579350233078, "step": 16591, "token_acc": 0.9022519582245431 }, { "epoch": 0.8952679005018076, "grad_norm": 0.3259837031364441, "learning_rate": 5.697945620091938e-07, "loss": 0.3227683901786804, "step": 16592, "token_acc": 0.885072463768116 }, { "epoch": 0.8953218583068041, "grad_norm": 0.44039595127105713, "learning_rate": 5.692132317422583e-07, "loss": 0.33933502435684204, "step": 16593, "token_acc": 0.8742068527918782 }, { "epoch": 0.8953758161118006, "grad_norm": 0.39026549458503723, "learning_rate": 5.6863218948908e-07, "loss": 0.33670544624328613, "step": 16594, "token_acc": 0.875829132395861 }, { "epoch": 0.895429773916797, "grad_norm": 0.34280991554260254, "learning_rate": 5.680514352674038e-07, "loss": 0.3515335023403168, "step": 16595, "token_acc": 0.8735139618468344 }, { "epoch": 0.8954837317217935, "grad_norm": 0.3469332456588745, "learning_rate": 5.674709690949653e-07, "loss": 0.34407174587249756, "step": 16596, "token_acc": 0.8805648635710867 }, { "epoch": 0.89553768952679, "grad_norm": 0.3867228031158447, "learning_rate": 5.668907909894927e-07, "loss": 0.37639284133911133, "step": 16597, "token_acc": 0.8697228697228697 }, { "epoch": 0.8955916473317865, "grad_norm": 0.4282357096672058, "learning_rate": 5.663109009687051e-07, "loss": 0.2978464663028717, "step": 16598, "token_acc": 0.8907683486238532 }, { "epoch": 0.895645605136783, "grad_norm": 0.4744209051132202, "learning_rate": 5.657312990503106e-07, "loss": 0.3082095980644226, "step": 16599, "token_acc": 0.8906812425009999 }, { "epoch": 0.8956995629417795, "grad_norm": 0.47711843252182007, "learning_rate": 5.651519852520104e-07, "loss": 0.32757842540740967, "step": 16600, "token_acc": 0.8817503292843554 }, { "epoch": 0.895753520746776, "grad_norm": 0.37384456396102905, "learning_rate": 5.645729595914973e-07, "loss": 0.3177615702152252, "step": 16601, "token_acc": 0.886307767944936 }, { "epoch": 0.8958074785517726, "grad_norm": 0.3708342909812927, "learning_rate": 5.639942220864547e-07, "loss": 0.3515990972518921, "step": 16602, "token_acc": 0.8748839368616528 }, { "epoch": 0.895861436356769, "grad_norm": 0.4986063241958618, "learning_rate": 5.634157727545563e-07, "loss": 0.38513433933258057, "step": 16603, "token_acc": 0.8705810021518599 }, { "epoch": 0.8959153941617655, "grad_norm": 0.42772430181503296, "learning_rate": 5.628376116134682e-07, "loss": 0.34575334191322327, "step": 16604, "token_acc": 0.8808414513624171 }, { "epoch": 0.895969351966762, "grad_norm": 0.4040382504463196, "learning_rate": 5.622597386808459e-07, "loss": 0.29860660433769226, "step": 16605, "token_acc": 0.8891696750902527 }, { "epoch": 0.8960233097717585, "grad_norm": 0.3672705590724945, "learning_rate": 5.6168215397434e-07, "loss": 0.31174567341804504, "step": 16606, "token_acc": 0.8874394835933297 }, { "epoch": 0.896077267576755, "grad_norm": 0.44560253620147705, "learning_rate": 5.611048575115885e-07, "loss": 0.3392345905303955, "step": 16607, "token_acc": 0.878181160294295 }, { "epoch": 0.8961312253817515, "grad_norm": 0.3820323348045349, "learning_rate": 5.605278493102218e-07, "loss": 0.29863354563713074, "step": 16608, "token_acc": 0.8961451247165533 }, { "epoch": 0.896185183186748, "grad_norm": 0.4236319661140442, "learning_rate": 5.599511293878623e-07, "loss": 0.32943451404571533, "step": 16609, "token_acc": 0.8847476428175264 }, { "epoch": 0.8962391409917445, "grad_norm": 0.3876623809337616, "learning_rate": 5.593746977621229e-07, "loss": 0.22819137573242188, "step": 16610, "token_acc": 0.9111181905065308 }, { "epoch": 0.8962930987967409, "grad_norm": 0.4043654501438141, "learning_rate": 5.587985544506059e-07, "loss": 0.3273693919181824, "step": 16611, "token_acc": 0.8817816476892163 }, { "epoch": 0.8963470566017374, "grad_norm": 0.3382747769355774, "learning_rate": 5.582226994709072e-07, "loss": 0.2693403959274292, "step": 16612, "token_acc": 0.9000361750874232 }, { "epoch": 0.8964010144067339, "grad_norm": 0.3828918933868408, "learning_rate": 5.576471328406152e-07, "loss": 0.3418731689453125, "step": 16613, "token_acc": 0.8758389261744967 }, { "epoch": 0.8964549722117304, "grad_norm": 0.4845324158668518, "learning_rate": 5.570718545773068e-07, "loss": 0.29693543910980225, "step": 16614, "token_acc": 0.894718121998933 }, { "epoch": 0.8965089300167269, "grad_norm": 0.38202375173568726, "learning_rate": 5.564968646985502e-07, "loss": 0.3551952540874481, "step": 16615, "token_acc": 0.8710695961057755 }, { "epoch": 0.8965628878217234, "grad_norm": 0.3788933753967285, "learning_rate": 5.559221632219047e-07, "loss": 0.29683494567871094, "step": 16616, "token_acc": 0.8959128836091076 }, { "epoch": 0.89661684562672, "grad_norm": 0.43145063519477844, "learning_rate": 5.553477501649219e-07, "loss": 0.29771754145622253, "step": 16617, "token_acc": 0.8908398211031969 }, { "epoch": 0.8966708034317163, "grad_norm": 0.4746594727039337, "learning_rate": 5.547736255451442e-07, "loss": 0.3424207270145416, "step": 16618, "token_acc": 0.8748353096179183 }, { "epoch": 0.8967247612367129, "grad_norm": 0.4911981225013733, "learning_rate": 5.541997893801066e-07, "loss": 0.30328577756881714, "step": 16619, "token_acc": 0.8853396099529254 }, { "epoch": 0.8967787190417094, "grad_norm": 0.3738331198692322, "learning_rate": 5.536262416873328e-07, "loss": 0.36319398880004883, "step": 16620, "token_acc": 0.8720867208672086 }, { "epoch": 0.8968326768467059, "grad_norm": 0.611445426940918, "learning_rate": 5.530529824843389e-07, "loss": 0.295931875705719, "step": 16621, "token_acc": 0.891566265060241 }, { "epoch": 0.8968866346517024, "grad_norm": 0.4892445504665375, "learning_rate": 5.524800117886331e-07, "loss": 0.30059921741485596, "step": 16622, "token_acc": 0.8947131945484499 }, { "epoch": 0.8969405924566989, "grad_norm": 0.5147982835769653, "learning_rate": 5.519073296177113e-07, "loss": 0.35260242223739624, "step": 16623, "token_acc": 0.8768391469664407 }, { "epoch": 0.8969945502616954, "grad_norm": 0.5930397510528564, "learning_rate": 5.513349359890641e-07, "loss": 0.33542153239250183, "step": 16624, "token_acc": 0.8845782667438719 }, { "epoch": 0.8970485080666919, "grad_norm": 0.43752002716064453, "learning_rate": 5.507628309201741e-07, "loss": 0.3469970226287842, "step": 16625, "token_acc": 0.8763068567549219 }, { "epoch": 0.8971024658716883, "grad_norm": 0.4241516590118408, "learning_rate": 5.501910144285116e-07, "loss": 0.3250405192375183, "step": 16626, "token_acc": 0.8833662807116633 }, { "epoch": 0.8971564236766848, "grad_norm": 0.41874346137046814, "learning_rate": 5.496194865315408e-07, "loss": 0.309700608253479, "step": 16627, "token_acc": 0.8863668807994289 }, { "epoch": 0.8972103814816813, "grad_norm": 0.49037137627601624, "learning_rate": 5.49048247246714e-07, "loss": 0.3707735240459442, "step": 16628, "token_acc": 0.8721721027064538 }, { "epoch": 0.8972643392866778, "grad_norm": 0.5643797516822815, "learning_rate": 5.484772965914786e-07, "loss": 0.3382375240325928, "step": 16629, "token_acc": 0.8778724981467754 }, { "epoch": 0.8973182970916743, "grad_norm": 0.4201332628726959, "learning_rate": 5.479066345832706e-07, "loss": 0.3194742798805237, "step": 16630, "token_acc": 0.8877091377091377 }, { "epoch": 0.8973722548966708, "grad_norm": 0.30656710267066956, "learning_rate": 5.473362612395183e-07, "loss": 0.2705407738685608, "step": 16631, "token_acc": 0.9000914196160376 }, { "epoch": 0.8974262127016673, "grad_norm": 0.5957112312316895, "learning_rate": 5.4676617657764e-07, "loss": 0.37929052114486694, "step": 16632, "token_acc": 0.8655434423521638 }, { "epoch": 0.8974801705066638, "grad_norm": 0.4794847071170807, "learning_rate": 5.461963806150461e-07, "loss": 0.28213343024253845, "step": 16633, "token_acc": 0.8964791695890026 }, { "epoch": 0.8975341283116602, "grad_norm": 0.3670795261859894, "learning_rate": 5.456268733691383e-07, "loss": 0.29413676261901855, "step": 16634, "token_acc": 0.8909180046574335 }, { "epoch": 0.8975880861166567, "grad_norm": 0.48008567094802856, "learning_rate": 5.450576548573083e-07, "loss": 0.38984620571136475, "step": 16635, "token_acc": 0.8624104830108182 }, { "epoch": 0.8976420439216533, "grad_norm": 0.3085557222366333, "learning_rate": 5.44488725096941e-07, "loss": 0.3140558898448944, "step": 16636, "token_acc": 0.8870026525198939 }, { "epoch": 0.8976960017266498, "grad_norm": 0.48081064224243164, "learning_rate": 5.439200841054104e-07, "loss": 0.3324826955795288, "step": 16637, "token_acc": 0.8771003360537686 }, { "epoch": 0.8977499595316463, "grad_norm": 0.5159066319465637, "learning_rate": 5.433517319000847e-07, "loss": 0.37157800793647766, "step": 16638, "token_acc": 0.8732926474861958 }, { "epoch": 0.8978039173366428, "grad_norm": 0.3608264923095703, "learning_rate": 5.42783668498319e-07, "loss": 0.3416052758693695, "step": 16639, "token_acc": 0.8747761862130707 }, { "epoch": 0.8978578751416393, "grad_norm": 0.3778621256351471, "learning_rate": 5.422158939174627e-07, "loss": 0.31350818276405334, "step": 16640, "token_acc": 0.8862760997364687 }, { "epoch": 0.8979118329466357, "grad_norm": 0.39698511362075806, "learning_rate": 5.416484081748563e-07, "loss": 0.3399283289909363, "step": 16641, "token_acc": 0.8789639924194568 }, { "epoch": 0.8979657907516322, "grad_norm": 0.3428041636943817, "learning_rate": 5.410812112878283e-07, "loss": 0.3331766128540039, "step": 16642, "token_acc": 0.8802416918429004 }, { "epoch": 0.8980197485566287, "grad_norm": 0.4743212163448334, "learning_rate": 5.405143032737037e-07, "loss": 0.3483317792415619, "step": 16643, "token_acc": 0.881651376146789 }, { "epoch": 0.8980737063616252, "grad_norm": 0.3862953782081604, "learning_rate": 5.399476841497919e-07, "loss": 0.3087032437324524, "step": 16644, "token_acc": 0.8923226164079823 }, { "epoch": 0.8981276641666217, "grad_norm": 0.49018344283103943, "learning_rate": 5.393813539334014e-07, "loss": 0.339184045791626, "step": 16645, "token_acc": 0.8808787632221318 }, { "epoch": 0.8981816219716182, "grad_norm": 0.43863779306411743, "learning_rate": 5.388153126418261e-07, "loss": 0.33649611473083496, "step": 16646, "token_acc": 0.8843036109064112 }, { "epoch": 0.8982355797766147, "grad_norm": 0.612159788608551, "learning_rate": 5.382495602923521e-07, "loss": 0.3425790071487427, "step": 16647, "token_acc": 0.8767449778685734 }, { "epoch": 0.8982895375816112, "grad_norm": 0.5318114161491394, "learning_rate": 5.376840969022579e-07, "loss": 0.3447217345237732, "step": 16648, "token_acc": 0.8787927957163719 }, { "epoch": 0.8983434953866076, "grad_norm": 0.41605421900749207, "learning_rate": 5.371189224888151e-07, "loss": 0.31018173694610596, "step": 16649, "token_acc": 0.8917735849056604 }, { "epoch": 0.8983974531916041, "grad_norm": 0.419347882270813, "learning_rate": 5.365540370692779e-07, "loss": 0.37289005517959595, "step": 16650, "token_acc": 0.8658278457196613 }, { "epoch": 0.8984514109966006, "grad_norm": 0.36663734912872314, "learning_rate": 5.359894406609023e-07, "loss": 0.34233391284942627, "step": 16651, "token_acc": 0.882190121961768 }, { "epoch": 0.8985053688015971, "grad_norm": 0.41875845193862915, "learning_rate": 5.354251332809301e-07, "loss": 0.33296871185302734, "step": 16652, "token_acc": 0.8842705786471068 }, { "epoch": 0.8985593266065937, "grad_norm": 0.40904682874679565, "learning_rate": 5.348611149465955e-07, "loss": 0.3086392879486084, "step": 16653, "token_acc": 0.88486646884273 }, { "epoch": 0.8986132844115902, "grad_norm": 0.4343952536582947, "learning_rate": 5.342973856751222e-07, "loss": 0.3083583116531372, "step": 16654, "token_acc": 0.8889806707417809 }, { "epoch": 0.8986672422165867, "grad_norm": 0.38879290223121643, "learning_rate": 5.337339454837276e-07, "loss": 0.3475257456302643, "step": 16655, "token_acc": 0.8728142076502732 }, { "epoch": 0.8987212000215831, "grad_norm": 0.5217757225036621, "learning_rate": 5.331707943896181e-07, "loss": 0.3333539366722107, "step": 16656, "token_acc": 0.8809451219512195 }, { "epoch": 0.8987751578265796, "grad_norm": 0.4653659164905548, "learning_rate": 5.32607932409992e-07, "loss": 0.33028700947761536, "step": 16657, "token_acc": 0.8806661839246922 }, { "epoch": 0.8988291156315761, "grad_norm": 0.4016989767551422, "learning_rate": 5.320453595620401e-07, "loss": 0.2861472964286804, "step": 16658, "token_acc": 0.8946925021061499 }, { "epoch": 0.8988830734365726, "grad_norm": 0.44665196537971497, "learning_rate": 5.31483075862943e-07, "loss": 0.2996942400932312, "step": 16659, "token_acc": 0.8906001062134891 }, { "epoch": 0.8989370312415691, "grad_norm": 0.39367416501045227, "learning_rate": 5.309210813298726e-07, "loss": 0.3366520404815674, "step": 16660, "token_acc": 0.880677721701514 }, { "epoch": 0.8989909890465656, "grad_norm": 0.3484996557235718, "learning_rate": 5.303593759799908e-07, "loss": 0.35995471477508545, "step": 16661, "token_acc": 0.8718360291394 }, { "epoch": 0.8990449468515621, "grad_norm": 0.43086427450180054, "learning_rate": 5.297979598304537e-07, "loss": 0.3388104736804962, "step": 16662, "token_acc": 0.8774737356462252 }, { "epoch": 0.8990989046565586, "grad_norm": 0.43219971656799316, "learning_rate": 5.292368328984066e-07, "loss": 0.3079591691493988, "step": 16663, "token_acc": 0.884389755329804 }, { "epoch": 0.899152862461555, "grad_norm": 0.3300667703151703, "learning_rate": 5.286759952009846e-07, "loss": 0.31821155548095703, "step": 16664, "token_acc": 0.8854351687388987 }, { "epoch": 0.8992068202665515, "grad_norm": 0.360283225774765, "learning_rate": 5.281154467553174e-07, "loss": 0.2960204482078552, "step": 16665, "token_acc": 0.8936903804160441 }, { "epoch": 0.899260778071548, "grad_norm": 0.4893021285533905, "learning_rate": 5.275551875785223e-07, "loss": 0.3236573338508606, "step": 16666, "token_acc": 0.888366055492455 }, { "epoch": 0.8993147358765445, "grad_norm": 0.3496335744857788, "learning_rate": 5.269952176877102e-07, "loss": 0.3365124464035034, "step": 16667, "token_acc": 0.8797036007873104 }, { "epoch": 0.899368693681541, "grad_norm": 0.36533817648887634, "learning_rate": 5.264355370999841e-07, "loss": 0.26233333349227905, "step": 16668, "token_acc": 0.902715223265989 }, { "epoch": 0.8994226514865375, "grad_norm": 0.4095357060432434, "learning_rate": 5.258761458324324e-07, "loss": 0.3292103707790375, "step": 16669, "token_acc": 0.8856953642384106 }, { "epoch": 0.8994766092915341, "grad_norm": 0.3981623649597168, "learning_rate": 5.253170439021427e-07, "loss": 0.3369036614894867, "step": 16670, "token_acc": 0.8821066194234176 }, { "epoch": 0.8995305670965306, "grad_norm": 0.4117584228515625, "learning_rate": 5.247582313261889e-07, "loss": 0.31040051579475403, "step": 16671, "token_acc": 0.8853580048270314 }, { "epoch": 0.899584524901527, "grad_norm": 0.40585586428642273, "learning_rate": 5.241997081216354e-07, "loss": 0.27878040075302124, "step": 16672, "token_acc": 0.8981276176398127 }, { "epoch": 0.8996384827065235, "grad_norm": 0.4921591281890869, "learning_rate": 5.236414743055418e-07, "loss": 0.27011096477508545, "step": 16673, "token_acc": 0.8976449275362319 }, { "epoch": 0.89969244051152, "grad_norm": 0.43575432896614075, "learning_rate": 5.230835298949544e-07, "loss": 0.3125927150249481, "step": 16674, "token_acc": 0.8855338160529964 }, { "epoch": 0.8997463983165165, "grad_norm": 0.4466087520122528, "learning_rate": 5.22525874906914e-07, "loss": 0.37165990471839905, "step": 16675, "token_acc": 0.8668734491315137 }, { "epoch": 0.899800356121513, "grad_norm": 0.4755416512489319, "learning_rate": 5.219685093584493e-07, "loss": 0.33482393622398376, "step": 16676, "token_acc": 0.8780345980520424 }, { "epoch": 0.8998543139265095, "grad_norm": 0.3794723153114319, "learning_rate": 5.214114332665843e-07, "loss": 0.3137902021408081, "step": 16677, "token_acc": 0.8833142201834863 }, { "epoch": 0.899908271731506, "grad_norm": 0.42696771025657654, "learning_rate": 5.208546466483311e-07, "loss": 0.33892229199409485, "step": 16678, "token_acc": 0.8800481412142284 }, { "epoch": 0.8999622295365024, "grad_norm": 0.3188263177871704, "learning_rate": 5.202981495206938e-07, "loss": 0.3420220911502838, "step": 16679, "token_acc": 0.881490464116415 }, { "epoch": 0.9000161873414989, "grad_norm": 0.3834133446216583, "learning_rate": 5.197419419006678e-07, "loss": 0.3403991460800171, "step": 16680, "token_acc": 0.8790395846852693 }, { "epoch": 0.9000701451464954, "grad_norm": 0.3551887571811676, "learning_rate": 5.191860238052393e-07, "loss": 0.2939046621322632, "step": 16681, "token_acc": 0.8943955534969893 }, { "epoch": 0.9001241029514919, "grad_norm": 0.41405558586120605, "learning_rate": 5.186303952513861e-07, "loss": 0.2929428219795227, "step": 16682, "token_acc": 0.8903688524590164 }, { "epoch": 0.9001780607564884, "grad_norm": 0.504766047000885, "learning_rate": 5.180750562560766e-07, "loss": 0.32489144802093506, "step": 16683, "token_acc": 0.8813505335244466 }, { "epoch": 0.9002320185614849, "grad_norm": 0.43862685561180115, "learning_rate": 5.175200068362707e-07, "loss": 0.3416672348976135, "step": 16684, "token_acc": 0.8748766528517861 }, { "epoch": 0.9002859763664814, "grad_norm": 0.3980805277824402, "learning_rate": 5.169652470089203e-07, "loss": 0.2809178829193115, "step": 16685, "token_acc": 0.8930598555211559 }, { "epoch": 0.900339934171478, "grad_norm": 0.5110332369804382, "learning_rate": 5.164107767909676e-07, "loss": 0.3021782636642456, "step": 16686, "token_acc": 0.8884703912952501 }, { "epoch": 0.9003938919764743, "grad_norm": 0.5272756814956665, "learning_rate": 5.158565961993444e-07, "loss": 0.3284929692745209, "step": 16687, "token_acc": 0.8792222771461525 }, { "epoch": 0.9004478497814709, "grad_norm": 0.424809068441391, "learning_rate": 5.153027052509762e-07, "loss": 0.31152525544166565, "step": 16688, "token_acc": 0.8882371591629676 }, { "epoch": 0.9005018075864674, "grad_norm": 0.45758527517318726, "learning_rate": 5.147491039627784e-07, "loss": 0.323567658662796, "step": 16689, "token_acc": 0.8825 }, { "epoch": 0.9005557653914639, "grad_norm": 0.3804013729095459, "learning_rate": 5.141957923516583e-07, "loss": 0.318960964679718, "step": 16690, "token_acc": 0.8843970809683771 }, { "epoch": 0.9006097231964604, "grad_norm": 0.4177170991897583, "learning_rate": 5.136427704345149e-07, "loss": 0.33708107471466064, "step": 16691, "token_acc": 0.8775964391691394 }, { "epoch": 0.9006636810014569, "grad_norm": 0.4483816623687744, "learning_rate": 5.130900382282355e-07, "loss": 0.3658418655395508, "step": 16692, "token_acc": 0.8692468619246861 }, { "epoch": 0.9007176388064534, "grad_norm": 0.4199938476085663, "learning_rate": 5.125375957497003e-07, "loss": 0.2824784517288208, "step": 16693, "token_acc": 0.8992423021118814 }, { "epoch": 0.9007715966114499, "grad_norm": 0.4472711980342865, "learning_rate": 5.119854430157834e-07, "loss": 0.35926759243011475, "step": 16694, "token_acc": 0.8712121212121212 }, { "epoch": 0.9008255544164463, "grad_norm": 0.3260800242424011, "learning_rate": 5.114335800433423e-07, "loss": 0.2871112823486328, "step": 16695, "token_acc": 0.8940345368916798 }, { "epoch": 0.9008795122214428, "grad_norm": 0.3754001259803772, "learning_rate": 5.10882006849237e-07, "loss": 0.31507596373558044, "step": 16696, "token_acc": 0.8858784893267652 }, { "epoch": 0.9009334700264393, "grad_norm": 0.41399049758911133, "learning_rate": 5.103307234503097e-07, "loss": 0.3715749979019165, "step": 16697, "token_acc": 0.8677789738069609 }, { "epoch": 0.9009874278314358, "grad_norm": 0.3884093463420868, "learning_rate": 5.097797298633955e-07, "loss": 0.30413126945495605, "step": 16698, "token_acc": 0.8871346276936206 }, { "epoch": 0.9010413856364323, "grad_norm": 0.4352404773235321, "learning_rate": 5.092290261053212e-07, "loss": 0.33329200744628906, "step": 16699, "token_acc": 0.8822497976800647 }, { "epoch": 0.9010953434414288, "grad_norm": 0.4170960485935211, "learning_rate": 5.086786121929078e-07, "loss": 0.281471848487854, "step": 16700, "token_acc": 0.8959047811176114 }, { "epoch": 0.9011493012464253, "grad_norm": 0.3819330334663391, "learning_rate": 5.081284881429615e-07, "loss": 0.356733500957489, "step": 16701, "token_acc": 0.8748675847457628 }, { "epoch": 0.9012032590514217, "grad_norm": 0.4297604560852051, "learning_rate": 5.075786539722849e-07, "loss": 0.34593456983566284, "step": 16702, "token_acc": 0.8733843313109997 }, { "epoch": 0.9012572168564182, "grad_norm": 0.47542473673820496, "learning_rate": 5.070291096976698e-07, "loss": 0.3382372260093689, "step": 16703, "token_acc": 0.8794374892814268 }, { "epoch": 0.9013111746614147, "grad_norm": 0.35409530997276306, "learning_rate": 5.064798553358985e-07, "loss": 0.30241480469703674, "step": 16704, "token_acc": 0.8872971602434077 }, { "epoch": 0.9013651324664113, "grad_norm": 0.3316790461540222, "learning_rate": 5.059308909037464e-07, "loss": 0.3189306855201721, "step": 16705, "token_acc": 0.8825660377358491 }, { "epoch": 0.9014190902714078, "grad_norm": 0.37140005826950073, "learning_rate": 5.053822164179778e-07, "loss": 0.3096933960914612, "step": 16706, "token_acc": 0.8895465994962216 }, { "epoch": 0.9014730480764043, "grad_norm": 0.41398584842681885, "learning_rate": 5.048338318953483e-07, "loss": 0.28542646765708923, "step": 16707, "token_acc": 0.8951414514145142 }, { "epoch": 0.9015270058814008, "grad_norm": 0.3636208176612854, "learning_rate": 5.042857373526055e-07, "loss": 0.34575510025024414, "step": 16708, "token_acc": 0.8784893267651889 }, { "epoch": 0.9015809636863973, "grad_norm": 0.6042779088020325, "learning_rate": 5.037379328064895e-07, "loss": 0.3671470284461975, "step": 16709, "token_acc": 0.8720248993042842 }, { "epoch": 0.9016349214913937, "grad_norm": 0.4189889132976532, "learning_rate": 5.031904182737302e-07, "loss": 0.3356565833091736, "step": 16710, "token_acc": 0.8801001251564455 }, { "epoch": 0.9016888792963902, "grad_norm": 0.5234628915786743, "learning_rate": 5.026431937710475e-07, "loss": 0.33766141533851624, "step": 16711, "token_acc": 0.875977478886456 }, { "epoch": 0.9017428371013867, "grad_norm": 0.4665857255458832, "learning_rate": 5.020962593151534e-07, "loss": 0.3452342748641968, "step": 16712, "token_acc": 0.8755746700281773 }, { "epoch": 0.9017967949063832, "grad_norm": 0.414345383644104, "learning_rate": 5.015496149227516e-07, "loss": 0.3331776559352875, "step": 16713, "token_acc": 0.8839665340831162 }, { "epoch": 0.9018507527113797, "grad_norm": 0.48977646231651306, "learning_rate": 5.010032606105353e-07, "loss": 0.3914194107055664, "step": 16714, "token_acc": 0.8641459243777313 }, { "epoch": 0.9019047105163762, "grad_norm": 0.44585657119750977, "learning_rate": 5.004571963951921e-07, "loss": 0.3149993121623993, "step": 16715, "token_acc": 0.8847352024922118 }, { "epoch": 0.9019586683213727, "grad_norm": 0.47801458835601807, "learning_rate": 4.999114222933976e-07, "loss": 0.3481951355934143, "step": 16716, "token_acc": 0.8747769667477696 }, { "epoch": 0.9020126261263692, "grad_norm": 0.3742363452911377, "learning_rate": 4.993659383218197e-07, "loss": 0.30034011602401733, "step": 16717, "token_acc": 0.8892459543211293 }, { "epoch": 0.9020665839313656, "grad_norm": 0.47066769003868103, "learning_rate": 4.988207444971161e-07, "loss": 0.40503624081611633, "step": 16718, "token_acc": 0.8622047244094488 }, { "epoch": 0.9021205417363621, "grad_norm": 0.4083441197872162, "learning_rate": 4.982758408359389e-07, "loss": 0.3554818034172058, "step": 16719, "token_acc": 0.8764511792741049 }, { "epoch": 0.9021744995413586, "grad_norm": 0.3880927860736847, "learning_rate": 4.977312273549284e-07, "loss": 0.35770463943481445, "step": 16720, "token_acc": 0.8746131947023147 }, { "epoch": 0.9022284573463552, "grad_norm": 0.39370304346084595, "learning_rate": 4.971869040707156e-07, "loss": 0.3377189636230469, "step": 16721, "token_acc": 0.8825895663104966 }, { "epoch": 0.9022824151513517, "grad_norm": 0.37999120354652405, "learning_rate": 4.96642870999926e-07, "loss": 0.2681611478328705, "step": 16722, "token_acc": 0.9013937282229966 }, { "epoch": 0.9023363729563482, "grad_norm": 0.331649512052536, "learning_rate": 4.960991281591742e-07, "loss": 0.2973862588405609, "step": 16723, "token_acc": 0.8896291640477687 }, { "epoch": 0.9023903307613447, "grad_norm": 0.4556102454662323, "learning_rate": 4.955556755650658e-07, "loss": 0.2781745195388794, "step": 16724, "token_acc": 0.8953229398663697 }, { "epoch": 0.9024442885663411, "grad_norm": 0.3389820456504822, "learning_rate": 4.950125132341966e-07, "loss": 0.28739607334136963, "step": 16725, "token_acc": 0.8972646995477063 }, { "epoch": 0.9024982463713376, "grad_norm": 0.5590510964393616, "learning_rate": 4.944696411831551e-07, "loss": 0.37815797328948975, "step": 16726, "token_acc": 0.8697723663313923 }, { "epoch": 0.9025522041763341, "grad_norm": 0.4559192657470703, "learning_rate": 4.939270594285195e-07, "loss": 0.3098970353603363, "step": 16727, "token_acc": 0.8857183418512209 }, { "epoch": 0.9026061619813306, "grad_norm": 0.4457877576351166, "learning_rate": 4.933847679868631e-07, "loss": 0.34260323643684387, "step": 16728, "token_acc": 0.8784545967287084 }, { "epoch": 0.9026601197863271, "grad_norm": 0.43037083745002747, "learning_rate": 4.928427668747449e-07, "loss": 0.31929677724838257, "step": 16729, "token_acc": 0.8854494289024996 }, { "epoch": 0.9027140775913236, "grad_norm": 0.5260941386222839, "learning_rate": 4.923010561087182e-07, "loss": 0.3378514051437378, "step": 16730, "token_acc": 0.8759145029407546 }, { "epoch": 0.9027680353963201, "grad_norm": 0.3088833689689636, "learning_rate": 4.917596357053278e-07, "loss": 0.3516412079334259, "step": 16731, "token_acc": 0.8756899448044156 }, { "epoch": 0.9028219932013166, "grad_norm": 0.45339229702949524, "learning_rate": 4.912185056811058e-07, "loss": 0.3147470951080322, "step": 16732, "token_acc": 0.8854545454545455 }, { "epoch": 0.902875951006313, "grad_norm": 0.36817148327827454, "learning_rate": 4.906776660525803e-07, "loss": 0.321311891078949, "step": 16733, "token_acc": 0.8886029411764705 }, { "epoch": 0.9029299088113095, "grad_norm": 0.33440518379211426, "learning_rate": 4.901371168362668e-07, "loss": 0.3007141351699829, "step": 16734, "token_acc": 0.8928860964734495 }, { "epoch": 0.902983866616306, "grad_norm": 0.5374110341072083, "learning_rate": 4.895968580486743e-07, "loss": 0.3936708867549896, "step": 16735, "token_acc": 0.8665329424471532 }, { "epoch": 0.9030378244213025, "grad_norm": 0.39262235164642334, "learning_rate": 4.890568897063031e-07, "loss": 0.27460554242134094, "step": 16736, "token_acc": 0.8999073215940686 }, { "epoch": 0.903091782226299, "grad_norm": 0.44952595233917236, "learning_rate": 4.885172118256432e-07, "loss": 0.3618679642677307, "step": 16737, "token_acc": 0.8720300585700077 }, { "epoch": 0.9031457400312956, "grad_norm": 0.29082998633384705, "learning_rate": 4.879778244231759e-07, "loss": 0.3144626021385193, "step": 16738, "token_acc": 0.8858293284842187 }, { "epoch": 0.9031996978362921, "grad_norm": 0.3875560462474823, "learning_rate": 4.874387275153747e-07, "loss": 0.30087101459503174, "step": 16739, "token_acc": 0.8916375369723044 }, { "epoch": 0.9032536556412886, "grad_norm": 0.38325703144073486, "learning_rate": 4.868999211187008e-07, "loss": 0.3016168475151062, "step": 16740, "token_acc": 0.8931764004767581 }, { "epoch": 0.903307613446285, "grad_norm": 0.5176739692687988, "learning_rate": 4.863614052496124e-07, "loss": 0.3301561176776886, "step": 16741, "token_acc": 0.8836111520873285 }, { "epoch": 0.9033615712512815, "grad_norm": 0.43794065713882446, "learning_rate": 4.858231799245538e-07, "loss": 0.37424999475479126, "step": 16742, "token_acc": 0.8696601184908014 }, { "epoch": 0.903415529056278, "grad_norm": 0.3379276394844055, "learning_rate": 4.852852451599644e-07, "loss": 0.32318824529647827, "step": 16743, "token_acc": 0.8818554587391082 }, { "epoch": 0.9034694868612745, "grad_norm": 0.3930044174194336, "learning_rate": 4.847476009722707e-07, "loss": 0.32818013429641724, "step": 16744, "token_acc": 0.8811582269795526 }, { "epoch": 0.903523444666271, "grad_norm": 0.46731749176979065, "learning_rate": 4.842102473778921e-07, "loss": 0.3804442882537842, "step": 16745, "token_acc": 0.8679520888109845 }, { "epoch": 0.9035774024712675, "grad_norm": 0.360314279794693, "learning_rate": 4.836731843932396e-07, "loss": 0.34244924783706665, "step": 16746, "token_acc": 0.8762759394279305 }, { "epoch": 0.903631360276264, "grad_norm": 0.34738245606422424, "learning_rate": 4.83136412034716e-07, "loss": 0.31829214096069336, "step": 16747, "token_acc": 0.8838603017152304 }, { "epoch": 0.9036853180812604, "grad_norm": 0.4274705648422241, "learning_rate": 4.825999303187124e-07, "loss": 0.31533533334732056, "step": 16748, "token_acc": 0.886 }, { "epoch": 0.9037392758862569, "grad_norm": 0.5197226405143738, "learning_rate": 4.820637392616146e-07, "loss": 0.3283421993255615, "step": 16749, "token_acc": 0.8756446991404011 }, { "epoch": 0.9037932336912534, "grad_norm": 0.39435574412345886, "learning_rate": 4.815278388797961e-07, "loss": 0.3743234872817993, "step": 16750, "token_acc": 0.8696656534954408 }, { "epoch": 0.9038471914962499, "grad_norm": 0.4103398323059082, "learning_rate": 4.809922291896241e-07, "loss": 0.3336201012134552, "step": 16751, "token_acc": 0.8786484354457008 }, { "epoch": 0.9039011493012464, "grad_norm": 0.38630399107933044, "learning_rate": 4.804569102074552e-07, "loss": 0.3262442946434021, "step": 16752, "token_acc": 0.883213296398892 }, { "epoch": 0.9039551071062429, "grad_norm": 0.3341142535209656, "learning_rate": 4.799218819496377e-07, "loss": 0.3120686113834381, "step": 16753, "token_acc": 0.8894421298648512 }, { "epoch": 0.9040090649112394, "grad_norm": 0.3845611810684204, "learning_rate": 4.793871444325137e-07, "loss": 0.297285258769989, "step": 16754, "token_acc": 0.8910059960026648 }, { "epoch": 0.904063022716236, "grad_norm": 0.40203380584716797, "learning_rate": 4.788526976724117e-07, "loss": 0.3186951279640198, "step": 16755, "token_acc": 0.8833132610731969 }, { "epoch": 0.9041169805212323, "grad_norm": 0.38550999760627747, "learning_rate": 4.78318541685654e-07, "loss": 0.3197566270828247, "step": 16756, "token_acc": 0.8844750140370579 }, { "epoch": 0.9041709383262289, "grad_norm": 0.3645526170730591, "learning_rate": 4.777846764885541e-07, "loss": 0.2982511520385742, "step": 16757, "token_acc": 0.8907862877532815 }, { "epoch": 0.9042248961312254, "grad_norm": 0.3973656892776489, "learning_rate": 4.772511020974157e-07, "loss": 0.33492985367774963, "step": 16758, "token_acc": 0.8804455445544555 }, { "epoch": 0.9042788539362219, "grad_norm": 0.44843244552612305, "learning_rate": 4.767178185285337e-07, "loss": 0.3384286165237427, "step": 16759, "token_acc": 0.8786117285005984 }, { "epoch": 0.9043328117412184, "grad_norm": 0.37732142210006714, "learning_rate": 4.761848257981938e-07, "loss": 0.3860134780406952, "step": 16760, "token_acc": 0.8645712630359212 }, { "epoch": 0.9043867695462149, "grad_norm": 0.380671888589859, "learning_rate": 4.7565212392267635e-07, "loss": 0.31708472967147827, "step": 16761, "token_acc": 0.882372487124107 }, { "epoch": 0.9044407273512114, "grad_norm": 0.46263009309768677, "learning_rate": 4.751197129182472e-07, "loss": 0.3543860614299774, "step": 16762, "token_acc": 0.8754684347074084 }, { "epoch": 0.9044946851562079, "grad_norm": 0.5210853815078735, "learning_rate": 4.745875928011667e-07, "loss": 0.3193666338920593, "step": 16763, "token_acc": 0.8918209651552704 }, { "epoch": 0.9045486429612043, "grad_norm": 0.5118866562843323, "learning_rate": 4.7405576358768637e-07, "loss": 0.3531595766544342, "step": 16764, "token_acc": 0.8788272674754477 }, { "epoch": 0.9046026007662008, "grad_norm": 0.4661981761455536, "learning_rate": 4.735242252940464e-07, "loss": 0.3379552960395813, "step": 16765, "token_acc": 0.8825665859564165 }, { "epoch": 0.9046565585711973, "grad_norm": 0.3980582058429718, "learning_rate": 4.729929779364817e-07, "loss": 0.33578166365623474, "step": 16766, "token_acc": 0.8804490624626776 }, { "epoch": 0.9047105163761938, "grad_norm": 0.35537806153297424, "learning_rate": 4.724620215312159e-07, "loss": 0.27604052424430847, "step": 16767, "token_acc": 0.897022022022022 }, { "epoch": 0.9047644741811903, "grad_norm": 0.6378496885299683, "learning_rate": 4.7193135609446494e-07, "loss": 0.36118966341018677, "step": 16768, "token_acc": 0.870409982174688 }, { "epoch": 0.9048184319861868, "grad_norm": 0.506442129611969, "learning_rate": 4.714009816424325e-07, "loss": 0.32933181524276733, "step": 16769, "token_acc": 0.881350729086723 }, { "epoch": 0.9048723897911833, "grad_norm": 0.31288227438926697, "learning_rate": 4.70870898191319e-07, "loss": 0.30846723914146423, "step": 16770, "token_acc": 0.8904651162790698 }, { "epoch": 0.9049263475961797, "grad_norm": 0.4025534987449646, "learning_rate": 4.703411057573115e-07, "loss": 0.317572683095932, "step": 16771, "token_acc": 0.885014669374859 }, { "epoch": 0.9049803054011762, "grad_norm": 0.392659068107605, "learning_rate": 4.6981160435659144e-07, "loss": 0.30471453070640564, "step": 16772, "token_acc": 0.8848325178773052 }, { "epoch": 0.9050342632061728, "grad_norm": 0.4894638657569885, "learning_rate": 4.692823940053259e-07, "loss": 0.3236539959907532, "step": 16773, "token_acc": 0.8829978432782171 }, { "epoch": 0.9050882210111693, "grad_norm": 0.3962874114513397, "learning_rate": 4.687534747196809e-07, "loss": 0.37008240818977356, "step": 16774, "token_acc": 0.8712418300653595 }, { "epoch": 0.9051421788161658, "grad_norm": 0.29583320021629333, "learning_rate": 4.682248465158079e-07, "loss": 0.29137617349624634, "step": 16775, "token_acc": 0.8914151313210505 }, { "epoch": 0.9051961366211623, "grad_norm": 0.38743865489959717, "learning_rate": 4.676965094098507e-07, "loss": 0.30060693621635437, "step": 16776, "token_acc": 0.8955042527339003 }, { "epoch": 0.9052500944261588, "grad_norm": 0.3823617398738861, "learning_rate": 4.671684634179463e-07, "loss": 0.35829460620880127, "step": 16777, "token_acc": 0.8713637127963679 }, { "epoch": 0.9053040522311553, "grad_norm": 0.36155444383621216, "learning_rate": 4.6664070855621856e-07, "loss": 0.3067917823791504, "step": 16778, "token_acc": 0.8940478002740143 }, { "epoch": 0.9053580100361517, "grad_norm": 0.4502304494380951, "learning_rate": 4.661132448407868e-07, "loss": 0.27612078189849854, "step": 16779, "token_acc": 0.8957923008057296 }, { "epoch": 0.9054119678411482, "grad_norm": 0.40421417355537415, "learning_rate": 4.655860722877581e-07, "loss": 0.2628340721130371, "step": 16780, "token_acc": 0.9066476239937679 }, { "epoch": 0.9054659256461447, "grad_norm": 0.3458334505558014, "learning_rate": 4.6505919091323406e-07, "loss": 0.35669875144958496, "step": 16781, "token_acc": 0.8733003708281829 }, { "epoch": 0.9055198834511412, "grad_norm": 0.42182406783103943, "learning_rate": 4.6453260073330395e-07, "loss": 0.36169859766960144, "step": 16782, "token_acc": 0.8731082654249127 }, { "epoch": 0.9055738412561377, "grad_norm": 0.3928309977054596, "learning_rate": 4.6400630176405057e-07, "loss": 0.3500436246395111, "step": 16783, "token_acc": 0.8762433794083452 }, { "epoch": 0.9056277990611342, "grad_norm": 0.5475265979766846, "learning_rate": 4.634802940215466e-07, "loss": 0.3164266049861908, "step": 16784, "token_acc": 0.8851030110935024 }, { "epoch": 0.9056817568661307, "grad_norm": 0.40728265047073364, "learning_rate": 4.629545775218547e-07, "loss": 0.32744577527046204, "step": 16785, "token_acc": 0.8810177822755529 }, { "epoch": 0.9057357146711271, "grad_norm": 0.47593581676483154, "learning_rate": 4.6242915228103313e-07, "loss": 0.3428933024406433, "step": 16786, "token_acc": 0.8764141486467134 }, { "epoch": 0.9057896724761236, "grad_norm": 0.4742487370967865, "learning_rate": 4.619040183151269e-07, "loss": 0.32738733291625977, "step": 16787, "token_acc": 0.8822916666666667 }, { "epoch": 0.9058436302811201, "grad_norm": 0.45911601185798645, "learning_rate": 4.613791756401731e-07, "loss": 0.33121153712272644, "step": 16788, "token_acc": 0.8818726878428371 }, { "epoch": 0.9058975880861166, "grad_norm": 0.47152653336524963, "learning_rate": 4.6085462427220007e-07, "loss": 0.37998324632644653, "step": 16789, "token_acc": 0.866204454934356 }, { "epoch": 0.9059515458911132, "grad_norm": 0.43925562500953674, "learning_rate": 4.6033036422722833e-07, "loss": 0.3535703420639038, "step": 16790, "token_acc": 0.8739719680296536 }, { "epoch": 0.9060055036961097, "grad_norm": 0.43204352259635925, "learning_rate": 4.598063955212684e-07, "loss": 0.270156592130661, "step": 16791, "token_acc": 0.9019607843137255 }, { "epoch": 0.9060594615011062, "grad_norm": 0.5747743844985962, "learning_rate": 4.592827181703208e-07, "loss": 0.3032724857330322, "step": 16792, "token_acc": 0.8881999248402856 }, { "epoch": 0.9061134193061027, "grad_norm": 0.446929931640625, "learning_rate": 4.587593321903805e-07, "loss": 0.3267784118652344, "step": 16793, "token_acc": 0.8822343794989922 }, { "epoch": 0.9061673771110991, "grad_norm": 0.4846727252006531, "learning_rate": 4.582362375974314e-07, "loss": 0.32068219780921936, "step": 16794, "token_acc": 0.8783624002364765 }, { "epoch": 0.9062213349160956, "grad_norm": 0.31570500135421753, "learning_rate": 4.577134344074474e-07, "loss": 0.21670694649219513, "step": 16795, "token_acc": 0.9195841797108376 }, { "epoch": 0.9062752927210921, "grad_norm": 0.4924733638763428, "learning_rate": 4.5719092263639464e-07, "loss": 0.3490086793899536, "step": 16796, "token_acc": 0.8743076267575628 }, { "epoch": 0.9063292505260886, "grad_norm": 0.44826269149780273, "learning_rate": 4.5666870230023255e-07, "loss": 0.3822539150714874, "step": 16797, "token_acc": 0.8683751743375174 }, { "epoch": 0.9063832083310851, "grad_norm": 0.5035579800605774, "learning_rate": 4.561467734149072e-07, "loss": 0.35098105669021606, "step": 16798, "token_acc": 0.8737524950099801 }, { "epoch": 0.9064371661360816, "grad_norm": 0.539592981338501, "learning_rate": 4.556251359963604e-07, "loss": 0.36242178082466125, "step": 16799, "token_acc": 0.8769085768143261 }, { "epoch": 0.9064911239410781, "grad_norm": 0.42155593633651733, "learning_rate": 4.5510379006052043e-07, "loss": 0.34446755051612854, "step": 16800, "token_acc": 0.8761325394770904 }, { "epoch": 0.9065450817460746, "grad_norm": 0.453714519739151, "learning_rate": 4.545827356233112e-07, "loss": 0.402654767036438, "step": 16801, "token_acc": 0.8613375013955565 }, { "epoch": 0.906599039551071, "grad_norm": 0.4849966764450073, "learning_rate": 4.5406197270064457e-07, "loss": 0.3425835967063904, "step": 16802, "token_acc": 0.878286270691334 }, { "epoch": 0.9066529973560675, "grad_norm": 0.3470984697341919, "learning_rate": 4.5354150130842435e-07, "loss": 0.33975934982299805, "step": 16803, "token_acc": 0.8804284037558685 }, { "epoch": 0.906706955161064, "grad_norm": 0.47339218854904175, "learning_rate": 4.530213214625456e-07, "loss": 0.32926321029663086, "step": 16804, "token_acc": 0.8826383623957543 }, { "epoch": 0.9067609129660605, "grad_norm": 0.5198414325714111, "learning_rate": 4.525014331788946e-07, "loss": 0.3533766269683838, "step": 16805, "token_acc": 0.8696349065004453 }, { "epoch": 0.906814870771057, "grad_norm": 0.3983082175254822, "learning_rate": 4.519818364733486e-07, "loss": 0.3008480668067932, "step": 16806, "token_acc": 0.8927628103539356 }, { "epoch": 0.9068688285760536, "grad_norm": 0.3950343728065491, "learning_rate": 4.514625313617771e-07, "loss": 0.29811060428619385, "step": 16807, "token_acc": 0.8897614461401689 }, { "epoch": 0.9069227863810501, "grad_norm": 0.37241098284721375, "learning_rate": 4.5094351786003743e-07, "loss": 0.3335329294204712, "step": 16808, "token_acc": 0.8822921451538814 }, { "epoch": 0.9069767441860465, "grad_norm": 0.4370144009590149, "learning_rate": 4.5042479598398246e-07, "loss": 0.3269434869289398, "step": 16809, "token_acc": 0.8827586206896552 }, { "epoch": 0.907030701991043, "grad_norm": 0.4580751359462738, "learning_rate": 4.4990636574945177e-07, "loss": 0.3435880243778229, "step": 16810, "token_acc": 0.8788062414785639 }, { "epoch": 0.9070846597960395, "grad_norm": 0.41150182485580444, "learning_rate": 4.4938822717227825e-07, "loss": 0.31185752153396606, "step": 16811, "token_acc": 0.882183908045977 }, { "epoch": 0.907138617601036, "grad_norm": 0.39700424671173096, "learning_rate": 4.48870380268287e-07, "loss": 0.29869574308395386, "step": 16812, "token_acc": 0.8947002203409486 }, { "epoch": 0.9071925754060325, "grad_norm": 0.4242675304412842, "learning_rate": 4.4835282505329313e-07, "loss": 0.33900952339172363, "step": 16813, "token_acc": 0.8780797101449276 }, { "epoch": 0.907246533211029, "grad_norm": 0.3788343667984009, "learning_rate": 4.478355615431018e-07, "loss": 0.29325389862060547, "step": 16814, "token_acc": 0.8939210136136946 }, { "epoch": 0.9073004910160255, "grad_norm": 0.413129061460495, "learning_rate": 4.4731858975350926e-07, "loss": 0.26068049669265747, "step": 16815, "token_acc": 0.9037800687285223 }, { "epoch": 0.907354448821022, "grad_norm": 0.4759543538093567, "learning_rate": 4.46801909700304e-07, "loss": 0.37833112478256226, "step": 16816, "token_acc": 0.8600258314497902 }, { "epoch": 0.9074084066260184, "grad_norm": 0.49013856053352356, "learning_rate": 4.4628552139926673e-07, "loss": 0.3139243721961975, "step": 16817, "token_acc": 0.8879461673493271 }, { "epoch": 0.9074623644310149, "grad_norm": 0.4208061695098877, "learning_rate": 4.457694248661659e-07, "loss": 0.3328890800476074, "step": 16818, "token_acc": 0.8890436397400185 }, { "epoch": 0.9075163222360114, "grad_norm": 0.34239262342453003, "learning_rate": 4.4525362011676455e-07, "loss": 0.31777679920196533, "step": 16819, "token_acc": 0.8877719636463784 }, { "epoch": 0.9075702800410079, "grad_norm": 0.43492379784584045, "learning_rate": 4.447381071668144e-07, "loss": 0.3545506000518799, "step": 16820, "token_acc": 0.8765887658876589 }, { "epoch": 0.9076242378460044, "grad_norm": 0.4308512508869171, "learning_rate": 4.4422288603205964e-07, "loss": 0.27537477016448975, "step": 16821, "token_acc": 0.9007182087029996 }, { "epoch": 0.9076781956510009, "grad_norm": 0.47970736026763916, "learning_rate": 4.437079567282354e-07, "loss": 0.33450600504875183, "step": 16822, "token_acc": 0.8800934715933986 }, { "epoch": 0.9077321534559974, "grad_norm": 0.5123192071914673, "learning_rate": 4.431933192710658e-07, "loss": 0.32280683517456055, "step": 16823, "token_acc": 0.8918794922771066 }, { "epoch": 0.907786111260994, "grad_norm": 0.48224613070487976, "learning_rate": 4.426789736762682e-07, "loss": 0.34073692560195923, "step": 16824, "token_acc": 0.8855182926829268 }, { "epoch": 0.9078400690659904, "grad_norm": 0.44232094287872314, "learning_rate": 4.4216491995955126e-07, "loss": 0.3302169442176819, "step": 16825, "token_acc": 0.8808968377397616 }, { "epoch": 0.9078940268709869, "grad_norm": 0.5245869755744934, "learning_rate": 4.416511581366145e-07, "loss": 0.2869868874549866, "step": 16826, "token_acc": 0.8923045182587167 }, { "epoch": 0.9079479846759834, "grad_norm": 0.5096558332443237, "learning_rate": 4.4113768822314773e-07, "loss": 0.3549070358276367, "step": 16827, "token_acc": 0.877260190009194 }, { "epoch": 0.9080019424809799, "grad_norm": 0.3598628342151642, "learning_rate": 4.4062451023483166e-07, "loss": 0.29427555203437805, "step": 16828, "token_acc": 0.8892026354712922 }, { "epoch": 0.9080559002859764, "grad_norm": 0.3929942846298218, "learning_rate": 4.4011162418733823e-07, "loss": 0.31564176082611084, "step": 16829, "token_acc": 0.8827498408656906 }, { "epoch": 0.9081098580909729, "grad_norm": 0.4655036926269531, "learning_rate": 4.3959903009633265e-07, "loss": 0.30399030447006226, "step": 16830, "token_acc": 0.8902891030392883 }, { "epoch": 0.9081638158959694, "grad_norm": 0.3863582909107208, "learning_rate": 4.390867279774669e-07, "loss": 0.3095927834510803, "step": 16831, "token_acc": 0.8914457228614308 }, { "epoch": 0.9082177737009658, "grad_norm": 0.36714228987693787, "learning_rate": 4.385747178463884e-07, "loss": 0.29711323976516724, "step": 16832, "token_acc": 0.8938856015779093 }, { "epoch": 0.9082717315059623, "grad_norm": 0.45157697796821594, "learning_rate": 4.380629997187336e-07, "loss": 0.3960423171520233, "step": 16833, "token_acc": 0.8626930101278433 }, { "epoch": 0.9083256893109588, "grad_norm": 0.4382912516593933, "learning_rate": 4.375515736101299e-07, "loss": 0.26675671339035034, "step": 16834, "token_acc": 0.9053292545483819 }, { "epoch": 0.9083796471159553, "grad_norm": 0.33601632714271545, "learning_rate": 4.370404395361949e-07, "loss": 0.2985082268714905, "step": 16835, "token_acc": 0.8942241576896631 }, { "epoch": 0.9084336049209518, "grad_norm": 0.5600957870483398, "learning_rate": 4.365295975125417e-07, "loss": 0.34375542402267456, "step": 16836, "token_acc": 0.8839673913043479 }, { "epoch": 0.9084875627259483, "grad_norm": 0.48952293395996094, "learning_rate": 4.3601904755476655e-07, "loss": 0.32529932260513306, "step": 16837, "token_acc": 0.8792071802543007 }, { "epoch": 0.9085415205309448, "grad_norm": 0.4665674865245819, "learning_rate": 4.35508789678466e-07, "loss": 0.34959545731544495, "step": 16838, "token_acc": 0.8700751328568811 }, { "epoch": 0.9085954783359413, "grad_norm": 0.4417928159236908, "learning_rate": 4.3499882389922085e-07, "loss": 0.27511730790138245, "step": 16839, "token_acc": 0.9002120640904807 }, { "epoch": 0.9086494361409377, "grad_norm": 0.4582652449607849, "learning_rate": 4.3448915023260653e-07, "loss": 0.2789798974990845, "step": 16840, "token_acc": 0.8989296367783822 }, { "epoch": 0.9087033939459342, "grad_norm": 0.5131434202194214, "learning_rate": 4.339797686941882e-07, "loss": 0.3406559228897095, "step": 16841, "token_acc": 0.8743245598744989 }, { "epoch": 0.9087573517509308, "grad_norm": 0.43343615531921387, "learning_rate": 4.3347067929952134e-07, "loss": 0.3056410849094391, "step": 16842, "token_acc": 0.8885999726289859 }, { "epoch": 0.9088113095559273, "grad_norm": 0.5478089451789856, "learning_rate": 4.3296188206415235e-07, "loss": 0.3106071352958679, "step": 16843, "token_acc": 0.8870703764320785 }, { "epoch": 0.9088652673609238, "grad_norm": 0.5299346446990967, "learning_rate": 4.3245337700362324e-07, "loss": 0.37931132316589355, "step": 16844, "token_acc": 0.8621155288822205 }, { "epoch": 0.9089192251659203, "grad_norm": 0.31882932782173157, "learning_rate": 4.3194516413346045e-07, "loss": 0.2779952585697174, "step": 16845, "token_acc": 0.8910992968616018 }, { "epoch": 0.9089731829709168, "grad_norm": 0.4241712987422943, "learning_rate": 4.314372434691871e-07, "loss": 0.4057430624961853, "step": 16846, "token_acc": 0.8588501698326834 }, { "epoch": 0.9090271407759133, "grad_norm": 0.4765337109565735, "learning_rate": 4.309296150263131e-07, "loss": 0.28649279475212097, "step": 16847, "token_acc": 0.8972395085367799 }, { "epoch": 0.9090810985809097, "grad_norm": 0.46760594844818115, "learning_rate": 4.304222788203427e-07, "loss": 0.32438457012176514, "step": 16848, "token_acc": 0.8883124369505693 }, { "epoch": 0.9091350563859062, "grad_norm": 0.42960846424102783, "learning_rate": 4.299152348667679e-07, "loss": 0.29073217511177063, "step": 16849, "token_acc": 0.8968587053287549 }, { "epoch": 0.9091890141909027, "grad_norm": 0.43067115545272827, "learning_rate": 4.294084831810741e-07, "loss": 0.30382877588272095, "step": 16850, "token_acc": 0.8876500857632933 }, { "epoch": 0.9092429719958992, "grad_norm": 0.44847217202186584, "learning_rate": 4.289020237787389e-07, "loss": 0.3415968716144562, "step": 16851, "token_acc": 0.8782934131736527 }, { "epoch": 0.9092969298008957, "grad_norm": 0.3795437216758728, "learning_rate": 4.283958566752289e-07, "loss": 0.334361732006073, "step": 16852, "token_acc": 0.879479932165065 }, { "epoch": 0.9093508876058922, "grad_norm": 0.44662028551101685, "learning_rate": 4.2788998188600275e-07, "loss": 0.33432239294052124, "step": 16853, "token_acc": 0.8832514278124585 }, { "epoch": 0.9094048454108887, "grad_norm": 0.42296093702316284, "learning_rate": 4.273843994265081e-07, "loss": 0.2596032917499542, "step": 16854, "token_acc": 0.9045756080966151 }, { "epoch": 0.9094588032158851, "grad_norm": 0.35179632902145386, "learning_rate": 4.2687910931218603e-07, "loss": 0.27564534544944763, "step": 16855, "token_acc": 0.9003550295857988 }, { "epoch": 0.9095127610208816, "grad_norm": 0.3734000027179718, "learning_rate": 4.263741115584674e-07, "loss": 0.25174036622047424, "step": 16856, "token_acc": 0.9055846210001217 }, { "epoch": 0.9095667188258781, "grad_norm": 0.4613298177719116, "learning_rate": 4.2586940618077663e-07, "loss": 0.33173173666000366, "step": 16857, "token_acc": 0.8816784274193549 }, { "epoch": 0.9096206766308746, "grad_norm": 0.4313686192035675, "learning_rate": 4.2536499319452583e-07, "loss": 0.3438832759857178, "step": 16858, "token_acc": 0.8753566944412738 }, { "epoch": 0.9096746344358712, "grad_norm": 0.43220946192741394, "learning_rate": 4.2486087261511934e-07, "loss": 0.2718571424484253, "step": 16859, "token_acc": 0.8981780781663238 }, { "epoch": 0.9097285922408677, "grad_norm": 0.5819941759109497, "learning_rate": 4.2435704445795476e-07, "loss": 0.3703397512435913, "step": 16860, "token_acc": 0.8696581196581197 }, { "epoch": 0.9097825500458642, "grad_norm": 0.42292484641075134, "learning_rate": 4.238535087384166e-07, "loss": 0.3396027088165283, "step": 16861, "token_acc": 0.8841441655763055 }, { "epoch": 0.9098365078508607, "grad_norm": 0.41428765654563904, "learning_rate": 4.2335026547188465e-07, "loss": 0.26395320892333984, "step": 16862, "token_acc": 0.8996911071370509 }, { "epoch": 0.9098904656558571, "grad_norm": 0.4513436555862427, "learning_rate": 4.228473146737255e-07, "loss": 0.3282301425933838, "step": 16863, "token_acc": 0.880347420274825 }, { "epoch": 0.9099444234608536, "grad_norm": 0.46343785524368286, "learning_rate": 4.2234465635930143e-07, "loss": 0.30441975593566895, "step": 16864, "token_acc": 0.8898250162022034 }, { "epoch": 0.9099983812658501, "grad_norm": 0.4451487362384796, "learning_rate": 4.218422905439623e-07, "loss": 0.3205367922782898, "step": 16865, "token_acc": 0.8853974121996303 }, { "epoch": 0.9100523390708466, "grad_norm": 0.38150355219841003, "learning_rate": 4.213402172430503e-07, "loss": 0.26952221989631653, "step": 16866, "token_acc": 0.8990071362085014 }, { "epoch": 0.9101062968758431, "grad_norm": 0.49784979224205017, "learning_rate": 4.2083843647189984e-07, "loss": 0.35302895307540894, "step": 16867, "token_acc": 0.8808437153077587 }, { "epoch": 0.9101602546808396, "grad_norm": 0.3772526681423187, "learning_rate": 4.2033694824583306e-07, "loss": 0.2902991771697998, "step": 16868, "token_acc": 0.894697160090475 }, { "epoch": 0.9102142124858361, "grad_norm": 0.4041382968425751, "learning_rate": 4.198357525801655e-07, "loss": 0.32374709844589233, "step": 16869, "token_acc": 0.8763731473408892 }, { "epoch": 0.9102681702908326, "grad_norm": 0.4024786949157715, "learning_rate": 4.1933484949020496e-07, "loss": 0.3352452516555786, "step": 16870, "token_acc": 0.8777738640366326 }, { "epoch": 0.910322128095829, "grad_norm": 0.4748879671096802, "learning_rate": 4.188342389912492e-07, "loss": 0.28497377038002014, "step": 16871, "token_acc": 0.8920111092958667 }, { "epoch": 0.9103760859008255, "grad_norm": 0.4718224108219147, "learning_rate": 4.1833392109858595e-07, "loss": 0.31259024143218994, "step": 16872, "token_acc": 0.8842787682333874 }, { "epoch": 0.910430043705822, "grad_norm": 0.4835740923881531, "learning_rate": 4.1783389582749414e-07, "loss": 0.35902488231658936, "step": 16873, "token_acc": 0.8672277567846101 }, { "epoch": 0.9104840015108185, "grad_norm": 0.42314496636390686, "learning_rate": 4.173341631932448e-07, "loss": 0.31005769968032837, "step": 16874, "token_acc": 0.88533934252386 }, { "epoch": 0.910537959315815, "grad_norm": 0.4408542215824127, "learning_rate": 4.168347232111003e-07, "loss": 0.30437415838241577, "step": 16875, "token_acc": 0.8889729239146876 }, { "epoch": 0.9105919171208116, "grad_norm": 0.4140511453151703, "learning_rate": 4.163355758963117e-07, "loss": 0.32158035039901733, "step": 16876, "token_acc": 0.8820571756931476 }, { "epoch": 0.9106458749258081, "grad_norm": 0.37459796667099, "learning_rate": 4.158367212641246e-07, "loss": 0.2970387935638428, "step": 16877, "token_acc": 0.8886374464030491 }, { "epoch": 0.9106998327308045, "grad_norm": 0.392300009727478, "learning_rate": 4.153381593297734e-07, "loss": 0.29996567964553833, "step": 16878, "token_acc": 0.8883790294826202 }, { "epoch": 0.910753790535801, "grad_norm": 0.42273029685020447, "learning_rate": 4.148398901084849e-07, "loss": 0.32557305693626404, "step": 16879, "token_acc": 0.8834129062066841 }, { "epoch": 0.9108077483407975, "grad_norm": 0.3546808660030365, "learning_rate": 4.1434191361547473e-07, "loss": 0.27686989307403564, "step": 16880, "token_acc": 0.8988529718456726 }, { "epoch": 0.910861706145794, "grad_norm": 0.3955911695957184, "learning_rate": 4.1384422986595063e-07, "loss": 0.32855096459388733, "step": 16881, "token_acc": 0.8814720812182741 }, { "epoch": 0.9109156639507905, "grad_norm": 0.4583165943622589, "learning_rate": 4.1334683887511383e-07, "loss": 0.3350212574005127, "step": 16882, "token_acc": 0.8781356686952649 }, { "epoch": 0.910969621755787, "grad_norm": 0.39223241806030273, "learning_rate": 4.128497406581522e-07, "loss": 0.30659011006355286, "step": 16883, "token_acc": 0.8874934589220304 }, { "epoch": 0.9110235795607835, "grad_norm": 0.4129611551761627, "learning_rate": 4.12352935230248e-07, "loss": 0.32408708333969116, "step": 16884, "token_acc": 0.8812154696132597 }, { "epoch": 0.91107753736578, "grad_norm": 0.4841749370098114, "learning_rate": 4.1185642260657467e-07, "loss": 0.33593976497650146, "step": 16885, "token_acc": 0.8820080091533181 }, { "epoch": 0.9111314951707764, "grad_norm": 0.38617125153541565, "learning_rate": 4.113602028022934e-07, "loss": 0.35561251640319824, "step": 16886, "token_acc": 0.8733437663015128 }, { "epoch": 0.9111854529757729, "grad_norm": 0.4670533537864685, "learning_rate": 4.108642758325598e-07, "loss": 0.3077097535133362, "step": 16887, "token_acc": 0.8828708133971291 }, { "epoch": 0.9112394107807694, "grad_norm": 0.3936624228954315, "learning_rate": 4.1036864171251854e-07, "loss": 0.30667030811309814, "step": 16888, "token_acc": 0.8873381536909366 }, { "epoch": 0.9112933685857659, "grad_norm": 0.34750646352767944, "learning_rate": 4.0987330045730743e-07, "loss": 0.3544057011604309, "step": 16889, "token_acc": 0.8729085083659666 }, { "epoch": 0.9113473263907624, "grad_norm": 0.36655154824256897, "learning_rate": 4.093782520820533e-07, "loss": 0.2953113615512848, "step": 16890, "token_acc": 0.8938597615690296 }, { "epoch": 0.9114012841957589, "grad_norm": 0.4218200743198395, "learning_rate": 4.088834966018751e-07, "loss": 0.30121827125549316, "step": 16891, "token_acc": 0.8938540793099317 }, { "epoch": 0.9114552420007555, "grad_norm": 0.42774471640586853, "learning_rate": 4.08389034031883e-07, "loss": 0.31460604071617126, "step": 16892, "token_acc": 0.8862680288461539 }, { "epoch": 0.911509199805752, "grad_norm": 0.2917730510234833, "learning_rate": 4.0789486438717717e-07, "loss": 0.2855827212333679, "step": 16893, "token_acc": 0.8987606182982871 }, { "epoch": 0.9115631576107484, "grad_norm": 0.43428635597229004, "learning_rate": 4.0740098768284883e-07, "loss": 0.3836907744407654, "step": 16894, "token_acc": 0.8687417963622727 }, { "epoch": 0.9116171154157449, "grad_norm": 0.43198713660240173, "learning_rate": 4.0690740393398044e-07, "loss": 0.29551899433135986, "step": 16895, "token_acc": 0.8923218511701289 }, { "epoch": 0.9116710732207414, "grad_norm": 0.46136239171028137, "learning_rate": 4.064141131556487e-07, "loss": 0.3535929322242737, "step": 16896, "token_acc": 0.8790417155302396 }, { "epoch": 0.9117250310257379, "grad_norm": 0.3961119055747986, "learning_rate": 4.0592111536291613e-07, "loss": 0.2865848243236542, "step": 16897, "token_acc": 0.893057647542106 }, { "epoch": 0.9117789888307344, "grad_norm": 0.4554234445095062, "learning_rate": 4.054284105708395e-07, "loss": 0.3704286813735962, "step": 16898, "token_acc": 0.8716085271317829 }, { "epoch": 0.9118329466357309, "grad_norm": 0.37946513295173645, "learning_rate": 4.0493599879446567e-07, "loss": 0.3354887068271637, "step": 16899, "token_acc": 0.8845463333793675 }, { "epoch": 0.9118869044407274, "grad_norm": 0.3012152314186096, "learning_rate": 4.044438800488326e-07, "loss": 0.3166762590408325, "step": 16900, "token_acc": 0.8871304538960325 }, { "epoch": 0.9119408622457238, "grad_norm": 0.3487943410873413, "learning_rate": 4.039520543489683e-07, "loss": 0.28846168518066406, "step": 16901, "token_acc": 0.8975564987954572 }, { "epoch": 0.9119948200507203, "grad_norm": 0.4755217432975769, "learning_rate": 4.034605217098964e-07, "loss": 0.33453208208084106, "step": 16902, "token_acc": 0.8816608996539792 }, { "epoch": 0.9120487778557168, "grad_norm": 0.39721885323524475, "learning_rate": 4.029692821466258e-07, "loss": 0.3612252473831177, "step": 16903, "token_acc": 0.8743748511550369 }, { "epoch": 0.9121027356607133, "grad_norm": 0.41031691431999207, "learning_rate": 4.0247833567415906e-07, "loss": 0.38665661215782166, "step": 16904, "token_acc": 0.861271676300578 }, { "epoch": 0.9121566934657098, "grad_norm": 0.38030362129211426, "learning_rate": 4.0198768230748977e-07, "loss": 0.30737176537513733, "step": 16905, "token_acc": 0.8891218549580661 }, { "epoch": 0.9122106512707063, "grad_norm": 0.5877022743225098, "learning_rate": 4.014973220616025e-07, "loss": 0.3360203802585602, "step": 16906, "token_acc": 0.8828649138712602 }, { "epoch": 0.9122646090757028, "grad_norm": 0.5672212839126587, "learning_rate": 4.010072549514732e-07, "loss": 0.30166637897491455, "step": 16907, "token_acc": 0.8943707538013588 }, { "epoch": 0.9123185668806993, "grad_norm": 0.5099486112594604, "learning_rate": 4.005174809920653e-07, "loss": 0.30984270572662354, "step": 16908, "token_acc": 0.8917387414255891 }, { "epoch": 0.9123725246856957, "grad_norm": 0.5137808322906494, "learning_rate": 4.000280001983414e-07, "loss": 0.38883936405181885, "step": 16909, "token_acc": 0.8655949618826649 }, { "epoch": 0.9124264824906922, "grad_norm": 0.4398114085197449, "learning_rate": 3.9953881258524616e-07, "loss": 0.3321835398674011, "step": 16910, "token_acc": 0.8847523842353211 }, { "epoch": 0.9124804402956888, "grad_norm": 0.523901641368866, "learning_rate": 3.99049918167721e-07, "loss": 0.30362027883529663, "step": 16911, "token_acc": 0.8865270896595813 }, { "epoch": 0.9125343981006853, "grad_norm": 0.42332226037979126, "learning_rate": 3.985613169606972e-07, "loss": 0.3120485544204712, "step": 16912, "token_acc": 0.8863049095607235 }, { "epoch": 0.9125883559056818, "grad_norm": 0.3807975947856903, "learning_rate": 3.980730089790941e-07, "loss": 0.3365069627761841, "step": 16913, "token_acc": 0.8829611248966087 }, { "epoch": 0.9126423137106783, "grad_norm": 0.4662613272666931, "learning_rate": 3.9758499423782737e-07, "loss": 0.295396089553833, "step": 16914, "token_acc": 0.8918340467566678 }, { "epoch": 0.9126962715156748, "grad_norm": 0.3940182328224182, "learning_rate": 3.970972727517985e-07, "loss": 0.27019745111465454, "step": 16915, "token_acc": 0.9002102539220443 }, { "epoch": 0.9127502293206712, "grad_norm": 0.422883003950119, "learning_rate": 3.9660984453590347e-07, "loss": 0.28736692667007446, "step": 16916, "token_acc": 0.8926737418692229 }, { "epoch": 0.9128041871256677, "grad_norm": 0.4212011396884918, "learning_rate": 3.9612270960502797e-07, "loss": 0.35423827171325684, "step": 16917, "token_acc": 0.8742668070386524 }, { "epoch": 0.9128581449306642, "grad_norm": 0.4304828941822052, "learning_rate": 3.956358679740491e-07, "loss": 0.3260812759399414, "step": 16918, "token_acc": 0.8831897676846776 }, { "epoch": 0.9129121027356607, "grad_norm": 0.4288787543773651, "learning_rate": 3.9514931965783486e-07, "loss": 0.3338276445865631, "step": 16919, "token_acc": 0.8781850388336285 }, { "epoch": 0.9129660605406572, "grad_norm": 0.41475042700767517, "learning_rate": 3.946630646712435e-07, "loss": 0.3818616569042206, "step": 16920, "token_acc": 0.8634083601286173 }, { "epoch": 0.9130200183456537, "grad_norm": 0.546002209186554, "learning_rate": 3.9417710302912637e-07, "loss": 0.33876538276672363, "step": 16921, "token_acc": 0.8847638057218895 }, { "epoch": 0.9130739761506502, "grad_norm": 0.5092203617095947, "learning_rate": 3.936914347463239e-07, "loss": 0.40140587091445923, "step": 16922, "token_acc": 0.8599517950019028 }, { "epoch": 0.9131279339556467, "grad_norm": 0.4426238536834717, "learning_rate": 3.932060598376686e-07, "loss": 0.33708178997039795, "step": 16923, "token_acc": 0.880257906048511 }, { "epoch": 0.9131818917606431, "grad_norm": 0.5351930856704712, "learning_rate": 3.9272097831798415e-07, "loss": 0.33732128143310547, "step": 16924, "token_acc": 0.8816078365141023 }, { "epoch": 0.9132358495656396, "grad_norm": 0.5041447281837463, "learning_rate": 3.9223619020208435e-07, "loss": 0.2540416717529297, "step": 16925, "token_acc": 0.9031497750160703 }, { "epoch": 0.9132898073706361, "grad_norm": 0.44305911660194397, "learning_rate": 3.91751695504774e-07, "loss": 0.3587839603424072, "step": 16926, "token_acc": 0.8739319048245038 }, { "epoch": 0.9133437651756326, "grad_norm": 0.5014483332633972, "learning_rate": 3.91267494240849e-07, "loss": 0.3947277069091797, "step": 16927, "token_acc": 0.8609625668449198 }, { "epoch": 0.9133977229806292, "grad_norm": 0.5244056582450867, "learning_rate": 3.907835864250986e-07, "loss": 0.3249794840812683, "step": 16928, "token_acc": 0.8844460102166637 }, { "epoch": 0.9134516807856257, "grad_norm": 0.4096220135688782, "learning_rate": 3.902999720723e-07, "loss": 0.3576568067073822, "step": 16929, "token_acc": 0.8725061492210987 }, { "epoch": 0.9135056385906222, "grad_norm": 0.5063053965568542, "learning_rate": 3.8981665119722453e-07, "loss": 0.3162705600261688, "step": 16930, "token_acc": 0.8858503881439661 }, { "epoch": 0.9135595963956187, "grad_norm": 0.45450296998023987, "learning_rate": 3.8933362381462946e-07, "loss": 0.3084160089492798, "step": 16931, "token_acc": 0.8877608573040046 }, { "epoch": 0.9136135542006151, "grad_norm": 0.43323802947998047, "learning_rate": 3.8885088993926736e-07, "loss": 0.339687705039978, "step": 16932, "token_acc": 0.8781453208934125 }, { "epoch": 0.9136675120056116, "grad_norm": 0.5783352255821228, "learning_rate": 3.8836844958588085e-07, "loss": 0.3418492078781128, "step": 16933, "token_acc": 0.8813171080887616 }, { "epoch": 0.9137214698106081, "grad_norm": 0.37593963742256165, "learning_rate": 3.878863027692048e-07, "loss": 0.3202015161514282, "step": 16934, "token_acc": 0.8827138643067847 }, { "epoch": 0.9137754276156046, "grad_norm": 0.3989730775356293, "learning_rate": 3.8740444950396415e-07, "loss": 0.33169126510620117, "step": 16935, "token_acc": 0.8841951930080116 }, { "epoch": 0.9138293854206011, "grad_norm": 0.489712655544281, "learning_rate": 3.869228898048727e-07, "loss": 0.3130967617034912, "step": 16936, "token_acc": 0.8868274582560297 }, { "epoch": 0.9138833432255976, "grad_norm": 0.4612366557121277, "learning_rate": 3.864416236866375e-07, "loss": 0.3672814965248108, "step": 16937, "token_acc": 0.8689827075868516 }, { "epoch": 0.9139373010305941, "grad_norm": 0.4099666476249695, "learning_rate": 3.8596065116395686e-07, "loss": 0.2924377918243408, "step": 16938, "token_acc": 0.8944281524926686 }, { "epoch": 0.9139912588355905, "grad_norm": 0.4601248800754547, "learning_rate": 3.85479972251519e-07, "loss": 0.29849475622177124, "step": 16939, "token_acc": 0.8940942646223736 }, { "epoch": 0.914045216640587, "grad_norm": 0.3643473982810974, "learning_rate": 3.8499958696400333e-07, "loss": 0.2927039861679077, "step": 16940, "token_acc": 0.8915454769534139 }, { "epoch": 0.9140991744455835, "grad_norm": 0.47438809275627136, "learning_rate": 3.8451949531608355e-07, "loss": 0.3150949478149414, "step": 16941, "token_acc": 0.8828486450760079 }, { "epoch": 0.91415313225058, "grad_norm": 0.4525342881679535, "learning_rate": 3.8403969732241806e-07, "loss": 0.37912750244140625, "step": 16942, "token_acc": 0.8687129223626515 }, { "epoch": 0.9142070900555765, "grad_norm": 0.5308433175086975, "learning_rate": 3.835601929976618e-07, "loss": 0.29605114459991455, "step": 16943, "token_acc": 0.8980711933511055 }, { "epoch": 0.914261047860573, "grad_norm": 0.3412237763404846, "learning_rate": 3.8308098235645854e-07, "loss": 0.32456594705581665, "step": 16944, "token_acc": 0.882312925170068 }, { "epoch": 0.9143150056655696, "grad_norm": 0.43621665239334106, "learning_rate": 3.8260206541344215e-07, "loss": 0.3157412111759186, "step": 16945, "token_acc": 0.8875506405343261 }, { "epoch": 0.9143689634705661, "grad_norm": 0.4659772515296936, "learning_rate": 3.8212344218323983e-07, "loss": 0.3277015686035156, "step": 16946, "token_acc": 0.8851045168834966 }, { "epoch": 0.9144229212755625, "grad_norm": 0.3094382882118225, "learning_rate": 3.8164511268046766e-07, "loss": 0.3101440370082855, "step": 16947, "token_acc": 0.8893680097021964 }, { "epoch": 0.914476879080559, "grad_norm": 0.4439745843410492, "learning_rate": 3.81167076919734e-07, "loss": 0.30688443779945374, "step": 16948, "token_acc": 0.8832659660468877 }, { "epoch": 0.9145308368855555, "grad_norm": 0.3487994968891144, "learning_rate": 3.806893349156382e-07, "loss": 0.31544631719589233, "step": 16949, "token_acc": 0.8840359364201796 }, { "epoch": 0.914584794690552, "grad_norm": 0.5200266242027283, "learning_rate": 3.8021188668276975e-07, "loss": 0.3223474621772766, "step": 16950, "token_acc": 0.8792291220556745 }, { "epoch": 0.9146387524955485, "grad_norm": 0.4186384677886963, "learning_rate": 3.7973473223571036e-07, "loss": 0.34162792563438416, "step": 16951, "token_acc": 0.8797684429271189 }, { "epoch": 0.914692710300545, "grad_norm": 0.38322511315345764, "learning_rate": 3.792578715890327e-07, "loss": 0.35460782051086426, "step": 16952, "token_acc": 0.8762597984322509 }, { "epoch": 0.9147466681055415, "grad_norm": 0.36828625202178955, "learning_rate": 3.7878130475729746e-07, "loss": 0.3219884932041168, "step": 16953, "token_acc": 0.8839982334756367 }, { "epoch": 0.914800625910538, "grad_norm": 0.3975462317466736, "learning_rate": 3.7830503175506294e-07, "loss": 0.28902146220207214, "step": 16954, "token_acc": 0.8914864601597506 }, { "epoch": 0.9148545837155344, "grad_norm": 0.4039774537086487, "learning_rate": 3.7782905259687086e-07, "loss": 0.28338444232940674, "step": 16955, "token_acc": 0.898943661971831 }, { "epoch": 0.9149085415205309, "grad_norm": 0.3888965845108032, "learning_rate": 3.7735336729725955e-07, "loss": 0.3031052052974701, "step": 16956, "token_acc": 0.8901256161551916 }, { "epoch": 0.9149624993255274, "grad_norm": 0.5334196090698242, "learning_rate": 3.7687797587075525e-07, "loss": 0.35642367601394653, "step": 16957, "token_acc": 0.8698511595707857 }, { "epoch": 0.9150164571305239, "grad_norm": 0.3723110854625702, "learning_rate": 3.764028783318774e-07, "loss": 0.24853679537773132, "step": 16958, "token_acc": 0.9095929112623603 }, { "epoch": 0.9150704149355204, "grad_norm": 0.44036757946014404, "learning_rate": 3.7592807469513326e-07, "loss": 0.34956759214401245, "step": 16959, "token_acc": 0.8756536156102538 }, { "epoch": 0.915124372740517, "grad_norm": 0.4624933898448944, "learning_rate": 3.754535649750257e-07, "loss": 0.3399967551231384, "step": 16960, "token_acc": 0.8794773928361714 }, { "epoch": 0.9151783305455135, "grad_norm": 0.3245312571525574, "learning_rate": 3.749793491860443e-07, "loss": 0.3096764385700226, "step": 16961, "token_acc": 0.886352232814852 }, { "epoch": 0.9152322883505098, "grad_norm": 0.5836255550384521, "learning_rate": 3.745054273426729e-07, "loss": 0.3792753219604492, "step": 16962, "token_acc": 0.870089530422072 }, { "epoch": 0.9152862461555064, "grad_norm": 0.3314902186393738, "learning_rate": 3.740317994593834e-07, "loss": 0.30017033219337463, "step": 16963, "token_acc": 0.8892965385228904 }, { "epoch": 0.9153402039605029, "grad_norm": 0.4144594073295593, "learning_rate": 3.735584655506419e-07, "loss": 0.3361363708972931, "step": 16964, "token_acc": 0.8819510144013455 }, { "epoch": 0.9153941617654994, "grad_norm": 0.40902629494667053, "learning_rate": 3.7308542563090353e-07, "loss": 0.3459973633289337, "step": 16965, "token_acc": 0.8781490320869796 }, { "epoch": 0.9154481195704959, "grad_norm": 0.5017129778862, "learning_rate": 3.7261267971461346e-07, "loss": 0.34844037890434265, "step": 16966, "token_acc": 0.8782877772047447 }, { "epoch": 0.9155020773754924, "grad_norm": 0.4807831645011902, "learning_rate": 3.721402278162101e-07, "loss": 0.3695014417171478, "step": 16967, "token_acc": 0.8733832539142273 }, { "epoch": 0.9155560351804889, "grad_norm": 0.5129214525222778, "learning_rate": 3.716680699501218e-07, "loss": 0.31471550464630127, "step": 16968, "token_acc": 0.8850446428571429 }, { "epoch": 0.9156099929854854, "grad_norm": 0.3560391664505005, "learning_rate": 3.711962061307683e-07, "loss": 0.35264351963996887, "step": 16969, "token_acc": 0.8779342723004695 }, { "epoch": 0.9156639507904818, "grad_norm": 0.3305528163909912, "learning_rate": 3.707246363725614e-07, "loss": 0.28955113887786865, "step": 16970, "token_acc": 0.8964264792032806 }, { "epoch": 0.9157179085954783, "grad_norm": 0.44000276923179626, "learning_rate": 3.702533606899006e-07, "loss": 0.36326900124549866, "step": 16971, "token_acc": 0.8744029947076287 }, { "epoch": 0.9157718664004748, "grad_norm": 0.4725440442562103, "learning_rate": 3.6978237909717884e-07, "loss": 0.2906379699707031, "step": 16972, "token_acc": 0.8930489988604916 }, { "epoch": 0.9158258242054713, "grad_norm": 0.44549983739852905, "learning_rate": 3.6931169160878133e-07, "loss": 0.2994944155216217, "step": 16973, "token_acc": 0.892404140545269 }, { "epoch": 0.9158797820104678, "grad_norm": 0.37410691380500793, "learning_rate": 3.688412982390821e-07, "loss": 0.2926103174686432, "step": 16974, "token_acc": 0.8924826465810073 }, { "epoch": 0.9159337398154643, "grad_norm": 0.3524976372718811, "learning_rate": 3.6837119900244635e-07, "loss": 0.2939718961715698, "step": 16975, "token_acc": 0.8913846608541451 }, { "epoch": 0.9159876976204608, "grad_norm": 0.5074458122253418, "learning_rate": 3.679013939132314e-07, "loss": 0.34229540824890137, "step": 16976, "token_acc": 0.8760961351088016 }, { "epoch": 0.9160416554254573, "grad_norm": 0.3875250220298767, "learning_rate": 3.674318829857837e-07, "loss": 0.3664500117301941, "step": 16977, "token_acc": 0.8704205274411975 }, { "epoch": 0.9160956132304537, "grad_norm": 0.43291914463043213, "learning_rate": 3.669626662344439e-07, "loss": 0.3292914032936096, "step": 16978, "token_acc": 0.87943848059455 }, { "epoch": 0.9161495710354502, "grad_norm": 0.2879415452480316, "learning_rate": 3.664937436735405e-07, "loss": 0.2586347460746765, "step": 16979, "token_acc": 0.9021739130434783 }, { "epoch": 0.9162035288404468, "grad_norm": 0.47217831015586853, "learning_rate": 3.660251153173944e-07, "loss": 0.29407215118408203, "step": 16980, "token_acc": 0.893197502464673 }, { "epoch": 0.9162574866454433, "grad_norm": 0.46217355132102966, "learning_rate": 3.6555678118031735e-07, "loss": 0.37041735649108887, "step": 16981, "token_acc": 0.871244635193133 }, { "epoch": 0.9163114444504398, "grad_norm": 0.47435203194618225, "learning_rate": 3.650887412766135e-07, "loss": 0.32974687218666077, "step": 16982, "token_acc": 0.884271014299078 }, { "epoch": 0.9163654022554363, "grad_norm": 0.6004189848899841, "learning_rate": 3.646209956205737e-07, "loss": 0.38675493001937866, "step": 16983, "token_acc": 0.865076660988075 }, { "epoch": 0.9164193600604328, "grad_norm": 0.3978959321975708, "learning_rate": 3.641535442264854e-07, "loss": 0.3021342158317566, "step": 16984, "token_acc": 0.8898334290637565 }, { "epoch": 0.9164733178654292, "grad_norm": 0.372388631105423, "learning_rate": 3.636863871086227e-07, "loss": 0.33631235361099243, "step": 16985, "token_acc": 0.8825656132833423 }, { "epoch": 0.9165272756704257, "grad_norm": 0.461955189704895, "learning_rate": 3.632195242812542e-07, "loss": 0.32777392864227295, "step": 16986, "token_acc": 0.8786747550163323 }, { "epoch": 0.9165812334754222, "grad_norm": 0.3382079005241394, "learning_rate": 3.6275295575863734e-07, "loss": 0.2932587265968323, "step": 16987, "token_acc": 0.8897838066977533 }, { "epoch": 0.9166351912804187, "grad_norm": 0.3135763108730316, "learning_rate": 3.622866815550197e-07, "loss": 0.2875620722770691, "step": 16988, "token_acc": 0.8951451116758058 }, { "epoch": 0.9166891490854152, "grad_norm": 0.37396669387817383, "learning_rate": 3.618207016846431e-07, "loss": 0.2571621537208557, "step": 16989, "token_acc": 0.9056430446194226 }, { "epoch": 0.9167431068904117, "grad_norm": 0.483986496925354, "learning_rate": 3.613550161617374e-07, "loss": 0.35238099098205566, "step": 16990, "token_acc": 0.8755514985546935 }, { "epoch": 0.9167970646954082, "grad_norm": 0.322012722492218, "learning_rate": 3.6088962500052226e-07, "loss": 0.30711686611175537, "step": 16991, "token_acc": 0.8893720831565549 }, { "epoch": 0.9168510225004047, "grad_norm": 0.3610701262950897, "learning_rate": 3.6042452821521525e-07, "loss": 0.3209025263786316, "step": 16992, "token_acc": 0.8870007442322004 }, { "epoch": 0.9169049803054011, "grad_norm": 0.3884536921977997, "learning_rate": 3.5995972582001716e-07, "loss": 0.2845671474933624, "step": 16993, "token_acc": 0.8950632295719845 }, { "epoch": 0.9169589381103976, "grad_norm": 0.37247154116630554, "learning_rate": 3.5949521782912336e-07, "loss": 0.3229176998138428, "step": 16994, "token_acc": 0.8836792211119652 }, { "epoch": 0.9170128959153941, "grad_norm": 0.44828543066978455, "learning_rate": 3.5903100425672133e-07, "loss": 0.3429884910583496, "step": 16995, "token_acc": 0.8761366767704601 }, { "epoch": 0.9170668537203907, "grad_norm": 0.29929521679878235, "learning_rate": 3.5856708511698645e-07, "loss": 0.2610282301902771, "step": 16996, "token_acc": 0.8999723298284449 }, { "epoch": 0.9171208115253872, "grad_norm": 0.4032593369483948, "learning_rate": 3.581034604240863e-07, "loss": 0.3028726279735565, "step": 16997, "token_acc": 0.8887017362010884 }, { "epoch": 0.9171747693303837, "grad_norm": 0.4604637622833252, "learning_rate": 3.576401301921817e-07, "loss": 0.33803731203079224, "step": 16998, "token_acc": 0.8794102159031069 }, { "epoch": 0.9172287271353802, "grad_norm": 0.40837976336479187, "learning_rate": 3.5717709443542027e-07, "loss": 0.3068736791610718, "step": 16999, "token_acc": 0.889167862266858 }, { "epoch": 0.9172826849403767, "grad_norm": 0.47328799962997437, "learning_rate": 3.567143531679451e-07, "loss": 0.3077579140663147, "step": 17000, "token_acc": 0.8898508012525327 }, { "epoch": 0.9173366427453731, "grad_norm": 0.42039769887924194, "learning_rate": 3.562519064038872e-07, "loss": 0.30069005489349365, "step": 17001, "token_acc": 0.8933758978451716 }, { "epoch": 0.9173906005503696, "grad_norm": 0.4044521749019623, "learning_rate": 3.557897541573707e-07, "loss": 0.299492210149765, "step": 17002, "token_acc": 0.8906815020862309 }, { "epoch": 0.9174445583553661, "grad_norm": 0.5349386930465698, "learning_rate": 3.553278964425078e-07, "loss": 0.3918897807598114, "step": 17003, "token_acc": 0.8716592427616926 }, { "epoch": 0.9174985161603626, "grad_norm": 0.3944210112094879, "learning_rate": 3.548663332734026e-07, "loss": 0.3080955147743225, "step": 17004, "token_acc": 0.8871222235708217 }, { "epoch": 0.9175524739653591, "grad_norm": 0.3990628123283386, "learning_rate": 3.5440506466415504e-07, "loss": 0.2841300666332245, "step": 17005, "token_acc": 0.8962703962703963 }, { "epoch": 0.9176064317703556, "grad_norm": 0.4464826285839081, "learning_rate": 3.539440906288494e-07, "loss": 0.3402599096298218, "step": 17006, "token_acc": 0.878936143520261 }, { "epoch": 0.9176603895753521, "grad_norm": 0.362371563911438, "learning_rate": 3.5348341118156437e-07, "loss": 0.29160594940185547, "step": 17007, "token_acc": 0.8902418682235196 }, { "epoch": 0.9177143473803485, "grad_norm": 0.37819361686706543, "learning_rate": 3.5302302633636985e-07, "loss": 0.29804378747940063, "step": 17008, "token_acc": 0.8908970976253299 }, { "epoch": 0.917768305185345, "grad_norm": 0.4216144382953644, "learning_rate": 3.525629361073246e-07, "loss": 0.3509649634361267, "step": 17009, "token_acc": 0.8739404869251578 }, { "epoch": 0.9178222629903415, "grad_norm": 0.4647790193557739, "learning_rate": 3.521031405084796e-07, "loss": 0.33114224672317505, "step": 17010, "token_acc": 0.8764643237486688 }, { "epoch": 0.917876220795338, "grad_norm": 0.47910261154174805, "learning_rate": 3.516436395538758e-07, "loss": 0.3030455708503723, "step": 17011, "token_acc": 0.8918260869565218 }, { "epoch": 0.9179301786003345, "grad_norm": 0.4645146131515503, "learning_rate": 3.5118443325755094e-07, "loss": 0.3136822283267975, "step": 17012, "token_acc": 0.8856747191875673 }, { "epoch": 0.917984136405331, "grad_norm": 0.5051515102386475, "learning_rate": 3.507255216335237e-07, "loss": 0.2878049612045288, "step": 17013, "token_acc": 0.8928125557339041 }, { "epoch": 0.9180380942103276, "grad_norm": 0.3878304958343506, "learning_rate": 3.502669046958118e-07, "loss": 0.27349698543548584, "step": 17014, "token_acc": 0.8941192904036876 }, { "epoch": 0.9180920520153241, "grad_norm": 0.5196622014045715, "learning_rate": 3.4980858245842077e-07, "loss": 0.32395654916763306, "step": 17015, "token_acc": 0.8833199033037873 }, { "epoch": 0.9181460098203205, "grad_norm": 0.4882935881614685, "learning_rate": 3.493505549353471e-07, "loss": 0.3748137056827545, "step": 17016, "token_acc": 0.8675278274014017 }, { "epoch": 0.918199967625317, "grad_norm": 0.4186171591281891, "learning_rate": 3.4889282214057853e-07, "loss": 0.2652386426925659, "step": 17017, "token_acc": 0.902461257976299 }, { "epoch": 0.9182539254303135, "grad_norm": 0.38950851559638977, "learning_rate": 3.484353840880961e-07, "loss": 0.3286370635032654, "step": 17018, "token_acc": 0.8800050524188455 }, { "epoch": 0.91830788323531, "grad_norm": 0.4250234067440033, "learning_rate": 3.479782407918686e-07, "loss": 0.354580819606781, "step": 17019, "token_acc": 0.8746089676746611 }, { "epoch": 0.9183618410403065, "grad_norm": 0.4323573112487793, "learning_rate": 3.4752139226585823e-07, "loss": 0.3012468218803406, "step": 17020, "token_acc": 0.8892345986309894 }, { "epoch": 0.918415798845303, "grad_norm": 0.420742928981781, "learning_rate": 3.4706483852401493e-07, "loss": 0.3438441753387451, "step": 17021, "token_acc": 0.8757782839787396 }, { "epoch": 0.9184697566502995, "grad_norm": 0.341262549161911, "learning_rate": 3.4660857958028203e-07, "loss": 0.2817721962928772, "step": 17022, "token_acc": 0.8965576005453306 }, { "epoch": 0.9185237144552959, "grad_norm": 0.37466397881507874, "learning_rate": 3.46152615448595e-07, "loss": 0.28004878759384155, "step": 17023, "token_acc": 0.8981683104905853 }, { "epoch": 0.9185776722602924, "grad_norm": 0.374493807554245, "learning_rate": 3.4569694614287716e-07, "loss": 0.33955222368240356, "step": 17024, "token_acc": 0.8787781350482315 }, { "epoch": 0.9186316300652889, "grad_norm": 0.4198193848133087, "learning_rate": 3.452415716770463e-07, "loss": 0.2656998634338379, "step": 17025, "token_acc": 0.903938381049266 }, { "epoch": 0.9186855878702854, "grad_norm": 0.4379311501979828, "learning_rate": 3.44786492065009e-07, "loss": 0.2944510281085968, "step": 17026, "token_acc": 0.8961432506887053 }, { "epoch": 0.9187395456752819, "grad_norm": 0.4264265298843384, "learning_rate": 3.4433170732066313e-07, "loss": 0.3376004695892334, "step": 17027, "token_acc": 0.8759517394869392 }, { "epoch": 0.9187935034802784, "grad_norm": 0.5113639235496521, "learning_rate": 3.438772174578975e-07, "loss": 0.3209138810634613, "step": 17028, "token_acc": 0.8872810357958872 }, { "epoch": 0.918847461285275, "grad_norm": 0.3283137381076813, "learning_rate": 3.43423022490591e-07, "loss": 0.3053204119205475, "step": 17029, "token_acc": 0.8907764660716 }, { "epoch": 0.9189014190902715, "grad_norm": 0.2990175783634186, "learning_rate": 3.4296912243261705e-07, "loss": 0.30647504329681396, "step": 17030, "token_acc": 0.8860249205961398 }, { "epoch": 0.9189553768952679, "grad_norm": 0.4724939465522766, "learning_rate": 3.425155172978356e-07, "loss": 0.2892330288887024, "step": 17031, "token_acc": 0.8960338852522141 }, { "epoch": 0.9190093347002644, "grad_norm": 0.49813902378082275, "learning_rate": 3.4206220710010007e-07, "loss": 0.33499544858932495, "step": 17032, "token_acc": 0.8791469194312796 }, { "epoch": 0.9190632925052609, "grad_norm": 0.4953281879425049, "learning_rate": 3.416091918532549e-07, "loss": 0.3624281883239746, "step": 17033, "token_acc": 0.8734066701588965 }, { "epoch": 0.9191172503102574, "grad_norm": 0.47496482729911804, "learning_rate": 3.4115647157113576e-07, "loss": 0.30992600321769714, "step": 17034, "token_acc": 0.8861313868613139 }, { "epoch": 0.9191712081152539, "grad_norm": 0.3395952582359314, "learning_rate": 3.4070404626756594e-07, "loss": 0.32008397579193115, "step": 17035, "token_acc": 0.8830169317598768 }, { "epoch": 0.9192251659202504, "grad_norm": 0.38682031631469727, "learning_rate": 3.402519159563644e-07, "loss": 0.33181923627853394, "step": 17036, "token_acc": 0.8811728395061729 }, { "epoch": 0.9192791237252469, "grad_norm": 0.46238669753074646, "learning_rate": 3.3980008065134016e-07, "loss": 0.33252817392349243, "step": 17037, "token_acc": 0.8807704552690226 }, { "epoch": 0.9193330815302434, "grad_norm": 0.43355628848075867, "learning_rate": 3.3934854036628995e-07, "loss": 0.37456753849983215, "step": 17038, "token_acc": 0.8672671249111163 }, { "epoch": 0.9193870393352398, "grad_norm": 0.4140986204147339, "learning_rate": 3.388972951150049e-07, "loss": 0.342734158039093, "step": 17039, "token_acc": 0.8778461206431424 }, { "epoch": 0.9194409971402363, "grad_norm": 0.36846309900283813, "learning_rate": 3.3844634491126514e-07, "loss": 0.27240240573883057, "step": 17040, "token_acc": 0.9000279642058165 }, { "epoch": 0.9194949549452328, "grad_norm": 0.48532164096832275, "learning_rate": 3.379956897688441e-07, "loss": 0.2890443801879883, "step": 17041, "token_acc": 0.8931258549931601 }, { "epoch": 0.9195489127502293, "grad_norm": 0.4839632213115692, "learning_rate": 3.37545329701503e-07, "loss": 0.34901362657546997, "step": 17042, "token_acc": 0.8773807649929167 }, { "epoch": 0.9196028705552258, "grad_norm": 0.34110137820243835, "learning_rate": 3.370952647229941e-07, "loss": 0.28248175978660583, "step": 17043, "token_acc": 0.8987078910936779 }, { "epoch": 0.9196568283602223, "grad_norm": 0.3444335162639618, "learning_rate": 3.366454948470677e-07, "loss": 0.2954590320587158, "step": 17044, "token_acc": 0.8966812773951158 }, { "epoch": 0.9197107861652188, "grad_norm": 0.3449682593345642, "learning_rate": 3.361960200874548e-07, "loss": 0.3343501389026642, "step": 17045, "token_acc": 0.878115034841148 }, { "epoch": 0.9197647439702152, "grad_norm": 0.4008788764476776, "learning_rate": 3.357468404578845e-07, "loss": 0.3337380588054657, "step": 17046, "token_acc": 0.88113384142265 }, { "epoch": 0.9198187017752117, "grad_norm": 0.4183906614780426, "learning_rate": 3.352979559720748e-07, "loss": 0.34767961502075195, "step": 17047, "token_acc": 0.876836407820612 }, { "epoch": 0.9198726595802083, "grad_norm": 0.3732200264930725, "learning_rate": 3.348493666437336e-07, "loss": 0.3062155544757843, "step": 17048, "token_acc": 0.8877245508982036 }, { "epoch": 0.9199266173852048, "grad_norm": 0.38880831003189087, "learning_rate": 3.3440107248655875e-07, "loss": 0.3081299662590027, "step": 17049, "token_acc": 0.8887503895294484 }, { "epoch": 0.9199805751902013, "grad_norm": 0.40903523564338684, "learning_rate": 3.339530735142438e-07, "loss": 0.28197723627090454, "step": 17050, "token_acc": 0.8935155753337571 }, { "epoch": 0.9200345329951978, "grad_norm": 0.3486432731151581, "learning_rate": 3.335053697404711e-07, "loss": 0.3686283826828003, "step": 17051, "token_acc": 0.8686225865602416 }, { "epoch": 0.9200884908001943, "grad_norm": 0.38444626331329346, "learning_rate": 3.330579611789109e-07, "loss": 0.29343390464782715, "step": 17052, "token_acc": 0.8912164114417799 }, { "epoch": 0.9201424486051908, "grad_norm": 0.4353206753730774, "learning_rate": 3.3261084784322883e-07, "loss": 0.34123390913009644, "step": 17053, "token_acc": 0.8778021237148155 }, { "epoch": 0.9201964064101872, "grad_norm": 0.5218570828437805, "learning_rate": 3.321640297470785e-07, "loss": 0.3631766438484192, "step": 17054, "token_acc": 0.8727681709545901 }, { "epoch": 0.9202503642151837, "grad_norm": 0.4135819971561432, "learning_rate": 3.317175069041068e-07, "loss": 0.32285913825035095, "step": 17055, "token_acc": 0.8865933577696323 }, { "epoch": 0.9203043220201802, "grad_norm": 0.48841068148612976, "learning_rate": 3.3127127932794824e-07, "loss": 0.3276642858982086, "step": 17056, "token_acc": 0.8818675789897363 }, { "epoch": 0.9203582798251767, "grad_norm": 0.48658648133277893, "learning_rate": 3.308253470322331e-07, "loss": 0.32197487354278564, "step": 17057, "token_acc": 0.883362831858407 }, { "epoch": 0.9204122376301732, "grad_norm": 0.4930943548679352, "learning_rate": 3.3037971003057946e-07, "loss": 0.33398449420928955, "step": 17058, "token_acc": 0.8766295707472178 }, { "epoch": 0.9204661954351697, "grad_norm": 0.41530871391296387, "learning_rate": 3.299343683365952e-07, "loss": 0.2925622761249542, "step": 17059, "token_acc": 0.8889488598029955 }, { "epoch": 0.9205201532401662, "grad_norm": 0.45394983887672424, "learning_rate": 3.2948932196388283e-07, "loss": 0.296615332365036, "step": 17060, "token_acc": 0.8872421695951108 }, { "epoch": 0.9205741110451627, "grad_norm": 0.39089787006378174, "learning_rate": 3.290445709260326e-07, "loss": 0.3191468119621277, "step": 17061, "token_acc": 0.8849506101104009 }, { "epoch": 0.9206280688501591, "grad_norm": 0.37579837441444397, "learning_rate": 3.2860011523662804e-07, "loss": 0.3281683325767517, "step": 17062, "token_acc": 0.8891865219230227 }, { "epoch": 0.9206820266551556, "grad_norm": 0.3488013744354248, "learning_rate": 3.281559549092428e-07, "loss": 0.36906468868255615, "step": 17063, "token_acc": 0.874469678063389 }, { "epoch": 0.9207359844601521, "grad_norm": 0.5683217644691467, "learning_rate": 3.2771208995744154e-07, "loss": 0.3140639364719391, "step": 17064, "token_acc": 0.8896668788457458 }, { "epoch": 0.9207899422651487, "grad_norm": 0.5122678875923157, "learning_rate": 3.2726852039477783e-07, "loss": 0.31131115555763245, "step": 17065, "token_acc": 0.8868038740920097 }, { "epoch": 0.9208439000701452, "grad_norm": 0.3484683036804199, "learning_rate": 3.2682524623480096e-07, "loss": 0.3234562873840332, "step": 17066, "token_acc": 0.88683156003869 }, { "epoch": 0.9208978578751417, "grad_norm": 0.468465119600296, "learning_rate": 3.263822674910455e-07, "loss": 0.3391353189945221, "step": 17067, "token_acc": 0.8775426340661598 }, { "epoch": 0.9209518156801382, "grad_norm": 0.4323349893093109, "learning_rate": 3.2593958417704295e-07, "loss": 0.300778865814209, "step": 17068, "token_acc": 0.8914549653579676 }, { "epoch": 0.9210057734851346, "grad_norm": 0.3101065754890442, "learning_rate": 3.254971963063092e-07, "loss": 0.2834276258945465, "step": 17069, "token_acc": 0.9002891844997108 }, { "epoch": 0.9210597312901311, "grad_norm": 0.5263265371322632, "learning_rate": 3.2505510389235773e-07, "loss": 0.31423091888427734, "step": 17070, "token_acc": 0.8834992586316458 }, { "epoch": 0.9211136890951276, "grad_norm": 0.3693220615386963, "learning_rate": 3.246133069486901e-07, "loss": 0.32709765434265137, "step": 17071, "token_acc": 0.8841951930080116 }, { "epoch": 0.9211676469001241, "grad_norm": 0.35673579573631287, "learning_rate": 3.2417180548879657e-07, "loss": 0.3098542392253876, "step": 17072, "token_acc": 0.8884288272157564 }, { "epoch": 0.9212216047051206, "grad_norm": 0.35593748092651367, "learning_rate": 3.2373059952616083e-07, "loss": 0.3278028070926666, "step": 17073, "token_acc": 0.8827205348059876 }, { "epoch": 0.9212755625101171, "grad_norm": 0.47492900490760803, "learning_rate": 3.232896890742587e-07, "loss": 0.3451327681541443, "step": 17074, "token_acc": 0.8810802017244184 }, { "epoch": 0.9213295203151136, "grad_norm": 0.3948003649711609, "learning_rate": 3.228490741465529e-07, "loss": 0.32277292013168335, "step": 17075, "token_acc": 0.8849887133182844 }, { "epoch": 0.9213834781201101, "grad_norm": 0.37602415680885315, "learning_rate": 3.2240875475650246e-07, "loss": 0.3087574541568756, "step": 17076, "token_acc": 0.8882604497172024 }, { "epoch": 0.9214374359251065, "grad_norm": 0.3737947344779968, "learning_rate": 3.2196873091755343e-07, "loss": 0.331136554479599, "step": 17077, "token_acc": 0.8806967310904319 }, { "epoch": 0.921491393730103, "grad_norm": 0.5192604064941406, "learning_rate": 3.215290026431439e-07, "loss": 0.4111751914024353, "step": 17078, "token_acc": 0.8592252665676879 }, { "epoch": 0.9215453515350995, "grad_norm": 0.46553367376327515, "learning_rate": 3.2108956994670426e-07, "loss": 0.32154738903045654, "step": 17079, "token_acc": 0.8846627445614995 }, { "epoch": 0.921599309340096, "grad_norm": 0.4700213670730591, "learning_rate": 3.2065043284165263e-07, "loss": 0.3542495369911194, "step": 17080, "token_acc": 0.8720216864031959 }, { "epoch": 0.9216532671450925, "grad_norm": 0.3563655614852905, "learning_rate": 3.202115913414028e-07, "loss": 0.29395949840545654, "step": 17081, "token_acc": 0.8945842514206193 }, { "epoch": 0.921707224950089, "grad_norm": 0.5723221302032471, "learning_rate": 3.1977304545935284e-07, "loss": 0.37266725301742554, "step": 17082, "token_acc": 0.8676528599605523 }, { "epoch": 0.9217611827550856, "grad_norm": 0.3626609444618225, "learning_rate": 3.193347952088988e-07, "loss": 0.26914530992507935, "step": 17083, "token_acc": 0.9016234552944027 }, { "epoch": 0.9218151405600821, "grad_norm": 0.432316392660141, "learning_rate": 3.188968406034243e-07, "loss": 0.3029448390007019, "step": 17084, "token_acc": 0.890728476821192 }, { "epoch": 0.9218690983650785, "grad_norm": 0.3658766448497772, "learning_rate": 3.184591816563043e-07, "loss": 0.29552286863327026, "step": 17085, "token_acc": 0.8910878447395302 }, { "epoch": 0.921923056170075, "grad_norm": 0.44663214683532715, "learning_rate": 3.1802181838090475e-07, "loss": 0.27995070815086365, "step": 17086, "token_acc": 0.8996448973290103 }, { "epoch": 0.9219770139750715, "grad_norm": 0.4155285358428955, "learning_rate": 3.175847507905827e-07, "loss": 0.31474530696868896, "step": 17087, "token_acc": 0.8848053457292272 }, { "epoch": 0.922030971780068, "grad_norm": 0.4918048083782196, "learning_rate": 3.171479788986842e-07, "loss": 0.2969955801963806, "step": 17088, "token_acc": 0.8921321925851368 }, { "epoch": 0.9220849295850645, "grad_norm": 0.40475696325302124, "learning_rate": 3.167115027185519e-07, "loss": 0.28706079721450806, "step": 17089, "token_acc": 0.8960968312253502 }, { "epoch": 0.922138887390061, "grad_norm": 0.3675019443035126, "learning_rate": 3.162753222635129e-07, "loss": 0.32191962003707886, "step": 17090, "token_acc": 0.8839570972158831 }, { "epoch": 0.9221928451950575, "grad_norm": 0.3290523290634155, "learning_rate": 3.1583943754688875e-07, "loss": 0.3107735812664032, "step": 17091, "token_acc": 0.8885824600110315 }, { "epoch": 0.9222468030000539, "grad_norm": 0.37446942925453186, "learning_rate": 3.154038485819921e-07, "loss": 0.2661128342151642, "step": 17092, "token_acc": 0.899941141848146 }, { "epoch": 0.9223007608050504, "grad_norm": 0.3838719427585602, "learning_rate": 3.149685553821236e-07, "loss": 0.31939250230789185, "step": 17093, "token_acc": 0.8861065017348017 }, { "epoch": 0.9223547186100469, "grad_norm": 0.42647814750671387, "learning_rate": 3.1453355796057796e-07, "loss": 0.3806830942630768, "step": 17094, "token_acc": 0.8611828512396694 }, { "epoch": 0.9224086764150434, "grad_norm": 0.48725202679634094, "learning_rate": 3.140988563306424e-07, "loss": 0.3470503091812134, "step": 17095, "token_acc": 0.8744853234161243 }, { "epoch": 0.9224626342200399, "grad_norm": 0.48594141006469727, "learning_rate": 3.136644505055897e-07, "loss": 0.3129509687423706, "step": 17096, "token_acc": 0.8867256637168142 }, { "epoch": 0.9225165920250364, "grad_norm": 0.4694675803184509, "learning_rate": 3.1323034049868696e-07, "loss": 0.3712363541126251, "step": 17097, "token_acc": 0.8728185512789863 }, { "epoch": 0.922570549830033, "grad_norm": 0.43520575761795044, "learning_rate": 3.1279652632319134e-07, "loss": 0.29569870233535767, "step": 17098, "token_acc": 0.8904668625530525 }, { "epoch": 0.9226245076350295, "grad_norm": 0.4187854826450348, "learning_rate": 3.1236300799235233e-07, "loss": 0.33111849427223206, "step": 17099, "token_acc": 0.8844127672761078 }, { "epoch": 0.9226784654400259, "grad_norm": 0.46252021193504333, "learning_rate": 3.119297855194103e-07, "loss": 0.3292226791381836, "step": 17100, "token_acc": 0.8829656862745098 }, { "epoch": 0.9227324232450224, "grad_norm": 0.2835875153541565, "learning_rate": 3.1149685891759263e-07, "loss": 0.3012118637561798, "step": 17101, "token_acc": 0.8933171324422843 }, { "epoch": 0.9227863810500189, "grad_norm": 0.48325589299201965, "learning_rate": 3.1106422820012415e-07, "loss": 0.3626805543899536, "step": 17102, "token_acc": 0.8728287841191067 }, { "epoch": 0.9228403388550154, "grad_norm": 0.4195821285247803, "learning_rate": 3.1063189338021547e-07, "loss": 0.327808678150177, "step": 17103, "token_acc": 0.8815273166314519 }, { "epoch": 0.9228942966600119, "grad_norm": 0.5045949220657349, "learning_rate": 3.101998544710716e-07, "loss": 0.4212659001350403, "step": 17104, "token_acc": 0.8537865861661635 }, { "epoch": 0.9229482544650084, "grad_norm": 0.37285172939300537, "learning_rate": 3.0976811148588523e-07, "loss": 0.31531280279159546, "step": 17105, "token_acc": 0.8852097130242825 }, { "epoch": 0.9230022122700049, "grad_norm": 0.5841773152351379, "learning_rate": 3.093366644378415e-07, "loss": 0.33527714014053345, "step": 17106, "token_acc": 0.8808390622245725 }, { "epoch": 0.9230561700750014, "grad_norm": 0.39369067549705505, "learning_rate": 3.089055133401175e-07, "loss": 0.2667529582977295, "step": 17107, "token_acc": 0.9018668631785802 }, { "epoch": 0.9231101278799978, "grad_norm": 0.337643563747406, "learning_rate": 3.0847465820588064e-07, "loss": 0.32261013984680176, "step": 17108, "token_acc": 0.8866851945426983 }, { "epoch": 0.9231640856849943, "grad_norm": 0.3664381206035614, "learning_rate": 3.0804409904828913e-07, "loss": 0.2745872735977173, "step": 17109, "token_acc": 0.8973090009279308 }, { "epoch": 0.9232180434899908, "grad_norm": 0.3173527717590332, "learning_rate": 3.076138358804925e-07, "loss": 0.291208952665329, "step": 17110, "token_acc": 0.8963689670726207 }, { "epoch": 0.9232720012949873, "grad_norm": 0.37145113945007324, "learning_rate": 3.071838687156292e-07, "loss": 0.36013340950012207, "step": 17111, "token_acc": 0.8802423855844363 }, { "epoch": 0.9233259590999838, "grad_norm": 0.4700002074241638, "learning_rate": 3.06754197566832e-07, "loss": 0.32066234946250916, "step": 17112, "token_acc": 0.8860383472859843 }, { "epoch": 0.9233799169049803, "grad_norm": 0.3965723216533661, "learning_rate": 3.063248224472226e-07, "loss": 0.29929137229919434, "step": 17113, "token_acc": 0.8911924716953389 }, { "epoch": 0.9234338747099768, "grad_norm": 0.5606134533882141, "learning_rate": 3.058957433699139e-07, "loss": 0.3787342309951782, "step": 17114, "token_acc": 0.8686440677966102 }, { "epoch": 0.9234878325149732, "grad_norm": 0.5041959881782532, "learning_rate": 3.054669603480087e-07, "loss": 0.3542826175689697, "step": 17115, "token_acc": 0.875890637945319 }, { "epoch": 0.9235417903199697, "grad_norm": 0.36154308915138245, "learning_rate": 3.050384733946032e-07, "loss": 0.3800082802772522, "step": 17116, "token_acc": 0.8663810689514484 }, { "epoch": 0.9235957481249663, "grad_norm": 0.34314554929733276, "learning_rate": 3.046102825227837e-07, "loss": 0.29396623373031616, "step": 17117, "token_acc": 0.8922898550724637 }, { "epoch": 0.9236497059299628, "grad_norm": 0.4205882251262665, "learning_rate": 3.0418238774562515e-07, "loss": 0.3240935206413269, "step": 17118, "token_acc": 0.8840500068690754 }, { "epoch": 0.9237036637349593, "grad_norm": 0.30396971106529236, "learning_rate": 3.0375478907619714e-07, "loss": 0.380898654460907, "step": 17119, "token_acc": 0.8662115223380046 }, { "epoch": 0.9237576215399558, "grad_norm": 0.40974563360214233, "learning_rate": 3.0332748652755704e-07, "loss": 0.3530767560005188, "step": 17120, "token_acc": 0.8693515141905819 }, { "epoch": 0.9238115793449523, "grad_norm": 0.38507279753685, "learning_rate": 3.0290048011275664e-07, "loss": 0.2951321005821228, "step": 17121, "token_acc": 0.8938324988620847 }, { "epoch": 0.9238655371499488, "grad_norm": 0.4785492420196533, "learning_rate": 3.0247376984483436e-07, "loss": 0.3059350252151489, "step": 17122, "token_acc": 0.8835728040290327 }, { "epoch": 0.9239194949549452, "grad_norm": 0.44623610377311707, "learning_rate": 3.020473557368231e-07, "loss": 0.31615740060806274, "step": 17123, "token_acc": 0.8831985624438454 }, { "epoch": 0.9239734527599417, "grad_norm": 0.35254552960395813, "learning_rate": 3.016212378017447e-07, "loss": 0.3043026328086853, "step": 17124, "token_acc": 0.8907589055240062 }, { "epoch": 0.9240274105649382, "grad_norm": 0.535443902015686, "learning_rate": 3.0119541605261426e-07, "loss": 0.37691301107406616, "step": 17125, "token_acc": 0.8681662446000298 }, { "epoch": 0.9240813683699347, "grad_norm": 0.4716375768184662, "learning_rate": 3.0076989050243367e-07, "loss": 0.357112318277359, "step": 17126, "token_acc": 0.8761726078799249 }, { "epoch": 0.9241353261749312, "grad_norm": 0.4392603635787964, "learning_rate": 3.0034466116419916e-07, "loss": 0.3482919931411743, "step": 17127, "token_acc": 0.8749465278768003 }, { "epoch": 0.9241892839799277, "grad_norm": 0.41740769147872925, "learning_rate": 2.999197280508992e-07, "loss": 0.3721988797187805, "step": 17128, "token_acc": 0.8667282240689443 }, { "epoch": 0.9242432417849242, "grad_norm": 0.5058994889259338, "learning_rate": 2.9949509117551013e-07, "loss": 0.290644109249115, "step": 17129, "token_acc": 0.8904347826086957 }, { "epoch": 0.9242971995899207, "grad_norm": 0.5478100180625916, "learning_rate": 2.9907075055099934e-07, "loss": 0.41170865297317505, "step": 17130, "token_acc": 0.8574607147823669 }, { "epoch": 0.9243511573949171, "grad_norm": 0.4823853075504303, "learning_rate": 2.986467061903264e-07, "loss": 0.3052251636981964, "step": 17131, "token_acc": 0.8892474944793614 }, { "epoch": 0.9244051151999136, "grad_norm": 0.4046010673046112, "learning_rate": 2.982229581064422e-07, "loss": 0.2691204249858856, "step": 17132, "token_acc": 0.900687757909216 }, { "epoch": 0.9244590730049101, "grad_norm": 0.38326114416122437, "learning_rate": 2.9779950631228515e-07, "loss": 0.3059849739074707, "step": 17133, "token_acc": 0.8922191052709287 }, { "epoch": 0.9245130308099067, "grad_norm": 0.3629973232746124, "learning_rate": 2.973763508207905e-07, "loss": 0.3115280866622925, "step": 17134, "token_acc": 0.8876712328767123 }, { "epoch": 0.9245669886149032, "grad_norm": 0.4139997363090515, "learning_rate": 2.969534916448813e-07, "loss": 0.3164724111557007, "step": 17135, "token_acc": 0.8831020174560023 }, { "epoch": 0.9246209464198997, "grad_norm": 0.39700740575790405, "learning_rate": 2.9653092879747046e-07, "loss": 0.3372208774089813, "step": 17136, "token_acc": 0.8788706739526412 }, { "epoch": 0.9246749042248962, "grad_norm": 0.44455376267433167, "learning_rate": 2.961086622914633e-07, "loss": 0.35382306575775146, "step": 17137, "token_acc": 0.8757462686567165 }, { "epoch": 0.9247288620298926, "grad_norm": 0.39298439025878906, "learning_rate": 2.9568669213975497e-07, "loss": 0.28635555505752563, "step": 17138, "token_acc": 0.8973760932944607 }, { "epoch": 0.9247828198348891, "grad_norm": 0.3603549003601074, "learning_rate": 2.95265018355233e-07, "loss": 0.3249935507774353, "step": 17139, "token_acc": 0.884195309990363 }, { "epoch": 0.9248367776398856, "grad_norm": 0.47544604539871216, "learning_rate": 2.9484364095077376e-07, "loss": 0.3979901075363159, "step": 17140, "token_acc": 0.8624555160142349 }, { "epoch": 0.9248907354448821, "grad_norm": 0.38715556263923645, "learning_rate": 2.944225599392481e-07, "loss": 0.3625563979148865, "step": 17141, "token_acc": 0.8678234540584605 }, { "epoch": 0.9249446932498786, "grad_norm": 0.2901158332824707, "learning_rate": 2.940017753335156e-07, "loss": 0.3214050233364105, "step": 17142, "token_acc": 0.8836186950485886 }, { "epoch": 0.9249986510548751, "grad_norm": 0.4307454824447632, "learning_rate": 2.935812871464261e-07, "loss": 0.31313157081604004, "step": 17143, "token_acc": 0.8911742133537989 }, { "epoch": 0.9250526088598716, "grad_norm": 0.2929834723472595, "learning_rate": 2.931610953908215e-07, "loss": 0.3285345137119293, "step": 17144, "token_acc": 0.8836051288435205 }, { "epoch": 0.9251065666648681, "grad_norm": 0.36644601821899414, "learning_rate": 2.9274120007953377e-07, "loss": 0.30145853757858276, "step": 17145, "token_acc": 0.892827206322596 }, { "epoch": 0.9251605244698645, "grad_norm": 0.35746756196022034, "learning_rate": 2.923216012253871e-07, "loss": 0.3224310278892517, "step": 17146, "token_acc": 0.8850634899856002 }, { "epoch": 0.925214482274861, "grad_norm": 0.42281588912010193, "learning_rate": 2.919022988411957e-07, "loss": 0.31800925731658936, "step": 17147, "token_acc": 0.8846153846153846 }, { "epoch": 0.9252684400798575, "grad_norm": 0.3530261218547821, "learning_rate": 2.9148329293976595e-07, "loss": 0.3314312994480133, "step": 17148, "token_acc": 0.8779076774737857 }, { "epoch": 0.925322397884854, "grad_norm": 0.3888097405433655, "learning_rate": 2.9106458353389213e-07, "loss": 0.31839263439178467, "step": 17149, "token_acc": 0.8846510395974043 }, { "epoch": 0.9253763556898505, "grad_norm": 0.38798537850379944, "learning_rate": 2.9064617063636167e-07, "loss": 0.27227193117141724, "step": 17150, "token_acc": 0.9006432188312578 }, { "epoch": 0.9254303134948471, "grad_norm": 0.4042400121688843, "learning_rate": 2.9022805425995447e-07, "loss": 0.3176327347755432, "step": 17151, "token_acc": 0.8849534643226473 }, { "epoch": 0.9254842712998436, "grad_norm": 0.3679184913635254, "learning_rate": 2.8981023441743803e-07, "loss": 0.27800488471984863, "step": 17152, "token_acc": 0.8974496165507402 }, { "epoch": 0.92553822910484, "grad_norm": 0.5077476501464844, "learning_rate": 2.893927111215744e-07, "loss": 0.3338271379470825, "step": 17153, "token_acc": 0.8811056418023476 }, { "epoch": 0.9255921869098365, "grad_norm": 0.49205201864242554, "learning_rate": 2.8897548438511334e-07, "loss": 0.3657865822315216, "step": 17154, "token_acc": 0.8752270504401285 }, { "epoch": 0.925646144714833, "grad_norm": 0.5092169046401978, "learning_rate": 2.885585542207969e-07, "loss": 0.29428911209106445, "step": 17155, "token_acc": 0.8923650975889782 }, { "epoch": 0.9257001025198295, "grad_norm": 0.4590095281600952, "learning_rate": 2.881419206413583e-07, "loss": 0.33882349729537964, "step": 17156, "token_acc": 0.8785032796660703 }, { "epoch": 0.925754060324826, "grad_norm": 0.38606148958206177, "learning_rate": 2.877255836595216e-07, "loss": 0.32967036962509155, "step": 17157, "token_acc": 0.8829636621047664 }, { "epoch": 0.9258080181298225, "grad_norm": 0.304543137550354, "learning_rate": 2.873095432880013e-07, "loss": 0.3299540579319, "step": 17158, "token_acc": 0.8841919401744911 }, { "epoch": 0.925861975934819, "grad_norm": 0.36894550919532776, "learning_rate": 2.868937995395016e-07, "loss": 0.2713117003440857, "step": 17159, "token_acc": 0.9044107789247888 }, { "epoch": 0.9259159337398155, "grad_norm": 0.4060944616794586, "learning_rate": 2.864783524267223e-07, "loss": 0.32218122482299805, "step": 17160, "token_acc": 0.8846763959390863 }, { "epoch": 0.9259698915448119, "grad_norm": 0.5274096131324768, "learning_rate": 2.8606320196234883e-07, "loss": 0.3361963629722595, "step": 17161, "token_acc": 0.8817529880478088 }, { "epoch": 0.9260238493498084, "grad_norm": 0.4754965603351593, "learning_rate": 2.8564834815906104e-07, "loss": 0.3236030340194702, "step": 17162, "token_acc": 0.8836942492470959 }, { "epoch": 0.9260778071548049, "grad_norm": 0.47027480602264404, "learning_rate": 2.8523379102952774e-07, "loss": 0.3389778733253479, "step": 17163, "token_acc": 0.8767295597484277 }, { "epoch": 0.9261317649598014, "grad_norm": 0.4268566370010376, "learning_rate": 2.848195305864099e-07, "loss": 0.3511130213737488, "step": 17164, "token_acc": 0.8731771984091914 }, { "epoch": 0.9261857227647979, "grad_norm": 0.5142501592636108, "learning_rate": 2.844055668423573e-07, "loss": 0.32196667790412903, "step": 17165, "token_acc": 0.8832398316970547 }, { "epoch": 0.9262396805697944, "grad_norm": 0.4974072575569153, "learning_rate": 2.8399189981001443e-07, "loss": 0.34903818368911743, "step": 17166, "token_acc": 0.8745611666216582 }, { "epoch": 0.926293638374791, "grad_norm": 0.41010475158691406, "learning_rate": 2.8357852950201437e-07, "loss": 0.31991344690322876, "step": 17167, "token_acc": 0.8873968923844235 }, { "epoch": 0.9263475961797875, "grad_norm": 0.44438618421554565, "learning_rate": 2.831654559309793e-07, "loss": 0.32637661695480347, "step": 17168, "token_acc": 0.8846735395189004 }, { "epoch": 0.9264015539847839, "grad_norm": 0.4995356798171997, "learning_rate": 2.8275267910952696e-07, "loss": 0.37126708030700684, "step": 17169, "token_acc": 0.8731113166820845 }, { "epoch": 0.9264555117897804, "grad_norm": 0.5016359090805054, "learning_rate": 2.8234019905026167e-07, "loss": 0.3056491017341614, "step": 17170, "token_acc": 0.8914323086984958 }, { "epoch": 0.9265094695947769, "grad_norm": 0.4931415021419525, "learning_rate": 2.819280157657811e-07, "loss": 0.31011781096458435, "step": 17171, "token_acc": 0.8867218282111899 }, { "epoch": 0.9265634273997734, "grad_norm": 0.4927743077278137, "learning_rate": 2.815161292686719e-07, "loss": 0.35809409618377686, "step": 17172, "token_acc": 0.8728658106391985 }, { "epoch": 0.9266173852047699, "grad_norm": 0.358994722366333, "learning_rate": 2.8110453957151504e-07, "loss": 0.25305014848709106, "step": 17173, "token_acc": 0.9067552324852739 }, { "epoch": 0.9266713430097664, "grad_norm": 0.48353758454322815, "learning_rate": 2.806932466868806e-07, "loss": 0.3485022187232971, "step": 17174, "token_acc": 0.8772741781040536 }, { "epoch": 0.9267253008147629, "grad_norm": 0.5305692553520203, "learning_rate": 2.802822506273273e-07, "loss": 0.37303417921066284, "step": 17175, "token_acc": 0.8723177598539035 }, { "epoch": 0.9267792586197593, "grad_norm": 0.41817277669906616, "learning_rate": 2.7987155140540844e-07, "loss": 0.38793227076530457, "step": 17176, "token_acc": 0.8634925003440209 }, { "epoch": 0.9268332164247558, "grad_norm": 0.469008207321167, "learning_rate": 2.794611490336652e-07, "loss": 0.3515157997608185, "step": 17177, "token_acc": 0.8749813070136085 }, { "epoch": 0.9268871742297523, "grad_norm": 0.620924174785614, "learning_rate": 2.7905104352463296e-07, "loss": 0.309526652097702, "step": 17178, "token_acc": 0.8925093632958802 }, { "epoch": 0.9269411320347488, "grad_norm": 0.48052558302879333, "learning_rate": 2.786412348908352e-07, "loss": 0.3566191792488098, "step": 17179, "token_acc": 0.8734385272846811 }, { "epoch": 0.9269950898397453, "grad_norm": 0.35135984420776367, "learning_rate": 2.782317231447873e-07, "loss": 0.28706079721450806, "step": 17180, "token_acc": 0.8961101137043687 }, { "epoch": 0.9270490476447418, "grad_norm": 0.4453437924385071, "learning_rate": 2.7782250829899493e-07, "loss": 0.34985533356666565, "step": 17181, "token_acc": 0.8755664421310472 }, { "epoch": 0.9271030054497383, "grad_norm": 0.38652148842811584, "learning_rate": 2.7741359036595804e-07, "loss": 0.3224621117115021, "step": 17182, "token_acc": 0.8828396050173473 }, { "epoch": 0.9271569632547348, "grad_norm": 0.3992989659309387, "learning_rate": 2.7700496935816225e-07, "loss": 0.3014974892139435, "step": 17183, "token_acc": 0.8891213389121339 }, { "epoch": 0.9272109210597312, "grad_norm": 0.43259820342063904, "learning_rate": 2.765966452880875e-07, "loss": 0.34475597739219666, "step": 17184, "token_acc": 0.878017626772257 }, { "epoch": 0.9272648788647277, "grad_norm": 0.4202800691127777, "learning_rate": 2.7618861816820275e-07, "loss": 0.34932148456573486, "step": 17185, "token_acc": 0.8743097554562188 }, { "epoch": 0.9273188366697243, "grad_norm": 0.453313946723938, "learning_rate": 2.7578088801097134e-07, "loss": 0.3287894129753113, "step": 17186, "token_acc": 0.8816216216216216 }, { "epoch": 0.9273727944747208, "grad_norm": 0.5000385046005249, "learning_rate": 2.753734548288445e-07, "loss": 0.34518158435821533, "step": 17187, "token_acc": 0.8794337780670355 }, { "epoch": 0.9274267522797173, "grad_norm": 0.4885380268096924, "learning_rate": 2.749663186342655e-07, "loss": 0.33224424719810486, "step": 17188, "token_acc": 0.8846018940722553 }, { "epoch": 0.9274807100847138, "grad_norm": 0.33673304319381714, "learning_rate": 2.7455947943966665e-07, "loss": 0.3045805096626282, "step": 17189, "token_acc": 0.8911879761169447 }, { "epoch": 0.9275346678897103, "grad_norm": 0.394594669342041, "learning_rate": 2.7415293725747363e-07, "loss": 0.306939035654068, "step": 17190, "token_acc": 0.8867023309132595 }, { "epoch": 0.9275886256947068, "grad_norm": 0.41962185502052307, "learning_rate": 2.7374669210010086e-07, "loss": 0.2957806885242462, "step": 17191, "token_acc": 0.8869749569365311 }, { "epoch": 0.9276425834997032, "grad_norm": 0.5763986706733704, "learning_rate": 2.7334074397995844e-07, "loss": 0.3190299868583679, "step": 17192, "token_acc": 0.887781036168133 }, { "epoch": 0.9276965413046997, "grad_norm": 0.44710573554039, "learning_rate": 2.72935092909441e-07, "loss": 0.28789961338043213, "step": 17193, "token_acc": 0.895957025728018 }, { "epoch": 0.9277504991096962, "grad_norm": 0.4054679274559021, "learning_rate": 2.7252973890093735e-07, "loss": 0.34109777212142944, "step": 17194, "token_acc": 0.8791087036809061 }, { "epoch": 0.9278044569146927, "grad_norm": 0.3713229298591614, "learning_rate": 2.721246819668266e-07, "loss": 0.3702372610569, "step": 17195, "token_acc": 0.8748071674379969 }, { "epoch": 0.9278584147196892, "grad_norm": 0.4558975398540497, "learning_rate": 2.7171992211948107e-07, "loss": 0.33089253306388855, "step": 17196, "token_acc": 0.8840274856261394 }, { "epoch": 0.9279123725246857, "grad_norm": 0.5007714629173279, "learning_rate": 2.713154593712597e-07, "loss": 0.30656981468200684, "step": 17197, "token_acc": 0.8856482818365579 }, { "epoch": 0.9279663303296822, "grad_norm": 0.3928448259830475, "learning_rate": 2.7091129373451596e-07, "loss": 0.3465777039527893, "step": 17198, "token_acc": 0.87701317715959 }, { "epoch": 0.9280202881346786, "grad_norm": 0.44549083709716797, "learning_rate": 2.7050742522159333e-07, "loss": 0.34973692893981934, "step": 17199, "token_acc": 0.8732547105869595 }, { "epoch": 0.9280742459396751, "grad_norm": 0.40941253304481506, "learning_rate": 2.7010385384482417e-07, "loss": 0.38219961524009705, "step": 17200, "token_acc": 0.869696292987941 }, { "epoch": 0.9281282037446716, "grad_norm": 0.3993390202522278, "learning_rate": 2.697005796165342e-07, "loss": 0.2715395390987396, "step": 17201, "token_acc": 0.9000600841177648 }, { "epoch": 0.9281821615496681, "grad_norm": 0.44900503754615784, "learning_rate": 2.6929760254904013e-07, "loss": 0.353801965713501, "step": 17202, "token_acc": 0.8724221305646424 }, { "epoch": 0.9282361193546647, "grad_norm": 0.4609206020832062, "learning_rate": 2.688949226546489e-07, "loss": 0.37366729974746704, "step": 17203, "token_acc": 0.8725152398621786 }, { "epoch": 0.9282900771596612, "grad_norm": 0.3482087254524231, "learning_rate": 2.6849253994565505e-07, "loss": 0.2878102958202362, "step": 17204, "token_acc": 0.895993413830955 }, { "epoch": 0.9283440349646577, "grad_norm": 0.3558156490325928, "learning_rate": 2.680904544343521e-07, "loss": 0.3102222681045532, "step": 17205, "token_acc": 0.8869496678301994 }, { "epoch": 0.9283979927696542, "grad_norm": 0.34597358107566833, "learning_rate": 2.676886661330158e-07, "loss": 0.2901371121406555, "step": 17206, "token_acc": 0.8938077131993482 }, { "epoch": 0.9284519505746506, "grad_norm": 0.439993679523468, "learning_rate": 2.6728717505391966e-07, "loss": 0.3585978150367737, "step": 17207, "token_acc": 0.8687440982058546 }, { "epoch": 0.9285059083796471, "grad_norm": 0.31900763511657715, "learning_rate": 2.6688598120932276e-07, "loss": 0.37837815284729004, "step": 17208, "token_acc": 0.8653818075631479 }, { "epoch": 0.9285598661846436, "grad_norm": 0.41688695549964905, "learning_rate": 2.664850846114775e-07, "loss": 0.30786409974098206, "step": 17209, "token_acc": 0.8873574144486692 }, { "epoch": 0.9286138239896401, "grad_norm": 0.5058415532112122, "learning_rate": 2.6608448527262854e-07, "loss": 0.3405630886554718, "step": 17210, "token_acc": 0.8780563775112911 }, { "epoch": 0.9286677817946366, "grad_norm": 0.3535303473472595, "learning_rate": 2.6568418320500946e-07, "loss": 0.3172813653945923, "step": 17211, "token_acc": 0.8831747561122486 }, { "epoch": 0.9287217395996331, "grad_norm": 0.3849613070487976, "learning_rate": 2.6528417842084596e-07, "loss": 0.31820204854011536, "step": 17212, "token_acc": 0.8840893436977271 }, { "epoch": 0.9287756974046296, "grad_norm": 0.3588012754917145, "learning_rate": 2.6488447093235393e-07, "loss": 0.33652037382125854, "step": 17213, "token_acc": 0.8778318111730519 }, { "epoch": 0.9288296552096261, "grad_norm": 0.42792168259620667, "learning_rate": 2.644850607517391e-07, "loss": 0.35245028138160706, "step": 17214, "token_acc": 0.8771631205673759 }, { "epoch": 0.9288836130146225, "grad_norm": 0.4002631604671478, "learning_rate": 2.6408594789119946e-07, "loss": 0.30931413173675537, "step": 17215, "token_acc": 0.8860130718954249 }, { "epoch": 0.928937570819619, "grad_norm": 0.5400881767272949, "learning_rate": 2.636871323629253e-07, "loss": 0.32618099451065063, "step": 17216, "token_acc": 0.880248833592535 }, { "epoch": 0.9289915286246155, "grad_norm": 0.43303680419921875, "learning_rate": 2.6328861417909466e-07, "loss": 0.3188568353652954, "step": 17217, "token_acc": 0.8844602609727165 }, { "epoch": 0.929045486429612, "grad_norm": 0.5010551810264587, "learning_rate": 2.6289039335188003e-07, "loss": 0.33534324169158936, "step": 17218, "token_acc": 0.8845157110463233 }, { "epoch": 0.9290994442346086, "grad_norm": 0.49495673179626465, "learning_rate": 2.624924698934417e-07, "loss": 0.2982178330421448, "step": 17219, "token_acc": 0.8850843494237515 }, { "epoch": 0.9291534020396051, "grad_norm": 0.41230180859565735, "learning_rate": 2.620948438159321e-07, "loss": 0.2893802225589752, "step": 17220, "token_acc": 0.8929268292682927 }, { "epoch": 0.9292073598446016, "grad_norm": 0.44063761830329895, "learning_rate": 2.61697515131496e-07, "loss": 0.3238215446472168, "step": 17221, "token_acc": 0.8793235972328978 }, { "epoch": 0.929261317649598, "grad_norm": 0.4311630427837372, "learning_rate": 2.613004838522659e-07, "loss": 0.2926476001739502, "step": 17222, "token_acc": 0.8924033880359978 }, { "epoch": 0.9293152754545945, "grad_norm": 0.4090111553668976, "learning_rate": 2.609037499903677e-07, "loss": 0.32298743724823, "step": 17223, "token_acc": 0.8876550502067336 }, { "epoch": 0.929369233259591, "grad_norm": 0.43754374980926514, "learning_rate": 2.6050731355791836e-07, "loss": 0.3027166724205017, "step": 17224, "token_acc": 0.8901027229740747 }, { "epoch": 0.9294231910645875, "grad_norm": 0.4411393404006958, "learning_rate": 2.6011117456702375e-07, "loss": 0.2470722794532776, "step": 17225, "token_acc": 0.9079743008314437 }, { "epoch": 0.929477148869584, "grad_norm": 0.4572189450263977, "learning_rate": 2.597153330297819e-07, "loss": 0.36691632866859436, "step": 17226, "token_acc": 0.8723461076245159 }, { "epoch": 0.9295311066745805, "grad_norm": 0.3785722255706787, "learning_rate": 2.5931978895828325e-07, "loss": 0.3261517286300659, "step": 17227, "token_acc": 0.8799158227015652 }, { "epoch": 0.929585064479577, "grad_norm": 0.4043603241443634, "learning_rate": 2.5892454236460587e-07, "loss": 0.320442795753479, "step": 17228, "token_acc": 0.8811164713990352 }, { "epoch": 0.9296390222845735, "grad_norm": 0.37113070487976074, "learning_rate": 2.585295932608223e-07, "loss": 0.26841986179351807, "step": 17229, "token_acc": 0.8991540217537264 }, { "epoch": 0.9296929800895699, "grad_norm": 0.38920027017593384, "learning_rate": 2.5813494165899176e-07, "loss": 0.3261920213699341, "step": 17230, "token_acc": 0.8846592204239799 }, { "epoch": 0.9297469378945664, "grad_norm": 0.3999081254005432, "learning_rate": 2.5774058757116807e-07, "loss": 0.32372695207595825, "step": 17231, "token_acc": 0.8839819268328232 }, { "epoch": 0.9298008956995629, "grad_norm": 0.3330304026603699, "learning_rate": 2.573465310093959e-07, "loss": 0.3670811057090759, "step": 17232, "token_acc": 0.8726925525143221 }, { "epoch": 0.9298548535045594, "grad_norm": 0.4252822697162628, "learning_rate": 2.569527719857079e-07, "loss": 0.29823029041290283, "step": 17233, "token_acc": 0.8922690131992458 }, { "epoch": 0.9299088113095559, "grad_norm": 0.3870421051979065, "learning_rate": 2.565593105121289e-07, "loss": 0.3299512267112732, "step": 17234, "token_acc": 0.8801284338209062 }, { "epoch": 0.9299627691145524, "grad_norm": 0.4194931089878082, "learning_rate": 2.5616614660067706e-07, "loss": 0.3254265785217285, "step": 17235, "token_acc": 0.884254225560937 }, { "epoch": 0.930016726919549, "grad_norm": 0.4954316318035126, "learning_rate": 2.557732802633572e-07, "loss": 0.351606547832489, "step": 17236, "token_acc": 0.8757250580046404 }, { "epoch": 0.9300706847245455, "grad_norm": 0.5850903391838074, "learning_rate": 2.553807115121698e-07, "loss": 0.331270694732666, "step": 17237, "token_acc": 0.8769385699899295 }, { "epoch": 0.9301246425295419, "grad_norm": 0.4805142879486084, "learning_rate": 2.54988440359103e-07, "loss": 0.33484965562820435, "step": 17238, "token_acc": 0.8769448373408769 }, { "epoch": 0.9301786003345384, "grad_norm": 0.34576642513275146, "learning_rate": 2.5459646681613716e-07, "loss": 0.2638988494873047, "step": 17239, "token_acc": 0.9060487435282233 }, { "epoch": 0.9302325581395349, "grad_norm": 0.3368743062019348, "learning_rate": 2.542047908952405e-07, "loss": 0.2768757939338684, "step": 17240, "token_acc": 0.8981094346482458 }, { "epoch": 0.9302865159445314, "grad_norm": 0.4697949290275574, "learning_rate": 2.53813412608378e-07, "loss": 0.31315597891807556, "step": 17241, "token_acc": 0.8822722283205269 }, { "epoch": 0.9303404737495279, "grad_norm": 0.3506501019001007, "learning_rate": 2.5342233196749997e-07, "loss": 0.2939857244491577, "step": 17242, "token_acc": 0.893218176464815 }, { "epoch": 0.9303944315545244, "grad_norm": 0.451515257358551, "learning_rate": 2.5303154898454916e-07, "loss": 0.35135599970817566, "step": 17243, "token_acc": 0.8688159263110979 }, { "epoch": 0.9304483893595209, "grad_norm": 0.4105342626571655, "learning_rate": 2.526410636714638e-07, "loss": 0.32399314641952515, "step": 17244, "token_acc": 0.8857467491478349 }, { "epoch": 0.9305023471645173, "grad_norm": 0.39554035663604736, "learning_rate": 2.522508760401665e-07, "loss": 0.3104149103164673, "step": 17245, "token_acc": 0.8911825892142743 }, { "epoch": 0.9305563049695138, "grad_norm": 0.41294899582862854, "learning_rate": 2.518609861025745e-07, "loss": 0.3259229362010956, "step": 17246, "token_acc": 0.8849970811441915 }, { "epoch": 0.9306102627745103, "grad_norm": 0.3989493250846863, "learning_rate": 2.514713938705926e-07, "loss": 0.2995745539665222, "step": 17247, "token_acc": 0.8915682414698163 }, { "epoch": 0.9306642205795068, "grad_norm": 0.45086342096328735, "learning_rate": 2.510820993561214e-07, "loss": 0.3654386103153229, "step": 17248, "token_acc": 0.8742374472078837 }, { "epoch": 0.9307181783845033, "grad_norm": 0.471310555934906, "learning_rate": 2.5069310257104797e-07, "loss": 0.3521149456501007, "step": 17249, "token_acc": 0.8715706964124209 }, { "epoch": 0.9307721361894998, "grad_norm": 0.3660490810871124, "learning_rate": 2.5030440352725503e-07, "loss": 0.3205081522464752, "step": 17250, "token_acc": 0.8852411388727484 }, { "epoch": 0.9308260939944963, "grad_norm": 0.5114922523498535, "learning_rate": 2.499160022366098e-07, "loss": 0.3924906253814697, "step": 17251, "token_acc": 0.8622501094411207 }, { "epoch": 0.9308800517994928, "grad_norm": 0.40689119696617126, "learning_rate": 2.4952789871097726e-07, "loss": 0.3235410153865814, "step": 17252, "token_acc": 0.8842294443612401 }, { "epoch": 0.9309340096044892, "grad_norm": 0.43005770444869995, "learning_rate": 2.4914009296220786e-07, "loss": 0.36691761016845703, "step": 17253, "token_acc": 0.873623048057136 }, { "epoch": 0.9309879674094858, "grad_norm": 0.3957701623439789, "learning_rate": 2.487525850021455e-07, "loss": 0.32015255093574524, "step": 17254, "token_acc": 0.8861432177494354 }, { "epoch": 0.9310419252144823, "grad_norm": 0.36660999059677124, "learning_rate": 2.4836537484262403e-07, "loss": 0.2653549313545227, "step": 17255, "token_acc": 0.8986404332928043 }, { "epoch": 0.9310958830194788, "grad_norm": 0.39190801978111267, "learning_rate": 2.4797846249546955e-07, "loss": 0.31014469265937805, "step": 17256, "token_acc": 0.8863003663003663 }, { "epoch": 0.9311498408244753, "grad_norm": 0.43645381927490234, "learning_rate": 2.4759184797249926e-07, "loss": 0.2814302444458008, "step": 17257, "token_acc": 0.8951757762381319 }, { "epoch": 0.9312037986294718, "grad_norm": 0.40195903182029724, "learning_rate": 2.4720553128551817e-07, "loss": 0.3108469843864441, "step": 17258, "token_acc": 0.8914213926776741 }, { "epoch": 0.9312577564344683, "grad_norm": 0.34532642364501953, "learning_rate": 2.4681951244632576e-07, "loss": 0.2962052822113037, "step": 17259, "token_acc": 0.8960984477695427 }, { "epoch": 0.9313117142394648, "grad_norm": 0.4118867516517639, "learning_rate": 2.4643379146671034e-07, "loss": 0.3240904211997986, "step": 17260, "token_acc": 0.8846679835891202 }, { "epoch": 0.9313656720444612, "grad_norm": 0.3060302734375, "learning_rate": 2.4604836835845245e-07, "loss": 0.34345442056655884, "step": 17261, "token_acc": 0.8791350087668031 }, { "epoch": 0.9314196298494577, "grad_norm": 0.44729164242744446, "learning_rate": 2.4566324313332166e-07, "loss": 0.31172722578048706, "step": 17262, "token_acc": 0.8911458333333333 }, { "epoch": 0.9314735876544542, "grad_norm": 0.44311055541038513, "learning_rate": 2.4527841580307967e-07, "loss": 0.3367343544960022, "step": 17263, "token_acc": 0.882863041982106 }, { "epoch": 0.9315275454594507, "grad_norm": 0.36871251463890076, "learning_rate": 2.448938863794803e-07, "loss": 0.31644198298454285, "step": 17264, "token_acc": 0.8836329233680227 }, { "epoch": 0.9315815032644472, "grad_norm": 0.3258785009384155, "learning_rate": 2.4450965487426646e-07, "loss": 0.3114417791366577, "step": 17265, "token_acc": 0.8910444394315362 }, { "epoch": 0.9316354610694437, "grad_norm": 0.5219203233718872, "learning_rate": 2.44125721299171e-07, "loss": 0.3644304871559143, "step": 17266, "token_acc": 0.8692348008385744 }, { "epoch": 0.9316894188744402, "grad_norm": 0.4118884205818176, "learning_rate": 2.437420856659212e-07, "loss": 0.32603976130485535, "step": 17267, "token_acc": 0.8812415654520918 }, { "epoch": 0.9317433766794366, "grad_norm": 0.4023287296295166, "learning_rate": 2.4335874798623094e-07, "loss": 0.28765156865119934, "step": 17268, "token_acc": 0.8961459754433834 }, { "epoch": 0.9317973344844331, "grad_norm": 0.36615827679634094, "learning_rate": 2.429757082718098e-07, "loss": 0.34601569175720215, "step": 17269, "token_acc": 0.8756110809342749 }, { "epoch": 0.9318512922894296, "grad_norm": 0.44296717643737793, "learning_rate": 2.4259296653435403e-07, "loss": 0.3033774197101593, "step": 17270, "token_acc": 0.8904947916666667 }, { "epoch": 0.9319052500944262, "grad_norm": 0.47635918855667114, "learning_rate": 2.422105227855531e-07, "loss": 0.3381909728050232, "step": 17271, "token_acc": 0.8747972753811223 }, { "epoch": 0.9319592078994227, "grad_norm": 0.32927078008651733, "learning_rate": 2.4182837703708663e-07, "loss": 0.29801467061042786, "step": 17272, "token_acc": 0.8879745155295992 }, { "epoch": 0.9320131657044192, "grad_norm": 0.4941295087337494, "learning_rate": 2.414465293006263e-07, "loss": 0.2886187732219696, "step": 17273, "token_acc": 0.8884758364312267 }, { "epoch": 0.9320671235094157, "grad_norm": 0.3709903955459595, "learning_rate": 2.410649795878317e-07, "loss": 0.30660709738731384, "step": 17274, "token_acc": 0.8880567599527001 }, { "epoch": 0.9321210813144122, "grad_norm": 0.42081958055496216, "learning_rate": 2.4068372791035467e-07, "loss": 0.2936343848705292, "step": 17275, "token_acc": 0.8939735739606832 }, { "epoch": 0.9321750391194086, "grad_norm": 0.46292704343795776, "learning_rate": 2.403027742798414e-07, "loss": 0.332882821559906, "step": 17276, "token_acc": 0.8815218911735907 }, { "epoch": 0.9322289969244051, "grad_norm": 0.49834686517715454, "learning_rate": 2.3992211870792483e-07, "loss": 0.30792340636253357, "step": 17277, "token_acc": 0.8904149620105202 }, { "epoch": 0.9322829547294016, "grad_norm": 0.5139163732528687, "learning_rate": 2.395417612062301e-07, "loss": 0.32048529386520386, "step": 17278, "token_acc": 0.8822958771220695 }, { "epoch": 0.9323369125343981, "grad_norm": 0.4878447353839874, "learning_rate": 2.3916170178637343e-07, "loss": 0.2870023846626282, "step": 17279, "token_acc": 0.8931767337807607 }, { "epoch": 0.9323908703393946, "grad_norm": 0.36277979612350464, "learning_rate": 2.3878194045996003e-07, "loss": 0.2974735498428345, "step": 17280, "token_acc": 0.8914689550731953 }, { "epoch": 0.9324448281443911, "grad_norm": 0.40114906430244446, "learning_rate": 2.3840247723859066e-07, "loss": 0.3105389475822449, "step": 17281, "token_acc": 0.8879445314247372 }, { "epoch": 0.9324987859493876, "grad_norm": 0.3708580434322357, "learning_rate": 2.3802331213385154e-07, "loss": 0.30142009258270264, "step": 17282, "token_acc": 0.8912418300653595 }, { "epoch": 0.932552743754384, "grad_norm": 0.3898419141769409, "learning_rate": 2.3764444515732343e-07, "loss": 0.30873575806617737, "step": 17283, "token_acc": 0.8883267076255873 }, { "epoch": 0.9326067015593805, "grad_norm": 0.30701369047164917, "learning_rate": 2.3726587632057595e-07, "loss": 0.27936315536499023, "step": 17284, "token_acc": 0.8920481174816435 }, { "epoch": 0.932660659364377, "grad_norm": 0.48943623900413513, "learning_rate": 2.368876056351721e-07, "loss": 0.38282155990600586, "step": 17285, "token_acc": 0.8666553422795991 }, { "epoch": 0.9327146171693735, "grad_norm": 0.43920576572418213, "learning_rate": 2.3650963311266262e-07, "loss": 0.34083276987075806, "step": 17286, "token_acc": 0.8771789541020311 }, { "epoch": 0.93276857497437, "grad_norm": 0.45731961727142334, "learning_rate": 2.361319587645916e-07, "loss": 0.3796370029449463, "step": 17287, "token_acc": 0.8657855293636735 }, { "epoch": 0.9328225327793666, "grad_norm": 0.45922982692718506, "learning_rate": 2.3575458260249096e-07, "loss": 0.31082749366760254, "step": 17288, "token_acc": 0.8861051115910727 }, { "epoch": 0.9328764905843631, "grad_norm": 0.4493147134780884, "learning_rate": 2.3537750463788922e-07, "loss": 0.36764490604400635, "step": 17289, "token_acc": 0.8686727565824826 }, { "epoch": 0.9329304483893596, "grad_norm": 0.42516306042671204, "learning_rate": 2.3500072488230052e-07, "loss": 0.3154730796813965, "step": 17290, "token_acc": 0.8824775013234516 }, { "epoch": 0.932984406194356, "grad_norm": 0.548580527305603, "learning_rate": 2.346242433472312e-07, "loss": 0.3596101403236389, "step": 17291, "token_acc": 0.8679092382495948 }, { "epoch": 0.9330383639993525, "grad_norm": 0.47347164154052734, "learning_rate": 2.342480600441799e-07, "loss": 0.3607054352760315, "step": 17292, "token_acc": 0.8764596378405821 }, { "epoch": 0.933092321804349, "grad_norm": 0.36733460426330566, "learning_rate": 2.33872174984634e-07, "loss": 0.3128598928451538, "step": 17293, "token_acc": 0.8849709003765833 }, { "epoch": 0.9331462796093455, "grad_norm": 0.3628436326980591, "learning_rate": 2.3349658818007325e-07, "loss": 0.24355968832969666, "step": 17294, "token_acc": 0.9107577710929666 }, { "epoch": 0.933200237414342, "grad_norm": 0.3993299901485443, "learning_rate": 2.3312129964196962e-07, "loss": 0.3089250922203064, "step": 17295, "token_acc": 0.8860227954409118 }, { "epoch": 0.9332541952193385, "grad_norm": 0.5131524801254272, "learning_rate": 2.3274630938178167e-07, "loss": 0.3608667850494385, "step": 17296, "token_acc": 0.8663548279318409 }, { "epoch": 0.933308153024335, "grad_norm": 0.38694122433662415, "learning_rate": 2.323716174109625e-07, "loss": 0.3051135241985321, "step": 17297, "token_acc": 0.8891298454865817 }, { "epoch": 0.9333621108293315, "grad_norm": 0.37938496470451355, "learning_rate": 2.3199722374095623e-07, "loss": 0.27908703684806824, "step": 17298, "token_acc": 0.8999042800492274 }, { "epoch": 0.9334160686343279, "grad_norm": 0.3784678876399994, "learning_rate": 2.31623128383196e-07, "loss": 0.31749963760375977, "step": 17299, "token_acc": 0.8887327542156361 }, { "epoch": 0.9334700264393244, "grad_norm": 0.49744802713394165, "learning_rate": 2.3124933134910489e-07, "loss": 0.3414700925350189, "step": 17300, "token_acc": 0.8792611251049538 }, { "epoch": 0.9335239842443209, "grad_norm": 0.4467427134513855, "learning_rate": 2.308758326501015e-07, "loss": 0.34890633821487427, "step": 17301, "token_acc": 0.8752115710109247 }, { "epoch": 0.9335779420493174, "grad_norm": 0.4034610986709595, "learning_rate": 2.3050263229759118e-07, "loss": 0.27090126276016235, "step": 17302, "token_acc": 0.8997502081598667 }, { "epoch": 0.9336318998543139, "grad_norm": 0.44021663069725037, "learning_rate": 2.3012973030297035e-07, "loss": 0.26946189999580383, "step": 17303, "token_acc": 0.9055131467345208 }, { "epoch": 0.9336858576593104, "grad_norm": 0.4288328289985657, "learning_rate": 2.2975712667762884e-07, "loss": 0.31505072116851807, "step": 17304, "token_acc": 0.8851340103212918 }, { "epoch": 0.933739815464307, "grad_norm": 0.5001468062400818, "learning_rate": 2.2938482143294527e-07, "loss": 0.289073646068573, "step": 17305, "token_acc": 0.8955444629321501 }, { "epoch": 0.9337937732693034, "grad_norm": 0.34921780228614807, "learning_rate": 2.2901281458028946e-07, "loss": 0.29373106360435486, "step": 17306, "token_acc": 0.8912397072278133 }, { "epoch": 0.9338477310742999, "grad_norm": 0.4492071866989136, "learning_rate": 2.286411061310212e-07, "loss": 0.3755279779434204, "step": 17307, "token_acc": 0.8663838812301167 }, { "epoch": 0.9339016888792964, "grad_norm": 0.37424221634864807, "learning_rate": 2.2826969609649585e-07, "loss": 0.3097667098045349, "step": 17308, "token_acc": 0.8884073672806068 }, { "epoch": 0.9339556466842929, "grad_norm": 0.5385515093803406, "learning_rate": 2.2789858448805323e-07, "loss": 0.28548163175582886, "step": 17309, "token_acc": 0.8971820258948972 }, { "epoch": 0.9340096044892894, "grad_norm": 0.5334557890892029, "learning_rate": 2.2752777131702875e-07, "loss": 0.2998238205909729, "step": 17310, "token_acc": 0.8856437679967092 }, { "epoch": 0.9340635622942859, "grad_norm": 0.4465148448944092, "learning_rate": 2.271572565947455e-07, "loss": 0.3737623989582062, "step": 17311, "token_acc": 0.8665347094585042 }, { "epoch": 0.9341175200992824, "grad_norm": 0.44081583619117737, "learning_rate": 2.2678704033251897e-07, "loss": 0.3237899839878082, "step": 17312, "token_acc": 0.8889508928571429 }, { "epoch": 0.9341714779042789, "grad_norm": 0.5364592671394348, "learning_rate": 2.264171225416567e-07, "loss": 0.34779876470565796, "step": 17313, "token_acc": 0.8725178111262695 }, { "epoch": 0.9342254357092753, "grad_norm": 0.3362738788127899, "learning_rate": 2.2604750323345526e-07, "loss": 0.28244900703430176, "step": 17314, "token_acc": 0.8928480204342273 }, { "epoch": 0.9342793935142718, "grad_norm": 0.32783493399620056, "learning_rate": 2.2567818241920226e-07, "loss": 0.35513007640838623, "step": 17315, "token_acc": 0.8748256182625238 }, { "epoch": 0.9343333513192683, "grad_norm": 0.40914255380630493, "learning_rate": 2.2530916011017757e-07, "loss": 0.3400266766548157, "step": 17316, "token_acc": 0.8788018433179724 }, { "epoch": 0.9343873091242648, "grad_norm": 0.3426402807235718, "learning_rate": 2.2494043631764995e-07, "loss": 0.2956709861755371, "step": 17317, "token_acc": 0.8979158369839373 }, { "epoch": 0.9344412669292613, "grad_norm": 0.39516082406044006, "learning_rate": 2.245720110528815e-07, "loss": 0.33016809821128845, "step": 17318, "token_acc": 0.8850819890681243 }, { "epoch": 0.9344952247342578, "grad_norm": 0.36429235339164734, "learning_rate": 2.2420388432712216e-07, "loss": 0.30671682953834534, "step": 17319, "token_acc": 0.8918661426700012 }, { "epoch": 0.9345491825392543, "grad_norm": 0.49514442682266235, "learning_rate": 2.2383605615161507e-07, "loss": 0.2975817620754242, "step": 17320, "token_acc": 0.8890401633764465 }, { "epoch": 0.9346031403442508, "grad_norm": 0.4085749387741089, "learning_rate": 2.2346852653759355e-07, "loss": 0.31165507435798645, "step": 17321, "token_acc": 0.8859550561797753 }, { "epoch": 0.9346570981492472, "grad_norm": 0.351073294878006, "learning_rate": 2.23101295496283e-07, "loss": 0.33327025175094604, "step": 17322, "token_acc": 0.8829654782116582 }, { "epoch": 0.9347110559542438, "grad_norm": 0.3923962116241455, "learning_rate": 2.2273436303889783e-07, "loss": 0.3045114576816559, "step": 17323, "token_acc": 0.888465204957102 }, { "epoch": 0.9347650137592403, "grad_norm": 0.38124844431877136, "learning_rate": 2.223677291766435e-07, "loss": 0.30825603008270264, "step": 17324, "token_acc": 0.8882242552193291 }, { "epoch": 0.9348189715642368, "grad_norm": 0.36864417791366577, "learning_rate": 2.2200139392071775e-07, "loss": 0.2926498055458069, "step": 17325, "token_acc": 0.893398146054714 }, { "epoch": 0.9348729293692333, "grad_norm": 0.46006521582603455, "learning_rate": 2.2163535728230712e-07, "loss": 0.26536479592323303, "step": 17326, "token_acc": 0.9021043000914913 }, { "epoch": 0.9349268871742298, "grad_norm": 0.3973214030265808, "learning_rate": 2.212696192725916e-07, "loss": 0.29205405712127686, "step": 17327, "token_acc": 0.894510582010582 }, { "epoch": 0.9349808449792263, "grad_norm": 0.3960897922515869, "learning_rate": 2.2090417990274116e-07, "loss": 0.3288455009460449, "step": 17328, "token_acc": 0.8898637004543318 }, { "epoch": 0.9350348027842227, "grad_norm": 0.3194219768047333, "learning_rate": 2.2053903918391462e-07, "loss": 0.29921504855155945, "step": 17329, "token_acc": 0.8906392694063927 }, { "epoch": 0.9350887605892192, "grad_norm": 0.3531222343444824, "learning_rate": 2.2017419712726307e-07, "loss": 0.33540982007980347, "step": 17330, "token_acc": 0.8806847215125192 }, { "epoch": 0.9351427183942157, "grad_norm": 0.4367055594921112, "learning_rate": 2.1980965374393093e-07, "loss": 0.38142460584640503, "step": 17331, "token_acc": 0.8677989130434782 }, { "epoch": 0.9351966761992122, "grad_norm": 0.46909791231155396, "learning_rate": 2.194454090450493e-07, "loss": 0.29835352301597595, "step": 17332, "token_acc": 0.8905706069320418 }, { "epoch": 0.9352506340042087, "grad_norm": 0.3798646032810211, "learning_rate": 2.190814630417415e-07, "loss": 0.3422137200832367, "step": 17333, "token_acc": 0.8765479382101367 }, { "epoch": 0.9353045918092052, "grad_norm": 0.42101961374282837, "learning_rate": 2.187178157451242e-07, "loss": 0.2962924540042877, "step": 17334, "token_acc": 0.89745727386411 }, { "epoch": 0.9353585496142017, "grad_norm": 0.49111422896385193, "learning_rate": 2.1835446716630293e-07, "loss": 0.3183416724205017, "step": 17335, "token_acc": 0.8872467618731319 }, { "epoch": 0.9354125074191982, "grad_norm": 0.36441168189048767, "learning_rate": 2.1799141731637442e-07, "loss": 0.2682778239250183, "step": 17336, "token_acc": 0.9047455884872366 }, { "epoch": 0.9354664652241946, "grad_norm": 0.37202444672584534, "learning_rate": 2.1762866620642532e-07, "loss": 0.31421029567718506, "step": 17337, "token_acc": 0.8853786496350365 }, { "epoch": 0.9355204230291911, "grad_norm": 0.41211068630218506, "learning_rate": 2.1726621384753342e-07, "loss": 0.3064021170139313, "step": 17338, "token_acc": 0.8831383138313832 }, { "epoch": 0.9355743808341876, "grad_norm": 0.3810352087020874, "learning_rate": 2.169040602507677e-07, "loss": 0.32154569029808044, "step": 17339, "token_acc": 0.8827771493212669 }, { "epoch": 0.9356283386391842, "grad_norm": 0.3846718966960907, "learning_rate": 2.165422054271904e-07, "loss": 0.29873335361480713, "step": 17340, "token_acc": 0.8904064324264787 }, { "epoch": 0.9356822964441807, "grad_norm": 0.47484055161476135, "learning_rate": 2.1618064938785156e-07, "loss": 0.35421085357666016, "step": 17341, "token_acc": 0.8762325739544373 }, { "epoch": 0.9357362542491772, "grad_norm": 0.43567967414855957, "learning_rate": 2.1581939214379234e-07, "loss": 0.30182960629463196, "step": 17342, "token_acc": 0.8925682507583418 }, { "epoch": 0.9357902120541737, "grad_norm": 0.4758235514163971, "learning_rate": 2.1545843370604503e-07, "loss": 0.3483541011810303, "step": 17343, "token_acc": 0.8752052545155994 }, { "epoch": 0.9358441698591702, "grad_norm": 0.4761309027671814, "learning_rate": 2.1509777408563525e-07, "loss": 0.3507717251777649, "step": 17344, "token_acc": 0.8715349279265009 }, { "epoch": 0.9358981276641666, "grad_norm": 0.396158903837204, "learning_rate": 2.1473741329357533e-07, "loss": 0.3405569791793823, "step": 17345, "token_acc": 0.8814407203601801 }, { "epoch": 0.9359520854691631, "grad_norm": 0.4047805368900299, "learning_rate": 2.1437735134087202e-07, "loss": 0.31745773553848267, "step": 17346, "token_acc": 0.8861952016078382 }, { "epoch": 0.9360060432741596, "grad_norm": 0.47279736399650574, "learning_rate": 2.1401758823852092e-07, "loss": 0.3183805048465729, "step": 17347, "token_acc": 0.8889583984985925 }, { "epoch": 0.9360600010791561, "grad_norm": 0.3917170464992523, "learning_rate": 2.1365812399750884e-07, "loss": 0.2909315228462219, "step": 17348, "token_acc": 0.8927510398098634 }, { "epoch": 0.9361139588841526, "grad_norm": 0.48809438943862915, "learning_rate": 2.1329895862881367e-07, "loss": 0.3527570366859436, "step": 17349, "token_acc": 0.8761300769126973 }, { "epoch": 0.9361679166891491, "grad_norm": 0.4239296615123749, "learning_rate": 2.129400921434044e-07, "loss": 0.28412240743637085, "step": 17350, "token_acc": 0.8972130656278094 }, { "epoch": 0.9362218744941456, "grad_norm": 0.345975399017334, "learning_rate": 2.1258152455224111e-07, "loss": 0.3030959963798523, "step": 17351, "token_acc": 0.8908879109500388 }, { "epoch": 0.936275832299142, "grad_norm": 0.3629370629787445, "learning_rate": 2.1222325586627402e-07, "loss": 0.29214853048324585, "step": 17352, "token_acc": 0.8940548780487805 }, { "epoch": 0.9363297901041385, "grad_norm": 0.3594500422477722, "learning_rate": 2.118652860964443e-07, "loss": 0.26531970500946045, "step": 17353, "token_acc": 0.9036264282165921 }, { "epoch": 0.936383747909135, "grad_norm": 0.4014480710029602, "learning_rate": 2.1150761525368546e-07, "loss": 0.34988313913345337, "step": 17354, "token_acc": 0.8762362637362637 }, { "epoch": 0.9364377057141315, "grad_norm": 0.4326767921447754, "learning_rate": 2.1115024334891875e-07, "loss": 0.32339537143707275, "step": 17355, "token_acc": 0.8840113762829233 }, { "epoch": 0.936491663519128, "grad_norm": 0.4184781610965729, "learning_rate": 2.107931703930599e-07, "loss": 0.28782418370246887, "step": 17356, "token_acc": 0.8942948161019404 }, { "epoch": 0.9365456213241246, "grad_norm": 0.44650959968566895, "learning_rate": 2.1043639639701353e-07, "loss": 0.32716959714889526, "step": 17357, "token_acc": 0.8779938916572898 }, { "epoch": 0.9365995791291211, "grad_norm": 0.5056502819061279, "learning_rate": 2.100799213716742e-07, "loss": 0.34780001640319824, "step": 17358, "token_acc": 0.8798628963153384 }, { "epoch": 0.9366535369341176, "grad_norm": 0.43670913577079773, "learning_rate": 2.0972374532792995e-07, "loss": 0.2870859205722809, "step": 17359, "token_acc": 0.8974543362577305 }, { "epoch": 0.936707494739114, "grad_norm": 0.31940382719039917, "learning_rate": 2.0936786827665867e-07, "loss": 0.3008403778076172, "step": 17360, "token_acc": 0.8877041742286751 }, { "epoch": 0.9367614525441105, "grad_norm": 0.5117673873901367, "learning_rate": 2.0901229022872726e-07, "loss": 0.3735688328742981, "step": 17361, "token_acc": 0.8657990484186958 }, { "epoch": 0.936815410349107, "grad_norm": 0.4023800492286682, "learning_rate": 2.0865701119499482e-07, "loss": 0.31338852643966675, "step": 17362, "token_acc": 0.8860694604639241 }, { "epoch": 0.9368693681541035, "grad_norm": 0.5103988647460938, "learning_rate": 2.0830203118631486e-07, "loss": 0.3300911486148834, "step": 17363, "token_acc": 0.8811015831134564 }, { "epoch": 0.9369233259591, "grad_norm": 0.5507033467292786, "learning_rate": 2.079473502135232e-07, "loss": 0.3462165594100952, "step": 17364, "token_acc": 0.8779358820504028 }, { "epoch": 0.9369772837640965, "grad_norm": 0.4079277813434601, "learning_rate": 2.0759296828745446e-07, "loss": 0.30135098099708557, "step": 17365, "token_acc": 0.8903605592347315 }, { "epoch": 0.937031241569093, "grad_norm": 0.3845241069793701, "learning_rate": 2.0723888541893223e-07, "loss": 0.31115418672561646, "step": 17366, "token_acc": 0.8900703674745896 }, { "epoch": 0.9370851993740895, "grad_norm": 0.4524913430213928, "learning_rate": 2.0688510161876785e-07, "loss": 0.36529541015625, "step": 17367, "token_acc": 0.8753372908796546 }, { "epoch": 0.9371391571790859, "grad_norm": 0.4770963788032532, "learning_rate": 2.0653161689776823e-07, "loss": 0.348553866147995, "step": 17368, "token_acc": 0.8733674987601256 }, { "epoch": 0.9371931149840824, "grad_norm": 0.4393032193183899, "learning_rate": 2.0617843126672698e-07, "loss": 0.3038293421268463, "step": 17369, "token_acc": 0.8918822691668644 }, { "epoch": 0.9372470727890789, "grad_norm": 0.4188483655452728, "learning_rate": 2.05825544736431e-07, "loss": 0.3291478753089905, "step": 17370, "token_acc": 0.8819763624425476 }, { "epoch": 0.9373010305940754, "grad_norm": 0.5187253952026367, "learning_rate": 2.0547295731765615e-07, "loss": 0.2840369641780853, "step": 17371, "token_acc": 0.8916876574307305 }, { "epoch": 0.9373549883990719, "grad_norm": 0.5184818506240845, "learning_rate": 2.0512066902117267e-07, "loss": 0.3668842017650604, "step": 17372, "token_acc": 0.8686310620012277 }, { "epoch": 0.9374089462040684, "grad_norm": 0.3553990423679352, "learning_rate": 2.047686798577375e-07, "loss": 0.24806389212608337, "step": 17373, "token_acc": 0.9062143206654706 }, { "epoch": 0.937462904009065, "grad_norm": 0.47672995924949646, "learning_rate": 2.04416989838101e-07, "loss": 0.3421747088432312, "step": 17374, "token_acc": 0.8858216269193074 }, { "epoch": 0.9375168618140614, "grad_norm": 0.4666317105293274, "learning_rate": 2.0406559897300336e-07, "loss": 0.34985601902008057, "step": 17375, "token_acc": 0.8767238479650185 }, { "epoch": 0.9375708196190579, "grad_norm": 0.3153911530971527, "learning_rate": 2.037145072731761e-07, "loss": 0.32781586050987244, "step": 17376, "token_acc": 0.8861117660929027 }, { "epoch": 0.9376247774240544, "grad_norm": 0.4006711542606354, "learning_rate": 2.033637147493428e-07, "loss": 0.3605300784111023, "step": 17377, "token_acc": 0.8717391304347826 }, { "epoch": 0.9376787352290509, "grad_norm": 0.33202388882637024, "learning_rate": 2.030132214122138e-07, "loss": 0.32087090611457825, "step": 17378, "token_acc": 0.8872712146422629 }, { "epoch": 0.9377326930340474, "grad_norm": 0.4380325675010681, "learning_rate": 2.0266302727249498e-07, "loss": 0.2973121404647827, "step": 17379, "token_acc": 0.8882183154027814 }, { "epoch": 0.9377866508390439, "grad_norm": 0.4304217994213104, "learning_rate": 2.0231313234088113e-07, "loss": 0.2803571820259094, "step": 17380, "token_acc": 0.8992542563669621 }, { "epoch": 0.9378406086440404, "grad_norm": 0.5013689994812012, "learning_rate": 2.0196353662805702e-07, "loss": 0.37386614084243774, "step": 17381, "token_acc": 0.8710503842869343 }, { "epoch": 0.9378945664490369, "grad_norm": 0.44663745164871216, "learning_rate": 2.0161424014469966e-07, "loss": 0.31734955310821533, "step": 17382, "token_acc": 0.8879674542325144 }, { "epoch": 0.9379485242540333, "grad_norm": 0.3974091112613678, "learning_rate": 2.012652429014772e-07, "loss": 0.280365526676178, "step": 17383, "token_acc": 0.8963810174120861 }, { "epoch": 0.9380024820590298, "grad_norm": 0.40868762135505676, "learning_rate": 2.0091654490904556e-07, "loss": 0.32769647240638733, "step": 17384, "token_acc": 0.8855501017598467 }, { "epoch": 0.9380564398640263, "grad_norm": 0.3725014626979828, "learning_rate": 2.005681461780573e-07, "loss": 0.3072751760482788, "step": 17385, "token_acc": 0.8867602196461257 }, { "epoch": 0.9381103976690228, "grad_norm": 0.5064095258712769, "learning_rate": 2.0022004671915062e-07, "loss": 0.3623533844947815, "step": 17386, "token_acc": 0.8715227989675939 }, { "epoch": 0.9381643554740193, "grad_norm": 0.5864655375480652, "learning_rate": 1.998722465429559e-07, "loss": 0.36138415336608887, "step": 17387, "token_acc": 0.8716392020815265 }, { "epoch": 0.9382183132790158, "grad_norm": 0.42341506481170654, "learning_rate": 1.9952474566009684e-07, "loss": 0.34209442138671875, "step": 17388, "token_acc": 0.8773072747014115 }, { "epoch": 0.9382722710840123, "grad_norm": 0.4610423743724823, "learning_rate": 1.9917754408118385e-07, "loss": 0.3301087021827698, "step": 17389, "token_acc": 0.8861033341817256 }, { "epoch": 0.9383262288890087, "grad_norm": 0.3218226730823517, "learning_rate": 1.9883064181682067e-07, "loss": 0.3290868401527405, "step": 17390, "token_acc": 0.877797625302091 }, { "epoch": 0.9383801866940052, "grad_norm": 0.49546656012535095, "learning_rate": 1.9848403887760215e-07, "loss": 0.3468766212463379, "step": 17391, "token_acc": 0.873031659207691 }, { "epoch": 0.9384341444990018, "grad_norm": 0.452488511800766, "learning_rate": 1.9813773527411316e-07, "loss": 0.33006519079208374, "step": 17392, "token_acc": 0.8814016172506739 }, { "epoch": 0.9384881023039983, "grad_norm": 0.3340819478034973, "learning_rate": 1.9779173101693084e-07, "loss": 0.3557548224925995, "step": 17393, "token_acc": 0.8766431701276434 }, { "epoch": 0.9385420601089948, "grad_norm": 0.376871258020401, "learning_rate": 1.974460261166211e-07, "loss": 0.33455830812454224, "step": 17394, "token_acc": 0.8812771698396042 }, { "epoch": 0.9385960179139913, "grad_norm": 0.3973521292209625, "learning_rate": 1.9710062058374224e-07, "loss": 0.3197845220565796, "step": 17395, "token_acc": 0.8800152120174938 }, { "epoch": 0.9386499757189878, "grad_norm": 0.380045086145401, "learning_rate": 1.9675551442884243e-07, "loss": 0.3143793046474457, "step": 17396, "token_acc": 0.8871135811045502 }, { "epoch": 0.9387039335239843, "grad_norm": 0.526103138923645, "learning_rate": 1.9641070766245996e-07, "loss": 0.35618406534194946, "step": 17397, "token_acc": 0.8764959310674965 }, { "epoch": 0.9387578913289807, "grad_norm": 0.4743744730949402, "learning_rate": 1.9606620029512747e-07, "loss": 0.36261922121047974, "step": 17398, "token_acc": 0.870314443062711 }, { "epoch": 0.9388118491339772, "grad_norm": 0.376703143119812, "learning_rate": 1.9572199233736434e-07, "loss": 0.32893747091293335, "step": 17399, "token_acc": 0.8826159865846842 }, { "epoch": 0.9388658069389737, "grad_norm": 0.4756292998790741, "learning_rate": 1.9537808379968325e-07, "loss": 0.2822659909725189, "step": 17400, "token_acc": 0.897054357728515 }, { "epoch": 0.9389197647439702, "grad_norm": 0.42976194620132446, "learning_rate": 1.9503447469258697e-07, "loss": 0.3161916732788086, "step": 17401, "token_acc": 0.8843441466854725 }, { "epoch": 0.9389737225489667, "grad_norm": 0.5664156079292297, "learning_rate": 1.9469116502656928e-07, "loss": 0.2882954180240631, "step": 17402, "token_acc": 0.8928247048138056 }, { "epoch": 0.9390276803539632, "grad_norm": 0.3025178611278534, "learning_rate": 1.94348154812114e-07, "loss": 0.3090032935142517, "step": 17403, "token_acc": 0.8896771904930827 }, { "epoch": 0.9390816381589597, "grad_norm": 0.3476795256137848, "learning_rate": 1.9400544405969834e-07, "loss": 0.27144941687583923, "step": 17404, "token_acc": 0.9006701771182384 }, { "epoch": 0.9391355959639562, "grad_norm": 0.40414944291114807, "learning_rate": 1.9366303277978837e-07, "loss": 0.34154847264289856, "step": 17405, "token_acc": 0.8801079414032382 }, { "epoch": 0.9391895537689526, "grad_norm": 0.40733349323272705, "learning_rate": 1.9332092098283905e-07, "loss": 0.3033404052257538, "step": 17406, "token_acc": 0.8848694085860102 }, { "epoch": 0.9392435115739491, "grad_norm": 0.4386099874973297, "learning_rate": 1.929791086793009e-07, "loss": 0.3019165098667145, "step": 17407, "token_acc": 0.888763197586727 }, { "epoch": 0.9392974693789456, "grad_norm": 0.40915533900260925, "learning_rate": 1.9263759587961118e-07, "loss": 0.36467477679252625, "step": 17408, "token_acc": 0.8707465718638903 }, { "epoch": 0.9393514271839422, "grad_norm": 0.3269152045249939, "learning_rate": 1.9229638259419925e-07, "loss": 0.30362966656684875, "step": 17409, "token_acc": 0.8884552436107265 }, { "epoch": 0.9394053849889387, "grad_norm": 0.3788778781890869, "learning_rate": 1.919554688334868e-07, "loss": 0.3099963068962097, "step": 17410, "token_acc": 0.8889578163771712 }, { "epoch": 0.9394593427939352, "grad_norm": 0.4605880379676819, "learning_rate": 1.9161485460788665e-07, "loss": 0.3412725329399109, "step": 17411, "token_acc": 0.8795747911921032 }, { "epoch": 0.9395133005989317, "grad_norm": 0.426680326461792, "learning_rate": 1.9127453992779822e-07, "loss": 0.3531666398048401, "step": 17412, "token_acc": 0.8835826620636748 }, { "epoch": 0.9395672584039281, "grad_norm": 0.47145727276802063, "learning_rate": 1.9093452480361542e-07, "loss": 0.3107316195964813, "step": 17413, "token_acc": 0.8860324606089326 }, { "epoch": 0.9396212162089246, "grad_norm": 0.2904377281665802, "learning_rate": 1.9059480924572327e-07, "loss": 0.2679455876350403, "step": 17414, "token_acc": 0.8972390373542005 }, { "epoch": 0.9396751740139211, "grad_norm": 0.4186665713787079, "learning_rate": 1.902553932644957e-07, "loss": 0.2698346674442291, "step": 17415, "token_acc": 0.8997433187377322 }, { "epoch": 0.9397291318189176, "grad_norm": 0.4104383587837219, "learning_rate": 1.8991627687029778e-07, "loss": 0.3563011586666107, "step": 17416, "token_acc": 0.8683229813664596 }, { "epoch": 0.9397830896239141, "grad_norm": 0.37362170219421387, "learning_rate": 1.8957746007348787e-07, "loss": 0.31669166684150696, "step": 17417, "token_acc": 0.8883261585211446 }, { "epoch": 0.9398370474289106, "grad_norm": 0.4336630403995514, "learning_rate": 1.8923894288441213e-07, "loss": 0.33354824781417847, "step": 17418, "token_acc": 0.8836877181847033 }, { "epoch": 0.9398910052339071, "grad_norm": 0.3778406083583832, "learning_rate": 1.8890072531340898e-07, "loss": 0.28031042218208313, "step": 17419, "token_acc": 0.8950112572579689 }, { "epoch": 0.9399449630389036, "grad_norm": 0.4416219890117645, "learning_rate": 1.885628073708079e-07, "loss": 0.3815421760082245, "step": 17420, "token_acc": 0.870076726342711 }, { "epoch": 0.9399989208439, "grad_norm": 0.45423611998558044, "learning_rate": 1.8822518906692733e-07, "loss": 0.39063575863838196, "step": 17421, "token_acc": 0.859662980516061 }, { "epoch": 0.9400528786488965, "grad_norm": 0.34642937779426575, "learning_rate": 1.8788787041208013e-07, "loss": 0.3226143419742584, "step": 17422, "token_acc": 0.8884620856451843 }, { "epoch": 0.940106836453893, "grad_norm": 0.30197036266326904, "learning_rate": 1.8755085141656583e-07, "loss": 0.30317550897598267, "step": 17423, "token_acc": 0.8919942756124253 }, { "epoch": 0.9401607942588895, "grad_norm": 0.3486687242984772, "learning_rate": 1.872141320906795e-07, "loss": 0.3274180293083191, "step": 17424, "token_acc": 0.8854842585916847 }, { "epoch": 0.940214752063886, "grad_norm": 0.43657565116882324, "learning_rate": 1.8687771244470188e-07, "loss": 0.3162328004837036, "step": 17425, "token_acc": 0.8851037472901827 }, { "epoch": 0.9402687098688826, "grad_norm": 0.43560224771499634, "learning_rate": 1.8654159248890912e-07, "loss": 0.3116462826728821, "step": 17426, "token_acc": 0.891935745578452 }, { "epoch": 0.9403226676738791, "grad_norm": 0.38931968808174133, "learning_rate": 1.8620577223356527e-07, "loss": 0.284761905670166, "step": 17427, "token_acc": 0.8934262948207171 }, { "epoch": 0.9403766254788756, "grad_norm": 0.4512755274772644, "learning_rate": 1.8587025168892548e-07, "loss": 0.3123561143875122, "step": 17428, "token_acc": 0.8855072463768116 }, { "epoch": 0.940430583283872, "grad_norm": 0.5087790489196777, "learning_rate": 1.8553503086523818e-07, "loss": 0.3379274606704712, "step": 17429, "token_acc": 0.8792900952629519 }, { "epoch": 0.9404845410888685, "grad_norm": 0.4169507622718811, "learning_rate": 1.8520010977273962e-07, "loss": 0.2945249676704407, "step": 17430, "token_acc": 0.8923122500370315 }, { "epoch": 0.940538498893865, "grad_norm": 0.5172280669212341, "learning_rate": 1.848654884216583e-07, "loss": 0.3826178312301636, "step": 17431, "token_acc": 0.8722547584187409 }, { "epoch": 0.9405924566988615, "grad_norm": 0.4155057370662689, "learning_rate": 1.8453116682221384e-07, "loss": 0.3500361442565918, "step": 17432, "token_acc": 0.874323279195669 }, { "epoch": 0.940646414503858, "grad_norm": 0.3287484645843506, "learning_rate": 1.8419714498461694e-07, "loss": 0.28966641426086426, "step": 17433, "token_acc": 0.8976812661023187 }, { "epoch": 0.9407003723088545, "grad_norm": 0.4180309474468231, "learning_rate": 1.8386342291906723e-07, "loss": 0.2866117060184479, "step": 17434, "token_acc": 0.8995952677459527 }, { "epoch": 0.940754330113851, "grad_norm": 0.4450474977493286, "learning_rate": 1.8353000063575654e-07, "loss": 0.3329660892486572, "step": 17435, "token_acc": 0.8814783347493628 }, { "epoch": 0.9408082879188474, "grad_norm": 0.40242400765419006, "learning_rate": 1.8319687814486786e-07, "loss": 0.3231089413166046, "step": 17436, "token_acc": 0.8822665883734586 }, { "epoch": 0.9408622457238439, "grad_norm": 0.3462265133857727, "learning_rate": 1.8286405545657638e-07, "loss": 0.28755974769592285, "step": 17437, "token_acc": 0.8927881519639408 }, { "epoch": 0.9409162035288404, "grad_norm": 0.4229469299316406, "learning_rate": 1.8253153258104393e-07, "loss": 0.33689242601394653, "step": 17438, "token_acc": 0.8839590443686007 }, { "epoch": 0.9409701613338369, "grad_norm": 0.3917606472969055, "learning_rate": 1.8219930952842692e-07, "loss": 0.3218390941619873, "step": 17439, "token_acc": 0.883964861675626 }, { "epoch": 0.9410241191388334, "grad_norm": 0.4687226712703705, "learning_rate": 1.8186738630887156e-07, "loss": 0.33330661058425903, "step": 17440, "token_acc": 0.8809778729478943 }, { "epoch": 0.9410780769438299, "grad_norm": 0.4274371564388275, "learning_rate": 1.8153576293251318e-07, "loss": 0.31399157643318176, "step": 17441, "token_acc": 0.8873216063723863 }, { "epoch": 0.9411320347488265, "grad_norm": 0.3520415425300598, "learning_rate": 1.8120443940948028e-07, "loss": 0.36356884241104126, "step": 17442, "token_acc": 0.8718344260187685 }, { "epoch": 0.941185992553823, "grad_norm": 0.40358951687812805, "learning_rate": 1.808734157498926e-07, "loss": 0.30114713311195374, "step": 17443, "token_acc": 0.8913453083783445 }, { "epoch": 0.9412399503588194, "grad_norm": 0.49583691358566284, "learning_rate": 1.8054269196385866e-07, "loss": 0.36351820826530457, "step": 17444, "token_acc": 0.870064495831367 }, { "epoch": 0.9412939081638159, "grad_norm": 0.4320010542869568, "learning_rate": 1.8021226806147928e-07, "loss": 0.28423240780830383, "step": 17445, "token_acc": 0.8948160306387635 }, { "epoch": 0.9413478659688124, "grad_norm": 0.45041099190711975, "learning_rate": 1.798821440528431e-07, "loss": 0.3164522647857666, "step": 17446, "token_acc": 0.8867472764276307 }, { "epoch": 0.9414018237738089, "grad_norm": 0.3463486433029175, "learning_rate": 1.7955231994803534e-07, "loss": 0.2638803720474243, "step": 17447, "token_acc": 0.9045055256446586 }, { "epoch": 0.9414557815788054, "grad_norm": 0.38425344228744507, "learning_rate": 1.7922279575712576e-07, "loss": 0.3169899582862854, "step": 17448, "token_acc": 0.8870830977953647 }, { "epoch": 0.9415097393838019, "grad_norm": 0.522255539894104, "learning_rate": 1.788935714901785e-07, "loss": 0.31110841035842896, "step": 17449, "token_acc": 0.8838323353293414 }, { "epoch": 0.9415636971887984, "grad_norm": 0.5506249070167542, "learning_rate": 1.7856464715724997e-07, "loss": 0.3413182497024536, "step": 17450, "token_acc": 0.8800432276657061 }, { "epoch": 0.9416176549937949, "grad_norm": 0.44993695616722107, "learning_rate": 1.7823602276838436e-07, "loss": 0.2892610430717468, "step": 17451, "token_acc": 0.8918961447678992 }, { "epoch": 0.9416716127987913, "grad_norm": 0.44658857583999634, "learning_rate": 1.7790769833361808e-07, "loss": 0.36005473136901855, "step": 17452, "token_acc": 0.8754189130118024 }, { "epoch": 0.9417255706037878, "grad_norm": 0.5180779695510864, "learning_rate": 1.7757967386297759e-07, "loss": 0.3555973768234253, "step": 17453, "token_acc": 0.8770888645947212 }, { "epoch": 0.9417795284087843, "grad_norm": 0.4083331823348999, "learning_rate": 1.7725194936648038e-07, "loss": 0.3560425043106079, "step": 17454, "token_acc": 0.876306900434293 }, { "epoch": 0.9418334862137808, "grad_norm": 0.48514264822006226, "learning_rate": 1.7692452485413402e-07, "loss": 0.3308366537094116, "step": 17455, "token_acc": 0.8822571090047393 }, { "epoch": 0.9418874440187773, "grad_norm": 0.4617466628551483, "learning_rate": 1.7659740033594052e-07, "loss": 0.35897478461265564, "step": 17456, "token_acc": 0.8718513853904282 }, { "epoch": 0.9419414018237738, "grad_norm": 0.5212405323982239, "learning_rate": 1.7627057582188967e-07, "loss": 0.3186614215373993, "step": 17457, "token_acc": 0.878633080852563 }, { "epoch": 0.9419953596287703, "grad_norm": 0.41043180227279663, "learning_rate": 1.7594405132196125e-07, "loss": 0.29719215631484985, "step": 17458, "token_acc": 0.893352694417554 }, { "epoch": 0.9420493174337667, "grad_norm": 0.5946293473243713, "learning_rate": 1.7561782684612838e-07, "loss": 0.3306364417076111, "step": 17459, "token_acc": 0.8836662749706228 }, { "epoch": 0.9421032752387632, "grad_norm": 0.39327362179756165, "learning_rate": 1.7529190240435311e-07, "loss": 0.2713759243488312, "step": 17460, "token_acc": 0.8962637362637362 }, { "epoch": 0.9421572330437598, "grad_norm": 0.4414583146572113, "learning_rate": 1.7496627800658971e-07, "loss": 0.32660406827926636, "step": 17461, "token_acc": 0.8837471783295711 }, { "epoch": 0.9422111908487563, "grad_norm": 0.39903900027275085, "learning_rate": 1.7464095366278245e-07, "loss": 0.3192984461784363, "step": 17462, "token_acc": 0.8858749121574139 }, { "epoch": 0.9422651486537528, "grad_norm": 0.4527902901172638, "learning_rate": 1.7431592938286667e-07, "loss": 0.3769182562828064, "step": 17463, "token_acc": 0.8679692125518058 }, { "epoch": 0.9423191064587493, "grad_norm": 0.4578252136707306, "learning_rate": 1.7399120517676782e-07, "loss": 0.31252652406692505, "step": 17464, "token_acc": 0.8848961257720381 }, { "epoch": 0.9423730642637458, "grad_norm": 0.31946882605552673, "learning_rate": 1.736667810544046e-07, "loss": 0.2948652505874634, "step": 17465, "token_acc": 0.8927407407407407 }, { "epoch": 0.9424270220687423, "grad_norm": 0.376298189163208, "learning_rate": 1.7334265702568243e-07, "loss": 0.3368866443634033, "step": 17466, "token_acc": 0.8785046728971962 }, { "epoch": 0.9424809798737387, "grad_norm": 0.414242148399353, "learning_rate": 1.7301883310050226e-07, "loss": 0.311786413192749, "step": 17467, "token_acc": 0.8870461676378306 }, { "epoch": 0.9425349376787352, "grad_norm": 0.3746770918369293, "learning_rate": 1.7269530928875065e-07, "loss": 0.3078569769859314, "step": 17468, "token_acc": 0.8904023205952831 }, { "epoch": 0.9425888954837317, "grad_norm": 0.42682087421417236, "learning_rate": 1.7237208560031194e-07, "loss": 0.3064207434654236, "step": 17469, "token_acc": 0.8946784922394678 }, { "epoch": 0.9426428532887282, "grad_norm": 0.41428592801094055, "learning_rate": 1.7204916204505483e-07, "loss": 0.3166738748550415, "step": 17470, "token_acc": 0.8876177658142664 }, { "epoch": 0.9426968110937247, "grad_norm": 0.3633944094181061, "learning_rate": 1.717265386328415e-07, "loss": 0.2880493998527527, "step": 17471, "token_acc": 0.897378277153558 }, { "epoch": 0.9427507688987212, "grad_norm": 0.43996095657348633, "learning_rate": 1.7140421537352515e-07, "loss": 0.3493466377258301, "step": 17472, "token_acc": 0.8746410426330904 }, { "epoch": 0.9428047267037177, "grad_norm": 0.4840207099914551, "learning_rate": 1.71082192276949e-07, "loss": 0.32735878229141235, "step": 17473, "token_acc": 0.8797977906759034 }, { "epoch": 0.9428586845087142, "grad_norm": 0.47454410791397095, "learning_rate": 1.7076046935294744e-07, "loss": 0.39282840490341187, "step": 17474, "token_acc": 0.8656177156177156 }, { "epoch": 0.9429126423137106, "grad_norm": 0.31216001510620117, "learning_rate": 1.7043904661134703e-07, "loss": 0.295259028673172, "step": 17475, "token_acc": 0.8937213516682656 }, { "epoch": 0.9429666001187071, "grad_norm": 0.44912487268447876, "learning_rate": 1.7011792406196327e-07, "loss": 0.272612601518631, "step": 17476, "token_acc": 0.9012640929279125 }, { "epoch": 0.9430205579237037, "grad_norm": 0.3665664494037628, "learning_rate": 1.6979710171460272e-07, "loss": 0.3450935482978821, "step": 17477, "token_acc": 0.8772632828732562 }, { "epoch": 0.9430745157287002, "grad_norm": 0.2903085947036743, "learning_rate": 1.6947657957906426e-07, "loss": 0.32408732175827026, "step": 17478, "token_acc": 0.8833277348561192 }, { "epoch": 0.9431284735336967, "grad_norm": 0.4170570373535156, "learning_rate": 1.691563576651345e-07, "loss": 0.394296258687973, "step": 17479, "token_acc": 0.8667633104902478 }, { "epoch": 0.9431824313386932, "grad_norm": 0.3696736991405487, "learning_rate": 1.6883643598259669e-07, "loss": 0.30751389265060425, "step": 17480, "token_acc": 0.8873143080531666 }, { "epoch": 0.9432363891436897, "grad_norm": 0.3601130545139313, "learning_rate": 1.6851681454121526e-07, "loss": 0.33710771799087524, "step": 17481, "token_acc": 0.8799186700155484 }, { "epoch": 0.9432903469486861, "grad_norm": 0.45314180850982666, "learning_rate": 1.6819749335075687e-07, "loss": 0.29729020595550537, "step": 17482, "token_acc": 0.8892355694227769 }, { "epoch": 0.9433443047536826, "grad_norm": 0.44040513038635254, "learning_rate": 1.6787847242097144e-07, "loss": 0.33761364221572876, "step": 17483, "token_acc": 0.8831908831908832 }, { "epoch": 0.9433982625586791, "grad_norm": 0.3674314022064209, "learning_rate": 1.6755975176160123e-07, "loss": 0.36523833870887756, "step": 17484, "token_acc": 0.8694774046957839 }, { "epoch": 0.9434522203636756, "grad_norm": 0.42073628306388855, "learning_rate": 1.6724133138238063e-07, "loss": 0.24691548943519592, "step": 17485, "token_acc": 0.9044003451251078 }, { "epoch": 0.9435061781686721, "grad_norm": 0.5366037487983704, "learning_rate": 1.6692321129303413e-07, "loss": 0.37002769112586975, "step": 17486, "token_acc": 0.8698296836982968 }, { "epoch": 0.9435601359736686, "grad_norm": 0.472929447889328, "learning_rate": 1.6660539150327616e-07, "loss": 0.3784949779510498, "step": 17487, "token_acc": 0.8673551502145923 }, { "epoch": 0.9436140937786651, "grad_norm": 0.3836878538131714, "learning_rate": 1.6628787202281338e-07, "loss": 0.3349524140357971, "step": 17488, "token_acc": 0.878868505811511 }, { "epoch": 0.9436680515836616, "grad_norm": 0.37998220324516296, "learning_rate": 1.6597065286134363e-07, "loss": 0.34616196155548096, "step": 17489, "token_acc": 0.8816286324084716 }, { "epoch": 0.943722009388658, "grad_norm": 0.5157719254493713, "learning_rate": 1.6565373402855467e-07, "loss": 0.27833518385887146, "step": 17490, "token_acc": 0.8968739381583418 }, { "epoch": 0.9437759671936545, "grad_norm": 0.321178138256073, "learning_rate": 1.6533711553412324e-07, "loss": 0.3456049859523773, "step": 17491, "token_acc": 0.8735182695832824 }, { "epoch": 0.943829924998651, "grad_norm": 0.48551714420318604, "learning_rate": 1.6502079738772047e-07, "loss": 0.32895803451538086, "step": 17492, "token_acc": 0.8856469735411381 }, { "epoch": 0.9438838828036475, "grad_norm": 0.37136730551719666, "learning_rate": 1.6470477959900644e-07, "loss": 0.3070054352283478, "step": 17493, "token_acc": 0.8902849740932642 }, { "epoch": 0.943937840608644, "grad_norm": 0.4488198459148407, "learning_rate": 1.6438906217763116e-07, "loss": 0.32658565044403076, "step": 17494, "token_acc": 0.8810384488991128 }, { "epoch": 0.9439917984136406, "grad_norm": 0.4089326858520508, "learning_rate": 1.640736451332381e-07, "loss": 0.3886128067970276, "step": 17495, "token_acc": 0.8651996310449335 }, { "epoch": 0.9440457562186371, "grad_norm": 0.31613922119140625, "learning_rate": 1.6375852847545948e-07, "loss": 0.2817307114601135, "step": 17496, "token_acc": 0.898221829256312 }, { "epoch": 0.9440997140236336, "grad_norm": 0.4508286714553833, "learning_rate": 1.6344371221391765e-07, "loss": 0.39086830615997314, "step": 17497, "token_acc": 0.8704675572519084 }, { "epoch": 0.94415367182863, "grad_norm": 0.5338336229324341, "learning_rate": 1.631291963582282e-07, "loss": 0.3284161686897278, "step": 17498, "token_acc": 0.877754301237549 }, { "epoch": 0.9442076296336265, "grad_norm": 0.3919535279273987, "learning_rate": 1.6281498091799686e-07, "loss": 0.3359353840351105, "step": 17499, "token_acc": 0.8808358682713927 }, { "epoch": 0.944261587438623, "grad_norm": 0.4434431493282318, "learning_rate": 1.6250106590281812e-07, "loss": 0.33319324254989624, "step": 17500, "token_acc": 0.8793291518952823 }, { "epoch": 0.9443155452436195, "grad_norm": 0.5662948489189148, "learning_rate": 1.6218745132227985e-07, "loss": 0.34760797023773193, "step": 17501, "token_acc": 0.8785379568884724 }, { "epoch": 0.944369503048616, "grad_norm": 0.5038936138153076, "learning_rate": 1.6187413718596e-07, "loss": 0.3340678811073303, "step": 17502, "token_acc": 0.8820347902353457 }, { "epoch": 0.9444234608536125, "grad_norm": 0.33401235938072205, "learning_rate": 1.6156112350342757e-07, "loss": 0.30887001752853394, "step": 17503, "token_acc": 0.8909155937052933 }, { "epoch": 0.944477418658609, "grad_norm": 0.4097648859024048, "learning_rate": 1.6124841028424043e-07, "loss": 0.34689608216285706, "step": 17504, "token_acc": 0.8791445623342176 }, { "epoch": 0.9445313764636054, "grad_norm": 0.38606154918670654, "learning_rate": 1.6093599753794874e-07, "loss": 0.2900242209434509, "step": 17505, "token_acc": 0.8966883668270591 }, { "epoch": 0.9445853342686019, "grad_norm": 0.37268924713134766, "learning_rate": 1.606238852740949e-07, "loss": 0.27884113788604736, "step": 17506, "token_acc": 0.8967382119317222 }, { "epoch": 0.9446392920735984, "grad_norm": 0.5199105739593506, "learning_rate": 1.60312073502209e-07, "loss": 0.3181290328502655, "step": 17507, "token_acc": 0.8855623100303951 }, { "epoch": 0.9446932498785949, "grad_norm": 0.3687945604324341, "learning_rate": 1.6000056223181572e-07, "loss": 0.3385559022426605, "step": 17508, "token_acc": 0.8794523867215895 }, { "epoch": 0.9447472076835914, "grad_norm": 0.3710883855819702, "learning_rate": 1.5968935147242847e-07, "loss": 0.3261098563671112, "step": 17509, "token_acc": 0.882891007472301 }, { "epoch": 0.944801165488588, "grad_norm": 0.4165804982185364, "learning_rate": 1.5937844123354972e-07, "loss": 0.27094680070877075, "step": 17510, "token_acc": 0.8988391376451078 }, { "epoch": 0.9448551232935845, "grad_norm": 0.4796081781387329, "learning_rate": 1.590678315246752e-07, "loss": 0.31267261505126953, "step": 17511, "token_acc": 0.8845783905097788 }, { "epoch": 0.944909081098581, "grad_norm": 0.49132877588272095, "learning_rate": 1.5875752235529063e-07, "loss": 0.36553189158439636, "step": 17512, "token_acc": 0.8811016144349477 }, { "epoch": 0.9449630389035774, "grad_norm": 0.540351390838623, "learning_rate": 1.58447513734874e-07, "loss": 0.31370043754577637, "step": 17513, "token_acc": 0.8861884009386524 }, { "epoch": 0.9450169967085739, "grad_norm": 0.33996903896331787, "learning_rate": 1.5813780567289218e-07, "loss": 0.2893613576889038, "step": 17514, "token_acc": 0.8957580139792721 }, { "epoch": 0.9450709545135704, "grad_norm": 0.5759543776512146, "learning_rate": 1.5782839817880425e-07, "loss": 0.33882594108581543, "step": 17515, "token_acc": 0.8800243605359318 }, { "epoch": 0.9451249123185669, "grad_norm": 0.2618592083454132, "learning_rate": 1.5751929126205822e-07, "loss": 0.27445149421691895, "step": 17516, "token_acc": 0.8938356164383562 }, { "epoch": 0.9451788701235634, "grad_norm": 0.40542811155319214, "learning_rate": 1.5721048493209435e-07, "loss": 0.29538315534591675, "step": 17517, "token_acc": 0.891357125515902 }, { "epoch": 0.9452328279285599, "grad_norm": 0.44017493724823, "learning_rate": 1.5690197919834392e-07, "loss": 0.30014875531196594, "step": 17518, "token_acc": 0.8900144717800289 }, { "epoch": 0.9452867857335564, "grad_norm": 0.44251614809036255, "learning_rate": 1.5659377407022724e-07, "loss": 0.32013005018234253, "step": 17519, "token_acc": 0.8844251236696148 }, { "epoch": 0.9453407435385528, "grad_norm": 0.4138110280036926, "learning_rate": 1.5628586955716006e-07, "loss": 0.32347479462623596, "step": 17520, "token_acc": 0.8817864851402956 }, { "epoch": 0.9453947013435493, "grad_norm": 0.3943823277950287, "learning_rate": 1.5597826566854267e-07, "loss": 0.33598828315734863, "step": 17521, "token_acc": 0.8790686623901164 }, { "epoch": 0.9454486591485458, "grad_norm": 0.4096502959728241, "learning_rate": 1.556709624137709e-07, "loss": 0.31357714533805847, "step": 17522, "token_acc": 0.8848959024002122 }, { "epoch": 0.9455026169535423, "grad_norm": 0.4492526352405548, "learning_rate": 1.553639598022283e-07, "loss": 0.3520008623600006, "step": 17523, "token_acc": 0.8755847953216375 }, { "epoch": 0.9455565747585388, "grad_norm": 0.37728214263916016, "learning_rate": 1.5505725784329183e-07, "loss": 0.3490879535675049, "step": 17524, "token_acc": 0.8754289636238847 }, { "epoch": 0.9456105325635353, "grad_norm": 0.4419466555118561, "learning_rate": 1.5475085654632848e-07, "loss": 0.3037518560886383, "step": 17525, "token_acc": 0.8900785153461813 }, { "epoch": 0.9456644903685318, "grad_norm": 0.4479590654373169, "learning_rate": 1.5444475592069296e-07, "loss": 0.32917851209640503, "step": 17526, "token_acc": 0.8800576142464318 }, { "epoch": 0.9457184481735283, "grad_norm": 0.5685207843780518, "learning_rate": 1.541389559757378e-07, "loss": 0.3878932297229767, "step": 17527, "token_acc": 0.8718381112984823 }, { "epoch": 0.9457724059785247, "grad_norm": 0.5491170287132263, "learning_rate": 1.5383345672079776e-07, "loss": 0.31745877861976624, "step": 17528, "token_acc": 0.8858807402330363 }, { "epoch": 0.9458263637835213, "grad_norm": 0.4281232953071594, "learning_rate": 1.5352825816520534e-07, "loss": 0.33810555934906006, "step": 17529, "token_acc": 0.8781524926686217 }, { "epoch": 0.9458803215885178, "grad_norm": 0.42493724822998047, "learning_rate": 1.532233603182809e-07, "loss": 0.297648161649704, "step": 17530, "token_acc": 0.8917793964620188 }, { "epoch": 0.9459342793935143, "grad_norm": 0.4031605124473572, "learning_rate": 1.5291876318933474e-07, "loss": 0.30489492416381836, "step": 17531, "token_acc": 0.889310391545861 }, { "epoch": 0.9459882371985108, "grad_norm": 0.5013189911842346, "learning_rate": 1.5261446678766945e-07, "loss": 0.2990437150001526, "step": 17532, "token_acc": 0.891121288225735 }, { "epoch": 0.9460421950035073, "grad_norm": 0.4782646894454956, "learning_rate": 1.5231047112257978e-07, "loss": 0.376187801361084, "step": 17533, "token_acc": 0.8689402049243004 }, { "epoch": 0.9460961528085038, "grad_norm": 0.5544544458389282, "learning_rate": 1.5200677620334836e-07, "loss": 0.41092681884765625, "step": 17534, "token_acc": 0.8571865443425076 }, { "epoch": 0.9461501106135003, "grad_norm": 0.5006328225135803, "learning_rate": 1.5170338203924995e-07, "loss": 0.3766046166419983, "step": 17535, "token_acc": 0.8692191053828658 }, { "epoch": 0.9462040684184967, "grad_norm": 0.4760633707046509, "learning_rate": 1.5140028863955048e-07, "loss": 0.3902345299720764, "step": 17536, "token_acc": 0.8642055618799804 }, { "epoch": 0.9462580262234932, "grad_norm": 0.501794695854187, "learning_rate": 1.51097496013507e-07, "loss": 0.33536866307258606, "step": 17537, "token_acc": 0.8814658951429073 }, { "epoch": 0.9463119840284897, "grad_norm": 0.3030371069908142, "learning_rate": 1.5079500417036541e-07, "loss": 0.2992735505104065, "step": 17538, "token_acc": 0.8943107463679716 }, { "epoch": 0.9463659418334862, "grad_norm": 0.5251502394676208, "learning_rate": 1.504928131193628e-07, "loss": 0.32359278202056885, "step": 17539, "token_acc": 0.884175927375176 }, { "epoch": 0.9464198996384827, "grad_norm": 0.34659576416015625, "learning_rate": 1.5019092286973068e-07, "loss": 0.3415972888469696, "step": 17540, "token_acc": 0.8791635548917103 }, { "epoch": 0.9464738574434792, "grad_norm": 0.5369040966033936, "learning_rate": 1.4988933343068723e-07, "loss": 0.3039635121822357, "step": 17541, "token_acc": 0.8915289256198347 }, { "epoch": 0.9465278152484757, "grad_norm": 0.3899606466293335, "learning_rate": 1.4958804481144396e-07, "loss": 0.29521444439888, "step": 17542, "token_acc": 0.8910143910143911 }, { "epoch": 0.9465817730534721, "grad_norm": 0.4247800409793854, "learning_rate": 1.492870570212013e-07, "loss": 0.33253467082977295, "step": 17543, "token_acc": 0.8812204458455303 }, { "epoch": 0.9466357308584686, "grad_norm": 0.3983880877494812, "learning_rate": 1.4898637006915074e-07, "loss": 0.3238305151462555, "step": 17544, "token_acc": 0.8861777346457784 }, { "epoch": 0.9466896886634651, "grad_norm": 0.534151554107666, "learning_rate": 1.4868598396447608e-07, "loss": 0.3364177942276001, "step": 17545, "token_acc": 0.8795425198302896 }, { "epoch": 0.9467436464684617, "grad_norm": 0.4747922718524933, "learning_rate": 1.4838589871635002e-07, "loss": 0.32265329360961914, "step": 17546, "token_acc": 0.8862359550561798 }, { "epoch": 0.9467976042734582, "grad_norm": 0.4577009081840515, "learning_rate": 1.480861143339385e-07, "loss": 0.3332250118255615, "step": 17547, "token_acc": 0.8797104139595322 }, { "epoch": 0.9468515620784547, "grad_norm": 0.3719142973423004, "learning_rate": 1.4778663082639643e-07, "loss": 0.3515353798866272, "step": 17548, "token_acc": 0.8737074080352603 }, { "epoch": 0.9469055198834512, "grad_norm": 0.5400019884109497, "learning_rate": 1.4748744820286986e-07, "loss": 0.34943708777427673, "step": 17549, "token_acc": 0.8744332054412277 }, { "epoch": 0.9469594776884477, "grad_norm": 0.5004376769065857, "learning_rate": 1.471885664724948e-07, "loss": 0.34112221002578735, "step": 17550, "token_acc": 0.8771373414230557 }, { "epoch": 0.9470134354934441, "grad_norm": 0.5065428018569946, "learning_rate": 1.468899856444006e-07, "loss": 0.33885982632637024, "step": 17551, "token_acc": 0.8794526061581808 }, { "epoch": 0.9470673932984406, "grad_norm": 0.3693625032901764, "learning_rate": 1.4659170572770444e-07, "loss": 0.3660283386707306, "step": 17552, "token_acc": 0.871724289804008 }, { "epoch": 0.9471213511034371, "grad_norm": 0.5111552476882935, "learning_rate": 1.4629372673151675e-07, "loss": 0.3035339415073395, "step": 17553, "token_acc": 0.8883818216390948 }, { "epoch": 0.9471753089084336, "grad_norm": 0.4723781943321228, "learning_rate": 1.45996048664937e-07, "loss": 0.33864593505859375, "step": 17554, "token_acc": 0.8787416363422937 }, { "epoch": 0.9472292667134301, "grad_norm": 0.42379409074783325, "learning_rate": 1.4569867153705675e-07, "loss": 0.3390180468559265, "step": 17555, "token_acc": 0.8805371199259145 }, { "epoch": 0.9472832245184266, "grad_norm": 0.39211875200271606, "learning_rate": 1.4540159535695764e-07, "loss": 0.36522871255874634, "step": 17556, "token_acc": 0.8673363864677205 }, { "epoch": 0.9473371823234231, "grad_norm": 0.4074522852897644, "learning_rate": 1.4510482013371242e-07, "loss": 0.3011048436164856, "step": 17557, "token_acc": 0.8980996970531534 }, { "epoch": 0.9473911401284196, "grad_norm": 0.3452428877353668, "learning_rate": 1.448083458763827e-07, "loss": 0.3563157916069031, "step": 17558, "token_acc": 0.8709943449575872 }, { "epoch": 0.947445097933416, "grad_norm": 0.5154284238815308, "learning_rate": 1.4451217259402572e-07, "loss": 0.33277013897895813, "step": 17559, "token_acc": 0.8819845857418112 }, { "epoch": 0.9474990557384125, "grad_norm": 0.4346383810043335, "learning_rate": 1.4421630029568645e-07, "loss": 0.3223831057548523, "step": 17560, "token_acc": 0.88510017248242 }, { "epoch": 0.947553013543409, "grad_norm": 0.5093342065811157, "learning_rate": 1.4392072899039767e-07, "loss": 0.3302716612815857, "step": 17561, "token_acc": 0.8812957157784744 }, { "epoch": 0.9476069713484055, "grad_norm": 0.3483836352825165, "learning_rate": 1.436254586871899e-07, "loss": 0.30700552463531494, "step": 17562, "token_acc": 0.8929043631555752 }, { "epoch": 0.947660929153402, "grad_norm": 0.28348276019096375, "learning_rate": 1.433304893950771e-07, "loss": 0.32213836908340454, "step": 17563, "token_acc": 0.8829376035339591 }, { "epoch": 0.9477148869583986, "grad_norm": 0.48867344856262207, "learning_rate": 1.430358211230687e-07, "loss": 0.35046008229255676, "step": 17564, "token_acc": 0.8747380299854909 }, { "epoch": 0.9477688447633951, "grad_norm": 0.32966047525405884, "learning_rate": 1.4274145388016413e-07, "loss": 0.3572913408279419, "step": 17565, "token_acc": 0.8732413985234713 }, { "epoch": 0.9478228025683915, "grad_norm": 0.4225441515445709, "learning_rate": 1.4244738767535404e-07, "loss": 0.31457996368408203, "step": 17566, "token_acc": 0.8887349953831948 }, { "epoch": 0.947876760373388, "grad_norm": 0.49181121587753296, "learning_rate": 1.42153622517619e-07, "loss": 0.30352717638015747, "step": 17567, "token_acc": 0.8927432216905901 }, { "epoch": 0.9479307181783845, "grad_norm": 0.4232393503189087, "learning_rate": 1.4186015841592848e-07, "loss": 0.34418636560440063, "step": 17568, "token_acc": 0.8776454099509461 }, { "epoch": 0.947984675983381, "grad_norm": 0.5200511813163757, "learning_rate": 1.4156699537924756e-07, "loss": 0.34409698843955994, "step": 17569, "token_acc": 0.8753457814661134 }, { "epoch": 0.9480386337883775, "grad_norm": 0.3386344909667969, "learning_rate": 1.4127413341652684e-07, "loss": 0.342378705739975, "step": 17570, "token_acc": 0.8809585158464314 }, { "epoch": 0.948092591593374, "grad_norm": 0.39126893877983093, "learning_rate": 1.409815725367114e-07, "loss": 0.2960873246192932, "step": 17571, "token_acc": 0.8910743801652893 }, { "epoch": 0.9481465493983705, "grad_norm": 0.40513578057289124, "learning_rate": 1.4068931274873633e-07, "loss": 0.3726032078266144, "step": 17572, "token_acc": 0.8710128707330722 }, { "epoch": 0.948200507203367, "grad_norm": 0.4587228000164032, "learning_rate": 1.4039735406152776e-07, "loss": 0.29044944047927856, "step": 17573, "token_acc": 0.8949798038084247 }, { "epoch": 0.9482544650083634, "grad_norm": 0.41075244545936584, "learning_rate": 1.4010569648400085e-07, "loss": 0.3354082703590393, "step": 17574, "token_acc": 0.8797133406835722 }, { "epoch": 0.9483084228133599, "grad_norm": 0.4234570264816284, "learning_rate": 1.3981434002506288e-07, "loss": 0.3013072907924652, "step": 17575, "token_acc": 0.8872778297474275 }, { "epoch": 0.9483623806183564, "grad_norm": 0.30398696660995483, "learning_rate": 1.3952328469361122e-07, "loss": 0.2764122784137726, "step": 17576, "token_acc": 0.8970955447496386 }, { "epoch": 0.9484163384233529, "grad_norm": 0.49453264474868774, "learning_rate": 1.3923253049853646e-07, "loss": 0.34240245819091797, "step": 17577, "token_acc": 0.8778625954198473 }, { "epoch": 0.9484702962283494, "grad_norm": 0.3758198916912079, "learning_rate": 1.38942077448716e-07, "loss": 0.2899978756904602, "step": 17578, "token_acc": 0.8944763190460119 }, { "epoch": 0.948524254033346, "grad_norm": 0.40451300144195557, "learning_rate": 1.3865192555302165e-07, "loss": 0.30287274718284607, "step": 17579, "token_acc": 0.8946515397082658 }, { "epoch": 0.9485782118383425, "grad_norm": 0.4374086558818817, "learning_rate": 1.3836207482031515e-07, "loss": 0.3342320919036865, "step": 17580, "token_acc": 0.8803701255783212 }, { "epoch": 0.948632169643339, "grad_norm": 0.4092409014701843, "learning_rate": 1.3807252525944613e-07, "loss": 0.35182422399520874, "step": 17581, "token_acc": 0.8827549947423764 }, { "epoch": 0.9486861274483354, "grad_norm": 0.35999736189842224, "learning_rate": 1.3778327687925975e-07, "loss": 0.349579393863678, "step": 17582, "token_acc": 0.8702116260380391 }, { "epoch": 0.9487400852533319, "grad_norm": 0.32101598381996155, "learning_rate": 1.374943296885878e-07, "loss": 0.32180094718933105, "step": 17583, "token_acc": 0.8843102915421861 }, { "epoch": 0.9487940430583284, "grad_norm": 0.4215802252292633, "learning_rate": 1.372056836962543e-07, "loss": 0.33100625872612, "step": 17584, "token_acc": 0.8815179198875615 }, { "epoch": 0.9488480008633249, "grad_norm": 0.39532312750816345, "learning_rate": 1.3691733891107673e-07, "loss": 0.2616537809371948, "step": 17585, "token_acc": 0.9053412462908011 }, { "epoch": 0.9489019586683214, "grad_norm": 0.4545242190361023, "learning_rate": 1.366292953418602e-07, "loss": 0.33512359857559204, "step": 17586, "token_acc": 0.8760540757596038 }, { "epoch": 0.9489559164733179, "grad_norm": 0.6521955728530884, "learning_rate": 1.3634155299740105e-07, "loss": 0.38787612318992615, "step": 17587, "token_acc": 0.8581130690161527 }, { "epoch": 0.9490098742783144, "grad_norm": 0.30731332302093506, "learning_rate": 1.3605411188648664e-07, "loss": 0.35119864344596863, "step": 17588, "token_acc": 0.8772417277090174 }, { "epoch": 0.9490638320833108, "grad_norm": 0.3730665147304535, "learning_rate": 1.3576697201789667e-07, "loss": 0.36333104968070984, "step": 17589, "token_acc": 0.8678867600800686 }, { "epoch": 0.9491177898883073, "grad_norm": 0.42527198791503906, "learning_rate": 1.3548013340039745e-07, "loss": 0.37929099798202515, "step": 17590, "token_acc": 0.8724524845431646 }, { "epoch": 0.9491717476933038, "grad_norm": 0.4358029067516327, "learning_rate": 1.3519359604275195e-07, "loss": 0.3529208302497864, "step": 17591, "token_acc": 0.8774495025625565 }, { "epoch": 0.9492257054983003, "grad_norm": 0.64082270860672, "learning_rate": 1.3490735995370874e-07, "loss": 0.3453819155693054, "step": 17592, "token_acc": 0.8811203319502074 }, { "epoch": 0.9492796633032968, "grad_norm": 0.40151309967041016, "learning_rate": 1.3462142514201193e-07, "loss": 0.31458261609077454, "step": 17593, "token_acc": 0.8927962209683769 }, { "epoch": 0.9493336211082933, "grad_norm": 0.5550570487976074, "learning_rate": 1.343357916163923e-07, "loss": 0.3691215515136719, "step": 17594, "token_acc": 0.8672402159244265 }, { "epoch": 0.9493875789132898, "grad_norm": 0.3928762972354889, "learning_rate": 1.3405045938557182e-07, "loss": 0.34781956672668457, "step": 17595, "token_acc": 0.8766772383508172 }, { "epoch": 0.9494415367182863, "grad_norm": 0.45291709899902344, "learning_rate": 1.337654284582679e-07, "loss": 0.3553704023361206, "step": 17596, "token_acc": 0.8741056477393819 }, { "epoch": 0.9494954945232827, "grad_norm": 0.49458324909210205, "learning_rate": 1.3348069884318137e-07, "loss": 0.32160240411758423, "step": 17597, "token_acc": 0.8789726533628973 }, { "epoch": 0.9495494523282793, "grad_norm": 0.40028247237205505, "learning_rate": 1.3319627054900975e-07, "loss": 0.38607466220855713, "step": 17598, "token_acc": 0.8692111340707427 }, { "epoch": 0.9496034101332758, "grad_norm": 0.48014935851097107, "learning_rate": 1.3291214358443937e-07, "loss": 0.3387834429740906, "step": 17599, "token_acc": 0.8771313941825476 }, { "epoch": 0.9496573679382723, "grad_norm": 0.47253182530403137, "learning_rate": 1.326283179581478e-07, "loss": 0.3626284897327423, "step": 17600, "token_acc": 0.8747567417292188 }, { "epoch": 0.9497113257432688, "grad_norm": 0.38105425238609314, "learning_rate": 1.323447936788025e-07, "loss": 0.32844266295433044, "step": 17601, "token_acc": 0.8837496145544249 }, { "epoch": 0.9497652835482653, "grad_norm": 0.5720860958099365, "learning_rate": 1.320615707550621e-07, "loss": 0.36954599618911743, "step": 17602, "token_acc": 0.867758749069248 }, { "epoch": 0.9498192413532618, "grad_norm": 0.42179298400878906, "learning_rate": 1.3177864919557525e-07, "loss": 0.3270515203475952, "step": 17603, "token_acc": 0.8818460812086875 }, { "epoch": 0.9498731991582583, "grad_norm": 0.37434864044189453, "learning_rate": 1.3149602900898506e-07, "loss": 0.3609253168106079, "step": 17604, "token_acc": 0.869325551232166 }, { "epoch": 0.9499271569632547, "grad_norm": 0.40663251280784607, "learning_rate": 1.3121371020392014e-07, "loss": 0.33404481410980225, "step": 17605, "token_acc": 0.885785536159601 }, { "epoch": 0.9499811147682512, "grad_norm": 0.29598575830459595, "learning_rate": 1.3093169278900364e-07, "loss": 0.27819210290908813, "step": 17606, "token_acc": 0.8984464602231258 }, { "epoch": 0.9500350725732477, "grad_norm": 0.3665207326412201, "learning_rate": 1.306499767728475e-07, "loss": 0.2982889711856842, "step": 17607, "token_acc": 0.8935557422296891 }, { "epoch": 0.9500890303782442, "grad_norm": 0.5158475041389465, "learning_rate": 1.3036856216405492e-07, "loss": 0.36681538820266724, "step": 17608, "token_acc": 0.8728139904610492 }, { "epoch": 0.9501429881832407, "grad_norm": 0.43899446725845337, "learning_rate": 1.3008744897122227e-07, "loss": 0.3368499279022217, "step": 17609, "token_acc": 0.8764671926111218 }, { "epoch": 0.9501969459882372, "grad_norm": 0.5370049476623535, "learning_rate": 1.2980663720293273e-07, "loss": 0.30790483951568604, "step": 17610, "token_acc": 0.89324374766704 }, { "epoch": 0.9502509037932337, "grad_norm": 0.39862698316574097, "learning_rate": 1.2952612686776278e-07, "loss": 0.3011999726295471, "step": 17611, "token_acc": 0.8919104518610081 }, { "epoch": 0.9503048615982301, "grad_norm": 0.36193910241127014, "learning_rate": 1.2924591797428e-07, "loss": 0.24184584617614746, "step": 17612, "token_acc": 0.9105123551584532 }, { "epoch": 0.9503588194032266, "grad_norm": 0.47272929549217224, "learning_rate": 1.2896601053103974e-07, "loss": 0.3592926263809204, "step": 17613, "token_acc": 0.8696395011105416 }, { "epoch": 0.9504127772082231, "grad_norm": 0.42098256945610046, "learning_rate": 1.2868640454659298e-07, "loss": 0.32021406292915344, "step": 17614, "token_acc": 0.8831210191082802 }, { "epoch": 0.9504667350132197, "grad_norm": 0.35044172406196594, "learning_rate": 1.2840710002947731e-07, "loss": 0.30935245752334595, "step": 17615, "token_acc": 0.8880061789694361 }, { "epoch": 0.9505206928182162, "grad_norm": 0.36091241240501404, "learning_rate": 1.2812809698822148e-07, "loss": 0.28365659713745117, "step": 17616, "token_acc": 0.8973303539927084 }, { "epoch": 0.9505746506232127, "grad_norm": 0.41896840929985046, "learning_rate": 1.2784939543134867e-07, "loss": 0.289084255695343, "step": 17617, "token_acc": 0.8915404209679582 }, { "epoch": 0.9506286084282092, "grad_norm": 0.4440893828868866, "learning_rate": 1.2757099536736873e-07, "loss": 0.30810195207595825, "step": 17618, "token_acc": 0.8859176606387072 }, { "epoch": 0.9506825662332057, "grad_norm": 0.39890292286872864, "learning_rate": 1.2729289680478484e-07, "loss": 0.3042573630809784, "step": 17619, "token_acc": 0.8906648906648906 }, { "epoch": 0.9507365240382021, "grad_norm": 0.4952653646469116, "learning_rate": 1.2701509975208915e-07, "loss": 0.3416769504547119, "step": 17620, "token_acc": 0.8760265936644506 }, { "epoch": 0.9507904818431986, "grad_norm": 0.37542787194252014, "learning_rate": 1.2673760421776592e-07, "loss": 0.32191699743270874, "step": 17621, "token_acc": 0.8836018671886764 }, { "epoch": 0.9508444396481951, "grad_norm": 0.3656744062900543, "learning_rate": 1.2646041021028955e-07, "loss": 0.3385555148124695, "step": 17622, "token_acc": 0.8817512666093108 }, { "epoch": 0.9508983974531916, "grad_norm": 0.5058419704437256, "learning_rate": 1.2618351773812543e-07, "loss": 0.3023490607738495, "step": 17623, "token_acc": 0.8874150223843476 }, { "epoch": 0.9509523552581881, "grad_norm": 0.5829048752784729, "learning_rate": 1.2590692680973127e-07, "loss": 0.40896278619766235, "step": 17624, "token_acc": 0.8584179747614651 }, { "epoch": 0.9510063130631846, "grad_norm": 0.318605899810791, "learning_rate": 1.2563063743355252e-07, "loss": 0.31855911016464233, "step": 17625, "token_acc": 0.8841206875826422 }, { "epoch": 0.9510602708681811, "grad_norm": 0.3941056728363037, "learning_rate": 1.253546496180269e-07, "loss": 0.31332266330718994, "step": 17626, "token_acc": 0.8902495025256391 }, { "epoch": 0.9511142286731775, "grad_norm": 0.4465503692626953, "learning_rate": 1.250789633715843e-07, "loss": 0.3309875726699829, "step": 17627, "token_acc": 0.8814691151919867 }, { "epoch": 0.951168186478174, "grad_norm": 0.42021793127059937, "learning_rate": 1.248035787026436e-07, "loss": 0.32723483443260193, "step": 17628, "token_acc": 0.8816050854191498 }, { "epoch": 0.9512221442831705, "grad_norm": 0.3834531009197235, "learning_rate": 1.2452849561961356e-07, "loss": 0.3381217122077942, "step": 17629, "token_acc": 0.8819730695245946 }, { "epoch": 0.951276102088167, "grad_norm": 0.5007591843605042, "learning_rate": 1.2425371413089639e-07, "loss": 0.3284394145011902, "step": 17630, "token_acc": 0.8841660802251935 }, { "epoch": 0.9513300598931635, "grad_norm": 0.4070816934108734, "learning_rate": 1.2397923424488422e-07, "loss": 0.3550485074520111, "step": 17631, "token_acc": 0.8752186588921282 }, { "epoch": 0.9513840176981601, "grad_norm": 0.4260728657245636, "learning_rate": 1.2370505596995818e-07, "loss": 0.3052136301994324, "step": 17632, "token_acc": 0.891384778012685 }, { "epoch": 0.9514379755031566, "grad_norm": 0.41388821601867676, "learning_rate": 1.2343117931449378e-07, "loss": 0.3392581343650818, "step": 17633, "token_acc": 0.8820577164366374 }, { "epoch": 0.9514919333081531, "grad_norm": 0.46637463569641113, "learning_rate": 1.231576042868532e-07, "loss": 0.33303302526474, "step": 17634, "token_acc": 0.8879322316344952 }, { "epoch": 0.9515458911131495, "grad_norm": 0.4350394904613495, "learning_rate": 1.2288433089539086e-07, "loss": 0.2716703414916992, "step": 17635, "token_acc": 0.9020979020979021 }, { "epoch": 0.951599848918146, "grad_norm": 0.44701722264289856, "learning_rate": 1.2261135914845456e-07, "loss": 0.39838707447052, "step": 17636, "token_acc": 0.8618064705021227 }, { "epoch": 0.9516538067231425, "grad_norm": 0.3906184136867523, "learning_rate": 1.2233868905437984e-07, "loss": 0.30904388427734375, "step": 17637, "token_acc": 0.8875957990349135 }, { "epoch": 0.951707764528139, "grad_norm": 0.3535537123680115, "learning_rate": 1.220663206214945e-07, "loss": 0.3493175506591797, "step": 17638, "token_acc": 0.8760262725779967 }, { "epoch": 0.9517617223331355, "grad_norm": 0.41559311747550964, "learning_rate": 1.2179425385811627e-07, "loss": 0.3127662241458893, "step": 17639, "token_acc": 0.8828182217665153 }, { "epoch": 0.951815680138132, "grad_norm": 0.4674305319786072, "learning_rate": 1.2152248877255301e-07, "loss": 0.31968677043914795, "step": 17640, "token_acc": 0.8873887240356083 }, { "epoch": 0.9518696379431285, "grad_norm": 0.45892390608787537, "learning_rate": 1.2125102537310474e-07, "loss": 0.35539576411247253, "step": 17641, "token_acc": 0.8741532440162577 }, { "epoch": 0.951923595748125, "grad_norm": 0.3879711925983429, "learning_rate": 1.2097986366806258e-07, "loss": 0.30262506008148193, "step": 17642, "token_acc": 0.8909090909090909 }, { "epoch": 0.9519775535531214, "grad_norm": 0.4611630141735077, "learning_rate": 1.2070900366570771e-07, "loss": 0.3220396935939789, "step": 17643, "token_acc": 0.8826302729528536 }, { "epoch": 0.9520315113581179, "grad_norm": 0.4750669002532959, "learning_rate": 1.2043844537431237e-07, "loss": 0.3231278657913208, "step": 17644, "token_acc": 0.8864781941705019 }, { "epoch": 0.9520854691631144, "grad_norm": 0.44457677006721497, "learning_rate": 1.2016818880213887e-07, "loss": 0.38218623399734497, "step": 17645, "token_acc": 0.8654434250764526 }, { "epoch": 0.9521394269681109, "grad_norm": 0.4741212725639343, "learning_rate": 1.198982339574395e-07, "loss": 0.31884655356407166, "step": 17646, "token_acc": 0.8809971777986829 }, { "epoch": 0.9521933847731074, "grad_norm": 0.32824060320854187, "learning_rate": 1.1962858084846097e-07, "loss": 0.2863481044769287, "step": 17647, "token_acc": 0.8935774123487434 }, { "epoch": 0.952247342578104, "grad_norm": 0.4300515353679657, "learning_rate": 1.193592294834356e-07, "loss": 0.3200567960739136, "step": 17648, "token_acc": 0.8876270962630397 }, { "epoch": 0.9523013003831005, "grad_norm": 0.38144010305404663, "learning_rate": 1.1909017987059235e-07, "loss": 0.3002755343914032, "step": 17649, "token_acc": 0.8935579093592637 }, { "epoch": 0.9523552581880969, "grad_norm": 0.41756388545036316, "learning_rate": 1.1882143201814578e-07, "loss": 0.3467661738395691, "step": 17650, "token_acc": 0.8789158746920098 }, { "epoch": 0.9524092159930934, "grad_norm": 0.49390003085136414, "learning_rate": 1.1855298593430487e-07, "loss": 0.33680471777915955, "step": 17651, "token_acc": 0.8782401569286815 }, { "epoch": 0.9524631737980899, "grad_norm": 0.39151233434677124, "learning_rate": 1.182848416272675e-07, "loss": 0.3311377167701721, "step": 17652, "token_acc": 0.8814932715960063 }, { "epoch": 0.9525171316030864, "grad_norm": 0.5090852379798889, "learning_rate": 1.1801699910522157e-07, "loss": 0.3470730781555176, "step": 17653, "token_acc": 0.8746177370030581 }, { "epoch": 0.9525710894080829, "grad_norm": 0.3336445987224579, "learning_rate": 1.1774945837634833e-07, "loss": 0.3221144676208496, "step": 17654, "token_acc": 0.8845763939174511 }, { "epoch": 0.9526250472130794, "grad_norm": 0.3872830271720886, "learning_rate": 1.1748221944881677e-07, "loss": 0.3347589075565338, "step": 17655, "token_acc": 0.8788241553604682 }, { "epoch": 0.9526790050180759, "grad_norm": 0.48405569791793823, "learning_rate": 1.1721528233079039e-07, "loss": 0.36682841181755066, "step": 17656, "token_acc": 0.8737258481667427 }, { "epoch": 0.9527329628230724, "grad_norm": 0.4317091703414917, "learning_rate": 1.1694864703042042e-07, "loss": 0.3537316918373108, "step": 17657, "token_acc": 0.8789232105197866 }, { "epoch": 0.9527869206280688, "grad_norm": 0.453708678483963, "learning_rate": 1.1668231355584925e-07, "loss": 0.33235597610473633, "step": 17658, "token_acc": 0.8833311679875276 }, { "epoch": 0.9528408784330653, "grad_norm": 0.4240306317806244, "learning_rate": 1.1641628191521148e-07, "loss": 0.31800562143325806, "step": 17659, "token_acc": 0.8846503178928247 }, { "epoch": 0.9528948362380618, "grad_norm": 0.4045332670211792, "learning_rate": 1.1615055211663173e-07, "loss": 0.3499469757080078, "step": 17660, "token_acc": 0.8757729061270376 }, { "epoch": 0.9529487940430583, "grad_norm": 0.4457959234714508, "learning_rate": 1.1588512416822462e-07, "loss": 0.30354321002960205, "step": 17661, "token_acc": 0.8927735517846694 }, { "epoch": 0.9530027518480548, "grad_norm": 0.46981310844421387, "learning_rate": 1.1561999807809587e-07, "loss": 0.3446654677391052, "step": 17662, "token_acc": 0.8737240698057294 }, { "epoch": 0.9530567096530513, "grad_norm": 0.44024160504341125, "learning_rate": 1.1535517385434347e-07, "loss": 0.29546868801116943, "step": 17663, "token_acc": 0.8919103813921669 }, { "epoch": 0.9531106674580478, "grad_norm": 0.41985493898391724, "learning_rate": 1.1509065150505538e-07, "loss": 0.3232540488243103, "step": 17664, "token_acc": 0.8887889148821306 }, { "epoch": 0.9531646252630444, "grad_norm": 0.4134085774421692, "learning_rate": 1.1482643103830848e-07, "loss": 0.30158013105392456, "step": 17665, "token_acc": 0.8875261324041812 }, { "epoch": 0.9532185830680407, "grad_norm": 0.36681410670280457, "learning_rate": 1.1456251246217299e-07, "loss": 0.34426212310791016, "step": 17666, "token_acc": 0.8775376555337263 }, { "epoch": 0.9532725408730373, "grad_norm": 0.28517693281173706, "learning_rate": 1.1429889578470798e-07, "loss": 0.27229395508766174, "step": 17667, "token_acc": 0.9022612133942913 }, { "epoch": 0.9533264986780338, "grad_norm": 0.39532670378685, "learning_rate": 1.1403558101396595e-07, "loss": 0.3091953694820404, "step": 17668, "token_acc": 0.8889125572478432 }, { "epoch": 0.9533804564830303, "grad_norm": 0.4166109263896942, "learning_rate": 1.137725681579871e-07, "loss": 0.31778591871261597, "step": 17669, "token_acc": 0.8844545345919465 }, { "epoch": 0.9534344142880268, "grad_norm": 0.46065059304237366, "learning_rate": 1.135098572248039e-07, "loss": 0.34770166873931885, "step": 17670, "token_acc": 0.8798443812658986 }, { "epoch": 0.9534883720930233, "grad_norm": 0.39459502696990967, "learning_rate": 1.1324744822243994e-07, "loss": 0.3431219458580017, "step": 17671, "token_acc": 0.8757741992567687 }, { "epoch": 0.9535423298980198, "grad_norm": 0.5933991074562073, "learning_rate": 1.1298534115890991e-07, "loss": 0.3257310390472412, "step": 17672, "token_acc": 0.8891494350884673 }, { "epoch": 0.9535962877030162, "grad_norm": 0.2968597412109375, "learning_rate": 1.127235360422163e-07, "loss": 0.34556475281715393, "step": 17673, "token_acc": 0.8760657101268455 }, { "epoch": 0.9536502455080127, "grad_norm": 0.31150126457214355, "learning_rate": 1.1246203288035606e-07, "loss": 0.3169444501399994, "step": 17674, "token_acc": 0.8838162544169611 }, { "epoch": 0.9537042033130092, "grad_norm": 0.302206426858902, "learning_rate": 1.1220083168131501e-07, "loss": 0.2861068844795227, "step": 17675, "token_acc": 0.8989039329464862 }, { "epoch": 0.9537581611180057, "grad_norm": 0.3998616635799408, "learning_rate": 1.1193993245307123e-07, "loss": 0.36203935742378235, "step": 17676, "token_acc": 0.8693722257450857 }, { "epoch": 0.9538121189230022, "grad_norm": 0.43027806282043457, "learning_rate": 1.1167933520359054e-07, "loss": 0.28613099455833435, "step": 17677, "token_acc": 0.8929628200180111 }, { "epoch": 0.9538660767279987, "grad_norm": 0.4751943349838257, "learning_rate": 1.1141903994083325e-07, "loss": 0.3107393980026245, "step": 17678, "token_acc": 0.8912552436269764 }, { "epoch": 0.9539200345329952, "grad_norm": 0.41144073009490967, "learning_rate": 1.1115904667274858e-07, "loss": 0.3355087637901306, "step": 17679, "token_acc": 0.8794078061911171 }, { "epoch": 0.9539739923379917, "grad_norm": 0.4648820161819458, "learning_rate": 1.1089935540727459e-07, "loss": 0.3816806375980377, "step": 17680, "token_acc": 0.8657095857634216 }, { "epoch": 0.9540279501429881, "grad_norm": 0.4174530804157257, "learning_rate": 1.1063996615234385e-07, "loss": 0.3088706433773041, "step": 17681, "token_acc": 0.885336414997432 }, { "epoch": 0.9540819079479846, "grad_norm": 0.420056015253067, "learning_rate": 1.103808789158789e-07, "loss": 0.3250245451927185, "step": 17682, "token_acc": 0.8820011668611435 }, { "epoch": 0.9541358657529811, "grad_norm": 0.3601936101913452, "learning_rate": 1.1012209370579119e-07, "loss": 0.3434889614582062, "step": 17683, "token_acc": 0.8779925323962223 }, { "epoch": 0.9541898235579777, "grad_norm": 0.44364047050476074, "learning_rate": 1.0986361052998329e-07, "loss": 0.33160629868507385, "step": 17684, "token_acc": 0.8826920081009503 }, { "epoch": 0.9542437813629742, "grad_norm": 0.5252246856689453, "learning_rate": 1.0960542939635e-07, "loss": 0.41065502166748047, "step": 17685, "token_acc": 0.8553507424475166 }, { "epoch": 0.9542977391679707, "grad_norm": 0.33098864555358887, "learning_rate": 1.093475503127761e-07, "loss": 0.299984335899353, "step": 17686, "token_acc": 0.8925878668367939 }, { "epoch": 0.9543516969729672, "grad_norm": 0.40254899859428406, "learning_rate": 1.0908997328713644e-07, "loss": 0.34682685136795044, "step": 17687, "token_acc": 0.8799824791940429 }, { "epoch": 0.9544056547779637, "grad_norm": 0.47164931893348694, "learning_rate": 1.0883269832729804e-07, "loss": 0.3395649790763855, "step": 17688, "token_acc": 0.8753964657906661 }, { "epoch": 0.9544596125829601, "grad_norm": 0.3632980287075043, "learning_rate": 1.0857572544111905e-07, "loss": 0.30127444863319397, "step": 17689, "token_acc": 0.891941391941392 }, { "epoch": 0.9545135703879566, "grad_norm": 0.41082844138145447, "learning_rate": 1.0831905463644543e-07, "loss": 0.29252588748931885, "step": 17690, "token_acc": 0.8947202521670606 }, { "epoch": 0.9545675281929531, "grad_norm": 0.3915325999259949, "learning_rate": 1.0806268592111646e-07, "loss": 0.3585948348045349, "step": 17691, "token_acc": 0.8690818728747057 }, { "epoch": 0.9546214859979496, "grad_norm": 0.4249671697616577, "learning_rate": 1.0780661930296144e-07, "loss": 0.280511736869812, "step": 17692, "token_acc": 0.897501784439686 }, { "epoch": 0.9546754438029461, "grad_norm": 0.42758893966674805, "learning_rate": 1.0755085478980188e-07, "loss": 0.2921139895915985, "step": 17693, "token_acc": 0.8949605343047966 }, { "epoch": 0.9547294016079426, "grad_norm": 0.4736936092376709, "learning_rate": 1.0729539238944709e-07, "loss": 0.34154677391052246, "step": 17694, "token_acc": 0.8806702775897088 }, { "epoch": 0.9547833594129391, "grad_norm": 0.3307316303253174, "learning_rate": 1.0704023210969972e-07, "loss": 0.29375529289245605, "step": 17695, "token_acc": 0.894211324570273 }, { "epoch": 0.9548373172179355, "grad_norm": 0.5557785630226135, "learning_rate": 1.0678537395835132e-07, "loss": 0.3100879192352295, "step": 17696, "token_acc": 0.8883059739380605 }, { "epoch": 0.954891275022932, "grad_norm": 0.41991496086120605, "learning_rate": 1.0653081794318676e-07, "loss": 0.2881837785243988, "step": 17697, "token_acc": 0.8943187937533656 }, { "epoch": 0.9549452328279285, "grad_norm": 0.41639283299446106, "learning_rate": 1.0627656407197873e-07, "loss": 0.3610522747039795, "step": 17698, "token_acc": 0.871608832807571 }, { "epoch": 0.954999190632925, "grad_norm": 0.39584723114967346, "learning_rate": 1.0602261235249323e-07, "loss": 0.30171090364456177, "step": 17699, "token_acc": 0.8922747928361401 }, { "epoch": 0.9550531484379216, "grad_norm": 0.48025867342948914, "learning_rate": 1.0576896279248406e-07, "loss": 0.36320051550865173, "step": 17700, "token_acc": 0.8682363264112314 }, { "epoch": 0.9551071062429181, "grad_norm": 0.46506205201148987, "learning_rate": 1.0551561539970056e-07, "loss": 0.32900846004486084, "step": 17701, "token_acc": 0.885047965597089 }, { "epoch": 0.9551610640479146, "grad_norm": 0.4494846761226654, "learning_rate": 1.0526257018187658e-07, "loss": 0.32267314195632935, "step": 17702, "token_acc": 0.8864121729845168 }, { "epoch": 0.9552150218529111, "grad_norm": 0.3364485204219818, "learning_rate": 1.0500982714674257e-07, "loss": 0.33239150047302246, "step": 17703, "token_acc": 0.8788995656180071 }, { "epoch": 0.9552689796579075, "grad_norm": 0.475841224193573, "learning_rate": 1.0475738630201681e-07, "loss": 0.32910168170928955, "step": 17704, "token_acc": 0.8858744800144691 }, { "epoch": 0.955322937462904, "grad_norm": 0.3991243541240692, "learning_rate": 1.0450524765540759e-07, "loss": 0.31113022565841675, "step": 17705, "token_acc": 0.8896698882891929 }, { "epoch": 0.9553768952679005, "grad_norm": 0.4689886271953583, "learning_rate": 1.0425341121461541e-07, "loss": 0.35523736476898193, "step": 17706, "token_acc": 0.8775510204081632 }, { "epoch": 0.955430853072897, "grad_norm": 0.524369478225708, "learning_rate": 1.0400187698733188e-07, "loss": 0.2905215322971344, "step": 17707, "token_acc": 0.8905304378782485 }, { "epoch": 0.9554848108778935, "grad_norm": 0.3814759850502014, "learning_rate": 1.0375064498123865e-07, "loss": 0.28648465871810913, "step": 17708, "token_acc": 0.8945614789337919 }, { "epoch": 0.95553876868289, "grad_norm": 0.3930143415927887, "learning_rate": 1.0349971520400847e-07, "loss": 0.2932497262954712, "step": 17709, "token_acc": 0.8966464296039318 }, { "epoch": 0.9555927264878865, "grad_norm": 0.4656735360622406, "learning_rate": 1.0324908766330521e-07, "loss": 0.35352516174316406, "step": 17710, "token_acc": 0.8773561811505508 }, { "epoch": 0.955646684292883, "grad_norm": 0.3787997364997864, "learning_rate": 1.0299876236678163e-07, "loss": 0.32855841517448425, "step": 17711, "token_acc": 0.8843738893496031 }, { "epoch": 0.9557006420978794, "grad_norm": 0.4547041654586792, "learning_rate": 1.0274873932208274e-07, "loss": 0.31865572929382324, "step": 17712, "token_acc": 0.888234449073409 }, { "epoch": 0.9557545999028759, "grad_norm": 0.44371259212493896, "learning_rate": 1.0249901853684463e-07, "loss": 0.29062384366989136, "step": 17713, "token_acc": 0.8910458626546954 }, { "epoch": 0.9558085577078724, "grad_norm": 0.4680747389793396, "learning_rate": 1.0224960001869454e-07, "loss": 0.3950788676738739, "step": 17714, "token_acc": 0.868025078369906 }, { "epoch": 0.9558625155128689, "grad_norm": 0.39770281314849854, "learning_rate": 1.0200048377524863e-07, "loss": 0.2847522497177124, "step": 17715, "token_acc": 0.8944069431051109 }, { "epoch": 0.9559164733178654, "grad_norm": 0.31170886754989624, "learning_rate": 1.0175166981411411e-07, "loss": 0.3083041310310364, "step": 17716, "token_acc": 0.8905447714464622 }, { "epoch": 0.955970431122862, "grad_norm": 0.4631551206111908, "learning_rate": 1.0150315814289158e-07, "loss": 0.33270591497421265, "step": 17717, "token_acc": 0.8784907269238968 }, { "epoch": 0.9560243889278585, "grad_norm": 0.33626991510391235, "learning_rate": 1.0125494876916942e-07, "loss": 0.32480305433273315, "step": 17718, "token_acc": 0.8822217717413339 }, { "epoch": 0.9560783467328549, "grad_norm": 0.4010402262210846, "learning_rate": 1.0100704170052711e-07, "loss": 0.30758559703826904, "step": 17719, "token_acc": 0.8845835403212452 }, { "epoch": 0.9561323045378514, "grad_norm": 0.4089992046356201, "learning_rate": 1.0075943694453749e-07, "loss": 0.3013113737106323, "step": 17720, "token_acc": 0.8963414634146342 }, { "epoch": 0.9561862623428479, "grad_norm": 0.4772307872772217, "learning_rate": 1.0051213450876118e-07, "loss": 0.34638711810112, "step": 17721, "token_acc": 0.878570176975644 }, { "epoch": 0.9562402201478444, "grad_norm": 0.3494146764278412, "learning_rate": 1.0026513440074992e-07, "loss": 0.33864137530326843, "step": 17722, "token_acc": 0.8823432614777028 }, { "epoch": 0.9562941779528409, "grad_norm": 0.3198411464691162, "learning_rate": 1.0001843662804989e-07, "loss": 0.33503127098083496, "step": 17723, "token_acc": 0.8785972426716637 }, { "epoch": 0.9563481357578374, "grad_norm": 0.4139443635940552, "learning_rate": 9.977204119819284e-08, "loss": 0.322437047958374, "step": 17724, "token_acc": 0.885758557543316 }, { "epoch": 0.9564020935628339, "grad_norm": 0.4688851833343506, "learning_rate": 9.952594811870276e-08, "loss": 0.3619183301925659, "step": 17725, "token_acc": 0.874051810505224 }, { "epoch": 0.9564560513678304, "grad_norm": 0.5452715754508972, "learning_rate": 9.928015739709806e-08, "loss": 0.3503551781177521, "step": 17726, "token_acc": 0.8722537207654146 }, { "epoch": 0.9565100091728268, "grad_norm": 0.42573028802871704, "learning_rate": 9.903466904088388e-08, "loss": 0.3585430979728699, "step": 17727, "token_acc": 0.8743801652892562 }, { "epoch": 0.9565639669778233, "grad_norm": 0.3983246088027954, "learning_rate": 9.878948305755642e-08, "loss": 0.327605664730072, "step": 17728, "token_acc": 0.8843575418994414 }, { "epoch": 0.9566179247828198, "grad_norm": 0.37738528847694397, "learning_rate": 9.854459945460526e-08, "loss": 0.27518242597579956, "step": 17729, "token_acc": 0.900955122334162 }, { "epoch": 0.9566718825878163, "grad_norm": 0.44288450479507446, "learning_rate": 9.830001823950775e-08, "loss": 0.3167663812637329, "step": 17730, "token_acc": 0.8875910402638449 }, { "epoch": 0.9567258403928128, "grad_norm": 0.519497811794281, "learning_rate": 9.805573941973346e-08, "loss": 0.3131539523601532, "step": 17731, "token_acc": 0.8890925756186985 }, { "epoch": 0.9567797981978093, "grad_norm": 0.44110000133514404, "learning_rate": 9.781176300274198e-08, "loss": 0.35477399826049805, "step": 17732, "token_acc": 0.8714227587142276 }, { "epoch": 0.9568337560028058, "grad_norm": 0.35795778036117554, "learning_rate": 9.756808899598735e-08, "loss": 0.3486572802066803, "step": 17733, "token_acc": 0.8777523591649986 }, { "epoch": 0.9568877138078024, "grad_norm": 0.31472986936569214, "learning_rate": 9.732471740690808e-08, "loss": 0.2727231979370117, "step": 17734, "token_acc": 0.8999899989999 }, { "epoch": 0.9569416716127987, "grad_norm": 0.48392319679260254, "learning_rate": 9.70816482429382e-08, "loss": 0.316779226064682, "step": 17735, "token_acc": 0.8838582677165354 }, { "epoch": 0.9569956294177953, "grad_norm": 0.36110618710517883, "learning_rate": 9.683888151149956e-08, "loss": 0.30401611328125, "step": 17736, "token_acc": 0.8901991873947082 }, { "epoch": 0.9570495872227918, "grad_norm": 0.42726314067840576, "learning_rate": 9.659641722000845e-08, "loss": 0.30508992075920105, "step": 17737, "token_acc": 0.8906342620035567 }, { "epoch": 0.9571035450277883, "grad_norm": 0.4284571409225464, "learning_rate": 9.635425537586673e-08, "loss": 0.30940014123916626, "step": 17738, "token_acc": 0.8860329776915616 }, { "epoch": 0.9571575028327848, "grad_norm": 0.3315035104751587, "learning_rate": 9.611239598647292e-08, "loss": 0.2941252887248993, "step": 17739, "token_acc": 0.8945392904710013 }, { "epoch": 0.9572114606377813, "grad_norm": 0.339308500289917, "learning_rate": 9.587083905921225e-08, "loss": 0.2785662114620209, "step": 17740, "token_acc": 0.9010738255033557 }, { "epoch": 0.9572654184427778, "grad_norm": 0.4436638355255127, "learning_rate": 9.56295846014621e-08, "loss": 0.28755706548690796, "step": 17741, "token_acc": 0.8975095785440613 }, { "epoch": 0.9573193762477742, "grad_norm": 0.46367570757865906, "learning_rate": 9.538863262058884e-08, "loss": 0.29248398542404175, "step": 17742, "token_acc": 0.8958864795918368 }, { "epoch": 0.9573733340527707, "grad_norm": 0.34125247597694397, "learning_rate": 9.514798312395213e-08, "loss": 0.333660364151001, "step": 17743, "token_acc": 0.8812492716466612 }, { "epoch": 0.9574272918577672, "grad_norm": 0.2983519434928894, "learning_rate": 9.490763611890164e-08, "loss": 0.34456032514572144, "step": 17744, "token_acc": 0.8809214412285883 }, { "epoch": 0.9574812496627637, "grad_norm": 0.4208865761756897, "learning_rate": 9.466759161277705e-08, "loss": 0.33219608664512634, "step": 17745, "token_acc": 0.8811270229363138 }, { "epoch": 0.9575352074677602, "grad_norm": 0.3807995617389679, "learning_rate": 9.442784961290919e-08, "loss": 0.3489794433116913, "step": 17746, "token_acc": 0.8799649532710281 }, { "epoch": 0.9575891652727567, "grad_norm": 0.394357293844223, "learning_rate": 9.418841012662105e-08, "loss": 0.3342081606388092, "step": 17747, "token_acc": 0.8884562240924507 }, { "epoch": 0.9576431230777532, "grad_norm": 0.3678574860095978, "learning_rate": 9.394927316122237e-08, "loss": 0.28460779786109924, "step": 17748, "token_acc": 0.8979958121447802 }, { "epoch": 0.9576970808827497, "grad_norm": 0.4273524582386017, "learning_rate": 9.371043872401842e-08, "loss": 0.30378007888793945, "step": 17749, "token_acc": 0.8913610557004756 }, { "epoch": 0.9577510386877461, "grad_norm": 0.377793550491333, "learning_rate": 9.347190682230223e-08, "loss": 0.2696133852005005, "step": 17750, "token_acc": 0.900577724036007 }, { "epoch": 0.9578049964927426, "grad_norm": 0.5038692951202393, "learning_rate": 9.32336774633591e-08, "loss": 0.29618266224861145, "step": 17751, "token_acc": 0.8911969785008715 }, { "epoch": 0.9578589542977392, "grad_norm": 0.5006957650184631, "learning_rate": 9.299575065446431e-08, "loss": 0.3290642499923706, "step": 17752, "token_acc": 0.8801526717557252 }, { "epoch": 0.9579129121027357, "grad_norm": 0.40125367045402527, "learning_rate": 9.275812640288428e-08, "loss": 0.3095788061618805, "step": 17753, "token_acc": 0.8861489387805177 }, { "epoch": 0.9579668699077322, "grad_norm": 0.4261813163757324, "learning_rate": 9.252080471587543e-08, "loss": 0.3446739912033081, "step": 17754, "token_acc": 0.878747433264887 }, { "epoch": 0.9580208277127287, "grad_norm": 0.39710530638694763, "learning_rate": 9.22837856006853e-08, "loss": 0.2571806311607361, "step": 17755, "token_acc": 0.9011231817344872 }, { "epoch": 0.9580747855177252, "grad_norm": 0.4320151209831238, "learning_rate": 9.204706906455363e-08, "loss": 0.3376951515674591, "step": 17756, "token_acc": 0.8768263943440692 }, { "epoch": 0.9581287433227216, "grad_norm": 0.32067635655403137, "learning_rate": 9.18106551147091e-08, "loss": 0.34412866830825806, "step": 17757, "token_acc": 0.8770538894634035 }, { "epoch": 0.9581827011277181, "grad_norm": 0.4450491666793823, "learning_rate": 9.157454375837038e-08, "loss": 0.2845284044742584, "step": 17758, "token_acc": 0.8937779305603301 }, { "epoch": 0.9582366589327146, "grad_norm": 0.4467537999153137, "learning_rate": 9.13387350027506e-08, "loss": 0.2700122892856598, "step": 17759, "token_acc": 0.8987362281270252 }, { "epoch": 0.9582906167377111, "grad_norm": 0.39347732067108154, "learning_rate": 9.110322885504952e-08, "loss": 0.32029974460601807, "step": 17760, "token_acc": 0.8840999637812387 }, { "epoch": 0.9583445745427076, "grad_norm": 0.5690864324569702, "learning_rate": 9.086802532246142e-08, "loss": 0.3923359811306, "step": 17761, "token_acc": 0.8660872127283441 }, { "epoch": 0.9583985323477041, "grad_norm": 0.36841708421707153, "learning_rate": 9.06331244121672e-08, "loss": 0.31782495975494385, "step": 17762, "token_acc": 0.8885842112858998 }, { "epoch": 0.9584524901527006, "grad_norm": 0.36325931549072266, "learning_rate": 9.039852613134114e-08, "loss": 0.30380895733833313, "step": 17763, "token_acc": 0.8882598732982881 }, { "epoch": 0.9585064479576971, "grad_norm": 0.44240084290504456, "learning_rate": 9.016423048714751e-08, "loss": 0.33901721239089966, "step": 17764, "token_acc": 0.8782879005260641 }, { "epoch": 0.9585604057626935, "grad_norm": 0.45940667390823364, "learning_rate": 8.993023748674278e-08, "loss": 0.30406785011291504, "step": 17765, "token_acc": 0.892536338396211 }, { "epoch": 0.95861436356769, "grad_norm": 0.3900504410266876, "learning_rate": 8.969654713727238e-08, "loss": 0.31993263959884644, "step": 17766, "token_acc": 0.8850281042411855 }, { "epoch": 0.9586683213726865, "grad_norm": 0.46219655871391296, "learning_rate": 8.946315944587391e-08, "loss": 0.2607539892196655, "step": 17767, "token_acc": 0.9014598540145985 }, { "epoch": 0.958722279177683, "grad_norm": 0.37951043248176575, "learning_rate": 8.923007441967391e-08, "loss": 0.35958001017570496, "step": 17768, "token_acc": 0.87093451066961 }, { "epoch": 0.9587762369826796, "grad_norm": 0.45754384994506836, "learning_rate": 8.899729206579111e-08, "loss": 0.3224300146102905, "step": 17769, "token_acc": 0.8843480678343064 }, { "epoch": 0.9588301947876761, "grad_norm": 0.408854603767395, "learning_rate": 8.876481239133538e-08, "loss": 0.29835787415504456, "step": 17770, "token_acc": 0.8869232946504387 }, { "epoch": 0.9588841525926726, "grad_norm": 0.39523035287857056, "learning_rate": 8.853263540340439e-08, "loss": 0.31910622119903564, "step": 17771, "token_acc": 0.8896693506938496 }, { "epoch": 0.9589381103976691, "grad_norm": 0.41905590891838074, "learning_rate": 8.830076110909136e-08, "loss": 0.38876205682754517, "step": 17772, "token_acc": 0.8636496777973052 }, { "epoch": 0.9589920682026655, "grad_norm": 0.44263243675231934, "learning_rate": 8.806918951547616e-08, "loss": 0.3777605891227722, "step": 17773, "token_acc": 0.8668665667166416 }, { "epoch": 0.959046026007662, "grad_norm": 0.42877569794654846, "learning_rate": 8.783792062963204e-08, "loss": 0.284063458442688, "step": 17774, "token_acc": 0.8946712371455282 }, { "epoch": 0.9590999838126585, "grad_norm": 0.29937270283699036, "learning_rate": 8.760695445862e-08, "loss": 0.30154651403427124, "step": 17775, "token_acc": 0.8911789964421544 }, { "epoch": 0.959153941617655, "grad_norm": 0.3308979570865631, "learning_rate": 8.737629100949663e-08, "loss": 0.31904610991477966, "step": 17776, "token_acc": 0.8849828047382499 }, { "epoch": 0.9592078994226515, "grad_norm": 0.4203523099422455, "learning_rate": 8.714593028930295e-08, "loss": 0.3372876048088074, "step": 17777, "token_acc": 0.8784866059099696 }, { "epoch": 0.959261857227648, "grad_norm": 0.4205529987812042, "learning_rate": 8.69158723050767e-08, "loss": 0.3657931089401245, "step": 17778, "token_acc": 0.8679221594475832 }, { "epoch": 0.9593158150326445, "grad_norm": 0.4925154745578766, "learning_rate": 8.668611706384222e-08, "loss": 0.27777886390686035, "step": 17779, "token_acc": 0.8980232772954 }, { "epoch": 0.9593697728376409, "grad_norm": 0.341658353805542, "learning_rate": 8.645666457261725e-08, "loss": 0.30221855640411377, "step": 17780, "token_acc": 0.889168448861492 }, { "epoch": 0.9594237306426374, "grad_norm": 0.4214651584625244, "learning_rate": 8.622751483840841e-08, "loss": 0.35099244117736816, "step": 17781, "token_acc": 0.8725766362883182 }, { "epoch": 0.9594776884476339, "grad_norm": 0.3765862286090851, "learning_rate": 8.599866786821454e-08, "loss": 0.30281829833984375, "step": 17782, "token_acc": 0.8940278401436911 }, { "epoch": 0.9595316462526304, "grad_norm": 0.42931824922561646, "learning_rate": 8.577012366902338e-08, "loss": 0.328122615814209, "step": 17783, "token_acc": 0.8812122953174375 }, { "epoch": 0.9595856040576269, "grad_norm": 0.4536280035972595, "learning_rate": 8.554188224781601e-08, "loss": 0.35771870613098145, "step": 17784, "token_acc": 0.8722749422928956 }, { "epoch": 0.9596395618626234, "grad_norm": 0.36231058835983276, "learning_rate": 8.531394361156242e-08, "loss": 0.3482203483581543, "step": 17785, "token_acc": 0.8770384866275277 }, { "epoch": 0.95969351966762, "grad_norm": 0.4014667272567749, "learning_rate": 8.50863077672237e-08, "loss": 0.2952934503555298, "step": 17786, "token_acc": 0.8959405374499714 }, { "epoch": 0.9597474774726165, "grad_norm": 0.30805686116218567, "learning_rate": 8.485897472175208e-08, "loss": 0.27158063650131226, "step": 17787, "token_acc": 0.8978590078328982 }, { "epoch": 0.9598014352776129, "grad_norm": 0.540637731552124, "learning_rate": 8.463194448208978e-08, "loss": 0.3061114549636841, "step": 17788, "token_acc": 0.8864746504968839 }, { "epoch": 0.9598553930826094, "grad_norm": 0.39820876717567444, "learning_rate": 8.440521705517013e-08, "loss": 0.3359065055847168, "step": 17789, "token_acc": 0.8803573740774311 }, { "epoch": 0.9599093508876059, "grad_norm": 0.4103876054286957, "learning_rate": 8.41787924479176e-08, "loss": 0.3187915086746216, "step": 17790, "token_acc": 0.8868467116779195 }, { "epoch": 0.9599633086926024, "grad_norm": 0.3754757046699524, "learning_rate": 8.395267066724777e-08, "loss": 0.2952768802642822, "step": 17791, "token_acc": 0.8907539900935608 }, { "epoch": 0.9600172664975989, "grad_norm": 0.46711963415145874, "learning_rate": 8.372685172006511e-08, "loss": 0.36832547187805176, "step": 17792, "token_acc": 0.8741353129745234 }, { "epoch": 0.9600712243025954, "grad_norm": 0.31664547324180603, "learning_rate": 8.350133561326746e-08, "loss": 0.2915189862251282, "step": 17793, "token_acc": 0.8989885721791672 }, { "epoch": 0.9601251821075919, "grad_norm": 0.4055129587650299, "learning_rate": 8.327612235374039e-08, "loss": 0.2561955451965332, "step": 17794, "token_acc": 0.9061057628906765 }, { "epoch": 0.9601791399125884, "grad_norm": 0.3843420445919037, "learning_rate": 8.305121194836285e-08, "loss": 0.30594491958618164, "step": 17795, "token_acc": 0.8950671520020083 }, { "epoch": 0.9602330977175848, "grad_norm": 0.40790465474128723, "learning_rate": 8.282660440400381e-08, "loss": 0.3288736045360565, "step": 17796, "token_acc": 0.8837002446075162 }, { "epoch": 0.9602870555225813, "grad_norm": 0.4140847325325012, "learning_rate": 8.260229972752221e-08, "loss": 0.3375493884086609, "step": 17797, "token_acc": 0.8810322204402974 }, { "epoch": 0.9603410133275778, "grad_norm": 0.3174380660057068, "learning_rate": 8.237829792576813e-08, "loss": 0.29056787490844727, "step": 17798, "token_acc": 0.8895481515289821 }, { "epoch": 0.9603949711325743, "grad_norm": 0.5595305562019348, "learning_rate": 8.215459900558275e-08, "loss": 0.35359320044517517, "step": 17799, "token_acc": 0.8743849493487699 }, { "epoch": 0.9604489289375708, "grad_norm": 0.5091368556022644, "learning_rate": 8.193120297379841e-08, "loss": 0.34844905138015747, "step": 17800, "token_acc": 0.8771929824561403 }, { "epoch": 0.9605028867425673, "grad_norm": 0.497178852558136, "learning_rate": 8.170810983723631e-08, "loss": 0.360365629196167, "step": 17801, "token_acc": 0.8774420580026696 }, { "epoch": 0.9605568445475638, "grad_norm": 0.3605669140815735, "learning_rate": 8.1485319602711e-08, "loss": 0.3148871660232544, "step": 17802, "token_acc": 0.8860541780134729 }, { "epoch": 0.9606108023525602, "grad_norm": 0.43682387471199036, "learning_rate": 8.126283227702369e-08, "loss": 0.3275083303451538, "step": 17803, "token_acc": 0.8782013454502537 }, { "epoch": 0.9606647601575568, "grad_norm": 0.3669579327106476, "learning_rate": 8.104064786697341e-08, "loss": 0.2641560137271881, "step": 17804, "token_acc": 0.9012461476617982 }, { "epoch": 0.9607187179625533, "grad_norm": 0.4197936952114105, "learning_rate": 8.081876637934249e-08, "loss": 0.3289705216884613, "step": 17805, "token_acc": 0.8838419350302349 }, { "epoch": 0.9607726757675498, "grad_norm": 0.45465484261512756, "learning_rate": 8.059718782090776e-08, "loss": 0.32822585105895996, "step": 17806, "token_acc": 0.8822643404041672 }, { "epoch": 0.9608266335725463, "grad_norm": 0.4389635920524597, "learning_rate": 8.03759121984371e-08, "loss": 0.3446160554885864, "step": 17807, "token_acc": 0.8837740777015998 }, { "epoch": 0.9608805913775428, "grad_norm": 0.3946649730205536, "learning_rate": 8.015493951868736e-08, "loss": 0.26228103041648865, "step": 17808, "token_acc": 0.9016808964781217 }, { "epoch": 0.9609345491825393, "grad_norm": 0.37162190675735474, "learning_rate": 7.993426978840646e-08, "loss": 0.3160412311553955, "step": 17809, "token_acc": 0.885741166631821 }, { "epoch": 0.9609885069875358, "grad_norm": 0.48068225383758545, "learning_rate": 7.971390301433456e-08, "loss": 0.3496968150138855, "step": 17810, "token_acc": 0.8776683087027914 }, { "epoch": 0.9610424647925322, "grad_norm": 0.37500855326652527, "learning_rate": 7.949383920320186e-08, "loss": 0.3183887004852295, "step": 17811, "token_acc": 0.8880266075388027 }, { "epoch": 0.9610964225975287, "grad_norm": 0.38102737069129944, "learning_rate": 7.92740783617274e-08, "loss": 0.2968144118785858, "step": 17812, "token_acc": 0.8939938080495357 }, { "epoch": 0.9611503804025252, "grad_norm": 0.42187753319740295, "learning_rate": 7.905462049662471e-08, "loss": 0.3002512454986572, "step": 17813, "token_acc": 0.8902190178207047 }, { "epoch": 0.9612043382075217, "grad_norm": 0.4982287883758545, "learning_rate": 7.88354656145951e-08, "loss": 0.30099040269851685, "step": 17814, "token_acc": 0.8886769550893012 }, { "epoch": 0.9612582960125182, "grad_norm": 0.4232161045074463, "learning_rate": 7.861661372232987e-08, "loss": 0.32134824991226196, "step": 17815, "token_acc": 0.8869689489411459 }, { "epoch": 0.9613122538175147, "grad_norm": 0.4601597487926483, "learning_rate": 7.83980648265148e-08, "loss": 0.2957419157028198, "step": 17816, "token_acc": 0.8918018539976825 }, { "epoch": 0.9613662116225112, "grad_norm": 0.49273836612701416, "learning_rate": 7.817981893382454e-08, "loss": 0.3036940097808838, "step": 17817, "token_acc": 0.8892660161221893 }, { "epoch": 0.9614201694275077, "grad_norm": 0.4523235559463501, "learning_rate": 7.796187605092265e-08, "loss": 0.33886414766311646, "step": 17818, "token_acc": 0.8700501952035694 }, { "epoch": 0.9614741272325041, "grad_norm": 0.44146808981895447, "learning_rate": 7.774423618446713e-08, "loss": 0.3145959973335266, "step": 17819, "token_acc": 0.8865873365653004 }, { "epoch": 0.9615280850375006, "grad_norm": 0.5476371049880981, "learning_rate": 7.752689934110158e-08, "loss": 0.3257203698158264, "step": 17820, "token_acc": 0.8845904473741085 }, { "epoch": 0.9615820428424972, "grad_norm": 0.418351411819458, "learning_rate": 7.730986552746622e-08, "loss": 0.25042492151260376, "step": 17821, "token_acc": 0.9071260151951794 }, { "epoch": 0.9616360006474937, "grad_norm": 0.39367350935935974, "learning_rate": 7.709313475018798e-08, "loss": 0.37090712785720825, "step": 17822, "token_acc": 0.8722478238607271 }, { "epoch": 0.9616899584524902, "grad_norm": 0.46142300963401794, "learning_rate": 7.687670701588601e-08, "loss": 0.32706770300865173, "step": 17823, "token_acc": 0.8822700035248502 }, { "epoch": 0.9617439162574867, "grad_norm": 0.35484734177589417, "learning_rate": 7.666058233116947e-08, "loss": 0.29829373955726624, "step": 17824, "token_acc": 0.8921638089839875 }, { "epoch": 0.9617978740624832, "grad_norm": 0.5024576783180237, "learning_rate": 7.644476070263973e-08, "loss": 0.39179298281669617, "step": 17825, "token_acc": 0.8633910820789549 }, { "epoch": 0.9618518318674796, "grad_norm": 0.4750313460826874, "learning_rate": 7.62292421368871e-08, "loss": 0.36410772800445557, "step": 17826, "token_acc": 0.8715447154471545 }, { "epoch": 0.9619057896724761, "grad_norm": 0.291290819644928, "learning_rate": 7.601402664049407e-08, "loss": 0.2797507345676422, "step": 17827, "token_acc": 0.8977366986609788 }, { "epoch": 0.9619597474774726, "grad_norm": 0.4740949273109436, "learning_rate": 7.579911422003316e-08, "loss": 0.2666623592376709, "step": 17828, "token_acc": 0.8958965029310694 }, { "epoch": 0.9620137052824691, "grad_norm": 0.5142905712127686, "learning_rate": 7.558450488206692e-08, "loss": 0.26704141497612, "step": 17829, "token_acc": 0.8987758945386064 }, { "epoch": 0.9620676630874656, "grad_norm": 0.553030788898468, "learning_rate": 7.53701986331501e-08, "loss": 0.37211155891418457, "step": 17830, "token_acc": 0.8671623794212219 }, { "epoch": 0.9621216208924621, "grad_norm": 0.4474453330039978, "learning_rate": 7.515619547982744e-08, "loss": 0.32005774974823, "step": 17831, "token_acc": 0.8883706467661692 }, { "epoch": 0.9621755786974586, "grad_norm": 0.3902297019958496, "learning_rate": 7.494249542863485e-08, "loss": 0.33433449268341064, "step": 17832, "token_acc": 0.8795698924731182 }, { "epoch": 0.9622295365024551, "grad_norm": 0.31358879804611206, "learning_rate": 7.472909848609932e-08, "loss": 0.2875734567642212, "step": 17833, "token_acc": 0.8932182210610019 }, { "epoch": 0.9622834943074515, "grad_norm": 0.34820884466171265, "learning_rate": 7.451600465873565e-08, "loss": 0.370850145816803, "step": 17834, "token_acc": 0.8671963677639046 }, { "epoch": 0.962337452112448, "grad_norm": 0.587022602558136, "learning_rate": 7.430321395305306e-08, "loss": 0.3829817771911621, "step": 17835, "token_acc": 0.8664515137544183 }, { "epoch": 0.9623914099174445, "grad_norm": 0.4078183174133301, "learning_rate": 7.409072637555081e-08, "loss": 0.2849874496459961, "step": 17836, "token_acc": 0.8930113859442481 }, { "epoch": 0.962445367722441, "grad_norm": 0.40782153606414795, "learning_rate": 7.387854193271704e-08, "loss": 0.3634738326072693, "step": 17837, "token_acc": 0.8719479110146501 }, { "epoch": 0.9624993255274376, "grad_norm": 0.46839454770088196, "learning_rate": 7.366666063103322e-08, "loss": 0.3301757574081421, "step": 17838, "token_acc": 0.8777433877321328 }, { "epoch": 0.9625532833324341, "grad_norm": 0.31863510608673096, "learning_rate": 7.345508247696864e-08, "loss": 0.28036603331565857, "step": 17839, "token_acc": 0.8966798810703667 }, { "epoch": 0.9626072411374306, "grad_norm": 0.5154357552528381, "learning_rate": 7.32438074769859e-08, "loss": 0.37190189957618713, "step": 17840, "token_acc": 0.8685567010309279 }, { "epoch": 0.9626611989424271, "grad_norm": 0.42144909501075745, "learning_rate": 7.303283563753539e-08, "loss": 0.29734647274017334, "step": 17841, "token_acc": 0.8942663011085946 }, { "epoch": 0.9627151567474235, "grad_norm": 0.5130241513252258, "learning_rate": 7.282216696506305e-08, "loss": 0.2913837432861328, "step": 17842, "token_acc": 0.8906333870101986 }, { "epoch": 0.96276911455242, "grad_norm": 0.5005762577056885, "learning_rate": 7.261180146600044e-08, "loss": 0.3156368136405945, "step": 17843, "token_acc": 0.8861513687600644 }, { "epoch": 0.9628230723574165, "grad_norm": 0.4128417372703552, "learning_rate": 7.240173914677351e-08, "loss": 0.37866467237472534, "step": 17844, "token_acc": 0.8649591149591149 }, { "epoch": 0.962877030162413, "grad_norm": 0.43945974111557007, "learning_rate": 7.219198001379602e-08, "loss": 0.3102406859397888, "step": 17845, "token_acc": 0.8889014979573309 }, { "epoch": 0.9629309879674095, "grad_norm": 0.38839972019195557, "learning_rate": 7.198252407347506e-08, "loss": 0.3443640172481537, "step": 17846, "token_acc": 0.8794216732172937 }, { "epoch": 0.962984945772406, "grad_norm": 0.4035695195198059, "learning_rate": 7.177337133220774e-08, "loss": 0.2855100631713867, "step": 17847, "token_acc": 0.8911065453478133 }, { "epoch": 0.9630389035774025, "grad_norm": 0.40526169538497925, "learning_rate": 7.156452179637896e-08, "loss": 0.3176383674144745, "step": 17848, "token_acc": 0.8848466163095422 }, { "epoch": 0.9630928613823989, "grad_norm": 0.5029575228691101, "learning_rate": 7.135597547237027e-08, "loss": 0.325836181640625, "step": 17849, "token_acc": 0.8858316955511881 }, { "epoch": 0.9631468191873954, "grad_norm": 0.4482942521572113, "learning_rate": 7.114773236654881e-08, "loss": 0.3028782904148102, "step": 17850, "token_acc": 0.8930033131943091 }, { "epoch": 0.9632007769923919, "grad_norm": 0.47810399532318115, "learning_rate": 7.093979248527394e-08, "loss": 0.3391486406326294, "step": 17851, "token_acc": 0.8790309379603863 }, { "epoch": 0.9632547347973884, "grad_norm": 0.49468284845352173, "learning_rate": 7.073215583489835e-08, "loss": 0.2985803484916687, "step": 17852, "token_acc": 0.892017903767251 }, { "epoch": 0.9633086926023849, "grad_norm": 0.44900092482566833, "learning_rate": 7.052482242176029e-08, "loss": 0.3151034116744995, "step": 17853, "token_acc": 0.8909669211195929 }, { "epoch": 0.9633626504073814, "grad_norm": 0.4765368103981018, "learning_rate": 7.031779225219249e-08, "loss": 0.2930613160133362, "step": 17854, "token_acc": 0.8961730449251248 }, { "epoch": 0.963416608212378, "grad_norm": 0.39530885219573975, "learning_rate": 7.011106533251877e-08, "loss": 0.31530165672302246, "step": 17855, "token_acc": 0.8848275862068965 }, { "epoch": 0.9634705660173745, "grad_norm": 0.3992500901222229, "learning_rate": 6.990464166905298e-08, "loss": 0.34275251626968384, "step": 17856, "token_acc": 0.881862404447533 }, { "epoch": 0.9635245238223709, "grad_norm": 0.38633614778518677, "learning_rate": 6.969852126809673e-08, "loss": 0.2727275788784027, "step": 17857, "token_acc": 0.8988045125441994 }, { "epoch": 0.9635784816273674, "grad_norm": 0.41969552636146545, "learning_rate": 6.949270413594611e-08, "loss": 0.3323695659637451, "step": 17858, "token_acc": 0.8789844073409687 }, { "epoch": 0.9636324394323639, "grad_norm": 0.35736533999443054, "learning_rate": 6.92871902788883e-08, "loss": 0.3281639814376831, "step": 17859, "token_acc": 0.8855650035856982 }, { "epoch": 0.9636863972373604, "grad_norm": 0.4284611642360687, "learning_rate": 6.908197970319718e-08, "loss": 0.3445218801498413, "step": 17860, "token_acc": 0.8753099539496989 }, { "epoch": 0.9637403550423569, "grad_norm": 0.36686640977859497, "learning_rate": 6.887707241514108e-08, "loss": 0.2799298167228699, "step": 17861, "token_acc": 0.8971807628524047 }, { "epoch": 0.9637943128473534, "grad_norm": 0.3914327025413513, "learning_rate": 6.867246842097719e-08, "loss": 0.261358380317688, "step": 17862, "token_acc": 0.9035812672176309 }, { "epoch": 0.9638482706523499, "grad_norm": 0.41123491525650024, "learning_rate": 6.846816772695497e-08, "loss": 0.30531054735183716, "step": 17863, "token_acc": 0.8887368421052632 }, { "epoch": 0.9639022284573464, "grad_norm": 0.387302428483963, "learning_rate": 6.826417033931276e-08, "loss": 0.3226022720336914, "step": 17864, "token_acc": 0.8844282238442822 }, { "epoch": 0.9639561862623428, "grad_norm": 0.3668528199195862, "learning_rate": 6.806047626428114e-08, "loss": 0.2958114743232727, "step": 17865, "token_acc": 0.8948500418210061 }, { "epoch": 0.9640101440673393, "grad_norm": 0.34875866770744324, "learning_rate": 6.785708550808068e-08, "loss": 0.2928847074508667, "step": 17866, "token_acc": 0.8934537804727137 }, { "epoch": 0.9640641018723358, "grad_norm": 0.45817115902900696, "learning_rate": 6.765399807692308e-08, "loss": 0.3437502086162567, "step": 17867, "token_acc": 0.8763921184677518 }, { "epoch": 0.9641180596773323, "grad_norm": 0.44957229495048523, "learning_rate": 6.745121397701004e-08, "loss": 0.368815153837204, "step": 17868, "token_acc": 0.8707399864222675 }, { "epoch": 0.9641720174823288, "grad_norm": 0.523980438709259, "learning_rate": 6.72487332145355e-08, "loss": 0.3580775558948517, "step": 17869, "token_acc": 0.868491849976262 }, { "epoch": 0.9642259752873253, "grad_norm": 0.425494521856308, "learning_rate": 6.704655579568342e-08, "loss": 0.3187742531299591, "step": 17870, "token_acc": 0.8854179528336832 }, { "epoch": 0.9642799330923219, "grad_norm": 0.43498465418815613, "learning_rate": 6.684468172662661e-08, "loss": 0.34196436405181885, "step": 17871, "token_acc": 0.8781374618813043 }, { "epoch": 0.9643338908973182, "grad_norm": 0.4299592971801758, "learning_rate": 6.664311101353127e-08, "loss": 0.31712719798088074, "step": 17872, "token_acc": 0.8846457729786674 }, { "epoch": 0.9643878487023148, "grad_norm": 0.38430461287498474, "learning_rate": 6.644184366255246e-08, "loss": 0.3351932168006897, "step": 17873, "token_acc": 0.880359394703657 }, { "epoch": 0.9644418065073113, "grad_norm": 0.4279933273792267, "learning_rate": 6.62408796798375e-08, "loss": 0.2810804843902588, "step": 17874, "token_acc": 0.8944616576297444 }, { "epoch": 0.9644957643123078, "grad_norm": 0.5473982095718384, "learning_rate": 6.604021907152369e-08, "loss": 0.32977256178855896, "step": 17875, "token_acc": 0.8814956855225312 }, { "epoch": 0.9645497221173043, "grad_norm": 0.44137516617774963, "learning_rate": 6.583986184374058e-08, "loss": 0.3556531071662903, "step": 17876, "token_acc": 0.8743681420293429 }, { "epoch": 0.9646036799223008, "grad_norm": 0.4321189820766449, "learning_rate": 6.563980800260439e-08, "loss": 0.3279302716255188, "step": 17877, "token_acc": 0.8819338422391858 }, { "epoch": 0.9646576377272973, "grad_norm": 0.4592342972755432, "learning_rate": 6.5440057554228e-08, "loss": 0.33904722332954407, "step": 17878, "token_acc": 0.8761408083441982 }, { "epoch": 0.9647115955322938, "grad_norm": 0.4107830226421356, "learning_rate": 6.524061050470765e-08, "loss": 0.3798869550228119, "step": 17879, "token_acc": 0.8674466443815891 }, { "epoch": 0.9647655533372902, "grad_norm": 0.4726989269256592, "learning_rate": 6.504146686013734e-08, "loss": 0.36237049102783203, "step": 17880, "token_acc": 0.8709036742800397 }, { "epoch": 0.9648195111422867, "grad_norm": 0.44512465596199036, "learning_rate": 6.48426266265978e-08, "loss": 0.3231890797615051, "step": 17881, "token_acc": 0.8857653973933044 }, { "epoch": 0.9648734689472832, "grad_norm": 0.5777716636657715, "learning_rate": 6.46440898101619e-08, "loss": 0.3444269001483917, "step": 17882, "token_acc": 0.8803125856868659 }, { "epoch": 0.9649274267522797, "grad_norm": 0.5501503348350525, "learning_rate": 6.444585641689261e-08, "loss": 0.3253929615020752, "step": 17883, "token_acc": 0.8826512133285042 }, { "epoch": 0.9649813845572762, "grad_norm": 0.36188337206840515, "learning_rate": 6.424792645284395e-08, "loss": 0.3429489731788635, "step": 17884, "token_acc": 0.8772585040880185 }, { "epoch": 0.9650353423622727, "grad_norm": 0.3666651248931885, "learning_rate": 6.405029992406109e-08, "loss": 0.3199297785758972, "step": 17885, "token_acc": 0.8791708078943632 }, { "epoch": 0.9650893001672692, "grad_norm": 0.44182467460632324, "learning_rate": 6.38529768365781e-08, "loss": 0.3132309317588806, "step": 17886, "token_acc": 0.8879589632829373 }, { "epoch": 0.9651432579722656, "grad_norm": 0.525606632232666, "learning_rate": 6.365595719642347e-08, "loss": 0.33827221393585205, "step": 17887, "token_acc": 0.8781397174254317 }, { "epoch": 0.9651972157772621, "grad_norm": 0.47696056962013245, "learning_rate": 6.345924100961353e-08, "loss": 0.28647392988204956, "step": 17888, "token_acc": 0.8943036320877973 }, { "epoch": 0.9652511735822586, "grad_norm": 0.3722754418849945, "learning_rate": 6.326282828215458e-08, "loss": 0.2547674775123596, "step": 17889, "token_acc": 0.9013182674199623 }, { "epoch": 0.9653051313872552, "grad_norm": 0.43743881583213806, "learning_rate": 6.306671902004624e-08, "loss": 0.3715830445289612, "step": 17890, "token_acc": 0.8682588597842835 }, { "epoch": 0.9653590891922517, "grad_norm": 0.5130615830421448, "learning_rate": 6.287091322927596e-08, "loss": 0.32848289608955383, "step": 17891, "token_acc": 0.8786990135963743 }, { "epoch": 0.9654130469972482, "grad_norm": 0.4532530903816223, "learning_rate": 6.267541091582563e-08, "loss": 0.3693593144416809, "step": 17892, "token_acc": 0.8728350756748323 }, { "epoch": 0.9654670048022447, "grad_norm": 0.4965437650680542, "learning_rate": 6.248021208566602e-08, "loss": 0.30857381224632263, "step": 17893, "token_acc": 0.8885811018775007 }, { "epoch": 0.9655209626072412, "grad_norm": 0.4412292242050171, "learning_rate": 6.228531674475569e-08, "loss": 0.3884128928184509, "step": 17894, "token_acc": 0.8659180977542933 }, { "epoch": 0.9655749204122376, "grad_norm": 0.33496221899986267, "learning_rate": 6.209072489904988e-08, "loss": 0.3020542860031128, "step": 17895, "token_acc": 0.8900961136648559 }, { "epoch": 0.9656288782172341, "grad_norm": 0.44004055857658386, "learning_rate": 6.18964365544894e-08, "loss": 0.38824713230133057, "step": 17896, "token_acc": 0.866232073011734 }, { "epoch": 0.9656828360222306, "grad_norm": 0.4589141309261322, "learning_rate": 6.170245171700839e-08, "loss": 0.26610317826271057, "step": 17897, "token_acc": 0.8981233243967829 }, { "epoch": 0.9657367938272271, "grad_norm": 0.341508686542511, "learning_rate": 6.150877039252989e-08, "loss": 0.3153979778289795, "step": 17898, "token_acc": 0.884611016467916 }, { "epoch": 0.9657907516322236, "grad_norm": 0.41139963269233704, "learning_rate": 6.13153925869714e-08, "loss": 0.28838807344436646, "step": 17899, "token_acc": 0.8956292939628709 }, { "epoch": 0.9658447094372201, "grad_norm": 0.45130178332328796, "learning_rate": 6.112231830623704e-08, "loss": 0.3171730935573578, "step": 17900, "token_acc": 0.8888722480155758 }, { "epoch": 0.9658986672422166, "grad_norm": 0.4820745289325714, "learning_rate": 6.092954755622327e-08, "loss": 0.37949496507644653, "step": 17901, "token_acc": 0.8672703751617077 }, { "epoch": 0.9659526250472131, "grad_norm": 0.4653770327568054, "learning_rate": 6.073708034281645e-08, "loss": 0.30230337381362915, "step": 17902, "token_acc": 0.8887897710377639 }, { "epoch": 0.9660065828522095, "grad_norm": 0.4616348147392273, "learning_rate": 6.054491667189744e-08, "loss": 0.32196539640426636, "step": 17903, "token_acc": 0.8819748358862144 }, { "epoch": 0.966060540657206, "grad_norm": 0.4332117438316345, "learning_rate": 6.035305654933155e-08, "loss": 0.32794424891471863, "step": 17904, "token_acc": 0.8832081415071481 }, { "epoch": 0.9661144984622025, "grad_norm": 0.39605656266212463, "learning_rate": 6.016149998097964e-08, "loss": 0.37292319536209106, "step": 17905, "token_acc": 0.8670777172354459 }, { "epoch": 0.966168456267199, "grad_norm": 0.42188653349876404, "learning_rate": 5.99702469726915e-08, "loss": 0.298220694065094, "step": 17906, "token_acc": 0.8883336205410994 }, { "epoch": 0.9662224140721956, "grad_norm": 0.37206771969795227, "learning_rate": 5.977929753030798e-08, "loss": 0.31474602222442627, "step": 17907, "token_acc": 0.8889422307569211 }, { "epoch": 0.9662763718771921, "grad_norm": 0.34155377745628357, "learning_rate": 5.95886516596611e-08, "loss": 0.2730005979537964, "step": 17908, "token_acc": 0.8960953011250827 }, { "epoch": 0.9663303296821886, "grad_norm": 0.349735826253891, "learning_rate": 5.9398309366572873e-08, "loss": 0.30374783277511597, "step": 17909, "token_acc": 0.8897811021889781 }, { "epoch": 0.966384287487185, "grad_norm": 0.4309405982494354, "learning_rate": 5.920827065685641e-08, "loss": 0.3211023211479187, "step": 17910, "token_acc": 0.8860394537177542 }, { "epoch": 0.9664382452921815, "grad_norm": 0.36248934268951416, "learning_rate": 5.9018535536314866e-08, "loss": 0.3148762583732605, "step": 17911, "token_acc": 0.8817414884001206 }, { "epoch": 0.966492203097178, "grad_norm": 0.476247638463974, "learning_rate": 5.88291040107436e-08, "loss": 0.33962589502334595, "step": 17912, "token_acc": 0.8780522230063514 }, { "epoch": 0.9665461609021745, "grad_norm": 0.4801620841026306, "learning_rate": 5.863997608592687e-08, "loss": 0.34385889768600464, "step": 17913, "token_acc": 0.8769877536099433 }, { "epoch": 0.966600118707171, "grad_norm": 0.514830470085144, "learning_rate": 5.845115176764004e-08, "loss": 0.33292871713638306, "step": 17914, "token_acc": 0.8782329092688382 }, { "epoch": 0.9666540765121675, "grad_norm": 0.46475985646247864, "learning_rate": 5.8262631061651864e-08, "loss": 0.35831397771835327, "step": 17915, "token_acc": 0.8753811480843166 }, { "epoch": 0.966708034317164, "grad_norm": 0.4142666757106781, "learning_rate": 5.8074413973718825e-08, "loss": 0.36185652017593384, "step": 17916, "token_acc": 0.8707922045051886 }, { "epoch": 0.9667619921221605, "grad_norm": 0.3122159540653229, "learning_rate": 5.788650050958744e-08, "loss": 0.33330053091049194, "step": 17917, "token_acc": 0.8820109024833435 }, { "epoch": 0.9668159499271569, "grad_norm": 0.4199334979057312, "learning_rate": 5.7698890674998674e-08, "loss": 0.3027399182319641, "step": 17918, "token_acc": 0.8852965982569581 }, { "epoch": 0.9668699077321534, "grad_norm": 0.5292345881462097, "learning_rate": 5.751158447568128e-08, "loss": 0.36323150992393494, "step": 17919, "token_acc": 0.8693092058020968 }, { "epoch": 0.9669238655371499, "grad_norm": 0.42883679270744324, "learning_rate": 5.7324581917355126e-08, "loss": 0.302162230014801, "step": 17920, "token_acc": 0.8914987119260493 }, { "epoch": 0.9669778233421464, "grad_norm": 0.4836772680282593, "learning_rate": 5.7137883005732306e-08, "loss": 0.3069411516189575, "step": 17921, "token_acc": 0.8879352226720648 }, { "epoch": 0.9670317811471429, "grad_norm": 0.521981954574585, "learning_rate": 5.6951487746513825e-08, "loss": 0.36268889904022217, "step": 17922, "token_acc": 0.8713031009141423 }, { "epoch": 0.9670857389521395, "grad_norm": 0.4711197018623352, "learning_rate": 5.676539614539178e-08, "loss": 0.2463066279888153, "step": 17923, "token_acc": 0.9082976117575016 }, { "epoch": 0.967139696757136, "grad_norm": 0.44847550988197327, "learning_rate": 5.657960820804942e-08, "loss": 0.3190479874610901, "step": 17924, "token_acc": 0.8818209225203497 }, { "epoch": 0.9671936545621325, "grad_norm": 0.4420897960662842, "learning_rate": 5.639412394016108e-08, "loss": 0.30550113320350647, "step": 17925, "token_acc": 0.8897920900422073 }, { "epoch": 0.9672476123671289, "grad_norm": 0.45045334100723267, "learning_rate": 5.620894334739224e-08, "loss": 0.31076639890670776, "step": 17926, "token_acc": 0.8864641277991727 }, { "epoch": 0.9673015701721254, "grad_norm": 0.4106191098690033, "learning_rate": 5.602406643539615e-08, "loss": 0.3028195798397064, "step": 17927, "token_acc": 0.8903610949330227 }, { "epoch": 0.9673555279771219, "grad_norm": 0.4346565008163452, "learning_rate": 5.5839493209821625e-08, "loss": 0.3386789858341217, "step": 17928, "token_acc": 0.8790050590219224 }, { "epoch": 0.9674094857821184, "grad_norm": 0.4303947389125824, "learning_rate": 5.565522367630305e-08, "loss": 0.38982659578323364, "step": 17929, "token_acc": 0.8656126482213439 }, { "epoch": 0.9674634435871149, "grad_norm": 0.5228308439254761, "learning_rate": 5.547125784046814e-08, "loss": 0.35218533873558044, "step": 17930, "token_acc": 0.8740031897926634 }, { "epoch": 0.9675174013921114, "grad_norm": 0.4794521629810333, "learning_rate": 5.528759570793685e-08, "loss": 0.30258074402809143, "step": 17931, "token_acc": 0.8877383300460223 }, { "epoch": 0.9675713591971079, "grad_norm": 0.3862726092338562, "learning_rate": 5.5104237284316907e-08, "loss": 0.34228163957595825, "step": 17932, "token_acc": 0.8750146421459529 }, { "epoch": 0.9676253170021043, "grad_norm": 0.40568435192108154, "learning_rate": 5.4921182575208287e-08, "loss": 0.3636586666107178, "step": 17933, "token_acc": 0.8746645619573796 }, { "epoch": 0.9676792748071008, "grad_norm": 0.4861087203025818, "learning_rate": 5.473843158620207e-08, "loss": 0.3341527283191681, "step": 17934, "token_acc": 0.8850102669404517 }, { "epoch": 0.9677332326120973, "grad_norm": 0.41961273550987244, "learning_rate": 5.455598432287823e-08, "loss": 0.342983216047287, "step": 17935, "token_acc": 0.8780833569605897 }, { "epoch": 0.9677871904170938, "grad_norm": 0.3967025876045227, "learning_rate": 5.437384079080899e-08, "loss": 0.32427242398262024, "step": 17936, "token_acc": 0.8849256900212314 }, { "epoch": 0.9678411482220903, "grad_norm": 0.4358881115913391, "learning_rate": 5.419200099555877e-08, "loss": 0.3282623887062073, "step": 17937, "token_acc": 0.8811630847029077 }, { "epoch": 0.9678951060270868, "grad_norm": 0.47732803225517273, "learning_rate": 5.4010464942677586e-08, "loss": 0.3117067217826843, "step": 17938, "token_acc": 0.8897007165940706 }, { "epoch": 0.9679490638320833, "grad_norm": 0.36449816823005676, "learning_rate": 5.3829232637712114e-08, "loss": 0.2403765618801117, "step": 17939, "token_acc": 0.9113545816733067 }, { "epoch": 0.9680030216370799, "grad_norm": 0.44320404529571533, "learning_rate": 5.36483040861957e-08, "loss": 0.36810505390167236, "step": 17940, "token_acc": 0.8694499668654738 }, { "epoch": 0.9680569794420762, "grad_norm": 0.37188616394996643, "learning_rate": 5.346767929365615e-08, "loss": 0.31659141182899475, "step": 17941, "token_acc": 0.8831702821494399 }, { "epoch": 0.9681109372470728, "grad_norm": 0.41752949357032776, "learning_rate": 5.328735826560683e-08, "loss": 0.3171570301055908, "step": 17942, "token_acc": 0.8823428669292687 }, { "epoch": 0.9681648950520693, "grad_norm": 0.35390526056289673, "learning_rate": 5.310734100755555e-08, "loss": 0.36315062642097473, "step": 17943, "token_acc": 0.8714452214452214 }, { "epoch": 0.9682188528570658, "grad_norm": 0.4030535817146301, "learning_rate": 5.2927627525001244e-08, "loss": 0.28184089064598083, "step": 17944, "token_acc": 0.8974688974688975 }, { "epoch": 0.9682728106620623, "grad_norm": 0.3823162913322449, "learning_rate": 5.274821782343065e-08, "loss": 0.28898656368255615, "step": 17945, "token_acc": 0.896113889957676 }, { "epoch": 0.9683267684670588, "grad_norm": 0.4976181089878082, "learning_rate": 5.256911190832381e-08, "loss": 0.3623840808868408, "step": 17946, "token_acc": 0.872656367181641 }, { "epoch": 0.9683807262720553, "grad_norm": 0.4727160334587097, "learning_rate": 5.239030978515081e-08, "loss": 0.3025042712688446, "step": 17947, "token_acc": 0.8943360304616849 }, { "epoch": 0.9684346840770518, "grad_norm": 0.3756290078163147, "learning_rate": 5.221181145937171e-08, "loss": 0.3135984539985657, "step": 17948, "token_acc": 0.8917132109882594 }, { "epoch": 0.9684886418820482, "grad_norm": 0.37221309542655945, "learning_rate": 5.203361693643882e-08, "loss": 0.28344613313674927, "step": 17949, "token_acc": 0.8989113530326595 }, { "epoch": 0.9685425996870447, "grad_norm": 0.4169497489929199, "learning_rate": 5.1855726221792245e-08, "loss": 0.31212079524993896, "step": 17950, "token_acc": 0.8872490360324425 }, { "epoch": 0.9685965574920412, "grad_norm": 0.4107806384563446, "learning_rate": 5.167813932086541e-08, "loss": 0.3160476088523865, "step": 17951, "token_acc": 0.8823161841283066 }, { "epoch": 0.9686505152970377, "grad_norm": 0.40591222047805786, "learning_rate": 5.150085623908174e-08, "loss": 0.2919532060623169, "step": 17952, "token_acc": 0.8933021806853583 }, { "epoch": 0.9687044731020342, "grad_norm": 0.3583689033985138, "learning_rate": 5.1323876981856926e-08, "loss": 0.30641108751296997, "step": 17953, "token_acc": 0.8911817102137767 }, { "epoch": 0.9687584309070307, "grad_norm": 0.5698283314704895, "learning_rate": 5.114720155459441e-08, "loss": 0.36479708552360535, "step": 17954, "token_acc": 0.8714603881641744 }, { "epoch": 0.9688123887120272, "grad_norm": 0.47757643461227417, "learning_rate": 5.097082996268987e-08, "loss": 0.4002087116241455, "step": 17955, "token_acc": 0.8625224148236701 }, { "epoch": 0.9688663465170236, "grad_norm": 0.427394300699234, "learning_rate": 5.079476221153012e-08, "loss": 0.28904926776885986, "step": 17956, "token_acc": 0.8908337171810226 }, { "epoch": 0.9689203043220201, "grad_norm": 0.3886503577232361, "learning_rate": 5.0618998306491974e-08, "loss": 0.31756582856178284, "step": 17957, "token_acc": 0.8820274852757451 }, { "epoch": 0.9689742621270167, "grad_norm": 0.4767574965953827, "learning_rate": 5.044353825294224e-08, "loss": 0.3337738811969757, "step": 17958, "token_acc": 0.8830559757942511 }, { "epoch": 0.9690282199320132, "grad_norm": 0.43195977807044983, "learning_rate": 5.026838205624218e-08, "loss": 0.30875998735427856, "step": 17959, "token_acc": 0.8848028973394623 }, { "epoch": 0.9690821777370097, "grad_norm": 0.4420611262321472, "learning_rate": 5.009352972173864e-08, "loss": 0.31022536754608154, "step": 17960, "token_acc": 0.885698038100654 }, { "epoch": 0.9691361355420062, "grad_norm": 0.39523154497146606, "learning_rate": 4.991898125477179e-08, "loss": 0.32919812202453613, "step": 17961, "token_acc": 0.8859639774059469 }, { "epoch": 0.9691900933470027, "grad_norm": 0.45098090171813965, "learning_rate": 4.974473666067292e-08, "loss": 0.3438867926597595, "step": 17962, "token_acc": 0.8817525125628141 }, { "epoch": 0.9692440511519992, "grad_norm": 0.4528573453426361, "learning_rate": 4.957079594476333e-08, "loss": 0.3643859028816223, "step": 17963, "token_acc": 0.872133425990271 }, { "epoch": 0.9692980089569956, "grad_norm": 0.4965988099575043, "learning_rate": 4.939715911235432e-08, "loss": 0.3181595206260681, "step": 17964, "token_acc": 0.8866070118820596 }, { "epoch": 0.9693519667619921, "grad_norm": 0.43691152334213257, "learning_rate": 4.922382616874943e-08, "loss": 0.3677067458629608, "step": 17965, "token_acc": 0.870556174904001 }, { "epoch": 0.9694059245669886, "grad_norm": 0.47385263442993164, "learning_rate": 4.905079711924332e-08, "loss": 0.32123538851737976, "step": 17966, "token_acc": 0.8832569425721218 }, { "epoch": 0.9694598823719851, "grad_norm": 0.3882564306259155, "learning_rate": 4.887807196911731e-08, "loss": 0.31880655884742737, "step": 17967, "token_acc": 0.8823975720789075 }, { "epoch": 0.9695138401769816, "grad_norm": 0.4547199606895447, "learning_rate": 4.870565072364941e-08, "loss": 0.32692795991897583, "step": 17968, "token_acc": 0.8861167684996606 }, { "epoch": 0.9695677979819781, "grad_norm": 0.5279833674430847, "learning_rate": 4.853353338810429e-08, "loss": 0.3458837568759918, "step": 17969, "token_acc": 0.8781596137460949 }, { "epoch": 0.9696217557869746, "grad_norm": 0.4051608443260193, "learning_rate": 4.8361719967736644e-08, "loss": 0.30350953340530396, "step": 17970, "token_acc": 0.8930901542111507 }, { "epoch": 0.9696757135919711, "grad_norm": 0.2781292200088501, "learning_rate": 4.8190210467796707e-08, "loss": 0.3309638500213623, "step": 17971, "token_acc": 0.8826970748636589 }, { "epoch": 0.9697296713969675, "grad_norm": 0.380354642868042, "learning_rate": 4.8019004893519183e-08, "loss": 0.3663361966609955, "step": 17972, "token_acc": 0.8703266674947212 }, { "epoch": 0.969783629201964, "grad_norm": 0.3507344722747803, "learning_rate": 4.784810325013545e-08, "loss": 0.31449025869369507, "step": 17973, "token_acc": 0.8885687946531459 }, { "epoch": 0.9698375870069605, "grad_norm": 0.3659409284591675, "learning_rate": 4.7677505542863544e-08, "loss": 0.32380765676498413, "step": 17974, "token_acc": 0.88719438392641 }, { "epoch": 0.969891544811957, "grad_norm": 0.3388093113899231, "learning_rate": 4.750721177691264e-08, "loss": 0.35649627447128296, "step": 17975, "token_acc": 0.87475358128532 }, { "epoch": 0.9699455026169536, "grad_norm": 0.43160784244537354, "learning_rate": 4.7337221957485246e-08, "loss": 0.29506564140319824, "step": 17976, "token_acc": 0.8924849149753155 }, { "epoch": 0.9699994604219501, "grad_norm": 0.3537198007106781, "learning_rate": 4.716753608977054e-08, "loss": 0.3016331195831299, "step": 17977, "token_acc": 0.8879784618986305 }, { "epoch": 0.9700534182269466, "grad_norm": 0.37112852931022644, "learning_rate": 4.699815417895326e-08, "loss": 0.3236660659313202, "step": 17978, "token_acc": 0.8828803586438778 }, { "epoch": 0.970107376031943, "grad_norm": 0.41358432173728943, "learning_rate": 4.682907623020483e-08, "loss": 0.30970409512519836, "step": 17979, "token_acc": 0.8860351058337635 }, { "epoch": 0.9701613338369395, "grad_norm": 0.3704274296760559, "learning_rate": 4.666030224868889e-08, "loss": 0.33004677295684814, "step": 17980, "token_acc": 0.8805946791862285 }, { "epoch": 0.970215291641936, "grad_norm": 0.4051043391227722, "learning_rate": 4.649183223955911e-08, "loss": 0.3000001907348633, "step": 17981, "token_acc": 0.8925435722709999 }, { "epoch": 0.9702692494469325, "grad_norm": 0.4902346432209015, "learning_rate": 4.632366620796247e-08, "loss": 0.3684299886226654, "step": 17982, "token_acc": 0.8746975458002074 }, { "epoch": 0.970323207251929, "grad_norm": 0.4141080677509308, "learning_rate": 4.615580415903265e-08, "loss": 0.30741727352142334, "step": 17983, "token_acc": 0.8862148292337639 }, { "epoch": 0.9703771650569255, "grad_norm": 0.40818190574645996, "learning_rate": 4.598824609789665e-08, "loss": 0.29977864027023315, "step": 17984, "token_acc": 0.8884255930984903 }, { "epoch": 0.970431122861922, "grad_norm": 0.4803276062011719, "learning_rate": 4.5820992029673716e-08, "loss": 0.3225109577178955, "step": 17985, "token_acc": 0.8829249810558222 }, { "epoch": 0.9704850806669185, "grad_norm": 0.33979782462120056, "learning_rate": 4.565404195946865e-08, "loss": 0.3259179890155792, "step": 17986, "token_acc": 0.8846591496844937 }, { "epoch": 0.9705390384719149, "grad_norm": 0.4657800793647766, "learning_rate": 4.5487395892381826e-08, "loss": 0.41132673621177673, "step": 17987, "token_acc": 0.858141702921069 }, { "epoch": 0.9705929962769114, "grad_norm": 0.424192875623703, "learning_rate": 4.5321053833502496e-08, "loss": 0.32124507427215576, "step": 17988, "token_acc": 0.8859784283513097 }, { "epoch": 0.9706469540819079, "grad_norm": 0.41648218035697937, "learning_rate": 4.5155015787909926e-08, "loss": 0.3069741725921631, "step": 17989, "token_acc": 0.8876453488372092 }, { "epoch": 0.9707009118869044, "grad_norm": 0.5074077844619751, "learning_rate": 4.498928176067563e-08, "loss": 0.3470076322555542, "step": 17990, "token_acc": 0.8810437452033768 }, { "epoch": 0.970754869691901, "grad_norm": 0.42974627017974854, "learning_rate": 4.48238517568611e-08, "loss": 0.332340806722641, "step": 17991, "token_acc": 0.8800216420938726 }, { "epoch": 0.9708088274968975, "grad_norm": 0.4359301030635834, "learning_rate": 4.4658725781517865e-08, "loss": 0.35822218656539917, "step": 17992, "token_acc": 0.8772858517805582 }, { "epoch": 0.970862785301894, "grad_norm": 0.4019111692905426, "learning_rate": 4.449390383968854e-08, "loss": 0.34933164715766907, "step": 17993, "token_acc": 0.8836823866204611 }, { "epoch": 0.9709167431068904, "grad_norm": 0.4433143138885498, "learning_rate": 4.432938593640912e-08, "loss": 0.2965373694896698, "step": 17994, "token_acc": 0.8935467898993233 }, { "epoch": 0.9709707009118869, "grad_norm": 0.48717308044433594, "learning_rate": 4.4165172076701124e-08, "loss": 0.3120446503162384, "step": 17995, "token_acc": 0.8864864864864865 }, { "epoch": 0.9710246587168834, "grad_norm": 0.4498848021030426, "learning_rate": 4.4001262265580546e-08, "loss": 0.36197102069854736, "step": 17996, "token_acc": 0.8776102088167054 }, { "epoch": 0.9710786165218799, "grad_norm": 0.4449981153011322, "learning_rate": 4.383765650805338e-08, "loss": 0.29056409001350403, "step": 17997, "token_acc": 0.8886379430159833 }, { "epoch": 0.9711325743268764, "grad_norm": 0.3711410164833069, "learning_rate": 4.3674354809116746e-08, "loss": 0.32854023575782776, "step": 17998, "token_acc": 0.8839516824849007 }, { "epoch": 0.9711865321318729, "grad_norm": 0.35346829891204834, "learning_rate": 4.351135717375665e-08, "loss": 0.3103063702583313, "step": 17999, "token_acc": 0.8837592745259687 }, { "epoch": 0.9712404899368694, "grad_norm": 0.4167206883430481, "learning_rate": 4.334866360695133e-08, "loss": 0.3707948625087738, "step": 18000, "token_acc": 0.8727009984235418 }, { "epoch": 0.9712944477418659, "grad_norm": 0.5316513776779175, "learning_rate": 4.3186274113669044e-08, "loss": 0.3458895683288574, "step": 18001, "token_acc": 0.8800959232613909 }, { "epoch": 0.9713484055468623, "grad_norm": 0.36393269896507263, "learning_rate": 4.3024188698870264e-08, "loss": 0.3079865276813507, "step": 18002, "token_acc": 0.890251859585019 }, { "epoch": 0.9714023633518588, "grad_norm": 0.45226213335990906, "learning_rate": 4.286240736750435e-08, "loss": 0.32494455575942993, "step": 18003, "token_acc": 0.8846671498084688 }, { "epoch": 0.9714563211568553, "grad_norm": 0.4680827260017395, "learning_rate": 4.270093012451182e-08, "loss": 0.3655228316783905, "step": 18004, "token_acc": 0.8741580999645516 }, { "epoch": 0.9715102789618518, "grad_norm": 0.44066759943962097, "learning_rate": 4.253975697482426e-08, "loss": 0.32605522871017456, "step": 18005, "token_acc": 0.8876436373953752 }, { "epoch": 0.9715642367668483, "grad_norm": 0.39446815848350525, "learning_rate": 4.237888792336442e-08, "loss": 0.31598877906799316, "step": 18006, "token_acc": 0.8857109638414138 }, { "epoch": 0.9716181945718448, "grad_norm": 0.43481478095054626, "learning_rate": 4.221832297504502e-08, "loss": 0.33832865953445435, "step": 18007, "token_acc": 0.8800130420606456 }, { "epoch": 0.9716721523768413, "grad_norm": 0.4993930160999298, "learning_rate": 4.205806213476993e-08, "loss": 0.3303540349006653, "step": 18008, "token_acc": 0.8819691577698695 }, { "epoch": 0.9717261101818379, "grad_norm": 0.41532400250434875, "learning_rate": 4.1898105407431886e-08, "loss": 0.35179921984672546, "step": 18009, "token_acc": 0.8793820558526441 }, { "epoch": 0.9717800679868343, "grad_norm": 0.4380110204219818, "learning_rate": 4.173845279791699e-08, "loss": 0.35586482286453247, "step": 18010, "token_acc": 0.8799370161396142 }, { "epoch": 0.9718340257918308, "grad_norm": 0.4369135797023773, "learning_rate": 4.157910431110246e-08, "loss": 0.31558430194854736, "step": 18011, "token_acc": 0.8839388881756323 }, { "epoch": 0.9718879835968273, "grad_norm": 0.5796857476234436, "learning_rate": 4.142005995185217e-08, "loss": 0.39151936769485474, "step": 18012, "token_acc": 0.8670634920634921 }, { "epoch": 0.9719419414018238, "grad_norm": 0.4406505227088928, "learning_rate": 4.126131972502445e-08, "loss": 0.34583982825279236, "step": 18013, "token_acc": 0.874390243902439 }, { "epoch": 0.9719958992068203, "grad_norm": 0.4340859353542328, "learning_rate": 4.110288363546766e-08, "loss": 0.30655160546302795, "step": 18014, "token_acc": 0.882976634596505 }, { "epoch": 0.9720498570118168, "grad_norm": 0.37177979946136475, "learning_rate": 4.094475168801903e-08, "loss": 0.35694801807403564, "step": 18015, "token_acc": 0.8755797005432622 }, { "epoch": 0.9721038148168133, "grad_norm": 0.4839099645614624, "learning_rate": 4.0786923887509156e-08, "loss": 0.3139042556285858, "step": 18016, "token_acc": 0.8845960863020572 }, { "epoch": 0.9721577726218097, "grad_norm": 0.526187539100647, "learning_rate": 4.062940023875861e-08, "loss": 0.34090813994407654, "step": 18017, "token_acc": 0.8776545166402535 }, { "epoch": 0.9722117304268062, "grad_norm": 0.4420802593231201, "learning_rate": 4.047218074657577e-08, "loss": 0.3306127190589905, "step": 18018, "token_acc": 0.8782565748857883 }, { "epoch": 0.9722656882318027, "grad_norm": 0.4534253478050232, "learning_rate": 4.031526541576458e-08, "loss": 0.36695992946624756, "step": 18019, "token_acc": 0.8700940438871473 }, { "epoch": 0.9723196460367992, "grad_norm": 0.5046262145042419, "learning_rate": 4.015865425111565e-08, "loss": 0.3126228451728821, "step": 18020, "token_acc": 0.8871804728041515 }, { "epoch": 0.9723736038417957, "grad_norm": 0.4313470423221588, "learning_rate": 4.000234725741292e-08, "loss": 0.31078267097473145, "step": 18021, "token_acc": 0.886628314858987 }, { "epoch": 0.9724275616467922, "grad_norm": 0.3427720069885254, "learning_rate": 3.984634443942814e-08, "loss": 0.2811848819255829, "step": 18022, "token_acc": 0.8948377204191158 }, { "epoch": 0.9724815194517887, "grad_norm": 0.35166606307029724, "learning_rate": 3.9690645801927494e-08, "loss": 0.30990728735923767, "step": 18023, "token_acc": 0.8921458098459226 }, { "epoch": 0.9725354772567852, "grad_norm": 0.44671380519866943, "learning_rate": 3.953525134966496e-08, "loss": 0.38411784172058105, "step": 18024, "token_acc": 0.8701153286306125 }, { "epoch": 0.9725894350617816, "grad_norm": 0.38447099924087524, "learning_rate": 3.9380161087386736e-08, "loss": 0.3181113600730896, "step": 18025, "token_acc": 0.8881516587677725 }, { "epoch": 0.9726433928667781, "grad_norm": 0.28036409616470337, "learning_rate": 3.922537501982904e-08, "loss": 0.3166239857673645, "step": 18026, "token_acc": 0.8875025778511033 }, { "epoch": 0.9726973506717747, "grad_norm": 0.4044150114059448, "learning_rate": 3.9070893151719193e-08, "loss": 0.33803457021713257, "step": 18027, "token_acc": 0.8802843247287692 }, { "epoch": 0.9727513084767712, "grad_norm": 0.5374763011932373, "learning_rate": 3.891671548777454e-08, "loss": 0.327723890542984, "step": 18028, "token_acc": 0.8861569552037724 }, { "epoch": 0.9728052662817677, "grad_norm": 0.3067063093185425, "learning_rate": 3.876284203270464e-08, "loss": 0.33558353781700134, "step": 18029, "token_acc": 0.8774053007382306 }, { "epoch": 0.9728592240867642, "grad_norm": 0.523138701915741, "learning_rate": 3.860927279120796e-08, "loss": 0.318573921918869, "step": 18030, "token_acc": 0.8847087378640777 }, { "epoch": 0.9729131818917607, "grad_norm": 0.44793134927749634, "learning_rate": 3.845600776797409e-08, "loss": 0.3180585503578186, "step": 18031, "token_acc": 0.882415738904987 }, { "epoch": 0.9729671396967572, "grad_norm": 0.4077072739601135, "learning_rate": 3.830304696768372e-08, "loss": 0.29185622930526733, "step": 18032, "token_acc": 0.891404672627632 }, { "epoch": 0.9730210975017536, "grad_norm": 0.4441224932670593, "learning_rate": 3.8150390395009784e-08, "loss": 0.3785032629966736, "step": 18033, "token_acc": 0.868246003034193 }, { "epoch": 0.9730750553067501, "grad_norm": 0.5009239912033081, "learning_rate": 3.7998038054613e-08, "loss": 0.36475762724876404, "step": 18034, "token_acc": 0.8727224364942147 }, { "epoch": 0.9731290131117466, "grad_norm": 0.49105820059776306, "learning_rate": 3.784598995114519e-08, "loss": 0.3108810484409332, "step": 18035, "token_acc": 0.8865532734274711 }, { "epoch": 0.9731829709167431, "grad_norm": 0.4732705354690552, "learning_rate": 3.769424608925265e-08, "loss": 0.2956249713897705, "step": 18036, "token_acc": 0.8850756775783175 }, { "epoch": 0.9732369287217396, "grad_norm": 0.3504776954650879, "learning_rate": 3.7542806473567225e-08, "loss": 0.3344602584838867, "step": 18037, "token_acc": 0.8807409286982999 }, { "epoch": 0.9732908865267361, "grad_norm": 0.4429982006549835, "learning_rate": 3.739167110871522e-08, "loss": 0.3207071125507355, "step": 18038, "token_acc": 0.883765752409192 }, { "epoch": 0.9733448443317326, "grad_norm": 0.3942181169986725, "learning_rate": 3.724083999931183e-08, "loss": 0.27038702368736267, "step": 18039, "token_acc": 0.8992837958818263 }, { "epoch": 0.973398802136729, "grad_norm": 0.38101935386657715, "learning_rate": 3.7090313149963365e-08, "loss": 0.34286776185035706, "step": 18040, "token_acc": 0.8765972550875533 }, { "epoch": 0.9734527599417255, "grad_norm": 0.40510281920433044, "learning_rate": 3.694009056526504e-08, "loss": 0.27727216482162476, "step": 18041, "token_acc": 0.8960189309576837 }, { "epoch": 0.973506717746722, "grad_norm": 0.430899441242218, "learning_rate": 3.679017224980874e-08, "loss": 0.3053653836250305, "step": 18042, "token_acc": 0.888138405024292 }, { "epoch": 0.9735606755517185, "grad_norm": 0.46926677227020264, "learning_rate": 3.66405582081697e-08, "loss": 0.3052416741847992, "step": 18043, "token_acc": 0.8877973112719751 }, { "epoch": 0.973614633356715, "grad_norm": 0.4009089171886444, "learning_rate": 3.649124844491758e-08, "loss": 0.3279106616973877, "step": 18044, "token_acc": 0.8871252204585538 }, { "epoch": 0.9736685911617116, "grad_norm": 0.43098142743110657, "learning_rate": 3.63422429646132e-08, "loss": 0.34755033254623413, "step": 18045, "token_acc": 0.8769914215686274 }, { "epoch": 0.9737225489667081, "grad_norm": 0.4424232244491577, "learning_rate": 3.6193541771806226e-08, "loss": 0.3464199900627136, "step": 18046, "token_acc": 0.8754644077871898 }, { "epoch": 0.9737765067717046, "grad_norm": 0.3765951097011566, "learning_rate": 3.6045144871038605e-08, "loss": 0.3165830671787262, "step": 18047, "token_acc": 0.8833494009089657 }, { "epoch": 0.973830464576701, "grad_norm": 0.5529436469078064, "learning_rate": 3.589705226684115e-08, "loss": 0.3416544198989868, "step": 18048, "token_acc": 0.8777925975325108 }, { "epoch": 0.9738844223816975, "grad_norm": 0.3750896453857422, "learning_rate": 3.5749263963739125e-08, "loss": 0.29932814836502075, "step": 18049, "token_acc": 0.8965605095541401 }, { "epoch": 0.973938380186694, "grad_norm": 0.45969125628471375, "learning_rate": 3.560177996624337e-08, "loss": 0.337901771068573, "step": 18050, "token_acc": 0.8841068537506745 }, { "epoch": 0.9739923379916905, "grad_norm": 0.43569254875183105, "learning_rate": 3.5454600278858056e-08, "loss": 0.3727380633354187, "step": 18051, "token_acc": 0.8692449355432781 }, { "epoch": 0.974046295796687, "grad_norm": 0.471636027097702, "learning_rate": 3.53077249060807e-08, "loss": 0.3817474842071533, "step": 18052, "token_acc": 0.864218070130535 }, { "epoch": 0.9741002536016835, "grad_norm": 0.45894855260849, "learning_rate": 3.516115385239327e-08, "loss": 0.3285805284976959, "step": 18053, "token_acc": 0.883267937219731 }, { "epoch": 0.97415421140668, "grad_norm": 0.3692840039730072, "learning_rate": 3.501488712227441e-08, "loss": 0.29480409622192383, "step": 18054, "token_acc": 0.8909505208333334 }, { "epoch": 0.9742081692116765, "grad_norm": 0.3741604685783386, "learning_rate": 3.4868924720190546e-08, "loss": 0.3284471929073334, "step": 18055, "token_acc": 0.8838459614903726 }, { "epoch": 0.9742621270166729, "grad_norm": 0.40001416206359863, "learning_rate": 3.472326665060033e-08, "loss": 0.305874764919281, "step": 18056, "token_acc": 0.8913183279742766 }, { "epoch": 0.9743160848216694, "grad_norm": 0.38917651772499084, "learning_rate": 3.45779129179491e-08, "loss": 0.3220721185207367, "step": 18057, "token_acc": 0.8862537934135102 }, { "epoch": 0.9743700426266659, "grad_norm": 0.42922139167785645, "learning_rate": 3.443286352667885e-08, "loss": 0.33899182081222534, "step": 18058, "token_acc": 0.8753958201393287 }, { "epoch": 0.9744240004316624, "grad_norm": 0.38518884778022766, "learning_rate": 3.428811848121827e-08, "loss": 0.34635651111602783, "step": 18059, "token_acc": 0.8759151729361272 }, { "epoch": 0.974477958236659, "grad_norm": 0.4902209937572479, "learning_rate": 3.414367778598826e-08, "loss": 0.31642553210258484, "step": 18060, "token_acc": 0.8844917977338068 }, { "epoch": 0.9745319160416555, "grad_norm": 0.4498668611049652, "learning_rate": 3.399954144539863e-08, "loss": 0.32069820165634155, "step": 18061, "token_acc": 0.8867513146081827 }, { "epoch": 0.974585873846652, "grad_norm": 0.46572527289390564, "learning_rate": 3.385570946385364e-08, "loss": 0.3724190592765808, "step": 18062, "token_acc": 0.867502661776884 }, { "epoch": 0.9746398316516484, "grad_norm": 0.32197609543800354, "learning_rate": 3.3712181845743094e-08, "loss": 0.3018624782562256, "step": 18063, "token_acc": 0.8931029828586854 }, { "epoch": 0.9746937894566449, "grad_norm": 0.474765419960022, "learning_rate": 3.35689585954524e-08, "loss": 0.32866865396499634, "step": 18064, "token_acc": 0.8821679892199431 }, { "epoch": 0.9747477472616414, "grad_norm": 0.5045841932296753, "learning_rate": 3.34260397173547e-08, "loss": 0.35175514221191406, "step": 18065, "token_acc": 0.8753608422482595 }, { "epoch": 0.9748017050666379, "grad_norm": 0.4082699120044708, "learning_rate": 3.32834252158154e-08, "loss": 0.367409884929657, "step": 18066, "token_acc": 0.877376591662306 }, { "epoch": 0.9748556628716344, "grad_norm": 0.4880746006965637, "learning_rate": 3.3141115095187696e-08, "loss": 0.32403093576431274, "step": 18067, "token_acc": 0.882073067119796 }, { "epoch": 0.9749096206766309, "grad_norm": 0.5196210145950317, "learning_rate": 3.2999109359821424e-08, "loss": 0.3743234872817993, "step": 18068, "token_acc": 0.8721438172043011 }, { "epoch": 0.9749635784816274, "grad_norm": 0.4746220111846924, "learning_rate": 3.2857408014049794e-08, "loss": 0.2978189289569855, "step": 18069, "token_acc": 0.8910471481546883 }, { "epoch": 0.9750175362866239, "grad_norm": 0.49101585149765015, "learning_rate": 3.271601106220268e-08, "loss": 0.37328237295150757, "step": 18070, "token_acc": 0.8663677130044843 }, { "epoch": 0.9750714940916203, "grad_norm": 0.4829100966453552, "learning_rate": 3.257491850859773e-08, "loss": 0.30829882621765137, "step": 18071, "token_acc": 0.8927256792287467 }, { "epoch": 0.9751254518966168, "grad_norm": 0.4599878787994385, "learning_rate": 3.2434130357543724e-08, "loss": 0.3252059817314148, "step": 18072, "token_acc": 0.8824976930175331 }, { "epoch": 0.9751794097016133, "grad_norm": 0.4141479730606079, "learning_rate": 3.2293646613339446e-08, "loss": 0.2960280478000641, "step": 18073, "token_acc": 0.8941233608547838 }, { "epoch": 0.9752333675066098, "grad_norm": 0.36496618390083313, "learning_rate": 3.2153467280275907e-08, "loss": 0.3717796802520752, "step": 18074, "token_acc": 0.8703587948493562 }, { "epoch": 0.9752873253116063, "grad_norm": 0.38599511981010437, "learning_rate": 3.201359236263524e-08, "loss": 0.30515822768211365, "step": 18075, "token_acc": 0.8895622895622896 }, { "epoch": 0.9753412831166028, "grad_norm": 0.4211626350879669, "learning_rate": 3.187402186468735e-08, "loss": 0.3022926449775696, "step": 18076, "token_acc": 0.8960715028060694 }, { "epoch": 0.9753952409215993, "grad_norm": 0.3755805194377899, "learning_rate": 3.1734755790696624e-08, "loss": 0.30260661244392395, "step": 18077, "token_acc": 0.8912177121771218 }, { "epoch": 0.9754491987265959, "grad_norm": 0.46173644065856934, "learning_rate": 3.15957941449141e-08, "loss": 0.34594249725341797, "step": 18078, "token_acc": 0.8745241581259151 }, { "epoch": 0.9755031565315923, "grad_norm": 0.41694962978363037, "learning_rate": 3.145713693158525e-08, "loss": 0.2972041964530945, "step": 18079, "token_acc": 0.8922541564634617 }, { "epoch": 0.9755571143365888, "grad_norm": 0.38151514530181885, "learning_rate": 3.1318784154943384e-08, "loss": 0.33067452907562256, "step": 18080, "token_acc": 0.8814229249011858 }, { "epoch": 0.9756110721415853, "grad_norm": 0.35568690299987793, "learning_rate": 3.11807358192151e-08, "loss": 0.3428444564342499, "step": 18081, "token_acc": 0.8741301428397021 }, { "epoch": 0.9756650299465818, "grad_norm": 0.47634848952293396, "learning_rate": 3.1042991928614816e-08, "loss": 0.29954802989959717, "step": 18082, "token_acc": 0.890625 }, { "epoch": 0.9757189877515783, "grad_norm": 0.47794806957244873, "learning_rate": 3.0905552487351386e-08, "loss": 0.31427615880966187, "step": 18083, "token_acc": 0.889607390300231 }, { "epoch": 0.9757729455565748, "grad_norm": 0.4573868215084076, "learning_rate": 3.076841749961923e-08, "loss": 0.2785716652870178, "step": 18084, "token_acc": 0.8964388047482603 }, { "epoch": 0.9758269033615713, "grad_norm": 0.4600953459739685, "learning_rate": 3.063158696960944e-08, "loss": 0.33976829051971436, "step": 18085, "token_acc": 0.8786008230452675 }, { "epoch": 0.9758808611665677, "grad_norm": 0.39338675141334534, "learning_rate": 3.0495060901498677e-08, "loss": 0.34751927852630615, "step": 18086, "token_acc": 0.8763280041258381 }, { "epoch": 0.9759348189715642, "grad_norm": 0.40001380443573, "learning_rate": 3.035883929945693e-08, "loss": 0.3290404677391052, "step": 18087, "token_acc": 0.8809001731102135 }, { "epoch": 0.9759887767765607, "grad_norm": 0.4319165349006653, "learning_rate": 3.022292216764422e-08, "loss": 0.32392075657844543, "step": 18088, "token_acc": 0.8843495622792198 }, { "epoch": 0.9760427345815572, "grad_norm": 0.537651538848877, "learning_rate": 3.008730951021277e-08, "loss": 0.35629454255104065, "step": 18089, "token_acc": 0.8775961113566063 }, { "epoch": 0.9760966923865537, "grad_norm": 0.2857757806777954, "learning_rate": 2.9952001331302603e-08, "loss": 0.29132306575775146, "step": 18090, "token_acc": 0.8936102574970859 }, { "epoch": 0.9761506501915502, "grad_norm": 0.3771083354949951, "learning_rate": 2.981699763504709e-08, "loss": 0.2737131416797638, "step": 18091, "token_acc": 0.8985108246777624 }, { "epoch": 0.9762046079965467, "grad_norm": 0.514370322227478, "learning_rate": 2.968229842556736e-08, "loss": 0.35451406240463257, "step": 18092, "token_acc": 0.8756186077202244 }, { "epoch": 0.9762585658015432, "grad_norm": 0.403337687253952, "learning_rate": 2.954790370697902e-08, "loss": 0.2791115641593933, "step": 18093, "token_acc": 0.896862077353442 }, { "epoch": 0.9763125236065396, "grad_norm": 0.3430492579936981, "learning_rate": 2.9413813483386567e-08, "loss": 0.30841705203056335, "step": 18094, "token_acc": 0.8925354591572626 }, { "epoch": 0.9763664814115361, "grad_norm": 0.498088538646698, "learning_rate": 2.928002775888339e-08, "loss": 0.3309909701347351, "step": 18095, "token_acc": 0.8850393700787401 }, { "epoch": 0.9764204392165327, "grad_norm": 0.4489368796348572, "learning_rate": 2.914654653755622e-08, "loss": 0.33182597160339355, "step": 18096, "token_acc": 0.8812392426850258 }, { "epoch": 0.9764743970215292, "grad_norm": 0.4892873167991638, "learning_rate": 2.9013369823481797e-08, "loss": 0.2808675765991211, "step": 18097, "token_acc": 0.8955367913148371 }, { "epoch": 0.9765283548265257, "grad_norm": 0.5045461654663086, "learning_rate": 2.8880497620726867e-08, "loss": 0.2891034781932831, "step": 18098, "token_acc": 0.8921241887388177 }, { "epoch": 0.9765823126315222, "grad_norm": 0.46125632524490356, "learning_rate": 2.8747929933349295e-08, "loss": 0.3819107413291931, "step": 18099, "token_acc": 0.8711264795850512 }, { "epoch": 0.9766362704365187, "grad_norm": 0.4357698857784271, "learning_rate": 2.8615666765398065e-08, "loss": 0.428378164768219, "step": 18100, "token_acc": 0.8516003879728419 }, { "epoch": 0.9766902282415152, "grad_norm": 0.38970664143562317, "learning_rate": 2.8483708120912168e-08, "loss": 0.35713690519332886, "step": 18101, "token_acc": 0.8755063728880546 }, { "epoch": 0.9767441860465116, "grad_norm": 0.49644672870635986, "learning_rate": 2.8352054003921715e-08, "loss": 0.34410738945007324, "step": 18102, "token_acc": 0.8775914432853559 }, { "epoch": 0.9767981438515081, "grad_norm": 0.5195117592811584, "learning_rate": 2.822070441844793e-08, "loss": 0.3202765882015228, "step": 18103, "token_acc": 0.8861985472154964 }, { "epoch": 0.9768521016565046, "grad_norm": 0.3753194212913513, "learning_rate": 2.8089659368500942e-08, "loss": 0.3388569951057434, "step": 18104, "token_acc": 0.8786314525810324 }, { "epoch": 0.9769060594615011, "grad_norm": 0.4393429160118103, "learning_rate": 2.7958918858083107e-08, "loss": 0.31274861097335815, "step": 18105, "token_acc": 0.8889202540578688 }, { "epoch": 0.9769600172664976, "grad_norm": 0.48148155212402344, "learning_rate": 2.7828482891187892e-08, "loss": 0.35166382789611816, "step": 18106, "token_acc": 0.8719729678993805 }, { "epoch": 0.9770139750714941, "grad_norm": 0.5063981413841248, "learning_rate": 2.7698351471798778e-08, "loss": 0.3829396963119507, "step": 18107, "token_acc": 0.8685393258426967 }, { "epoch": 0.9770679328764906, "grad_norm": 0.351142019033432, "learning_rate": 2.756852460388926e-08, "loss": 0.3242984414100647, "step": 18108, "token_acc": 0.8866822429906542 }, { "epoch": 0.977121890681487, "grad_norm": 0.39040565490722656, "learning_rate": 2.7439002291425043e-08, "loss": 0.34837719798088074, "step": 18109, "token_acc": 0.874639307470343 }, { "epoch": 0.9771758484864835, "grad_norm": 0.38862481713294983, "learning_rate": 2.730978453836075e-08, "loss": 0.33584102988243103, "step": 18110, "token_acc": 0.8829321663019694 }, { "epoch": 0.97722980629148, "grad_norm": 0.4748983085155487, "learning_rate": 2.7180871348643223e-08, "loss": 0.3050161600112915, "step": 18111, "token_acc": 0.8951813214108296 }, { "epoch": 0.9772837640964765, "grad_norm": 0.5411792993545532, "learning_rate": 2.705226272620931e-08, "loss": 0.30922818183898926, "step": 18112, "token_acc": 0.8836230558096981 }, { "epoch": 0.977337721901473, "grad_norm": 0.425273597240448, "learning_rate": 2.6923958674986983e-08, "loss": 0.2978857159614563, "step": 18113, "token_acc": 0.891515048461146 }, { "epoch": 0.9773916797064696, "grad_norm": 0.3777101933956146, "learning_rate": 2.6795959198894216e-08, "loss": 0.2599998712539673, "step": 18114, "token_acc": 0.9035645205857972 }, { "epoch": 0.9774456375114661, "grad_norm": 0.5227327942848206, "learning_rate": 2.6668264301840107e-08, "loss": 0.3501749336719513, "step": 18115, "token_acc": 0.87468211359141 }, { "epoch": 0.9774995953164626, "grad_norm": 0.42592447996139526, "learning_rate": 2.654087398772487e-08, "loss": 0.2893075942993164, "step": 18116, "token_acc": 0.8933665697271846 }, { "epoch": 0.977553553121459, "grad_norm": 0.34593069553375244, "learning_rate": 2.6413788260438722e-08, "loss": 0.33936983346939087, "step": 18117, "token_acc": 0.8754405286343613 }, { "epoch": 0.9776075109264555, "grad_norm": 0.5270633101463318, "learning_rate": 2.6287007123861897e-08, "loss": 0.3645949959754944, "step": 18118, "token_acc": 0.8686787927243654 }, { "epoch": 0.977661468731452, "grad_norm": 0.418239027261734, "learning_rate": 2.616053058186796e-08, "loss": 0.3104698657989502, "step": 18119, "token_acc": 0.8842405158194632 }, { "epoch": 0.9777154265364485, "grad_norm": 0.4876844584941864, "learning_rate": 2.603435863831827e-08, "loss": 0.3030395209789276, "step": 18120, "token_acc": 0.8890276043834097 }, { "epoch": 0.977769384341445, "grad_norm": 0.43864020705223083, "learning_rate": 2.5908491297066406e-08, "loss": 0.3378680348396301, "step": 18121, "token_acc": 0.879595478881618 }, { "epoch": 0.9778233421464415, "grad_norm": 0.4585868716239929, "learning_rate": 2.5782928561957077e-08, "loss": 0.2593226134777069, "step": 18122, "token_acc": 0.9041169132062021 }, { "epoch": 0.977877299951438, "grad_norm": 0.41073960065841675, "learning_rate": 2.565767043682388e-08, "loss": 0.3286505341529846, "step": 18123, "token_acc": 0.881869542886492 }, { "epoch": 0.9779312577564344, "grad_norm": 0.446067750453949, "learning_rate": 2.553271692549153e-08, "loss": 0.3707585334777832, "step": 18124, "token_acc": 0.8719806763285024 }, { "epoch": 0.9779852155614309, "grad_norm": 0.47862720489501953, "learning_rate": 2.540806803177809e-08, "loss": 0.3588232398033142, "step": 18125, "token_acc": 0.8782620041753654 }, { "epoch": 0.9780391733664274, "grad_norm": 0.41250109672546387, "learning_rate": 2.5283723759489397e-08, "loss": 0.3003402352333069, "step": 18126, "token_acc": 0.890849580190693 }, { "epoch": 0.9780931311714239, "grad_norm": 0.40553468465805054, "learning_rate": 2.515968411242242e-08, "loss": 0.30432069301605225, "step": 18127, "token_acc": 0.8921861019982106 }, { "epoch": 0.9781470889764204, "grad_norm": 0.45350635051727295, "learning_rate": 2.503594909436524e-08, "loss": 0.3329744338989258, "step": 18128, "token_acc": 0.8803404812571616 }, { "epoch": 0.978201046781417, "grad_norm": 0.418832927942276, "learning_rate": 2.4912518709097054e-08, "loss": 0.3676687479019165, "step": 18129, "token_acc": 0.8690456152162935 }, { "epoch": 0.9782550045864135, "grad_norm": 0.3865574896335602, "learning_rate": 2.478939296038818e-08, "loss": 0.2797263562679291, "step": 18130, "token_acc": 0.895970214629873 }, { "epoch": 0.97830896239141, "grad_norm": 0.4116117060184479, "learning_rate": 2.4666571851997835e-08, "loss": 0.3474580645561218, "step": 18131, "token_acc": 0.8768876743851575 }, { "epoch": 0.9783629201964064, "grad_norm": 0.5696354508399963, "learning_rate": 2.4544055387676348e-08, "loss": 0.34423816204071045, "step": 18132, "token_acc": 0.8802153432032301 }, { "epoch": 0.9784168780014029, "grad_norm": 0.4842834770679474, "learning_rate": 2.442184357116739e-08, "loss": 0.3242446780204773, "step": 18133, "token_acc": 0.879041160700205 }, { "epoch": 0.9784708358063994, "grad_norm": 0.47122693061828613, "learning_rate": 2.429993640620132e-08, "loss": 0.3055726885795593, "step": 18134, "token_acc": 0.8903881392637015 }, { "epoch": 0.9785247936113959, "grad_norm": 0.36716228723526, "learning_rate": 2.4178333896502927e-08, "loss": 0.3199131488800049, "step": 18135, "token_acc": 0.887785501489573 }, { "epoch": 0.9785787514163924, "grad_norm": 0.39908739924430847, "learning_rate": 2.405703604578369e-08, "loss": 0.27310070395469666, "step": 18136, "token_acc": 0.8977599799774747 }, { "epoch": 0.9786327092213889, "grad_norm": 0.5164926052093506, "learning_rate": 2.3936042857749532e-08, "loss": 0.28534260392189026, "step": 18137, "token_acc": 0.8913871951219512 }, { "epoch": 0.9786866670263854, "grad_norm": 0.4431048035621643, "learning_rate": 2.3815354336095274e-08, "loss": 0.31925275921821594, "step": 18138, "token_acc": 0.8862179487179487 }, { "epoch": 0.9787406248313819, "grad_norm": 0.3972828686237335, "learning_rate": 2.3694970484506864e-08, "loss": 0.3187161982059479, "step": 18139, "token_acc": 0.8831168831168831 }, { "epoch": 0.9787945826363783, "grad_norm": 0.4183948040008545, "learning_rate": 2.3574891306661353e-08, "loss": 0.2674945890903473, "step": 18140, "token_acc": 0.8997769006349751 }, { "epoch": 0.9788485404413748, "grad_norm": 0.4093245565891266, "learning_rate": 2.3455116806224698e-08, "loss": 0.2959306836128235, "step": 18141, "token_acc": 0.8956272401433691 }, { "epoch": 0.9789024982463713, "grad_norm": 0.44435572624206543, "learning_rate": 2.333564698685509e-08, "loss": 0.32643526792526245, "step": 18142, "token_acc": 0.8775134708897359 }, { "epoch": 0.9789564560513678, "grad_norm": 0.2927253842353821, "learning_rate": 2.321648185220071e-08, "loss": 0.23941290378570557, "step": 18143, "token_acc": 0.9078673289536933 }, { "epoch": 0.9790104138563643, "grad_norm": 0.32979175448417664, "learning_rate": 2.3097621405901994e-08, "loss": 0.28906482458114624, "step": 18144, "token_acc": 0.8935190032457334 }, { "epoch": 0.9790643716613608, "grad_norm": 0.3435779809951782, "learning_rate": 2.2979065651588252e-08, "loss": 0.3597390651702881, "step": 18145, "token_acc": 0.8704537160929328 }, { "epoch": 0.9791183294663574, "grad_norm": 0.3946160078048706, "learning_rate": 2.286081459287992e-08, "loss": 0.2829769551753998, "step": 18146, "token_acc": 0.8951486697965572 }, { "epoch": 0.9791722872713537, "grad_norm": 0.37154754996299744, "learning_rate": 2.274286823338967e-08, "loss": 0.3056817650794983, "step": 18147, "token_acc": 0.8878337026485175 }, { "epoch": 0.9792262450763503, "grad_norm": 0.43498945236206055, "learning_rate": 2.262522657671795e-08, "loss": 0.2884727418422699, "step": 18148, "token_acc": 0.8926773126168897 }, { "epoch": 0.9792802028813468, "grad_norm": 0.3970312774181366, "learning_rate": 2.2507889626457448e-08, "loss": 0.2568148374557495, "step": 18149, "token_acc": 0.9059383151756943 }, { "epoch": 0.9793341606863433, "grad_norm": 0.44608816504478455, "learning_rate": 2.2390857386193065e-08, "loss": 0.3058708906173706, "step": 18150, "token_acc": 0.8886020651310564 }, { "epoch": 0.9793881184913398, "grad_norm": 0.386436402797699, "learning_rate": 2.2274129859497506e-08, "loss": 0.3482027053833008, "step": 18151, "token_acc": 0.8755417956656347 }, { "epoch": 0.9794420762963363, "grad_norm": 0.45499852299690247, "learning_rate": 2.2157707049936806e-08, "loss": 0.2985185980796814, "step": 18152, "token_acc": 0.8942015786278081 }, { "epoch": 0.9794960341013328, "grad_norm": 0.2910255193710327, "learning_rate": 2.2041588961065897e-08, "loss": 0.2901034951210022, "step": 18153, "token_acc": 0.8935278596926898 }, { "epoch": 0.9795499919063293, "grad_norm": 0.38329795002937317, "learning_rate": 2.192577559643083e-08, "loss": 0.34300464391708374, "step": 18154, "token_acc": 0.876036665211698 }, { "epoch": 0.9796039497113257, "grad_norm": 0.410613089799881, "learning_rate": 2.1810266959567674e-08, "loss": 0.28033050894737244, "step": 18155, "token_acc": 0.8967061080908219 }, { "epoch": 0.9796579075163222, "grad_norm": 0.36804863810539246, "learning_rate": 2.1695063054006927e-08, "loss": 0.29545146226882935, "step": 18156, "token_acc": 0.8904653120115427 }, { "epoch": 0.9797118653213187, "grad_norm": 0.5227661728858948, "learning_rate": 2.1580163883263562e-08, "loss": 0.38347312808036804, "step": 18157, "token_acc": 0.8721991701244813 }, { "epoch": 0.9797658231263152, "grad_norm": 0.5161324143409729, "learning_rate": 2.14655694508481e-08, "loss": 0.30589839816093445, "step": 18158, "token_acc": 0.8881118881118881 }, { "epoch": 0.9798197809313117, "grad_norm": 0.5700051188468933, "learning_rate": 2.135127976026108e-08, "loss": 0.33269888162612915, "step": 18159, "token_acc": 0.8840981012658228 }, { "epoch": 0.9798737387363082, "grad_norm": 0.41249188780784607, "learning_rate": 2.1237294814990817e-08, "loss": 0.3489120900630951, "step": 18160, "token_acc": 0.8809917355371901 }, { "epoch": 0.9799276965413047, "grad_norm": 0.4614558517932892, "learning_rate": 2.112361461852008e-08, "loss": 0.39104127883911133, "step": 18161, "token_acc": 0.8657024793388429 }, { "epoch": 0.9799816543463012, "grad_norm": 0.41880089044570923, "learning_rate": 2.1010239174320546e-08, "loss": 0.3794102668762207, "step": 18162, "token_acc": 0.8714696026807085 }, { "epoch": 0.9800356121512976, "grad_norm": 0.3896424472332001, "learning_rate": 2.089716848585277e-08, "loss": 0.3531705141067505, "step": 18163, "token_acc": 0.8773738469886055 }, { "epoch": 0.9800895699562941, "grad_norm": 0.4409444034099579, "learning_rate": 2.078440255657288e-08, "loss": 0.407389760017395, "step": 18164, "token_acc": 0.8573256557901472 }, { "epoch": 0.9801435277612907, "grad_norm": 0.3852260708808899, "learning_rate": 2.067194138992146e-08, "loss": 0.31988996267318726, "step": 18165, "token_acc": 0.8856921687524191 }, { "epoch": 0.9801974855662872, "grad_norm": 0.4494524896144867, "learning_rate": 2.055978498933575e-08, "loss": 0.3389178514480591, "step": 18166, "token_acc": 0.880152559777028 }, { "epoch": 0.9802514433712837, "grad_norm": 0.44838273525238037, "learning_rate": 2.04479333582408e-08, "loss": 0.337915301322937, "step": 18167, "token_acc": 0.8745827372436814 }, { "epoch": 0.9803054011762802, "grad_norm": 0.4617377519607544, "learning_rate": 2.033638650005054e-08, "loss": 0.29754698276519775, "step": 18168, "token_acc": 0.891832229580574 }, { "epoch": 0.9803593589812767, "grad_norm": 0.4501558244228363, "learning_rate": 2.022514441817225e-08, "loss": 0.358672171831131, "step": 18169, "token_acc": 0.8776041666666666 }, { "epoch": 0.9804133167862731, "grad_norm": 0.34487152099609375, "learning_rate": 2.0114207116004312e-08, "loss": 0.25206780433654785, "step": 18170, "token_acc": 0.9058687767886581 }, { "epoch": 0.9804672745912696, "grad_norm": 0.36809417605400085, "learning_rate": 2.0003574596934027e-08, "loss": 0.3090890347957611, "step": 18171, "token_acc": 0.8888635846048736 }, { "epoch": 0.9805212323962661, "grad_norm": 0.407402366399765, "learning_rate": 1.9893246864340908e-08, "loss": 0.3006093502044678, "step": 18172, "token_acc": 0.8888451443569554 }, { "epoch": 0.9805751902012626, "grad_norm": 0.34958478808403015, "learning_rate": 1.9783223921592266e-08, "loss": 0.31459465622901917, "step": 18173, "token_acc": 0.885611232770836 }, { "epoch": 0.9806291480062591, "grad_norm": 0.422425776720047, "learning_rate": 1.967350577205096e-08, "loss": 0.2880173623561859, "step": 18174, "token_acc": 0.8942263279445728 }, { "epoch": 0.9806831058112556, "grad_norm": 0.38996362686157227, "learning_rate": 1.9564092419065427e-08, "loss": 0.3290528655052185, "step": 18175, "token_acc": 0.8830497163830497 }, { "epoch": 0.9807370636162521, "grad_norm": 0.41121387481689453, "learning_rate": 1.945498386597744e-08, "loss": 0.3382181227207184, "step": 18176, "token_acc": 0.8756726921484752 }, { "epoch": 0.9807910214212486, "grad_norm": 0.4425889551639557, "learning_rate": 1.9346180116120995e-08, "loss": 0.3192683458328247, "step": 18177, "token_acc": 0.8857428857428857 }, { "epoch": 0.980844979226245, "grad_norm": 0.45378339290618896, "learning_rate": 1.9237681172816768e-08, "loss": 0.2985329031944275, "step": 18178, "token_acc": 0.8892512569642614 }, { "epoch": 0.9808989370312415, "grad_norm": 0.5212161540985107, "learning_rate": 1.912948703937989e-08, "loss": 0.39195337891578674, "step": 18179, "token_acc": 0.8634198801659241 }, { "epoch": 0.980952894836238, "grad_norm": 0.4487907290458679, "learning_rate": 1.902159771911327e-08, "loss": 0.2806852459907532, "step": 18180, "token_acc": 0.8959086309093538 }, { "epoch": 0.9810068526412346, "grad_norm": 0.39089199900627136, "learning_rate": 1.8914013215312054e-08, "loss": 0.27681347727775574, "step": 18181, "token_acc": 0.8985294117647059 }, { "epoch": 0.9810608104462311, "grad_norm": 0.43269744515419006, "learning_rate": 1.88067335312625e-08, "loss": 0.30786794424057007, "step": 18182, "token_acc": 0.8890556597873671 }, { "epoch": 0.9811147682512276, "grad_norm": 0.4022831320762634, "learning_rate": 1.8699758670240877e-08, "loss": 0.33494237065315247, "step": 18183, "token_acc": 0.8832925600805871 }, { "epoch": 0.9811687260562241, "grad_norm": 0.3989386558532715, "learning_rate": 1.8593088635513456e-08, "loss": 0.3398120105266571, "step": 18184, "token_acc": 0.8837820914214152 }, { "epoch": 0.9812226838612206, "grad_norm": 0.44587209820747375, "learning_rate": 1.8486723430338748e-08, "loss": 0.29606571793556213, "step": 18185, "token_acc": 0.8924182265078465 }, { "epoch": 0.981276641666217, "grad_norm": 0.49295854568481445, "learning_rate": 1.8380663057964155e-08, "loss": 0.31058424711227417, "step": 18186, "token_acc": 0.8878199622058066 }, { "epoch": 0.9813305994712135, "grad_norm": 0.44605696201324463, "learning_rate": 1.8274907521630414e-08, "loss": 0.30376937985420227, "step": 18187, "token_acc": 0.8874452554744525 }, { "epoch": 0.98138455727621, "grad_norm": 0.43487656116485596, "learning_rate": 1.8169456824564947e-08, "loss": 0.30416780710220337, "step": 18188, "token_acc": 0.888492814971594 }, { "epoch": 0.9814385150812065, "grad_norm": 0.5834730863571167, "learning_rate": 1.8064310969989618e-08, "loss": 0.32776179909706116, "step": 18189, "token_acc": 0.8777497900923593 }, { "epoch": 0.981492472886203, "grad_norm": 0.2680501937866211, "learning_rate": 1.7959469961116304e-08, "loss": 0.3211347460746765, "step": 18190, "token_acc": 0.8870417732310315 }, { "epoch": 0.9815464306911995, "grad_norm": 0.3958500325679779, "learning_rate": 1.7854933801144668e-08, "loss": 0.24318858981132507, "step": 18191, "token_acc": 0.9073800241968006 }, { "epoch": 0.981600388496196, "grad_norm": 0.5106907486915588, "learning_rate": 1.7750702493268822e-08, "loss": 0.32625913619995117, "step": 18192, "token_acc": 0.8826291079812206 }, { "epoch": 0.9816543463011924, "grad_norm": 0.4457869529724121, "learning_rate": 1.7646776040671776e-08, "loss": 0.3070930242538452, "step": 18193, "token_acc": 0.8885233210760596 }, { "epoch": 0.9817083041061889, "grad_norm": 0.3379596769809723, "learning_rate": 1.7543154446526544e-08, "loss": 0.3220793306827545, "step": 18194, "token_acc": 0.8824360382985402 }, { "epoch": 0.9817622619111854, "grad_norm": 0.44911351799964905, "learning_rate": 1.7439837713998376e-08, "loss": 0.27808618545532227, "step": 18195, "token_acc": 0.8965669662590247 }, { "epoch": 0.9818162197161819, "grad_norm": 0.4175763726234436, "learning_rate": 1.7336825846243634e-08, "loss": 0.34006208181381226, "step": 18196, "token_acc": 0.8833588866611629 }, { "epoch": 0.9818701775211784, "grad_norm": 0.39180484414100647, "learning_rate": 1.7234118846405357e-08, "loss": 0.2910056412220001, "step": 18197, "token_acc": 0.893862933420153 }, { "epoch": 0.981924135326175, "grad_norm": 0.4529642164707184, "learning_rate": 1.713171671762326e-08, "loss": 0.2746405303478241, "step": 18198, "token_acc": 0.8933922094306892 }, { "epoch": 0.9819780931311715, "grad_norm": 0.38874393701553345, "learning_rate": 1.7029619463022618e-08, "loss": 0.28534775972366333, "step": 18199, "token_acc": 0.8934437543133196 }, { "epoch": 0.982032050936168, "grad_norm": 0.42591235041618347, "learning_rate": 1.6927827085723158e-08, "loss": 0.34919095039367676, "step": 18200, "token_acc": 0.8776978417266187 }, { "epoch": 0.9820860087411644, "grad_norm": 0.3296189606189728, "learning_rate": 1.6826339588832397e-08, "loss": 0.29584982991218567, "step": 18201, "token_acc": 0.892083934717442 }, { "epoch": 0.9821399665461609, "grad_norm": 0.4336935877799988, "learning_rate": 1.6725156975448965e-08, "loss": 0.3329341411590576, "step": 18202, "token_acc": 0.8790354989953114 }, { "epoch": 0.9821939243511574, "grad_norm": 0.4826164245605469, "learning_rate": 1.6624279248664832e-08, "loss": 0.26526734232902527, "step": 18203, "token_acc": 0.8984143579020736 }, { "epoch": 0.9822478821561539, "grad_norm": 0.47243571281433105, "learning_rate": 1.652370641155976e-08, "loss": 0.3629046678543091, "step": 18204, "token_acc": 0.8719456422318045 }, { "epoch": 0.9823018399611504, "grad_norm": 0.48011359572410583, "learning_rate": 1.6423438467205733e-08, "loss": 0.3455412983894348, "step": 18205, "token_acc": 0.878479381443299 }, { "epoch": 0.9823557977661469, "grad_norm": 0.4428323209285736, "learning_rate": 1.6323475418663636e-08, "loss": 0.35037344694137573, "step": 18206, "token_acc": 0.8792019677507515 }, { "epoch": 0.9824097555711434, "grad_norm": 0.46155357360839844, "learning_rate": 1.6223817268987695e-08, "loss": 0.3107978403568268, "step": 18207, "token_acc": 0.8863636363636364 }, { "epoch": 0.9824637133761399, "grad_norm": 0.47100764513015747, "learning_rate": 1.612446402122214e-08, "loss": 0.36049962043762207, "step": 18208, "token_acc": 0.8750355416548194 }, { "epoch": 0.9825176711811363, "grad_norm": 0.40947428345680237, "learning_rate": 1.6025415678398994e-08, "loss": 0.29532021284103394, "step": 18209, "token_acc": 0.8904027249048287 }, { "epoch": 0.9825716289861328, "grad_norm": 0.5373544096946716, "learning_rate": 1.5926672243543607e-08, "loss": 0.38169780373573303, "step": 18210, "token_acc": 0.8652804473178403 }, { "epoch": 0.9826255867911293, "grad_norm": 0.4202222526073456, "learning_rate": 1.582823371967246e-08, "loss": 0.363986611366272, "step": 18211, "token_acc": 0.8689088983050848 }, { "epoch": 0.9826795445961258, "grad_norm": 0.4583686888217926, "learning_rate": 1.5730100109792035e-08, "loss": 0.3093707859516144, "step": 18212, "token_acc": 0.886191378493605 }, { "epoch": 0.9827335024011223, "grad_norm": 0.4827410578727722, "learning_rate": 1.5632271416898825e-08, "loss": 0.2801116108894348, "step": 18213, "token_acc": 0.8951965065502183 }, { "epoch": 0.9827874602061188, "grad_norm": 0.4084767997264862, "learning_rate": 1.553474764398044e-08, "loss": 0.3705828785896301, "step": 18214, "token_acc": 0.8711469036378784 }, { "epoch": 0.9828414180111154, "grad_norm": 0.4049210548400879, "learning_rate": 1.543752879401561e-08, "loss": 0.30182915925979614, "step": 18215, "token_acc": 0.8896876549330689 }, { "epoch": 0.9828953758161117, "grad_norm": 0.46648135781288147, "learning_rate": 1.534061486997196e-08, "loss": 0.2810440957546234, "step": 18216, "token_acc": 0.8975402256334382 }, { "epoch": 0.9829493336211083, "grad_norm": 0.33751142024993896, "learning_rate": 1.5244005874811564e-08, "loss": 0.31018397212028503, "step": 18217, "token_acc": 0.8883646112600536 }, { "epoch": 0.9830032914261048, "grad_norm": 0.3923344612121582, "learning_rate": 1.5147701811483175e-08, "loss": 0.3052747845649719, "step": 18218, "token_acc": 0.8905500705218617 }, { "epoch": 0.9830572492311013, "grad_norm": 0.5059411525726318, "learning_rate": 1.505170268292777e-08, "loss": 0.31032246351242065, "step": 18219, "token_acc": 0.889102564102564 }, { "epoch": 0.9831112070360978, "grad_norm": 0.33292874693870544, "learning_rate": 1.4956008492077455e-08, "loss": 0.32287168502807617, "step": 18220, "token_acc": 0.884129390952742 }, { "epoch": 0.9831651648410943, "grad_norm": 0.45062994956970215, "learning_rate": 1.4860619241855446e-08, "loss": 0.30634260177612305, "step": 18221, "token_acc": 0.8826656540725271 }, { "epoch": 0.9832191226460908, "grad_norm": 0.43206697702407837, "learning_rate": 1.4765534935174963e-08, "loss": 0.3439202308654785, "step": 18222, "token_acc": 0.8771255532261821 }, { "epoch": 0.9832730804510873, "grad_norm": 0.4263887405395508, "learning_rate": 1.4670755574938134e-08, "loss": 0.2505400478839874, "step": 18223, "token_acc": 0.9074213457775102 }, { "epoch": 0.9833270382560837, "grad_norm": 0.4232049286365509, "learning_rate": 1.4576281164041529e-08, "loss": 0.39601126313209534, "step": 18224, "token_acc": 0.8566966028894963 }, { "epoch": 0.9833809960610802, "grad_norm": 0.4254598021507263, "learning_rate": 1.4482111705369507e-08, "loss": 0.30320557951927185, "step": 18225, "token_acc": 0.8898067561587255 }, { "epoch": 0.9834349538660767, "grad_norm": 0.4837762415409088, "learning_rate": 1.4388247201797545e-08, "loss": 0.31646400690078735, "step": 18226, "token_acc": 0.886418895449809 }, { "epoch": 0.9834889116710732, "grad_norm": 0.3676784932613373, "learning_rate": 1.4294687656192241e-08, "loss": 0.349559485912323, "step": 18227, "token_acc": 0.8701677607585704 }, { "epoch": 0.9835428694760697, "grad_norm": 0.4373631179332733, "learning_rate": 1.4201433071412418e-08, "loss": 0.31903892755508423, "step": 18228, "token_acc": 0.8845497630331753 }, { "epoch": 0.9835968272810662, "grad_norm": 0.5318553447723389, "learning_rate": 1.4108483450303578e-08, "loss": 0.3442613482475281, "step": 18229, "token_acc": 0.8768072289156627 }, { "epoch": 0.9836507850860627, "grad_norm": 0.3981340527534485, "learning_rate": 1.4015838795706782e-08, "loss": 0.3116297125816345, "step": 18230, "token_acc": 0.8896954170224879 }, { "epoch": 0.9837047428910591, "grad_norm": 0.45721814036369324, "learning_rate": 1.3923499110449768e-08, "loss": 0.30492085218429565, "step": 18231, "token_acc": 0.8922343324250681 }, { "epoch": 0.9837587006960556, "grad_norm": 0.3915620446205139, "learning_rate": 1.3831464397353612e-08, "loss": 0.29930368065834045, "step": 18232, "token_acc": 0.8935867827322793 }, { "epoch": 0.9838126585010522, "grad_norm": 0.40378057956695557, "learning_rate": 1.3739734659227178e-08, "loss": 0.31246817111968994, "step": 18233, "token_acc": 0.8878481636677348 }, { "epoch": 0.9838666163060487, "grad_norm": 0.3899383544921875, "learning_rate": 1.3648309898873779e-08, "loss": 0.3342248201370239, "step": 18234, "token_acc": 0.88063977082836 }, { "epoch": 0.9839205741110452, "grad_norm": 0.3780319094657898, "learning_rate": 1.3557190119085629e-08, "loss": 0.2955220341682434, "step": 18235, "token_acc": 0.8921004020428122 }, { "epoch": 0.9839745319160417, "grad_norm": 0.47503745555877686, "learning_rate": 1.3466375322644943e-08, "loss": 0.36756354570388794, "step": 18236, "token_acc": 0.8696655132641292 }, { "epoch": 0.9840284897210382, "grad_norm": 0.4030187726020813, "learning_rate": 1.3375865512323949e-08, "loss": 0.39338576793670654, "step": 18237, "token_acc": 0.8573692551505546 }, { "epoch": 0.9840824475260347, "grad_norm": 0.4630173444747925, "learning_rate": 1.3285660690888213e-08, "loss": 0.33929693698883057, "step": 18238, "token_acc": 0.8778578528827038 }, { "epoch": 0.9841364053310311, "grad_norm": 0.5315066576004028, "learning_rate": 1.319576086109331e-08, "loss": 0.34161996841430664, "step": 18239, "token_acc": 0.8839740214147797 }, { "epoch": 0.9841903631360276, "grad_norm": 0.41876670718193054, "learning_rate": 1.3106166025682598e-08, "loss": 0.2677338719367981, "step": 18240, "token_acc": 0.8998811645870469 }, { "epoch": 0.9842443209410241, "grad_norm": 0.5077086091041565, "learning_rate": 1.301687618739389e-08, "loss": 0.28637614846229553, "step": 18241, "token_acc": 0.8930057459877154 }, { "epoch": 0.9842982787460206, "grad_norm": 0.4857051372528076, "learning_rate": 1.2927891348952782e-08, "loss": 0.3143349587917328, "step": 18242, "token_acc": 0.888183366888648 }, { "epoch": 0.9843522365510171, "grad_norm": 0.34690365195274353, "learning_rate": 1.283921151307821e-08, "loss": 0.2703969478607178, "step": 18243, "token_acc": 0.9017490315764761 }, { "epoch": 0.9844061943560136, "grad_norm": 0.54759281873703, "learning_rate": 1.2750836682478007e-08, "loss": 0.3552451729774475, "step": 18244, "token_acc": 0.8712226871222687 }, { "epoch": 0.9844601521610101, "grad_norm": 0.36230766773223877, "learning_rate": 1.2662766859850017e-08, "loss": 0.3229687213897705, "step": 18245, "token_acc": 0.8848860118209964 }, { "epoch": 0.9845141099660066, "grad_norm": 0.4413008391857147, "learning_rate": 1.257500204788542e-08, "loss": 0.27286043763160706, "step": 18246, "token_acc": 0.9013910899806304 }, { "epoch": 0.984568067771003, "grad_norm": 0.3362733721733093, "learning_rate": 1.2487542249264295e-08, "loss": 0.31801342964172363, "step": 18247, "token_acc": 0.8854790419161677 }, { "epoch": 0.9846220255759995, "grad_norm": 0.3545997738838196, "learning_rate": 1.2400387466656726e-08, "loss": 0.29599103331565857, "step": 18248, "token_acc": 0.8954241749341972 }, { "epoch": 0.984675983380996, "grad_norm": 0.41132140159606934, "learning_rate": 1.2313537702725031e-08, "loss": 0.3147527575492859, "step": 18249, "token_acc": 0.8846200024012486 }, { "epoch": 0.9847299411859926, "grad_norm": 0.49748262763023376, "learning_rate": 1.222699296012153e-08, "loss": 0.2844321131706238, "step": 18250, "token_acc": 0.8951846180023821 }, { "epoch": 0.9847838989909891, "grad_norm": 0.5372202396392822, "learning_rate": 1.2140753241489667e-08, "loss": 0.34870222210884094, "step": 18251, "token_acc": 0.8784143904063957 }, { "epoch": 0.9848378567959856, "grad_norm": 0.4452188313007355, "learning_rate": 1.2054818549461777e-08, "loss": 0.35117867588996887, "step": 18252, "token_acc": 0.8753277972027972 }, { "epoch": 0.9848918146009821, "grad_norm": 0.34456667304039, "learning_rate": 1.1969188886663541e-08, "loss": 0.2951444983482361, "step": 18253, "token_acc": 0.8953439888811675 }, { "epoch": 0.9849457724059785, "grad_norm": 0.4102626442909241, "learning_rate": 1.1883864255710643e-08, "loss": 0.33850398659706116, "step": 18254, "token_acc": 0.8802807575155609 }, { "epoch": 0.984999730210975, "grad_norm": 0.4369318187236786, "learning_rate": 1.1798844659207665e-08, "loss": 0.32500672340393066, "step": 18255, "token_acc": 0.885263535317318 }, { "epoch": 0.9850536880159715, "grad_norm": 0.5865561366081238, "learning_rate": 1.1714130099750309e-08, "loss": 0.3893827199935913, "step": 18256, "token_acc": 0.8702613176297387 }, { "epoch": 0.985107645820968, "grad_norm": 0.4150152802467346, "learning_rate": 1.1629720579927616e-08, "loss": 0.30458855628967285, "step": 18257, "token_acc": 0.8945386064030132 }, { "epoch": 0.9851616036259645, "grad_norm": 0.5099431276321411, "learning_rate": 1.1545616102317525e-08, "loss": 0.34014183282852173, "step": 18258, "token_acc": 0.8785166240409207 }, { "epoch": 0.985215561430961, "grad_norm": 0.3137415945529938, "learning_rate": 1.1461816669486869e-08, "loss": 0.34146440029144287, "step": 18259, "token_acc": 0.8796657986111112 }, { "epoch": 0.9852695192359575, "grad_norm": 0.48926976323127747, "learning_rate": 1.1378322283995825e-08, "loss": 0.3717576861381531, "step": 18260, "token_acc": 0.8683849321335977 }, { "epoch": 0.985323477040954, "grad_norm": 0.41624659299850464, "learning_rate": 1.1295132948393461e-08, "loss": 0.2907414436340332, "step": 18261, "token_acc": 0.8941099476439791 }, { "epoch": 0.9853774348459504, "grad_norm": 0.5135635733604431, "learning_rate": 1.1212248665222192e-08, "loss": 0.3232921361923218, "step": 18262, "token_acc": 0.886221795487278 }, { "epoch": 0.9854313926509469, "grad_norm": 0.382459819316864, "learning_rate": 1.1129669437011103e-08, "loss": 0.2629796266555786, "step": 18263, "token_acc": 0.9056289267854853 }, { "epoch": 0.9854853504559434, "grad_norm": 0.44591400027275085, "learning_rate": 1.1047395266283734e-08, "loss": 0.3476625084877014, "step": 18264, "token_acc": 0.8780228541057666 }, { "epoch": 0.9855393082609399, "grad_norm": 0.4609566926956177, "learning_rate": 1.0965426155552516e-08, "loss": 0.3584370017051697, "step": 18265, "token_acc": 0.8723260222041701 }, { "epoch": 0.9855932660659364, "grad_norm": 0.42690953612327576, "learning_rate": 1.0883762107319895e-08, "loss": 0.3117706775665283, "step": 18266, "token_acc": 0.8908566181293454 }, { "epoch": 0.985647223870933, "grad_norm": 0.43401703238487244, "learning_rate": 1.080240312408054e-08, "loss": 0.3180829584598541, "step": 18267, "token_acc": 0.8876465284039675 }, { "epoch": 0.9857011816759295, "grad_norm": 0.4358714818954468, "learning_rate": 1.0721349208318022e-08, "loss": 0.3466748297214508, "step": 18268, "token_acc": 0.8781881533101046 }, { "epoch": 0.985755139480926, "grad_norm": 0.41049861907958984, "learning_rate": 1.0640600362510356e-08, "loss": 0.3432721197605133, "step": 18269, "token_acc": 0.8804409270774449 }, { "epoch": 0.9858090972859224, "grad_norm": 0.43031591176986694, "learning_rate": 1.056015658912113e-08, "loss": 0.28701847791671753, "step": 18270, "token_acc": 0.895425621323088 }, { "epoch": 0.9858630550909189, "grad_norm": 0.49832335114479065, "learning_rate": 1.0480017890607264e-08, "loss": 0.3424094319343567, "step": 18271, "token_acc": 0.8778258778258778 }, { "epoch": 0.9859170128959154, "grad_norm": 0.3302936255931854, "learning_rate": 1.0400184269417912e-08, "loss": 0.3721713423728943, "step": 18272, "token_acc": 0.8727351164797239 }, { "epoch": 0.9859709707009119, "grad_norm": 0.3981197774410248, "learning_rate": 1.0320655727988905e-08, "loss": 0.2996710240840912, "step": 18273, "token_acc": 0.8883712262392843 }, { "epoch": 0.9860249285059084, "grad_norm": 0.4388098120689392, "learning_rate": 1.0241432268750517e-08, "loss": 0.32230523228645325, "step": 18274, "token_acc": 0.8880989994734071 }, { "epoch": 0.9860788863109049, "grad_norm": 0.41469794511795044, "learning_rate": 1.0162513894121927e-08, "loss": 0.30676400661468506, "step": 18275, "token_acc": 0.888732592488395 }, { "epoch": 0.9861328441159014, "grad_norm": 0.45768511295318604, "learning_rate": 1.0083900606513431e-08, "loss": 0.2987614870071411, "step": 18276, "token_acc": 0.8905087319665907 }, { "epoch": 0.9861868019208978, "grad_norm": 0.41214653849601746, "learning_rate": 1.0005592408325326e-08, "loss": 0.3035355806350708, "step": 18277, "token_acc": 0.8901366056957629 }, { "epoch": 0.9862407597258943, "grad_norm": 0.3008691668510437, "learning_rate": 9.927589301949037e-09, "loss": 0.24067549407482147, "step": 18278, "token_acc": 0.9075538587401145 }, { "epoch": 0.9862947175308908, "grad_norm": 0.5264893770217896, "learning_rate": 9.84989128976821e-09, "loss": 0.3618350028991699, "step": 18279, "token_acc": 0.87324581258488 }, { "epoch": 0.9863486753358873, "grad_norm": 0.4777317941188812, "learning_rate": 9.772498374153172e-09, "loss": 0.3162631094455719, "step": 18280, "token_acc": 0.8852588383838383 }, { "epoch": 0.9864026331408838, "grad_norm": 0.38218310475349426, "learning_rate": 9.69541055746981e-09, "loss": 0.3356269299983978, "step": 18281, "token_acc": 0.8798037612428454 }, { "epoch": 0.9864565909458803, "grad_norm": 0.5530062913894653, "learning_rate": 9.618627842071793e-09, "loss": 0.3724575638771057, "step": 18282, "token_acc": 0.8734091294756491 }, { "epoch": 0.9865105487508768, "grad_norm": 0.37928369641304016, "learning_rate": 9.542150230303914e-09, "loss": 0.3525567650794983, "step": 18283, "token_acc": 0.8737945492662473 }, { "epoch": 0.9865645065558734, "grad_norm": 0.4383615255355835, "learning_rate": 9.465977724500974e-09, "loss": 0.32857462763786316, "step": 18284, "token_acc": 0.8816464237516869 }, { "epoch": 0.9866184643608698, "grad_norm": 0.36720961332321167, "learning_rate": 9.390110326989999e-09, "loss": 0.3494371175765991, "step": 18285, "token_acc": 0.8767300478592679 }, { "epoch": 0.9866724221658663, "grad_norm": 0.4399597942829132, "learning_rate": 9.314548040086912e-09, "loss": 0.39039525389671326, "step": 18286, "token_acc": 0.8652798415056959 }, { "epoch": 0.9867263799708628, "grad_norm": 0.4007895886898041, "learning_rate": 9.239290866102091e-09, "loss": 0.3135156035423279, "step": 18287, "token_acc": 0.8835044163284793 }, { "epoch": 0.9867803377758593, "grad_norm": 0.46700119972229004, "learning_rate": 9.164338807331474e-09, "loss": 0.3437047600746155, "step": 18288, "token_acc": 0.8778783547721137 }, { "epoch": 0.9868342955808558, "grad_norm": 0.4702310860157013, "learning_rate": 9.089691866064343e-09, "loss": 0.3453782796859741, "step": 18289, "token_acc": 0.8809155583049799 }, { "epoch": 0.9868882533858523, "grad_norm": 0.478415846824646, "learning_rate": 9.015350044581095e-09, "loss": 0.3339402675628662, "step": 18290, "token_acc": 0.8795051102743411 }, { "epoch": 0.9869422111908488, "grad_norm": 0.36025163531303406, "learning_rate": 8.941313345151026e-09, "loss": 0.2551101744174957, "step": 18291, "token_acc": 0.9040790312300828 }, { "epoch": 0.9869961689958453, "grad_norm": 0.3889752924442291, "learning_rate": 8.86758177003677e-09, "loss": 0.28935661911964417, "step": 18292, "token_acc": 0.89513499413069 }, { "epoch": 0.9870501268008417, "grad_norm": 0.456866979598999, "learning_rate": 8.794155321489862e-09, "loss": 0.2983618676662445, "step": 18293, "token_acc": 0.8978528776521407 }, { "epoch": 0.9871040846058382, "grad_norm": 0.4590167999267578, "learning_rate": 8.721034001750728e-09, "loss": 0.30701005458831787, "step": 18294, "token_acc": 0.8902940728325461 }, { "epoch": 0.9871580424108347, "grad_norm": 0.46591052412986755, "learning_rate": 8.648217813054249e-09, "loss": 0.3006293475627899, "step": 18295, "token_acc": 0.8879360465116279 }, { "epoch": 0.9872120002158312, "grad_norm": 0.3569505214691162, "learning_rate": 8.575706757624203e-09, "loss": 0.337008535861969, "step": 18296, "token_acc": 0.8800508259212199 }, { "epoch": 0.9872659580208277, "grad_norm": 0.4669598937034607, "learning_rate": 8.503500837674372e-09, "loss": 0.3314805030822754, "step": 18297, "token_acc": 0.8810963321241435 }, { "epoch": 0.9873199158258242, "grad_norm": 0.39230597019195557, "learning_rate": 8.431600055410772e-09, "loss": 0.3076927065849304, "step": 18298, "token_acc": 0.8912831750669589 }, { "epoch": 0.9873738736308207, "grad_norm": 0.30694401264190674, "learning_rate": 8.360004413028311e-09, "loss": 0.27079713344573975, "step": 18299, "token_acc": 0.8956903650837222 }, { "epoch": 0.9874278314358171, "grad_norm": 0.4953742027282715, "learning_rate": 8.28871391271302e-09, "loss": 0.357331246137619, "step": 18300, "token_acc": 0.8742325907735485 }, { "epoch": 0.9874817892408136, "grad_norm": 0.43581676483154297, "learning_rate": 8.217728556644267e-09, "loss": 0.32740139961242676, "step": 18301, "token_acc": 0.877896381150742 }, { "epoch": 0.9875357470458102, "grad_norm": 0.421750009059906, "learning_rate": 8.147048346988096e-09, "loss": 0.2796103358268738, "step": 18302, "token_acc": 0.8939952594153279 }, { "epoch": 0.9875897048508067, "grad_norm": 0.4445289969444275, "learning_rate": 8.076673285902781e-09, "loss": 0.31762516498565674, "step": 18303, "token_acc": 0.8817966903073287 }, { "epoch": 0.9876436626558032, "grad_norm": 0.4706801176071167, "learning_rate": 8.006603375539935e-09, "loss": 0.3016074299812317, "step": 18304, "token_acc": 0.8895213999578326 }, { "epoch": 0.9876976204607997, "grad_norm": 0.3679320514202118, "learning_rate": 7.936838618036735e-09, "loss": 0.277396023273468, "step": 18305, "token_acc": 0.896013618953043 }, { "epoch": 0.9877515782657962, "grad_norm": 0.4417021572589874, "learning_rate": 7.867379015524812e-09, "loss": 0.3309553265571594, "step": 18306, "token_acc": 0.884748427672956 }, { "epoch": 0.9878055360707927, "grad_norm": 0.5712267160415649, "learning_rate": 7.798224570125802e-09, "loss": 0.3652178645133972, "step": 18307, "token_acc": 0.8719512195121951 }, { "epoch": 0.9878594938757891, "grad_norm": 0.4627528786659241, "learning_rate": 7.729375283951346e-09, "loss": 0.37829864025115967, "step": 18308, "token_acc": 0.8703098240697493 }, { "epoch": 0.9879134516807856, "grad_norm": 0.43486082553863525, "learning_rate": 7.66083115910421e-09, "loss": 0.30776190757751465, "step": 18309, "token_acc": 0.8914516129032258 }, { "epoch": 0.9879674094857821, "grad_norm": 0.4099879562854767, "learning_rate": 7.592592197678272e-09, "loss": 0.29420724511146545, "step": 18310, "token_acc": 0.892912264038941 }, { "epoch": 0.9880213672907786, "grad_norm": 0.41742438077926636, "learning_rate": 7.524658401756313e-09, "loss": 0.3190525472164154, "step": 18311, "token_acc": 0.8889050458048567 }, { "epoch": 0.9880753250957751, "grad_norm": 0.26843520998954773, "learning_rate": 7.457029773413338e-09, "loss": 0.33947092294692993, "step": 18312, "token_acc": 0.8797735212624985 }, { "epoch": 0.9881292829007716, "grad_norm": 0.3787285387516022, "learning_rate": 7.389706314715472e-09, "loss": 0.3234611749649048, "step": 18313, "token_acc": 0.8816054353296426 }, { "epoch": 0.9881832407057681, "grad_norm": 0.4257846176624298, "learning_rate": 7.322688027718849e-09, "loss": 0.31428468227386475, "step": 18314, "token_acc": 0.891056656781645 }, { "epoch": 0.9882371985107646, "grad_norm": 0.42374706268310547, "learning_rate": 7.2559749144685e-09, "loss": 0.3396121859550476, "step": 18315, "token_acc": 0.8801969365426696 }, { "epoch": 0.988291156315761, "grad_norm": 0.4744497239589691, "learning_rate": 7.1895669770027936e-09, "loss": 0.3308509588241577, "step": 18316, "token_acc": 0.8816213726393367 }, { "epoch": 0.9883451141207575, "grad_norm": 0.37952467799186707, "learning_rate": 7.123464217351217e-09, "loss": 0.3327329754829407, "step": 18317, "token_acc": 0.881519558676028 }, { "epoch": 0.988399071925754, "grad_norm": 0.46970847249031067, "learning_rate": 7.057666637529936e-09, "loss": 0.33262473344802856, "step": 18318, "token_acc": 0.8826277785643494 }, { "epoch": 0.9884530297307506, "grad_norm": 0.3078001141548157, "learning_rate": 6.992174239550676e-09, "loss": 0.308646023273468, "step": 18319, "token_acc": 0.8939701038763618 }, { "epoch": 0.9885069875357471, "grad_norm": 0.4433657228946686, "learning_rate": 6.926987025411835e-09, "loss": 0.25310903787612915, "step": 18320, "token_acc": 0.9088372093023256 }, { "epoch": 0.9885609453407436, "grad_norm": 0.35584378242492676, "learning_rate": 6.862104997106267e-09, "loss": 0.2553269565105438, "step": 18321, "token_acc": 0.9048603929679421 }, { "epoch": 0.9886149031457401, "grad_norm": 0.4441450536251068, "learning_rate": 6.7975281566123875e-09, "loss": 0.297891765832901, "step": 18322, "token_acc": 0.8890862944162436 }, { "epoch": 0.9886688609507365, "grad_norm": 0.37640565633773804, "learning_rate": 6.7332565059052834e-09, "loss": 0.31432682275772095, "step": 18323, "token_acc": 0.8851547694251422 }, { "epoch": 0.988722818755733, "grad_norm": 0.3661697506904602, "learning_rate": 6.669290046946719e-09, "loss": 0.31966492533683777, "step": 18324, "token_acc": 0.8866957962935061 }, { "epoch": 0.9887767765607295, "grad_norm": 0.42995041608810425, "learning_rate": 6.605628781689577e-09, "loss": 0.3181453347206116, "step": 18325, "token_acc": 0.8823529411764706 }, { "epoch": 0.988830734365726, "grad_norm": 0.4033670425415039, "learning_rate": 6.542272712078968e-09, "loss": 0.332835853099823, "step": 18326, "token_acc": 0.882330732443997 }, { "epoch": 0.9888846921707225, "grad_norm": 0.545479953289032, "learning_rate": 6.479221840048899e-09, "loss": 0.3680604100227356, "step": 18327, "token_acc": 0.8733803720333547 }, { "epoch": 0.988938649975719, "grad_norm": 0.47988590598106384, "learning_rate": 6.4164761675267195e-09, "loss": 0.38164544105529785, "step": 18328, "token_acc": 0.8687469044081229 }, { "epoch": 0.9889926077807155, "grad_norm": 0.5199543237686157, "learning_rate": 6.354035696426452e-09, "loss": 0.34488245844841003, "step": 18329, "token_acc": 0.873892736350459 }, { "epoch": 0.989046565585712, "grad_norm": 0.46113821864128113, "learning_rate": 6.291900428655462e-09, "loss": 0.2889787554740906, "step": 18330, "token_acc": 0.8967269466447467 }, { "epoch": 0.9891005233907084, "grad_norm": 0.3639158010482788, "learning_rate": 6.230070366112229e-09, "loss": 0.2772267758846283, "step": 18331, "token_acc": 0.8981739399566697 }, { "epoch": 0.9891544811957049, "grad_norm": 0.5407784581184387, "learning_rate": 6.1685455106852424e-09, "loss": 0.35809391736984253, "step": 18332, "token_acc": 0.8750200032005121 }, { "epoch": 0.9892084390007014, "grad_norm": 0.4229305386543274, "learning_rate": 6.1073258642529996e-09, "loss": 0.3426724970340729, "step": 18333, "token_acc": 0.8816875166356135 }, { "epoch": 0.9892623968056979, "grad_norm": 0.3400370478630066, "learning_rate": 6.046411428684007e-09, "loss": 0.324969083070755, "step": 18334, "token_acc": 0.8826402805611222 }, { "epoch": 0.9893163546106944, "grad_norm": 0.3655397891998291, "learning_rate": 5.985802205840108e-09, "loss": 0.2964504063129425, "step": 18335, "token_acc": 0.8923207707565882 }, { "epoch": 0.989370312415691, "grad_norm": 0.421246737241745, "learning_rate": 5.925498197570934e-09, "loss": 0.34939080476760864, "step": 18336, "token_acc": 0.874949083503055 }, { "epoch": 0.9894242702206875, "grad_norm": 0.45289522409439087, "learning_rate": 5.865499405719455e-09, "loss": 0.2770642936229706, "step": 18337, "token_acc": 0.8967039434961742 }, { "epoch": 0.989478228025684, "grad_norm": 0.44650426506996155, "learning_rate": 5.805805832118649e-09, "loss": 0.35475629568099976, "step": 18338, "token_acc": 0.8761765321062539 }, { "epoch": 0.9895321858306804, "grad_norm": 0.37129271030426025, "learning_rate": 5.746417478589283e-09, "loss": 0.30450519919395447, "step": 18339, "token_acc": 0.887905604719764 }, { "epoch": 0.9895861436356769, "grad_norm": 0.4238572418689728, "learning_rate": 5.687334346945461e-09, "loss": 0.3530111610889435, "step": 18340, "token_acc": 0.8733762804942444 }, { "epoch": 0.9896401014406734, "grad_norm": 0.4938097894191742, "learning_rate": 5.628556438993515e-09, "loss": 0.34785789251327515, "step": 18341, "token_acc": 0.8771903549498278 }, { "epoch": 0.9896940592456699, "grad_norm": 0.4160763919353485, "learning_rate": 5.5700837565275666e-09, "loss": 0.32374808192253113, "step": 18342, "token_acc": 0.8888888888888888 }, { "epoch": 0.9897480170506664, "grad_norm": 0.3689879775047302, "learning_rate": 5.511916301331743e-09, "loss": 0.34789344668388367, "step": 18343, "token_acc": 0.8788116738646774 }, { "epoch": 0.9898019748556629, "grad_norm": 0.4838049113750458, "learning_rate": 5.4540540751846224e-09, "loss": 0.36703795194625854, "step": 18344, "token_acc": 0.867180680826302 }, { "epoch": 0.9898559326606594, "grad_norm": 0.457764595746994, "learning_rate": 5.39649707985257e-09, "loss": 0.3127318024635315, "step": 18345, "token_acc": 0.8842975206611571 }, { "epoch": 0.9899098904656558, "grad_norm": 0.4646136462688446, "learning_rate": 5.339245317093067e-09, "loss": 0.3153350353240967, "step": 18346, "token_acc": 0.8875211744776962 }, { "epoch": 0.9899638482706523, "grad_norm": 0.43803343176841736, "learning_rate": 5.2822987886547165e-09, "loss": 0.2767722010612488, "step": 18347, "token_acc": 0.9015440508628519 }, { "epoch": 0.9900178060756488, "grad_norm": 0.4173719882965088, "learning_rate": 5.225657496277236e-09, "loss": 0.350768506526947, "step": 18348, "token_acc": 0.8786525974025974 }, { "epoch": 0.9900717638806453, "grad_norm": 0.38047924637794495, "learning_rate": 5.169321441689246e-09, "loss": 0.31984254717826843, "step": 18349, "token_acc": 0.884696952784909 }, { "epoch": 0.9901257216856418, "grad_norm": 0.40421155095100403, "learning_rate": 5.113290626611589e-09, "loss": 0.27603664994239807, "step": 18350, "token_acc": 0.8970947597067608 }, { "epoch": 0.9901796794906383, "grad_norm": 0.4193633794784546, "learning_rate": 5.05756505275623e-09, "loss": 0.32834023237228394, "step": 18351, "token_acc": 0.8845470692717584 }, { "epoch": 0.9902336372956348, "grad_norm": 0.4772644639015198, "learning_rate": 5.002144721824032e-09, "loss": 0.354370653629303, "step": 18352, "token_acc": 0.8794920634920635 }, { "epoch": 0.9902875951006314, "grad_norm": 0.44938409328460693, "learning_rate": 4.947029635509193e-09, "loss": 0.3261840343475342, "step": 18353, "token_acc": 0.8843478260869565 }, { "epoch": 0.9903415529056278, "grad_norm": 0.4678541123867035, "learning_rate": 4.892219795492592e-09, "loss": 0.3120790123939514, "step": 18354, "token_acc": 0.890429747028345 }, { "epoch": 0.9903955107106243, "grad_norm": 0.40225887298583984, "learning_rate": 4.837715203449556e-09, "loss": 0.3172191381454468, "step": 18355, "token_acc": 0.8857827476038339 }, { "epoch": 0.9904494685156208, "grad_norm": 0.43767914175987244, "learning_rate": 4.7835158610443075e-09, "loss": 0.331434965133667, "step": 18356, "token_acc": 0.8812577065351418 }, { "epoch": 0.9905034263206173, "grad_norm": 0.38539525866508484, "learning_rate": 4.729621769932191e-09, "loss": 0.32447898387908936, "step": 18357, "token_acc": 0.8890524625267666 }, { "epoch": 0.9905573841256138, "grad_norm": 0.4288516640663147, "learning_rate": 4.676032931759666e-09, "loss": 0.3344009518623352, "step": 18358, "token_acc": 0.8806262230919765 }, { "epoch": 0.9906113419306103, "grad_norm": 0.37709569931030273, "learning_rate": 4.622749348162092e-09, "loss": 0.29465818405151367, "step": 18359, "token_acc": 0.8954557394273707 }, { "epoch": 0.9906652997356068, "grad_norm": 0.34540894627571106, "learning_rate": 4.569771020767055e-09, "loss": 0.2753944396972656, "step": 18360, "token_acc": 0.9000087100426792 }, { "epoch": 0.9907192575406032, "grad_norm": 0.3679696321487427, "learning_rate": 4.517097951192151e-09, "loss": 0.3527063727378845, "step": 18361, "token_acc": 0.8749158475831426 }, { "epoch": 0.9907732153455997, "grad_norm": 0.4644010066986084, "learning_rate": 4.464730141048312e-09, "loss": 0.3239562511444092, "step": 18362, "token_acc": 0.8841593189419277 }, { "epoch": 0.9908271731505962, "grad_norm": 0.44953790307044983, "learning_rate": 4.4126675919320404e-09, "loss": 0.32660916447639465, "step": 18363, "token_acc": 0.8797657952069716 }, { "epoch": 0.9908811309555927, "grad_norm": 0.3816116750240326, "learning_rate": 4.360910305434285e-09, "loss": 0.2989579439163208, "step": 18364, "token_acc": 0.8920193063194523 }, { "epoch": 0.9909350887605892, "grad_norm": 0.37562739849090576, "learning_rate": 4.309458283136003e-09, "loss": 0.32342100143432617, "step": 18365, "token_acc": 0.8883110128721883 }, { "epoch": 0.9909890465655857, "grad_norm": 0.4016976058483124, "learning_rate": 4.258311526608161e-09, "loss": 0.3736152648925781, "step": 18366, "token_acc": 0.8693327392224574 }, { "epoch": 0.9910430043705822, "grad_norm": 0.4260745048522949, "learning_rate": 4.2074700374139525e-09, "loss": 0.2968934178352356, "step": 18367, "token_acc": 0.8916127255199009 }, { "epoch": 0.9910969621755787, "grad_norm": 0.44515278935432434, "learning_rate": 4.156933817104358e-09, "loss": 0.34088775515556335, "step": 18368, "token_acc": 0.8756355641060877 }, { "epoch": 0.9911509199805751, "grad_norm": 0.5056493282318115, "learning_rate": 4.1067028672237e-09, "loss": 0.27391985058784485, "step": 18369, "token_acc": 0.8977457903313416 }, { "epoch": 0.9912048777855716, "grad_norm": 0.4393419623374939, "learning_rate": 4.0567771893051946e-09, "loss": 0.3290162682533264, "step": 18370, "token_acc": 0.8838039597533268 }, { "epoch": 0.9912588355905682, "grad_norm": 0.47100430727005005, "learning_rate": 4.007156784874289e-09, "loss": 0.35472747683525085, "step": 18371, "token_acc": 0.8753553793206643 }, { "epoch": 0.9913127933955647, "grad_norm": 0.3637392520904541, "learning_rate": 3.957841655446437e-09, "loss": 0.37164872884750366, "step": 18372, "token_acc": 0.871244635193133 }, { "epoch": 0.9913667512005612, "grad_norm": 0.28447452187538147, "learning_rate": 3.908831802528212e-09, "loss": 0.31442123651504517, "step": 18373, "token_acc": 0.8889153754469606 }, { "epoch": 0.9914207090055577, "grad_norm": 0.402517706155777, "learning_rate": 3.860127227613974e-09, "loss": 0.28316593170166016, "step": 18374, "token_acc": 0.8974266906044285 }, { "epoch": 0.9914746668105542, "grad_norm": 0.4122755825519562, "learning_rate": 3.8117279321947535e-09, "loss": 0.2668322026729584, "step": 18375, "token_acc": 0.9046341027017355 }, { "epoch": 0.9915286246155507, "grad_norm": 0.5163583159446716, "learning_rate": 3.763633917744925e-09, "loss": 0.2924061417579651, "step": 18376, "token_acc": 0.8939475093733262 }, { "epoch": 0.9915825824205471, "grad_norm": 0.4136172831058502, "learning_rate": 3.715845185736644e-09, "loss": 0.3092765808105469, "step": 18377, "token_acc": 0.8872767857142857 }, { "epoch": 0.9916365402255436, "grad_norm": 0.3855631649494171, "learning_rate": 3.6683617376265246e-09, "loss": 0.2902684509754181, "step": 18378, "token_acc": 0.8958529688972667 }, { "epoch": 0.9916904980305401, "grad_norm": 0.5024374127388, "learning_rate": 3.621183574866738e-09, "loss": 0.323018878698349, "step": 18379, "token_acc": 0.8852429296591733 }, { "epoch": 0.9917444558355366, "grad_norm": 0.4019958972930908, "learning_rate": 3.574310698897243e-09, "loss": 0.24707156419754028, "step": 18380, "token_acc": 0.9067524115755627 }, { "epoch": 0.9917984136405331, "grad_norm": 0.39140960574150085, "learning_rate": 3.5277431111491177e-09, "loss": 0.2737574875354767, "step": 18381, "token_acc": 0.8973660308810173 }, { "epoch": 0.9918523714455296, "grad_norm": 0.4944484531879425, "learning_rate": 3.4814808130445577e-09, "loss": 0.30532070994377136, "step": 18382, "token_acc": 0.8855317679558011 }, { "epoch": 0.9919063292505261, "grad_norm": 0.39842891693115234, "learning_rate": 3.435523805997987e-09, "loss": 0.3372490406036377, "step": 18383, "token_acc": 0.8835542816194912 }, { "epoch": 0.9919602870555225, "grad_norm": 0.5453296899795532, "learning_rate": 3.389872091411617e-09, "loss": 0.29396846890449524, "step": 18384, "token_acc": 0.8916831683168317 }, { "epoch": 0.992014244860519, "grad_norm": 0.43302589654922485, "learning_rate": 3.3445256706787776e-09, "loss": 0.33034420013427734, "step": 18385, "token_acc": 0.883623349376774 }, { "epoch": 0.9920682026655155, "grad_norm": 0.3271612823009491, "learning_rate": 3.299484545186138e-09, "loss": 0.3594578504562378, "step": 18386, "token_acc": 0.8780463641767386 }, { "epoch": 0.992122160470512, "grad_norm": 0.36686909198760986, "learning_rate": 3.254748716308154e-09, "loss": 0.31526559591293335, "step": 18387, "token_acc": 0.8872332365197626 }, { "epoch": 0.9921761182755086, "grad_norm": 0.4406448304653168, "learning_rate": 3.21031818541262e-09, "loss": 0.3380666971206665, "step": 18388, "token_acc": 0.879395740465577 }, { "epoch": 0.9922300760805051, "grad_norm": 0.536105215549469, "learning_rate": 3.166192953854008e-09, "loss": 0.36470767855644226, "step": 18389, "token_acc": 0.8724132565738292 }, { "epoch": 0.9922840338855016, "grad_norm": 0.43026837706565857, "learning_rate": 3.12237302298124e-09, "loss": 0.2800227999687195, "step": 18390, "token_acc": 0.8950159066808059 }, { "epoch": 0.9923379916904981, "grad_norm": 0.4615948796272278, "learning_rate": 3.0788583941321335e-09, "loss": 0.3414249122142792, "step": 18391, "token_acc": 0.8807966706302022 }, { "epoch": 0.9923919494954945, "grad_norm": 0.5403567552566528, "learning_rate": 3.0356490686356265e-09, "loss": 0.30728399753570557, "step": 18392, "token_acc": 0.8876205471363905 }, { "epoch": 0.992445907300491, "grad_norm": 0.40385642647743225, "learning_rate": 2.9927450478117736e-09, "loss": 0.3075517416000366, "step": 18393, "token_acc": 0.8902219556088782 }, { "epoch": 0.9924998651054875, "grad_norm": 0.3831028640270233, "learning_rate": 2.9501463329717483e-09, "loss": 0.28994297981262207, "step": 18394, "token_acc": 0.8941306405806249 }, { "epoch": 0.992553822910484, "grad_norm": 0.445139616727829, "learning_rate": 2.9078529254134014e-09, "loss": 0.35253435373306274, "step": 18395, "token_acc": 0.8746955345060893 }, { "epoch": 0.9926077807154805, "grad_norm": 0.30869340896606445, "learning_rate": 2.865864826432363e-09, "loss": 0.3284280002117157, "step": 18396, "token_acc": 0.8839846952719322 }, { "epoch": 0.992661738520477, "grad_norm": 0.3918445110321045, "learning_rate": 2.8241820373076103e-09, "loss": 0.31287190318107605, "step": 18397, "token_acc": 0.8845092629089476 }, { "epoch": 0.9927156963254735, "grad_norm": 0.27915719151496887, "learning_rate": 2.7828045593136787e-09, "loss": 0.2984621822834015, "step": 18398, "token_acc": 0.890548006570106 }, { "epoch": 0.99276965413047, "grad_norm": 0.45961037278175354, "learning_rate": 2.741732393714003e-09, "loss": 0.3194742202758789, "step": 18399, "token_acc": 0.8842691111474873 }, { "epoch": 0.9928236119354664, "grad_norm": 0.4003415107727051, "learning_rate": 2.7009655417631343e-09, "loss": 0.30908748507499695, "step": 18400, "token_acc": 0.8908967391304348 }, { "epoch": 0.9928775697404629, "grad_norm": 0.40781518816947937, "learning_rate": 2.660504004705633e-09, "loss": 0.3858151435852051, "step": 18401, "token_acc": 0.8668583752695902 }, { "epoch": 0.9929315275454594, "grad_norm": 0.4152919352054596, "learning_rate": 2.620347783777177e-09, "loss": 0.26088181138038635, "step": 18402, "token_acc": 0.9001320567844173 }, { "epoch": 0.9929854853504559, "grad_norm": 0.3882639408111572, "learning_rate": 2.580496880204564e-09, "loss": 0.3305956721305847, "step": 18403, "token_acc": 0.8818494937897923 }, { "epoch": 0.9930394431554525, "grad_norm": 0.5099523067474365, "learning_rate": 2.5409512952045966e-09, "loss": 0.340512752532959, "step": 18404, "token_acc": 0.8762224492774777 }, { "epoch": 0.993093400960449, "grad_norm": 0.47891566157341003, "learning_rate": 2.5017110299851987e-09, "loss": 0.3331236243247986, "step": 18405, "token_acc": 0.8830521472392638 }, { "epoch": 0.9931473587654455, "grad_norm": 0.39841315150260925, "learning_rate": 2.4627760857443007e-09, "loss": 0.3151108920574188, "step": 18406, "token_acc": 0.8877661007814606 }, { "epoch": 0.9932013165704419, "grad_norm": 0.4686003029346466, "learning_rate": 2.424146463670951e-09, "loss": 0.3684149980545044, "step": 18407, "token_acc": 0.8677790241166573 }, { "epoch": 0.9932552743754384, "grad_norm": 0.4767885208129883, "learning_rate": 2.3858221649464276e-09, "loss": 0.3300016224384308, "step": 18408, "token_acc": 0.8762278978388998 }, { "epoch": 0.9933092321804349, "grad_norm": 0.38785216212272644, "learning_rate": 2.3478031907386843e-09, "loss": 0.3472200930118561, "step": 18409, "token_acc": 0.8743706545193 }, { "epoch": 0.9933631899854314, "grad_norm": 0.47133588790893555, "learning_rate": 2.3100895422101255e-09, "loss": 0.32391512393951416, "step": 18410, "token_acc": 0.8839031339031339 }, { "epoch": 0.9934171477904279, "grad_norm": 0.5350270867347717, "learning_rate": 2.2726812205131623e-09, "loss": 0.3162584602832794, "step": 18411, "token_acc": 0.8817005545286506 }, { "epoch": 0.9934711055954244, "grad_norm": 0.4337441623210907, "learning_rate": 2.235578226787993e-09, "loss": 0.3258191645145416, "step": 18412, "token_acc": 0.8786031042128604 }, { "epoch": 0.9935250634004209, "grad_norm": 0.36782607436180115, "learning_rate": 2.1987805621703773e-09, "loss": 0.34782224893569946, "step": 18413, "token_acc": 0.8766801513767454 }, { "epoch": 0.9935790212054174, "grad_norm": 0.5299727320671082, "learning_rate": 2.1622882277827496e-09, "loss": 0.32010775804519653, "step": 18414, "token_acc": 0.8816175034186364 }, { "epoch": 0.9936329790104138, "grad_norm": 0.47449544072151184, "learning_rate": 2.126101224739774e-09, "loss": 0.3544183373451233, "step": 18415, "token_acc": 0.8805049088359046 }, { "epoch": 0.9936869368154103, "grad_norm": 0.42811235785484314, "learning_rate": 2.090219554146122e-09, "loss": 0.3500145971775055, "step": 18416, "token_acc": 0.8753277711561382 }, { "epoch": 0.9937408946204068, "grad_norm": 0.48840638995170593, "learning_rate": 2.054643217098695e-09, "loss": 0.28317099809646606, "step": 18417, "token_acc": 0.897757450575391 }, { "epoch": 0.9937948524254033, "grad_norm": 0.3861558735370636, "learning_rate": 2.0193722146832906e-09, "loss": 0.2512795925140381, "step": 18418, "token_acc": 0.9044877665838021 }, { "epoch": 0.9938488102303998, "grad_norm": 0.4496040642261505, "learning_rate": 1.984406547976825e-09, "loss": 0.3176238536834717, "step": 18419, "token_acc": 0.8864226001729605 }, { "epoch": 0.9939027680353963, "grad_norm": 0.36909544467926025, "learning_rate": 1.9497462180473326e-09, "loss": 0.3696749210357666, "step": 18420, "token_acc": 0.8706960614317563 }, { "epoch": 0.9939567258403929, "grad_norm": 0.40898165106773376, "learning_rate": 1.9153912259539662e-09, "loss": 0.3304483890533447, "step": 18421, "token_acc": 0.8870631798194862 }, { "epoch": 0.9940106836453894, "grad_norm": 0.4320838451385498, "learning_rate": 1.881341572744777e-09, "loss": 0.35149312019348145, "step": 18422, "token_acc": 0.8743849938499385 }, { "epoch": 0.9940646414503858, "grad_norm": 0.42463764548301697, "learning_rate": 1.8475972594600433e-09, "loss": 0.298457533121109, "step": 18423, "token_acc": 0.8915375446960667 }, { "epoch": 0.9941185992553823, "grad_norm": 0.46528419852256775, "learning_rate": 1.8141582871311626e-09, "loss": 0.2744160592556, "step": 18424, "token_acc": 0.9006151480199923 }, { "epoch": 0.9941725570603788, "grad_norm": 0.40702173113822937, "learning_rate": 1.7810246567795398e-09, "loss": 0.2940986454486847, "step": 18425, "token_acc": 0.8918408826945412 }, { "epoch": 0.9942265148653753, "grad_norm": 0.3304220736026764, "learning_rate": 1.748196369415478e-09, "loss": 0.27809661626815796, "step": 18426, "token_acc": 0.8996431757359501 }, { "epoch": 0.9942804726703718, "grad_norm": 0.472281277179718, "learning_rate": 1.7156734260415087e-09, "loss": 0.3823453187942505, "step": 18427, "token_acc": 0.8703358208955224 }, { "epoch": 0.9943344304753683, "grad_norm": 0.41546282172203064, "learning_rate": 1.6834558276523917e-09, "loss": 0.3652586340904236, "step": 18428, "token_acc": 0.8677462887989204 }, { "epoch": 0.9943883882803648, "grad_norm": 0.5684829950332642, "learning_rate": 1.6515435752317842e-09, "loss": 0.3222642242908478, "step": 18429, "token_acc": 0.8803654908627284 }, { "epoch": 0.9944423460853612, "grad_norm": 0.4722382128238678, "learning_rate": 1.6199366697533525e-09, "loss": 0.37185555696487427, "step": 18430, "token_acc": 0.8669965075669382 }, { "epoch": 0.9944963038903577, "grad_norm": 0.36462002992630005, "learning_rate": 1.5886351121829902e-09, "loss": 0.30331283807754517, "step": 18431, "token_acc": 0.8892540256325994 }, { "epoch": 0.9945502616953542, "grad_norm": 0.38493427634239197, "learning_rate": 1.5576389034754892e-09, "loss": 0.30067694187164307, "step": 18432, "token_acc": 0.8937952577026835 }, { "epoch": 0.9946042195003507, "grad_norm": 0.4063984453678131, "learning_rate": 1.5269480445800899e-09, "loss": 0.352152556180954, "step": 18433, "token_acc": 0.8758248350329934 }, { "epoch": 0.9946581773053472, "grad_norm": 0.48279038071632385, "learning_rate": 1.4965625364316005e-09, "loss": 0.28423184156417847, "step": 18434, "token_acc": 0.8888114325548971 }, { "epoch": 0.9947121351103437, "grad_norm": 0.53221195936203, "learning_rate": 1.4664823799581673e-09, "loss": 0.3399926424026489, "step": 18435, "token_acc": 0.8805752776260696 }, { "epoch": 0.9947660929153402, "grad_norm": 0.3875278830528259, "learning_rate": 1.4367075760801653e-09, "loss": 0.322706937789917, "step": 18436, "token_acc": 0.8838801343322139 }, { "epoch": 0.9948200507203367, "grad_norm": 0.5126437544822693, "learning_rate": 1.4072381257057565e-09, "loss": 0.35486745834350586, "step": 18437, "token_acc": 0.8697564276048715 }, { "epoch": 0.9948740085253331, "grad_norm": 0.5132560729980469, "learning_rate": 1.378074029734222e-09, "loss": 0.3260030746459961, "step": 18438, "token_acc": 0.882307092751364 }, { "epoch": 0.9949279663303296, "grad_norm": 0.3047095537185669, "learning_rate": 1.3492152890581812e-09, "loss": 0.2926889657974243, "step": 18439, "token_acc": 0.8947414471957301 }, { "epoch": 0.9949819241353262, "grad_norm": 0.36154264211654663, "learning_rate": 1.3206619045569302e-09, "loss": 0.2966071367263794, "step": 18440, "token_acc": 0.894511238891793 }, { "epoch": 0.9950358819403227, "grad_norm": 0.41697922348976135, "learning_rate": 1.292413877104215e-09, "loss": 0.2844287157058716, "step": 18441, "token_acc": 0.8943956503554998 }, { "epoch": 0.9950898397453192, "grad_norm": 0.3907957971096039, "learning_rate": 1.2644712075604581e-09, "loss": 0.3026670217514038, "step": 18442, "token_acc": 0.890602684947158 }, { "epoch": 0.9951437975503157, "grad_norm": 0.49773532152175903, "learning_rate": 1.2368338967816418e-09, "loss": 0.3648022413253784, "step": 18443, "token_acc": 0.8734930566152908 }, { "epoch": 0.9951977553553122, "grad_norm": 0.43641096353530884, "learning_rate": 1.209501945610425e-09, "loss": 0.37582287192344666, "step": 18444, "token_acc": 0.8718378543127095 }, { "epoch": 0.9952517131603087, "grad_norm": 0.4873611032962799, "learning_rate": 1.182475354881696e-09, "loss": 0.3455350995063782, "step": 18445, "token_acc": 0.8733277591973244 }, { "epoch": 0.9953056709653051, "grad_norm": 0.3847355246543884, "learning_rate": 1.1557541254203498e-09, "loss": 0.30857914686203003, "step": 18446, "token_acc": 0.8883089770354906 }, { "epoch": 0.9953596287703016, "grad_norm": 0.42866477370262146, "learning_rate": 1.129338258043511e-09, "loss": 0.30085235834121704, "step": 18447, "token_acc": 0.8914556081269135 }, { "epoch": 0.9954135865752981, "grad_norm": 0.5273899435997009, "learning_rate": 1.1032277535572011e-09, "loss": 0.287137895822525, "step": 18448, "token_acc": 0.8920545746388443 }, { "epoch": 0.9954675443802946, "grad_norm": 0.36752814054489136, "learning_rate": 1.0774226127585607e-09, "loss": 0.27111586928367615, "step": 18449, "token_acc": 0.9007557677008751 }, { "epoch": 0.9955215021852911, "grad_norm": 0.4365544617176056, "learning_rate": 1.051922836435848e-09, "loss": 0.2822115123271942, "step": 18450, "token_acc": 0.893340965464606 }, { "epoch": 0.9955754599902876, "grad_norm": 0.4067833125591278, "learning_rate": 1.0267284253684396e-09, "loss": 0.2929767966270447, "step": 18451, "token_acc": 0.8904969485614647 }, { "epoch": 0.9956294177952841, "grad_norm": 0.32665491104125977, "learning_rate": 1.0018393803257198e-09, "loss": 0.3278723359107971, "step": 18452, "token_acc": 0.8801289202925499 }, { "epoch": 0.9956833756002805, "grad_norm": 0.4797182083129883, "learning_rate": 9.772557020670814e-10, "loss": 0.3737141489982605, "step": 18453, "token_acc": 0.8684947663275837 }, { "epoch": 0.995737333405277, "grad_norm": 0.5178639888763428, "learning_rate": 9.529773913441454e-10, "loss": 0.3029516041278839, "step": 18454, "token_acc": 0.8843895525410002 }, { "epoch": 0.9957912912102735, "grad_norm": 0.4527056813240051, "learning_rate": 9.290044488974304e-10, "loss": 0.34073472023010254, "step": 18455, "token_acc": 0.8782608695652174 }, { "epoch": 0.99584524901527, "grad_norm": 0.4151313900947571, "learning_rate": 9.053368754596836e-10, "loss": 0.3000141680240631, "step": 18456, "token_acc": 0.8883101851851852 }, { "epoch": 0.9958992068202666, "grad_norm": 0.45592501759529114, "learning_rate": 8.819746717536604e-10, "loss": 0.34929484128952026, "step": 18457, "token_acc": 0.882137090855717 }, { "epoch": 0.9959531646252631, "grad_norm": 0.47453731298446655, "learning_rate": 8.589178384921237e-10, "loss": 0.26911383867263794, "step": 18458, "token_acc": 0.9026639344262295 }, { "epoch": 0.9960071224302596, "grad_norm": 0.3894958198070526, "learning_rate": 8.361663763800653e-10, "loss": 0.3284848630428314, "step": 18459, "token_acc": 0.8851880877742947 }, { "epoch": 0.9960610802352561, "grad_norm": 0.5019347667694092, "learning_rate": 8.137202861124849e-10, "loss": 0.34587860107421875, "step": 18460, "token_acc": 0.8836890243902439 }, { "epoch": 0.9961150380402525, "grad_norm": 0.38248422741889954, "learning_rate": 7.915795683743898e-10, "loss": 0.30127954483032227, "step": 18461, "token_acc": 0.8926364572605562 }, { "epoch": 0.996168995845249, "grad_norm": 0.4840399920940399, "learning_rate": 7.697442238419062e-10, "loss": 0.375133216381073, "step": 18462, "token_acc": 0.8656451612903225 }, { "epoch": 0.9962229536502455, "grad_norm": 0.4626084566116333, "learning_rate": 7.482142531811675e-10, "loss": 0.30398914217948914, "step": 18463, "token_acc": 0.8851639192443045 }, { "epoch": 0.996276911455242, "grad_norm": 0.5372627377510071, "learning_rate": 7.269896570516466e-10, "loss": 0.31490084528923035, "step": 18464, "token_acc": 0.891989198919892 }, { "epoch": 0.9963308692602385, "grad_norm": 0.38803908228874207, "learning_rate": 7.06070436099493e-10, "loss": 0.2980782091617584, "step": 18465, "token_acc": 0.8905409232512036 }, { "epoch": 0.996384827065235, "grad_norm": 0.5415540337562561, "learning_rate": 6.854565909653054e-10, "loss": 0.3999820947647095, "step": 18466, "token_acc": 0.8627790003143666 }, { "epoch": 0.9964387848702315, "grad_norm": 0.5042116045951843, "learning_rate": 6.651481222774703e-10, "loss": 0.3767162561416626, "step": 18467, "token_acc": 0.8691951620128415 }, { "epoch": 0.996492742675228, "grad_norm": 0.49968090653419495, "learning_rate": 6.451450306566021e-10, "loss": 0.38724979758262634, "step": 18468, "token_acc": 0.8633147674243564 }, { "epoch": 0.9965467004802244, "grad_norm": 0.31282058358192444, "learning_rate": 6.254473167133235e-10, "loss": 0.2856634855270386, "step": 18469, "token_acc": 0.898276586801177 }, { "epoch": 0.9966006582852209, "grad_norm": 0.4304710328578949, "learning_rate": 6.060549810493754e-10, "loss": 0.3308742344379425, "step": 18470, "token_acc": 0.8851340877383351 }, { "epoch": 0.9966546160902174, "grad_norm": 0.40255725383758545, "learning_rate": 5.86968024257617e-10, "loss": 0.3478339910507202, "step": 18471, "token_acc": 0.8792134831460674 }, { "epoch": 0.996708573895214, "grad_norm": 0.4951413571834564, "learning_rate": 5.681864469198051e-10, "loss": 0.299802303314209, "step": 18472, "token_acc": 0.889329873301786 }, { "epoch": 0.9967625317002105, "grad_norm": 0.518684446811676, "learning_rate": 5.497102496110351e-10, "loss": 0.3483690917491913, "step": 18473, "token_acc": 0.8780442113150992 }, { "epoch": 0.996816489505207, "grad_norm": 0.337285578250885, "learning_rate": 5.315394328930801e-10, "loss": 0.31674981117248535, "step": 18474, "token_acc": 0.8841493570722058 }, { "epoch": 0.9968704473102035, "grad_norm": 0.35702723264694214, "learning_rate": 5.136739973232718e-10, "loss": 0.32356947660446167, "step": 18475, "token_acc": 0.8804923918618567 }, { "epoch": 0.9969244051151999, "grad_norm": 0.47656315565109253, "learning_rate": 4.961139434467299e-10, "loss": 0.32469290494918823, "step": 18476, "token_acc": 0.87627765064836 }, { "epoch": 0.9969783629201964, "grad_norm": 0.4084749221801758, "learning_rate": 4.788592717985818e-10, "loss": 0.35395729541778564, "step": 18477, "token_acc": 0.8758844339622641 }, { "epoch": 0.9970323207251929, "grad_norm": 0.3788113594055176, "learning_rate": 4.6190998290729374e-10, "loss": 0.33364346623420715, "step": 18478, "token_acc": 0.876351173213815 }, { "epoch": 0.9970862785301894, "grad_norm": 0.42912280559539795, "learning_rate": 4.452660772891193e-10, "loss": 0.2664833068847656, "step": 18479, "token_acc": 0.9024767801857585 }, { "epoch": 0.9971402363351859, "grad_norm": 0.44918379187583923, "learning_rate": 4.2892755545254074e-10, "loss": 0.26232534646987915, "step": 18480, "token_acc": 0.9036360379724163 }, { "epoch": 0.9971941941401824, "grad_norm": 0.4336806833744049, "learning_rate": 4.128944178982686e-10, "loss": 0.30263790488243103, "step": 18481, "token_acc": 0.8919508867667122 }, { "epoch": 0.9972481519451789, "grad_norm": 0.46248453855514526, "learning_rate": 3.971666651136907e-10, "loss": 0.36348670721054077, "step": 18482, "token_acc": 0.8684518583552824 }, { "epoch": 0.9973021097501754, "grad_norm": 0.43330371379852295, "learning_rate": 3.8174429758064404e-10, "loss": 0.351080060005188, "step": 18483, "token_acc": 0.8771090740002657 }, { "epoch": 0.9973560675551718, "grad_norm": 0.4289426803588867, "learning_rate": 3.6662731576875275e-10, "loss": 0.3352252244949341, "step": 18484, "token_acc": 0.8798038544873988 }, { "epoch": 0.9974100253601683, "grad_norm": 0.3871814012527466, "learning_rate": 3.5181572014098e-10, "loss": 0.31631287932395935, "step": 18485, "token_acc": 0.8885310019555964 }, { "epoch": 0.9974639831651648, "grad_norm": 0.5191115140914917, "learning_rate": 3.3730951115029663e-10, "loss": 0.35597649216651917, "step": 18486, "token_acc": 0.8699647725532241 }, { "epoch": 0.9975179409701613, "grad_norm": 0.34234240651130676, "learning_rate": 3.231086892374613e-10, "loss": 0.32324036955833435, "step": 18487, "token_acc": 0.8853627848455193 }, { "epoch": 0.9975718987751578, "grad_norm": 0.4164784550666809, "learning_rate": 3.092132548376814e-10, "loss": 0.2845064401626587, "step": 18488, "token_acc": 0.8956898750318796 }, { "epoch": 0.9976258565801543, "grad_norm": 0.3818829655647278, "learning_rate": 2.9562320837395186e-10, "loss": 0.25681042671203613, "step": 18489, "token_acc": 0.906503524404841 }, { "epoch": 0.9976798143851509, "grad_norm": 0.43505990505218506, "learning_rate": 2.823385502637166e-10, "loss": 0.33595016598701477, "step": 18490, "token_acc": 0.8767066035107273 }, { "epoch": 0.9977337721901473, "grad_norm": 0.41170620918273926, "learning_rate": 2.693592809110968e-10, "loss": 0.33945733308792114, "step": 18491, "token_acc": 0.8781453867660765 }, { "epoch": 0.9977877299951438, "grad_norm": 0.40788033604621887, "learning_rate": 2.56685400712442e-10, "loss": 0.2835373282432556, "step": 18492, "token_acc": 0.8955439814814815 }, { "epoch": 0.9978416878001403, "grad_norm": 0.391826331615448, "learning_rate": 2.4431691005410984e-10, "loss": 0.30698707699775696, "step": 18493, "token_acc": 0.8841329570398244 }, { "epoch": 0.9978956456051368, "grad_norm": 0.4326017498970032, "learning_rate": 2.3225380931579667e-10, "loss": 0.3176504373550415, "step": 18494, "token_acc": 0.8825269752871563 }, { "epoch": 0.9979496034101333, "grad_norm": 0.4337274730205536, "learning_rate": 2.2049609886498625e-10, "loss": 0.3448692560195923, "step": 18495, "token_acc": 0.8765928906773978 }, { "epoch": 0.9980035612151298, "grad_norm": 0.3620390295982361, "learning_rate": 2.090437790602806e-10, "loss": 0.3674277663230896, "step": 18496, "token_acc": 0.8715975651774435 }, { "epoch": 0.9980575190201263, "grad_norm": 0.5006697773933411, "learning_rate": 1.9789685025140005e-10, "loss": 0.30468177795410156, "step": 18497, "token_acc": 0.8928507755393118 }, { "epoch": 0.9981114768251228, "grad_norm": 0.38812056183815, "learning_rate": 1.870553127802932e-10, "loss": 0.32277268171310425, "step": 18498, "token_acc": 0.8826380915933155 }, { "epoch": 0.9981654346301192, "grad_norm": 0.47435376048088074, "learning_rate": 1.7651916697558614e-10, "loss": 0.32076844573020935, "step": 18499, "token_acc": 0.8817089858598145 }, { "epoch": 0.9982193924351157, "grad_norm": 0.4582131803035736, "learning_rate": 1.6628841316146393e-10, "loss": 0.3213859796524048, "step": 18500, "token_acc": 0.8883934791695695 }, { "epoch": 0.9982733502401122, "grad_norm": 0.3974860608577728, "learning_rate": 1.5636305164878906e-10, "loss": 0.29686617851257324, "step": 18501, "token_acc": 0.8974683544303798 }, { "epoch": 0.9983273080451087, "grad_norm": 0.4804874360561371, "learning_rate": 1.467430827406524e-10, "loss": 0.36579006910324097, "step": 18502, "token_acc": 0.8719800747198008 }, { "epoch": 0.9983812658501052, "grad_norm": 0.4669334590435028, "learning_rate": 1.374285067323733e-10, "loss": 0.2895624339580536, "step": 18503, "token_acc": 0.8906343209385563 }, { "epoch": 0.9984352236551017, "grad_norm": 0.5514506697654724, "learning_rate": 1.2841932390705859e-10, "loss": 0.35361814498901367, "step": 18504, "token_acc": 0.8721389108129439 }, { "epoch": 0.9984891814600982, "grad_norm": 0.47677671909332275, "learning_rate": 1.197155345400436e-10, "loss": 0.31254494190216064, "step": 18505, "token_acc": 0.8840619432643552 }, { "epoch": 0.9985431392650947, "grad_norm": 0.38986989855766296, "learning_rate": 1.1131713889778183e-10, "loss": 0.31391221284866333, "step": 18506, "token_acc": 0.885709360455094 }, { "epoch": 0.9985970970700911, "grad_norm": 0.5007978677749634, "learning_rate": 1.0322413723562463e-10, "loss": 0.3566787838935852, "step": 18507, "token_acc": 0.8736462093862816 }, { "epoch": 0.9986510548750877, "grad_norm": 0.44123244285583496, "learning_rate": 9.54365298011517e-11, "loss": 0.31007230281829834, "step": 18508, "token_acc": 0.886104149262044 }, { "epoch": 0.9987050126800842, "grad_norm": 0.41072750091552734, "learning_rate": 8.795431683306099e-11, "loss": 0.34472352266311646, "step": 18509, "token_acc": 0.8788341606211022 }, { "epoch": 0.9987589704850807, "grad_norm": 0.4027498960494995, "learning_rate": 8.077749856005845e-11, "loss": 0.2760802209377289, "step": 18510, "token_acc": 0.8975501113585747 }, { "epoch": 0.9988129282900772, "grad_norm": 0.42190077900886536, "learning_rate": 7.390607519863757e-11, "loss": 0.3257083296775818, "step": 18511, "token_acc": 0.8822728398118346 }, { "epoch": 0.9988668860950737, "grad_norm": 0.3741764426231384, "learning_rate": 6.734004696196117e-11, "loss": 0.34492939710617065, "step": 18512, "token_acc": 0.881941796924641 }, { "epoch": 0.9989208439000702, "grad_norm": 0.5011943578720093, "learning_rate": 6.107941404875916e-11, "loss": 0.311834454536438, "step": 18513, "token_acc": 0.8818424566088118 }, { "epoch": 0.9989748017050666, "grad_norm": 0.5348672270774841, "learning_rate": 5.5124176649989924e-11, "loss": 0.386283278465271, "step": 18514, "token_acc": 0.8626934984520124 }, { "epoch": 0.9990287595100631, "grad_norm": 0.42413246631622314, "learning_rate": 4.947433494884024e-11, "loss": 0.34565746784210205, "step": 18515, "token_acc": 0.8742978404693547 }, { "epoch": 0.9990827173150596, "grad_norm": 0.44746556878089905, "learning_rate": 4.4129889116284463e-11, "loss": 0.3370015621185303, "step": 18516, "token_acc": 0.8813967489464178 }, { "epoch": 0.9991366751200561, "grad_norm": 0.4011741578578949, "learning_rate": 3.90908393166356e-11, "loss": 0.32877522706985474, "step": 18517, "token_acc": 0.8885881179655222 }, { "epoch": 0.9991906329250526, "grad_norm": 0.42770102620124817, "learning_rate": 3.4357185704214646e-11, "loss": 0.32134488224983215, "step": 18518, "token_acc": 0.8867419738406659 }, { "epoch": 0.9992445907300491, "grad_norm": 0.39545130729675293, "learning_rate": 2.9928928422240375e-11, "loss": 0.28353065252304077, "step": 18519, "token_acc": 0.8975751954418975 }, { "epoch": 0.9992985485350456, "grad_norm": 0.6539737582206726, "learning_rate": 2.5806067606160002e-11, "loss": 0.33537977933883667, "step": 18520, "token_acc": 0.8783068783068783 }, { "epoch": 0.9993525063400421, "grad_norm": 0.45558398962020874, "learning_rate": 2.1988603383649164e-11, "loss": 0.30668261647224426, "step": 18521, "token_acc": 0.8844297338298166 }, { "epoch": 0.9994064641450385, "grad_norm": 0.41726019978523254, "learning_rate": 1.847653586906084e-11, "loss": 0.342326819896698, "step": 18522, "token_acc": 0.8765686072824521 }, { "epoch": 0.999460421950035, "grad_norm": 0.44247984886169434, "learning_rate": 1.5269865170086663e-11, "loss": 0.3790825307369232, "step": 18523, "token_acc": 0.8702557755775577 }, { "epoch": 0.9995143797550315, "grad_norm": 0.5760031938552856, "learning_rate": 1.23685913866467e-11, "loss": 0.34628990292549133, "step": 18524, "token_acc": 0.8804313352362829 }, { "epoch": 0.999568337560028, "grad_norm": 0.38422560691833496, "learning_rate": 9.772714604228128e-12, "loss": 0.36530131101608276, "step": 18525, "token_acc": 0.8723155432702531 }, { "epoch": 0.9996222953650246, "grad_norm": 0.43962401151657104, "learning_rate": 7.48223490498745e-12, "loss": 0.3022114634513855, "step": 18526, "token_acc": 0.8863787375415283 }, { "epoch": 0.9996762531700211, "grad_norm": 0.3946301341056824, "learning_rate": 5.497152356648272e-12, "loss": 0.2985163927078247, "step": 18527, "token_acc": 0.8920270270270271 }, { "epoch": 0.9997302109750176, "grad_norm": 0.36417120695114136, "learning_rate": 3.817467021383081e-12, "loss": 0.3219412863254547, "step": 18528, "token_acc": 0.8827210884353741 }, { "epoch": 0.9997841687800141, "grad_norm": 0.3696037828922272, "learning_rate": 2.4431789491519144e-12, "loss": 0.31055086851119995, "step": 18529, "token_acc": 0.8923146678757651 }, { "epoch": 0.9998381265850105, "grad_norm": 0.46089738607406616, "learning_rate": 1.3742881832534694e-12, "loss": 0.33190464973449707, "step": 18530, "token_acc": 0.8817963920777091 }, { "epoch": 0.999892084390007, "grad_norm": 0.4277398884296417, "learning_rate": 6.107947558842142e-13, "loss": 0.34671613574028015, "step": 18531, "token_acc": 0.8807704066034852 }, { "epoch": 0.9999460421950035, "grad_norm": 0.3973446488380432, "learning_rate": 1.5269869035883234e-13, "loss": 0.32545769214630127, "step": 18532, "token_acc": 0.8834376396960214 }, { "epoch": 1.0, "grad_norm": 0.39181551337242126, "learning_rate": 0.0, "loss": 0.3369232416152954, "step": 18533, "token_acc": 0.8757199151257957 } ], "logging_steps": 1, "max_steps": 18533, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.1330578619425096e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }