{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 9747, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006155740227762388, "grad_norm": 5.71875, "learning_rate": 1.0238907849829352e-08, "loss": 1.2493510246276855, "step": 2 }, { "epoch": 0.0012311480455524776, "grad_norm": 11.4375, "learning_rate": 3.071672354948805e-08, "loss": 1.9457250833511353, "step": 4 }, { "epoch": 0.0018467220683287165, "grad_norm": 15.25, "learning_rate": 5.119453924914676e-08, "loss": 1.6203389167785645, "step": 6 }, { "epoch": 0.0024622960911049553, "grad_norm": 16.625, "learning_rate": 7.167235494880547e-08, "loss": 1.9556751251220703, "step": 8 }, { "epoch": 0.0030778701138811943, "grad_norm": 16.375, "learning_rate": 9.215017064846416e-08, "loss": 2.2844934463500977, "step": 10 }, { "epoch": 0.003693444136657433, "grad_norm": 14.5, "learning_rate": 1.1262798634812287e-07, "loss": 1.4806036949157715, "step": 12 }, { "epoch": 0.0043090181594336715, "grad_norm": 38.75, "learning_rate": 1.3310580204778158e-07, "loss": 2.605030059814453, "step": 14 }, { "epoch": 0.0049245921822099106, "grad_norm": 24.625, "learning_rate": 1.5358361774744026e-07, "loss": 1.8370442390441895, "step": 16 }, { "epoch": 0.00554016620498615, "grad_norm": 16.875, "learning_rate": 1.7406143344709898e-07, "loss": 1.6506725549697876, "step": 18 }, { "epoch": 0.006155740227762389, "grad_norm": 11.8125, "learning_rate": 1.9453924914675767e-07, "loss": 1.5109401941299438, "step": 20 }, { "epoch": 0.006771314250538628, "grad_norm": 6.34375, "learning_rate": 2.1501706484641638e-07, "loss": 1.8124639987945557, "step": 22 }, { "epoch": 0.007386888273314866, "grad_norm": 18.375, "learning_rate": 2.354948805460751e-07, "loss": 1.8528108596801758, "step": 24 }, { "epoch": 0.008002462296091105, "grad_norm": 7.40625, "learning_rate": 2.559726962457338e-07, "loss": 1.7236714363098145, "step": 26 }, { "epoch": 0.008618036318867343, "grad_norm": 26.625, "learning_rate": 2.764505119453925e-07, "loss": 1.6073427200317383, "step": 28 }, { "epoch": 0.009233610341643583, "grad_norm": 7.03125, "learning_rate": 2.969283276450512e-07, "loss": 1.8255822658538818, "step": 30 }, { "epoch": 0.009849184364419821, "grad_norm": 14.6875, "learning_rate": 3.174061433447099e-07, "loss": 1.890990138053894, "step": 32 }, { "epoch": 0.010464758387196061, "grad_norm": 13.1875, "learning_rate": 3.378839590443686e-07, "loss": 2.264435291290283, "step": 34 }, { "epoch": 0.0110803324099723, "grad_norm": 6.4375, "learning_rate": 3.583617747440273e-07, "loss": 1.5108485221862793, "step": 36 }, { "epoch": 0.011695906432748537, "grad_norm": 40.0, "learning_rate": 3.78839590443686e-07, "loss": 1.935383677482605, "step": 38 }, { "epoch": 0.012311480455524777, "grad_norm": 5.6875, "learning_rate": 3.9931740614334473e-07, "loss": 1.2184486389160156, "step": 40 }, { "epoch": 0.012927054478301015, "grad_norm": 10.5625, "learning_rate": 4.197952218430034e-07, "loss": 1.2659871578216553, "step": 42 }, { "epoch": 0.013542628501077255, "grad_norm": 18.625, "learning_rate": 4.402730375426621e-07, "loss": 2.0540785789489746, "step": 44 }, { "epoch": 0.014158202523853494, "grad_norm": 12.375, "learning_rate": 4.6075085324232084e-07, "loss": 1.4828977584838867, "step": 46 }, { "epoch": 0.014773776546629732, "grad_norm": 27.625, "learning_rate": 4.812286689419796e-07, "loss": 1.7720004320144653, "step": 48 }, { "epoch": 0.015389350569405972, "grad_norm": 5.34375, "learning_rate": 5.017064846416382e-07, "loss": 1.3994762897491455, "step": 50 }, { "epoch": 0.01600492459218221, "grad_norm": 22.625, "learning_rate": 5.22184300341297e-07, "loss": 1.7023072242736816, "step": 52 }, { "epoch": 0.01662049861495845, "grad_norm": 55.25, "learning_rate": 5.426621160409556e-07, "loss": 2.0461301803588867, "step": 54 }, { "epoch": 0.017236072637734686, "grad_norm": 22.375, "learning_rate": 5.631399317406143e-07, "loss": 1.5622308254241943, "step": 56 }, { "epoch": 0.017851646660510926, "grad_norm": 10.8125, "learning_rate": 5.83617747440273e-07, "loss": 2.0164151191711426, "step": 58 }, { "epoch": 0.018467220683287166, "grad_norm": 6.375, "learning_rate": 6.040955631399317e-07, "loss": 1.8669424057006836, "step": 60 }, { "epoch": 0.019082794706063406, "grad_norm": 29.125, "learning_rate": 6.245733788395904e-07, "loss": 1.5571472644805908, "step": 62 }, { "epoch": 0.019698368728839642, "grad_norm": 4.6875, "learning_rate": 6.450511945392492e-07, "loss": 1.204810619354248, "step": 64 }, { "epoch": 0.020313942751615882, "grad_norm": 28.25, "learning_rate": 6.655290102389079e-07, "loss": 1.7420827150344849, "step": 66 }, { "epoch": 0.020929516774392122, "grad_norm": 7.1875, "learning_rate": 6.860068259385666e-07, "loss": 1.3108768463134766, "step": 68 }, { "epoch": 0.02154509079716836, "grad_norm": 24.625, "learning_rate": 7.064846416382253e-07, "loss": 2.4043214321136475, "step": 70 }, { "epoch": 0.0221606648199446, "grad_norm": 13.4375, "learning_rate": 7.269624573378839e-07, "loss": 1.9473189115524292, "step": 72 }, { "epoch": 0.02277623884272084, "grad_norm": 12.6875, "learning_rate": 7.474402730375426e-07, "loss": 1.7476930618286133, "step": 74 }, { "epoch": 0.023391812865497075, "grad_norm": 39.75, "learning_rate": 7.679180887372013e-07, "loss": 2.1216416358947754, "step": 76 }, { "epoch": 0.024007386888273315, "grad_norm": 41.0, "learning_rate": 7.883959044368601e-07, "loss": 2.0714941024780273, "step": 78 }, { "epoch": 0.024622960911049555, "grad_norm": 10.8125, "learning_rate": 8.088737201365188e-07, "loss": 1.2925407886505127, "step": 80 }, { "epoch": 0.02523853493382579, "grad_norm": 11.1875, "learning_rate": 8.293515358361775e-07, "loss": 1.6049776077270508, "step": 82 }, { "epoch": 0.02585410895660203, "grad_norm": 37.5, "learning_rate": 8.498293515358362e-07, "loss": 1.492782473564148, "step": 84 }, { "epoch": 0.02646968297937827, "grad_norm": 12.625, "learning_rate": 8.703071672354949e-07, "loss": 1.536354899406433, "step": 86 }, { "epoch": 0.02708525700215451, "grad_norm": 26.125, "learning_rate": 8.907849829351535e-07, "loss": 1.6706409454345703, "step": 88 }, { "epoch": 0.027700831024930747, "grad_norm": 23.0, "learning_rate": 9.112627986348123e-07, "loss": 2.148848533630371, "step": 90 }, { "epoch": 0.028316405047706987, "grad_norm": 20.75, "learning_rate": 9.31740614334471e-07, "loss": 1.4585912227630615, "step": 92 }, { "epoch": 0.028931979070483227, "grad_norm": 8.9375, "learning_rate": 9.522184300341297e-07, "loss": 1.9553436040878296, "step": 94 }, { "epoch": 0.029547553093259463, "grad_norm": 8.1875, "learning_rate": 9.726962457337883e-07, "loss": 1.6098637580871582, "step": 96 }, { "epoch": 0.030163127116035703, "grad_norm": 15.5625, "learning_rate": 9.93174061433447e-07, "loss": 1.8004854917526245, "step": 98 }, { "epoch": 0.030778701138811943, "grad_norm": 9.25, "learning_rate": 1.0136518771331057e-06, "loss": 1.1443376541137695, "step": 100 }, { "epoch": 0.03139427516158818, "grad_norm": 9.75, "learning_rate": 1.0341296928327646e-06, "loss": 1.6315455436706543, "step": 102 }, { "epoch": 0.03200984918436442, "grad_norm": 10.75, "learning_rate": 1.0546075085324233e-06, "loss": 1.6523348093032837, "step": 104 }, { "epoch": 0.03262542320714066, "grad_norm": 15.75, "learning_rate": 1.075085324232082e-06, "loss": 1.861860752105713, "step": 106 }, { "epoch": 0.0332409972299169, "grad_norm": 13.6875, "learning_rate": 1.0955631399317406e-06, "loss": 1.5432401895523071, "step": 108 }, { "epoch": 0.03385657125269314, "grad_norm": 10.4375, "learning_rate": 1.1160409556313993e-06, "loss": 1.820489525794983, "step": 110 }, { "epoch": 0.03447214527546937, "grad_norm": 34.75, "learning_rate": 1.136518771331058e-06, "loss": 2.099825143814087, "step": 112 }, { "epoch": 0.03508771929824561, "grad_norm": 18.875, "learning_rate": 1.156996587030717e-06, "loss": 2.152771472930908, "step": 114 }, { "epoch": 0.03570329332102185, "grad_norm": 6.5625, "learning_rate": 1.1774744027303754e-06, "loss": 1.5281484127044678, "step": 116 }, { "epoch": 0.03631886734379809, "grad_norm": 34.75, "learning_rate": 1.197952218430034e-06, "loss": 1.9934353828430176, "step": 118 }, { "epoch": 0.03693444136657433, "grad_norm": 10.625, "learning_rate": 1.2184300341296928e-06, "loss": 1.680020809173584, "step": 120 }, { "epoch": 0.03755001538935057, "grad_norm": 11.0, "learning_rate": 1.2389078498293515e-06, "loss": 1.525346279144287, "step": 122 }, { "epoch": 0.03816558941212681, "grad_norm": 5.625, "learning_rate": 1.2593856655290101e-06, "loss": 1.124660849571228, "step": 124 }, { "epoch": 0.038781163434903045, "grad_norm": 10.4375, "learning_rate": 1.279863481228669e-06, "loss": 1.4922833442687988, "step": 126 }, { "epoch": 0.039396737457679284, "grad_norm": 51.5, "learning_rate": 1.3003412969283277e-06, "loss": 1.9254200458526611, "step": 128 }, { "epoch": 0.040012311480455524, "grad_norm": 13.125, "learning_rate": 1.3208191126279864e-06, "loss": 1.4378312826156616, "step": 130 }, { "epoch": 0.040627885503231764, "grad_norm": 13.625, "learning_rate": 1.341296928327645e-06, "loss": 1.788328766822815, "step": 132 }, { "epoch": 0.041243459526008004, "grad_norm": 14.875, "learning_rate": 1.3617747440273038e-06, "loss": 1.807824969291687, "step": 134 }, { "epoch": 0.041859033548784244, "grad_norm": 6.8125, "learning_rate": 1.3822525597269625e-06, "loss": 0.9935953617095947, "step": 136 }, { "epoch": 0.04247460757156048, "grad_norm": 25.125, "learning_rate": 1.4027303754266212e-06, "loss": 1.6776387691497803, "step": 138 }, { "epoch": 0.04309018159433672, "grad_norm": 26.75, "learning_rate": 1.4232081911262799e-06, "loss": 1.9442132711410522, "step": 140 }, { "epoch": 0.04370575561711296, "grad_norm": 18.875, "learning_rate": 1.4436860068259385e-06, "loss": 1.4468404054641724, "step": 142 }, { "epoch": 0.0443213296398892, "grad_norm": 10.0625, "learning_rate": 1.4641638225255972e-06, "loss": 1.5079838037490845, "step": 144 }, { "epoch": 0.04493690366266544, "grad_norm": 15.375, "learning_rate": 1.484641638225256e-06, "loss": 1.6095199584960938, "step": 146 }, { "epoch": 0.04555247768544168, "grad_norm": 11.0625, "learning_rate": 1.5051194539249148e-06, "loss": 2.0582830905914307, "step": 148 }, { "epoch": 0.046168051708217916, "grad_norm": 12.5625, "learning_rate": 1.5255972696245735e-06, "loss": 1.5852106809616089, "step": 150 }, { "epoch": 0.04678362573099415, "grad_norm": 31.25, "learning_rate": 1.5460750853242322e-06, "loss": 1.6878998279571533, "step": 152 }, { "epoch": 0.04739919975377039, "grad_norm": 10.375, "learning_rate": 1.5665529010238909e-06, "loss": 1.7584779262542725, "step": 154 }, { "epoch": 0.04801477377654663, "grad_norm": 15.0625, "learning_rate": 1.5870307167235496e-06, "loss": 1.3234977722167969, "step": 156 }, { "epoch": 0.04863034779932287, "grad_norm": 5.46875, "learning_rate": 1.6075085324232083e-06, "loss": 1.4745519161224365, "step": 158 }, { "epoch": 0.04924592182209911, "grad_norm": 24.0, "learning_rate": 1.627986348122867e-06, "loss": 1.8369395732879639, "step": 160 }, { "epoch": 0.04986149584487535, "grad_norm": 7.75, "learning_rate": 1.6484641638225254e-06, "loss": 1.3853015899658203, "step": 162 }, { "epoch": 0.05047706986765158, "grad_norm": 14.0625, "learning_rate": 1.668941979522184e-06, "loss": 1.332862377166748, "step": 164 }, { "epoch": 0.05109264389042782, "grad_norm": 38.75, "learning_rate": 1.6894197952218432e-06, "loss": 1.777852177619934, "step": 166 }, { "epoch": 0.05170821791320406, "grad_norm": 5.53125, "learning_rate": 1.709897610921502e-06, "loss": 1.8864566087722778, "step": 168 }, { "epoch": 0.0523237919359803, "grad_norm": 13.6875, "learning_rate": 1.7303754266211606e-06, "loss": 1.484859585762024, "step": 170 }, { "epoch": 0.05293936595875654, "grad_norm": 55.0, "learning_rate": 1.7508532423208193e-06, "loss": 1.9417674541473389, "step": 172 }, { "epoch": 0.05355493998153278, "grad_norm": 20.5, "learning_rate": 1.771331058020478e-06, "loss": 1.4582123756408691, "step": 174 }, { "epoch": 0.05417051400430902, "grad_norm": 14.75, "learning_rate": 1.7918088737201367e-06, "loss": 1.568121314048767, "step": 176 }, { "epoch": 0.054786088027085254, "grad_norm": 17.25, "learning_rate": 1.8122866894197953e-06, "loss": 1.8054301738739014, "step": 178 }, { "epoch": 0.055401662049861494, "grad_norm": 17.5, "learning_rate": 1.832764505119454e-06, "loss": 1.3522915840148926, "step": 180 }, { "epoch": 0.056017236072637734, "grad_norm": 16.625, "learning_rate": 1.8532423208191125e-06, "loss": 1.4976370334625244, "step": 182 }, { "epoch": 0.056632810095413974, "grad_norm": 20.875, "learning_rate": 1.8737201365187712e-06, "loss": 1.0564684867858887, "step": 184 }, { "epoch": 0.057248384118190214, "grad_norm": 21.625, "learning_rate": 1.8941979522184299e-06, "loss": 1.8197346925735474, "step": 186 }, { "epoch": 0.057863958140966454, "grad_norm": 20.75, "learning_rate": 1.9146757679180888e-06, "loss": 1.5595773458480835, "step": 188 }, { "epoch": 0.05847953216374269, "grad_norm": 17.625, "learning_rate": 1.9351535836177475e-06, "loss": 1.7840571403503418, "step": 190 }, { "epoch": 0.05909510618651893, "grad_norm": 8.5625, "learning_rate": 1.955631399317406e-06, "loss": 1.4016996622085571, "step": 192 }, { "epoch": 0.05971068020929517, "grad_norm": 6.53125, "learning_rate": 1.976109215017065e-06, "loss": 1.2504699230194092, "step": 194 }, { "epoch": 0.060326254232071407, "grad_norm": 8.6875, "learning_rate": 1.9965870307167235e-06, "loss": 1.5754780769348145, "step": 196 }, { "epoch": 0.060941828254847646, "grad_norm": 13.125, "learning_rate": 2.0170648464163822e-06, "loss": 1.3269094228744507, "step": 198 }, { "epoch": 0.061557402277623886, "grad_norm": 5.1875, "learning_rate": 2.037542662116041e-06, "loss": 1.445792555809021, "step": 200 }, { "epoch": 0.062172976300400126, "grad_norm": 26.375, "learning_rate": 2.0580204778156996e-06, "loss": 1.6902906894683838, "step": 202 }, { "epoch": 0.06278855032317636, "grad_norm": 8.1875, "learning_rate": 2.0784982935153583e-06, "loss": 1.0760879516601562, "step": 204 }, { "epoch": 0.0634041243459526, "grad_norm": 15.5625, "learning_rate": 2.098976109215017e-06, "loss": 1.7913198471069336, "step": 206 }, { "epoch": 0.06401969836872884, "grad_norm": 15.9375, "learning_rate": 2.1194539249146757e-06, "loss": 1.7480218410491943, "step": 208 }, { "epoch": 0.06463527239150507, "grad_norm": 12.6875, "learning_rate": 2.1399317406143343e-06, "loss": 1.8400408029556274, "step": 210 }, { "epoch": 0.06525084641428132, "grad_norm": 23.75, "learning_rate": 2.1604095563139935e-06, "loss": 1.8554953336715698, "step": 212 }, { "epoch": 0.06586642043705755, "grad_norm": 102.0, "learning_rate": 2.180887372013652e-06, "loss": 1.7532259225845337, "step": 214 }, { "epoch": 0.0664819944598338, "grad_norm": 16.875, "learning_rate": 2.201365187713311e-06, "loss": 1.4390134811401367, "step": 216 }, { "epoch": 0.06709756848261003, "grad_norm": 14.875, "learning_rate": 2.2218430034129695e-06, "loss": 1.714995265007019, "step": 218 }, { "epoch": 0.06771314250538628, "grad_norm": 19.125, "learning_rate": 2.242320819112628e-06, "loss": 1.4827989339828491, "step": 220 }, { "epoch": 0.06832871652816251, "grad_norm": 3.609375, "learning_rate": 2.262798634812287e-06, "loss": 1.4392755031585693, "step": 222 }, { "epoch": 0.06894429055093874, "grad_norm": 8.125, "learning_rate": 2.2832764505119456e-06, "loss": 1.1972272396087646, "step": 224 }, { "epoch": 0.06955986457371499, "grad_norm": 5.40625, "learning_rate": 2.3037542662116043e-06, "loss": 1.4912139177322388, "step": 226 }, { "epoch": 0.07017543859649122, "grad_norm": 17.0, "learning_rate": 2.324232081911263e-06, "loss": 1.4952316284179688, "step": 228 }, { "epoch": 0.07079101261926747, "grad_norm": 32.5, "learning_rate": 2.3447098976109216e-06, "loss": 1.7515941858291626, "step": 230 }, { "epoch": 0.0714065866420437, "grad_norm": 28.625, "learning_rate": 2.3651877133105803e-06, "loss": 1.332948088645935, "step": 232 }, { "epoch": 0.07202216066481995, "grad_norm": 15.25, "learning_rate": 2.385665529010239e-06, "loss": 1.3763008117675781, "step": 234 }, { "epoch": 0.07263773468759618, "grad_norm": 6.0625, "learning_rate": 2.4061433447098977e-06, "loss": 1.3950610160827637, "step": 236 }, { "epoch": 0.07325330871037242, "grad_norm": 14.9375, "learning_rate": 2.4266211604095564e-06, "loss": 1.6538128852844238, "step": 238 }, { "epoch": 0.07386888273314866, "grad_norm": 10.9375, "learning_rate": 2.447098976109215e-06, "loss": 1.3810994625091553, "step": 240 }, { "epoch": 0.0744844567559249, "grad_norm": 10.625, "learning_rate": 2.4675767918088738e-06, "loss": 1.2243789434432983, "step": 242 }, { "epoch": 0.07510003077870114, "grad_norm": 7.125, "learning_rate": 2.4880546075085325e-06, "loss": 1.4241597652435303, "step": 244 }, { "epoch": 0.07571560480147738, "grad_norm": 20.375, "learning_rate": 2.508532423208191e-06, "loss": 1.6715261936187744, "step": 246 }, { "epoch": 0.07633117882425362, "grad_norm": 13.375, "learning_rate": 2.52901023890785e-06, "loss": 1.5806057453155518, "step": 248 }, { "epoch": 0.07694675284702986, "grad_norm": 4.71875, "learning_rate": 2.5494880546075085e-06, "loss": 1.0935168266296387, "step": 250 }, { "epoch": 0.07756232686980609, "grad_norm": 10.8125, "learning_rate": 2.569965870307167e-06, "loss": 1.5497374534606934, "step": 252 }, { "epoch": 0.07817790089258234, "grad_norm": 10.0, "learning_rate": 2.590443686006826e-06, "loss": 1.7406679391860962, "step": 254 }, { "epoch": 0.07879347491535857, "grad_norm": 23.375, "learning_rate": 2.6109215017064846e-06, "loss": 1.9079008102416992, "step": 256 }, { "epoch": 0.07940904893813482, "grad_norm": 17.125, "learning_rate": 2.6313993174061433e-06, "loss": 1.521014928817749, "step": 258 }, { "epoch": 0.08002462296091105, "grad_norm": 21.75, "learning_rate": 2.6518771331058024e-06, "loss": 1.6001818180084229, "step": 260 }, { "epoch": 0.08064019698368728, "grad_norm": 26.125, "learning_rate": 2.672354948805461e-06, "loss": 1.6336616277694702, "step": 262 }, { "epoch": 0.08125577100646353, "grad_norm": 13.625, "learning_rate": 2.6928327645051198e-06, "loss": 1.6789780855178833, "step": 264 }, { "epoch": 0.08187134502923976, "grad_norm": 9.625, "learning_rate": 2.7133105802047784e-06, "loss": 1.3298799991607666, "step": 266 }, { "epoch": 0.08248691905201601, "grad_norm": 10.6875, "learning_rate": 2.733788395904437e-06, "loss": 1.4433543682098389, "step": 268 }, { "epoch": 0.08310249307479224, "grad_norm": 3.765625, "learning_rate": 2.754266211604096e-06, "loss": 1.3586629629135132, "step": 270 }, { "epoch": 0.08371806709756849, "grad_norm": 12.5, "learning_rate": 2.7747440273037545e-06, "loss": 1.6898560523986816, "step": 272 }, { "epoch": 0.08433364112034472, "grad_norm": 9.9375, "learning_rate": 2.795221843003413e-06, "loss": 1.2337279319763184, "step": 274 }, { "epoch": 0.08494921514312095, "grad_norm": 7.125, "learning_rate": 2.8156996587030715e-06, "loss": 1.5503768920898438, "step": 276 }, { "epoch": 0.0855647891658972, "grad_norm": 26.875, "learning_rate": 2.83617747440273e-06, "loss": 1.4721266031265259, "step": 278 }, { "epoch": 0.08618036318867343, "grad_norm": 20.0, "learning_rate": 2.856655290102389e-06, "loss": 1.5436632633209229, "step": 280 }, { "epoch": 0.08679593721144968, "grad_norm": 10.0, "learning_rate": 2.8771331058020475e-06, "loss": 1.6570696830749512, "step": 282 }, { "epoch": 0.08741151123422591, "grad_norm": 37.0, "learning_rate": 2.8976109215017066e-06, "loss": 1.4481430053710938, "step": 284 }, { "epoch": 0.08802708525700216, "grad_norm": 22.75, "learning_rate": 2.9180887372013653e-06, "loss": 2.1153812408447266, "step": 286 }, { "epoch": 0.0886426592797784, "grad_norm": 16.5, "learning_rate": 2.938566552901024e-06, "loss": 1.0929564237594604, "step": 288 }, { "epoch": 0.08925823330255463, "grad_norm": 13.25, "learning_rate": 2.9590443686006827e-06, "loss": 1.6294186115264893, "step": 290 }, { "epoch": 0.08987380732533087, "grad_norm": 14.125, "learning_rate": 2.9795221843003414e-06, "loss": 1.1360838413238525, "step": 292 }, { "epoch": 0.0904893813481071, "grad_norm": 12.3125, "learning_rate": 3e-06, "loss": 1.477271318435669, "step": 294 }, { "epoch": 0.09110495537088335, "grad_norm": 8.6875, "learning_rate": 2.9999997349793134e-06, "loss": 1.6074985265731812, "step": 296 }, { "epoch": 0.09172052939365959, "grad_norm": 18.125, "learning_rate": 2.99999893991737e-06, "loss": 1.2198758125305176, "step": 298 }, { "epoch": 0.09233610341643583, "grad_norm": 15.375, "learning_rate": 2.999997614814521e-06, "loss": 1.467660903930664, "step": 300 }, { "epoch": 0.09295167743921207, "grad_norm": 10.8125, "learning_rate": 2.999995759671352e-06, "loss": 1.0507367849349976, "step": 302 }, { "epoch": 0.0935672514619883, "grad_norm": 27.375, "learning_rate": 2.9999933744886825e-06, "loss": 1.9891753196716309, "step": 304 }, { "epoch": 0.09418282548476455, "grad_norm": 30.125, "learning_rate": 2.9999904592675653e-06, "loss": 1.7953660488128662, "step": 306 }, { "epoch": 0.09479839950754078, "grad_norm": 3.65625, "learning_rate": 2.9999870140092888e-06, "loss": 0.8793841600418091, "step": 308 }, { "epoch": 0.09541397353031703, "grad_norm": 42.75, "learning_rate": 2.9999830387153745e-06, "loss": 2.1185731887817383, "step": 310 }, { "epoch": 0.09602954755309326, "grad_norm": 15.375, "learning_rate": 2.9999785333875786e-06, "loss": 1.90293288230896, "step": 312 }, { "epoch": 0.09664512157586949, "grad_norm": 19.75, "learning_rate": 2.9999734980278905e-06, "loss": 1.8595056533813477, "step": 314 }, { "epoch": 0.09726069559864574, "grad_norm": 6.125, "learning_rate": 2.9999679326385347e-06, "loss": 0.754072368144989, "step": 316 }, { "epoch": 0.09787626962142197, "grad_norm": 4.65625, "learning_rate": 2.9999618372219697e-06, "loss": 1.2867162227630615, "step": 318 }, { "epoch": 0.09849184364419822, "grad_norm": 18.125, "learning_rate": 2.9999552117808872e-06, "loss": 1.009380578994751, "step": 320 }, { "epoch": 0.09910741766697445, "grad_norm": 3.28125, "learning_rate": 2.9999480563182143e-06, "loss": 1.360229253768921, "step": 322 }, { "epoch": 0.0997229916897507, "grad_norm": 52.25, "learning_rate": 2.999940370837111e-06, "loss": 1.5169321298599243, "step": 324 }, { "epoch": 0.10033856571252693, "grad_norm": 18.875, "learning_rate": 2.999932155340973e-06, "loss": 1.471400499343872, "step": 326 }, { "epoch": 0.10095413973530316, "grad_norm": 14.1875, "learning_rate": 2.999923409833428e-06, "loss": 1.649963617324829, "step": 328 }, { "epoch": 0.10156971375807941, "grad_norm": 16.0, "learning_rate": 2.9999141343183392e-06, "loss": 1.264378547668457, "step": 330 }, { "epoch": 0.10218528778085564, "grad_norm": 15.1875, "learning_rate": 2.9999043287998035e-06, "loss": 1.2814195156097412, "step": 332 }, { "epoch": 0.10280086180363189, "grad_norm": 10.875, "learning_rate": 2.999893993282153e-06, "loss": 1.59095299243927, "step": 334 }, { "epoch": 0.10341643582640812, "grad_norm": 3.3125, "learning_rate": 2.999883127769951e-06, "loss": 1.2800519466400146, "step": 336 }, { "epoch": 0.10403200984918437, "grad_norm": 12.375, "learning_rate": 2.999871732267998e-06, "loss": 1.1677653789520264, "step": 338 }, { "epoch": 0.1046475838719606, "grad_norm": 19.0, "learning_rate": 2.999859806781328e-06, "loss": 1.6544604301452637, "step": 340 }, { "epoch": 0.10526315789473684, "grad_norm": 10.875, "learning_rate": 2.9998473513152085e-06, "loss": 1.2596807479858398, "step": 342 }, { "epoch": 0.10587873191751308, "grad_norm": 37.75, "learning_rate": 2.999834365875139e-06, "loss": 1.5575487613677979, "step": 344 }, { "epoch": 0.10649430594028932, "grad_norm": 5.71875, "learning_rate": 2.999820850466857e-06, "loss": 1.3673763275146484, "step": 346 }, { "epoch": 0.10710987996306556, "grad_norm": 8.0625, "learning_rate": 2.999806805096332e-06, "loss": 1.4206829071044922, "step": 348 }, { "epoch": 0.1077254539858418, "grad_norm": 12.625, "learning_rate": 2.9997922297697676e-06, "loss": 1.0419559478759766, "step": 350 }, { "epoch": 0.10834102800861804, "grad_norm": 5.0, "learning_rate": 2.999777124493602e-06, "loss": 1.1992754936218262, "step": 352 }, { "epoch": 0.10895660203139428, "grad_norm": 12.6875, "learning_rate": 2.999761489274507e-06, "loss": 1.6422064304351807, "step": 354 }, { "epoch": 0.10957217605417051, "grad_norm": 12.6875, "learning_rate": 2.9997453241193892e-06, "loss": 1.4691483974456787, "step": 356 }, { "epoch": 0.11018775007694676, "grad_norm": 10.6875, "learning_rate": 2.999728629035388e-06, "loss": 1.451507806777954, "step": 358 }, { "epoch": 0.11080332409972299, "grad_norm": 12.9375, "learning_rate": 2.999711404029878e-06, "loss": 1.5703644752502441, "step": 360 }, { "epoch": 0.11141889812249924, "grad_norm": 17.375, "learning_rate": 2.999693649110467e-06, "loss": 1.541607141494751, "step": 362 }, { "epoch": 0.11203447214527547, "grad_norm": 42.0, "learning_rate": 2.999675364284999e-06, "loss": 1.6094763278961182, "step": 364 }, { "epoch": 0.11265004616805172, "grad_norm": 21.0, "learning_rate": 2.999656549561548e-06, "loss": 0.6293473243713379, "step": 366 }, { "epoch": 0.11326562019082795, "grad_norm": 15.1875, "learning_rate": 2.999637204948427e-06, "loss": 1.6930108070373535, "step": 368 }, { "epoch": 0.11388119421360418, "grad_norm": 20.75, "learning_rate": 2.9996173304541787e-06, "loss": 1.4504094123840332, "step": 370 }, { "epoch": 0.11449676823638043, "grad_norm": 8.1875, "learning_rate": 2.9995969260875816e-06, "loss": 1.029111385345459, "step": 372 }, { "epoch": 0.11511234225915666, "grad_norm": 17.625, "learning_rate": 2.99957599185765e-06, "loss": 1.711849331855774, "step": 374 }, { "epoch": 0.11572791628193291, "grad_norm": 6.625, "learning_rate": 2.999554527773629e-06, "loss": 1.3091596364974976, "step": 376 }, { "epoch": 0.11634349030470914, "grad_norm": 14.9375, "learning_rate": 2.999532533845001e-06, "loss": 1.4927258491516113, "step": 378 }, { "epoch": 0.11695906432748537, "grad_norm": 8.3125, "learning_rate": 2.9995100100814786e-06, "loss": 1.2266597747802734, "step": 380 }, { "epoch": 0.11757463835026162, "grad_norm": 11.25, "learning_rate": 2.9994869564930125e-06, "loss": 1.2330584526062012, "step": 382 }, { "epoch": 0.11819021237303785, "grad_norm": 15.3125, "learning_rate": 2.9994633730897832e-06, "loss": 1.7261693477630615, "step": 384 }, { "epoch": 0.1188057863958141, "grad_norm": 21.5, "learning_rate": 2.99943925988221e-06, "loss": 1.339095115661621, "step": 386 }, { "epoch": 0.11942136041859033, "grad_norm": 17.25, "learning_rate": 2.9994146168809423e-06, "loss": 1.7015557289123535, "step": 388 }, { "epoch": 0.12003693444136658, "grad_norm": 18.5, "learning_rate": 2.9993894440968657e-06, "loss": 1.0266611576080322, "step": 390 }, { "epoch": 0.12065250846414281, "grad_norm": 20.875, "learning_rate": 2.9993637415410987e-06, "loss": 1.4492390155792236, "step": 392 }, { "epoch": 0.12126808248691905, "grad_norm": 22.375, "learning_rate": 2.9993375092249934e-06, "loss": 1.4749621152877808, "step": 394 }, { "epoch": 0.12188365650969529, "grad_norm": 6.4375, "learning_rate": 2.999310747160138e-06, "loss": 1.355834722518921, "step": 396 }, { "epoch": 0.12249923053247153, "grad_norm": 13.5, "learning_rate": 2.999283455358353e-06, "loss": 1.307915210723877, "step": 398 }, { "epoch": 0.12311480455524777, "grad_norm": 9.0625, "learning_rate": 2.9992556338316925e-06, "loss": 1.5071804523468018, "step": 400 }, { "epoch": 0.123730378578024, "grad_norm": 13.25, "learning_rate": 2.9992272825924454e-06, "loss": 1.6163640022277832, "step": 402 }, { "epoch": 0.12434595260080025, "grad_norm": 14.1875, "learning_rate": 2.9991984016531344e-06, "loss": 1.0656492710113525, "step": 404 }, { "epoch": 0.12496152662357649, "grad_norm": 21.25, "learning_rate": 2.999168991026518e-06, "loss": 1.2286940813064575, "step": 406 }, { "epoch": 0.12557710064635272, "grad_norm": 19.625, "learning_rate": 2.9991390507255847e-06, "loss": 1.4255913496017456, "step": 408 }, { "epoch": 0.12619267466912895, "grad_norm": 6.90625, "learning_rate": 2.99910858076356e-06, "loss": 1.3530218601226807, "step": 410 }, { "epoch": 0.1268082486919052, "grad_norm": 11.5, "learning_rate": 2.9990775811539026e-06, "loss": 1.2902710437774658, "step": 412 }, { "epoch": 0.12742382271468145, "grad_norm": 11.6875, "learning_rate": 2.9990460519103045e-06, "loss": 1.5794827938079834, "step": 414 }, { "epoch": 0.12803939673745768, "grad_norm": 25.25, "learning_rate": 2.9990139930466934e-06, "loss": 1.5939992666244507, "step": 416 }, { "epoch": 0.1286549707602339, "grad_norm": 18.25, "learning_rate": 2.9989814045772287e-06, "loss": 0.8886748552322388, "step": 418 }, { "epoch": 0.12927054478301014, "grad_norm": 14.4375, "learning_rate": 2.9989482865163058e-06, "loss": 1.484604835510254, "step": 420 }, { "epoch": 0.1298861188057864, "grad_norm": 25.75, "learning_rate": 2.9989146388785516e-06, "loss": 1.5589027404785156, "step": 422 }, { "epoch": 0.13050169282856264, "grad_norm": 11.3125, "learning_rate": 2.9988804616788287e-06, "loss": 1.100600004196167, "step": 424 }, { "epoch": 0.13111726685133887, "grad_norm": 11.625, "learning_rate": 2.998845754932234e-06, "loss": 1.454401969909668, "step": 426 }, { "epoch": 0.1317328408741151, "grad_norm": 17.875, "learning_rate": 2.9988105186540964e-06, "loss": 1.2516822814941406, "step": 428 }, { "epoch": 0.13234841489689136, "grad_norm": 10.8125, "learning_rate": 2.998774752859981e-06, "loss": 1.4223829507827759, "step": 430 }, { "epoch": 0.1329639889196676, "grad_norm": 13.875, "learning_rate": 2.998738457565685e-06, "loss": 1.5337071418762207, "step": 432 }, { "epoch": 0.13357956294244383, "grad_norm": 17.0, "learning_rate": 2.99870163278724e-06, "loss": 1.8929377794265747, "step": 434 }, { "epoch": 0.13419513696522006, "grad_norm": 29.375, "learning_rate": 2.998664278540911e-06, "loss": 1.9043139219284058, "step": 436 }, { "epoch": 0.1348107109879963, "grad_norm": 28.625, "learning_rate": 2.9986263948431986e-06, "loss": 1.4659531116485596, "step": 438 }, { "epoch": 0.13542628501077256, "grad_norm": 9.0, "learning_rate": 2.998587981710835e-06, "loss": 1.0244336128234863, "step": 440 }, { "epoch": 0.1360418590335488, "grad_norm": 17.25, "learning_rate": 2.9985490391607883e-06, "loss": 1.6359564065933228, "step": 442 }, { "epoch": 0.13665743305632502, "grad_norm": 7.875, "learning_rate": 2.998509567210259e-06, "loss": 1.3486268520355225, "step": 444 }, { "epoch": 0.13727300707910126, "grad_norm": 12.6875, "learning_rate": 2.9984695658766814e-06, "loss": 1.4941446781158447, "step": 446 }, { "epoch": 0.1378885811018775, "grad_norm": 25.75, "learning_rate": 2.9984290351777252e-06, "loss": 2.0870232582092285, "step": 448 }, { "epoch": 0.13850415512465375, "grad_norm": 11.5, "learning_rate": 2.9983879751312923e-06, "loss": 1.317858099937439, "step": 450 }, { "epoch": 0.13911972914742998, "grad_norm": 21.25, "learning_rate": 2.9983463857555184e-06, "loss": 1.5850346088409424, "step": 452 }, { "epoch": 0.13973530317020622, "grad_norm": 20.875, "learning_rate": 2.9983042670687745e-06, "loss": 1.2323607206344604, "step": 454 }, { "epoch": 0.14035087719298245, "grad_norm": 16.375, "learning_rate": 2.9982616190896635e-06, "loss": 1.528209924697876, "step": 456 }, { "epoch": 0.14096645121575868, "grad_norm": 70.5, "learning_rate": 2.9982184418370242e-06, "loss": 1.527733564376831, "step": 458 }, { "epoch": 0.14158202523853494, "grad_norm": 17.125, "learning_rate": 2.9981747353299277e-06, "loss": 1.887821912765503, "step": 460 }, { "epoch": 0.14219759926131118, "grad_norm": 16.75, "learning_rate": 2.998130499587679e-06, "loss": 1.7688817977905273, "step": 462 }, { "epoch": 0.1428131732840874, "grad_norm": 11.375, "learning_rate": 2.9980857346298167e-06, "loss": 1.4512908458709717, "step": 464 }, { "epoch": 0.14342874730686364, "grad_norm": 21.625, "learning_rate": 2.9980404404761143e-06, "loss": 1.417380928993225, "step": 466 }, { "epoch": 0.1440443213296399, "grad_norm": 15.625, "learning_rate": 2.997994617146578e-06, "loss": 1.804483413696289, "step": 468 }, { "epoch": 0.14465989535241613, "grad_norm": 94.5, "learning_rate": 2.997948264661447e-06, "loss": 1.7305872440338135, "step": 470 }, { "epoch": 0.14527546937519237, "grad_norm": 9.4375, "learning_rate": 2.9979013830411973e-06, "loss": 1.4537417888641357, "step": 472 }, { "epoch": 0.1458910433979686, "grad_norm": 21.75, "learning_rate": 2.9978539723065345e-06, "loss": 1.5609411001205444, "step": 474 }, { "epoch": 0.14650661742074483, "grad_norm": 12.5625, "learning_rate": 2.997806032478402e-06, "loss": 1.7527482509613037, "step": 476 }, { "epoch": 0.1471221914435211, "grad_norm": 6.15625, "learning_rate": 2.9977575635779727e-06, "loss": 1.0448154211044312, "step": 478 }, { "epoch": 0.14773776546629733, "grad_norm": 14.1875, "learning_rate": 2.997708565626657e-06, "loss": 1.3397457599639893, "step": 480 }, { "epoch": 0.14835333948907356, "grad_norm": 11.5, "learning_rate": 2.997659038646097e-06, "loss": 1.3842558860778809, "step": 482 }, { "epoch": 0.1489689135118498, "grad_norm": 8.8125, "learning_rate": 2.9976089826581675e-06, "loss": 1.3095613718032837, "step": 484 }, { "epoch": 0.14958448753462603, "grad_norm": 13.9375, "learning_rate": 2.997558397684981e-06, "loss": 1.6145265102386475, "step": 486 }, { "epoch": 0.1502000615574023, "grad_norm": 13.4375, "learning_rate": 2.9975072837488783e-06, "loss": 1.6280035972595215, "step": 488 }, { "epoch": 0.15081563558017852, "grad_norm": 27.875, "learning_rate": 2.9974556408724377e-06, "loss": 1.5152499675750732, "step": 490 }, { "epoch": 0.15143120960295475, "grad_norm": 24.125, "learning_rate": 2.99740346907847e-06, "loss": 1.25870680809021, "step": 492 }, { "epoch": 0.15204678362573099, "grad_norm": 16.375, "learning_rate": 2.997350768390019e-06, "loss": 0.947930634021759, "step": 494 }, { "epoch": 0.15266235764850725, "grad_norm": 36.0, "learning_rate": 2.9972975388303636e-06, "loss": 1.5841643810272217, "step": 496 }, { "epoch": 0.15327793167128348, "grad_norm": 15.5625, "learning_rate": 2.997243780423014e-06, "loss": 1.7457714080810547, "step": 498 }, { "epoch": 0.1538935056940597, "grad_norm": 11.5, "learning_rate": 2.9971894931917164e-06, "loss": 1.27567720413208, "step": 500 }, { "epoch": 0.15450907971683595, "grad_norm": 9.8125, "learning_rate": 2.9971346771604488e-06, "loss": 1.5354039669036865, "step": 502 }, { "epoch": 0.15512465373961218, "grad_norm": 13.3125, "learning_rate": 2.997079332353425e-06, "loss": 1.7307982444763184, "step": 504 }, { "epoch": 0.15574022776238844, "grad_norm": 10.125, "learning_rate": 2.9970234587950887e-06, "loss": 1.5125446319580078, "step": 506 }, { "epoch": 0.15635580178516467, "grad_norm": 10.3125, "learning_rate": 2.996967056510121e-06, "loss": 1.3819962739944458, "step": 508 }, { "epoch": 0.1569713758079409, "grad_norm": 12.1875, "learning_rate": 2.9969101255234336e-06, "loss": 1.5607001781463623, "step": 510 }, { "epoch": 0.15758694983071714, "grad_norm": 14.0625, "learning_rate": 2.996852665860174e-06, "loss": 1.8390923738479614, "step": 512 }, { "epoch": 0.15820252385349337, "grad_norm": 9.9375, "learning_rate": 2.9967946775457216e-06, "loss": 1.5656869411468506, "step": 514 }, { "epoch": 0.15881809787626963, "grad_norm": 10.3125, "learning_rate": 2.9967361606056903e-06, "loss": 1.39482581615448, "step": 516 }, { "epoch": 0.15943367189904586, "grad_norm": 12.75, "learning_rate": 2.996677115065927e-06, "loss": 1.5271615982055664, "step": 518 }, { "epoch": 0.1600492459218221, "grad_norm": 15.4375, "learning_rate": 2.9966175409525118e-06, "loss": 1.4671976566314697, "step": 520 }, { "epoch": 0.16066481994459833, "grad_norm": 16.5, "learning_rate": 2.9965574382917595e-06, "loss": 1.8529220819473267, "step": 522 }, { "epoch": 0.16128039396737456, "grad_norm": 13.125, "learning_rate": 2.996496807110216e-06, "loss": 1.4948711395263672, "step": 524 }, { "epoch": 0.16189596799015082, "grad_norm": 12.0625, "learning_rate": 2.996435647434664e-06, "loss": 1.8076704740524292, "step": 526 }, { "epoch": 0.16251154201292706, "grad_norm": 14.625, "learning_rate": 2.9963739592921166e-06, "loss": 1.5516061782836914, "step": 528 }, { "epoch": 0.1631271160357033, "grad_norm": 17.125, "learning_rate": 2.9963117427098225e-06, "loss": 1.004725694656372, "step": 530 }, { "epoch": 0.16374269005847952, "grad_norm": 6.8125, "learning_rate": 2.996248997715261e-06, "loss": 1.3982107639312744, "step": 532 }, { "epoch": 0.16435826408125578, "grad_norm": 10.1875, "learning_rate": 2.996185724336149e-06, "loss": 1.3941655158996582, "step": 534 }, { "epoch": 0.16497383810403202, "grad_norm": 18.25, "learning_rate": 2.996121922600432e-06, "loss": 1.4721497297286987, "step": 536 }, { "epoch": 0.16558941212680825, "grad_norm": 22.75, "learning_rate": 2.9960575925362933e-06, "loss": 1.7670438289642334, "step": 538 }, { "epoch": 0.16620498614958448, "grad_norm": 8.8125, "learning_rate": 2.995992734172147e-06, "loss": 1.3671045303344727, "step": 540 }, { "epoch": 0.16682056017236072, "grad_norm": 10.8125, "learning_rate": 2.9959273475366404e-06, "loss": 1.5833959579467773, "step": 542 }, { "epoch": 0.16743613419513698, "grad_norm": 24.5, "learning_rate": 2.995861432658656e-06, "loss": 1.4065296649932861, "step": 544 }, { "epoch": 0.1680517082179132, "grad_norm": 9.6875, "learning_rate": 2.995794989567307e-06, "loss": 0.9803017973899841, "step": 546 }, { "epoch": 0.16866728224068944, "grad_norm": 21.875, "learning_rate": 2.995728018291943e-06, "loss": 1.305236577987671, "step": 548 }, { "epoch": 0.16928285626346568, "grad_norm": 10.3125, "learning_rate": 2.995660518862144e-06, "loss": 1.4379463195800781, "step": 550 }, { "epoch": 0.1698984302862419, "grad_norm": 29.0, "learning_rate": 2.995592491307726e-06, "loss": 1.6318714618682861, "step": 552 }, { "epoch": 0.17051400430901817, "grad_norm": 25.0, "learning_rate": 2.995523935658735e-06, "loss": 1.435588002204895, "step": 554 }, { "epoch": 0.1711295783317944, "grad_norm": 38.0, "learning_rate": 2.9954548519454537e-06, "loss": 1.6767455339431763, "step": 556 }, { "epoch": 0.17174515235457063, "grad_norm": 11.3125, "learning_rate": 2.995385240198395e-06, "loss": 1.7289783954620361, "step": 558 }, { "epoch": 0.17236072637734687, "grad_norm": 21.375, "learning_rate": 2.9953151004483084e-06, "loss": 1.3982090950012207, "step": 560 }, { "epoch": 0.17297630040012313, "grad_norm": 9.1875, "learning_rate": 2.995244432726173e-06, "loss": 1.022979974746704, "step": 562 }, { "epoch": 0.17359187442289936, "grad_norm": 15.3125, "learning_rate": 2.9951732370632034e-06, "loss": 1.4568507671356201, "step": 564 }, { "epoch": 0.1742074484456756, "grad_norm": 27.125, "learning_rate": 2.9951015134908465e-06, "loss": 0.9370282888412476, "step": 566 }, { "epoch": 0.17482302246845183, "grad_norm": 15.0625, "learning_rate": 2.9950292620407835e-06, "loss": 1.2536559104919434, "step": 568 }, { "epoch": 0.17543859649122806, "grad_norm": 12.4375, "learning_rate": 2.994956482744927e-06, "loss": 1.5881582498550415, "step": 570 }, { "epoch": 0.17605417051400432, "grad_norm": 14.1875, "learning_rate": 2.994883175635425e-06, "loss": 1.379671573638916, "step": 572 }, { "epoch": 0.17666974453678055, "grad_norm": 15.0, "learning_rate": 2.9948093407446564e-06, "loss": 1.261031985282898, "step": 574 }, { "epoch": 0.1772853185595568, "grad_norm": 23.0, "learning_rate": 2.9947349781052336e-06, "loss": 1.6905066967010498, "step": 576 }, { "epoch": 0.17790089258233302, "grad_norm": 5.5625, "learning_rate": 2.9946600877500037e-06, "loss": 0.9953762292861938, "step": 578 }, { "epoch": 0.17851646660510925, "grad_norm": 14.3125, "learning_rate": 2.9945846697120454e-06, "loss": 1.0212024450302124, "step": 580 }, { "epoch": 0.1791320406278855, "grad_norm": 17.0, "learning_rate": 2.9945087240246713e-06, "loss": 1.3336846828460693, "step": 582 }, { "epoch": 0.17974761465066175, "grad_norm": 21.5, "learning_rate": 2.994432250721426e-06, "loss": 1.8026593923568726, "step": 584 }, { "epoch": 0.18036318867343798, "grad_norm": 26.125, "learning_rate": 2.9943552498360883e-06, "loss": 1.6114405393600464, "step": 586 }, { "epoch": 0.1809787626962142, "grad_norm": 10.125, "learning_rate": 2.9942777214026696e-06, "loss": 1.6910456418991089, "step": 588 }, { "epoch": 0.18159433671899045, "grad_norm": 12.75, "learning_rate": 2.994199665455414e-06, "loss": 1.414069652557373, "step": 590 }, { "epoch": 0.1822099107417667, "grad_norm": 6.78125, "learning_rate": 2.9941210820287994e-06, "loss": 1.504883050918579, "step": 592 }, { "epoch": 0.18282548476454294, "grad_norm": 15.9375, "learning_rate": 2.994041971157536e-06, "loss": 1.501386046409607, "step": 594 }, { "epoch": 0.18344105878731917, "grad_norm": 9.75, "learning_rate": 2.993962332876567e-06, "loss": 1.4829013347625732, "step": 596 }, { "epoch": 0.1840566328100954, "grad_norm": 4.1875, "learning_rate": 2.9938821672210684e-06, "loss": 0.966797411441803, "step": 598 }, { "epoch": 0.18467220683287167, "grad_norm": 24.25, "learning_rate": 2.9938014742264505e-06, "loss": 0.9075067043304443, "step": 600 }, { "epoch": 0.1852877808556479, "grad_norm": 17.0, "learning_rate": 2.9937202539283544e-06, "loss": 1.418922781944275, "step": 602 }, { "epoch": 0.18590335487842413, "grad_norm": 35.75, "learning_rate": 2.993638506362656e-06, "loss": 1.4964491128921509, "step": 604 }, { "epoch": 0.18651892890120036, "grad_norm": 20.875, "learning_rate": 2.993556231565463e-06, "loss": 1.4599058628082275, "step": 606 }, { "epoch": 0.1871345029239766, "grad_norm": 7.875, "learning_rate": 2.993473429573116e-06, "loss": 1.621177315711975, "step": 608 }, { "epoch": 0.18775007694675286, "grad_norm": 9.5625, "learning_rate": 2.993390100422189e-06, "loss": 1.665502905845642, "step": 610 }, { "epoch": 0.1883656509695291, "grad_norm": 14.75, "learning_rate": 2.993306244149488e-06, "loss": 0.6662383675575256, "step": 612 }, { "epoch": 0.18898122499230532, "grad_norm": 10.5, "learning_rate": 2.9932218607920542e-06, "loss": 1.319474220275879, "step": 614 }, { "epoch": 0.18959679901508156, "grad_norm": 17.25, "learning_rate": 2.9931369503871573e-06, "loss": 1.4323532581329346, "step": 616 }, { "epoch": 0.1902123730378578, "grad_norm": 20.5, "learning_rate": 2.993051512972304e-06, "loss": 1.3529409170150757, "step": 618 }, { "epoch": 0.19082794706063405, "grad_norm": 16.875, "learning_rate": 2.992965548585232e-06, "loss": 1.02125883102417, "step": 620 }, { "epoch": 0.19144352108341028, "grad_norm": 13.6875, "learning_rate": 2.9928790572639117e-06, "loss": 1.5696746110916138, "step": 622 }, { "epoch": 0.19205909510618652, "grad_norm": 27.875, "learning_rate": 2.9927920390465453e-06, "loss": 1.7022054195404053, "step": 624 }, { "epoch": 0.19267466912896275, "grad_norm": 18.125, "learning_rate": 2.9927044939715703e-06, "loss": 1.096092700958252, "step": 626 }, { "epoch": 0.19329024315173898, "grad_norm": 27.5, "learning_rate": 2.992616422077655e-06, "loss": 1.7465717792510986, "step": 628 }, { "epoch": 0.19390581717451524, "grad_norm": 41.75, "learning_rate": 2.9925278234037014e-06, "loss": 1.9313154220581055, "step": 630 }, { "epoch": 0.19452139119729148, "grad_norm": 12.875, "learning_rate": 2.9924386979888424e-06, "loss": 1.3957619667053223, "step": 632 }, { "epoch": 0.1951369652200677, "grad_norm": 7.25, "learning_rate": 2.9923490458724457e-06, "loss": 1.2672849893569946, "step": 634 }, { "epoch": 0.19575253924284394, "grad_norm": 8.8125, "learning_rate": 2.99225886709411e-06, "loss": 1.5344815254211426, "step": 636 }, { "epoch": 0.1963681132656202, "grad_norm": 13.4375, "learning_rate": 2.992168161693669e-06, "loss": 1.2782959938049316, "step": 638 }, { "epoch": 0.19698368728839644, "grad_norm": 12.375, "learning_rate": 2.9920769297111856e-06, "loss": 1.523129940032959, "step": 640 }, { "epoch": 0.19759926131117267, "grad_norm": 25.625, "learning_rate": 2.991985171186958e-06, "loss": 1.7012336254119873, "step": 642 }, { "epoch": 0.1982148353339489, "grad_norm": 11.3125, "learning_rate": 2.9918928861615156e-06, "loss": 1.3830182552337646, "step": 644 }, { "epoch": 0.19883040935672514, "grad_norm": 11.625, "learning_rate": 2.9918000746756205e-06, "loss": 1.4474706649780273, "step": 646 }, { "epoch": 0.1994459833795014, "grad_norm": 13.875, "learning_rate": 2.9917067367702693e-06, "loss": 1.2021737098693848, "step": 648 }, { "epoch": 0.20006155740227763, "grad_norm": 16.0, "learning_rate": 2.9916128724866877e-06, "loss": 1.569669246673584, "step": 650 }, { "epoch": 0.20067713142505386, "grad_norm": 18.125, "learning_rate": 2.9915184818663356e-06, "loss": 1.5191471576690674, "step": 652 }, { "epoch": 0.2012927054478301, "grad_norm": 15.9375, "learning_rate": 2.991423564950907e-06, "loss": 1.0736037492752075, "step": 654 }, { "epoch": 0.20190827947060633, "grad_norm": 22.0, "learning_rate": 2.991328121782325e-06, "loss": 1.7012343406677246, "step": 656 }, { "epoch": 0.2025238534933826, "grad_norm": 18.125, "learning_rate": 2.9912321524027485e-06, "loss": 1.4998762607574463, "step": 658 }, { "epoch": 0.20313942751615882, "grad_norm": 14.125, "learning_rate": 2.9911356568545667e-06, "loss": 1.5033804178237915, "step": 660 }, { "epoch": 0.20375500153893505, "grad_norm": 7.1875, "learning_rate": 2.9910386351804014e-06, "loss": 1.372567892074585, "step": 662 }, { "epoch": 0.2043705755617113, "grad_norm": 14.3125, "learning_rate": 2.9909410874231075e-06, "loss": 1.382359504699707, "step": 664 }, { "epoch": 0.20498614958448755, "grad_norm": 7.125, "learning_rate": 2.990843013625772e-06, "loss": 0.7901002764701843, "step": 666 }, { "epoch": 0.20560172360726378, "grad_norm": 14.875, "learning_rate": 2.990744413831715e-06, "loss": 1.2381068468093872, "step": 668 }, { "epoch": 0.20621729763004001, "grad_norm": 11.625, "learning_rate": 2.990645288084487e-06, "loss": 1.49295973777771, "step": 670 }, { "epoch": 0.20683287165281625, "grad_norm": 9.1875, "learning_rate": 2.990545636427872e-06, "loss": 1.5857125520706177, "step": 672 }, { "epoch": 0.20744844567559248, "grad_norm": 15.0, "learning_rate": 2.990445458905886e-06, "loss": 1.3795162439346313, "step": 674 }, { "epoch": 0.20806401969836874, "grad_norm": 16.125, "learning_rate": 2.9903447555627782e-06, "loss": 1.2645190954208374, "step": 676 }, { "epoch": 0.20867959372114497, "grad_norm": 10.6875, "learning_rate": 2.9902435264430303e-06, "loss": 1.7142274379730225, "step": 678 }, { "epoch": 0.2092951677439212, "grad_norm": 14.625, "learning_rate": 2.990141771591353e-06, "loss": 1.548229455947876, "step": 680 }, { "epoch": 0.20991074176669744, "grad_norm": 19.75, "learning_rate": 2.990039491052694e-06, "loss": 1.3462605476379395, "step": 682 }, { "epoch": 0.21052631578947367, "grad_norm": 13.375, "learning_rate": 2.9899366848722284e-06, "loss": 1.3686890602111816, "step": 684 }, { "epoch": 0.21114188981224993, "grad_norm": 13.5625, "learning_rate": 2.9898333530953674e-06, "loss": 1.6143863201141357, "step": 686 }, { "epoch": 0.21175746383502617, "grad_norm": 17.75, "learning_rate": 2.9897294957677522e-06, "loss": 1.8706777095794678, "step": 688 }, { "epoch": 0.2123730378578024, "grad_norm": 7.9375, "learning_rate": 2.989625112935257e-06, "loss": 1.5297441482543945, "step": 690 }, { "epoch": 0.21298861188057863, "grad_norm": 112.0, "learning_rate": 2.9895202046439872e-06, "loss": 1.1892492771148682, "step": 692 }, { "epoch": 0.21360418590335487, "grad_norm": 17.5, "learning_rate": 2.989414770940282e-06, "loss": 1.5064308643341064, "step": 694 }, { "epoch": 0.21421975992613113, "grad_norm": 10.9375, "learning_rate": 2.9893088118707103e-06, "loss": 1.5546306371688843, "step": 696 }, { "epoch": 0.21483533394890736, "grad_norm": 6.6875, "learning_rate": 2.989202327482076e-06, "loss": 1.3973575830459595, "step": 698 }, { "epoch": 0.2154509079716836, "grad_norm": 8.3125, "learning_rate": 2.989095317821411e-06, "loss": 1.2875769138336182, "step": 700 }, { "epoch": 0.21606648199445982, "grad_norm": 11.5, "learning_rate": 2.9889877829359837e-06, "loss": 1.3528231382369995, "step": 702 }, { "epoch": 0.21668205601723609, "grad_norm": 9.75, "learning_rate": 2.9888797228732908e-06, "loss": 1.139479637145996, "step": 704 }, { "epoch": 0.21729763004001232, "grad_norm": 18.25, "learning_rate": 2.9887711376810643e-06, "loss": 1.7238795757293701, "step": 706 }, { "epoch": 0.21791320406278855, "grad_norm": 6.84375, "learning_rate": 2.988662027407265e-06, "loss": 1.401573657989502, "step": 708 }, { "epoch": 0.21852877808556478, "grad_norm": 17.125, "learning_rate": 2.9885523921000877e-06, "loss": 1.4582817554473877, "step": 710 }, { "epoch": 0.21914435210834102, "grad_norm": 11.625, "learning_rate": 2.988442231807958e-06, "loss": 1.4865729808807373, "step": 712 }, { "epoch": 0.21975992613111728, "grad_norm": 11.875, "learning_rate": 2.988331546579534e-06, "loss": 1.4546085596084595, "step": 714 }, { "epoch": 0.2203755001538935, "grad_norm": 15.9375, "learning_rate": 2.9882203364637058e-06, "loss": 1.2999931573867798, "step": 716 }, { "epoch": 0.22099107417666974, "grad_norm": 17.75, "learning_rate": 2.9881086015095945e-06, "loss": 1.215187907218933, "step": 718 }, { "epoch": 0.22160664819944598, "grad_norm": 7.4375, "learning_rate": 2.9879963417665544e-06, "loss": 1.5152490139007568, "step": 720 }, { "epoch": 0.2222222222222222, "grad_norm": 23.625, "learning_rate": 2.98788355728417e-06, "loss": 1.7452555894851685, "step": 722 }, { "epoch": 0.22283779624499847, "grad_norm": 11.75, "learning_rate": 2.9877702481122586e-06, "loss": 0.7922830581665039, "step": 724 }, { "epoch": 0.2234533702677747, "grad_norm": 14.875, "learning_rate": 2.987656414300869e-06, "loss": 1.6105217933654785, "step": 726 }, { "epoch": 0.22406894429055094, "grad_norm": 14.9375, "learning_rate": 2.9875420559002812e-06, "loss": 1.8192713260650635, "step": 728 }, { "epoch": 0.22468451831332717, "grad_norm": 23.125, "learning_rate": 2.9874271729610083e-06, "loss": 1.4074468612670898, "step": 730 }, { "epoch": 0.22530009233610343, "grad_norm": 10.8125, "learning_rate": 2.9873117655337934e-06, "loss": 1.2618192434310913, "step": 732 }, { "epoch": 0.22591566635887966, "grad_norm": 19.75, "learning_rate": 2.987195833669613e-06, "loss": 1.167978286743164, "step": 734 }, { "epoch": 0.2265312403816559, "grad_norm": 17.125, "learning_rate": 2.987079377419673e-06, "loss": 1.499091386795044, "step": 736 }, { "epoch": 0.22714681440443213, "grad_norm": 14.6875, "learning_rate": 2.9869623968354133e-06, "loss": 1.4751125574111938, "step": 738 }, { "epoch": 0.22776238842720836, "grad_norm": 18.25, "learning_rate": 2.986844891968505e-06, "loss": 1.824317455291748, "step": 740 }, { "epoch": 0.22837796244998462, "grad_norm": 21.625, "learning_rate": 2.9867268628708485e-06, "loss": 1.7516217231750488, "step": 742 }, { "epoch": 0.22899353647276086, "grad_norm": 15.5625, "learning_rate": 2.9866083095945785e-06, "loss": 1.0053646564483643, "step": 744 }, { "epoch": 0.2296091104955371, "grad_norm": 11.5, "learning_rate": 2.98648923219206e-06, "loss": 1.4207651615142822, "step": 746 }, { "epoch": 0.23022468451831332, "grad_norm": 24.875, "learning_rate": 2.9863696307158894e-06, "loss": 1.2401245832443237, "step": 748 }, { "epoch": 0.23084025854108955, "grad_norm": 6.40625, "learning_rate": 2.9862495052188947e-06, "loss": 1.2797411680221558, "step": 750 }, { "epoch": 0.23145583256386582, "grad_norm": 17.25, "learning_rate": 2.9861288557541357e-06, "loss": 1.4601941108703613, "step": 752 }, { "epoch": 0.23207140658664205, "grad_norm": 12.75, "learning_rate": 2.986007682374903e-06, "loss": 1.5369291305541992, "step": 754 }, { "epoch": 0.23268698060941828, "grad_norm": 8.6875, "learning_rate": 2.9858859851347193e-06, "loss": 1.4711337089538574, "step": 756 }, { "epoch": 0.23330255463219451, "grad_norm": 12.4375, "learning_rate": 2.9857637640873394e-06, "loss": 1.3970842361450195, "step": 758 }, { "epoch": 0.23391812865497075, "grad_norm": 8.9375, "learning_rate": 2.985641019286746e-06, "loss": 1.535696029663086, "step": 760 }, { "epoch": 0.234533702677747, "grad_norm": 21.5, "learning_rate": 2.9855177507871586e-06, "loss": 1.2274210453033447, "step": 762 }, { "epoch": 0.23514927670052324, "grad_norm": 17.25, "learning_rate": 2.9853939586430222e-06, "loss": 1.777534008026123, "step": 764 }, { "epoch": 0.23576485072329947, "grad_norm": 13.75, "learning_rate": 2.9852696429090186e-06, "loss": 1.7154924869537354, "step": 766 }, { "epoch": 0.2363804247460757, "grad_norm": 11.75, "learning_rate": 2.9851448036400562e-06, "loss": 1.6001613140106201, "step": 768 }, { "epoch": 0.23699599876885197, "grad_norm": 12.375, "learning_rate": 2.9850194408912777e-06, "loss": 1.2444454431533813, "step": 770 }, { "epoch": 0.2376115727916282, "grad_norm": 16.0, "learning_rate": 2.984893554718055e-06, "loss": 1.2592495679855347, "step": 772 }, { "epoch": 0.23822714681440443, "grad_norm": 15.8125, "learning_rate": 2.984767145175993e-06, "loss": 1.7219655513763428, "step": 774 }, { "epoch": 0.23884272083718067, "grad_norm": 30.5, "learning_rate": 2.9846402123209276e-06, "loss": 1.068901777267456, "step": 776 }, { "epoch": 0.2394582948599569, "grad_norm": 17.75, "learning_rate": 2.9845127562089237e-06, "loss": 1.4635910987854004, "step": 778 }, { "epoch": 0.24007386888273316, "grad_norm": 17.875, "learning_rate": 2.9843847768962794e-06, "loss": 1.433922529220581, "step": 780 }, { "epoch": 0.2406894429055094, "grad_norm": 19.75, "learning_rate": 2.9842562744395232e-06, "loss": 1.1140307188034058, "step": 782 }, { "epoch": 0.24130501692828563, "grad_norm": 5.03125, "learning_rate": 2.984127248895415e-06, "loss": 1.5116472244262695, "step": 784 }, { "epoch": 0.24192059095106186, "grad_norm": 13.875, "learning_rate": 2.983997700320946e-06, "loss": 1.7026944160461426, "step": 786 }, { "epoch": 0.2425361649738381, "grad_norm": 41.5, "learning_rate": 2.9838676287733367e-06, "loss": 1.4336001873016357, "step": 788 }, { "epoch": 0.24315173899661435, "grad_norm": 39.25, "learning_rate": 2.9837370343100405e-06, "loss": 1.4539496898651123, "step": 790 }, { "epoch": 0.24376731301939059, "grad_norm": 14.4375, "learning_rate": 2.9836059169887415e-06, "loss": 1.3547931909561157, "step": 792 }, { "epoch": 0.24438288704216682, "grad_norm": 13.9375, "learning_rate": 2.983474276867354e-06, "loss": 1.308903455734253, "step": 794 }, { "epoch": 0.24499846106494305, "grad_norm": 18.375, "learning_rate": 2.9833421140040242e-06, "loss": 1.670867681503296, "step": 796 }, { "epoch": 0.24561403508771928, "grad_norm": 10.5625, "learning_rate": 2.983209428457127e-06, "loss": 1.5410202741622925, "step": 798 }, { "epoch": 0.24622960911049555, "grad_norm": 10.75, "learning_rate": 2.9830762202852714e-06, "loss": 1.6194417476654053, "step": 800 }, { "epoch": 0.24684518313327178, "grad_norm": 11.125, "learning_rate": 2.9829424895472952e-06, "loss": 1.698732614517212, "step": 802 }, { "epoch": 0.247460757156048, "grad_norm": 19.5, "learning_rate": 2.982808236302267e-06, "loss": 1.6608161926269531, "step": 804 }, { "epoch": 0.24807633117882424, "grad_norm": 31.875, "learning_rate": 2.982673460609486e-06, "loss": 1.8473336696624756, "step": 806 }, { "epoch": 0.2486919052016005, "grad_norm": 10.375, "learning_rate": 2.9825381625284846e-06, "loss": 1.4293184280395508, "step": 808 }, { "epoch": 0.24930747922437674, "grad_norm": 42.5, "learning_rate": 2.982402342119023e-06, "loss": 1.045412302017212, "step": 810 }, { "epoch": 0.24992305324715297, "grad_norm": 22.5, "learning_rate": 2.982265999441093e-06, "loss": 1.688213586807251, "step": 812 }, { "epoch": 0.2505386272699292, "grad_norm": 6.84375, "learning_rate": 2.9821291345549178e-06, "loss": 1.1688685417175293, "step": 814 }, { "epoch": 0.25115420129270544, "grad_norm": 6.4375, "learning_rate": 2.9819917475209513e-06, "loss": 1.2294797897338867, "step": 816 }, { "epoch": 0.25176977531548167, "grad_norm": 21.5, "learning_rate": 2.9818538383998756e-06, "loss": 1.0619512796401978, "step": 818 }, { "epoch": 0.2523853493382579, "grad_norm": 6.53125, "learning_rate": 2.981715407252608e-06, "loss": 1.170961618423462, "step": 820 }, { "epoch": 0.2530009233610342, "grad_norm": 79.0, "learning_rate": 2.9815764541402914e-06, "loss": 1.622196912765503, "step": 822 }, { "epoch": 0.2536164973838104, "grad_norm": 5.71875, "learning_rate": 2.981436979124302e-06, "loss": 1.3246055841445923, "step": 824 }, { "epoch": 0.25423207140658666, "grad_norm": 10.375, "learning_rate": 2.9812969822662474e-06, "loss": 1.241051197052002, "step": 826 }, { "epoch": 0.2548476454293629, "grad_norm": 12.0625, "learning_rate": 2.981156463627963e-06, "loss": 0.7300369739532471, "step": 828 }, { "epoch": 0.2554632194521391, "grad_norm": 16.125, "learning_rate": 2.981015423271517e-06, "loss": 1.586928129196167, "step": 830 }, { "epoch": 0.25607879347491536, "grad_norm": 5.875, "learning_rate": 2.9808738612592065e-06, "loss": 1.3401447534561157, "step": 832 }, { "epoch": 0.2566943674976916, "grad_norm": 13.5625, "learning_rate": 2.980731777653559e-06, "loss": 1.4773958921432495, "step": 834 }, { "epoch": 0.2573099415204678, "grad_norm": 14.875, "learning_rate": 2.980589172517334e-06, "loss": 1.0504045486450195, "step": 836 }, { "epoch": 0.25792551554324405, "grad_norm": 24.375, "learning_rate": 2.9804460459135203e-06, "loss": 0.729271650314331, "step": 838 }, { "epoch": 0.2585410895660203, "grad_norm": 20.75, "learning_rate": 2.9803023979053365e-06, "loss": 1.6884655952453613, "step": 840 }, { "epoch": 0.2591566635887966, "grad_norm": 10.125, "learning_rate": 2.9801582285562325e-06, "loss": 1.403031349182129, "step": 842 }, { "epoch": 0.2597722376115728, "grad_norm": 13.0625, "learning_rate": 2.980013537929888e-06, "loss": 1.461706280708313, "step": 844 }, { "epoch": 0.26038781163434904, "grad_norm": 21.25, "learning_rate": 2.9798683260902125e-06, "loss": 1.5261836051940918, "step": 846 }, { "epoch": 0.2610033856571253, "grad_norm": 12.3125, "learning_rate": 2.979722593101348e-06, "loss": 1.8656940460205078, "step": 848 }, { "epoch": 0.2616189596799015, "grad_norm": 5.625, "learning_rate": 2.979576339027663e-06, "loss": 1.134974718093872, "step": 850 }, { "epoch": 0.26223453370267774, "grad_norm": 11.5, "learning_rate": 2.979429563933759e-06, "loss": 1.4083569049835205, "step": 852 }, { "epoch": 0.262850107725454, "grad_norm": 11.1875, "learning_rate": 2.9792822678844656e-06, "loss": 1.208848237991333, "step": 854 }, { "epoch": 0.2634656817482302, "grad_norm": 34.5, "learning_rate": 2.979134450944845e-06, "loss": 1.7384560108184814, "step": 856 }, { "epoch": 0.26408125577100644, "grad_norm": 14.3125, "learning_rate": 2.9789861131801877e-06, "loss": 1.7467668056488037, "step": 858 }, { "epoch": 0.26469682979378273, "grad_norm": 2.90625, "learning_rate": 2.978837254656015e-06, "loss": 1.5746519565582275, "step": 860 }, { "epoch": 0.26531240381655896, "grad_norm": 12.75, "learning_rate": 2.9786878754380767e-06, "loss": 1.5870490074157715, "step": 862 }, { "epoch": 0.2659279778393352, "grad_norm": 20.125, "learning_rate": 2.9785379755923553e-06, "loss": 1.548311710357666, "step": 864 }, { "epoch": 0.2665435518621114, "grad_norm": 17.5, "learning_rate": 2.9783875551850606e-06, "loss": 0.8423838019371033, "step": 866 }, { "epoch": 0.26715912588488766, "grad_norm": 7.8125, "learning_rate": 2.9782366142826335e-06, "loss": 1.2350690364837646, "step": 868 }, { "epoch": 0.2677746999076639, "grad_norm": 9.1875, "learning_rate": 2.978085152951745e-06, "loss": 1.2413235902786255, "step": 870 }, { "epoch": 0.2683902739304401, "grad_norm": 13.75, "learning_rate": 2.9779331712592967e-06, "loss": 1.14556086063385, "step": 872 }, { "epoch": 0.26900584795321636, "grad_norm": 10.0625, "learning_rate": 2.977780669272418e-06, "loss": 1.0823867321014404, "step": 874 }, { "epoch": 0.2696214219759926, "grad_norm": 14.9375, "learning_rate": 2.977627647058469e-06, "loss": 1.029285192489624, "step": 876 }, { "epoch": 0.2702369959987688, "grad_norm": 13.5, "learning_rate": 2.9774741046850404e-06, "loss": 1.5127992630004883, "step": 878 }, { "epoch": 0.2708525700215451, "grad_norm": 7.09375, "learning_rate": 2.9773200422199524e-06, "loss": 1.503528118133545, "step": 880 }, { "epoch": 0.27146814404432135, "grad_norm": 13.0625, "learning_rate": 2.9771654597312527e-06, "loss": 0.9524327516555786, "step": 882 }, { "epoch": 0.2720837180670976, "grad_norm": 17.625, "learning_rate": 2.977010357287223e-06, "loss": 1.3801374435424805, "step": 884 }, { "epoch": 0.2726992920898738, "grad_norm": 13.25, "learning_rate": 2.976854734956371e-06, "loss": 1.24113130569458, "step": 886 }, { "epoch": 0.27331486611265005, "grad_norm": 9.875, "learning_rate": 2.9766985928074356e-06, "loss": 1.4660515785217285, "step": 888 }, { "epoch": 0.2739304401354263, "grad_norm": 12.0, "learning_rate": 2.976541930909385e-06, "loss": 1.6487599611282349, "step": 890 }, { "epoch": 0.2745460141582025, "grad_norm": 13.375, "learning_rate": 2.9763847493314152e-06, "loss": 1.4463802576065063, "step": 892 }, { "epoch": 0.27516158818097874, "grad_norm": 21.625, "learning_rate": 2.976227048142956e-06, "loss": 1.746794581413269, "step": 894 }, { "epoch": 0.275777162203755, "grad_norm": 14.75, "learning_rate": 2.9760688274136632e-06, "loss": 1.4405450820922852, "step": 896 }, { "epoch": 0.27639273622653127, "grad_norm": 12.4375, "learning_rate": 2.975910087213423e-06, "loss": 1.3549253940582275, "step": 898 }, { "epoch": 0.2770083102493075, "grad_norm": 55.75, "learning_rate": 2.975750827612351e-06, "loss": 1.2329659461975098, "step": 900 }, { "epoch": 0.27762388427208373, "grad_norm": 8.9375, "learning_rate": 2.9755910486807922e-06, "loss": 1.2978835105895996, "step": 902 }, { "epoch": 0.27823945829485996, "grad_norm": 8.9375, "learning_rate": 2.9754307504893223e-06, "loss": 1.1330984830856323, "step": 904 }, { "epoch": 0.2788550323176362, "grad_norm": 16.25, "learning_rate": 2.9752699331087436e-06, "loss": 1.4983842372894287, "step": 906 }, { "epoch": 0.27947060634041243, "grad_norm": 6.65625, "learning_rate": 2.9751085966100907e-06, "loss": 1.216892123222351, "step": 908 }, { "epoch": 0.28008618036318866, "grad_norm": 14.625, "learning_rate": 2.974946741064625e-06, "loss": 1.4386199712753296, "step": 910 }, { "epoch": 0.2807017543859649, "grad_norm": 17.75, "learning_rate": 2.9747843665438393e-06, "loss": 0.9468835592269897, "step": 912 }, { "epoch": 0.28131732840874113, "grad_norm": 9.125, "learning_rate": 2.9746214731194534e-06, "loss": 0.9744749665260315, "step": 914 }, { "epoch": 0.28193290243151736, "grad_norm": 8.625, "learning_rate": 2.9744580608634188e-06, "loss": 0.960205078125, "step": 916 }, { "epoch": 0.28254847645429365, "grad_norm": 19.375, "learning_rate": 2.9742941298479137e-06, "loss": 1.7566368579864502, "step": 918 }, { "epoch": 0.2831640504770699, "grad_norm": 21.25, "learning_rate": 2.9741296801453476e-06, "loss": 1.284712314605713, "step": 920 }, { "epoch": 0.2837796244998461, "grad_norm": 21.125, "learning_rate": 2.9739647118283574e-06, "loss": 1.3857460021972656, "step": 922 }, { "epoch": 0.28439519852262235, "grad_norm": 16.125, "learning_rate": 2.9737992249698107e-06, "loss": 0.9767564535140991, "step": 924 }, { "epoch": 0.2850107725453986, "grad_norm": 24.625, "learning_rate": 2.9736332196428024e-06, "loss": 1.5778262615203857, "step": 926 }, { "epoch": 0.2856263465681748, "grad_norm": 29.375, "learning_rate": 2.9734666959206575e-06, "loss": 1.5621557235717773, "step": 928 }, { "epoch": 0.28624192059095105, "grad_norm": 20.75, "learning_rate": 2.9732996538769293e-06, "loss": 1.3215174674987793, "step": 930 }, { "epoch": 0.2868574946137273, "grad_norm": 143.0, "learning_rate": 2.9731320935854016e-06, "loss": 1.1781163215637207, "step": 932 }, { "epoch": 0.2874730686365035, "grad_norm": 13.875, "learning_rate": 2.9729640151200845e-06, "loss": 1.2226213216781616, "step": 934 }, { "epoch": 0.2880886426592798, "grad_norm": 13.0, "learning_rate": 2.9727954185552193e-06, "loss": 1.3734395503997803, "step": 936 }, { "epoch": 0.28870421668205604, "grad_norm": 5.375, "learning_rate": 2.9726263039652757e-06, "loss": 1.252492070198059, "step": 938 }, { "epoch": 0.28931979070483227, "grad_norm": 11.125, "learning_rate": 2.9724566714249505e-06, "loss": 1.4262735843658447, "step": 940 }, { "epoch": 0.2899353647276085, "grad_norm": 5.9375, "learning_rate": 2.9722865210091717e-06, "loss": 1.052842140197754, "step": 942 }, { "epoch": 0.29055093875038474, "grad_norm": 10.9375, "learning_rate": 2.9721158527930945e-06, "loss": 0.9967107176780701, "step": 944 }, { "epoch": 0.29116651277316097, "grad_norm": 24.375, "learning_rate": 2.971944666852104e-06, "loss": 1.2896000146865845, "step": 946 }, { "epoch": 0.2917820867959372, "grad_norm": 40.25, "learning_rate": 2.9717729632618123e-06, "loss": 1.8066881895065308, "step": 948 }, { "epoch": 0.29239766081871343, "grad_norm": 15.5625, "learning_rate": 2.9716007420980614e-06, "loss": 1.358643651008606, "step": 950 }, { "epoch": 0.29301323484148967, "grad_norm": 17.875, "learning_rate": 2.9714280034369213e-06, "loss": 1.4859265089035034, "step": 952 }, { "epoch": 0.29362880886426596, "grad_norm": 13.5625, "learning_rate": 2.9712547473546918e-06, "loss": 1.2124885320663452, "step": 954 }, { "epoch": 0.2942443828870422, "grad_norm": 19.0, "learning_rate": 2.971080973927899e-06, "loss": 1.194804072380066, "step": 956 }, { "epoch": 0.2948599569098184, "grad_norm": 10.4375, "learning_rate": 2.9709066832332996e-06, "loss": 1.4251993894577026, "step": 958 }, { "epoch": 0.29547553093259465, "grad_norm": 14.6875, "learning_rate": 2.970731875347877e-06, "loss": 1.6262626647949219, "step": 960 }, { "epoch": 0.2960911049553709, "grad_norm": 19.875, "learning_rate": 2.9705565503488456e-06, "loss": 1.4257208108901978, "step": 962 }, { "epoch": 0.2967066789781471, "grad_norm": 103.0, "learning_rate": 2.9703807083136464e-06, "loss": 1.3943548202514648, "step": 964 }, { "epoch": 0.29732225300092335, "grad_norm": 11.5, "learning_rate": 2.970204349319948e-06, "loss": 1.5504299402236938, "step": 966 }, { "epoch": 0.2979378270236996, "grad_norm": 11.875, "learning_rate": 2.970027473445649e-06, "loss": 1.0902049541473389, "step": 968 }, { "epoch": 0.2985534010464758, "grad_norm": 15.125, "learning_rate": 2.969850080768876e-06, "loss": 1.1666667461395264, "step": 970 }, { "epoch": 0.29916897506925205, "grad_norm": 15.25, "learning_rate": 2.9696721713679825e-06, "loss": 0.994618833065033, "step": 972 }, { "epoch": 0.29978454909202834, "grad_norm": 18.25, "learning_rate": 2.969493745321552e-06, "loss": 1.4349355697631836, "step": 974 }, { "epoch": 0.3004001231148046, "grad_norm": 17.5, "learning_rate": 2.969314802708396e-06, "loss": 1.5411276817321777, "step": 976 }, { "epoch": 0.3010156971375808, "grad_norm": 17.5, "learning_rate": 2.9691353436075527e-06, "loss": 1.2102165222167969, "step": 978 }, { "epoch": 0.30163127116035704, "grad_norm": 11.0625, "learning_rate": 2.96895536809829e-06, "loss": 1.524396300315857, "step": 980 }, { "epoch": 0.3022468451831333, "grad_norm": 21.25, "learning_rate": 2.968774876260103e-06, "loss": 1.6361780166625977, "step": 982 }, { "epoch": 0.3028624192059095, "grad_norm": 8.9375, "learning_rate": 2.968593868172715e-06, "loss": 1.2558728456497192, "step": 984 }, { "epoch": 0.30347799322868574, "grad_norm": 23.375, "learning_rate": 2.9684123439160782e-06, "loss": 1.4768342971801758, "step": 986 }, { "epoch": 0.30409356725146197, "grad_norm": 26.875, "learning_rate": 2.9682303035703714e-06, "loss": 1.4299650192260742, "step": 988 }, { "epoch": 0.3047091412742382, "grad_norm": 5.40625, "learning_rate": 2.9680477472160018e-06, "loss": 1.1562550067901611, "step": 990 }, { "epoch": 0.3053247152970145, "grad_norm": 12.1875, "learning_rate": 2.9678646749336048e-06, "loss": 1.4728153944015503, "step": 992 }, { "epoch": 0.3059402893197907, "grad_norm": 27.625, "learning_rate": 2.967681086804045e-06, "loss": 1.624634027481079, "step": 994 }, { "epoch": 0.30655586334256696, "grad_norm": 17.875, "learning_rate": 2.9674969829084126e-06, "loss": 1.3707025051116943, "step": 996 }, { "epoch": 0.3071714373653432, "grad_norm": 12.3125, "learning_rate": 2.967312363328025e-06, "loss": 1.451162338256836, "step": 998 }, { "epoch": 0.3077870113881194, "grad_norm": 49.5, "learning_rate": 2.9671272281444314e-06, "loss": 1.2668009996414185, "step": 1000 }, { "epoch": 0.30840258541089566, "grad_norm": 29.375, "learning_rate": 2.9669415774394046e-06, "loss": 1.7370235919952393, "step": 1002 }, { "epoch": 0.3090181594336719, "grad_norm": 8.75, "learning_rate": 2.9667554112949477e-06, "loss": 1.146094799041748, "step": 1004 }, { "epoch": 0.3096337334564481, "grad_norm": 17.375, "learning_rate": 2.9665687297932896e-06, "loss": 1.7455390691757202, "step": 1006 }, { "epoch": 0.31024930747922436, "grad_norm": 3.953125, "learning_rate": 2.9663815330168885e-06, "loss": 1.3706302642822266, "step": 1008 }, { "epoch": 0.3108648815020006, "grad_norm": 12.3125, "learning_rate": 2.9661938210484287e-06, "loss": 1.325300693511963, "step": 1010 }, { "epoch": 0.3114804555247769, "grad_norm": 12.125, "learning_rate": 2.966005593970823e-06, "loss": 1.3024173974990845, "step": 1012 }, { "epoch": 0.3120960295475531, "grad_norm": 8.0625, "learning_rate": 2.965816851867212e-06, "loss": 1.2130205631256104, "step": 1014 }, { "epoch": 0.31271160357032934, "grad_norm": 15.875, "learning_rate": 2.965627594820963e-06, "loss": 1.447901725769043, "step": 1016 }, { "epoch": 0.3133271775931056, "grad_norm": 11.875, "learning_rate": 2.9654378229156708e-06, "loss": 1.5102931261062622, "step": 1018 }, { "epoch": 0.3139427516158818, "grad_norm": 9.0, "learning_rate": 2.965247536235159e-06, "loss": 1.594402551651001, "step": 1020 }, { "epoch": 0.31455832563865804, "grad_norm": 11.5, "learning_rate": 2.9650567348634753e-06, "loss": 1.2735745906829834, "step": 1022 }, { "epoch": 0.3151738996614343, "grad_norm": 10.5, "learning_rate": 2.9648654188848986e-06, "loss": 1.1038970947265625, "step": 1024 }, { "epoch": 0.3157894736842105, "grad_norm": 15.5, "learning_rate": 2.964673588383933e-06, "loss": 0.7514253854751587, "step": 1026 }, { "epoch": 0.31640504770698674, "grad_norm": 10.375, "learning_rate": 2.9644812434453106e-06, "loss": 1.4254611730575562, "step": 1028 }, { "epoch": 0.31702062172976303, "grad_norm": 12.25, "learning_rate": 2.964288384153989e-06, "loss": 1.0978376865386963, "step": 1030 }, { "epoch": 0.31763619575253926, "grad_norm": 9.0625, "learning_rate": 2.9640950105951563e-06, "loss": 1.5165598392486572, "step": 1032 }, { "epoch": 0.3182517697753155, "grad_norm": 14.125, "learning_rate": 2.9639011228542236e-06, "loss": 1.4810335636138916, "step": 1034 }, { "epoch": 0.31886734379809173, "grad_norm": 19.125, "learning_rate": 2.9637067210168337e-06, "loss": 1.6129978895187378, "step": 1036 }, { "epoch": 0.31948291782086796, "grad_norm": 7.75, "learning_rate": 2.9635118051688527e-06, "loss": 1.1115729808807373, "step": 1038 }, { "epoch": 0.3200984918436442, "grad_norm": 24.5, "learning_rate": 2.963316375396375e-06, "loss": 1.5309553146362305, "step": 1040 }, { "epoch": 0.32071406586642043, "grad_norm": 17.375, "learning_rate": 2.9631204317857236e-06, "loss": 1.5364904403686523, "step": 1042 }, { "epoch": 0.32132963988919666, "grad_norm": 18.75, "learning_rate": 2.9629239744234453e-06, "loss": 1.5094444751739502, "step": 1044 }, { "epoch": 0.3219452139119729, "grad_norm": 9.1875, "learning_rate": 2.9627270033963164e-06, "loss": 1.279676914215088, "step": 1046 }, { "epoch": 0.3225607879347491, "grad_norm": 12.5, "learning_rate": 2.962529518791339e-06, "loss": 1.359884262084961, "step": 1048 }, { "epoch": 0.3231763619575254, "grad_norm": 22.75, "learning_rate": 2.962331520695742e-06, "loss": 1.2789549827575684, "step": 1050 }, { "epoch": 0.32379193598030165, "grad_norm": 15.5625, "learning_rate": 2.9621330091969828e-06, "loss": 1.1411724090576172, "step": 1052 }, { "epoch": 0.3244075100030779, "grad_norm": 23.75, "learning_rate": 2.961933984382742e-06, "loss": 0.9410820007324219, "step": 1054 }, { "epoch": 0.3250230840258541, "grad_norm": 11.9375, "learning_rate": 2.9617344463409305e-06, "loss": 1.3496623039245605, "step": 1056 }, { "epoch": 0.32563865804863035, "grad_norm": 19.0, "learning_rate": 2.9615343951596846e-06, "loss": 1.4122042655944824, "step": 1058 }, { "epoch": 0.3262542320714066, "grad_norm": 15.1875, "learning_rate": 2.9613338309273664e-06, "loss": 1.2870073318481445, "step": 1060 }, { "epoch": 0.3268698060941828, "grad_norm": 13.9375, "learning_rate": 2.961132753732566e-06, "loss": 1.7000384330749512, "step": 1062 }, { "epoch": 0.32748538011695905, "grad_norm": 95.0, "learning_rate": 2.960931163664099e-06, "loss": 1.5783706903457642, "step": 1064 }, { "epoch": 0.3281009541397353, "grad_norm": 48.75, "learning_rate": 2.960729060811007e-06, "loss": 1.178473949432373, "step": 1066 }, { "epoch": 0.32871652816251157, "grad_norm": 33.0, "learning_rate": 2.9605264452625615e-06, "loss": 0.9566202163696289, "step": 1068 }, { "epoch": 0.3293321021852878, "grad_norm": 22.375, "learning_rate": 2.960323317108256e-06, "loss": 1.2555310726165771, "step": 1070 }, { "epoch": 0.32994767620806403, "grad_norm": 44.0, "learning_rate": 2.9601196764378128e-06, "loss": 1.7536683082580566, "step": 1072 }, { "epoch": 0.33056325023084027, "grad_norm": 15.75, "learning_rate": 2.959915523341181e-06, "loss": 1.7330282926559448, "step": 1074 }, { "epoch": 0.3311788242536165, "grad_norm": 23.375, "learning_rate": 2.959710857908535e-06, "loss": 1.600478172302246, "step": 1076 }, { "epoch": 0.33179439827639273, "grad_norm": 10.0625, "learning_rate": 2.959505680230275e-06, "loss": 1.4186145067214966, "step": 1078 }, { "epoch": 0.33240997229916897, "grad_norm": 3.375, "learning_rate": 2.959299990397029e-06, "loss": 1.4805734157562256, "step": 1080 }, { "epoch": 0.3330255463219452, "grad_norm": 18.25, "learning_rate": 2.95909378849965e-06, "loss": 1.3895663022994995, "step": 1082 }, { "epoch": 0.33364112034472143, "grad_norm": 19.75, "learning_rate": 2.9588870746292177e-06, "loss": 1.277693271636963, "step": 1084 }, { "epoch": 0.33425669436749766, "grad_norm": 8.5, "learning_rate": 2.9586798488770386e-06, "loss": 1.3742859363555908, "step": 1086 }, { "epoch": 0.33487226839027395, "grad_norm": 19.375, "learning_rate": 2.958472111334643e-06, "loss": 1.028365135192871, "step": 1088 }, { "epoch": 0.3354878424130502, "grad_norm": 4.0625, "learning_rate": 2.958263862093789e-06, "loss": 1.2564489841461182, "step": 1090 }, { "epoch": 0.3361034164358264, "grad_norm": 20.875, "learning_rate": 2.958055101246463e-06, "loss": 1.198014259338379, "step": 1092 }, { "epoch": 0.33671899045860265, "grad_norm": 11.125, "learning_rate": 2.9578458288848717e-06, "loss": 0.5163677930831909, "step": 1094 }, { "epoch": 0.3373345644813789, "grad_norm": 18.625, "learning_rate": 2.957636045101453e-06, "loss": 1.3656609058380127, "step": 1096 }, { "epoch": 0.3379501385041551, "grad_norm": 5.71875, "learning_rate": 2.957425749988868e-06, "loss": 1.1800310611724854, "step": 1098 }, { "epoch": 0.33856571252693135, "grad_norm": 14.5, "learning_rate": 2.957214943640004e-06, "loss": 1.435111165046692, "step": 1100 }, { "epoch": 0.3391812865497076, "grad_norm": 26.625, "learning_rate": 2.9570036261479754e-06, "loss": 1.6497091054916382, "step": 1102 }, { "epoch": 0.3397968605724838, "grad_norm": 32.5, "learning_rate": 2.9567917976061203e-06, "loss": 1.0989763736724854, "step": 1104 }, { "epoch": 0.3404124345952601, "grad_norm": 13.25, "learning_rate": 2.9565794581080035e-06, "loss": 1.1690188646316528, "step": 1106 }, { "epoch": 0.34102800861803634, "grad_norm": 15.75, "learning_rate": 2.9563666077474167e-06, "loss": 1.2261661291122437, "step": 1108 }, { "epoch": 0.34164358264081257, "grad_norm": 12.375, "learning_rate": 2.9561532466183753e-06, "loss": 1.4731149673461914, "step": 1110 }, { "epoch": 0.3422591566635888, "grad_norm": 11.4375, "learning_rate": 2.9559393748151212e-06, "loss": 1.3430174589157104, "step": 1112 }, { "epoch": 0.34287473068636504, "grad_norm": 30.25, "learning_rate": 2.9557249924321223e-06, "loss": 1.5253651142120361, "step": 1114 }, { "epoch": 0.34349030470914127, "grad_norm": 16.75, "learning_rate": 2.955510099564071e-06, "loss": 1.8036619424819946, "step": 1116 }, { "epoch": 0.3441058787319175, "grad_norm": 12.625, "learning_rate": 2.9552946963058858e-06, "loss": 1.3675833940505981, "step": 1118 }, { "epoch": 0.34472145275469374, "grad_norm": 12.8125, "learning_rate": 2.9550787827527114e-06, "loss": 1.5371782779693604, "step": 1120 }, { "epoch": 0.34533702677746997, "grad_norm": 6.28125, "learning_rate": 2.9548623589999155e-06, "loss": 1.0250526666641235, "step": 1122 }, { "epoch": 0.34595260080024626, "grad_norm": 17.75, "learning_rate": 2.954645425143094e-06, "loss": 1.5854618549346924, "step": 1124 }, { "epoch": 0.3465681748230225, "grad_norm": 13.9375, "learning_rate": 2.954427981278067e-06, "loss": 1.5529431104660034, "step": 1126 }, { "epoch": 0.3471837488457987, "grad_norm": 10.25, "learning_rate": 2.9542100275008786e-06, "loss": 1.3933606147766113, "step": 1128 }, { "epoch": 0.34779932286857496, "grad_norm": 38.75, "learning_rate": 2.9539915639078004e-06, "loss": 1.7668472528457642, "step": 1130 }, { "epoch": 0.3484148968913512, "grad_norm": 14.375, "learning_rate": 2.9537725905953264e-06, "loss": 1.350250244140625, "step": 1132 }, { "epoch": 0.3490304709141274, "grad_norm": 26.625, "learning_rate": 2.9535531076601794e-06, "loss": 1.6775567531585693, "step": 1134 }, { "epoch": 0.34964604493690365, "grad_norm": 16.875, "learning_rate": 2.953333115199303e-06, "loss": 1.5840508937835693, "step": 1136 }, { "epoch": 0.3502616189596799, "grad_norm": 10.625, "learning_rate": 2.9531126133098705e-06, "loss": 1.3124401569366455, "step": 1138 }, { "epoch": 0.3508771929824561, "grad_norm": 23.75, "learning_rate": 2.9528916020892764e-06, "loss": 1.412221908569336, "step": 1140 }, { "epoch": 0.35149276700523235, "grad_norm": 4.71875, "learning_rate": 2.9526700816351416e-06, "loss": 1.1349772214889526, "step": 1142 }, { "epoch": 0.35210834102800864, "grad_norm": 14.0, "learning_rate": 2.9524480520453126e-06, "loss": 1.2706981897354126, "step": 1144 }, { "epoch": 0.3527239150507849, "grad_norm": 11.625, "learning_rate": 2.9522255134178596e-06, "loss": 1.5382128953933716, "step": 1146 }, { "epoch": 0.3533394890735611, "grad_norm": 21.5, "learning_rate": 2.9520024658510786e-06, "loss": 1.4682729244232178, "step": 1148 }, { "epoch": 0.35395506309633734, "grad_norm": 12.625, "learning_rate": 2.9517789094434894e-06, "loss": 1.5460891723632812, "step": 1150 }, { "epoch": 0.3545706371191136, "grad_norm": 32.25, "learning_rate": 2.9515548442938373e-06, "loss": 1.6807613372802734, "step": 1152 }, { "epoch": 0.3551862111418898, "grad_norm": 38.25, "learning_rate": 2.9513302705010923e-06, "loss": 1.1830241680145264, "step": 1154 }, { "epoch": 0.35580178516466604, "grad_norm": 20.625, "learning_rate": 2.9511051881644487e-06, "loss": 1.5960803031921387, "step": 1156 }, { "epoch": 0.3564173591874423, "grad_norm": 5.9375, "learning_rate": 2.950879597383327e-06, "loss": 1.2252815961837769, "step": 1158 }, { "epoch": 0.3570329332102185, "grad_norm": 30.875, "learning_rate": 2.9506534982573685e-06, "loss": 1.4243431091308594, "step": 1160 }, { "epoch": 0.3576485072329948, "grad_norm": 17.5, "learning_rate": 2.9504268908864425e-06, "loss": 1.4747540950775146, "step": 1162 }, { "epoch": 0.358264081255771, "grad_norm": 15.5, "learning_rate": 2.9501997753706424e-06, "loss": 1.5682637691497803, "step": 1164 }, { "epoch": 0.35887965527854726, "grad_norm": 20.375, "learning_rate": 2.949972151810285e-06, "loss": 1.194216251373291, "step": 1166 }, { "epoch": 0.3594952293013235, "grad_norm": 9.4375, "learning_rate": 2.9497440203059114e-06, "loss": 1.379780888557434, "step": 1168 }, { "epoch": 0.3601108033240997, "grad_norm": 14.875, "learning_rate": 2.9495153809582875e-06, "loss": 1.4812633991241455, "step": 1170 }, { "epoch": 0.36072637734687596, "grad_norm": 26.75, "learning_rate": 2.9492862338684042e-06, "loss": 1.6413441896438599, "step": 1172 }, { "epoch": 0.3613419513696522, "grad_norm": 34.25, "learning_rate": 2.949056579137476e-06, "loss": 1.731945276260376, "step": 1174 }, { "epoch": 0.3619575253924284, "grad_norm": 12.9375, "learning_rate": 2.9488264168669418e-06, "loss": 1.2650394439697266, "step": 1176 }, { "epoch": 0.36257309941520466, "grad_norm": 13.5, "learning_rate": 2.9485957471584633e-06, "loss": 1.5294060707092285, "step": 1178 }, { "epoch": 0.3631886734379809, "grad_norm": 13.9375, "learning_rate": 2.9483645701139293e-06, "loss": 1.2630178928375244, "step": 1180 }, { "epoch": 0.3638042474607572, "grad_norm": 40.75, "learning_rate": 2.9481328858354497e-06, "loss": 1.5602120161056519, "step": 1182 }, { "epoch": 0.3644198214835334, "grad_norm": 13.0, "learning_rate": 2.9479006944253604e-06, "loss": 1.4180080890655518, "step": 1184 }, { "epoch": 0.36503539550630965, "grad_norm": 51.0, "learning_rate": 2.94766799598622e-06, "loss": 1.4425501823425293, "step": 1186 }, { "epoch": 0.3656509695290859, "grad_norm": 14.625, "learning_rate": 2.947434790620812e-06, "loss": 1.3580070734024048, "step": 1188 }, { "epoch": 0.3662665435518621, "grad_norm": 14.6875, "learning_rate": 2.9472010784321433e-06, "loss": 1.5068962574005127, "step": 1190 }, { "epoch": 0.36688211757463834, "grad_norm": 11.375, "learning_rate": 2.946966859523445e-06, "loss": 1.1820440292358398, "step": 1192 }, { "epoch": 0.3674976915974146, "grad_norm": 8.5625, "learning_rate": 2.9467321339981725e-06, "loss": 1.4404610395431519, "step": 1194 }, { "epoch": 0.3681132656201908, "grad_norm": 20.375, "learning_rate": 2.9464969019600027e-06, "loss": 1.4200704097747803, "step": 1196 }, { "epoch": 0.36872883964296704, "grad_norm": 19.375, "learning_rate": 2.94626116351284e-06, "loss": 1.927262544631958, "step": 1198 }, { "epoch": 0.36934441366574333, "grad_norm": 24.625, "learning_rate": 2.9460249187608086e-06, "loss": 1.6129209995269775, "step": 1200 }, { "epoch": 0.36995998768851956, "grad_norm": 3.90625, "learning_rate": 2.945788167808259e-06, "loss": 1.3959664106369019, "step": 1202 }, { "epoch": 0.3705755617112958, "grad_norm": 11.8125, "learning_rate": 2.945550910759764e-06, "loss": 1.3280491828918457, "step": 1204 }, { "epoch": 0.37119113573407203, "grad_norm": 14.8125, "learning_rate": 2.9453131477201202e-06, "loss": 1.5886045694351196, "step": 1206 }, { "epoch": 0.37180670975684826, "grad_norm": 12.125, "learning_rate": 2.9450748787943476e-06, "loss": 1.714353084564209, "step": 1208 }, { "epoch": 0.3724222837796245, "grad_norm": 10.1875, "learning_rate": 2.944836104087691e-06, "loss": 1.3882367610931396, "step": 1210 }, { "epoch": 0.37303785780240073, "grad_norm": 13.4375, "learning_rate": 2.9445968237056167e-06, "loss": 1.5091825723648071, "step": 1212 }, { "epoch": 0.37365343182517696, "grad_norm": 23.25, "learning_rate": 2.9443570377538145e-06, "loss": 1.0328447818756104, "step": 1214 }, { "epoch": 0.3742690058479532, "grad_norm": 15.5625, "learning_rate": 2.944116746338199e-06, "loss": 1.6457622051239014, "step": 1216 }, { "epoch": 0.37488457987072943, "grad_norm": 11.9375, "learning_rate": 2.943875949564907e-06, "loss": 1.0500051975250244, "step": 1218 }, { "epoch": 0.3755001538935057, "grad_norm": 4.875, "learning_rate": 2.9436346475402983e-06, "loss": 1.0904285907745361, "step": 1220 }, { "epoch": 0.37611572791628195, "grad_norm": 50.5, "learning_rate": 2.9433928403709567e-06, "loss": 1.5347020626068115, "step": 1222 }, { "epoch": 0.3767313019390582, "grad_norm": 47.25, "learning_rate": 2.943150528163689e-06, "loss": 1.447998046875, "step": 1224 }, { "epoch": 0.3773468759618344, "grad_norm": 34.5, "learning_rate": 2.9429077110255244e-06, "loss": 1.3163659572601318, "step": 1226 }, { "epoch": 0.37796244998461065, "grad_norm": 14.625, "learning_rate": 2.942664389063715e-06, "loss": 1.315096378326416, "step": 1228 }, { "epoch": 0.3785780240073869, "grad_norm": 11.3125, "learning_rate": 2.9424205623857374e-06, "loss": 1.392237663269043, "step": 1230 }, { "epoch": 0.3791935980301631, "grad_norm": 16.375, "learning_rate": 2.9421762310992895e-06, "loss": 1.4208223819732666, "step": 1232 }, { "epoch": 0.37980917205293935, "grad_norm": 17.375, "learning_rate": 2.9419313953122932e-06, "loss": 1.4550400972366333, "step": 1234 }, { "epoch": 0.3804247460757156, "grad_norm": 19.125, "learning_rate": 2.9416860551328915e-06, "loss": 1.4298524856567383, "step": 1236 }, { "epoch": 0.38104032009849187, "grad_norm": 16.625, "learning_rate": 2.9414402106694528e-06, "loss": 1.93170166015625, "step": 1238 }, { "epoch": 0.3816558941212681, "grad_norm": 14.0625, "learning_rate": 2.9411938620305663e-06, "loss": 1.3365933895111084, "step": 1240 }, { "epoch": 0.38227146814404434, "grad_norm": 13.9375, "learning_rate": 2.9409470093250453e-06, "loss": 1.9269330501556396, "step": 1242 }, { "epoch": 0.38288704216682057, "grad_norm": 20.375, "learning_rate": 2.9406996526619237e-06, "loss": 1.3893201351165771, "step": 1244 }, { "epoch": 0.3835026161895968, "grad_norm": 14.3125, "learning_rate": 2.94045179215046e-06, "loss": 1.711153507232666, "step": 1246 }, { "epoch": 0.38411819021237303, "grad_norm": 10.375, "learning_rate": 2.940203427900133e-06, "loss": 1.2741600275039673, "step": 1248 }, { "epoch": 0.38473376423514927, "grad_norm": 22.375, "learning_rate": 2.9399545600206474e-06, "loss": 1.5442752838134766, "step": 1250 }, { "epoch": 0.3853493382579255, "grad_norm": 19.375, "learning_rate": 2.939705188621928e-06, "loss": 1.328803300857544, "step": 1252 }, { "epoch": 0.38596491228070173, "grad_norm": 8.25, "learning_rate": 2.939455313814122e-06, "loss": 1.207406997680664, "step": 1254 }, { "epoch": 0.38658048630347797, "grad_norm": 15.5625, "learning_rate": 2.9392049357075994e-06, "loss": 1.4009120464324951, "step": 1256 }, { "epoch": 0.38719606032625425, "grad_norm": 8.25, "learning_rate": 2.9389540544129524e-06, "loss": 1.562011480331421, "step": 1258 }, { "epoch": 0.3878116343490305, "grad_norm": 15.875, "learning_rate": 2.9387026700409965e-06, "loss": 1.8288936614990234, "step": 1260 }, { "epoch": 0.3884272083718067, "grad_norm": 10.9375, "learning_rate": 2.938450782702767e-06, "loss": 1.4957469701766968, "step": 1262 }, { "epoch": 0.38904278239458295, "grad_norm": 14.1875, "learning_rate": 2.938198392509524e-06, "loss": 1.7026026248931885, "step": 1264 }, { "epoch": 0.3896583564173592, "grad_norm": 14.25, "learning_rate": 2.937945499572748e-06, "loss": 1.2688217163085938, "step": 1266 }, { "epoch": 0.3902739304401354, "grad_norm": 7.46875, "learning_rate": 2.937692104004142e-06, "loss": 1.181006908416748, "step": 1268 }, { "epoch": 0.39088950446291165, "grad_norm": 19.125, "learning_rate": 2.9374382059156316e-06, "loss": 1.2825920581817627, "step": 1270 }, { "epoch": 0.3915050784856879, "grad_norm": 23.75, "learning_rate": 2.937183805419363e-06, "loss": 1.861384630203247, "step": 1272 }, { "epoch": 0.3921206525084641, "grad_norm": 19.375, "learning_rate": 2.9369289026277063e-06, "loss": 1.5804314613342285, "step": 1274 }, { "epoch": 0.3927362265312404, "grad_norm": 8.4375, "learning_rate": 2.936673497653252e-06, "loss": 1.4048612117767334, "step": 1276 }, { "epoch": 0.39335180055401664, "grad_norm": 32.75, "learning_rate": 2.936417590608812e-06, "loss": 1.1659860610961914, "step": 1278 }, { "epoch": 0.3939673745767929, "grad_norm": 12.25, "learning_rate": 2.936161181607422e-06, "loss": 1.2615526914596558, "step": 1280 }, { "epoch": 0.3945829485995691, "grad_norm": 51.75, "learning_rate": 2.935904270762337e-06, "loss": 0.9141525626182556, "step": 1282 }, { "epoch": 0.39519852262234534, "grad_norm": 7.6875, "learning_rate": 2.935646858187035e-06, "loss": 1.1676512956619263, "step": 1284 }, { "epoch": 0.39581409664512157, "grad_norm": 23.0, "learning_rate": 2.935388943995216e-06, "loss": 1.3372483253479004, "step": 1286 }, { "epoch": 0.3964296706678978, "grad_norm": 15.3125, "learning_rate": 2.935130528300801e-06, "loss": 1.3295966386795044, "step": 1288 }, { "epoch": 0.39704524469067404, "grad_norm": 12.0, "learning_rate": 2.9348716112179328e-06, "loss": 1.472252607345581, "step": 1290 }, { "epoch": 0.39766081871345027, "grad_norm": 11.875, "learning_rate": 2.9346121928609734e-06, "loss": 1.84720778465271, "step": 1292 }, { "epoch": 0.39827639273622656, "grad_norm": 16.25, "learning_rate": 2.93435227334451e-06, "loss": 1.7415895462036133, "step": 1294 }, { "epoch": 0.3988919667590028, "grad_norm": 11.0, "learning_rate": 2.93409185278335e-06, "loss": 1.4139404296875, "step": 1296 }, { "epoch": 0.399507540781779, "grad_norm": 13.0625, "learning_rate": 2.9338309312925193e-06, "loss": 1.4103145599365234, "step": 1298 }, { "epoch": 0.40012311480455526, "grad_norm": 13.375, "learning_rate": 2.9335695089872687e-06, "loss": 1.3001097440719604, "step": 1300 }, { "epoch": 0.4007386888273315, "grad_norm": 7.3125, "learning_rate": 2.9333075859830684e-06, "loss": 1.4066133499145508, "step": 1302 }, { "epoch": 0.4013542628501077, "grad_norm": 21.375, "learning_rate": 2.93304516239561e-06, "loss": 1.3882697820663452, "step": 1304 }, { "epoch": 0.40196983687288396, "grad_norm": 17.875, "learning_rate": 2.932782238340806e-06, "loss": 1.5596365928649902, "step": 1306 }, { "epoch": 0.4025854108956602, "grad_norm": 12.9375, "learning_rate": 2.932518813934791e-06, "loss": 1.5969336032867432, "step": 1308 }, { "epoch": 0.4032009849184364, "grad_norm": 8.875, "learning_rate": 2.9322548892939188e-06, "loss": 1.2696571350097656, "step": 1310 }, { "epoch": 0.40381655894121266, "grad_norm": 13.0, "learning_rate": 2.931990464534767e-06, "loss": 1.4799418449401855, "step": 1312 }, { "epoch": 0.40443213296398894, "grad_norm": 12.5, "learning_rate": 2.9317255397741303e-06, "loss": 1.061401605606079, "step": 1314 }, { "epoch": 0.4050477069867652, "grad_norm": 4.5, "learning_rate": 2.9314601151290277e-06, "loss": 1.3438705205917358, "step": 1316 }, { "epoch": 0.4056632810095414, "grad_norm": 59.0, "learning_rate": 2.9311941907166965e-06, "loss": 1.691558837890625, "step": 1318 }, { "epoch": 0.40627885503231764, "grad_norm": 10.75, "learning_rate": 2.9309277666545967e-06, "loss": 1.52768874168396, "step": 1320 }, { "epoch": 0.4068944290550939, "grad_norm": 83.0, "learning_rate": 2.9306608430604075e-06, "loss": 1.3572287559509277, "step": 1322 }, { "epoch": 0.4075100030778701, "grad_norm": 4.8125, "learning_rate": 2.93039342005203e-06, "loss": 1.1406067609786987, "step": 1324 }, { "epoch": 0.40812557710064634, "grad_norm": 12.125, "learning_rate": 2.9301254977475843e-06, "loss": 1.5260097980499268, "step": 1326 }, { "epoch": 0.4087411511234226, "grad_norm": 15.25, "learning_rate": 2.929857076265413e-06, "loss": 1.1194944381713867, "step": 1328 }, { "epoch": 0.4093567251461988, "grad_norm": 8.4375, "learning_rate": 2.929588155724078e-06, "loss": 1.281527042388916, "step": 1330 }, { "epoch": 0.4099722991689751, "grad_norm": 14.8125, "learning_rate": 2.929318736242361e-06, "loss": 1.3978681564331055, "step": 1332 }, { "epoch": 0.41058787319175133, "grad_norm": 56.25, "learning_rate": 2.9290488179392657e-06, "loss": 1.7001137733459473, "step": 1334 }, { "epoch": 0.41120344721452756, "grad_norm": 30.375, "learning_rate": 2.928778400934015e-06, "loss": 1.5537958145141602, "step": 1336 }, { "epoch": 0.4118190212373038, "grad_norm": 11.3125, "learning_rate": 2.9285074853460523e-06, "loss": 1.308958888053894, "step": 1338 }, { "epoch": 0.41243459526008003, "grad_norm": 10.5, "learning_rate": 2.9282360712950418e-06, "loss": 1.2196264266967773, "step": 1340 }, { "epoch": 0.41305016928285626, "grad_norm": 9.9375, "learning_rate": 2.927964158900867e-06, "loss": 1.2395238876342773, "step": 1342 }, { "epoch": 0.4136657433056325, "grad_norm": 18.375, "learning_rate": 2.927691748283631e-06, "loss": 1.388479471206665, "step": 1344 }, { "epoch": 0.4142813173284087, "grad_norm": 18.875, "learning_rate": 2.9274188395636597e-06, "loss": 1.6272698640823364, "step": 1346 }, { "epoch": 0.41489689135118496, "grad_norm": 14.0, "learning_rate": 2.9271454328614973e-06, "loss": 1.5498168468475342, "step": 1348 }, { "epoch": 0.4155124653739612, "grad_norm": 10.0625, "learning_rate": 2.9268715282979057e-06, "loss": 1.6276700496673584, "step": 1350 }, { "epoch": 0.4161280393967375, "grad_norm": 13.8125, "learning_rate": 2.9265971259938705e-06, "loss": 1.3738837242126465, "step": 1352 }, { "epoch": 0.4167436134195137, "grad_norm": 39.5, "learning_rate": 2.926322226070595e-06, "loss": 1.1020140647888184, "step": 1354 }, { "epoch": 0.41735918744228995, "grad_norm": 11.9375, "learning_rate": 2.926046828649503e-06, "loss": 1.3103208541870117, "step": 1356 }, { "epoch": 0.4179747614650662, "grad_norm": 22.875, "learning_rate": 2.9257709338522375e-06, "loss": 1.6737949848175049, "step": 1358 }, { "epoch": 0.4185903354878424, "grad_norm": 24.75, "learning_rate": 2.925494541800662e-06, "loss": 1.2139620780944824, "step": 1360 }, { "epoch": 0.41920590951061865, "grad_norm": 14.25, "learning_rate": 2.9252176526168586e-06, "loss": 1.862136960029602, "step": 1362 }, { "epoch": 0.4198214835333949, "grad_norm": 9.9375, "learning_rate": 2.924940266423131e-06, "loss": 1.1231043338775635, "step": 1364 }, { "epoch": 0.4204370575561711, "grad_norm": 11.25, "learning_rate": 2.924662383341999e-06, "loss": 1.3487285375595093, "step": 1366 }, { "epoch": 0.42105263157894735, "grad_norm": 26.875, "learning_rate": 2.9243840034962055e-06, "loss": 1.7209620475769043, "step": 1368 }, { "epoch": 0.42166820560172363, "grad_norm": 34.75, "learning_rate": 2.92410512700871e-06, "loss": 1.3228155374526978, "step": 1370 }, { "epoch": 0.42228377962449987, "grad_norm": 11.0625, "learning_rate": 2.923825754002693e-06, "loss": 1.228677749633789, "step": 1372 }, { "epoch": 0.4228993536472761, "grad_norm": 19.625, "learning_rate": 2.923545884601555e-06, "loss": 1.5254507064819336, "step": 1374 }, { "epoch": 0.42351492767005233, "grad_norm": 12.1875, "learning_rate": 2.9232655189289123e-06, "loss": 1.4658113718032837, "step": 1376 }, { "epoch": 0.42413050169282857, "grad_norm": 13.1875, "learning_rate": 2.9229846571086044e-06, "loss": 1.4198139905929565, "step": 1378 }, { "epoch": 0.4247460757156048, "grad_norm": 7.84375, "learning_rate": 2.9227032992646887e-06, "loss": 1.259503960609436, "step": 1380 }, { "epoch": 0.42536164973838103, "grad_norm": 12.1875, "learning_rate": 2.9224214455214398e-06, "loss": 1.034414291381836, "step": 1382 }, { "epoch": 0.42597722376115726, "grad_norm": 18.75, "learning_rate": 2.922139096003354e-06, "loss": 1.646314263343811, "step": 1384 }, { "epoch": 0.4265927977839335, "grad_norm": 8.125, "learning_rate": 2.9218562508351447e-06, "loss": 1.3071296215057373, "step": 1386 }, { "epoch": 0.42720837180670973, "grad_norm": 13.125, "learning_rate": 2.921572910141745e-06, "loss": 1.4267911911010742, "step": 1388 }, { "epoch": 0.427823945829486, "grad_norm": 6.28125, "learning_rate": 2.9212890740483074e-06, "loss": 1.0795435905456543, "step": 1390 }, { "epoch": 0.42843951985226225, "grad_norm": 10.25, "learning_rate": 2.921004742680202e-06, "loss": 1.2007197141647339, "step": 1392 }, { "epoch": 0.4290550938750385, "grad_norm": 15.6875, "learning_rate": 2.9207199161630184e-06, "loss": 1.3095588684082031, "step": 1394 }, { "epoch": 0.4296706678978147, "grad_norm": 77.0, "learning_rate": 2.920434594622565e-06, "loss": 1.3891422748565674, "step": 1396 }, { "epoch": 0.43028624192059095, "grad_norm": 10.5, "learning_rate": 2.9201487781848682e-06, "loss": 1.5573385953903198, "step": 1398 }, { "epoch": 0.4309018159433672, "grad_norm": 15.3125, "learning_rate": 2.9198624669761748e-06, "loss": 1.2296578884124756, "step": 1400 }, { "epoch": 0.4315173899661434, "grad_norm": 21.0, "learning_rate": 2.9195756611229465e-06, "loss": 1.5690207481384277, "step": 1402 }, { "epoch": 0.43213296398891965, "grad_norm": 12.0, "learning_rate": 2.919288360751868e-06, "loss": 1.4062907695770264, "step": 1404 }, { "epoch": 0.4327485380116959, "grad_norm": 3.140625, "learning_rate": 2.9190005659898386e-06, "loss": 1.1973246335983276, "step": 1406 }, { "epoch": 0.43336411203447217, "grad_norm": 11.8125, "learning_rate": 2.918712276963979e-06, "loss": 1.28580904006958, "step": 1408 }, { "epoch": 0.4339796860572484, "grad_norm": 11.5625, "learning_rate": 2.918423493801626e-06, "loss": 1.411057710647583, "step": 1410 }, { "epoch": 0.43459526008002464, "grad_norm": 18.625, "learning_rate": 2.918134216630335e-06, "loss": 1.4428560733795166, "step": 1412 }, { "epoch": 0.43521083410280087, "grad_norm": 5.6875, "learning_rate": 2.9178444455778806e-06, "loss": 1.1413902044296265, "step": 1414 }, { "epoch": 0.4358264081255771, "grad_norm": 23.875, "learning_rate": 2.9175541807722552e-06, "loss": 1.7097017765045166, "step": 1416 }, { "epoch": 0.43644198214835334, "grad_norm": 17.625, "learning_rate": 2.917263422341668e-06, "loss": 1.5849788188934326, "step": 1418 }, { "epoch": 0.43705755617112957, "grad_norm": 11.375, "learning_rate": 2.9169721704145496e-06, "loss": 1.6044025421142578, "step": 1420 }, { "epoch": 0.4376731301939058, "grad_norm": 26.0, "learning_rate": 2.916680425119544e-06, "loss": 1.6424756050109863, "step": 1422 }, { "epoch": 0.43828870421668203, "grad_norm": 13.0625, "learning_rate": 2.9163881865855165e-06, "loss": 1.1550233364105225, "step": 1424 }, { "epoch": 0.43890427823945827, "grad_norm": 19.875, "learning_rate": 2.916095454941549e-06, "loss": 0.9205485582351685, "step": 1426 }, { "epoch": 0.43951985226223456, "grad_norm": 30.5, "learning_rate": 2.915802230316941e-06, "loss": 1.3150010108947754, "step": 1428 }, { "epoch": 0.4401354262850108, "grad_norm": 9.9375, "learning_rate": 2.9155085128412115e-06, "loss": 1.126185655593872, "step": 1430 }, { "epoch": 0.440751000307787, "grad_norm": 23.5, "learning_rate": 2.9152143026440945e-06, "loss": 1.3212380409240723, "step": 1432 }, { "epoch": 0.44136657433056325, "grad_norm": 19.25, "learning_rate": 2.9149195998555434e-06, "loss": 1.1305532455444336, "step": 1434 }, { "epoch": 0.4419821483533395, "grad_norm": 172.0, "learning_rate": 2.914624404605729e-06, "loss": 1.5058670043945312, "step": 1436 }, { "epoch": 0.4425977223761157, "grad_norm": 13.5, "learning_rate": 2.914328717025039e-06, "loss": 1.3187425136566162, "step": 1438 }, { "epoch": 0.44321329639889195, "grad_norm": 17.25, "learning_rate": 2.9140325372440786e-06, "loss": 1.4263267517089844, "step": 1440 }, { "epoch": 0.4438288704216682, "grad_norm": 9.125, "learning_rate": 2.913735865393672e-06, "loss": 1.4333791732788086, "step": 1442 }, { "epoch": 0.4444444444444444, "grad_norm": 7.25, "learning_rate": 2.9134387016048578e-06, "loss": 1.5287044048309326, "step": 1444 }, { "epoch": 0.4450600184672207, "grad_norm": 8.3125, "learning_rate": 2.9131410460088953e-06, "loss": 1.2323602437973022, "step": 1446 }, { "epoch": 0.44567559248999694, "grad_norm": 7.96875, "learning_rate": 2.912842898737258e-06, "loss": 1.1562038660049438, "step": 1448 }, { "epoch": 0.4462911665127732, "grad_norm": 24.25, "learning_rate": 2.9125442599216385e-06, "loss": 1.52219820022583, "step": 1450 }, { "epoch": 0.4469067405355494, "grad_norm": 15.0, "learning_rate": 2.912245129693946e-06, "loss": 1.4202659130096436, "step": 1452 }, { "epoch": 0.44752231455832564, "grad_norm": 6.53125, "learning_rate": 2.9119455081863065e-06, "loss": 1.0215532779693604, "step": 1454 }, { "epoch": 0.4481378885811019, "grad_norm": 15.0625, "learning_rate": 2.9116453955310632e-06, "loss": 1.053874135017395, "step": 1456 }, { "epoch": 0.4487534626038781, "grad_norm": 16.625, "learning_rate": 2.9113447918607764e-06, "loss": 1.2909146547317505, "step": 1458 }, { "epoch": 0.44936903662665434, "grad_norm": 35.0, "learning_rate": 2.9110436973082223e-06, "loss": 1.6827542781829834, "step": 1460 }, { "epoch": 0.44998461064943057, "grad_norm": 38.25, "learning_rate": 2.9107421120063953e-06, "loss": 1.359308123588562, "step": 1462 }, { "epoch": 0.45060018467220686, "grad_norm": 13.9375, "learning_rate": 2.9104400360885066e-06, "loss": 1.2988219261169434, "step": 1464 }, { "epoch": 0.4512157586949831, "grad_norm": 9.0625, "learning_rate": 2.9101374696879824e-06, "loss": 1.3027936220169067, "step": 1466 }, { "epoch": 0.4518313327177593, "grad_norm": 6.40625, "learning_rate": 2.9098344129384667e-06, "loss": 1.2598680257797241, "step": 1468 }, { "epoch": 0.45244690674053556, "grad_norm": 13.375, "learning_rate": 2.90953086597382e-06, "loss": 1.4774788618087769, "step": 1470 }, { "epoch": 0.4530624807633118, "grad_norm": 12.75, "learning_rate": 2.9092268289281206e-06, "loss": 1.5974787473678589, "step": 1472 }, { "epoch": 0.453678054786088, "grad_norm": 32.75, "learning_rate": 2.908922301935661e-06, "loss": 1.4435534477233887, "step": 1474 }, { "epoch": 0.45429362880886426, "grad_norm": 45.5, "learning_rate": 2.9086172851309508e-06, "loss": 1.6630170345306396, "step": 1476 }, { "epoch": 0.4549092028316405, "grad_norm": 16.625, "learning_rate": 2.908311778648717e-06, "loss": 1.562852382659912, "step": 1478 }, { "epoch": 0.4555247768544167, "grad_norm": 36.0, "learning_rate": 2.908005782623902e-06, "loss": 1.4895464181900024, "step": 1480 }, { "epoch": 0.45614035087719296, "grad_norm": 12.375, "learning_rate": 2.907699297191664e-06, "loss": 1.5021754503250122, "step": 1482 }, { "epoch": 0.45675592489996925, "grad_norm": 16.125, "learning_rate": 2.9073923224873787e-06, "loss": 1.359191656112671, "step": 1484 }, { "epoch": 0.4573714989227455, "grad_norm": 17.125, "learning_rate": 2.9070848586466364e-06, "loss": 1.1666719913482666, "step": 1486 }, { "epoch": 0.4579870729455217, "grad_norm": 43.25, "learning_rate": 2.9067769058052452e-06, "loss": 0.9205150604248047, "step": 1488 }, { "epoch": 0.45860264696829794, "grad_norm": 5.40625, "learning_rate": 2.9064684640992278e-06, "loss": 0.7707520127296448, "step": 1490 }, { "epoch": 0.4592182209910742, "grad_norm": 31.0, "learning_rate": 2.906159533664823e-06, "loss": 1.3506240844726562, "step": 1492 }, { "epoch": 0.4598337950138504, "grad_norm": 14.3125, "learning_rate": 2.9058501146384863e-06, "loss": 1.8203885555267334, "step": 1494 }, { "epoch": 0.46044936903662664, "grad_norm": 19.75, "learning_rate": 2.9055402071568873e-06, "loss": 1.492109775543213, "step": 1496 }, { "epoch": 0.4610649430594029, "grad_norm": 17.375, "learning_rate": 2.9052298113569134e-06, "loss": 1.2597657442092896, "step": 1498 }, { "epoch": 0.4616805170821791, "grad_norm": 10.25, "learning_rate": 2.904918927375667e-06, "loss": 0.9372320175170898, "step": 1500 }, { "epoch": 0.4622960911049554, "grad_norm": 19.25, "learning_rate": 2.904607555350466e-06, "loss": 1.623645544052124, "step": 1502 }, { "epoch": 0.46291166512773163, "grad_norm": 11.5625, "learning_rate": 2.9042956954188426e-06, "loss": 1.3719435930252075, "step": 1504 }, { "epoch": 0.46352723915050786, "grad_norm": 17.375, "learning_rate": 2.9039833477185464e-06, "loss": 2.0057315826416016, "step": 1506 }, { "epoch": 0.4641428131732841, "grad_norm": 25.625, "learning_rate": 2.9036705123875417e-06, "loss": 1.4058794975280762, "step": 1508 }, { "epoch": 0.46475838719606033, "grad_norm": 16.75, "learning_rate": 2.9033571895640084e-06, "loss": 1.2668242454528809, "step": 1510 }, { "epoch": 0.46537396121883656, "grad_norm": 16.75, "learning_rate": 2.903043379386342e-06, "loss": 1.5426902770996094, "step": 1512 }, { "epoch": 0.4659895352416128, "grad_norm": 8.3125, "learning_rate": 2.9027290819931513e-06, "loss": 0.8808223605155945, "step": 1514 }, { "epoch": 0.46660510926438903, "grad_norm": 22.125, "learning_rate": 2.9024142975232635e-06, "loss": 0.8790304064750671, "step": 1516 }, { "epoch": 0.46722068328716526, "grad_norm": 20.25, "learning_rate": 2.9020990261157176e-06, "loss": 1.3890122175216675, "step": 1518 }, { "epoch": 0.4678362573099415, "grad_norm": 43.0, "learning_rate": 2.9017832679097717e-06, "loss": 1.4353948831558228, "step": 1520 }, { "epoch": 0.4684518313327178, "grad_norm": 10.375, "learning_rate": 2.9014670230448936e-06, "loss": 1.5785167217254639, "step": 1522 }, { "epoch": 0.469067405355494, "grad_norm": 25.5, "learning_rate": 2.9011502916607713e-06, "loss": 1.5384750366210938, "step": 1524 }, { "epoch": 0.46968297937827025, "grad_norm": 15.1875, "learning_rate": 2.9008330738973046e-06, "loss": 1.5118379592895508, "step": 1526 }, { "epoch": 0.4702985534010465, "grad_norm": 6.84375, "learning_rate": 2.9005153698946093e-06, "loss": 1.3365740776062012, "step": 1528 }, { "epoch": 0.4709141274238227, "grad_norm": 12.5625, "learning_rate": 2.900197179793015e-06, "loss": 0.9900118112564087, "step": 1530 }, { "epoch": 0.47152970144659895, "grad_norm": 11.5, "learning_rate": 2.899878503733067e-06, "loss": 1.3724645376205444, "step": 1532 }, { "epoch": 0.4721452754693752, "grad_norm": 10.9375, "learning_rate": 2.899559341855525e-06, "loss": 1.733636498451233, "step": 1534 }, { "epoch": 0.4727608494921514, "grad_norm": 17.0, "learning_rate": 2.8992396943013624e-06, "loss": 1.7250124216079712, "step": 1536 }, { "epoch": 0.47337642351492765, "grad_norm": 14.375, "learning_rate": 2.898919561211769e-06, "loss": 1.8821617364883423, "step": 1538 }, { "epoch": 0.47399199753770394, "grad_norm": 13.6875, "learning_rate": 2.898598942728147e-06, "loss": 1.5046849250793457, "step": 1540 }, { "epoch": 0.47460757156048017, "grad_norm": 18.5, "learning_rate": 2.8982778389921146e-06, "loss": 1.4122357368469238, "step": 1542 }, { "epoch": 0.4752231455832564, "grad_norm": 9.375, "learning_rate": 2.8979562501455037e-06, "loss": 1.287928819656372, "step": 1544 }, { "epoch": 0.47583871960603263, "grad_norm": 20.25, "learning_rate": 2.8976341763303605e-06, "loss": 1.093557357788086, "step": 1546 }, { "epoch": 0.47645429362880887, "grad_norm": 15.5, "learning_rate": 2.8973116176889447e-06, "loss": 1.4339439868927002, "step": 1548 }, { "epoch": 0.4770698676515851, "grad_norm": 13.4375, "learning_rate": 2.896988574363731e-06, "loss": 1.27329683303833, "step": 1550 }, { "epoch": 0.47768544167436133, "grad_norm": 7.84375, "learning_rate": 2.8966650464974084e-06, "loss": 1.039716124534607, "step": 1552 }, { "epoch": 0.47830101569713757, "grad_norm": 13.9375, "learning_rate": 2.89634103423288e-06, "loss": 1.5718767642974854, "step": 1554 }, { "epoch": 0.4789165897199138, "grad_norm": 17.75, "learning_rate": 2.896016537713261e-06, "loss": 1.633246660232544, "step": 1556 }, { "epoch": 0.47953216374269003, "grad_norm": 13.1875, "learning_rate": 2.895691557081883e-06, "loss": 1.159442663192749, "step": 1558 }, { "epoch": 0.4801477377654663, "grad_norm": 13.8125, "learning_rate": 2.89536609248229e-06, "loss": 1.199776530265808, "step": 1560 }, { "epoch": 0.48076331178824255, "grad_norm": 11.5625, "learning_rate": 2.89504014405824e-06, "loss": 1.600058674812317, "step": 1562 }, { "epoch": 0.4813788858110188, "grad_norm": 16.0, "learning_rate": 2.8947137119537048e-06, "loss": 1.3177971839904785, "step": 1564 }, { "epoch": 0.481994459833795, "grad_norm": 4.40625, "learning_rate": 2.89438679631287e-06, "loss": 1.278930902481079, "step": 1566 }, { "epoch": 0.48261003385657125, "grad_norm": 16.125, "learning_rate": 2.894059397280134e-06, "loss": 1.3205333948135376, "step": 1568 }, { "epoch": 0.4832256078793475, "grad_norm": 12.5, "learning_rate": 2.89373151500011e-06, "loss": 1.354341983795166, "step": 1570 }, { "epoch": 0.4838411819021237, "grad_norm": 27.0, "learning_rate": 2.8934031496176247e-06, "loss": 1.4841325283050537, "step": 1572 }, { "epoch": 0.48445675592489995, "grad_norm": 14.1875, "learning_rate": 2.893074301277715e-06, "loss": 1.2359914779663086, "step": 1574 }, { "epoch": 0.4850723299476762, "grad_norm": 5.90625, "learning_rate": 2.8927449701256367e-06, "loss": 1.1367638111114502, "step": 1576 }, { "epoch": 0.4856879039704525, "grad_norm": 14.375, "learning_rate": 2.892415156306853e-06, "loss": 1.4632461071014404, "step": 1578 }, { "epoch": 0.4863034779932287, "grad_norm": 3.46875, "learning_rate": 2.8920848599670444e-06, "loss": 1.1607825756072998, "step": 1580 }, { "epoch": 0.48691905201600494, "grad_norm": 13.0, "learning_rate": 2.8917540812521034e-06, "loss": 1.3275295495986938, "step": 1582 }, { "epoch": 0.48753462603878117, "grad_norm": 13.125, "learning_rate": 2.891422820308135e-06, "loss": 1.3891751766204834, "step": 1584 }, { "epoch": 0.4881502000615574, "grad_norm": 9.1875, "learning_rate": 2.8910910772814575e-06, "loss": 1.415259838104248, "step": 1586 }, { "epoch": 0.48876577408433364, "grad_norm": 15.9375, "learning_rate": 2.890758852318602e-06, "loss": 1.2171683311462402, "step": 1588 }, { "epoch": 0.48938134810710987, "grad_norm": 12.4375, "learning_rate": 2.890426145566313e-06, "loss": 1.3115642070770264, "step": 1590 }, { "epoch": 0.4899969221298861, "grad_norm": 16.25, "learning_rate": 2.8900929571715465e-06, "loss": 1.3981764316558838, "step": 1592 }, { "epoch": 0.49061249615266234, "grad_norm": 11.6875, "learning_rate": 2.8897592872814738e-06, "loss": 1.0410218238830566, "step": 1594 }, { "epoch": 0.49122807017543857, "grad_norm": 46.0, "learning_rate": 2.8894251360434756e-06, "loss": 0.5647349953651428, "step": 1596 }, { "epoch": 0.49184364419821486, "grad_norm": 30.0, "learning_rate": 2.8890905036051487e-06, "loss": 1.3878058195114136, "step": 1598 }, { "epoch": 0.4924592182209911, "grad_norm": 44.75, "learning_rate": 2.888755390114299e-06, "loss": 1.670772910118103, "step": 1600 }, { "epoch": 0.4930747922437673, "grad_norm": 27.5, "learning_rate": 2.8884197957189477e-06, "loss": 1.9882588386535645, "step": 1602 }, { "epoch": 0.49369036626654356, "grad_norm": 15.1875, "learning_rate": 2.888083720567326e-06, "loss": 1.4575190544128418, "step": 1604 }, { "epoch": 0.4943059402893198, "grad_norm": 23.125, "learning_rate": 2.8877471648078796e-06, "loss": 1.4484859704971313, "step": 1606 }, { "epoch": 0.494921514312096, "grad_norm": 9.3125, "learning_rate": 2.887410128589266e-06, "loss": 1.4162225723266602, "step": 1608 }, { "epoch": 0.49553708833487226, "grad_norm": 23.75, "learning_rate": 2.887072612060353e-06, "loss": 1.5183629989624023, "step": 1610 }, { "epoch": 0.4961526623576485, "grad_norm": 13.6875, "learning_rate": 2.8867346153702226e-06, "loss": 1.486363172531128, "step": 1612 }, { "epoch": 0.4967682363804247, "grad_norm": 6.65625, "learning_rate": 2.886396138668169e-06, "loss": 1.0761302709579468, "step": 1614 }, { "epoch": 0.497383810403201, "grad_norm": 9.125, "learning_rate": 2.8860571821036973e-06, "loss": 1.2252916097640991, "step": 1616 }, { "epoch": 0.49799938442597724, "grad_norm": 16.375, "learning_rate": 2.885717745826525e-06, "loss": 1.1279650926589966, "step": 1618 }, { "epoch": 0.4986149584487535, "grad_norm": 19.625, "learning_rate": 2.8853778299865823e-06, "loss": 1.4863353967666626, "step": 1620 }, { "epoch": 0.4992305324715297, "grad_norm": 8.375, "learning_rate": 2.8850374347340086e-06, "loss": 1.3660411834716797, "step": 1622 }, { "epoch": 0.49984610649430594, "grad_norm": 7.78125, "learning_rate": 2.884696560219158e-06, "loss": 1.0915755033493042, "step": 1624 }, { "epoch": 0.5004616805170822, "grad_norm": 10.6875, "learning_rate": 2.8843552065925955e-06, "loss": 1.3456721305847168, "step": 1626 }, { "epoch": 0.5010772545398584, "grad_norm": 18.125, "learning_rate": 2.884013374005097e-06, "loss": 1.553337574005127, "step": 1628 }, { "epoch": 0.5016928285626346, "grad_norm": 127.5, "learning_rate": 2.88367106260765e-06, "loss": 1.7117575407028198, "step": 1630 }, { "epoch": 0.5023084025854109, "grad_norm": 8.125, "learning_rate": 2.8833282725514537e-06, "loss": 0.8663567304611206, "step": 1632 }, { "epoch": 0.5029239766081871, "grad_norm": 19.25, "learning_rate": 2.88298500398792e-06, "loss": 1.617782473564148, "step": 1634 }, { "epoch": 0.5035395506309633, "grad_norm": 26.875, "learning_rate": 2.8826412570686696e-06, "loss": 1.2509739398956299, "step": 1636 }, { "epoch": 0.5041551246537396, "grad_norm": 22.625, "learning_rate": 2.8822970319455376e-06, "loss": 1.4614055156707764, "step": 1638 }, { "epoch": 0.5047706986765158, "grad_norm": 20.375, "learning_rate": 2.881952328770567e-06, "loss": 1.1225662231445312, "step": 1640 }, { "epoch": 0.505386272699292, "grad_norm": 13.25, "learning_rate": 2.881607147696014e-06, "loss": 1.4158776998519897, "step": 1642 }, { "epoch": 0.5060018467220684, "grad_norm": 28.875, "learning_rate": 2.881261488874346e-06, "loss": 1.7590508460998535, "step": 1644 }, { "epoch": 0.5066174207448446, "grad_norm": 10.3125, "learning_rate": 2.8809153524582406e-06, "loss": 0.5588256120681763, "step": 1646 }, { "epoch": 0.5072329947676208, "grad_norm": 11.6875, "learning_rate": 2.8805687386005873e-06, "loss": 1.0516297817230225, "step": 1648 }, { "epoch": 0.5078485687903971, "grad_norm": 9.5625, "learning_rate": 2.8802216474544842e-06, "loss": 1.1188182830810547, "step": 1650 }, { "epoch": 0.5084641428131733, "grad_norm": 9.4375, "learning_rate": 2.8798740791732435e-06, "loss": 1.3351898193359375, "step": 1652 }, { "epoch": 0.5090797168359495, "grad_norm": 14.9375, "learning_rate": 2.8795260339103864e-06, "loss": 1.552315592765808, "step": 1654 }, { "epoch": 0.5096952908587258, "grad_norm": 8.625, "learning_rate": 2.879177511819643e-06, "loss": 1.0963990688323975, "step": 1656 }, { "epoch": 0.510310864881502, "grad_norm": 15.1875, "learning_rate": 2.878828513054958e-06, "loss": 1.716416358947754, "step": 1658 }, { "epoch": 0.5109264389042782, "grad_norm": 8.1875, "learning_rate": 2.8784790377704833e-06, "loss": 1.1489286422729492, "step": 1660 }, { "epoch": 0.5115420129270545, "grad_norm": 74.5, "learning_rate": 2.8781290861205835e-06, "loss": 0.5450893640518188, "step": 1662 }, { "epoch": 0.5121575869498307, "grad_norm": 13.5625, "learning_rate": 2.8777786582598325e-06, "loss": 1.4440956115722656, "step": 1664 }, { "epoch": 0.512773160972607, "grad_norm": 24.75, "learning_rate": 2.877427754343014e-06, "loss": 1.051446795463562, "step": 1666 }, { "epoch": 0.5133887349953832, "grad_norm": 24.875, "learning_rate": 2.8770763745251223e-06, "loss": 1.3044072389602661, "step": 1668 }, { "epoch": 0.5140043090181594, "grad_norm": 15.0625, "learning_rate": 2.8767245189613643e-06, "loss": 1.0021661520004272, "step": 1670 }, { "epoch": 0.5146198830409356, "grad_norm": 18.375, "learning_rate": 2.876372187807153e-06, "loss": 1.3740590810775757, "step": 1672 }, { "epoch": 0.5152354570637119, "grad_norm": 20.25, "learning_rate": 2.8760193812181143e-06, "loss": 1.4852182865142822, "step": 1674 }, { "epoch": 0.5158510310864881, "grad_norm": 28.875, "learning_rate": 2.875666099350083e-06, "loss": 1.6808966398239136, "step": 1676 }, { "epoch": 0.5164666051092643, "grad_norm": 20.5, "learning_rate": 2.8753123423591046e-06, "loss": 1.5063765048980713, "step": 1678 }, { "epoch": 0.5170821791320406, "grad_norm": 15.75, "learning_rate": 2.8749581104014334e-06, "loss": 1.543807029724121, "step": 1680 }, { "epoch": 0.5176977531548169, "grad_norm": 15.5, "learning_rate": 2.874603403633535e-06, "loss": 1.5597282648086548, "step": 1682 }, { "epoch": 0.5183133271775932, "grad_norm": 14.3125, "learning_rate": 2.874248222212082e-06, "loss": 1.5003395080566406, "step": 1684 }, { "epoch": 0.5189289012003694, "grad_norm": 13.8125, "learning_rate": 2.87389256629396e-06, "loss": 1.4587888717651367, "step": 1686 }, { "epoch": 0.5195444752231456, "grad_norm": 32.5, "learning_rate": 2.873536436036262e-06, "loss": 1.307799220085144, "step": 1688 }, { "epoch": 0.5201600492459219, "grad_norm": 11.25, "learning_rate": 2.873179831596292e-06, "loss": 1.3242509365081787, "step": 1690 }, { "epoch": 0.5207756232686981, "grad_norm": 7.5625, "learning_rate": 2.872822753131561e-06, "loss": 1.322249412536621, "step": 1692 }, { "epoch": 0.5213911972914743, "grad_norm": 10.25, "learning_rate": 2.8724652007997922e-06, "loss": 1.420886754989624, "step": 1694 }, { "epoch": 0.5220067713142506, "grad_norm": 21.625, "learning_rate": 2.8721071747589165e-06, "loss": 1.4671920537948608, "step": 1696 }, { "epoch": 0.5226223453370268, "grad_norm": 15.1875, "learning_rate": 2.8717486751670743e-06, "loss": 1.6293821334838867, "step": 1698 }, { "epoch": 0.523237919359803, "grad_norm": 19.25, "learning_rate": 2.871389702182616e-06, "loss": 1.1151347160339355, "step": 1700 }, { "epoch": 0.5238534933825792, "grad_norm": 11.9375, "learning_rate": 2.871030255964099e-06, "loss": 1.3921657800674438, "step": 1702 }, { "epoch": 0.5244690674053555, "grad_norm": 10.0, "learning_rate": 2.8706703366702926e-06, "loss": 1.331470251083374, "step": 1704 }, { "epoch": 0.5250846414281317, "grad_norm": 7.65625, "learning_rate": 2.870309944460172e-06, "loss": 1.0913875102996826, "step": 1706 }, { "epoch": 0.525700215450908, "grad_norm": 4.71875, "learning_rate": 2.869949079492924e-06, "loss": 1.2790942192077637, "step": 1708 }, { "epoch": 0.5263157894736842, "grad_norm": 30.0, "learning_rate": 2.8695877419279436e-06, "loss": 1.7259317636489868, "step": 1710 }, { "epoch": 0.5269313634964604, "grad_norm": 10.625, "learning_rate": 2.869225931924832e-06, "loss": 1.1891872882843018, "step": 1712 }, { "epoch": 0.5275469375192366, "grad_norm": 9.75, "learning_rate": 2.868863649643403e-06, "loss": 1.1908620595932007, "step": 1714 }, { "epoch": 0.5281625115420129, "grad_norm": 29.0, "learning_rate": 2.8685008952436762e-06, "loss": 1.4761006832122803, "step": 1716 }, { "epoch": 0.5287780855647891, "grad_norm": 6.40625, "learning_rate": 2.8681376688858812e-06, "loss": 1.137245774269104, "step": 1718 }, { "epoch": 0.5293936595875655, "grad_norm": 10.8125, "learning_rate": 2.867773970730455e-06, "loss": 1.0170716047286987, "step": 1720 }, { "epoch": 0.5300092336103417, "grad_norm": 10.1875, "learning_rate": 2.867409800938043e-06, "loss": 1.353341817855835, "step": 1722 }, { "epoch": 0.5306248076331179, "grad_norm": 16.625, "learning_rate": 2.8670451596695006e-06, "loss": 1.406203031539917, "step": 1724 }, { "epoch": 0.5312403816558942, "grad_norm": 8.625, "learning_rate": 2.8666800470858897e-06, "loss": 1.1297376155853271, "step": 1726 }, { "epoch": 0.5318559556786704, "grad_norm": 11.125, "learning_rate": 2.866314463348481e-06, "loss": 1.3886916637420654, "step": 1728 }, { "epoch": 0.5324715297014466, "grad_norm": 13.6875, "learning_rate": 2.865948408618753e-06, "loss": 1.3519071340560913, "step": 1730 }, { "epoch": 0.5330871037242229, "grad_norm": 18.25, "learning_rate": 2.8655818830583925e-06, "loss": 1.651322603225708, "step": 1732 }, { "epoch": 0.5337026777469991, "grad_norm": 16.75, "learning_rate": 2.865214886829295e-06, "loss": 1.5197107791900635, "step": 1734 }, { "epoch": 0.5343182517697753, "grad_norm": 29.875, "learning_rate": 2.864847420093562e-06, "loss": 1.8645293712615967, "step": 1736 }, { "epoch": 0.5349338257925516, "grad_norm": 19.625, "learning_rate": 2.8644794830135047e-06, "loss": 1.5438506603240967, "step": 1738 }, { "epoch": 0.5355493998153278, "grad_norm": 6.875, "learning_rate": 2.864111075751641e-06, "loss": 1.1475858688354492, "step": 1740 }, { "epoch": 0.536164973838104, "grad_norm": 21.0, "learning_rate": 2.8637421984706977e-06, "loss": 1.403162956237793, "step": 1742 }, { "epoch": 0.5367805478608803, "grad_norm": 7.09375, "learning_rate": 2.8633728513336076e-06, "loss": 1.1231191158294678, "step": 1744 }, { "epoch": 0.5373961218836565, "grad_norm": 11.625, "learning_rate": 2.863003034503511e-06, "loss": 1.4450881481170654, "step": 1746 }, { "epoch": 0.5380116959064327, "grad_norm": 10.0, "learning_rate": 2.8626327481437573e-06, "loss": 1.3785327672958374, "step": 1748 }, { "epoch": 0.538627269929209, "grad_norm": 13.375, "learning_rate": 2.862261992417903e-06, "loss": 1.3406766653060913, "step": 1750 }, { "epoch": 0.5392428439519852, "grad_norm": 9.5625, "learning_rate": 2.8618907674897106e-06, "loss": 1.3030524253845215, "step": 1752 }, { "epoch": 0.5398584179747614, "grad_norm": 7.84375, "learning_rate": 2.86151907352315e-06, "loss": 1.221339225769043, "step": 1754 }, { "epoch": 0.5404739919975377, "grad_norm": 26.0, "learning_rate": 2.8611469106824e-06, "loss": 1.056274175643921, "step": 1756 }, { "epoch": 0.541089566020314, "grad_norm": 19.75, "learning_rate": 2.8607742791318442e-06, "loss": 1.3449528217315674, "step": 1758 }, { "epoch": 0.5417051400430902, "grad_norm": 6.34375, "learning_rate": 2.8604011790360755e-06, "loss": 1.1858000755310059, "step": 1760 }, { "epoch": 0.5423207140658665, "grad_norm": 86.0, "learning_rate": 2.860027610559892e-06, "loss": 1.5040273666381836, "step": 1762 }, { "epoch": 0.5429362880886427, "grad_norm": 31.5, "learning_rate": 2.8596535738682998e-06, "loss": 1.4095792770385742, "step": 1764 }, { "epoch": 0.5435518621114189, "grad_norm": 55.75, "learning_rate": 2.8592790691265104e-06, "loss": 1.7419880628585815, "step": 1766 }, { "epoch": 0.5441674361341952, "grad_norm": 15.3125, "learning_rate": 2.858904096499944e-06, "loss": 1.3096810579299927, "step": 1768 }, { "epoch": 0.5447830101569714, "grad_norm": 11.8125, "learning_rate": 2.858528656154226e-06, "loss": 1.6850093603134155, "step": 1770 }, { "epoch": 0.5453985841797476, "grad_norm": 10.375, "learning_rate": 2.8581527482551887e-06, "loss": 1.2738856077194214, "step": 1772 }, { "epoch": 0.5460141582025239, "grad_norm": 30.5, "learning_rate": 2.8577763729688717e-06, "loss": 1.3164045810699463, "step": 1774 }, { "epoch": 0.5466297322253001, "grad_norm": 19.375, "learning_rate": 2.857399530461519e-06, "loss": 1.4970653057098389, "step": 1776 }, { "epoch": 0.5472453062480763, "grad_norm": 13.5, "learning_rate": 2.857022220899584e-06, "loss": 1.4046273231506348, "step": 1778 }, { "epoch": 0.5478608802708526, "grad_norm": 8.5, "learning_rate": 2.856644444449724e-06, "loss": 1.539980411529541, "step": 1780 }, { "epoch": 0.5484764542936288, "grad_norm": 23.25, "learning_rate": 2.8562662012788028e-06, "loss": 1.7872920036315918, "step": 1782 }, { "epoch": 0.549092028316405, "grad_norm": 49.5, "learning_rate": 2.855887491553892e-06, "loss": 1.6001696586608887, "step": 1784 }, { "epoch": 0.5497076023391813, "grad_norm": 13.1875, "learning_rate": 2.8555083154422666e-06, "loss": 1.1596770286560059, "step": 1786 }, { "epoch": 0.5503231763619575, "grad_norm": 36.75, "learning_rate": 2.8551286731114104e-06, "loss": 1.4008393287658691, "step": 1788 }, { "epoch": 0.5509387503847337, "grad_norm": 25.25, "learning_rate": 2.8547485647290116e-06, "loss": 1.474470853805542, "step": 1790 }, { "epoch": 0.55155432440751, "grad_norm": 8.625, "learning_rate": 2.8543679904629644e-06, "loss": 1.4998549222946167, "step": 1792 }, { "epoch": 0.5521698984302862, "grad_norm": 11.4375, "learning_rate": 2.8539869504813684e-06, "loss": 1.033094048500061, "step": 1794 }, { "epoch": 0.5527854724530625, "grad_norm": 8.6875, "learning_rate": 2.8536054449525304e-06, "loss": 1.3654334545135498, "step": 1796 }, { "epoch": 0.5534010464758388, "grad_norm": 19.125, "learning_rate": 2.853223474044961e-06, "loss": 1.45066499710083, "step": 1798 }, { "epoch": 0.554016620498615, "grad_norm": 12.1875, "learning_rate": 2.852841037927377e-06, "loss": 1.2047758102416992, "step": 1800 }, { "epoch": 0.5546321945213912, "grad_norm": 14.625, "learning_rate": 2.8524581367687023e-06, "loss": 1.1410934925079346, "step": 1802 }, { "epoch": 0.5552477685441675, "grad_norm": 24.375, "learning_rate": 2.8520747707380634e-06, "loss": 1.5290203094482422, "step": 1804 }, { "epoch": 0.5558633425669437, "grad_norm": 20.5, "learning_rate": 2.8516909400047937e-06, "loss": 1.005765438079834, "step": 1806 }, { "epoch": 0.5564789165897199, "grad_norm": 22.25, "learning_rate": 2.851306644738432e-06, "loss": 1.7779279947280884, "step": 1808 }, { "epoch": 0.5570944906124962, "grad_norm": 8.0, "learning_rate": 2.850921885108722e-06, "loss": 1.135983943939209, "step": 1810 }, { "epoch": 0.5577100646352724, "grad_norm": 20.375, "learning_rate": 2.850536661285612e-06, "loss": 1.7957868576049805, "step": 1812 }, { "epoch": 0.5583256386580486, "grad_norm": 27.875, "learning_rate": 2.8501509734392566e-06, "loss": 1.8340908288955688, "step": 1814 }, { "epoch": 0.5589412126808249, "grad_norm": 15.1875, "learning_rate": 2.8497648217400137e-06, "loss": 1.5594115257263184, "step": 1816 }, { "epoch": 0.5595567867036011, "grad_norm": 15.4375, "learning_rate": 2.849378206358447e-06, "loss": 1.1051418781280518, "step": 1818 }, { "epoch": 0.5601723607263773, "grad_norm": 12.5, "learning_rate": 2.8489911274653263e-06, "loss": 1.3022823333740234, "step": 1820 }, { "epoch": 0.5607879347491536, "grad_norm": 33.0, "learning_rate": 2.848603585231623e-06, "loss": 1.4590941667556763, "step": 1822 }, { "epoch": 0.5614035087719298, "grad_norm": 6.75, "learning_rate": 2.848215579828516e-06, "loss": 1.4862349033355713, "step": 1824 }, { "epoch": 0.562019082794706, "grad_norm": 15.3125, "learning_rate": 2.8478271114273873e-06, "loss": 1.1151635646820068, "step": 1826 }, { "epoch": 0.5626346568174823, "grad_norm": 24.375, "learning_rate": 2.8474381801998244e-06, "loss": 1.2278783321380615, "step": 1828 }, { "epoch": 0.5632502308402585, "grad_norm": 23.375, "learning_rate": 2.847048786317618e-06, "loss": 1.7876145839691162, "step": 1830 }, { "epoch": 0.5638658048630347, "grad_norm": 17.0, "learning_rate": 2.846658929952764e-06, "loss": 1.65370512008667, "step": 1832 }, { "epoch": 0.5644813788858111, "grad_norm": 12.6875, "learning_rate": 2.8462686112774625e-06, "loss": 1.4286746978759766, "step": 1834 }, { "epoch": 0.5650969529085873, "grad_norm": 9.5, "learning_rate": 2.845877830464118e-06, "loss": 1.5145810842514038, "step": 1836 }, { "epoch": 0.5657125269313635, "grad_norm": 20.875, "learning_rate": 2.845486587685338e-06, "loss": 1.6489734649658203, "step": 1838 }, { "epoch": 0.5663281009541398, "grad_norm": 11.9375, "learning_rate": 2.8450948831139355e-06, "loss": 1.3738142251968384, "step": 1840 }, { "epoch": 0.566943674976916, "grad_norm": 14.5625, "learning_rate": 2.8447027169229277e-06, "loss": 1.5936473608016968, "step": 1842 }, { "epoch": 0.5675592489996922, "grad_norm": 74.0, "learning_rate": 2.8443100892855328e-06, "loss": 1.0497071743011475, "step": 1844 }, { "epoch": 0.5681748230224685, "grad_norm": 7.65625, "learning_rate": 2.843917000375177e-06, "loss": 1.1069320440292358, "step": 1846 }, { "epoch": 0.5687903970452447, "grad_norm": 24.625, "learning_rate": 2.843523450365486e-06, "loss": 1.5907843112945557, "step": 1848 }, { "epoch": 0.5694059710680209, "grad_norm": 9.0, "learning_rate": 2.8431294394302937e-06, "loss": 1.447683334350586, "step": 1850 }, { "epoch": 0.5700215450907972, "grad_norm": 19.375, "learning_rate": 2.842734967743633e-06, "loss": 1.3191251754760742, "step": 1852 }, { "epoch": 0.5706371191135734, "grad_norm": 13.9375, "learning_rate": 2.8423400354797437e-06, "loss": 1.2646541595458984, "step": 1854 }, { "epoch": 0.5712526931363496, "grad_norm": 25.0, "learning_rate": 2.841944642813067e-06, "loss": 0.9847139120101929, "step": 1856 }, { "epoch": 0.5718682671591259, "grad_norm": 24.625, "learning_rate": 2.84154878991825e-06, "loss": 1.350081443786621, "step": 1858 }, { "epoch": 0.5724838411819021, "grad_norm": 79.5, "learning_rate": 2.841152476970139e-06, "loss": 1.20438551902771, "step": 1860 }, { "epoch": 0.5730994152046783, "grad_norm": 10.4375, "learning_rate": 2.8407557041437875e-06, "loss": 1.325690507888794, "step": 1862 }, { "epoch": 0.5737149892274546, "grad_norm": 19.875, "learning_rate": 2.84035847161445e-06, "loss": 1.2209234237670898, "step": 1864 }, { "epoch": 0.5743305632502308, "grad_norm": 7.03125, "learning_rate": 2.8399607795575845e-06, "loss": 1.2860186100006104, "step": 1866 }, { "epoch": 0.574946137273007, "grad_norm": 15.0625, "learning_rate": 2.8395626281488528e-06, "loss": 1.4151506423950195, "step": 1868 }, { "epoch": 0.5755617112957833, "grad_norm": 8.1875, "learning_rate": 2.8391640175641177e-06, "loss": 1.2648940086364746, "step": 1870 }, { "epoch": 0.5761772853185596, "grad_norm": 26.75, "learning_rate": 2.838764947979447e-06, "loss": 0.8969897627830505, "step": 1872 }, { "epoch": 0.5767928593413358, "grad_norm": 17.375, "learning_rate": 2.838365419571109e-06, "loss": 1.4495514631271362, "step": 1874 }, { "epoch": 0.5774084333641121, "grad_norm": 4.0625, "learning_rate": 2.8379654325155772e-06, "loss": 1.1686997413635254, "step": 1876 }, { "epoch": 0.5780240073868883, "grad_norm": 16.625, "learning_rate": 2.837564986989525e-06, "loss": 1.185441255569458, "step": 1878 }, { "epoch": 0.5786395814096645, "grad_norm": 10.0, "learning_rate": 2.8371640831698305e-06, "loss": 1.539408564567566, "step": 1880 }, { "epoch": 0.5792551554324408, "grad_norm": 16.375, "learning_rate": 2.8367627212335734e-06, "loss": 1.452717661857605, "step": 1882 }, { "epoch": 0.579870729455217, "grad_norm": 20.0, "learning_rate": 2.8363609013580353e-06, "loss": 1.2913964986801147, "step": 1884 }, { "epoch": 0.5804863034779932, "grad_norm": 7.125, "learning_rate": 2.8359586237207013e-06, "loss": 1.2374382019042969, "step": 1886 }, { "epoch": 0.5811018775007695, "grad_norm": 26.625, "learning_rate": 2.8355558884992565e-06, "loss": 1.564466953277588, "step": 1888 }, { "epoch": 0.5817174515235457, "grad_norm": 18.25, "learning_rate": 2.8351526958715914e-06, "loss": 1.572069525718689, "step": 1890 }, { "epoch": 0.5823330255463219, "grad_norm": 41.5, "learning_rate": 2.834749046015794e-06, "loss": 1.0248215198516846, "step": 1892 }, { "epoch": 0.5829485995690982, "grad_norm": 6.03125, "learning_rate": 2.8343449391101594e-06, "loss": 1.573186993598938, "step": 1894 }, { "epoch": 0.5835641735918744, "grad_norm": 26.125, "learning_rate": 2.8339403753331814e-06, "loss": 1.384692907333374, "step": 1896 }, { "epoch": 0.5841797476146506, "grad_norm": 15.6875, "learning_rate": 2.833535354863556e-06, "loss": 1.307197093963623, "step": 1898 }, { "epoch": 0.5847953216374269, "grad_norm": 5.46875, "learning_rate": 2.8331298778801806e-06, "loss": 1.4879049062728882, "step": 1900 }, { "epoch": 0.5854108956602031, "grad_norm": 20.5, "learning_rate": 2.8327239445621555e-06, "loss": 1.6862337589263916, "step": 1902 }, { "epoch": 0.5860264696829793, "grad_norm": 24.125, "learning_rate": 2.8323175550887824e-06, "loss": 1.6703479290008545, "step": 1904 }, { "epoch": 0.5866420437057556, "grad_norm": 15.1875, "learning_rate": 2.831910709639563e-06, "loss": 1.492964744567871, "step": 1906 }, { "epoch": 0.5872576177285319, "grad_norm": 13.1875, "learning_rate": 2.8315034083942028e-06, "loss": 1.3600826263427734, "step": 1908 }, { "epoch": 0.5878731917513081, "grad_norm": 8.0625, "learning_rate": 2.8310956515326053e-06, "loss": 1.158477544784546, "step": 1910 }, { "epoch": 0.5884887657740844, "grad_norm": 14.6875, "learning_rate": 2.8306874392348786e-06, "loss": 1.2859324216842651, "step": 1912 }, { "epoch": 0.5891043397968606, "grad_norm": 9.5, "learning_rate": 2.8302787716813304e-06, "loss": 1.4909124374389648, "step": 1914 }, { "epoch": 0.5897199138196368, "grad_norm": 16.125, "learning_rate": 2.8298696490524687e-06, "loss": 1.2859081029891968, "step": 1916 }, { "epoch": 0.5903354878424131, "grad_norm": 10.25, "learning_rate": 2.8294600715290046e-06, "loss": 1.3993711471557617, "step": 1918 }, { "epoch": 0.5909510618651893, "grad_norm": 20.375, "learning_rate": 2.8290500392918485e-06, "loss": 0.9705616235733032, "step": 1920 }, { "epoch": 0.5915666358879655, "grad_norm": 12.1875, "learning_rate": 2.8286395525221118e-06, "loss": 1.364678144454956, "step": 1922 }, { "epoch": 0.5921822099107418, "grad_norm": 40.75, "learning_rate": 2.8282286114011074e-06, "loss": 1.29304838180542, "step": 1924 }, { "epoch": 0.592797783933518, "grad_norm": 7.375, "learning_rate": 2.8278172161103485e-06, "loss": 1.2006807327270508, "step": 1926 }, { "epoch": 0.5934133579562942, "grad_norm": 9.75, "learning_rate": 2.8274053668315483e-06, "loss": 1.387573480606079, "step": 1928 }, { "epoch": 0.5940289319790705, "grad_norm": 7.15625, "learning_rate": 2.8269930637466216e-06, "loss": 1.365392804145813, "step": 1930 }, { "epoch": 0.5946445060018467, "grad_norm": 11.875, "learning_rate": 2.8265803070376824e-06, "loss": 1.5575824975967407, "step": 1932 }, { "epoch": 0.5952600800246229, "grad_norm": 20.5, "learning_rate": 2.826167096887047e-06, "loss": 1.1728448867797852, "step": 1934 }, { "epoch": 0.5958756540473992, "grad_norm": 11.8125, "learning_rate": 2.8257534334772303e-06, "loss": 1.4417364597320557, "step": 1936 }, { "epoch": 0.5964912280701754, "grad_norm": 25.125, "learning_rate": 2.8253393169909474e-06, "loss": 1.6214367151260376, "step": 1938 }, { "epoch": 0.5971068020929516, "grad_norm": 38.5, "learning_rate": 2.824924747611115e-06, "loss": 1.4998779296875, "step": 1940 }, { "epoch": 0.5977223761157279, "grad_norm": 7.90625, "learning_rate": 2.824509725520848e-06, "loss": 1.2933369874954224, "step": 1942 }, { "epoch": 0.5983379501385041, "grad_norm": 11.1875, "learning_rate": 2.8240942509034626e-06, "loss": 1.4675788879394531, "step": 1944 }, { "epoch": 0.5989535241612804, "grad_norm": 9.9375, "learning_rate": 2.823678323942474e-06, "loss": 1.1986143589019775, "step": 1946 }, { "epoch": 0.5995690981840567, "grad_norm": 8.4375, "learning_rate": 2.8232619448215984e-06, "loss": 1.014643669128418, "step": 1948 }, { "epoch": 0.6001846722068329, "grad_norm": 14.375, "learning_rate": 2.82284511372475e-06, "loss": 1.5362460613250732, "step": 1950 }, { "epoch": 0.6008002462296091, "grad_norm": 10.0625, "learning_rate": 2.822427830836044e-06, "loss": 1.085378646850586, "step": 1952 }, { "epoch": 0.6014158202523854, "grad_norm": 70.0, "learning_rate": 2.8220100963397945e-06, "loss": 1.327484369277954, "step": 1954 }, { "epoch": 0.6020313942751616, "grad_norm": 14.625, "learning_rate": 2.821591910420516e-06, "loss": 1.308484435081482, "step": 1956 }, { "epoch": 0.6026469682979378, "grad_norm": 7.6875, "learning_rate": 2.821173273262921e-06, "loss": 1.0947763919830322, "step": 1958 }, { "epoch": 0.6032625423207141, "grad_norm": 23.625, "learning_rate": 2.8207541850519226e-06, "loss": 1.7331056594848633, "step": 1960 }, { "epoch": 0.6038781163434903, "grad_norm": 11.6875, "learning_rate": 2.8203346459726315e-06, "loss": 1.389496088027954, "step": 1962 }, { "epoch": 0.6044936903662665, "grad_norm": 22.625, "learning_rate": 2.81991465621036e-06, "loss": 1.372626543045044, "step": 1964 }, { "epoch": 0.6051092643890428, "grad_norm": 18.25, "learning_rate": 2.8194942159506163e-06, "loss": 1.157492995262146, "step": 1966 }, { "epoch": 0.605724838411819, "grad_norm": 22.0, "learning_rate": 2.819073325379111e-06, "loss": 0.7561789155006409, "step": 1968 }, { "epoch": 0.6063404124345952, "grad_norm": 25.25, "learning_rate": 2.8186519846817515e-06, "loss": 1.4574605226516724, "step": 1970 }, { "epoch": 0.6069559864573715, "grad_norm": 10.3125, "learning_rate": 2.818230194044644e-06, "loss": 1.4265029430389404, "step": 1972 }, { "epoch": 0.6075715604801477, "grad_norm": 6.96875, "learning_rate": 2.817807953654094e-06, "loss": 0.9847813844680786, "step": 1974 }, { "epoch": 0.6081871345029239, "grad_norm": 8.8125, "learning_rate": 2.817385263696606e-06, "loss": 1.3704838752746582, "step": 1976 }, { "epoch": 0.6088027085257002, "grad_norm": 29.125, "learning_rate": 2.816962124358883e-06, "loss": 0.8931136727333069, "step": 1978 }, { "epoch": 0.6094182825484764, "grad_norm": 33.25, "learning_rate": 2.8165385358278245e-06, "loss": 1.4192783832550049, "step": 1980 }, { "epoch": 0.6100338565712526, "grad_norm": 16.25, "learning_rate": 2.8161144982905313e-06, "loss": 1.2977051734924316, "step": 1982 }, { "epoch": 0.610649430594029, "grad_norm": 18.125, "learning_rate": 2.8156900119343013e-06, "loss": 1.8155004978179932, "step": 1984 }, { "epoch": 0.6112650046168052, "grad_norm": 21.125, "learning_rate": 2.81526507694663e-06, "loss": 1.7881275415420532, "step": 1986 }, { "epoch": 0.6118805786395815, "grad_norm": 14.0, "learning_rate": 2.8148396935152125e-06, "loss": 1.238133192062378, "step": 1988 }, { "epoch": 0.6124961526623577, "grad_norm": 33.75, "learning_rate": 2.81441386182794e-06, "loss": 1.4487230777740479, "step": 1990 }, { "epoch": 0.6131117266851339, "grad_norm": 7.09375, "learning_rate": 2.813987582072904e-06, "loss": 1.2874596118927002, "step": 1992 }, { "epoch": 0.6137273007079102, "grad_norm": 11.4375, "learning_rate": 2.813560854438392e-06, "loss": 1.3983826637268066, "step": 1994 }, { "epoch": 0.6143428747306864, "grad_norm": 22.75, "learning_rate": 2.8131336791128914e-06, "loss": 1.0365115404129028, "step": 1996 }, { "epoch": 0.6149584487534626, "grad_norm": 38.0, "learning_rate": 2.8127060562850835e-06, "loss": 1.5444605350494385, "step": 1998 }, { "epoch": 0.6155740227762388, "grad_norm": 15.8125, "learning_rate": 2.812277986143852e-06, "loss": 1.8046175241470337, "step": 2000 }, { "epoch": 0.6161895967990151, "grad_norm": 25.125, "learning_rate": 2.8118494688782747e-06, "loss": 1.093684196472168, "step": 2002 }, { "epoch": 0.6168051708217913, "grad_norm": 15.25, "learning_rate": 2.8114205046776295e-06, "loss": 1.3421696424484253, "step": 2004 }, { "epoch": 0.6174207448445675, "grad_norm": 13.3125, "learning_rate": 2.810991093731389e-06, "loss": 1.4425666332244873, "step": 2006 }, { "epoch": 0.6180363188673438, "grad_norm": 14.0625, "learning_rate": 2.810561236229225e-06, "loss": 1.2806977033615112, "step": 2008 }, { "epoch": 0.61865189289012, "grad_norm": 6.9375, "learning_rate": 2.8101309323610063e-06, "loss": 1.0246402025222778, "step": 2010 }, { "epoch": 0.6192674669128962, "grad_norm": 11.1875, "learning_rate": 2.8097001823167988e-06, "loss": 1.3080570697784424, "step": 2012 }, { "epoch": 0.6198830409356725, "grad_norm": 18.375, "learning_rate": 2.809268986286864e-06, "loss": 1.5274906158447266, "step": 2014 }, { "epoch": 0.6204986149584487, "grad_norm": 50.25, "learning_rate": 2.8088373444616635e-06, "loss": 1.5145537853240967, "step": 2016 }, { "epoch": 0.621114188981225, "grad_norm": 11.5625, "learning_rate": 2.808405257031853e-06, "loss": 1.186488389968872, "step": 2018 }, { "epoch": 0.6217297630040012, "grad_norm": 11.6875, "learning_rate": 2.807972724188286e-06, "loss": 1.3994492292404175, "step": 2020 }, { "epoch": 0.6223453370267775, "grad_norm": 27.875, "learning_rate": 2.8075397461220128e-06, "loss": 1.7313969135284424, "step": 2022 }, { "epoch": 0.6229609110495538, "grad_norm": 17.125, "learning_rate": 2.80710632302428e-06, "loss": 0.9865247011184692, "step": 2024 }, { "epoch": 0.62357648507233, "grad_norm": 5.9375, "learning_rate": 2.806672455086532e-06, "loss": 1.3501904010772705, "step": 2026 }, { "epoch": 0.6241920590951062, "grad_norm": 12.125, "learning_rate": 2.8062381425004084e-06, "loss": 1.665205717086792, "step": 2028 }, { "epoch": 0.6248076331178825, "grad_norm": 24.625, "learning_rate": 2.805803385457745e-06, "loss": 1.8587546348571777, "step": 2030 }, { "epoch": 0.6254232071406587, "grad_norm": 28.625, "learning_rate": 2.8053681841505746e-06, "loss": 1.045028567314148, "step": 2032 }, { "epoch": 0.6260387811634349, "grad_norm": 39.75, "learning_rate": 2.804932538771127e-06, "loss": 1.2187459468841553, "step": 2034 }, { "epoch": 0.6266543551862112, "grad_norm": 18.75, "learning_rate": 2.804496449511826e-06, "loss": 1.2937474250793457, "step": 2036 }, { "epoch": 0.6272699292089874, "grad_norm": 44.25, "learning_rate": 2.8040599165652944e-06, "loss": 1.2768559455871582, "step": 2038 }, { "epoch": 0.6278855032317636, "grad_norm": 29.25, "learning_rate": 2.8036229401243473e-06, "loss": 1.0333044528961182, "step": 2040 }, { "epoch": 0.6285010772545399, "grad_norm": 9.4375, "learning_rate": 2.8031855203819993e-06, "loss": 0.9537710547447205, "step": 2042 }, { "epoch": 0.6291166512773161, "grad_norm": 8.375, "learning_rate": 2.8027476575314575e-06, "loss": 1.1429424285888672, "step": 2044 }, { "epoch": 0.6297322253000923, "grad_norm": 38.0, "learning_rate": 2.8023093517661286e-06, "loss": 1.6575042009353638, "step": 2046 }, { "epoch": 0.6303477993228686, "grad_norm": 8.1875, "learning_rate": 2.8018706032796115e-06, "loss": 1.3889267444610596, "step": 2048 }, { "epoch": 0.6309633733456448, "grad_norm": 17.5, "learning_rate": 2.801431412265702e-06, "loss": 1.4309298992156982, "step": 2050 }, { "epoch": 0.631578947368421, "grad_norm": 11.75, "learning_rate": 2.8009917789183904e-06, "loss": 1.4273048639297485, "step": 2052 }, { "epoch": 0.6321945213911973, "grad_norm": 12.625, "learning_rate": 2.8005517034318654e-06, "loss": 1.0454750061035156, "step": 2054 }, { "epoch": 0.6328100954139735, "grad_norm": 14.375, "learning_rate": 2.8001111860005067e-06, "loss": 1.2234904766082764, "step": 2056 }, { "epoch": 0.6334256694367497, "grad_norm": 9.75, "learning_rate": 2.799670226818893e-06, "loss": 1.093738079071045, "step": 2058 }, { "epoch": 0.6340412434595261, "grad_norm": 22.0, "learning_rate": 2.799228826081796e-06, "loss": 1.7311053276062012, "step": 2060 }, { "epoch": 0.6346568174823023, "grad_norm": 21.625, "learning_rate": 2.7987869839841817e-06, "loss": 1.5796515941619873, "step": 2062 }, { "epoch": 0.6352723915050785, "grad_norm": 9.0, "learning_rate": 2.7983447007212133e-06, "loss": 1.5824609994888306, "step": 2064 }, { "epoch": 0.6358879655278548, "grad_norm": 5.71875, "learning_rate": 2.7979019764882487e-06, "loss": 1.1825772523880005, "step": 2066 }, { "epoch": 0.636503539550631, "grad_norm": 27.0, "learning_rate": 2.7974588114808382e-06, "loss": 1.2671689987182617, "step": 2068 }, { "epoch": 0.6371191135734072, "grad_norm": 15.125, "learning_rate": 2.797015205894729e-06, "loss": 1.1446564197540283, "step": 2070 }, { "epoch": 0.6377346875961835, "grad_norm": 11.8125, "learning_rate": 2.7965711599258618e-06, "loss": 1.421542763710022, "step": 2072 }, { "epoch": 0.6383502616189597, "grad_norm": 17.625, "learning_rate": 2.7961266737703725e-06, "loss": 1.7223405838012695, "step": 2074 }, { "epoch": 0.6389658356417359, "grad_norm": 11.125, "learning_rate": 2.795681747624591e-06, "loss": 1.7312476634979248, "step": 2076 }, { "epoch": 0.6395814096645122, "grad_norm": 432.0, "learning_rate": 2.795236381685042e-06, "loss": 1.6300084590911865, "step": 2078 }, { "epoch": 0.6401969836872884, "grad_norm": 13.1875, "learning_rate": 2.7947905761484434e-06, "loss": 1.3182811737060547, "step": 2080 }, { "epoch": 0.6408125577100646, "grad_norm": 32.5, "learning_rate": 2.7943443312117096e-06, "loss": 1.3044389486312866, "step": 2082 }, { "epoch": 0.6414281317328409, "grad_norm": 16.75, "learning_rate": 2.793897647071946e-06, "loss": 1.5463333129882812, "step": 2084 }, { "epoch": 0.6420437057556171, "grad_norm": 29.5, "learning_rate": 2.7934505239264535e-06, "loss": 1.4204747676849365, "step": 2086 }, { "epoch": 0.6426592797783933, "grad_norm": 14.75, "learning_rate": 2.793002961972728e-06, "loss": 1.5265976190567017, "step": 2088 }, { "epoch": 0.6432748538011696, "grad_norm": 7.84375, "learning_rate": 2.792554961408457e-06, "loss": 1.242282509803772, "step": 2090 }, { "epoch": 0.6438904278239458, "grad_norm": 15.875, "learning_rate": 2.792106522431523e-06, "loss": 1.3729312419891357, "step": 2092 }, { "epoch": 0.644506001846722, "grad_norm": 14.5, "learning_rate": 2.7916576452400033e-06, "loss": 1.8193776607513428, "step": 2094 }, { "epoch": 0.6451215758694983, "grad_norm": 20.75, "learning_rate": 2.7912083300321656e-06, "loss": 1.2827033996582031, "step": 2096 }, { "epoch": 0.6457371498922746, "grad_norm": 14.0625, "learning_rate": 2.7907585770064747e-06, "loss": 1.1103391647338867, "step": 2098 }, { "epoch": 0.6463527239150508, "grad_norm": 9.5625, "learning_rate": 2.7903083863615856e-06, "loss": 1.3382441997528076, "step": 2100 }, { "epoch": 0.6469682979378271, "grad_norm": 20.625, "learning_rate": 2.789857758296349e-06, "loss": 1.7949204444885254, "step": 2102 }, { "epoch": 0.6475838719606033, "grad_norm": 16.875, "learning_rate": 2.789406693009807e-06, "loss": 1.326945424079895, "step": 2104 }, { "epoch": 0.6481994459833795, "grad_norm": 11.9375, "learning_rate": 2.7889551907011965e-06, "loss": 1.5753204822540283, "step": 2106 }, { "epoch": 0.6488150200061558, "grad_norm": 20.5, "learning_rate": 2.788503251569946e-06, "loss": 1.9633748531341553, "step": 2108 }, { "epoch": 0.649430594028932, "grad_norm": 25.25, "learning_rate": 2.7880508758156777e-06, "loss": 1.1748063564300537, "step": 2110 }, { "epoch": 0.6500461680517082, "grad_norm": 110.0, "learning_rate": 2.7875980636382068e-06, "loss": 1.2353019714355469, "step": 2112 }, { "epoch": 0.6506617420744845, "grad_norm": 40.0, "learning_rate": 2.78714481523754e-06, "loss": 1.6957015991210938, "step": 2114 }, { "epoch": 0.6512773160972607, "grad_norm": 20.5, "learning_rate": 2.7866911308138785e-06, "loss": 1.7818708419799805, "step": 2116 }, { "epoch": 0.6518928901200369, "grad_norm": 10.0, "learning_rate": 2.786237010567615e-06, "loss": 1.3317992687225342, "step": 2118 }, { "epoch": 0.6525084641428132, "grad_norm": 27.375, "learning_rate": 2.7857824546993356e-06, "loss": 1.1547877788543701, "step": 2120 }, { "epoch": 0.6531240381655894, "grad_norm": 8.375, "learning_rate": 2.7853274634098166e-06, "loss": 1.246095895767212, "step": 2122 }, { "epoch": 0.6537396121883656, "grad_norm": 15.75, "learning_rate": 2.7848720369000297e-06, "loss": 1.4210995435714722, "step": 2124 }, { "epoch": 0.6543551862111419, "grad_norm": 26.5, "learning_rate": 2.7844161753711363e-06, "loss": 1.2631009817123413, "step": 2126 }, { "epoch": 0.6549707602339181, "grad_norm": 12.8125, "learning_rate": 2.7839598790244913e-06, "loss": 1.5142743587493896, "step": 2128 }, { "epoch": 0.6555863342566943, "grad_norm": 42.0, "learning_rate": 2.783503148061642e-06, "loss": 1.3284528255462646, "step": 2130 }, { "epoch": 0.6562019082794706, "grad_norm": 22.625, "learning_rate": 2.7830459826843256e-06, "loss": 1.7355668544769287, "step": 2132 }, { "epoch": 0.6568174823022468, "grad_norm": 8.3125, "learning_rate": 2.782588383094474e-06, "loss": 1.3677023649215698, "step": 2134 }, { "epoch": 0.6574330563250231, "grad_norm": 8.0, "learning_rate": 2.7821303494942085e-06, "loss": 1.4090447425842285, "step": 2136 }, { "epoch": 0.6580486303477994, "grad_norm": 17.25, "learning_rate": 2.7816718820858432e-06, "loss": 1.2605023384094238, "step": 2138 }, { "epoch": 0.6586642043705756, "grad_norm": 63.75, "learning_rate": 2.7812129810718836e-06, "loss": 1.1370813846588135, "step": 2140 }, { "epoch": 0.6592797783933518, "grad_norm": 10.5, "learning_rate": 2.780753646655028e-06, "loss": 1.4284716844558716, "step": 2142 }, { "epoch": 0.6598953524161281, "grad_norm": 16.75, "learning_rate": 2.780293879038163e-06, "loss": 1.4087693691253662, "step": 2144 }, { "epoch": 0.6605109264389043, "grad_norm": 72.0, "learning_rate": 2.7798336784243695e-06, "loss": 1.7108557224273682, "step": 2146 }, { "epoch": 0.6611265004616805, "grad_norm": 87.0, "learning_rate": 2.7793730450169186e-06, "loss": 1.2445833683013916, "step": 2148 }, { "epoch": 0.6617420744844568, "grad_norm": 34.75, "learning_rate": 2.778911979019273e-06, "loss": 1.3835158348083496, "step": 2150 }, { "epoch": 0.662357648507233, "grad_norm": 20.25, "learning_rate": 2.778450480635086e-06, "loss": 1.3453741073608398, "step": 2152 }, { "epoch": 0.6629732225300092, "grad_norm": 19.375, "learning_rate": 2.777988550068201e-06, "loss": 1.3057246208190918, "step": 2154 }, { "epoch": 0.6635887965527855, "grad_norm": 20.125, "learning_rate": 2.7775261875226544e-06, "loss": 1.776896357536316, "step": 2156 }, { "epoch": 0.6642043705755617, "grad_norm": 10.6875, "learning_rate": 2.7770633932026714e-06, "loss": 1.359185814857483, "step": 2158 }, { "epoch": 0.6648199445983379, "grad_norm": 24.125, "learning_rate": 2.776600167312669e-06, "loss": 1.4824891090393066, "step": 2160 }, { "epoch": 0.6654355186211142, "grad_norm": 11.375, "learning_rate": 2.776136510057255e-06, "loss": 1.4170936346054077, "step": 2162 }, { "epoch": 0.6660510926438904, "grad_norm": 19.25, "learning_rate": 2.7756724216412274e-06, "loss": 1.7150704860687256, "step": 2164 }, { "epoch": 0.6666666666666666, "grad_norm": 26.0, "learning_rate": 2.7752079022695735e-06, "loss": 1.2592456340789795, "step": 2166 }, { "epoch": 0.6672822406894429, "grad_norm": 10.1875, "learning_rate": 2.7747429521474738e-06, "loss": 1.4872198104858398, "step": 2168 }, { "epoch": 0.6678978147122191, "grad_norm": 21.625, "learning_rate": 2.7742775714802955e-06, "loss": 1.5821186304092407, "step": 2170 }, { "epoch": 0.6685133887349953, "grad_norm": 15.6875, "learning_rate": 2.7738117604735985e-06, "loss": 1.4031760692596436, "step": 2172 }, { "epoch": 0.6691289627577717, "grad_norm": 41.0, "learning_rate": 2.7733455193331332e-06, "loss": 1.5786303281784058, "step": 2174 }, { "epoch": 0.6697445367805479, "grad_norm": 24.125, "learning_rate": 2.7728788482648364e-06, "loss": 1.2885444164276123, "step": 2176 }, { "epoch": 0.6703601108033241, "grad_norm": 12.8125, "learning_rate": 2.7724117474748393e-06, "loss": 1.0977085828781128, "step": 2178 }, { "epoch": 0.6709756848261004, "grad_norm": 26.875, "learning_rate": 2.7719442171694602e-06, "loss": 1.7864042520523071, "step": 2180 }, { "epoch": 0.6715912588488766, "grad_norm": 16.5, "learning_rate": 2.7714762575552083e-06, "loss": 1.787111759185791, "step": 2182 }, { "epoch": 0.6722068328716528, "grad_norm": 33.0, "learning_rate": 2.7710078688387807e-06, "loss": 1.441255807876587, "step": 2184 }, { "epoch": 0.6728224068944291, "grad_norm": 13.5, "learning_rate": 2.770539051227066e-06, "loss": 1.3002700805664062, "step": 2186 }, { "epoch": 0.6734379809172053, "grad_norm": 25.25, "learning_rate": 2.770069804927141e-06, "loss": 1.2706061601638794, "step": 2188 }, { "epoch": 0.6740535549399815, "grad_norm": 13.9375, "learning_rate": 2.7696001301462732e-06, "loss": 1.1968492269515991, "step": 2190 }, { "epoch": 0.6746691289627578, "grad_norm": 19.0, "learning_rate": 2.769130027091918e-06, "loss": 1.6952558755874634, "step": 2192 }, { "epoch": 0.675284702985534, "grad_norm": 8.625, "learning_rate": 2.76865949597172e-06, "loss": 1.7061266899108887, "step": 2194 }, { "epoch": 0.6759002770083102, "grad_norm": 17.75, "learning_rate": 2.768188536993514e-06, "loss": 0.9394193887710571, "step": 2196 }, { "epoch": 0.6765158510310865, "grad_norm": 8.9375, "learning_rate": 2.7677171503653236e-06, "loss": 0.793316125869751, "step": 2198 }, { "epoch": 0.6771314250538627, "grad_norm": 13.4375, "learning_rate": 2.7672453362953588e-06, "loss": 1.0395151376724243, "step": 2200 }, { "epoch": 0.6777469990766389, "grad_norm": 28.625, "learning_rate": 2.766773094992023e-06, "loss": 1.7974693775177002, "step": 2202 }, { "epoch": 0.6783625730994152, "grad_norm": 11.5625, "learning_rate": 2.766300426663904e-06, "loss": 0.9771690368652344, "step": 2204 }, { "epoch": 0.6789781471221914, "grad_norm": 22.25, "learning_rate": 2.76582733151978e-06, "loss": 0.8614020347595215, "step": 2206 }, { "epoch": 0.6795937211449676, "grad_norm": 1184.0, "learning_rate": 2.7653538097686183e-06, "loss": 1.854684829711914, "step": 2208 }, { "epoch": 0.6802092951677439, "grad_norm": 13.25, "learning_rate": 2.7648798616195734e-06, "loss": 1.3579025268554688, "step": 2210 }, { "epoch": 0.6808248691905202, "grad_norm": 9.9375, "learning_rate": 2.7644054872819902e-06, "loss": 0.8971392512321472, "step": 2212 }, { "epoch": 0.6814404432132964, "grad_norm": 6.71875, "learning_rate": 2.7639306869653982e-06, "loss": 1.313225269317627, "step": 2214 }, { "epoch": 0.6820560172360727, "grad_norm": 19.125, "learning_rate": 2.7634554608795185e-06, "loss": 1.3202639818191528, "step": 2216 }, { "epoch": 0.6826715912588489, "grad_norm": 14.25, "learning_rate": 2.762979809234259e-06, "loss": 1.181537389755249, "step": 2218 }, { "epoch": 0.6832871652816251, "grad_norm": 4.65625, "learning_rate": 2.7625037322397156e-06, "loss": 1.23629629611969, "step": 2220 }, { "epoch": 0.6839027393044014, "grad_norm": 41.75, "learning_rate": 2.7620272301061716e-06, "loss": 1.233704686164856, "step": 2222 }, { "epoch": 0.6845183133271776, "grad_norm": 11.9375, "learning_rate": 2.7615503030440984e-06, "loss": 1.2978665828704834, "step": 2224 }, { "epoch": 0.6851338873499538, "grad_norm": 13.75, "learning_rate": 2.761072951264156e-06, "loss": 1.2603421211242676, "step": 2226 }, { "epoch": 0.6857494613727301, "grad_norm": 28.0, "learning_rate": 2.7605951749771914e-06, "loss": 1.3707977533340454, "step": 2228 }, { "epoch": 0.6863650353955063, "grad_norm": 15.9375, "learning_rate": 2.7601169743942375e-06, "loss": 1.8459405899047852, "step": 2230 }, { "epoch": 0.6869806094182825, "grad_norm": 19.125, "learning_rate": 2.7596383497265174e-06, "loss": 1.42882239818573, "step": 2232 }, { "epoch": 0.6875961834410588, "grad_norm": 28.375, "learning_rate": 2.7591593011854395e-06, "loss": 1.8060115575790405, "step": 2234 }, { "epoch": 0.688211757463835, "grad_norm": 32.75, "learning_rate": 2.758679828982601e-06, "loss": 1.7955055236816406, "step": 2236 }, { "epoch": 0.6888273314866112, "grad_norm": 15.3125, "learning_rate": 2.758199933329784e-06, "loss": 1.348275899887085, "step": 2238 }, { "epoch": 0.6894429055093875, "grad_norm": 13.25, "learning_rate": 2.7577196144389592e-06, "loss": 1.5242661237716675, "step": 2240 }, { "epoch": 0.6900584795321637, "grad_norm": 12.875, "learning_rate": 2.7572388725222848e-06, "loss": 1.3932478427886963, "step": 2242 }, { "epoch": 0.6906740535549399, "grad_norm": 14.5625, "learning_rate": 2.7567577077921046e-06, "loss": 1.2572059631347656, "step": 2244 }, { "epoch": 0.6912896275777162, "grad_norm": 18.0, "learning_rate": 2.7562761204609494e-06, "loss": 1.7579888105392456, "step": 2246 }, { "epoch": 0.6919052016004925, "grad_norm": 22.25, "learning_rate": 2.7557941107415375e-06, "loss": 1.524322271347046, "step": 2248 }, { "epoch": 0.6925207756232687, "grad_norm": 9.9375, "learning_rate": 2.755311678846773e-06, "loss": 1.2153962850570679, "step": 2250 }, { "epoch": 0.693136349646045, "grad_norm": 11.1875, "learning_rate": 2.7548288249897455e-06, "loss": 1.6535332202911377, "step": 2252 }, { "epoch": 0.6937519236688212, "grad_norm": 13.4375, "learning_rate": 2.7543455493837334e-06, "loss": 0.6955275535583496, "step": 2254 }, { "epoch": 0.6943674976915974, "grad_norm": 5.6875, "learning_rate": 2.7538618522422e-06, "loss": 1.3415193557739258, "step": 2256 }, { "epoch": 0.6949830717143737, "grad_norm": 10.3125, "learning_rate": 2.7533777337787945e-06, "loss": 1.3508849143981934, "step": 2258 }, { "epoch": 0.6955986457371499, "grad_norm": 13.5, "learning_rate": 2.752893194207352e-06, "loss": 1.2617613077163696, "step": 2260 }, { "epoch": 0.6962142197599261, "grad_norm": 17.75, "learning_rate": 2.7524082337418948e-06, "loss": 1.5909432172775269, "step": 2262 }, { "epoch": 0.6968297937827024, "grad_norm": 16.0, "learning_rate": 2.751922852596631e-06, "loss": 1.5903215408325195, "step": 2264 }, { "epoch": 0.6974453678054786, "grad_norm": 6.5, "learning_rate": 2.751437050985954e-06, "loss": 1.2537171840667725, "step": 2266 }, { "epoch": 0.6980609418282548, "grad_norm": 23.375, "learning_rate": 2.7509508291244417e-06, "loss": 1.4879339933395386, "step": 2268 }, { "epoch": 0.6986765158510311, "grad_norm": 14.375, "learning_rate": 2.75046418722686e-06, "loss": 1.5560357570648193, "step": 2270 }, { "epoch": 0.6992920898738073, "grad_norm": 12.75, "learning_rate": 2.749977125508158e-06, "loss": 1.4228450059890747, "step": 2272 }, { "epoch": 0.6999076638965835, "grad_norm": 143.0, "learning_rate": 2.7494896441834726e-06, "loss": 1.4056365489959717, "step": 2274 }, { "epoch": 0.7005232379193598, "grad_norm": 3.40625, "learning_rate": 2.749001743468125e-06, "loss": 1.3732974529266357, "step": 2276 }, { "epoch": 0.701138811942136, "grad_norm": 16.875, "learning_rate": 2.7485134235776207e-06, "loss": 1.351379156112671, "step": 2278 }, { "epoch": 0.7017543859649122, "grad_norm": 3.46875, "learning_rate": 2.7480246847276512e-06, "loss": 1.1557962894439697, "step": 2280 }, { "epoch": 0.7023699599876885, "grad_norm": 36.25, "learning_rate": 2.747535527134093e-06, "loss": 1.2525272369384766, "step": 2282 }, { "epoch": 0.7029855340104647, "grad_norm": 16.25, "learning_rate": 2.747045951013008e-06, "loss": 0.9319900274276733, "step": 2284 }, { "epoch": 0.703601108033241, "grad_norm": 12.3125, "learning_rate": 2.7465559565806423e-06, "loss": 1.4514952898025513, "step": 2286 }, { "epoch": 0.7042166820560173, "grad_norm": 9.0, "learning_rate": 2.7460655440534277e-06, "loss": 1.169830322265625, "step": 2288 }, { "epoch": 0.7048322560787935, "grad_norm": 6.125, "learning_rate": 2.7455747136479778e-06, "loss": 1.2618813514709473, "step": 2290 }, { "epoch": 0.7054478301015698, "grad_norm": 2.96875, "learning_rate": 2.745083465581096e-06, "loss": 1.0544602870941162, "step": 2292 }, { "epoch": 0.706063404124346, "grad_norm": 8.8125, "learning_rate": 2.744591800069765e-06, "loss": 1.2584667205810547, "step": 2294 }, { "epoch": 0.7066789781471222, "grad_norm": 12.625, "learning_rate": 2.7440997173311546e-06, "loss": 1.459695816040039, "step": 2296 }, { "epoch": 0.7072945521698984, "grad_norm": 8.0, "learning_rate": 2.7436072175826177e-06, "loss": 1.2529194355010986, "step": 2298 }, { "epoch": 0.7079101261926747, "grad_norm": 10.75, "learning_rate": 2.7431143010416932e-06, "loss": 1.2173200845718384, "step": 2300 }, { "epoch": 0.7085257002154509, "grad_norm": 24.0, "learning_rate": 2.7426209679261024e-06, "loss": 1.2202057838439941, "step": 2302 }, { "epoch": 0.7091412742382271, "grad_norm": 15.0, "learning_rate": 2.7421272184537516e-06, "loss": 1.6685733795166016, "step": 2304 }, { "epoch": 0.7097568482610034, "grad_norm": 38.0, "learning_rate": 2.7416330528427285e-06, "loss": 1.4149043560028076, "step": 2306 }, { "epoch": 0.7103724222837796, "grad_norm": 12.4375, "learning_rate": 2.7411384713113094e-06, "loss": 1.2093353271484375, "step": 2308 }, { "epoch": 0.7109879963065558, "grad_norm": 10.4375, "learning_rate": 2.740643474077949e-06, "loss": 1.1467716693878174, "step": 2310 }, { "epoch": 0.7116035703293321, "grad_norm": 25.625, "learning_rate": 2.74014806136129e-06, "loss": 1.4263098239898682, "step": 2312 }, { "epoch": 0.7122191443521083, "grad_norm": 12.6875, "learning_rate": 2.739652233380156e-06, "loss": 1.4533731937408447, "step": 2314 }, { "epoch": 0.7128347183748845, "grad_norm": 7.375, "learning_rate": 2.7391559903535543e-06, "loss": 1.3134911060333252, "step": 2316 }, { "epoch": 0.7134502923976608, "grad_norm": 11.9375, "learning_rate": 2.7386593325006774e-06, "loss": 1.3923784494400024, "step": 2318 }, { "epoch": 0.714065866420437, "grad_norm": 18.875, "learning_rate": 2.7381622600408983e-06, "loss": 1.280435562133789, "step": 2320 }, { "epoch": 0.7146814404432132, "grad_norm": 15.75, "learning_rate": 2.737664773193776e-06, "loss": 0.7519451379776001, "step": 2322 }, { "epoch": 0.7152970144659896, "grad_norm": 15.625, "learning_rate": 2.7371668721790487e-06, "loss": 1.4741475582122803, "step": 2324 }, { "epoch": 0.7159125884887658, "grad_norm": 13.5625, "learning_rate": 2.7366685572166416e-06, "loss": 1.3647998571395874, "step": 2326 }, { "epoch": 0.716528162511542, "grad_norm": 19.375, "learning_rate": 2.736169828526661e-06, "loss": 1.6258673667907715, "step": 2328 }, { "epoch": 0.7171437365343183, "grad_norm": 16.25, "learning_rate": 2.7356706863293943e-06, "loss": 1.5511900186538696, "step": 2330 }, { "epoch": 0.7177593105570945, "grad_norm": 10.3125, "learning_rate": 2.7351711308453158e-06, "loss": 1.2366533279418945, "step": 2332 }, { "epoch": 0.7183748845798708, "grad_norm": 7.84375, "learning_rate": 2.734671162295077e-06, "loss": 1.3121178150177002, "step": 2334 }, { "epoch": 0.718990458602647, "grad_norm": 12.5625, "learning_rate": 2.7341707808995167e-06, "loss": 1.1641346216201782, "step": 2336 }, { "epoch": 0.7196060326254232, "grad_norm": 12.5625, "learning_rate": 2.733669986879653e-06, "loss": 1.4536982774734497, "step": 2338 }, { "epoch": 0.7202216066481995, "grad_norm": 22.5, "learning_rate": 2.733168780456687e-06, "loss": 1.1714286804199219, "step": 2340 }, { "epoch": 0.7208371806709757, "grad_norm": 33.25, "learning_rate": 2.732667161852003e-06, "loss": 1.4636425971984863, "step": 2342 }, { "epoch": 0.7214527546937519, "grad_norm": 12.5625, "learning_rate": 2.732165131287165e-06, "loss": 1.734552025794983, "step": 2344 }, { "epoch": 0.7220683287165282, "grad_norm": 16.5, "learning_rate": 2.731662688983922e-06, "loss": 1.1435258388519287, "step": 2346 }, { "epoch": 0.7226839027393044, "grad_norm": 11.5, "learning_rate": 2.731159835164203e-06, "loss": 1.2161524295806885, "step": 2348 }, { "epoch": 0.7232994767620806, "grad_norm": 14.25, "learning_rate": 2.7306565700501187e-06, "loss": 1.2991399765014648, "step": 2350 }, { "epoch": 0.7239150507848569, "grad_norm": 18.125, "learning_rate": 2.730152893863962e-06, "loss": 1.5045785903930664, "step": 2352 }, { "epoch": 0.7245306248076331, "grad_norm": 19.75, "learning_rate": 2.7296488068282075e-06, "loss": 1.297149658203125, "step": 2354 }, { "epoch": 0.7251461988304093, "grad_norm": 33.25, "learning_rate": 2.7291443091655106e-06, "loss": 1.441976547241211, "step": 2356 }, { "epoch": 0.7257617728531855, "grad_norm": 22.625, "learning_rate": 2.728639401098709e-06, "loss": 1.2886719703674316, "step": 2358 }, { "epoch": 0.7263773468759618, "grad_norm": 30.25, "learning_rate": 2.7281340828508204e-06, "loss": 1.936105489730835, "step": 2360 }, { "epoch": 0.7269929208987381, "grad_norm": 12.125, "learning_rate": 2.7276283546450453e-06, "loss": 1.4851388931274414, "step": 2362 }, { "epoch": 0.7276084949215144, "grad_norm": 15.875, "learning_rate": 2.727122216704764e-06, "loss": 1.3581610918045044, "step": 2364 }, { "epoch": 0.7282240689442906, "grad_norm": 14.6875, "learning_rate": 2.7266156692535384e-06, "loss": 1.2150791883468628, "step": 2366 }, { "epoch": 0.7288396429670668, "grad_norm": 13.4375, "learning_rate": 2.7261087125151103e-06, "loss": 1.5836424827575684, "step": 2368 }, { "epoch": 0.7294552169898431, "grad_norm": 9.6875, "learning_rate": 2.7256013467134044e-06, "loss": 1.5009336471557617, "step": 2370 }, { "epoch": 0.7300707910126193, "grad_norm": 43.25, "learning_rate": 2.725093572072524e-06, "loss": 1.7373287677764893, "step": 2372 }, { "epoch": 0.7306863650353955, "grad_norm": 12.4375, "learning_rate": 2.7245853888167537e-06, "loss": 1.2735278606414795, "step": 2374 }, { "epoch": 0.7313019390581718, "grad_norm": 32.75, "learning_rate": 2.724076797170559e-06, "loss": 1.5648930072784424, "step": 2376 }, { "epoch": 0.731917513080948, "grad_norm": 18.0, "learning_rate": 2.723567797358585e-06, "loss": 1.8062489032745361, "step": 2378 }, { "epoch": 0.7325330871037242, "grad_norm": 15.0, "learning_rate": 2.7230583896056573e-06, "loss": 1.4886645078659058, "step": 2380 }, { "epoch": 0.7331486611265005, "grad_norm": 11.625, "learning_rate": 2.7225485741367827e-06, "loss": 1.6303420066833496, "step": 2382 }, { "epoch": 0.7337642351492767, "grad_norm": 15.1875, "learning_rate": 2.7220383511771466e-06, "loss": 1.212205410003662, "step": 2384 }, { "epoch": 0.7343798091720529, "grad_norm": 14.125, "learning_rate": 2.7215277209521153e-06, "loss": 1.199798345565796, "step": 2386 }, { "epoch": 0.7349953831948292, "grad_norm": 14.0625, "learning_rate": 2.721016683687235e-06, "loss": 1.6477723121643066, "step": 2388 }, { "epoch": 0.7356109572176054, "grad_norm": 36.25, "learning_rate": 2.7205052396082316e-06, "loss": 1.1711366176605225, "step": 2390 }, { "epoch": 0.7362265312403816, "grad_norm": 9.1875, "learning_rate": 2.7199933889410095e-06, "loss": 1.4491968154907227, "step": 2392 }, { "epoch": 0.7368421052631579, "grad_norm": 8.4375, "learning_rate": 2.7194811319116537e-06, "loss": 1.3231985569000244, "step": 2394 }, { "epoch": 0.7374576792859341, "grad_norm": 17.375, "learning_rate": 2.718968468746431e-06, "loss": 1.2740378379821777, "step": 2396 }, { "epoch": 0.7380732533087103, "grad_norm": 15.25, "learning_rate": 2.7184553996717827e-06, "loss": 1.203467607498169, "step": 2398 }, { "epoch": 0.7386888273314867, "grad_norm": 12.4375, "learning_rate": 2.7179419249143326e-06, "loss": 1.423843264579773, "step": 2400 }, { "epoch": 0.7393044013542629, "grad_norm": 13.125, "learning_rate": 2.7174280447008843e-06, "loss": 1.42924165725708, "step": 2402 }, { "epoch": 0.7399199753770391, "grad_norm": 12.875, "learning_rate": 2.7169137592584177e-06, "loss": 1.3557947874069214, "step": 2404 }, { "epoch": 0.7405355493998154, "grad_norm": 23.875, "learning_rate": 2.7163990688140948e-06, "loss": 1.376276969909668, "step": 2406 }, { "epoch": 0.7411511234225916, "grad_norm": 7.90625, "learning_rate": 2.7158839735952536e-06, "loss": 1.2827298641204834, "step": 2408 }, { "epoch": 0.7417666974453678, "grad_norm": 14.875, "learning_rate": 2.715368473829413e-06, "loss": 1.4431157112121582, "step": 2410 }, { "epoch": 0.7423822714681441, "grad_norm": 8.5625, "learning_rate": 2.71485256974427e-06, "loss": 1.05255126953125, "step": 2412 }, { "epoch": 0.7429978454909203, "grad_norm": 12.625, "learning_rate": 2.7143362615676994e-06, "loss": 1.1840324401855469, "step": 2414 }, { "epoch": 0.7436134195136965, "grad_norm": 16.75, "learning_rate": 2.7138195495277556e-06, "loss": 1.7737140655517578, "step": 2416 }, { "epoch": 0.7442289935364728, "grad_norm": 5.8125, "learning_rate": 2.7133024338526705e-06, "loss": 1.3097442388534546, "step": 2418 }, { "epoch": 0.744844567559249, "grad_norm": 15.75, "learning_rate": 2.7127849147708544e-06, "loss": 1.3597081899642944, "step": 2420 }, { "epoch": 0.7454601415820252, "grad_norm": 7.875, "learning_rate": 2.712266992510897e-06, "loss": 1.2132012844085693, "step": 2422 }, { "epoch": 0.7460757156048015, "grad_norm": 9.5, "learning_rate": 2.7117486673015647e-06, "loss": 1.2576617002487183, "step": 2424 }, { "epoch": 0.7466912896275777, "grad_norm": 17.25, "learning_rate": 2.7112299393718024e-06, "loss": 0.8877116441726685, "step": 2426 }, { "epoch": 0.7473068636503539, "grad_norm": 12.875, "learning_rate": 2.710710808950733e-06, "loss": 1.40200936794281, "step": 2428 }, { "epoch": 0.7479224376731302, "grad_norm": 23.25, "learning_rate": 2.710191276267656e-06, "loss": 0.8947640061378479, "step": 2430 }, { "epoch": 0.7485380116959064, "grad_norm": 12.6875, "learning_rate": 2.7096713415520514e-06, "loss": 1.2542033195495605, "step": 2432 }, { "epoch": 0.7491535857186826, "grad_norm": 13.3125, "learning_rate": 2.709151005033573e-06, "loss": 1.3930459022521973, "step": 2434 }, { "epoch": 0.7497691597414589, "grad_norm": 15.5625, "learning_rate": 2.7086302669420553e-06, "loss": 1.506596326828003, "step": 2436 }, { "epoch": 0.7503847337642352, "grad_norm": 12.0, "learning_rate": 2.708109127507509e-06, "loss": 1.3037588596343994, "step": 2438 }, { "epoch": 0.7510003077870114, "grad_norm": 10.3125, "learning_rate": 2.707587586960121e-06, "loss": 1.2608420848846436, "step": 2440 }, { "epoch": 0.7516158818097877, "grad_norm": 34.0, "learning_rate": 2.7070656455302567e-06, "loss": 1.4738867282867432, "step": 2442 }, { "epoch": 0.7522314558325639, "grad_norm": 21.875, "learning_rate": 2.706543303448459e-06, "loss": 1.5918917655944824, "step": 2444 }, { "epoch": 0.7528470298553401, "grad_norm": 8.9375, "learning_rate": 2.706020560945446e-06, "loss": 1.3004255294799805, "step": 2446 }, { "epoch": 0.7534626038781164, "grad_norm": 12.0, "learning_rate": 2.705497418252114e-06, "loss": 1.5478678941726685, "step": 2448 }, { "epoch": 0.7540781779008926, "grad_norm": 3.25, "learning_rate": 2.7049738755995356e-06, "loss": 1.1431105136871338, "step": 2450 }, { "epoch": 0.7546937519236688, "grad_norm": 11.375, "learning_rate": 2.704449933218961e-06, "loss": 1.0184783935546875, "step": 2452 }, { "epoch": 0.7553093259464451, "grad_norm": 33.75, "learning_rate": 2.7039255913418157e-06, "loss": 1.7602791786193848, "step": 2454 }, { "epoch": 0.7559248999692213, "grad_norm": 5.96875, "learning_rate": 2.7034008501997013e-06, "loss": 1.3575026988983154, "step": 2456 }, { "epoch": 0.7565404739919975, "grad_norm": 10.0, "learning_rate": 2.7028757100243973e-06, "loss": 1.474034309387207, "step": 2458 }, { "epoch": 0.7571560480147738, "grad_norm": 55.0, "learning_rate": 2.702350171047859e-06, "loss": 1.3285274505615234, "step": 2460 }, { "epoch": 0.75777162203755, "grad_norm": 19.875, "learning_rate": 2.701824233502217e-06, "loss": 1.4691517353057861, "step": 2462 }, { "epoch": 0.7583871960603262, "grad_norm": 36.5, "learning_rate": 2.7012978976197793e-06, "loss": 1.464407205581665, "step": 2464 }, { "epoch": 0.7590027700831025, "grad_norm": 15.0, "learning_rate": 2.7007711636330273e-06, "loss": 1.5255820751190186, "step": 2466 }, { "epoch": 0.7596183441058787, "grad_norm": 9.375, "learning_rate": 2.7002440317746224e-06, "loss": 1.279646396636963, "step": 2468 }, { "epoch": 0.7602339181286549, "grad_norm": 6.21875, "learning_rate": 2.699716502277397e-06, "loss": 1.1852023601531982, "step": 2470 }, { "epoch": 0.7608494921514312, "grad_norm": 23.5, "learning_rate": 2.6991885753743632e-06, "loss": 1.4758989810943604, "step": 2472 }, { "epoch": 0.7614650661742074, "grad_norm": 20.875, "learning_rate": 2.698660251298706e-06, "loss": 1.7962994575500488, "step": 2474 }, { "epoch": 0.7620806401969837, "grad_norm": 12.125, "learning_rate": 2.698131530283788e-06, "loss": 1.674941062927246, "step": 2476 }, { "epoch": 0.76269621421976, "grad_norm": 20.625, "learning_rate": 2.697602412563144e-06, "loss": 1.3710949420928955, "step": 2478 }, { "epoch": 0.7633117882425362, "grad_norm": 13.0, "learning_rate": 2.697072898370487e-06, "loss": 1.4410480260849, "step": 2480 }, { "epoch": 0.7639273622653124, "grad_norm": 25.875, "learning_rate": 2.696542987939704e-06, "loss": 1.4642608165740967, "step": 2482 }, { "epoch": 0.7645429362880887, "grad_norm": 81.5, "learning_rate": 2.6960126815048573e-06, "loss": 1.811263084411621, "step": 2484 }, { "epoch": 0.7651585103108649, "grad_norm": 13.25, "learning_rate": 2.6954819793001828e-06, "loss": 1.1713266372680664, "step": 2486 }, { "epoch": 0.7657740843336411, "grad_norm": 16.375, "learning_rate": 2.694950881560094e-06, "loss": 1.1104718446731567, "step": 2488 }, { "epoch": 0.7663896583564174, "grad_norm": 28.375, "learning_rate": 2.6944193885191753e-06, "loss": 1.6377125978469849, "step": 2490 }, { "epoch": 0.7670052323791936, "grad_norm": 12.125, "learning_rate": 2.693887500412189e-06, "loss": 1.28836190700531, "step": 2492 }, { "epoch": 0.7676208064019698, "grad_norm": 9.875, "learning_rate": 2.6933552174740704e-06, "loss": 0.921204686164856, "step": 2494 }, { "epoch": 0.7682363804247461, "grad_norm": 15.8125, "learning_rate": 2.6928225399399296e-06, "loss": 1.48775315284729, "step": 2496 }, { "epoch": 0.7688519544475223, "grad_norm": 16.5, "learning_rate": 2.692289468045051e-06, "loss": 1.3238601684570312, "step": 2498 }, { "epoch": 0.7694675284702985, "grad_norm": 4.71875, "learning_rate": 2.6917560020248935e-06, "loss": 1.343148946762085, "step": 2500 }, { "epoch": 0.7700831024930748, "grad_norm": 40.0, "learning_rate": 2.6912221421150883e-06, "loss": 1.518383264541626, "step": 2502 }, { "epoch": 0.770698676515851, "grad_norm": 11.0625, "learning_rate": 2.6906878885514435e-06, "loss": 1.2683169841766357, "step": 2504 }, { "epoch": 0.7713142505386272, "grad_norm": 6.09375, "learning_rate": 2.6901532415699378e-06, "loss": 1.1903431415557861, "step": 2506 }, { "epoch": 0.7719298245614035, "grad_norm": 89.0, "learning_rate": 2.6896182014067273e-06, "loss": 1.6317634582519531, "step": 2508 }, { "epoch": 0.7725453985841797, "grad_norm": 10.4375, "learning_rate": 2.689082768298138e-06, "loss": 1.2749760150909424, "step": 2510 }, { "epoch": 0.7731609726069559, "grad_norm": 36.75, "learning_rate": 2.688546942480673e-06, "loss": 1.9662134647369385, "step": 2512 }, { "epoch": 0.7737765466297323, "grad_norm": 11.5, "learning_rate": 2.688010724191006e-06, "loss": 1.454052209854126, "step": 2514 }, { "epoch": 0.7743921206525085, "grad_norm": 15.875, "learning_rate": 2.687474113665985e-06, "loss": 1.078743577003479, "step": 2516 }, { "epoch": 0.7750076946752847, "grad_norm": 50.25, "learning_rate": 2.686937111142633e-06, "loss": 1.247643232345581, "step": 2518 }, { "epoch": 0.775623268698061, "grad_norm": 10.0625, "learning_rate": 2.6863997168581427e-06, "loss": 1.3732048273086548, "step": 2520 }, { "epoch": 0.7762388427208372, "grad_norm": 11.625, "learning_rate": 2.685861931049884e-06, "loss": 1.6267473697662354, "step": 2522 }, { "epoch": 0.7768544167436134, "grad_norm": 12.5, "learning_rate": 2.6853237539553947e-06, "loss": 1.3458107709884644, "step": 2524 }, { "epoch": 0.7774699907663897, "grad_norm": 10.0, "learning_rate": 2.684785185812391e-06, "loss": 1.3616960048675537, "step": 2526 }, { "epoch": 0.7780855647891659, "grad_norm": 6.125, "learning_rate": 2.684246226858758e-06, "loss": 1.3410570621490479, "step": 2528 }, { "epoch": 0.7787011388119421, "grad_norm": 28.875, "learning_rate": 2.6837068773325537e-06, "loss": 1.5064880847930908, "step": 2530 }, { "epoch": 0.7793167128347184, "grad_norm": 10.0625, "learning_rate": 2.68316713747201e-06, "loss": 1.109811782836914, "step": 2532 }, { "epoch": 0.7799322868574946, "grad_norm": 19.0, "learning_rate": 2.6826270075155315e-06, "loss": 1.1550883054733276, "step": 2534 }, { "epoch": 0.7805478608802708, "grad_norm": 4.28125, "learning_rate": 2.682086487701693e-06, "loss": 1.033223032951355, "step": 2536 }, { "epoch": 0.7811634349030471, "grad_norm": 12.5, "learning_rate": 2.6815455782692434e-06, "loss": 1.2696874141693115, "step": 2538 }, { "epoch": 0.7817790089258233, "grad_norm": 19.0, "learning_rate": 2.681004279457102e-06, "loss": 0.9384381771087646, "step": 2540 }, { "epoch": 0.7823945829485995, "grad_norm": 6.53125, "learning_rate": 2.6804625915043623e-06, "loss": 1.2728493213653564, "step": 2542 }, { "epoch": 0.7830101569713758, "grad_norm": 15.4375, "learning_rate": 2.679920514650288e-06, "loss": 0.9392402172088623, "step": 2544 }, { "epoch": 0.783625730994152, "grad_norm": 10.75, "learning_rate": 2.679378049134315e-06, "loss": 1.66042160987854, "step": 2546 }, { "epoch": 0.7842413050169282, "grad_norm": 9.8125, "learning_rate": 2.678835195196051e-06, "loss": 1.1751291751861572, "step": 2548 }, { "epoch": 0.7848568790397045, "grad_norm": 13.9375, "learning_rate": 2.6782919530752756e-06, "loss": 1.453952431678772, "step": 2550 }, { "epoch": 0.7854724530624808, "grad_norm": 12.1875, "learning_rate": 2.6777483230119388e-06, "loss": 1.568293809890747, "step": 2552 }, { "epoch": 0.786088027085257, "grad_norm": 17.75, "learning_rate": 2.6772043052461632e-06, "loss": 1.2449113130569458, "step": 2554 }, { "epoch": 0.7867036011080333, "grad_norm": 71.5, "learning_rate": 2.676659900018242e-06, "loss": 1.2687745094299316, "step": 2556 }, { "epoch": 0.7873191751308095, "grad_norm": 106.5, "learning_rate": 2.6761151075686386e-06, "loss": 1.4777586460113525, "step": 2558 }, { "epoch": 0.7879347491535857, "grad_norm": 8.5625, "learning_rate": 2.6755699281379897e-06, "loss": 1.2233953475952148, "step": 2560 }, { "epoch": 0.788550323176362, "grad_norm": 12.6875, "learning_rate": 2.6750243619671015e-06, "loss": 1.5392688512802124, "step": 2562 }, { "epoch": 0.7891658971991382, "grad_norm": 26.25, "learning_rate": 2.6744784092969506e-06, "loss": 1.1792702674865723, "step": 2564 }, { "epoch": 0.7897814712219144, "grad_norm": 13.3125, "learning_rate": 2.673932070368686e-06, "loss": 1.4379987716674805, "step": 2566 }, { "epoch": 0.7903970452446907, "grad_norm": 5.0625, "learning_rate": 2.6733853454236242e-06, "loss": 1.1217145919799805, "step": 2568 }, { "epoch": 0.7910126192674669, "grad_norm": 12.0, "learning_rate": 2.6728382347032564e-06, "loss": 1.3270764350891113, "step": 2570 }, { "epoch": 0.7916281932902431, "grad_norm": 16.625, "learning_rate": 2.672290738449241e-06, "loss": 1.4277535676956177, "step": 2572 }, { "epoch": 0.7922437673130194, "grad_norm": 96.0, "learning_rate": 2.6717428569034083e-06, "loss": 1.6692981719970703, "step": 2574 }, { "epoch": 0.7928593413357956, "grad_norm": 16.625, "learning_rate": 2.6711945903077576e-06, "loss": 1.504457712173462, "step": 2576 }, { "epoch": 0.7934749153585718, "grad_norm": 17.0, "learning_rate": 2.6706459389044587e-06, "loss": 1.1550219058990479, "step": 2578 }, { "epoch": 0.7940904893813481, "grad_norm": 12.625, "learning_rate": 2.670096902935852e-06, "loss": 0.9650784134864807, "step": 2580 }, { "epoch": 0.7947060634041243, "grad_norm": 9.375, "learning_rate": 2.6695474826444473e-06, "loss": 1.3110347986221313, "step": 2582 }, { "epoch": 0.7953216374269005, "grad_norm": 8.4375, "learning_rate": 2.6689976782729238e-06, "loss": 1.3138242959976196, "step": 2584 }, { "epoch": 0.7959372114496768, "grad_norm": 19.75, "learning_rate": 2.6684474900641317e-06, "loss": 1.6382602453231812, "step": 2586 }, { "epoch": 0.7965527854724531, "grad_norm": 12.9375, "learning_rate": 2.6678969182610885e-06, "loss": 1.4878780841827393, "step": 2588 }, { "epoch": 0.7971683594952294, "grad_norm": 21.625, "learning_rate": 2.667345963106984e-06, "loss": 1.1575593948364258, "step": 2590 }, { "epoch": 0.7977839335180056, "grad_norm": 19.0, "learning_rate": 2.6667946248451737e-06, "loss": 1.3297533988952637, "step": 2592 }, { "epoch": 0.7983995075407818, "grad_norm": 44.25, "learning_rate": 2.6662429037191855e-06, "loss": 1.3626294136047363, "step": 2594 }, { "epoch": 0.799015081563558, "grad_norm": 18.375, "learning_rate": 2.6656907999727156e-06, "loss": 1.394080638885498, "step": 2596 }, { "epoch": 0.7996306555863343, "grad_norm": 58.75, "learning_rate": 2.665138313849628e-06, "loss": 1.375070571899414, "step": 2598 }, { "epoch": 0.8002462296091105, "grad_norm": 15.0, "learning_rate": 2.664585445593957e-06, "loss": 1.5231802463531494, "step": 2600 }, { "epoch": 0.8008618036318867, "grad_norm": 7.53125, "learning_rate": 2.664032195449905e-06, "loss": 1.1144105195999146, "step": 2602 }, { "epoch": 0.801477377654663, "grad_norm": 5.40625, "learning_rate": 2.6634785636618434e-06, "loss": 1.070705771446228, "step": 2604 }, { "epoch": 0.8020929516774392, "grad_norm": 16.875, "learning_rate": 2.6629245504743108e-06, "loss": 1.2115540504455566, "step": 2606 }, { "epoch": 0.8027085257002154, "grad_norm": 8.25, "learning_rate": 2.662370156132017e-06, "loss": 1.3473308086395264, "step": 2608 }, { "epoch": 0.8033240997229917, "grad_norm": 16.625, "learning_rate": 2.6618153808798385e-06, "loss": 1.4530270099639893, "step": 2610 }, { "epoch": 0.8039396737457679, "grad_norm": 22.75, "learning_rate": 2.661260224962819e-06, "loss": 0.9463323354721069, "step": 2612 }, { "epoch": 0.8045552477685441, "grad_norm": 4.625, "learning_rate": 2.6607046886261728e-06, "loss": 1.0715365409851074, "step": 2614 }, { "epoch": 0.8051708217913204, "grad_norm": 11.3125, "learning_rate": 2.66014877211528e-06, "loss": 1.1417533159255981, "step": 2616 }, { "epoch": 0.8057863958140966, "grad_norm": 12.625, "learning_rate": 2.65959247567569e-06, "loss": 1.3943442106246948, "step": 2618 }, { "epoch": 0.8064019698368728, "grad_norm": 11.5625, "learning_rate": 2.6590357995531195e-06, "loss": 1.2495189905166626, "step": 2620 }, { "epoch": 0.8070175438596491, "grad_norm": 11.6875, "learning_rate": 2.658478743993453e-06, "loss": 1.047597885131836, "step": 2622 }, { "epoch": 0.8076331178824253, "grad_norm": 7.9375, "learning_rate": 2.657921309242743e-06, "loss": 1.1524721384048462, "step": 2624 }, { "epoch": 0.8082486919052017, "grad_norm": 9.875, "learning_rate": 2.6573634955472074e-06, "loss": 1.3182554244995117, "step": 2626 }, { "epoch": 0.8088642659279779, "grad_norm": 13.1875, "learning_rate": 2.656805303153235e-06, "loss": 1.3932991027832031, "step": 2628 }, { "epoch": 0.8094798399507541, "grad_norm": 10.0625, "learning_rate": 2.65624673230738e-06, "loss": 0.956896185874939, "step": 2630 }, { "epoch": 0.8100954139735304, "grad_norm": 9.8125, "learning_rate": 2.6556877832563627e-06, "loss": 1.201633334159851, "step": 2632 }, { "epoch": 0.8107109879963066, "grad_norm": 17.25, "learning_rate": 2.6551284562470716e-06, "loss": 1.1008045673370361, "step": 2634 }, { "epoch": 0.8113265620190828, "grad_norm": 11.0625, "learning_rate": 2.6545687515265633e-06, "loss": 1.2820231914520264, "step": 2636 }, { "epoch": 0.811942136041859, "grad_norm": 20.875, "learning_rate": 2.6540086693420585e-06, "loss": 1.4971567392349243, "step": 2638 }, { "epoch": 0.8125577100646353, "grad_norm": 19.875, "learning_rate": 2.653448209940947e-06, "loss": 1.4473721981048584, "step": 2640 }, { "epoch": 0.8131732840874115, "grad_norm": 10.3125, "learning_rate": 2.652887373570784e-06, "loss": 1.492368221282959, "step": 2642 }, { "epoch": 0.8137888581101878, "grad_norm": 12.1875, "learning_rate": 2.6523261604792924e-06, "loss": 1.4002925157546997, "step": 2644 }, { "epoch": 0.814404432132964, "grad_norm": 7.96875, "learning_rate": 2.65176457091436e-06, "loss": 1.4026880264282227, "step": 2646 }, { "epoch": 0.8150200061557402, "grad_norm": 9.5625, "learning_rate": 2.651202605124041e-06, "loss": 1.2172515392303467, "step": 2648 }, { "epoch": 0.8156355801785165, "grad_norm": 22.625, "learning_rate": 2.6506402633565574e-06, "loss": 1.5221962928771973, "step": 2650 }, { "epoch": 0.8162511542012927, "grad_norm": 9.5625, "learning_rate": 2.650077545860295e-06, "loss": 1.2790491580963135, "step": 2652 }, { "epoch": 0.8168667282240689, "grad_norm": 15.0625, "learning_rate": 2.6495144528838083e-06, "loss": 1.5245963335037231, "step": 2654 }, { "epoch": 0.8174823022468451, "grad_norm": 15.5, "learning_rate": 2.6489509846758146e-06, "loss": 1.5136122703552246, "step": 2656 }, { "epoch": 0.8180978762696214, "grad_norm": 18.75, "learning_rate": 2.6483871414851997e-06, "loss": 1.0351097583770752, "step": 2658 }, { "epoch": 0.8187134502923976, "grad_norm": 11.3125, "learning_rate": 2.647822923561013e-06, "loss": 1.3081021308898926, "step": 2660 }, { "epoch": 0.8193290243151738, "grad_norm": 15.25, "learning_rate": 2.6472583311524704e-06, "loss": 1.4934989213943481, "step": 2662 }, { "epoch": 0.8199445983379502, "grad_norm": 28.25, "learning_rate": 2.646693364508953e-06, "loss": 1.2610775232315063, "step": 2664 }, { "epoch": 0.8205601723607264, "grad_norm": 18.875, "learning_rate": 2.6461280238800076e-06, "loss": 1.1182706356048584, "step": 2666 }, { "epoch": 0.8211757463835027, "grad_norm": 10.0, "learning_rate": 2.645562309515345e-06, "loss": 1.1223533153533936, "step": 2668 }, { "epoch": 0.8217913204062789, "grad_norm": 18.0, "learning_rate": 2.644996221664843e-06, "loss": 1.2998898029327393, "step": 2670 }, { "epoch": 0.8224068944290551, "grad_norm": 11.4375, "learning_rate": 2.644429760578542e-06, "loss": 1.4623315334320068, "step": 2672 }, { "epoch": 0.8230224684518314, "grad_norm": 14.1875, "learning_rate": 2.6438629265066496e-06, "loss": 1.1372411251068115, "step": 2674 }, { "epoch": 0.8236380424746076, "grad_norm": 13.0625, "learning_rate": 2.643295719699536e-06, "loss": 1.5036474466323853, "step": 2676 }, { "epoch": 0.8242536164973838, "grad_norm": 7.8125, "learning_rate": 2.642728140407738e-06, "loss": 1.2718346118927002, "step": 2678 }, { "epoch": 0.8248691905201601, "grad_norm": 8.8125, "learning_rate": 2.642160188881955e-06, "loss": 1.0505506992340088, "step": 2680 }, { "epoch": 0.8254847645429363, "grad_norm": 69.0, "learning_rate": 2.6415918653730535e-06, "loss": 1.3011550903320312, "step": 2682 }, { "epoch": 0.8261003385657125, "grad_norm": 16.0, "learning_rate": 2.641023170132062e-06, "loss": 1.2865910530090332, "step": 2684 }, { "epoch": 0.8267159125884888, "grad_norm": 15.3125, "learning_rate": 2.6404541034101723e-06, "loss": 1.4186267852783203, "step": 2686 }, { "epoch": 0.827331486611265, "grad_norm": 4.9375, "learning_rate": 2.639884665458744e-06, "loss": 1.345642328262329, "step": 2688 }, { "epoch": 0.8279470606340412, "grad_norm": 17.5, "learning_rate": 2.6393148565292973e-06, "loss": 1.4285130500793457, "step": 2690 }, { "epoch": 0.8285626346568175, "grad_norm": 12.0, "learning_rate": 2.638744676873517e-06, "loss": 1.0859196186065674, "step": 2692 }, { "epoch": 0.8291782086795937, "grad_norm": 22.125, "learning_rate": 2.6381741267432527e-06, "loss": 1.5866267681121826, "step": 2694 }, { "epoch": 0.8297937827023699, "grad_norm": 20.125, "learning_rate": 2.6376032063905177e-06, "loss": 1.4759366512298584, "step": 2696 }, { "epoch": 0.8304093567251462, "grad_norm": 13.1875, "learning_rate": 2.6370319160674883e-06, "loss": 1.1637474298477173, "step": 2698 }, { "epoch": 0.8310249307479224, "grad_norm": 10.5, "learning_rate": 2.6364602560265027e-06, "loss": 1.2510435581207275, "step": 2700 }, { "epoch": 0.8316405047706987, "grad_norm": 10.4375, "learning_rate": 2.6358882265200637e-06, "loss": 1.4966708421707153, "step": 2702 }, { "epoch": 0.832256078793475, "grad_norm": 3.359375, "learning_rate": 2.6353158278008395e-06, "loss": 1.2599449157714844, "step": 2704 }, { "epoch": 0.8328716528162512, "grad_norm": 15.75, "learning_rate": 2.6347430601216575e-06, "loss": 1.3064113855361938, "step": 2706 }, { "epoch": 0.8334872268390274, "grad_norm": 33.25, "learning_rate": 2.63416992373551e-06, "loss": 1.6911900043487549, "step": 2708 }, { "epoch": 0.8341028008618037, "grad_norm": 21.0, "learning_rate": 2.6335964188955523e-06, "loss": 1.5358657836914062, "step": 2710 }, { "epoch": 0.8347183748845799, "grad_norm": 22.625, "learning_rate": 2.6330225458551026e-06, "loss": 1.5055537223815918, "step": 2712 }, { "epoch": 0.8353339489073561, "grad_norm": 4.28125, "learning_rate": 2.6324483048676403e-06, "loss": 1.00602126121521, "step": 2714 }, { "epoch": 0.8359495229301324, "grad_norm": 9.3125, "learning_rate": 2.631873696186809e-06, "loss": 1.2267096042633057, "step": 2716 }, { "epoch": 0.8365650969529086, "grad_norm": 22.25, "learning_rate": 2.6312987200664134e-06, "loss": 1.7715106010437012, "step": 2718 }, { "epoch": 0.8371806709756848, "grad_norm": 14.0, "learning_rate": 2.6307233767604223e-06, "loss": 1.4620671272277832, "step": 2720 }, { "epoch": 0.8377962449984611, "grad_norm": 4.90625, "learning_rate": 2.6301476665229644e-06, "loss": 1.3678604364395142, "step": 2722 }, { "epoch": 0.8384118190212373, "grad_norm": 15.9375, "learning_rate": 2.629571589608332e-06, "loss": 1.2953975200653076, "step": 2724 }, { "epoch": 0.8390273930440135, "grad_norm": 6.3125, "learning_rate": 2.628995146270979e-06, "loss": 1.2497246265411377, "step": 2726 }, { "epoch": 0.8396429670667898, "grad_norm": 13.0, "learning_rate": 2.6284183367655206e-06, "loss": 1.2648379802703857, "step": 2728 }, { "epoch": 0.840258541089566, "grad_norm": 9.3125, "learning_rate": 2.627841161346734e-06, "loss": 1.284532070159912, "step": 2730 }, { "epoch": 0.8408741151123422, "grad_norm": 10.125, "learning_rate": 2.6272636202695597e-06, "loss": 1.401597261428833, "step": 2732 }, { "epoch": 0.8414896891351185, "grad_norm": 28.75, "learning_rate": 2.626685713789097e-06, "loss": 1.6952519416809082, "step": 2734 }, { "epoch": 0.8421052631578947, "grad_norm": 18.875, "learning_rate": 2.626107442160608e-06, "loss": 1.7030467987060547, "step": 2736 }, { "epoch": 0.8427208371806709, "grad_norm": 13.1875, "learning_rate": 2.6255288056395165e-06, "loss": 1.5869030952453613, "step": 2738 }, { "epoch": 0.8433364112034473, "grad_norm": 5.09375, "learning_rate": 2.6249498044814064e-06, "loss": 1.4417071342468262, "step": 2740 }, { "epoch": 0.8439519852262235, "grad_norm": 12.875, "learning_rate": 2.6243704389420225e-06, "loss": 1.567571759223938, "step": 2742 }, { "epoch": 0.8445675592489997, "grad_norm": 34.0, "learning_rate": 2.623790709277273e-06, "loss": 1.6155643463134766, "step": 2744 }, { "epoch": 0.845183133271776, "grad_norm": 38.5, "learning_rate": 2.623210615743224e-06, "loss": 1.5020222663879395, "step": 2746 }, { "epoch": 0.8457987072945522, "grad_norm": 21.625, "learning_rate": 2.6226301585961033e-06, "loss": 1.3307198286056519, "step": 2748 }, { "epoch": 0.8464142813173284, "grad_norm": 24.0, "learning_rate": 2.622049338092301e-06, "loss": 1.7647689580917358, "step": 2750 }, { "epoch": 0.8470298553401047, "grad_norm": 42.5, "learning_rate": 2.621468154488364e-06, "loss": 1.4326261281967163, "step": 2752 }, { "epoch": 0.8476454293628809, "grad_norm": 7.46875, "learning_rate": 2.6208866080410026e-06, "loss": 1.283789873123169, "step": 2754 }, { "epoch": 0.8482610033856571, "grad_norm": 11.5, "learning_rate": 2.6203046990070875e-06, "loss": 0.9671059250831604, "step": 2756 }, { "epoch": 0.8488765774084334, "grad_norm": 18.75, "learning_rate": 2.6197224276436474e-06, "loss": 1.3419411182403564, "step": 2758 }, { "epoch": 0.8494921514312096, "grad_norm": 10.6875, "learning_rate": 2.619139794207873e-06, "loss": 1.4022338390350342, "step": 2760 }, { "epoch": 0.8501077254539858, "grad_norm": 19.25, "learning_rate": 2.618556798957113e-06, "loss": 1.330336570739746, "step": 2762 }, { "epoch": 0.8507232994767621, "grad_norm": 11.1875, "learning_rate": 2.6179734421488785e-06, "loss": 1.348820686340332, "step": 2764 }, { "epoch": 0.8513388734995383, "grad_norm": 6.34375, "learning_rate": 2.6173897240408385e-06, "loss": 1.0717982053756714, "step": 2766 }, { "epoch": 0.8519544475223145, "grad_norm": 21.0, "learning_rate": 2.616805644890821e-06, "loss": 1.4334378242492676, "step": 2768 }, { "epoch": 0.8525700215450908, "grad_norm": 12.125, "learning_rate": 2.6162212049568155e-06, "loss": 1.236671805381775, "step": 2770 }, { "epoch": 0.853185595567867, "grad_norm": 22.375, "learning_rate": 2.6156364044969694e-06, "loss": 1.4810919761657715, "step": 2772 }, { "epoch": 0.8538011695906432, "grad_norm": 13.5, "learning_rate": 2.61505124376959e-06, "loss": 1.5031414031982422, "step": 2774 }, { "epoch": 0.8544167436134195, "grad_norm": 13.25, "learning_rate": 2.614465723033143e-06, "loss": 1.3797107934951782, "step": 2776 }, { "epoch": 0.8550323176361958, "grad_norm": 7.59375, "learning_rate": 2.6138798425462537e-06, "loss": 1.4212639331817627, "step": 2778 }, { "epoch": 0.855647891658972, "grad_norm": 38.0, "learning_rate": 2.613293602567706e-06, "loss": 1.4438972473144531, "step": 2780 }, { "epoch": 0.8562634656817483, "grad_norm": 20.625, "learning_rate": 2.6127070033564434e-06, "loss": 1.578815221786499, "step": 2782 }, { "epoch": 0.8568790397045245, "grad_norm": 4.3125, "learning_rate": 2.6121200451715675e-06, "loss": 1.0285112857818604, "step": 2784 }, { "epoch": 0.8574946137273007, "grad_norm": 13.4375, "learning_rate": 2.6115327282723372e-06, "loss": 1.0916186571121216, "step": 2786 }, { "epoch": 0.858110187750077, "grad_norm": 9.375, "learning_rate": 2.6109450529181725e-06, "loss": 1.2124738693237305, "step": 2788 }, { "epoch": 0.8587257617728532, "grad_norm": 15.4375, "learning_rate": 2.610357019368649e-06, "loss": 1.2717947959899902, "step": 2790 }, { "epoch": 0.8593413357956294, "grad_norm": 14.125, "learning_rate": 2.6097686278835026e-06, "loss": 0.9262675642967224, "step": 2792 }, { "epoch": 0.8599569098184057, "grad_norm": 67.5, "learning_rate": 2.6091798787226263e-06, "loss": 1.0638432502746582, "step": 2794 }, { "epoch": 0.8605724838411819, "grad_norm": 16.5, "learning_rate": 2.6085907721460716e-06, "loss": 1.2468934059143066, "step": 2796 }, { "epoch": 0.8611880578639581, "grad_norm": 13.125, "learning_rate": 2.6080013084140466e-06, "loss": 1.290550708770752, "step": 2798 }, { "epoch": 0.8618036318867344, "grad_norm": 16.25, "learning_rate": 2.60741148778692e-06, "loss": 1.3275871276855469, "step": 2800 }, { "epoch": 0.8624192059095106, "grad_norm": 13.5, "learning_rate": 2.606821310525213e-06, "loss": 0.9118420481681824, "step": 2802 }, { "epoch": 0.8630347799322868, "grad_norm": 12.4375, "learning_rate": 2.606230776889611e-06, "loss": 1.3373104333877563, "step": 2804 }, { "epoch": 0.8636503539550631, "grad_norm": 18.75, "learning_rate": 2.605639887140952e-06, "loss": 1.4756443500518799, "step": 2806 }, { "epoch": 0.8642659279778393, "grad_norm": 19.25, "learning_rate": 2.605048641540232e-06, "loss": 1.3395130634307861, "step": 2808 }, { "epoch": 0.8648815020006155, "grad_norm": 6.09375, "learning_rate": 2.6044570403486055e-06, "loss": 1.3719476461410522, "step": 2810 }, { "epoch": 0.8654970760233918, "grad_norm": 7.84375, "learning_rate": 2.6038650838273833e-06, "loss": 1.368028998374939, "step": 2812 }, { "epoch": 0.866112650046168, "grad_norm": 20.5, "learning_rate": 2.6032727722380332e-06, "loss": 1.309047818183899, "step": 2814 }, { "epoch": 0.8667282240689443, "grad_norm": 12.0, "learning_rate": 2.602680105842181e-06, "loss": 1.3907105922698975, "step": 2816 }, { "epoch": 0.8673437980917206, "grad_norm": 15.1875, "learning_rate": 2.6020870849016066e-06, "loss": 1.6243841648101807, "step": 2818 }, { "epoch": 0.8679593721144968, "grad_norm": 37.25, "learning_rate": 2.6014937096782484e-06, "loss": 1.253342628479004, "step": 2820 }, { "epoch": 0.868574946137273, "grad_norm": 17.25, "learning_rate": 2.6008999804342017e-06, "loss": 1.4394148588180542, "step": 2822 }, { "epoch": 0.8691905201600493, "grad_norm": 12.5625, "learning_rate": 2.6003058974317166e-06, "loss": 1.3138532638549805, "step": 2824 }, { "epoch": 0.8698060941828255, "grad_norm": 5.25, "learning_rate": 2.5997114609332e-06, "loss": 1.1129381656646729, "step": 2826 }, { "epoch": 0.8704216682056017, "grad_norm": 10.0, "learning_rate": 2.5991166712012163e-06, "loss": 1.2607460021972656, "step": 2828 }, { "epoch": 0.871037242228378, "grad_norm": 14.375, "learning_rate": 2.5985215284984843e-06, "loss": 1.262549638748169, "step": 2830 }, { "epoch": 0.8716528162511542, "grad_norm": 15.5, "learning_rate": 2.5979260330878776e-06, "loss": 1.0864108800888062, "step": 2832 }, { "epoch": 0.8722683902739304, "grad_norm": 5.28125, "learning_rate": 2.59733018523243e-06, "loss": 1.2294626235961914, "step": 2834 }, { "epoch": 0.8728839642967067, "grad_norm": 12.5625, "learning_rate": 2.596733985195327e-06, "loss": 1.5669360160827637, "step": 2836 }, { "epoch": 0.8734995383194829, "grad_norm": 54.5, "learning_rate": 2.5961374332399104e-06, "loss": 1.681774616241455, "step": 2838 }, { "epoch": 0.8741151123422591, "grad_norm": 13.5625, "learning_rate": 2.595540529629678e-06, "loss": 1.4134490489959717, "step": 2840 }, { "epoch": 0.8747306863650354, "grad_norm": 22.0, "learning_rate": 2.594943274628283e-06, "loss": 1.5794594287872314, "step": 2842 }, { "epoch": 0.8753462603878116, "grad_norm": 16.125, "learning_rate": 2.5943456684995334e-06, "loss": 1.5964782238006592, "step": 2844 }, { "epoch": 0.8759618344105878, "grad_norm": 20.75, "learning_rate": 2.5937477115073933e-06, "loss": 1.819379210472107, "step": 2846 }, { "epoch": 0.8765774084333641, "grad_norm": 14.9375, "learning_rate": 2.5931494039159797e-06, "loss": 1.4165709018707275, "step": 2848 }, { "epoch": 0.8771929824561403, "grad_norm": 15.875, "learning_rate": 2.5925507459895673e-06, "loss": 1.6482594013214111, "step": 2850 }, { "epoch": 0.8778085564789165, "grad_norm": 40.0, "learning_rate": 2.5919517379925825e-06, "loss": 1.3646832704544067, "step": 2852 }, { "epoch": 0.8784241305016929, "grad_norm": 15.3125, "learning_rate": 2.5913523801896083e-06, "loss": 1.118886947631836, "step": 2854 }, { "epoch": 0.8790397045244691, "grad_norm": 15.3125, "learning_rate": 2.5907526728453826e-06, "loss": 1.0934598445892334, "step": 2856 }, { "epoch": 0.8796552785472453, "grad_norm": 7.0, "learning_rate": 2.5901526162247956e-06, "loss": 1.2138285636901855, "step": 2858 }, { "epoch": 0.8802708525700216, "grad_norm": 19.0, "learning_rate": 2.5895522105928932e-06, "loss": 1.4065654277801514, "step": 2860 }, { "epoch": 0.8808864265927978, "grad_norm": 84.0, "learning_rate": 2.5889514562148764e-06, "loss": 1.5109992027282715, "step": 2862 }, { "epoch": 0.881502000615574, "grad_norm": 12.375, "learning_rate": 2.5883503533560976e-06, "loss": 1.4605238437652588, "step": 2864 }, { "epoch": 0.8821175746383503, "grad_norm": 49.5, "learning_rate": 2.5877489022820653e-06, "loss": 0.7409439086914062, "step": 2866 }, { "epoch": 0.8827331486611265, "grad_norm": 14.0, "learning_rate": 2.5871471032584412e-06, "loss": 1.4429962635040283, "step": 2868 }, { "epoch": 0.8833487226839027, "grad_norm": 23.25, "learning_rate": 2.586544956551041e-06, "loss": 1.6522072553634644, "step": 2870 }, { "epoch": 0.883964296706679, "grad_norm": 7.25, "learning_rate": 2.5859424624258324e-06, "loss": 1.1676018238067627, "step": 2872 }, { "epoch": 0.8845798707294552, "grad_norm": 7.71875, "learning_rate": 2.585339621148939e-06, "loss": 1.1003012657165527, "step": 2874 }, { "epoch": 0.8851954447522314, "grad_norm": 9.875, "learning_rate": 2.5847364329866354e-06, "loss": 1.3532074689865112, "step": 2876 }, { "epoch": 0.8858110187750077, "grad_norm": 13.0, "learning_rate": 2.5841328982053518e-06, "loss": 1.2843401432037354, "step": 2878 }, { "epoch": 0.8864265927977839, "grad_norm": 10.1875, "learning_rate": 2.5835290170716688e-06, "loss": 1.2896305322647095, "step": 2880 }, { "epoch": 0.8870421668205601, "grad_norm": 17.75, "learning_rate": 2.5829247898523217e-06, "loss": 1.3048279285430908, "step": 2882 }, { "epoch": 0.8876577408433364, "grad_norm": 13.0, "learning_rate": 2.5823202168141993e-06, "loss": 1.1085158586502075, "step": 2884 }, { "epoch": 0.8882733148661126, "grad_norm": 9.0625, "learning_rate": 2.5817152982243413e-06, "loss": 1.4354221820831299, "step": 2886 }, { "epoch": 0.8888888888888888, "grad_norm": 15.75, "learning_rate": 2.581110034349942e-06, "loss": 1.0920836925506592, "step": 2888 }, { "epoch": 0.8895044629116651, "grad_norm": 29.625, "learning_rate": 2.5805044254583456e-06, "loss": 0.6879758834838867, "step": 2890 }, { "epoch": 0.8901200369344414, "grad_norm": 11.125, "learning_rate": 2.5798984718170507e-06, "loss": 1.3846540451049805, "step": 2892 }, { "epoch": 0.8907356109572176, "grad_norm": 33.5, "learning_rate": 2.579292173693709e-06, "loss": 1.2994391918182373, "step": 2894 }, { "epoch": 0.8913511849799939, "grad_norm": 13.3125, "learning_rate": 2.5786855313561216e-06, "loss": 1.164937973022461, "step": 2896 }, { "epoch": 0.8919667590027701, "grad_norm": 23.0, "learning_rate": 2.5780785450722434e-06, "loss": 1.4573490619659424, "step": 2898 }, { "epoch": 0.8925823330255463, "grad_norm": 22.875, "learning_rate": 2.5774712151101814e-06, "loss": 1.6126925945281982, "step": 2900 }, { "epoch": 0.8931979070483226, "grad_norm": 21.625, "learning_rate": 2.576863541738193e-06, "loss": 1.1551085710525513, "step": 2902 }, { "epoch": 0.8938134810710988, "grad_norm": 20.0, "learning_rate": 2.5762555252246896e-06, "loss": 1.3950368165969849, "step": 2904 }, { "epoch": 0.894429055093875, "grad_norm": 13.4375, "learning_rate": 2.5756471658382325e-06, "loss": 1.4055218696594238, "step": 2906 }, { "epoch": 0.8950446291166513, "grad_norm": 11.0, "learning_rate": 2.5750384638475337e-06, "loss": 1.2864396572113037, "step": 2908 }, { "epoch": 0.8956602031394275, "grad_norm": 20.25, "learning_rate": 2.5744294195214584e-06, "loss": 1.6634044647216797, "step": 2910 }, { "epoch": 0.8962757771622037, "grad_norm": 12.6875, "learning_rate": 2.573820033129022e-06, "loss": 1.3990838527679443, "step": 2912 }, { "epoch": 0.89689135118498, "grad_norm": 15.75, "learning_rate": 2.5732103049393908e-06, "loss": 1.301020860671997, "step": 2914 }, { "epoch": 0.8975069252077562, "grad_norm": 41.5, "learning_rate": 2.572600235221883e-06, "loss": 1.5635402202606201, "step": 2916 }, { "epoch": 0.8981224992305324, "grad_norm": 8.625, "learning_rate": 2.571989824245967e-06, "loss": 1.3051490783691406, "step": 2918 }, { "epoch": 0.8987380732533087, "grad_norm": 12.6875, "learning_rate": 2.571379072281262e-06, "loss": 1.5666720867156982, "step": 2920 }, { "epoch": 0.8993536472760849, "grad_norm": 15.6875, "learning_rate": 2.5707679795975377e-06, "loss": 1.0976910591125488, "step": 2922 }, { "epoch": 0.8999692212988611, "grad_norm": 16.875, "learning_rate": 2.5701565464647146e-06, "loss": 1.0979533195495605, "step": 2924 }, { "epoch": 0.9005847953216374, "grad_norm": 12.75, "learning_rate": 2.5695447731528634e-06, "loss": 1.7441458702087402, "step": 2926 }, { "epoch": 0.9012003693444137, "grad_norm": 7.71875, "learning_rate": 2.5689326599322043e-06, "loss": 1.4589111804962158, "step": 2928 }, { "epoch": 0.90181594336719, "grad_norm": 10.0625, "learning_rate": 2.5683202070731097e-06, "loss": 1.434112310409546, "step": 2930 }, { "epoch": 0.9024315173899662, "grad_norm": 29.0, "learning_rate": 2.5677074148460995e-06, "loss": 1.6956961154937744, "step": 2932 }, { "epoch": 0.9030470914127424, "grad_norm": 27.125, "learning_rate": 2.5670942835218465e-06, "loss": 1.1124253273010254, "step": 2934 }, { "epoch": 0.9036626654355187, "grad_norm": 7.21875, "learning_rate": 2.5664808133711695e-06, "loss": 1.3332182168960571, "step": 2936 }, { "epoch": 0.9042782394582949, "grad_norm": 12.75, "learning_rate": 2.5658670046650395e-06, "loss": 1.623244285583496, "step": 2938 }, { "epoch": 0.9048938134810711, "grad_norm": 8.4375, "learning_rate": 2.565252857674578e-06, "loss": 1.2698122262954712, "step": 2940 }, { "epoch": 0.9055093875038474, "grad_norm": 12.3125, "learning_rate": 2.564638372671052e-06, "loss": 1.6054935455322266, "step": 2942 }, { "epoch": 0.9061249615266236, "grad_norm": 31.25, "learning_rate": 2.564023549925882e-06, "loss": 1.413541555404663, "step": 2944 }, { "epoch": 0.9067405355493998, "grad_norm": 20.875, "learning_rate": 2.563408389710636e-06, "loss": 1.2901825904846191, "step": 2946 }, { "epoch": 0.907356109572176, "grad_norm": 17.25, "learning_rate": 2.5627928922970294e-06, "loss": 1.7496740818023682, "step": 2948 }, { "epoch": 0.9079716835949523, "grad_norm": 14.1875, "learning_rate": 2.562177057956929e-06, "loss": 1.3308651447296143, "step": 2950 }, { "epoch": 0.9085872576177285, "grad_norm": 12.6875, "learning_rate": 2.5615608869623505e-06, "loss": 0.8857111930847168, "step": 2952 }, { "epoch": 0.9092028316405047, "grad_norm": 11.5, "learning_rate": 2.5609443795854557e-06, "loss": 0.7823306322097778, "step": 2954 }, { "epoch": 0.909818405663281, "grad_norm": 37.5, "learning_rate": 2.560327536098558e-06, "loss": 1.6251142024993896, "step": 2956 }, { "epoch": 0.9104339796860572, "grad_norm": 8.0, "learning_rate": 2.5597103567741162e-06, "loss": 1.3983535766601562, "step": 2958 }, { "epoch": 0.9110495537088334, "grad_norm": 17.5, "learning_rate": 2.5590928418847415e-06, "loss": 1.1519997119903564, "step": 2960 }, { "epoch": 0.9116651277316097, "grad_norm": 21.625, "learning_rate": 2.5584749917031887e-06, "loss": 1.6490130424499512, "step": 2962 }, { "epoch": 0.9122807017543859, "grad_norm": 14.3125, "learning_rate": 2.557856806502364e-06, "loss": 1.4404358863830566, "step": 2964 }, { "epoch": 0.9128962757771623, "grad_norm": 19.75, "learning_rate": 2.5572382865553203e-06, "loss": 1.3353450298309326, "step": 2966 }, { "epoch": 0.9135118497999385, "grad_norm": 15.75, "learning_rate": 2.5566194321352584e-06, "loss": 1.310957670211792, "step": 2968 }, { "epoch": 0.9141274238227147, "grad_norm": 12.375, "learning_rate": 2.5560002435155283e-06, "loss": 1.3766510486602783, "step": 2970 }, { "epoch": 0.914742997845491, "grad_norm": 31.125, "learning_rate": 2.5553807209696237e-06, "loss": 1.389007329940796, "step": 2972 }, { "epoch": 0.9153585718682672, "grad_norm": 38.5, "learning_rate": 2.554760864771191e-06, "loss": 1.6427724361419678, "step": 2974 }, { "epoch": 0.9159741458910434, "grad_norm": 5.125, "learning_rate": 2.5541406751940193e-06, "loss": 1.3378264904022217, "step": 2976 }, { "epoch": 0.9165897199138197, "grad_norm": 32.5, "learning_rate": 2.5535201525120492e-06, "loss": 1.6025424003601074, "step": 2978 }, { "epoch": 0.9172052939365959, "grad_norm": 7.28125, "learning_rate": 2.5528992969993648e-06, "loss": 1.0566959381103516, "step": 2980 }, { "epoch": 0.9178208679593721, "grad_norm": 8.9375, "learning_rate": 2.5522781089301983e-06, "loss": 1.0162148475646973, "step": 2982 }, { "epoch": 0.9184364419821484, "grad_norm": 6.1875, "learning_rate": 2.551656588578931e-06, "loss": 1.179545283317566, "step": 2984 }, { "epoch": 0.9190520160049246, "grad_norm": 5.375, "learning_rate": 2.551034736220087e-06, "loss": 1.2015800476074219, "step": 2986 }, { "epoch": 0.9196675900277008, "grad_norm": 14.75, "learning_rate": 2.5504125521283416e-06, "loss": 1.3361399173736572, "step": 2988 }, { "epoch": 0.920283164050477, "grad_norm": 11.5625, "learning_rate": 2.5497900365785116e-06, "loss": 1.6196482181549072, "step": 2990 }, { "epoch": 0.9208987380732533, "grad_norm": 12.875, "learning_rate": 2.549167189845565e-06, "loss": 1.4826675653457642, "step": 2992 }, { "epoch": 0.9215143120960295, "grad_norm": 10.875, "learning_rate": 2.5485440122046133e-06, "loss": 0.8327569365501404, "step": 2994 }, { "epoch": 0.9221298861188058, "grad_norm": 18.75, "learning_rate": 2.5479205039309135e-06, "loss": 1.507612943649292, "step": 2996 }, { "epoch": 0.922745460141582, "grad_norm": 52.75, "learning_rate": 2.547296665299872e-06, "loss": 1.764232873916626, "step": 2998 }, { "epoch": 0.9233610341643582, "grad_norm": 21.25, "learning_rate": 2.546672496587037e-06, "loss": 1.5592687129974365, "step": 3000 }, { "epoch": 0.9239766081871345, "grad_norm": 6.21875, "learning_rate": 2.5460479980681062e-06, "loss": 1.2681667804718018, "step": 3002 }, { "epoch": 0.9245921822099108, "grad_norm": 12.25, "learning_rate": 2.5454231700189204e-06, "loss": 1.4879635572433472, "step": 3004 }, { "epoch": 0.925207756232687, "grad_norm": 5.84375, "learning_rate": 2.5447980127154673e-06, "loss": 1.446776032447815, "step": 3006 }, { "epoch": 0.9258233302554633, "grad_norm": 25.625, "learning_rate": 2.544172526433879e-06, "loss": 1.4826745986938477, "step": 3008 }, { "epoch": 0.9264389042782395, "grad_norm": 11.25, "learning_rate": 2.543546711450434e-06, "loss": 1.2050938606262207, "step": 3010 }, { "epoch": 0.9270544783010157, "grad_norm": 4.40625, "learning_rate": 2.542920568041555e-06, "loss": 1.1081424951553345, "step": 3012 }, { "epoch": 0.927670052323792, "grad_norm": 19.625, "learning_rate": 2.542294096483811e-06, "loss": 1.2566344738006592, "step": 3014 }, { "epoch": 0.9282856263465682, "grad_norm": 7.1875, "learning_rate": 2.5416672970539154e-06, "loss": 1.2727718353271484, "step": 3016 }, { "epoch": 0.9289012003693444, "grad_norm": 16.75, "learning_rate": 2.541040170028725e-06, "loss": 1.3702088594436646, "step": 3018 }, { "epoch": 0.9295167743921207, "grad_norm": 12.25, "learning_rate": 2.5404127156852436e-06, "loss": 1.037917137145996, "step": 3020 }, { "epoch": 0.9301323484148969, "grad_norm": 7.03125, "learning_rate": 2.539784934300618e-06, "loss": 1.2113661766052246, "step": 3022 }, { "epoch": 0.9307479224376731, "grad_norm": 8.6875, "learning_rate": 2.53915682615214e-06, "loss": 1.376421332359314, "step": 3024 }, { "epoch": 0.9313634964604494, "grad_norm": 34.75, "learning_rate": 2.5385283915172454e-06, "loss": 1.5175917148590088, "step": 3026 }, { "epoch": 0.9319790704832256, "grad_norm": 161.0, "learning_rate": 2.5378996306735157e-06, "loss": 1.3562586307525635, "step": 3028 }, { "epoch": 0.9325946445060018, "grad_norm": 11.6875, "learning_rate": 2.5372705438986742e-06, "loss": 1.3766993284225464, "step": 3030 }, { "epoch": 0.9332102185287781, "grad_norm": 13.3125, "learning_rate": 2.5366411314705884e-06, "loss": 1.4237804412841797, "step": 3032 }, { "epoch": 0.9338257925515543, "grad_norm": 17.75, "learning_rate": 2.5360113936672727e-06, "loss": 1.5633426904678345, "step": 3034 }, { "epoch": 0.9344413665743305, "grad_norm": 10.5, "learning_rate": 2.5353813307668818e-06, "loss": 1.4739643335342407, "step": 3036 }, { "epoch": 0.9350569405971068, "grad_norm": 12.9375, "learning_rate": 2.534750943047715e-06, "loss": 1.355829119682312, "step": 3038 }, { "epoch": 0.935672514619883, "grad_norm": 28.625, "learning_rate": 2.534120230788216e-06, "loss": 1.4471814632415771, "step": 3040 }, { "epoch": 0.9362880886426593, "grad_norm": 10.625, "learning_rate": 2.53348919426697e-06, "loss": 1.619966983795166, "step": 3042 }, { "epoch": 0.9369036626654356, "grad_norm": 8.4375, "learning_rate": 2.5328578337627076e-06, "loss": 1.022241234779358, "step": 3044 }, { "epoch": 0.9375192366882118, "grad_norm": 10.1875, "learning_rate": 2.5322261495543017e-06, "loss": 1.1702439785003662, "step": 3046 }, { "epoch": 0.938134810710988, "grad_norm": 10.875, "learning_rate": 2.531594141920766e-06, "loss": 1.3634662628173828, "step": 3048 }, { "epoch": 0.9387503847337643, "grad_norm": 15.1875, "learning_rate": 2.530961811141261e-06, "loss": 1.2916343212127686, "step": 3050 }, { "epoch": 0.9393659587565405, "grad_norm": 20.5, "learning_rate": 2.5303291574950877e-06, "loss": 1.716345191001892, "step": 3052 }, { "epoch": 0.9399815327793167, "grad_norm": 19.875, "learning_rate": 2.529696181261689e-06, "loss": 1.5666673183441162, "step": 3054 }, { "epoch": 0.940597106802093, "grad_norm": 17.625, "learning_rate": 2.5290628827206517e-06, "loss": 1.3357521295547485, "step": 3056 }, { "epoch": 0.9412126808248692, "grad_norm": 29.0, "learning_rate": 2.528429262151705e-06, "loss": 1.4472358226776123, "step": 3058 }, { "epoch": 0.9418282548476454, "grad_norm": 13.875, "learning_rate": 2.5277953198347186e-06, "loss": 1.5934563875198364, "step": 3060 }, { "epoch": 0.9424438288704217, "grad_norm": 18.375, "learning_rate": 2.5271610560497068e-06, "loss": 1.1851208209991455, "step": 3062 }, { "epoch": 0.9430594028931979, "grad_norm": 9.9375, "learning_rate": 2.526526471076824e-06, "loss": 1.8103055953979492, "step": 3064 }, { "epoch": 0.9436749769159741, "grad_norm": 21.125, "learning_rate": 2.5258915651963673e-06, "loss": 1.5106641054153442, "step": 3066 }, { "epoch": 0.9442905509387504, "grad_norm": 11.8125, "learning_rate": 2.5252563386887754e-06, "loss": 1.179125189781189, "step": 3068 }, { "epoch": 0.9449061249615266, "grad_norm": 10.8125, "learning_rate": 2.5246207918346286e-06, "loss": 1.4003870487213135, "step": 3070 }, { "epoch": 0.9455216989843028, "grad_norm": 47.75, "learning_rate": 2.523984924914648e-06, "loss": 1.1455589532852173, "step": 3072 }, { "epoch": 0.9461372730070791, "grad_norm": 10.375, "learning_rate": 2.523348738209698e-06, "loss": 1.1926007270812988, "step": 3074 }, { "epoch": 0.9467528470298553, "grad_norm": 10.0, "learning_rate": 2.5227122320007817e-06, "loss": 1.4721944332122803, "step": 3076 }, { "epoch": 0.9473684210526315, "grad_norm": 13.25, "learning_rate": 2.5220754065690455e-06, "loss": 1.2761927843093872, "step": 3078 }, { "epoch": 0.9479839950754079, "grad_norm": 15.125, "learning_rate": 2.5214382621957754e-06, "loss": 1.6848111152648926, "step": 3080 }, { "epoch": 0.9485995690981841, "grad_norm": 26.5, "learning_rate": 2.520800799162399e-06, "loss": 1.3228620290756226, "step": 3082 }, { "epoch": 0.9492151431209603, "grad_norm": 16.5, "learning_rate": 2.5201630177504848e-06, "loss": 0.9865208864212036, "step": 3084 }, { "epoch": 0.9498307171437366, "grad_norm": 5.53125, "learning_rate": 2.519524918241741e-06, "loss": 1.5489556789398193, "step": 3086 }, { "epoch": 0.9504462911665128, "grad_norm": 20.125, "learning_rate": 2.5188865009180176e-06, "loss": 1.191929578781128, "step": 3088 }, { "epoch": 0.951061865189289, "grad_norm": 4.8125, "learning_rate": 2.5182477660613033e-06, "loss": 1.1938698291778564, "step": 3090 }, { "epoch": 0.9516774392120653, "grad_norm": 28.25, "learning_rate": 2.517608713953729e-06, "loss": 1.7811293601989746, "step": 3092 }, { "epoch": 0.9522930132348415, "grad_norm": 12.125, "learning_rate": 2.5169693448775642e-06, "loss": 1.4118529558181763, "step": 3094 }, { "epoch": 0.9529085872576177, "grad_norm": 30.75, "learning_rate": 2.5163296591152186e-06, "loss": 1.3781766891479492, "step": 3096 }, { "epoch": 0.953524161280394, "grad_norm": 15.6875, "learning_rate": 2.515689656949243e-06, "loss": 1.4856109619140625, "step": 3098 }, { "epoch": 0.9541397353031702, "grad_norm": 24.875, "learning_rate": 2.5150493386623265e-06, "loss": 1.1463229656219482, "step": 3100 }, { "epoch": 0.9547553093259464, "grad_norm": 10.1875, "learning_rate": 2.5144087045372987e-06, "loss": 1.2381125688552856, "step": 3102 }, { "epoch": 0.9553708833487227, "grad_norm": 78.0, "learning_rate": 2.513767754857128e-06, "loss": 1.2273215055465698, "step": 3104 }, { "epoch": 0.9559864573714989, "grad_norm": 7.03125, "learning_rate": 2.5131264899049225e-06, "loss": 1.1484160423278809, "step": 3106 }, { "epoch": 0.9566020313942751, "grad_norm": 117.5, "learning_rate": 2.512484909963931e-06, "loss": 1.3532674312591553, "step": 3108 }, { "epoch": 0.9572176054170514, "grad_norm": 17.0, "learning_rate": 2.5118430153175384e-06, "loss": 1.0924203395843506, "step": 3110 }, { "epoch": 0.9578331794398276, "grad_norm": 36.25, "learning_rate": 2.511200806249272e-06, "loss": 1.5392255783081055, "step": 3112 }, { "epoch": 0.9584487534626038, "grad_norm": 17.0, "learning_rate": 2.5105582830427948e-06, "loss": 1.3981271982192993, "step": 3114 }, { "epoch": 0.9590643274853801, "grad_norm": 10.1875, "learning_rate": 2.50991544598191e-06, "loss": 1.2499940395355225, "step": 3116 }, { "epoch": 0.9596799015081564, "grad_norm": 11.25, "learning_rate": 2.509272295350561e-06, "loss": 1.3273930549621582, "step": 3118 }, { "epoch": 0.9602954755309326, "grad_norm": 12.6875, "learning_rate": 2.5086288314328267e-06, "loss": 1.0946621894836426, "step": 3120 }, { "epoch": 0.9609110495537089, "grad_norm": 8.8125, "learning_rate": 2.5079850545129265e-06, "loss": 1.2586936950683594, "step": 3122 }, { "epoch": 0.9615266235764851, "grad_norm": 12.3125, "learning_rate": 2.5073409648752176e-06, "loss": 1.404738426208496, "step": 3124 }, { "epoch": 0.9621421975992613, "grad_norm": 32.75, "learning_rate": 2.5066965628041944e-06, "loss": 1.5974805355072021, "step": 3126 }, { "epoch": 0.9627577716220376, "grad_norm": 16.875, "learning_rate": 2.5060518485844903e-06, "loss": 1.1815767288208008, "step": 3128 }, { "epoch": 0.9633733456448138, "grad_norm": 26.125, "learning_rate": 2.505406822500877e-06, "loss": 0.96288001537323, "step": 3130 }, { "epoch": 0.96398891966759, "grad_norm": 5.96875, "learning_rate": 2.504761484838262e-06, "loss": 1.0400323867797852, "step": 3132 }, { "epoch": 0.9646044936903663, "grad_norm": 21.625, "learning_rate": 2.5041158358816925e-06, "loss": 1.462847113609314, "step": 3134 }, { "epoch": 0.9652200677131425, "grad_norm": 14.75, "learning_rate": 2.5034698759163528e-06, "loss": 1.285290241241455, "step": 3136 }, { "epoch": 0.9658356417359187, "grad_norm": 16.75, "learning_rate": 2.502823605227563e-06, "loss": 1.515703797340393, "step": 3138 }, { "epoch": 0.966451215758695, "grad_norm": 16.25, "learning_rate": 2.5021770241007826e-06, "loss": 1.472865343093872, "step": 3140 }, { "epoch": 0.9670667897814712, "grad_norm": 13.5625, "learning_rate": 2.501530132821607e-06, "loss": 1.6380629539489746, "step": 3142 }, { "epoch": 0.9676823638042474, "grad_norm": 11.875, "learning_rate": 2.5008829316757685e-06, "loss": 1.2624460458755493, "step": 3144 }, { "epoch": 0.9682979378270237, "grad_norm": 9.8125, "learning_rate": 2.5002354209491364e-06, "loss": 1.141180157661438, "step": 3146 }, { "epoch": 0.9689135118497999, "grad_norm": 15.875, "learning_rate": 2.4995876009277176e-06, "loss": 1.371462106704712, "step": 3148 }, { "epoch": 0.9695290858725761, "grad_norm": 20.5, "learning_rate": 2.4989394718976542e-06, "loss": 1.5800580978393555, "step": 3150 }, { "epoch": 0.9701446598953524, "grad_norm": 17.0, "learning_rate": 2.4982910341452274e-06, "loss": 1.5933725833892822, "step": 3152 }, { "epoch": 0.9707602339181286, "grad_norm": 13.25, "learning_rate": 2.4976422879568497e-06, "loss": 1.7934191226959229, "step": 3154 }, { "epoch": 0.971375807940905, "grad_norm": 16.125, "learning_rate": 2.496993233619076e-06, "loss": 1.5261006355285645, "step": 3156 }, { "epoch": 0.9719913819636812, "grad_norm": 7.46875, "learning_rate": 2.4963438714185924e-06, "loss": 1.2210710048675537, "step": 3158 }, { "epoch": 0.9726069559864574, "grad_norm": 7.84375, "learning_rate": 2.4956942016422243e-06, "loss": 1.0445549488067627, "step": 3160 }, { "epoch": 0.9732225300092336, "grad_norm": 14.0, "learning_rate": 2.4950442245769304e-06, "loss": 1.2202482223510742, "step": 3162 }, { "epoch": 0.9738381040320099, "grad_norm": 24.75, "learning_rate": 2.494393940509807e-06, "loss": 1.2303857803344727, "step": 3164 }, { "epoch": 0.9744536780547861, "grad_norm": 9.8125, "learning_rate": 2.493743349728085e-06, "loss": 1.519113540649414, "step": 3166 }, { "epoch": 0.9750692520775623, "grad_norm": 14.125, "learning_rate": 2.4930924525191317e-06, "loss": 1.6462736129760742, "step": 3168 }, { "epoch": 0.9756848261003386, "grad_norm": 21.5, "learning_rate": 2.4924412491704485e-06, "loss": 1.3465995788574219, "step": 3170 }, { "epoch": 0.9763004001231148, "grad_norm": 16.25, "learning_rate": 2.491789739969673e-06, "loss": 1.1416728496551514, "step": 3172 }, { "epoch": 0.976915974145891, "grad_norm": 35.0, "learning_rate": 2.4911379252045775e-06, "loss": 1.6752818822860718, "step": 3174 }, { "epoch": 0.9775315481686673, "grad_norm": 15.0625, "learning_rate": 2.490485805163069e-06, "loss": 1.5092238187789917, "step": 3176 }, { "epoch": 0.9781471221914435, "grad_norm": 12.3125, "learning_rate": 2.48983338013319e-06, "loss": 1.4850773811340332, "step": 3178 }, { "epoch": 0.9787626962142197, "grad_norm": 20.0, "learning_rate": 2.4891806504031175e-06, "loss": 1.7530406713485718, "step": 3180 }, { "epoch": 0.979378270236996, "grad_norm": 16.375, "learning_rate": 2.488527616261163e-06, "loss": 2.0001983642578125, "step": 3182 }, { "epoch": 0.9799938442597722, "grad_norm": 35.25, "learning_rate": 2.487874277995771e-06, "loss": 1.9470124244689941, "step": 3184 }, { "epoch": 0.9806094182825484, "grad_norm": 30.875, "learning_rate": 2.4872206358955244e-06, "loss": 1.3628416061401367, "step": 3186 }, { "epoch": 0.9812249923053247, "grad_norm": 25.75, "learning_rate": 2.486566690249136e-06, "loss": 1.5063343048095703, "step": 3188 }, { "epoch": 0.9818405663281009, "grad_norm": 4.5, "learning_rate": 2.485912441345454e-06, "loss": 1.3452836275100708, "step": 3190 }, { "epoch": 0.9824561403508771, "grad_norm": 17.125, "learning_rate": 2.4852578894734615e-06, "loss": 0.7489191889762878, "step": 3192 }, { "epoch": 0.9830717143736535, "grad_norm": 27.5, "learning_rate": 2.484603034922275e-06, "loss": 1.5885820388793945, "step": 3194 }, { "epoch": 0.9836872883964297, "grad_norm": 4.75, "learning_rate": 2.4839478779811445e-06, "loss": 1.1795306205749512, "step": 3196 }, { "epoch": 0.984302862419206, "grad_norm": 8.5, "learning_rate": 2.483292418939454e-06, "loss": 1.0942715406417847, "step": 3198 }, { "epoch": 0.9849184364419822, "grad_norm": 11.5, "learning_rate": 2.4826366580867188e-06, "loss": 1.322864294052124, "step": 3200 }, { "epoch": 0.9855340104647584, "grad_norm": 14.0, "learning_rate": 2.4819805957125903e-06, "loss": 1.2085504531860352, "step": 3202 }, { "epoch": 0.9861495844875346, "grad_norm": 44.5, "learning_rate": 2.4813242321068525e-06, "loss": 0.9992244839668274, "step": 3204 }, { "epoch": 0.9867651585103109, "grad_norm": 19.875, "learning_rate": 2.480667567559421e-06, "loss": 1.217922568321228, "step": 3206 }, { "epoch": 0.9873807325330871, "grad_norm": 15.25, "learning_rate": 2.4800106023603457e-06, "loss": 1.1221704483032227, "step": 3208 }, { "epoch": 0.9879963065558633, "grad_norm": 13.25, "learning_rate": 2.479353336799809e-06, "loss": 1.4771157503128052, "step": 3210 }, { "epoch": 0.9886118805786396, "grad_norm": 15.625, "learning_rate": 2.478695771168126e-06, "loss": 1.4712547063827515, "step": 3212 }, { "epoch": 0.9892274546014158, "grad_norm": 5.90625, "learning_rate": 2.478037905755744e-06, "loss": 1.2736375331878662, "step": 3214 }, { "epoch": 0.989843028624192, "grad_norm": 20.125, "learning_rate": 2.477379740853242e-06, "loss": 1.5006616115570068, "step": 3216 }, { "epoch": 0.9904586026469683, "grad_norm": 20.0, "learning_rate": 2.4767212767513344e-06, "loss": 1.2370715141296387, "step": 3218 }, { "epoch": 0.9910741766697445, "grad_norm": 10.875, "learning_rate": 2.4760625137408635e-06, "loss": 1.1793454885482788, "step": 3220 }, { "epoch": 0.9916897506925207, "grad_norm": 26.625, "learning_rate": 2.475403452112806e-06, "loss": 1.6577112674713135, "step": 3222 }, { "epoch": 0.992305324715297, "grad_norm": 20.625, "learning_rate": 2.474744092158271e-06, "loss": 1.3132104873657227, "step": 3224 }, { "epoch": 0.9929208987380732, "grad_norm": 15.1875, "learning_rate": 2.4740844341684984e-06, "loss": 1.5861358642578125, "step": 3226 }, { "epoch": 0.9935364727608494, "grad_norm": 25.125, "learning_rate": 2.473424478434859e-06, "loss": 1.5777220726013184, "step": 3228 }, { "epoch": 0.9941520467836257, "grad_norm": 21.75, "learning_rate": 2.4727642252488566e-06, "loss": 1.5983284711837769, "step": 3230 }, { "epoch": 0.994767620806402, "grad_norm": 15.75, "learning_rate": 2.472103674902126e-06, "loss": 1.368581771850586, "step": 3232 }, { "epoch": 0.9953831948291783, "grad_norm": 7.71875, "learning_rate": 2.4714428276864327e-06, "loss": 0.8675891160964966, "step": 3234 }, { "epoch": 0.9959987688519545, "grad_norm": 7.21875, "learning_rate": 2.4707816838936743e-06, "loss": 0.9874320030212402, "step": 3236 }, { "epoch": 0.9966143428747307, "grad_norm": 14.4375, "learning_rate": 2.470120243815878e-06, "loss": 0.9434853792190552, "step": 3238 }, { "epoch": 0.997229916897507, "grad_norm": 15.125, "learning_rate": 2.4694585077452024e-06, "loss": 0.8188871145248413, "step": 3240 }, { "epoch": 0.9978454909202832, "grad_norm": 8.125, "learning_rate": 2.4687964759739384e-06, "loss": 1.2960641384124756, "step": 3242 }, { "epoch": 0.9984610649430594, "grad_norm": 8.4375, "learning_rate": 2.468134148794504e-06, "loss": 1.224454641342163, "step": 3244 }, { "epoch": 0.9990766389658357, "grad_norm": 26.5, "learning_rate": 2.467471526499453e-06, "loss": 1.3921452760696411, "step": 3246 }, { "epoch": 0.9996922129886119, "grad_norm": 13.0, "learning_rate": 2.4668086093814634e-06, "loss": 1.4095790386199951, "step": 3248 }, { "epoch": 1.0003077870113881, "grad_norm": 5.9375, "learning_rate": 2.4661453977333482e-06, "loss": 1.392961025238037, "step": 3250 }, { "epoch": 1.0009233610341643, "grad_norm": 32.0, "learning_rate": 2.4654818918480477e-06, "loss": 1.4555277824401855, "step": 3252 }, { "epoch": 1.0015389350569406, "grad_norm": 15.625, "learning_rate": 2.464818092018635e-06, "loss": 1.2271881103515625, "step": 3254 }, { "epoch": 1.0021545090797168, "grad_norm": 17.0, "learning_rate": 2.4641539985383088e-06, "loss": 1.4368352890014648, "step": 3256 }, { "epoch": 1.002770083102493, "grad_norm": 23.25, "learning_rate": 2.463489611700402e-06, "loss": 1.69225013256073, "step": 3258 }, { "epoch": 1.0033856571252693, "grad_norm": 8.25, "learning_rate": 2.4628249317983737e-06, "loss": 1.1970336437225342, "step": 3260 }, { "epoch": 1.0040012311480455, "grad_norm": 23.625, "learning_rate": 2.4621599591258145e-06, "loss": 0.747069239616394, "step": 3262 }, { "epoch": 1.0046168051708217, "grad_norm": 18.875, "learning_rate": 2.461494693976443e-06, "loss": 1.5445091724395752, "step": 3264 }, { "epoch": 1.005232379193598, "grad_norm": 3.515625, "learning_rate": 2.4608291366441085e-06, "loss": 1.2502844333648682, "step": 3266 }, { "epoch": 1.0058479532163742, "grad_norm": 8.3125, "learning_rate": 2.4601632874227873e-06, "loss": 1.341188669204712, "step": 3268 }, { "epoch": 1.0064635272391504, "grad_norm": 15.375, "learning_rate": 2.459497146606587e-06, "loss": 1.2452143430709839, "step": 3270 }, { "epoch": 1.0070791012619267, "grad_norm": 20.0, "learning_rate": 2.4588307144897412e-06, "loss": 1.5691194534301758, "step": 3272 }, { "epoch": 1.007694675284703, "grad_norm": 15.6875, "learning_rate": 2.458163991366615e-06, "loss": 1.076235294342041, "step": 3274 }, { "epoch": 1.0083102493074791, "grad_norm": 9.625, "learning_rate": 2.4574969775317e-06, "loss": 1.206156849861145, "step": 3276 }, { "epoch": 1.0089258233302554, "grad_norm": 20.25, "learning_rate": 2.456829673279618e-06, "loss": 1.520322561264038, "step": 3278 }, { "epoch": 1.0095413973530316, "grad_norm": 20.375, "learning_rate": 2.456162078905118e-06, "loss": 1.2282922267913818, "step": 3280 }, { "epoch": 1.0101569713758078, "grad_norm": 13.875, "learning_rate": 2.4554941947030754e-06, "loss": 1.8445767164230347, "step": 3282 }, { "epoch": 1.010772545398584, "grad_norm": 8.6875, "learning_rate": 2.454826020968497e-06, "loss": 1.3538954257965088, "step": 3284 }, { "epoch": 1.0113881194213603, "grad_norm": 12.1875, "learning_rate": 2.4541575579965167e-06, "loss": 1.2854328155517578, "step": 3286 }, { "epoch": 1.0120036934441368, "grad_norm": 4.40625, "learning_rate": 2.4534888060823927e-06, "loss": 1.1238691806793213, "step": 3288 }, { "epoch": 1.012619267466913, "grad_norm": 18.0, "learning_rate": 2.4528197655215153e-06, "loss": 1.1130653619766235, "step": 3290 }, { "epoch": 1.0132348414896892, "grad_norm": 11.1875, "learning_rate": 2.4521504366094e-06, "loss": 1.1687474250793457, "step": 3292 }, { "epoch": 1.0138504155124655, "grad_norm": 15.5625, "learning_rate": 2.4514808196416907e-06, "loss": 1.5649261474609375, "step": 3294 }, { "epoch": 1.0144659895352417, "grad_norm": 36.25, "learning_rate": 2.4508109149141577e-06, "loss": 1.2250068187713623, "step": 3296 }, { "epoch": 1.015081563558018, "grad_norm": 13.1875, "learning_rate": 2.4501407227226984e-06, "loss": 1.4038679599761963, "step": 3298 }, { "epoch": 1.0156971375807942, "grad_norm": 4.5625, "learning_rate": 2.449470243363338e-06, "loss": 1.0873544216156006, "step": 3300 }, { "epoch": 1.0163127116035704, "grad_norm": 14.0625, "learning_rate": 2.448799477132227e-06, "loss": 1.562752366065979, "step": 3302 }, { "epoch": 1.0169282856263466, "grad_norm": 5.75, "learning_rate": 2.448128424325645e-06, "loss": 1.3743469715118408, "step": 3304 }, { "epoch": 1.0175438596491229, "grad_norm": 26.25, "learning_rate": 2.4474570852399953e-06, "loss": 1.6857503652572632, "step": 3306 }, { "epoch": 1.018159433671899, "grad_norm": 24.625, "learning_rate": 2.4467854601718094e-06, "loss": 1.5393667221069336, "step": 3308 }, { "epoch": 1.0187750076946753, "grad_norm": 8.5625, "learning_rate": 2.446113549417747e-06, "loss": 1.216958999633789, "step": 3310 }, { "epoch": 1.0193905817174516, "grad_norm": 6.1875, "learning_rate": 2.4454413532745893e-06, "loss": 1.2533522844314575, "step": 3312 }, { "epoch": 1.0200061557402278, "grad_norm": 11.8125, "learning_rate": 2.444768872039247e-06, "loss": 1.1868257522583008, "step": 3314 }, { "epoch": 1.020621729763004, "grad_norm": 8.375, "learning_rate": 2.444096106008756e-06, "loss": 1.022985816001892, "step": 3316 }, { "epoch": 1.0212373037857803, "grad_norm": 30.625, "learning_rate": 2.443423055480277e-06, "loss": 1.6255028247833252, "step": 3318 }, { "epoch": 1.0218528778085565, "grad_norm": 21.125, "learning_rate": 2.4427497207510983e-06, "loss": 1.4674348831176758, "step": 3320 }, { "epoch": 1.0224684518313327, "grad_norm": 16.125, "learning_rate": 2.4420761021186323e-06, "loss": 1.4604638814926147, "step": 3322 }, { "epoch": 1.023084025854109, "grad_norm": 17.375, "learning_rate": 2.4414021998804167e-06, "loss": 1.3937731981277466, "step": 3324 }, { "epoch": 1.0236995998768852, "grad_norm": 12.625, "learning_rate": 2.4407280143341155e-06, "loss": 1.5467102527618408, "step": 3326 }, { "epoch": 1.0243151738996614, "grad_norm": 16.875, "learning_rate": 2.440053545777517e-06, "loss": 1.1986671686172485, "step": 3328 }, { "epoch": 1.0249307479224377, "grad_norm": 8.1875, "learning_rate": 2.4393787945085343e-06, "loss": 1.1440664529800415, "step": 3330 }, { "epoch": 1.025546321945214, "grad_norm": 15.625, "learning_rate": 2.4387037608252063e-06, "loss": 1.2961888313293457, "step": 3332 }, { "epoch": 1.0261618959679901, "grad_norm": 14.5, "learning_rate": 2.4380284450256955e-06, "loss": 0.9457817673683167, "step": 3334 }, { "epoch": 1.0267774699907664, "grad_norm": 23.375, "learning_rate": 2.437352847408291e-06, "loss": 1.6424400806427002, "step": 3336 }, { "epoch": 1.0273930440135426, "grad_norm": 32.5, "learning_rate": 2.436676968271404e-06, "loss": 1.4064046144485474, "step": 3338 }, { "epoch": 1.0280086180363188, "grad_norm": 65.5, "learning_rate": 2.436000807913571e-06, "loss": 1.326033592224121, "step": 3340 }, { "epoch": 1.028624192059095, "grad_norm": 21.625, "learning_rate": 2.4353243666334535e-06, "loss": 1.7490839958190918, "step": 3342 }, { "epoch": 1.0292397660818713, "grad_norm": 19.75, "learning_rate": 2.4346476447298353e-06, "loss": 1.5640172958374023, "step": 3344 }, { "epoch": 1.0298553401046475, "grad_norm": 18.125, "learning_rate": 2.433970642501626e-06, "loss": 1.1779654026031494, "step": 3346 }, { "epoch": 1.0304709141274238, "grad_norm": 4.65625, "learning_rate": 2.433293360247859e-06, "loss": 1.087789535522461, "step": 3348 }, { "epoch": 1.0310864881502, "grad_norm": 23.25, "learning_rate": 2.4326157982676892e-06, "loss": 1.2343831062316895, "step": 3350 }, { "epoch": 1.0317020621729762, "grad_norm": 32.25, "learning_rate": 2.4319379568603974e-06, "loss": 1.4259557723999023, "step": 3352 }, { "epoch": 1.0323176361957525, "grad_norm": 16.0, "learning_rate": 2.431259836325386e-06, "loss": 1.3478798866271973, "step": 3354 }, { "epoch": 1.0329332102185287, "grad_norm": 10.0, "learning_rate": 2.4305814369621833e-06, "loss": 1.4005303382873535, "step": 3356 }, { "epoch": 1.033548784241305, "grad_norm": 31.75, "learning_rate": 2.429902759070437e-06, "loss": 1.5675277709960938, "step": 3358 }, { "epoch": 1.0341643582640812, "grad_norm": 9.75, "learning_rate": 2.4292238029499213e-06, "loss": 1.10936439037323, "step": 3360 }, { "epoch": 1.0347799322868574, "grad_norm": 29.375, "learning_rate": 2.428544568900532e-06, "loss": 1.9242689609527588, "step": 3362 }, { "epoch": 1.0353955063096338, "grad_norm": 14.625, "learning_rate": 2.427865057222287e-06, "loss": 1.5668294429779053, "step": 3364 }, { "epoch": 1.03601108033241, "grad_norm": 15.6875, "learning_rate": 2.427185268215328e-06, "loss": 1.1507887840270996, "step": 3366 }, { "epoch": 1.0366266543551863, "grad_norm": 11.25, "learning_rate": 2.4265052021799166e-06, "loss": 1.597625494003296, "step": 3368 }, { "epoch": 1.0372422283779625, "grad_norm": 14.5625, "learning_rate": 2.4258248594164414e-06, "loss": 1.2777953147888184, "step": 3370 }, { "epoch": 1.0378578024007388, "grad_norm": 5.90625, "learning_rate": 2.4251442402254084e-06, "loss": 1.0946611166000366, "step": 3372 }, { "epoch": 1.038473376423515, "grad_norm": 4.4375, "learning_rate": 2.4244633449074495e-06, "loss": 1.0733752250671387, "step": 3374 }, { "epoch": 1.0390889504462912, "grad_norm": 47.0, "learning_rate": 2.423782173763317e-06, "loss": 1.3776036500930786, "step": 3376 }, { "epoch": 1.0397045244690675, "grad_norm": 11.8125, "learning_rate": 2.423100727093883e-06, "loss": 1.0430665016174316, "step": 3378 }, { "epoch": 1.0403200984918437, "grad_norm": 7.6875, "learning_rate": 2.4224190052001455e-06, "loss": 1.343729019165039, "step": 3380 }, { "epoch": 1.04093567251462, "grad_norm": 27.125, "learning_rate": 2.4217370083832206e-06, "loss": 1.4689085483551025, "step": 3382 }, { "epoch": 1.0415512465373962, "grad_norm": 3.875, "learning_rate": 2.421054736944347e-06, "loss": 1.1416873931884766, "step": 3384 }, { "epoch": 1.0421668205601724, "grad_norm": 8.4375, "learning_rate": 2.4203721911848854e-06, "loss": 0.9953557252883911, "step": 3386 }, { "epoch": 1.0427823945829486, "grad_norm": 12.125, "learning_rate": 2.419689371406317e-06, "loss": 1.7007834911346436, "step": 3388 }, { "epoch": 1.0433979686057249, "grad_norm": 5.0625, "learning_rate": 2.419006277910243e-06, "loss": 1.4867901802062988, "step": 3390 }, { "epoch": 1.044013542628501, "grad_norm": 10.8125, "learning_rate": 2.4183229109983885e-06, "loss": 1.3887057304382324, "step": 3392 }, { "epoch": 1.0446291166512773, "grad_norm": 10.4375, "learning_rate": 2.417639270972596e-06, "loss": 1.2495818138122559, "step": 3394 }, { "epoch": 1.0452446906740536, "grad_norm": 21.125, "learning_rate": 2.4169553581348307e-06, "loss": 1.646527647972107, "step": 3396 }, { "epoch": 1.0458602646968298, "grad_norm": 13.1875, "learning_rate": 2.416271172787177e-06, "loss": 1.0748255252838135, "step": 3398 }, { "epoch": 1.046475838719606, "grad_norm": 16.375, "learning_rate": 2.415586715231842e-06, "loss": 1.2074308395385742, "step": 3400 }, { "epoch": 1.0470914127423823, "grad_norm": 18.5, "learning_rate": 2.414901985771149e-06, "loss": 1.6861954927444458, "step": 3402 }, { "epoch": 1.0477069867651585, "grad_norm": 6.75, "learning_rate": 2.414216984707545e-06, "loss": 1.1067943572998047, "step": 3404 }, { "epoch": 1.0483225607879347, "grad_norm": 26.875, "learning_rate": 2.4135317123435956e-06, "loss": 1.556877613067627, "step": 3406 }, { "epoch": 1.048938134810711, "grad_norm": 44.5, "learning_rate": 2.412846168981987e-06, "loss": 1.095926284790039, "step": 3408 }, { "epoch": 1.0495537088334872, "grad_norm": 15.6875, "learning_rate": 2.412160354925525e-06, "loss": 1.4250001907348633, "step": 3410 }, { "epoch": 1.0501692828562634, "grad_norm": 2.875, "learning_rate": 2.411474270477132e-06, "loss": 0.9531576633453369, "step": 3412 }, { "epoch": 1.0507848568790397, "grad_norm": 53.0, "learning_rate": 2.410787915939854e-06, "loss": 0.9053239822387695, "step": 3414 }, { "epoch": 1.051400430901816, "grad_norm": 21.625, "learning_rate": 2.4101012916168544e-06, "loss": 1.9192160367965698, "step": 3416 }, { "epoch": 1.0520160049245921, "grad_norm": 14.875, "learning_rate": 2.4094143978114163e-06, "loss": 1.3932238817214966, "step": 3418 }, { "epoch": 1.0526315789473684, "grad_norm": 3.71875, "learning_rate": 2.4087272348269403e-06, "loss": 1.208620309829712, "step": 3420 }, { "epoch": 1.0532471529701446, "grad_norm": 18.5, "learning_rate": 2.408039802966949e-06, "loss": 1.367321252822876, "step": 3422 }, { "epoch": 1.0538627269929208, "grad_norm": 5.34375, "learning_rate": 2.4073521025350797e-06, "loss": 1.345115065574646, "step": 3424 }, { "epoch": 1.054478301015697, "grad_norm": 17.25, "learning_rate": 2.406664133835092e-06, "loss": 1.4480860233306885, "step": 3426 }, { "epoch": 1.0550938750384733, "grad_norm": 7.625, "learning_rate": 2.405975897170862e-06, "loss": 1.298915147781372, "step": 3428 }, { "epoch": 1.0557094490612495, "grad_norm": 22.625, "learning_rate": 2.4052873928463838e-06, "loss": 1.5393712520599365, "step": 3430 }, { "epoch": 1.0563250230840258, "grad_norm": 29.375, "learning_rate": 2.4045986211657718e-06, "loss": 0.9303069114685059, "step": 3432 }, { "epoch": 1.056940597106802, "grad_norm": 25.625, "learning_rate": 2.4039095824332567e-06, "loss": 1.3752050399780273, "step": 3434 }, { "epoch": 1.0575561711295782, "grad_norm": 13.9375, "learning_rate": 2.4032202769531878e-06, "loss": 0.9744688272476196, "step": 3436 }, { "epoch": 1.0581717451523547, "grad_norm": 9.4375, "learning_rate": 2.4025307050300317e-06, "loss": 1.7089290618896484, "step": 3438 }, { "epoch": 1.058787319175131, "grad_norm": 16.625, "learning_rate": 2.401840866968373e-06, "loss": 1.3734301328659058, "step": 3440 }, { "epoch": 1.0594028931979071, "grad_norm": 3.53125, "learning_rate": 2.4011507630729158e-06, "loss": 1.1722290515899658, "step": 3442 }, { "epoch": 1.0600184672206834, "grad_norm": 12.5625, "learning_rate": 2.4004603936484778e-06, "loss": 1.180796504020691, "step": 3444 }, { "epoch": 1.0606340412434596, "grad_norm": 21.75, "learning_rate": 2.399769758999996e-06, "loss": 1.2893471717834473, "step": 3446 }, { "epoch": 1.0612496152662358, "grad_norm": 20.75, "learning_rate": 2.3990788594325256e-06, "loss": 1.4683547019958496, "step": 3448 }, { "epoch": 1.061865189289012, "grad_norm": 16.375, "learning_rate": 2.3983876952512377e-06, "loss": 1.2302544116973877, "step": 3450 }, { "epoch": 1.0624807633117883, "grad_norm": 4.09375, "learning_rate": 2.397696266761419e-06, "loss": 1.1894465684890747, "step": 3452 }, { "epoch": 1.0630963373345645, "grad_norm": 109.0, "learning_rate": 2.397004574268475e-06, "loss": 1.4176809787750244, "step": 3454 }, { "epoch": 1.0637119113573408, "grad_norm": 24.25, "learning_rate": 2.396312618077928e-06, "loss": 1.4848954677581787, "step": 3456 }, { "epoch": 1.064327485380117, "grad_norm": 15.5625, "learning_rate": 2.395620398495414e-06, "loss": 1.4666824340820312, "step": 3458 }, { "epoch": 1.0649430594028932, "grad_norm": 27.25, "learning_rate": 2.394927915826689e-06, "loss": 1.942247986793518, "step": 3460 }, { "epoch": 1.0655586334256695, "grad_norm": 17.75, "learning_rate": 2.394235170377622e-06, "loss": 1.3470737934112549, "step": 3462 }, { "epoch": 1.0661742074484457, "grad_norm": 38.25, "learning_rate": 2.3935421624542e-06, "loss": 1.3554115295410156, "step": 3464 }, { "epoch": 1.066789781471222, "grad_norm": 17.125, "learning_rate": 2.3928488923625255e-06, "loss": 1.2180290222167969, "step": 3466 }, { "epoch": 1.0674053554939982, "grad_norm": 23.625, "learning_rate": 2.392155360408817e-06, "loss": 1.5887868404388428, "step": 3468 }, { "epoch": 1.0680209295167744, "grad_norm": 12.9375, "learning_rate": 2.391461566899407e-06, "loss": 1.319965124130249, "step": 3470 }, { "epoch": 1.0686365035395506, "grad_norm": 15.375, "learning_rate": 2.3907675121407463e-06, "loss": 1.2301232814788818, "step": 3472 }, { "epoch": 1.0692520775623269, "grad_norm": 13.125, "learning_rate": 2.3900731964393996e-06, "loss": 1.1541111469268799, "step": 3474 }, { "epoch": 1.069867651585103, "grad_norm": 3.40625, "learning_rate": 2.3893786201020466e-06, "loss": 1.2411861419677734, "step": 3476 }, { "epoch": 1.0704832256078793, "grad_norm": 12.875, "learning_rate": 2.3886837834354815e-06, "loss": 1.3128635883331299, "step": 3478 }, { "epoch": 1.0710987996306556, "grad_norm": 8.6875, "learning_rate": 2.387988686746616e-06, "loss": 1.594773292541504, "step": 3480 }, { "epoch": 1.0717143736534318, "grad_norm": 12.25, "learning_rate": 2.3872933303424746e-06, "loss": 1.1944515705108643, "step": 3482 }, { "epoch": 1.072329947676208, "grad_norm": 24.625, "learning_rate": 2.386597714530197e-06, "loss": 1.2775322198867798, "step": 3484 }, { "epoch": 1.0729455216989843, "grad_norm": 12.9375, "learning_rate": 2.385901839617037e-06, "loss": 1.2027215957641602, "step": 3486 }, { "epoch": 1.0735610957217605, "grad_norm": 7.6875, "learning_rate": 2.3852057059103642e-06, "loss": 1.2455811500549316, "step": 3488 }, { "epoch": 1.0741766697445367, "grad_norm": 14.375, "learning_rate": 2.3845093137176605e-06, "loss": 1.3259642124176025, "step": 3490 }, { "epoch": 1.074792243767313, "grad_norm": 12.625, "learning_rate": 2.383812663346524e-06, "loss": 1.031188726425171, "step": 3492 }, { "epoch": 1.0754078177900892, "grad_norm": 12.8125, "learning_rate": 2.3831157551046664e-06, "loss": 1.3350074291229248, "step": 3494 }, { "epoch": 1.0760233918128654, "grad_norm": 9.5625, "learning_rate": 2.3824185892999113e-06, "loss": 1.3674886226654053, "step": 3496 }, { "epoch": 1.0766389658356417, "grad_norm": 7.71875, "learning_rate": 2.3817211662401994e-06, "loss": 1.2225689888000488, "step": 3498 }, { "epoch": 1.077254539858418, "grad_norm": 9.75, "learning_rate": 2.381023486233582e-06, "loss": 1.1854305267333984, "step": 3500 }, { "epoch": 1.0778701138811941, "grad_norm": 10.0, "learning_rate": 2.380325549588226e-06, "loss": 1.411360502243042, "step": 3502 }, { "epoch": 1.0784856879039704, "grad_norm": 8.9375, "learning_rate": 2.3796273566124105e-06, "loss": 1.5251002311706543, "step": 3504 }, { "epoch": 1.0791012619267466, "grad_norm": 12.9375, "learning_rate": 2.378928907614528e-06, "loss": 1.8434513807296753, "step": 3506 }, { "epoch": 1.0797168359495228, "grad_norm": 17.25, "learning_rate": 2.3782302029030837e-06, "loss": 1.564579963684082, "step": 3508 }, { "epoch": 1.080332409972299, "grad_norm": 20.875, "learning_rate": 2.3775312427866972e-06, "loss": 1.0563139915466309, "step": 3510 }, { "epoch": 1.0809479839950753, "grad_norm": 13.5625, "learning_rate": 2.3768320275740995e-06, "loss": 1.7783114910125732, "step": 3512 }, { "epoch": 1.0815635580178515, "grad_norm": 51.25, "learning_rate": 2.3761325575741357e-06, "loss": 1.170400619506836, "step": 3514 }, { "epoch": 1.082179132040628, "grad_norm": 8.0625, "learning_rate": 2.375432833095761e-06, "loss": 1.2846317291259766, "step": 3516 }, { "epoch": 1.0827947060634042, "grad_norm": 13.8125, "learning_rate": 2.374732854448045e-06, "loss": 1.447777271270752, "step": 3518 }, { "epoch": 1.0834102800861805, "grad_norm": 13.5, "learning_rate": 2.3740326219401694e-06, "loss": 1.1701204776763916, "step": 3520 }, { "epoch": 1.0840258541089567, "grad_norm": 11.9375, "learning_rate": 2.3733321358814276e-06, "loss": 1.0754103660583496, "step": 3522 }, { "epoch": 1.084641428131733, "grad_norm": 13.9375, "learning_rate": 2.3726313965812255e-06, "loss": 1.5020666122436523, "step": 3524 }, { "epoch": 1.0852570021545092, "grad_norm": 14.3125, "learning_rate": 2.3719304043490795e-06, "loss": 1.2469940185546875, "step": 3526 }, { "epoch": 1.0858725761772854, "grad_norm": 10.375, "learning_rate": 2.3712291594946197e-06, "loss": 1.41176176071167, "step": 3528 }, { "epoch": 1.0864881502000616, "grad_norm": 13.125, "learning_rate": 2.370527662327586e-06, "loss": 1.514477014541626, "step": 3530 }, { "epoch": 1.0871037242228379, "grad_norm": 22.875, "learning_rate": 2.369825913157831e-06, "loss": 1.0701708793640137, "step": 3532 }, { "epoch": 1.087719298245614, "grad_norm": 35.5, "learning_rate": 2.3691239122953174e-06, "loss": 1.7896881103515625, "step": 3534 }, { "epoch": 1.0883348722683903, "grad_norm": 16.375, "learning_rate": 2.3684216600501205e-06, "loss": 1.608804702758789, "step": 3536 }, { "epoch": 1.0889504462911666, "grad_norm": 9.8125, "learning_rate": 2.3677191567324254e-06, "loss": 0.8485841155052185, "step": 3538 }, { "epoch": 1.0895660203139428, "grad_norm": 17.125, "learning_rate": 2.3670164026525296e-06, "loss": 1.7242555618286133, "step": 3540 }, { "epoch": 1.090181594336719, "grad_norm": 4.5, "learning_rate": 2.3663133981208388e-06, "loss": 0.7557381987571716, "step": 3542 }, { "epoch": 1.0907971683594953, "grad_norm": 16.25, "learning_rate": 2.3656101434478724e-06, "loss": 1.5691182613372803, "step": 3544 }, { "epoch": 1.0914127423822715, "grad_norm": 11.0, "learning_rate": 2.3649066389442577e-06, "loss": 1.3667467832565308, "step": 3546 }, { "epoch": 1.0920283164050477, "grad_norm": 29.25, "learning_rate": 2.3642028849207337e-06, "loss": 0.8939248323440552, "step": 3548 }, { "epoch": 1.092643890427824, "grad_norm": 6.46875, "learning_rate": 2.3634988816881503e-06, "loss": 1.3205374479293823, "step": 3550 }, { "epoch": 1.0932594644506002, "grad_norm": 22.625, "learning_rate": 2.362794629557465e-06, "loss": 1.3853535652160645, "step": 3552 }, { "epoch": 1.0938750384733764, "grad_norm": 13.8125, "learning_rate": 2.3620901288397484e-06, "loss": 1.7651689052581787, "step": 3554 }, { "epoch": 1.0944906124961526, "grad_norm": 26.625, "learning_rate": 2.3613853798461783e-06, "loss": 1.0660011768341064, "step": 3556 }, { "epoch": 1.0951061865189289, "grad_norm": 28.875, "learning_rate": 2.360680382888043e-06, "loss": 1.4802651405334473, "step": 3558 }, { "epoch": 1.0957217605417051, "grad_norm": 28.125, "learning_rate": 2.3599751382767406e-06, "loss": 2.1089253425598145, "step": 3560 }, { "epoch": 1.0963373345644813, "grad_norm": 16.75, "learning_rate": 2.359269646323779e-06, "loss": 1.5472657680511475, "step": 3562 }, { "epoch": 1.0969529085872576, "grad_norm": 9.875, "learning_rate": 2.3585639073407743e-06, "loss": 0.8907637000083923, "step": 3564 }, { "epoch": 1.0975684826100338, "grad_norm": 22.125, "learning_rate": 2.3578579216394523e-06, "loss": 1.1718206405639648, "step": 3566 }, { "epoch": 1.09818405663281, "grad_norm": 30.5, "learning_rate": 2.357151689531647e-06, "loss": 0.9224869608879089, "step": 3568 }, { "epoch": 1.0987996306555863, "grad_norm": 11.9375, "learning_rate": 2.356445211329304e-06, "loss": 1.2034261226654053, "step": 3570 }, { "epoch": 1.0994152046783625, "grad_norm": 18.625, "learning_rate": 2.355738487344473e-06, "loss": 1.4323277473449707, "step": 3572 }, { "epoch": 1.1000307787011387, "grad_norm": 8.875, "learning_rate": 2.355031517889317e-06, "loss": 0.9144423007965088, "step": 3574 }, { "epoch": 1.100646352723915, "grad_norm": 20.625, "learning_rate": 2.3543243032761033e-06, "loss": 1.5986289978027344, "step": 3576 }, { "epoch": 1.1012619267466912, "grad_norm": 10.0625, "learning_rate": 2.353616843817211e-06, "loss": 1.3675158023834229, "step": 3578 }, { "epoch": 1.1018775007694674, "grad_norm": 9.4375, "learning_rate": 2.352909139825124e-06, "loss": 1.145599126815796, "step": 3580 }, { "epoch": 1.1024930747922437, "grad_norm": 19.25, "learning_rate": 2.352201191612438e-06, "loss": 1.3675510883331299, "step": 3582 }, { "epoch": 1.10310864881502, "grad_norm": 10.125, "learning_rate": 2.351492999491853e-06, "loss": 1.4357773065567017, "step": 3584 }, { "epoch": 1.1037242228377961, "grad_norm": 13.5625, "learning_rate": 2.350784563776179e-06, "loss": 0.8242530822753906, "step": 3586 }, { "epoch": 1.1043397968605726, "grad_norm": 8.9375, "learning_rate": 2.350075884778333e-06, "loss": 1.350933313369751, "step": 3588 }, { "epoch": 1.1049553708833488, "grad_norm": 8.6875, "learning_rate": 2.349366962811339e-06, "loss": 1.3204925060272217, "step": 3590 }, { "epoch": 1.105570944906125, "grad_norm": 12.625, "learning_rate": 2.348657798188328e-06, "loss": 1.407926321029663, "step": 3592 }, { "epoch": 1.1061865189289013, "grad_norm": 9.0625, "learning_rate": 2.3479483912225396e-06, "loss": 1.3072185516357422, "step": 3594 }, { "epoch": 1.1068020929516775, "grad_norm": 103.5, "learning_rate": 2.3472387422273202e-06, "loss": 1.3156657218933105, "step": 3596 }, { "epoch": 1.1074176669744538, "grad_norm": 4.125, "learning_rate": 2.346528851516122e-06, "loss": 1.2110552787780762, "step": 3598 }, { "epoch": 1.10803324099723, "grad_norm": 13.6875, "learning_rate": 2.3458187194025036e-06, "loss": 0.8570442199707031, "step": 3600 }, { "epoch": 1.1086488150200062, "grad_norm": 10.9375, "learning_rate": 2.3451083462001325e-06, "loss": 1.2895807027816772, "step": 3602 }, { "epoch": 1.1092643890427825, "grad_norm": 11.5, "learning_rate": 2.3443977322227804e-06, "loss": 1.4392380714416504, "step": 3604 }, { "epoch": 1.1098799630655587, "grad_norm": 16.0, "learning_rate": 2.3436868777843278e-06, "loss": 1.277353286743164, "step": 3606 }, { "epoch": 1.110495537088335, "grad_norm": 17.0, "learning_rate": 2.3429757831987573e-06, "loss": 1.3384151458740234, "step": 3608 }, { "epoch": 1.1111111111111112, "grad_norm": 6.15625, "learning_rate": 2.3422644487801633e-06, "loss": 1.3099740743637085, "step": 3610 }, { "epoch": 1.1117266851338874, "grad_norm": 104.5, "learning_rate": 2.3415528748427407e-06, "loss": 1.686471700668335, "step": 3612 }, { "epoch": 1.1123422591566636, "grad_norm": 35.25, "learning_rate": 2.3408410617007943e-06, "loss": 1.1620298624038696, "step": 3614 }, { "epoch": 1.1129578331794399, "grad_norm": 9.6875, "learning_rate": 2.3401290096687307e-06, "loss": 0.7311451435089111, "step": 3616 }, { "epoch": 1.113573407202216, "grad_norm": 19.125, "learning_rate": 2.3394167190610658e-06, "loss": 1.417179822921753, "step": 3618 }, { "epoch": 1.1141889812249923, "grad_norm": 7.84375, "learning_rate": 2.338704190192418e-06, "loss": 1.3925962448120117, "step": 3620 }, { "epoch": 1.1148045552477686, "grad_norm": 27.125, "learning_rate": 2.3379914233775135e-06, "loss": 1.372132658958435, "step": 3622 }, { "epoch": 1.1154201292705448, "grad_norm": 43.25, "learning_rate": 2.337278418931181e-06, "loss": 1.3415924310684204, "step": 3624 }, { "epoch": 1.116035703293321, "grad_norm": 16.0, "learning_rate": 2.3365651771683563e-06, "loss": 1.215116024017334, "step": 3626 }, { "epoch": 1.1166512773160973, "grad_norm": 9.4375, "learning_rate": 2.335851698404078e-06, "loss": 1.2907482385635376, "step": 3628 }, { "epoch": 1.1172668513388735, "grad_norm": 10.5625, "learning_rate": 2.3351379829534917e-06, "loss": 1.1975575685501099, "step": 3630 }, { "epoch": 1.1178824253616497, "grad_norm": 13.375, "learning_rate": 2.3344240311318454e-06, "loss": 1.269575834274292, "step": 3632 }, { "epoch": 1.118497999384426, "grad_norm": 54.5, "learning_rate": 2.333709843254493e-06, "loss": 1.30656898021698, "step": 3634 }, { "epoch": 1.1191135734072022, "grad_norm": 17.125, "learning_rate": 2.3329954196368916e-06, "loss": 1.7163310050964355, "step": 3636 }, { "epoch": 1.1197291474299784, "grad_norm": 31.0, "learning_rate": 2.3322807605946033e-06, "loss": 0.9354555606842041, "step": 3638 }, { "epoch": 1.1203447214527547, "grad_norm": 22.75, "learning_rate": 2.3315658664432933e-06, "loss": 1.4401531219482422, "step": 3640 }, { "epoch": 1.1209602954755309, "grad_norm": 12.0, "learning_rate": 2.330850737498732e-06, "loss": 1.307511568069458, "step": 3642 }, { "epoch": 1.1215758694983071, "grad_norm": 22.25, "learning_rate": 2.330135374076792e-06, "loss": 1.3069522380828857, "step": 3644 }, { "epoch": 1.1221914435210834, "grad_norm": 4.5, "learning_rate": 2.3294197764934507e-06, "loss": 1.1047172546386719, "step": 3646 }, { "epoch": 1.1228070175438596, "grad_norm": 9.1875, "learning_rate": 2.3287039450647888e-06, "loss": 1.3423113822937012, "step": 3648 }, { "epoch": 1.1234225915666358, "grad_norm": 14.4375, "learning_rate": 2.3279878801069884e-06, "loss": 1.4939258098602295, "step": 3650 }, { "epoch": 1.124038165589412, "grad_norm": 19.125, "learning_rate": 2.3272715819363368e-06, "loss": 0.857292652130127, "step": 3652 }, { "epoch": 1.1246537396121883, "grad_norm": 5.875, "learning_rate": 2.326555050869224e-06, "loss": 1.2719879150390625, "step": 3654 }, { "epoch": 1.1252693136349645, "grad_norm": 15.9375, "learning_rate": 2.3258382872221423e-06, "loss": 1.321458339691162, "step": 3656 }, { "epoch": 1.1258848876577408, "grad_norm": 19.375, "learning_rate": 2.3251212913116876e-06, "loss": 1.5195269584655762, "step": 3658 }, { "epoch": 1.126500461680517, "grad_norm": 12.0625, "learning_rate": 2.3244040634545574e-06, "loss": 1.1064283847808838, "step": 3660 }, { "epoch": 1.1271160357032932, "grad_norm": 18.875, "learning_rate": 2.3236866039675514e-06, "loss": 1.3086144924163818, "step": 3662 }, { "epoch": 1.1277316097260695, "grad_norm": 19.75, "learning_rate": 2.322968913167574e-06, "loss": 1.4040865898132324, "step": 3664 }, { "epoch": 1.1283471837488457, "grad_norm": 19.25, "learning_rate": 2.322250991371628e-06, "loss": 1.2450525760650635, "step": 3666 }, { "epoch": 1.128962757771622, "grad_norm": 10.625, "learning_rate": 2.321532838896822e-06, "loss": 0.8933705687522888, "step": 3668 }, { "epoch": 1.1295783317943984, "grad_norm": 26.125, "learning_rate": 2.320814456060363e-06, "loss": 1.368276596069336, "step": 3670 }, { "epoch": 1.1301939058171746, "grad_norm": 16.875, "learning_rate": 2.3200958431795637e-06, "loss": 1.0339691638946533, "step": 3672 }, { "epoch": 1.1308094798399508, "grad_norm": 11.4375, "learning_rate": 2.319377000571835e-06, "loss": 1.3817522525787354, "step": 3674 }, { "epoch": 1.131425053862727, "grad_norm": 10.125, "learning_rate": 2.3186579285546903e-06, "loss": 1.1731239557266235, "step": 3676 }, { "epoch": 1.1320406278855033, "grad_norm": 26.875, "learning_rate": 2.3179386274457446e-06, "loss": 1.2372663021087646, "step": 3678 }, { "epoch": 1.1326562019082795, "grad_norm": 18.25, "learning_rate": 2.3172190975627146e-06, "loss": 1.3585962057113647, "step": 3680 }, { "epoch": 1.1332717759310558, "grad_norm": 22.375, "learning_rate": 2.316499339223417e-06, "loss": 1.6335867643356323, "step": 3682 }, { "epoch": 1.133887349953832, "grad_norm": 23.25, "learning_rate": 2.3157793527457697e-06, "loss": 1.7519006729125977, "step": 3684 }, { "epoch": 1.1345029239766082, "grad_norm": 17.875, "learning_rate": 2.3150591384477923e-06, "loss": 1.8296453952789307, "step": 3686 }, { "epoch": 1.1351184979993845, "grad_norm": 22.75, "learning_rate": 2.314338696647603e-06, "loss": 0.7551788091659546, "step": 3688 }, { "epoch": 1.1357340720221607, "grad_norm": 86.0, "learning_rate": 2.3136180276634238e-06, "loss": 1.4087929725646973, "step": 3690 }, { "epoch": 1.136349646044937, "grad_norm": 8.3125, "learning_rate": 2.3128971318135732e-06, "loss": 1.5488674640655518, "step": 3692 }, { "epoch": 1.1369652200677132, "grad_norm": 13.25, "learning_rate": 2.3121760094164727e-06, "loss": 1.1042814254760742, "step": 3694 }, { "epoch": 1.1375807940904894, "grad_norm": 16.375, "learning_rate": 2.3114546607906422e-06, "loss": 1.7696267366409302, "step": 3696 }, { "epoch": 1.1381963681132656, "grad_norm": 15.5, "learning_rate": 2.3107330862547025e-06, "loss": 1.6327096223831177, "step": 3698 }, { "epoch": 1.1388119421360419, "grad_norm": 21.375, "learning_rate": 2.3100112861273747e-06, "loss": 1.5142444372177124, "step": 3700 }, { "epoch": 1.139427516158818, "grad_norm": 14.5, "learning_rate": 2.3092892607274777e-06, "loss": 1.1201368570327759, "step": 3702 }, { "epoch": 1.1400430901815943, "grad_norm": 18.125, "learning_rate": 2.3085670103739305e-06, "loss": 1.3404749631881714, "step": 3704 }, { "epoch": 1.1406586642043706, "grad_norm": 7.96875, "learning_rate": 2.3078445353857537e-06, "loss": 1.3205509185791016, "step": 3706 }, { "epoch": 1.1412742382271468, "grad_norm": 24.0, "learning_rate": 2.307121836082063e-06, "loss": 1.7567417621612549, "step": 3708 }, { "epoch": 1.141889812249923, "grad_norm": 24.375, "learning_rate": 2.3063989127820773e-06, "loss": 1.647988200187683, "step": 3710 }, { "epoch": 1.1425053862726993, "grad_norm": 9.125, "learning_rate": 2.3056757658051123e-06, "loss": 1.558417558670044, "step": 3712 }, { "epoch": 1.1431209602954755, "grad_norm": 10.125, "learning_rate": 2.304952395470583e-06, "loss": 1.4553438425064087, "step": 3714 }, { "epoch": 1.1437365343182517, "grad_norm": 12.125, "learning_rate": 2.3042288020980025e-06, "loss": 1.2713730335235596, "step": 3716 }, { "epoch": 1.144352108341028, "grad_norm": 14.5, "learning_rate": 2.3035049860069827e-06, "loss": 1.8827029466629028, "step": 3718 }, { "epoch": 1.1449676823638042, "grad_norm": 12.625, "learning_rate": 2.302780947517234e-06, "loss": 1.3714704513549805, "step": 3720 }, { "epoch": 1.1455832563865804, "grad_norm": 8.8125, "learning_rate": 2.3020566869485657e-06, "loss": 1.372882604598999, "step": 3722 }, { "epoch": 1.1461988304093567, "grad_norm": 48.0, "learning_rate": 2.301332204620883e-06, "loss": 1.7148422002792358, "step": 3724 }, { "epoch": 1.146814404432133, "grad_norm": 10.9375, "learning_rate": 2.300607500854193e-06, "loss": 1.164696455001831, "step": 3726 }, { "epoch": 1.1474299784549091, "grad_norm": 9.0625, "learning_rate": 2.2998825759685964e-06, "loss": 1.1495816707611084, "step": 3728 }, { "epoch": 1.1480455524776854, "grad_norm": 30.25, "learning_rate": 2.2991574302842934e-06, "loss": 1.2931642532348633, "step": 3730 }, { "epoch": 1.1486611265004616, "grad_norm": 15.375, "learning_rate": 2.298432064121582e-06, "loss": 1.1962416172027588, "step": 3732 }, { "epoch": 1.1492767005232378, "grad_norm": 18.875, "learning_rate": 2.297706477800858e-06, "loss": 1.4143441915512085, "step": 3734 }, { "epoch": 1.1498922745460143, "grad_norm": 19.5, "learning_rate": 2.296980671642612e-06, "loss": 1.5150713920593262, "step": 3736 }, { "epoch": 1.1505078485687905, "grad_norm": 15.5, "learning_rate": 2.296254645967435e-06, "loss": 1.386347770690918, "step": 3738 }, { "epoch": 1.1511234225915667, "grad_norm": 23.0, "learning_rate": 2.2955284010960128e-06, "loss": 1.5230674743652344, "step": 3740 }, { "epoch": 1.151738996614343, "grad_norm": 7.625, "learning_rate": 2.294801937349129e-06, "loss": 1.0187252759933472, "step": 3742 }, { "epoch": 1.1523545706371192, "grad_norm": 16.75, "learning_rate": 2.294075255047662e-06, "loss": 0.9895851016044617, "step": 3744 }, { "epoch": 1.1529701446598954, "grad_norm": 20.5, "learning_rate": 2.29334835451259e-06, "loss": 1.5257803201675415, "step": 3746 }, { "epoch": 1.1535857186826717, "grad_norm": 6.46875, "learning_rate": 2.292621236064985e-06, "loss": 1.4674441814422607, "step": 3748 }, { "epoch": 1.154201292705448, "grad_norm": 39.0, "learning_rate": 2.291893900026016e-06, "loss": 1.4346339702606201, "step": 3750 }, { "epoch": 1.1548168667282241, "grad_norm": 14.625, "learning_rate": 2.2911663467169485e-06, "loss": 1.461643934249878, "step": 3752 }, { "epoch": 1.1554324407510004, "grad_norm": 11.8125, "learning_rate": 2.2904385764591426e-06, "loss": 1.651922583580017, "step": 3754 }, { "epoch": 1.1560480147737766, "grad_norm": 9.4375, "learning_rate": 2.289710589574057e-06, "loss": 1.1730600595474243, "step": 3756 }, { "epoch": 1.1566635887965528, "grad_norm": 17.25, "learning_rate": 2.2889823863832433e-06, "loss": 1.525307536125183, "step": 3758 }, { "epoch": 1.157279162819329, "grad_norm": 12.5625, "learning_rate": 2.2882539672083495e-06, "loss": 1.4266164302825928, "step": 3760 }, { "epoch": 1.1578947368421053, "grad_norm": 17.0, "learning_rate": 2.2875253323711195e-06, "loss": 1.7639350891113281, "step": 3762 }, { "epoch": 1.1585103108648815, "grad_norm": 8.375, "learning_rate": 2.286796482193392e-06, "loss": 1.4397724866867065, "step": 3764 }, { "epoch": 1.1591258848876578, "grad_norm": 12.0, "learning_rate": 2.286067416997101e-06, "loss": 1.2925528287887573, "step": 3766 }, { "epoch": 1.159741458910434, "grad_norm": 22.75, "learning_rate": 2.2853381371042762e-06, "loss": 1.3446506261825562, "step": 3768 }, { "epoch": 1.1603570329332102, "grad_norm": 23.75, "learning_rate": 2.2846086428370396e-06, "loss": 1.6084314584732056, "step": 3770 }, { "epoch": 1.1609726069559865, "grad_norm": 10.3125, "learning_rate": 2.283878934517611e-06, "loss": 1.509368896484375, "step": 3772 }, { "epoch": 1.1615881809787627, "grad_norm": 21.75, "learning_rate": 2.2831490124683035e-06, "loss": 1.6360807418823242, "step": 3774 }, { "epoch": 1.162203755001539, "grad_norm": 13.9375, "learning_rate": 2.2824188770115244e-06, "loss": 1.5900465250015259, "step": 3776 }, { "epoch": 1.1628193290243152, "grad_norm": 8.0625, "learning_rate": 2.2816885284697742e-06, "loss": 1.228595495223999, "step": 3778 }, { "epoch": 1.1634349030470914, "grad_norm": 19.375, "learning_rate": 2.2809579671656504e-06, "loss": 1.2203516960144043, "step": 3780 }, { "epoch": 1.1640504770698676, "grad_norm": 24.5, "learning_rate": 2.280227193421841e-06, "loss": 1.2803552150726318, "step": 3782 }, { "epoch": 1.1646660510926439, "grad_norm": 38.75, "learning_rate": 2.2794962075611312e-06, "loss": 1.5093402862548828, "step": 3784 }, { "epoch": 1.16528162511542, "grad_norm": 19.5, "learning_rate": 2.278765009906398e-06, "loss": 1.3051506280899048, "step": 3786 }, { "epoch": 1.1658971991381963, "grad_norm": 6.0625, "learning_rate": 2.2780336007806107e-06, "loss": 1.3691089153289795, "step": 3788 }, { "epoch": 1.1665127731609726, "grad_norm": 15.375, "learning_rate": 2.2773019805068355e-06, "loss": 1.620005488395691, "step": 3790 }, { "epoch": 1.1671283471837488, "grad_norm": 20.75, "learning_rate": 2.276570149408229e-06, "loss": 1.3951692581176758, "step": 3792 }, { "epoch": 1.167743921206525, "grad_norm": 16.5, "learning_rate": 2.2758381078080425e-06, "loss": 0.8126804828643799, "step": 3794 }, { "epoch": 1.1683594952293013, "grad_norm": 9.625, "learning_rate": 2.2751058560296183e-06, "loss": 1.2357356548309326, "step": 3796 }, { "epoch": 1.1689750692520775, "grad_norm": 12.75, "learning_rate": 2.2743733943963937e-06, "loss": 1.2538156509399414, "step": 3798 }, { "epoch": 1.1695906432748537, "grad_norm": 16.5, "learning_rate": 2.2736407232318983e-06, "loss": 1.3101701736450195, "step": 3800 }, { "epoch": 1.17020621729763, "grad_norm": 9.9375, "learning_rate": 2.2729078428597526e-06, "loss": 1.4193048477172852, "step": 3802 }, { "epoch": 1.1708217913204062, "grad_norm": 14.375, "learning_rate": 2.272174753603672e-06, "loss": 1.587726354598999, "step": 3804 }, { "epoch": 1.1714373653431824, "grad_norm": 19.25, "learning_rate": 2.271441455787462e-06, "loss": 1.8314234018325806, "step": 3806 }, { "epoch": 1.1720529393659587, "grad_norm": 16.875, "learning_rate": 2.2707079497350205e-06, "loss": 1.4434683322906494, "step": 3808 }, { "epoch": 1.172668513388735, "grad_norm": 78.0, "learning_rate": 2.26997423577034e-06, "loss": 0.9510101079940796, "step": 3810 }, { "epoch": 1.1732840874115111, "grad_norm": 16.5, "learning_rate": 2.269240314217501e-06, "loss": 1.235957145690918, "step": 3812 }, { "epoch": 1.1738996614342874, "grad_norm": 13.4375, "learning_rate": 2.2685061854006778e-06, "loss": 1.0225133895874023, "step": 3814 }, { "epoch": 1.1745152354570636, "grad_norm": 8.8125, "learning_rate": 2.2677718496441363e-06, "loss": 1.081822395324707, "step": 3816 }, { "epoch": 1.1751308094798398, "grad_norm": 31.5, "learning_rate": 2.267037307272234e-06, "loss": 1.5427581071853638, "step": 3818 }, { "epoch": 1.175746383502616, "grad_norm": 16.125, "learning_rate": 2.2663025586094177e-06, "loss": 1.3339014053344727, "step": 3820 }, { "epoch": 1.1763619575253925, "grad_norm": 11.8125, "learning_rate": 2.2655676039802275e-06, "loss": 1.3625433444976807, "step": 3822 }, { "epoch": 1.1769775315481688, "grad_norm": 21.5, "learning_rate": 2.2648324437092943e-06, "loss": 1.2665989398956299, "step": 3824 }, { "epoch": 1.177593105570945, "grad_norm": 12.3125, "learning_rate": 2.264097078121338e-06, "loss": 1.31890869140625, "step": 3826 }, { "epoch": 1.1782086795937212, "grad_norm": 25.625, "learning_rate": 2.263361507541171e-06, "loss": 0.8552164435386658, "step": 3828 }, { "epoch": 1.1788242536164975, "grad_norm": 25.125, "learning_rate": 2.2626257322936967e-06, "loss": 1.3524929285049438, "step": 3830 }, { "epoch": 1.1794398276392737, "grad_norm": 20.5, "learning_rate": 2.2618897527039055e-06, "loss": 1.2687150239944458, "step": 3832 }, { "epoch": 1.18005540166205, "grad_norm": 29.875, "learning_rate": 2.2611535690968824e-06, "loss": 1.767914056777954, "step": 3834 }, { "epoch": 1.1806709756848262, "grad_norm": 22.875, "learning_rate": 2.2604171817978e-06, "loss": 1.393390417098999, "step": 3836 }, { "epoch": 1.1812865497076024, "grad_norm": 6.28125, "learning_rate": 2.259680591131921e-06, "loss": 1.4195334911346436, "step": 3838 }, { "epoch": 1.1819021237303786, "grad_norm": 15.4375, "learning_rate": 2.2589437974245994e-06, "loss": 1.593935251235962, "step": 3840 }, { "epoch": 1.1825176977531549, "grad_norm": 12.6875, "learning_rate": 2.2582068010012767e-06, "loss": 1.3784018754959106, "step": 3842 }, { "epoch": 1.183133271775931, "grad_norm": 10.0625, "learning_rate": 2.257469602187485e-06, "loss": 1.3572193384170532, "step": 3844 }, { "epoch": 1.1837488457987073, "grad_norm": 4.09375, "learning_rate": 2.2567322013088466e-06, "loss": 1.1336941719055176, "step": 3846 }, { "epoch": 1.1843644198214835, "grad_norm": 15.75, "learning_rate": 2.2559945986910728e-06, "loss": 0.8909260034561157, "step": 3848 }, { "epoch": 1.1849799938442598, "grad_norm": 20.375, "learning_rate": 2.255256794659962e-06, "loss": 0.9847655296325684, "step": 3850 }, { "epoch": 1.185595567867036, "grad_norm": 21.875, "learning_rate": 2.2545187895414036e-06, "loss": 1.426526427268982, "step": 3852 }, { "epoch": 1.1862111418898122, "grad_norm": 14.25, "learning_rate": 2.253780583661376e-06, "loss": 1.4254299402236938, "step": 3854 }, { "epoch": 1.1868267159125885, "grad_norm": 21.125, "learning_rate": 2.2530421773459455e-06, "loss": 1.7375472784042358, "step": 3856 }, { "epoch": 1.1874422899353647, "grad_norm": 16.75, "learning_rate": 2.2523035709212657e-06, "loss": 1.397571086883545, "step": 3858 }, { "epoch": 1.188057863958141, "grad_norm": 20.625, "learning_rate": 2.2515647647135815e-06, "loss": 1.0399236679077148, "step": 3860 }, { "epoch": 1.1886734379809172, "grad_norm": 12.3125, "learning_rate": 2.2508257590492237e-06, "loss": 0.8612368702888489, "step": 3862 }, { "epoch": 1.1892890120036934, "grad_norm": 19.875, "learning_rate": 2.250086554254612e-06, "loss": 1.3931348323822021, "step": 3864 }, { "epoch": 1.1899045860264696, "grad_norm": 33.5, "learning_rate": 2.2493471506562544e-06, "loss": 1.150251865386963, "step": 3866 }, { "epoch": 1.1905201600492459, "grad_norm": 21.75, "learning_rate": 2.248607548580746e-06, "loss": 0.958466112613678, "step": 3868 }, { "epoch": 1.1911357340720221, "grad_norm": 15.25, "learning_rate": 2.247867748354769e-06, "loss": 1.409304141998291, "step": 3870 }, { "epoch": 1.1917513080947983, "grad_norm": 27.875, "learning_rate": 2.2471277503050955e-06, "loss": 1.5489780902862549, "step": 3872 }, { "epoch": 1.1923668821175746, "grad_norm": 11.9375, "learning_rate": 2.246387554758583e-06, "loss": 1.063664197921753, "step": 3874 }, { "epoch": 1.1929824561403508, "grad_norm": 12.6875, "learning_rate": 2.2456471620421762e-06, "loss": 1.606360673904419, "step": 3876 }, { "epoch": 1.193598030163127, "grad_norm": 17.875, "learning_rate": 2.244906572482908e-06, "loss": 1.8889983892440796, "step": 3878 }, { "epoch": 1.1942136041859033, "grad_norm": 12.3125, "learning_rate": 2.244165786407898e-06, "loss": 1.4994919300079346, "step": 3880 }, { "epoch": 1.1948291782086795, "grad_norm": 7.21875, "learning_rate": 2.2434248041443507e-06, "loss": 1.1762011051177979, "step": 3882 }, { "epoch": 1.1954447522314557, "grad_norm": 31.375, "learning_rate": 2.24268362601956e-06, "loss": 1.547112226486206, "step": 3884 }, { "epoch": 1.196060326254232, "grad_norm": 14.375, "learning_rate": 2.241942252360905e-06, "loss": 1.0352790355682373, "step": 3886 }, { "epoch": 1.1966759002770084, "grad_norm": 20.5, "learning_rate": 2.2412006834958517e-06, "loss": 1.4299671649932861, "step": 3888 }, { "epoch": 1.1972914742997847, "grad_norm": 18.75, "learning_rate": 2.2404589197519507e-06, "loss": 1.6509742736816406, "step": 3890 }, { "epoch": 1.197907048322561, "grad_norm": 15.75, "learning_rate": 2.239716961456841e-06, "loss": 1.3373303413391113, "step": 3892 }, { "epoch": 1.1985226223453371, "grad_norm": 118.0, "learning_rate": 2.238974808938246e-06, "loss": 1.3991224765777588, "step": 3894 }, { "epoch": 1.1991381963681134, "grad_norm": 14.3125, "learning_rate": 2.2382324625239757e-06, "loss": 1.0333058834075928, "step": 3896 }, { "epoch": 1.1997537703908896, "grad_norm": 14.75, "learning_rate": 2.2374899225419247e-06, "loss": 1.4212753772735596, "step": 3898 }, { "epoch": 1.2003693444136658, "grad_norm": 10.25, "learning_rate": 2.2367471893200744e-06, "loss": 1.3892631530761719, "step": 3900 }, { "epoch": 1.200984918436442, "grad_norm": 294.0, "learning_rate": 2.23600426318649e-06, "loss": 1.336948037147522, "step": 3902 }, { "epoch": 1.2016004924592183, "grad_norm": 12.4375, "learning_rate": 2.235261144469324e-06, "loss": 1.2685046195983887, "step": 3904 }, { "epoch": 1.2022160664819945, "grad_norm": 32.0, "learning_rate": 2.2345178334968125e-06, "loss": 1.7446030378341675, "step": 3906 }, { "epoch": 1.2028316405047708, "grad_norm": 73.0, "learning_rate": 2.233774330597276e-06, "loss": 1.4774670600891113, "step": 3908 }, { "epoch": 1.203447214527547, "grad_norm": 7.90625, "learning_rate": 2.233030636099121e-06, "loss": 1.3360192775726318, "step": 3910 }, { "epoch": 1.2040627885503232, "grad_norm": 8.5, "learning_rate": 2.232286750330839e-06, "loss": 1.2566142082214355, "step": 3912 }, { "epoch": 1.2046783625730995, "grad_norm": 8.875, "learning_rate": 2.2315426736210037e-06, "loss": 0.8111532926559448, "step": 3914 }, { "epoch": 1.2052939365958757, "grad_norm": 13.8125, "learning_rate": 2.230798406298276e-06, "loss": 1.1176693439483643, "step": 3916 }, { "epoch": 1.205909510618652, "grad_norm": 19.625, "learning_rate": 2.2300539486913985e-06, "loss": 1.320571780204773, "step": 3918 }, { "epoch": 1.2065250846414282, "grad_norm": 17.0, "learning_rate": 2.2293093011292006e-06, "loss": 1.564365029335022, "step": 3920 }, { "epoch": 1.2071406586642044, "grad_norm": 11.375, "learning_rate": 2.228564463940592e-06, "loss": 1.2905172109603882, "step": 3922 }, { "epoch": 1.2077562326869806, "grad_norm": 9.8125, "learning_rate": 2.2278194374545697e-06, "loss": 1.1235172748565674, "step": 3924 }, { "epoch": 1.2083718067097569, "grad_norm": 15.3125, "learning_rate": 2.227074222000212e-06, "loss": 1.4910166263580322, "step": 3926 }, { "epoch": 1.208987380732533, "grad_norm": 37.0, "learning_rate": 2.226328817906681e-06, "loss": 1.5519063472747803, "step": 3928 }, { "epoch": 1.2096029547553093, "grad_norm": 13.5, "learning_rate": 2.225583225503224e-06, "loss": 1.3468170166015625, "step": 3930 }, { "epoch": 1.2102185287780856, "grad_norm": 18.25, "learning_rate": 2.2248374451191687e-06, "loss": 1.344160556793213, "step": 3932 }, { "epoch": 1.2108341028008618, "grad_norm": 13.375, "learning_rate": 2.2240914770839273e-06, "loss": 1.3389885425567627, "step": 3934 }, { "epoch": 1.211449676823638, "grad_norm": 15.125, "learning_rate": 2.223345321726995e-06, "loss": 1.783838152885437, "step": 3936 }, { "epoch": 1.2120652508464143, "grad_norm": 20.25, "learning_rate": 2.2225989793779502e-06, "loss": 1.6713354587554932, "step": 3938 }, { "epoch": 1.2126808248691905, "grad_norm": 14.0625, "learning_rate": 2.2218524503664514e-06, "loss": 1.3737525939941406, "step": 3940 }, { "epoch": 1.2132963988919667, "grad_norm": 10.75, "learning_rate": 2.221105735022243e-06, "loss": 1.1370043754577637, "step": 3942 }, { "epoch": 1.213911972914743, "grad_norm": 13.3125, "learning_rate": 2.2203588336751496e-06, "loss": 1.3710174560546875, "step": 3944 }, { "epoch": 1.2145275469375192, "grad_norm": 14.125, "learning_rate": 2.2196117466550774e-06, "loss": 1.5901612043380737, "step": 3946 }, { "epoch": 1.2151431209602954, "grad_norm": 5.375, "learning_rate": 2.2188644742920173e-06, "loss": 1.0958540439605713, "step": 3948 }, { "epoch": 1.2157586949830717, "grad_norm": 9.0625, "learning_rate": 2.2181170169160385e-06, "loss": 1.2019283771514893, "step": 3950 }, { "epoch": 1.2163742690058479, "grad_norm": 4.71875, "learning_rate": 2.217369374857296e-06, "loss": 1.2523112297058105, "step": 3952 }, { "epoch": 1.2169898430286241, "grad_norm": 19.0, "learning_rate": 2.216621548446021e-06, "loss": 1.3643252849578857, "step": 3954 }, { "epoch": 1.2176054170514004, "grad_norm": 12.1875, "learning_rate": 2.2158735380125325e-06, "loss": 1.606905221939087, "step": 3956 }, { "epoch": 1.2182209910741766, "grad_norm": 17.75, "learning_rate": 2.2151253438872263e-06, "loss": 1.2848656177520752, "step": 3958 }, { "epoch": 1.2188365650969528, "grad_norm": 19.25, "learning_rate": 2.2143769664005797e-06, "loss": 1.4402281045913696, "step": 3960 }, { "epoch": 1.219452139119729, "grad_norm": 19.25, "learning_rate": 2.2136284058831533e-06, "loss": 1.4613749980926514, "step": 3962 }, { "epoch": 1.2200677131425053, "grad_norm": 8.875, "learning_rate": 2.2128796626655855e-06, "loss": 1.257155179977417, "step": 3964 }, { "epoch": 1.2206832871652815, "grad_norm": 14.6875, "learning_rate": 2.212130737078599e-06, "loss": 1.0831825733184814, "step": 3966 }, { "epoch": 1.2212988611880577, "grad_norm": 15.9375, "learning_rate": 2.211381629452994e-06, "loss": 1.5734920501708984, "step": 3968 }, { "epoch": 1.221914435210834, "grad_norm": 17.25, "learning_rate": 2.2106323401196528e-06, "loss": 1.361938238143921, "step": 3970 }, { "epoch": 1.2225300092336104, "grad_norm": 15.0625, "learning_rate": 2.2098828694095356e-06, "loss": 1.3760902881622314, "step": 3972 }, { "epoch": 1.2231455832563867, "grad_norm": 13.4375, "learning_rate": 2.209133217653687e-06, "loss": 0.9740235805511475, "step": 3974 }, { "epoch": 1.223761157279163, "grad_norm": 22.0, "learning_rate": 2.2083833851832277e-06, "loss": 1.5925462245941162, "step": 3976 }, { "epoch": 1.2243767313019391, "grad_norm": 12.0625, "learning_rate": 2.2076333723293588e-06, "loss": 1.377173900604248, "step": 3978 }, { "epoch": 1.2249923053247154, "grad_norm": 8.5, "learning_rate": 2.206883179423364e-06, "loss": 1.2487232685089111, "step": 3980 }, { "epoch": 1.2256078793474916, "grad_norm": 5.40625, "learning_rate": 2.2061328067966016e-06, "loss": 1.1240956783294678, "step": 3982 }, { "epoch": 1.2262234533702678, "grad_norm": 13.9375, "learning_rate": 2.2053822547805145e-06, "loss": 1.1868245601654053, "step": 3984 }, { "epoch": 1.226839027393044, "grad_norm": 18.625, "learning_rate": 2.2046315237066213e-06, "loss": 1.6078002452850342, "step": 3986 }, { "epoch": 1.2274546014158203, "grad_norm": 11.6875, "learning_rate": 2.2038806139065206e-06, "loss": 1.2998437881469727, "step": 3988 }, { "epoch": 1.2280701754385965, "grad_norm": 41.5, "learning_rate": 2.2031295257118905e-06, "loss": 2.041577100753784, "step": 3990 }, { "epoch": 1.2286857494613728, "grad_norm": 40.5, "learning_rate": 2.202378259454488e-06, "loss": 1.0328714847564697, "step": 3992 }, { "epoch": 1.229301323484149, "grad_norm": 7.8125, "learning_rate": 2.201626815466147e-06, "loss": 1.3742680549621582, "step": 3994 }, { "epoch": 1.2299168975069252, "grad_norm": 10.375, "learning_rate": 2.2008751940787817e-06, "loss": 1.1253302097320557, "step": 3996 }, { "epoch": 1.2305324715297015, "grad_norm": 15.25, "learning_rate": 2.2001233956243846e-06, "loss": 1.2740788459777832, "step": 3998 }, { "epoch": 1.2311480455524777, "grad_norm": 18.75, "learning_rate": 2.1993714204350257e-06, "loss": 1.2912187576293945, "step": 4000 }, { "epoch": 1.231763619575254, "grad_norm": 17.125, "learning_rate": 2.198619268842853e-06, "loss": 1.438889980316162, "step": 4002 }, { "epoch": 1.2323791935980302, "grad_norm": 13.125, "learning_rate": 2.1978669411800936e-06, "loss": 1.3957526683807373, "step": 4004 }, { "epoch": 1.2329947676208064, "grad_norm": 16.625, "learning_rate": 2.1971144377790504e-06, "loss": 1.3101451396942139, "step": 4006 }, { "epoch": 1.2336103416435826, "grad_norm": 26.5, "learning_rate": 2.196361758972105e-06, "loss": 1.486053705215454, "step": 4008 }, { "epoch": 1.2342259156663589, "grad_norm": 11.4375, "learning_rate": 2.1956089050917174e-06, "loss": 1.1454870700836182, "step": 4010 }, { "epoch": 1.234841489689135, "grad_norm": 25.75, "learning_rate": 2.1948558764704234e-06, "loss": 1.5119868516921997, "step": 4012 }, { "epoch": 1.2354570637119113, "grad_norm": 17.75, "learning_rate": 2.1941026734408368e-06, "loss": 1.6028649806976318, "step": 4014 }, { "epoch": 1.2360726377346876, "grad_norm": 11.5625, "learning_rate": 2.1933492963356486e-06, "loss": 1.7800073623657227, "step": 4016 }, { "epoch": 1.2366882117574638, "grad_norm": 10.4375, "learning_rate": 2.192595745487625e-06, "loss": 1.2908351421356201, "step": 4018 }, { "epoch": 1.23730378578024, "grad_norm": 7.96875, "learning_rate": 2.1918420212296126e-06, "loss": 1.0685988664627075, "step": 4020 }, { "epoch": 1.2379193598030163, "grad_norm": 11.125, "learning_rate": 2.1910881238945293e-06, "loss": 1.470991611480713, "step": 4022 }, { "epoch": 1.2385349338257925, "grad_norm": 9.1875, "learning_rate": 2.190334053815375e-06, "loss": 1.4521594047546387, "step": 4024 }, { "epoch": 1.2391505078485687, "grad_norm": 14.4375, "learning_rate": 2.189579811325222e-06, "loss": 1.1041626930236816, "step": 4026 }, { "epoch": 1.239766081871345, "grad_norm": 16.0, "learning_rate": 2.18882539675722e-06, "loss": 1.468430519104004, "step": 4028 }, { "epoch": 1.2403816558941212, "grad_norm": 24.25, "learning_rate": 2.1880708104445954e-06, "loss": 0.9561784267425537, "step": 4030 }, { "epoch": 1.2409972299168974, "grad_norm": 20.75, "learning_rate": 2.1873160527206505e-06, "loss": 1.477291464805603, "step": 4032 }, { "epoch": 1.2416128039396737, "grad_norm": 36.0, "learning_rate": 2.18656112391876e-06, "loss": 1.5140433311462402, "step": 4034 }, { "epoch": 1.24222837796245, "grad_norm": 9.9375, "learning_rate": 2.18580602437238e-06, "loss": 1.5981786251068115, "step": 4036 }, { "epoch": 1.2428439519852263, "grad_norm": 21.375, "learning_rate": 2.1850507544150368e-06, "loss": 1.4360090494155884, "step": 4038 }, { "epoch": 1.2434595260080026, "grad_norm": 8.25, "learning_rate": 2.184295314380335e-06, "loss": 1.2938246726989746, "step": 4040 }, { "epoch": 1.2440751000307788, "grad_norm": 11.625, "learning_rate": 2.1835397046019524e-06, "loss": 1.2391458749771118, "step": 4042 }, { "epoch": 1.244690674053555, "grad_norm": 50.5, "learning_rate": 2.1827839254136436e-06, "loss": 1.4162282943725586, "step": 4044 }, { "epoch": 1.2453062480763313, "grad_norm": 15.3125, "learning_rate": 2.1820279771492364e-06, "loss": 1.5083057880401611, "step": 4046 }, { "epoch": 1.2459218220991075, "grad_norm": 238.0, "learning_rate": 2.1812718601426346e-06, "loss": 1.6508371829986572, "step": 4048 }, { "epoch": 1.2465373961218837, "grad_norm": 13.375, "learning_rate": 2.1805155747278157e-06, "loss": 1.3943655490875244, "step": 4050 }, { "epoch": 1.24715297014466, "grad_norm": 10.625, "learning_rate": 2.1797591212388317e-06, "loss": 1.657834768295288, "step": 4052 }, { "epoch": 1.2477685441674362, "grad_norm": 30.125, "learning_rate": 2.17900250000981e-06, "loss": 1.6758224964141846, "step": 4054 }, { "epoch": 1.2483841181902124, "grad_norm": 15.375, "learning_rate": 2.1782457113749485e-06, "loss": 1.800303339958191, "step": 4056 }, { "epoch": 1.2489996922129887, "grad_norm": 25.75, "learning_rate": 2.177488755668525e-06, "loss": 1.2303414344787598, "step": 4058 }, { "epoch": 1.249615266235765, "grad_norm": 22.75, "learning_rate": 2.176731633224885e-06, "loss": 1.1429264545440674, "step": 4060 }, { "epoch": 1.2502308402585411, "grad_norm": 7.09375, "learning_rate": 2.1759743443784515e-06, "loss": 1.3129208087921143, "step": 4062 }, { "epoch": 1.2508464142813174, "grad_norm": 14.5, "learning_rate": 2.1752168894637197e-06, "loss": 1.12176513671875, "step": 4064 }, { "epoch": 1.2514619883040936, "grad_norm": 29.75, "learning_rate": 2.1744592688152588e-06, "loss": 1.3381752967834473, "step": 4066 }, { "epoch": 1.2520775623268698, "grad_norm": 4.59375, "learning_rate": 2.1737014827677097e-06, "loss": 0.7888558506965637, "step": 4068 }, { "epoch": 1.252693136349646, "grad_norm": 7.84375, "learning_rate": 2.1729435316557878e-06, "loss": 1.3463082313537598, "step": 4070 }, { "epoch": 1.2533087103724223, "grad_norm": 11.875, "learning_rate": 2.1721854158142814e-06, "loss": 1.5913687944412231, "step": 4072 }, { "epoch": 1.2539242843951985, "grad_norm": 15.5, "learning_rate": 2.17142713557805e-06, "loss": 1.1627197265625, "step": 4074 }, { "epoch": 1.2545398584179748, "grad_norm": 8.0, "learning_rate": 2.170668691282029e-06, "loss": 0.9327364563941956, "step": 4076 }, { "epoch": 1.255155432440751, "grad_norm": 9.0625, "learning_rate": 2.1699100832612216e-06, "loss": 0.8239724636077881, "step": 4078 }, { "epoch": 1.2557710064635272, "grad_norm": 57.5, "learning_rate": 2.1691513118507074e-06, "loss": 1.562792181968689, "step": 4080 }, { "epoch": 1.2563865804863035, "grad_norm": 23.375, "learning_rate": 2.1683923773856368e-06, "loss": 1.2528904676437378, "step": 4082 }, { "epoch": 1.2570021545090797, "grad_norm": 10.75, "learning_rate": 2.16763328020123e-06, "loss": 1.369584560394287, "step": 4084 }, { "epoch": 1.257617728531856, "grad_norm": 59.0, "learning_rate": 2.1668740206327837e-06, "loss": 0.8014663457870483, "step": 4086 }, { "epoch": 1.2582333025546322, "grad_norm": 49.25, "learning_rate": 2.1661145990156617e-06, "loss": 0.9348541498184204, "step": 4088 }, { "epoch": 1.2588488765774084, "grad_norm": 39.25, "learning_rate": 2.1653550156853026e-06, "loss": 1.7951834201812744, "step": 4090 }, { "epoch": 1.2594644506001846, "grad_norm": 15.9375, "learning_rate": 2.1645952709772147e-06, "loss": 1.2039365768432617, "step": 4092 }, { "epoch": 1.2600800246229609, "grad_norm": 27.0, "learning_rate": 2.1638353652269784e-06, "loss": 1.275390386581421, "step": 4094 }, { "epoch": 1.260695598645737, "grad_norm": 11.375, "learning_rate": 2.163075298770245e-06, "loss": 1.6113206148147583, "step": 4096 }, { "epoch": 1.2613111726685133, "grad_norm": 21.0, "learning_rate": 2.1623150719427364e-06, "loss": 1.623373031616211, "step": 4098 }, { "epoch": 1.2619267466912896, "grad_norm": 31.75, "learning_rate": 2.1615546850802454e-06, "loss": 1.2032837867736816, "step": 4100 }, { "epoch": 1.2625423207140658, "grad_norm": 8.875, "learning_rate": 2.1607941385186364e-06, "loss": 1.058774471282959, "step": 4102 }, { "epoch": 1.263157894736842, "grad_norm": 39.0, "learning_rate": 2.160033432593843e-06, "loss": 1.4389801025390625, "step": 4104 }, { "epoch": 1.2637734687596183, "grad_norm": 12.75, "learning_rate": 2.1592725676418705e-06, "loss": 1.7803723812103271, "step": 4106 }, { "epoch": 1.2643890427823945, "grad_norm": 19.875, "learning_rate": 2.1585115439987935e-06, "loss": 1.6537094116210938, "step": 4108 }, { "epoch": 1.2650046168051707, "grad_norm": 7.90625, "learning_rate": 2.1577503620007577e-06, "loss": 1.5434420108795166, "step": 4110 }, { "epoch": 1.265620190827947, "grad_norm": 8.3125, "learning_rate": 2.1569890219839776e-06, "loss": 1.4224700927734375, "step": 4112 }, { "epoch": 1.2662357648507232, "grad_norm": 13.375, "learning_rate": 2.156227524284737e-06, "loss": 1.266633152961731, "step": 4114 }, { "epoch": 1.2668513388734994, "grad_norm": 10.4375, "learning_rate": 2.1554658692393915e-06, "loss": 0.9019656181335449, "step": 4116 }, { "epoch": 1.2674669128962757, "grad_norm": 11.1875, "learning_rate": 2.1547040571843644e-06, "loss": 1.0631968975067139, "step": 4118 }, { "epoch": 1.268082486919052, "grad_norm": 12.0625, "learning_rate": 2.1539420884561497e-06, "loss": 1.1509294509887695, "step": 4120 }, { "epoch": 1.2686980609418281, "grad_norm": 16.875, "learning_rate": 2.153179963391309e-06, "loss": 1.1578824520111084, "step": 4122 }, { "epoch": 1.2693136349646044, "grad_norm": 9.3125, "learning_rate": 2.152417682326474e-06, "loss": 0.6979641318321228, "step": 4124 }, { "epoch": 1.2699292089873806, "grad_norm": 24.75, "learning_rate": 2.1516552455983456e-06, "loss": 1.3547711372375488, "step": 4126 }, { "epoch": 1.270544783010157, "grad_norm": 21.25, "learning_rate": 2.150892653543693e-06, "loss": 1.553722858428955, "step": 4128 }, { "epoch": 1.2711603570329333, "grad_norm": 7.625, "learning_rate": 2.150129906499353e-06, "loss": 1.1862623691558838, "step": 4130 }, { "epoch": 1.2717759310557095, "grad_norm": 15.5625, "learning_rate": 2.1493670048022324e-06, "loss": 0.948223888874054, "step": 4132 }, { "epoch": 1.2723915050784858, "grad_norm": 17.875, "learning_rate": 2.148603948789307e-06, "loss": 1.3155548572540283, "step": 4134 }, { "epoch": 1.273007079101262, "grad_norm": 10.0, "learning_rate": 2.1478407387976172e-06, "loss": 1.2378668785095215, "step": 4136 }, { "epoch": 1.2736226531240382, "grad_norm": 17.75, "learning_rate": 2.147077375164275e-06, "loss": 1.4044148921966553, "step": 4138 }, { "epoch": 1.2742382271468145, "grad_norm": 13.375, "learning_rate": 2.146313858226459e-06, "loss": 1.5460163354873657, "step": 4140 }, { "epoch": 1.2748538011695907, "grad_norm": 14.875, "learning_rate": 2.1455501883214155e-06, "loss": 1.4281752109527588, "step": 4142 }, { "epoch": 1.275469375192367, "grad_norm": 12.6875, "learning_rate": 2.144786365786458e-06, "loss": 1.5017046928405762, "step": 4144 }, { "epoch": 1.2760849492151431, "grad_norm": 12.625, "learning_rate": 2.1440223909589686e-06, "loss": 1.4624148607254028, "step": 4146 }, { "epoch": 1.2767005232379194, "grad_norm": 6.90625, "learning_rate": 2.1432582641763956e-06, "loss": 1.1030162572860718, "step": 4148 }, { "epoch": 1.2773160972606956, "grad_norm": 18.375, "learning_rate": 2.1424939857762535e-06, "loss": 1.322242259979248, "step": 4150 }, { "epoch": 1.2779316712834718, "grad_norm": 19.5, "learning_rate": 2.1417295560961258e-06, "loss": 1.0702810287475586, "step": 4152 }, { "epoch": 1.278547245306248, "grad_norm": 18.875, "learning_rate": 2.140964975473663e-06, "loss": 1.2559038400650024, "step": 4154 }, { "epoch": 1.2791628193290243, "grad_norm": 23.125, "learning_rate": 2.1402002442465792e-06, "loss": 1.339484453201294, "step": 4156 }, { "epoch": 1.2797783933518005, "grad_norm": 20.375, "learning_rate": 2.139435362752658e-06, "loss": 1.4479165077209473, "step": 4158 }, { "epoch": 1.2803939673745768, "grad_norm": 14.0625, "learning_rate": 2.138670331329749e-06, "loss": 0.7785294055938721, "step": 4160 }, { "epoch": 1.281009541397353, "grad_norm": 5.25, "learning_rate": 2.137905150315767e-06, "loss": 1.0688307285308838, "step": 4162 }, { "epoch": 1.2816251154201292, "grad_norm": 86.0, "learning_rate": 2.1371398200486937e-06, "loss": 0.8996104598045349, "step": 4164 }, { "epoch": 1.2822406894429055, "grad_norm": 13.1875, "learning_rate": 2.1363743408665754e-06, "loss": 1.5372533798217773, "step": 4166 }, { "epoch": 1.2828562634656817, "grad_norm": 9.6875, "learning_rate": 2.135608713107525e-06, "loss": 1.2518038749694824, "step": 4168 }, { "epoch": 1.283471837488458, "grad_norm": 16.375, "learning_rate": 2.1348429371097226e-06, "loss": 1.4110199213027954, "step": 4170 }, { "epoch": 1.2840874115112342, "grad_norm": 13.5625, "learning_rate": 2.134077013211412e-06, "loss": 0.7723270654678345, "step": 4172 }, { "epoch": 1.2847029855340104, "grad_norm": 13.6875, "learning_rate": 2.1333109417509017e-06, "loss": 1.4334893226623535, "step": 4174 }, { "epoch": 1.2853185595567866, "grad_norm": 30.0, "learning_rate": 2.132544723066567e-06, "loss": 1.5801007747650146, "step": 4176 }, { "epoch": 1.2859341335795629, "grad_norm": 14.6875, "learning_rate": 2.131778357496847e-06, "loss": 1.2111811637878418, "step": 4178 }, { "epoch": 1.286549707602339, "grad_norm": 10.5625, "learning_rate": 2.131011845380247e-06, "loss": 1.333784818649292, "step": 4180 }, { "epoch": 1.2871652816251153, "grad_norm": 13.5625, "learning_rate": 2.1302451870553363e-06, "loss": 1.223961591720581, "step": 4182 }, { "epoch": 1.2877808556478916, "grad_norm": 11.8125, "learning_rate": 2.129478382860748e-06, "loss": 1.148566722869873, "step": 4184 }, { "epoch": 1.288396429670668, "grad_norm": 6.03125, "learning_rate": 2.128711433135181e-06, "loss": 1.365966558456421, "step": 4186 }, { "epoch": 1.2890120036934443, "grad_norm": 14.875, "learning_rate": 2.127944338217398e-06, "loss": 1.2503944635391235, "step": 4188 }, { "epoch": 1.2896275777162205, "grad_norm": 19.75, "learning_rate": 2.127177098446225e-06, "loss": 1.1578048467636108, "step": 4190 }, { "epoch": 1.2902431517389967, "grad_norm": 15.5625, "learning_rate": 2.126409714160553e-06, "loss": 1.0331218242645264, "step": 4192 }, { "epoch": 1.290858725761773, "grad_norm": 6.15625, "learning_rate": 2.1256421856993367e-06, "loss": 0.9528582096099854, "step": 4194 }, { "epoch": 1.2914742997845492, "grad_norm": 61.0, "learning_rate": 2.124874513401594e-06, "loss": 1.3459211587905884, "step": 4196 }, { "epoch": 1.2920898738073254, "grad_norm": 14.625, "learning_rate": 2.1241066976064076e-06, "loss": 1.7501788139343262, "step": 4198 }, { "epoch": 1.2927054478301017, "grad_norm": 9.4375, "learning_rate": 2.1233387386529216e-06, "loss": 1.2650032043457031, "step": 4200 }, { "epoch": 1.293321021852878, "grad_norm": 7.6875, "learning_rate": 2.122570636880344e-06, "loss": 1.304715871810913, "step": 4202 }, { "epoch": 1.2939365958756541, "grad_norm": 10.25, "learning_rate": 2.1218023926279474e-06, "loss": 1.2947635650634766, "step": 4204 }, { "epoch": 1.2945521698984304, "grad_norm": 6.09375, "learning_rate": 2.1210340062350656e-06, "loss": 1.1739459037780762, "step": 4206 }, { "epoch": 1.2951677439212066, "grad_norm": 7.21875, "learning_rate": 2.120265478041095e-06, "loss": 1.3887667655944824, "step": 4208 }, { "epoch": 1.2957833179439828, "grad_norm": 17.0, "learning_rate": 2.119496808385497e-06, "loss": 1.4132437705993652, "step": 4210 }, { "epoch": 1.296398891966759, "grad_norm": 12.4375, "learning_rate": 2.1187279976077927e-06, "loss": 1.393709421157837, "step": 4212 }, { "epoch": 1.2970144659895353, "grad_norm": 29.625, "learning_rate": 2.1179590460475666e-06, "loss": 1.6518043279647827, "step": 4214 }, { "epoch": 1.2976300400123115, "grad_norm": 7.6875, "learning_rate": 2.1171899540444667e-06, "loss": 1.0748077630996704, "step": 4216 }, { "epoch": 1.2982456140350878, "grad_norm": 44.25, "learning_rate": 2.1164207219382007e-06, "loss": 1.0568442344665527, "step": 4218 }, { "epoch": 1.298861188057864, "grad_norm": 23.75, "learning_rate": 2.1156513500685388e-06, "loss": 0.8065166473388672, "step": 4220 }, { "epoch": 1.2994767620806402, "grad_norm": 16.5, "learning_rate": 2.114881838775315e-06, "loss": 1.3090413808822632, "step": 4222 }, { "epoch": 1.3000923361034165, "grad_norm": 45.0, "learning_rate": 2.114112188398423e-06, "loss": 1.681335687637329, "step": 4224 }, { "epoch": 1.3007079101261927, "grad_norm": 85.0, "learning_rate": 2.113342399277817e-06, "loss": 0.8728981018066406, "step": 4226 }, { "epoch": 1.301323484148969, "grad_norm": 21.75, "learning_rate": 2.1125724717535147e-06, "loss": 1.6157889366149902, "step": 4228 }, { "epoch": 1.3019390581717452, "grad_norm": 21.625, "learning_rate": 2.111802406165594e-06, "loss": 1.3912405967712402, "step": 4230 }, { "epoch": 1.3025546321945214, "grad_norm": 12.9375, "learning_rate": 2.111032202854194e-06, "loss": 1.4589062929153442, "step": 4232 }, { "epoch": 1.3031702062172976, "grad_norm": 19.0, "learning_rate": 2.110261862159513e-06, "loss": 1.3329696655273438, "step": 4234 }, { "epoch": 1.3037857802400739, "grad_norm": 13.0, "learning_rate": 2.1094913844218126e-06, "loss": 1.2835090160369873, "step": 4236 }, { "epoch": 1.30440135426285, "grad_norm": 14.375, "learning_rate": 2.1087207699814135e-06, "loss": 1.3457741737365723, "step": 4238 }, { "epoch": 1.3050169282856263, "grad_norm": 17.875, "learning_rate": 2.1079500191786973e-06, "loss": 1.2177116870880127, "step": 4240 }, { "epoch": 1.3056325023084026, "grad_norm": 13.25, "learning_rate": 2.1071791323541047e-06, "loss": 1.3747305870056152, "step": 4242 }, { "epoch": 1.3062480763311788, "grad_norm": 10.4375, "learning_rate": 2.1064081098481374e-06, "loss": 1.4916646480560303, "step": 4244 }, { "epoch": 1.306863650353955, "grad_norm": 24.375, "learning_rate": 2.1056369520013582e-06, "loss": 1.630761742591858, "step": 4246 }, { "epoch": 1.3074792243767313, "grad_norm": 52.75, "learning_rate": 2.104865659154387e-06, "loss": 0.7215687036514282, "step": 4248 }, { "epoch": 1.3080947983995075, "grad_norm": 35.25, "learning_rate": 2.1040942316479046e-06, "loss": 1.8062270879745483, "step": 4250 }, { "epoch": 1.3087103724222837, "grad_norm": 24.25, "learning_rate": 2.1033226698226526e-06, "loss": 1.3249752521514893, "step": 4252 }, { "epoch": 1.30932594644506, "grad_norm": 23.625, "learning_rate": 2.10255097401943e-06, "loss": 1.3274242877960205, "step": 4254 }, { "epoch": 1.3099415204678362, "grad_norm": 10.3125, "learning_rate": 2.1017791445790953e-06, "loss": 1.582962989807129, "step": 4256 }, { "epoch": 1.3105570944906124, "grad_norm": 17.625, "learning_rate": 2.101007181842568e-06, "loss": 1.228154182434082, "step": 4258 }, { "epoch": 1.3111726685133887, "grad_norm": 8.625, "learning_rate": 2.1002350861508234e-06, "loss": 1.2410879135131836, "step": 4260 }, { "epoch": 1.3117882425361649, "grad_norm": 7.34375, "learning_rate": 2.099462857844897e-06, "loss": 1.2221719026565552, "step": 4262 }, { "epoch": 1.3124038165589411, "grad_norm": 13.0, "learning_rate": 2.0986904972658837e-06, "loss": 1.1706748008728027, "step": 4264 }, { "epoch": 1.3130193905817173, "grad_norm": 18.0, "learning_rate": 2.0979180047549363e-06, "loss": 1.4403222799301147, "step": 4266 }, { "epoch": 1.3136349646044936, "grad_norm": 35.25, "learning_rate": 2.097145380653265e-06, "loss": 1.600974440574646, "step": 4268 }, { "epoch": 1.3142505386272698, "grad_norm": 6.59375, "learning_rate": 2.0963726253021393e-06, "loss": 1.3970341682434082, "step": 4270 }, { "epoch": 1.314866112650046, "grad_norm": 16.0, "learning_rate": 2.095599739042885e-06, "loss": 0.7584080696105957, "step": 4272 }, { "epoch": 1.3154816866728223, "grad_norm": 11.3125, "learning_rate": 2.094826722216888e-06, "loss": 1.0941411256790161, "step": 4274 }, { "epoch": 1.3160972606955985, "grad_norm": 141.0, "learning_rate": 2.0940535751655897e-06, "loss": 0.9738726615905762, "step": 4276 }, { "epoch": 1.3167128347183747, "grad_norm": 8.125, "learning_rate": 2.0932802982304915e-06, "loss": 1.0021553039550781, "step": 4278 }, { "epoch": 1.3173284087411512, "grad_norm": 33.25, "learning_rate": 2.0925068917531495e-06, "loss": 1.660839319229126, "step": 4280 }, { "epoch": 1.3179439827639274, "grad_norm": 90.0, "learning_rate": 2.091733356075179e-06, "loss": 1.2870283126831055, "step": 4282 }, { "epoch": 1.3185595567867037, "grad_norm": 14.125, "learning_rate": 2.0909596915382504e-06, "loss": 1.5686073303222656, "step": 4284 }, { "epoch": 1.31917513080948, "grad_norm": 11.625, "learning_rate": 2.0901858984840935e-06, "loss": 1.1524231433868408, "step": 4286 }, { "epoch": 1.3197907048322561, "grad_norm": 20.75, "learning_rate": 2.0894119772544927e-06, "loss": 1.5431840419769287, "step": 4288 }, { "epoch": 1.3204062788550324, "grad_norm": 13.3125, "learning_rate": 2.0886379281912903e-06, "loss": 1.4432337284088135, "step": 4290 }, { "epoch": 1.3210218528778086, "grad_norm": 37.25, "learning_rate": 2.0878637516363846e-06, "loss": 1.8126132488250732, "step": 4292 }, { "epoch": 1.3216374269005848, "grad_norm": 5.8125, "learning_rate": 2.0870894479317306e-06, "loss": 0.8514418601989746, "step": 4294 }, { "epoch": 1.322253000923361, "grad_norm": 9.8125, "learning_rate": 2.086315017419338e-06, "loss": 1.323155403137207, "step": 4296 }, { "epoch": 1.3228685749461373, "grad_norm": 10.75, "learning_rate": 2.0855404604412754e-06, "loss": 1.3291047811508179, "step": 4298 }, { "epoch": 1.3234841489689135, "grad_norm": 14.75, "learning_rate": 2.084765777339664e-06, "loss": 0.9919479489326477, "step": 4300 }, { "epoch": 1.3240997229916898, "grad_norm": 13.625, "learning_rate": 2.083990968456683e-06, "loss": 1.2738001346588135, "step": 4302 }, { "epoch": 1.324715297014466, "grad_norm": 10.125, "learning_rate": 2.0832160341345657e-06, "loss": 0.8954488039016724, "step": 4304 }, { "epoch": 1.3253308710372422, "grad_norm": 10.625, "learning_rate": 2.082440974715603e-06, "loss": 1.3220200538635254, "step": 4306 }, { "epoch": 1.3259464450600185, "grad_norm": 5.5, "learning_rate": 2.0816657905421376e-06, "loss": 1.226555347442627, "step": 4308 }, { "epoch": 1.3265620190827947, "grad_norm": 16.625, "learning_rate": 2.0808904819565703e-06, "loss": 1.5791137218475342, "step": 4310 }, { "epoch": 1.327177593105571, "grad_norm": 34.0, "learning_rate": 2.0801150493013557e-06, "loss": 1.5559651851654053, "step": 4312 }, { "epoch": 1.3277931671283472, "grad_norm": 14.5625, "learning_rate": 2.0793394929190026e-06, "loss": 1.4778270721435547, "step": 4314 }, { "epoch": 1.3284087411511234, "grad_norm": 13.5, "learning_rate": 2.078563813152076e-06, "loss": 0.5933064222335815, "step": 4316 }, { "epoch": 1.3290243151738996, "grad_norm": 5.125, "learning_rate": 2.0777880103431946e-06, "loss": 1.1889657974243164, "step": 4318 }, { "epoch": 1.3296398891966759, "grad_norm": 26.625, "learning_rate": 2.077012084835031e-06, "loss": 1.584001898765564, "step": 4320 }, { "epoch": 1.330255463219452, "grad_norm": 14.9375, "learning_rate": 2.0762360369703122e-06, "loss": 1.5891938209533691, "step": 4322 }, { "epoch": 1.3308710372422283, "grad_norm": 13.5625, "learning_rate": 2.0754598670918193e-06, "loss": 1.6458081007003784, "step": 4324 }, { "epoch": 1.3314866112650046, "grad_norm": 26.75, "learning_rate": 2.0746835755423883e-06, "loss": 1.5826940536499023, "step": 4326 }, { "epoch": 1.3321021852877808, "grad_norm": 12.875, "learning_rate": 2.0739071626649074e-06, "loss": 1.4658899307250977, "step": 4328 }, { "epoch": 1.332717759310557, "grad_norm": 20.625, "learning_rate": 2.073130628802319e-06, "loss": 1.2064714431762695, "step": 4330 }, { "epoch": 1.3333333333333333, "grad_norm": 4.8125, "learning_rate": 2.07235397429762e-06, "loss": 1.3495519161224365, "step": 4332 }, { "epoch": 1.3339489073561095, "grad_norm": 9.4375, "learning_rate": 2.071577199493858e-06, "loss": 1.3448805809020996, "step": 4334 }, { "epoch": 1.334564481378886, "grad_norm": 22.625, "learning_rate": 2.0708003047341366e-06, "loss": 1.2070777416229248, "step": 4336 }, { "epoch": 1.3351800554016622, "grad_norm": 11.3125, "learning_rate": 2.07002329036161e-06, "loss": 1.0715398788452148, "step": 4338 }, { "epoch": 1.3357956294244384, "grad_norm": 10.8125, "learning_rate": 2.069246156719487e-06, "loss": 1.1646559238433838, "step": 4340 }, { "epoch": 1.3364112034472146, "grad_norm": 15.9375, "learning_rate": 2.068468904151028e-06, "loss": 0.6743322014808655, "step": 4342 }, { "epoch": 1.3370267774699909, "grad_norm": 87.5, "learning_rate": 2.067691532999548e-06, "loss": 0.7150102853775024, "step": 4344 }, { "epoch": 1.3376423514927671, "grad_norm": 9.1875, "learning_rate": 2.0669140436084105e-06, "loss": 1.5677911043167114, "step": 4346 }, { "epoch": 1.3382579255155433, "grad_norm": 13.0, "learning_rate": 2.066136436321035e-06, "loss": 1.2293686866760254, "step": 4348 }, { "epoch": 1.3388734995383196, "grad_norm": 11.6875, "learning_rate": 2.0653587114808902e-06, "loss": 1.3133034706115723, "step": 4350 }, { "epoch": 1.3394890735610958, "grad_norm": 23.875, "learning_rate": 2.064580869431499e-06, "loss": 1.7450048923492432, "step": 4352 }, { "epoch": 1.340104647583872, "grad_norm": 9.625, "learning_rate": 2.063802910516435e-06, "loss": 0.7626341581344604, "step": 4354 }, { "epoch": 1.3407202216066483, "grad_norm": 10.8125, "learning_rate": 2.0630248350793238e-06, "loss": 1.1981799602508545, "step": 4356 }, { "epoch": 1.3413357956294245, "grad_norm": 15.0625, "learning_rate": 2.0622466434638414e-06, "loss": 1.2713027000427246, "step": 4358 }, { "epoch": 1.3419513696522007, "grad_norm": 16.125, "learning_rate": 2.0614683360137164e-06, "loss": 1.393301010131836, "step": 4360 }, { "epoch": 1.342566943674977, "grad_norm": 13.9375, "learning_rate": 2.060689913072728e-06, "loss": 1.3362078666687012, "step": 4362 }, { "epoch": 1.3431825176977532, "grad_norm": 21.75, "learning_rate": 2.0599113749847066e-06, "loss": 1.6755115985870361, "step": 4364 }, { "epoch": 1.3437980917205294, "grad_norm": 20.875, "learning_rate": 2.059132722093533e-06, "loss": 1.3301327228546143, "step": 4366 }, { "epoch": 1.3444136657433057, "grad_norm": 13.875, "learning_rate": 2.0583539547431407e-06, "loss": 1.7222154140472412, "step": 4368 }, { "epoch": 1.345029239766082, "grad_norm": 8.6875, "learning_rate": 2.0575750732775097e-06, "loss": 1.095766544342041, "step": 4370 }, { "epoch": 1.3456448137888581, "grad_norm": 13.5625, "learning_rate": 2.056796078040675e-06, "loss": 1.230293869972229, "step": 4372 }, { "epoch": 1.3462603878116344, "grad_norm": 20.75, "learning_rate": 2.0560169693767174e-06, "loss": 1.4030444622039795, "step": 4374 }, { "epoch": 1.3468759618344106, "grad_norm": 12.875, "learning_rate": 2.0552377476297716e-06, "loss": 1.573530912399292, "step": 4376 }, { "epoch": 1.3474915358571868, "grad_norm": 15.25, "learning_rate": 2.0544584131440212e-06, "loss": 1.604763150215149, "step": 4378 }, { "epoch": 1.348107109879963, "grad_norm": 8.5, "learning_rate": 2.053678966263698e-06, "loss": 1.346675992012024, "step": 4380 }, { "epoch": 1.3487226839027393, "grad_norm": 14.9375, "learning_rate": 2.052899407333085e-06, "loss": 1.5418078899383545, "step": 4382 }, { "epoch": 1.3493382579255155, "grad_norm": 97.0, "learning_rate": 2.052119736696514e-06, "loss": 1.6574347019195557, "step": 4384 }, { "epoch": 1.3499538319482918, "grad_norm": 17.625, "learning_rate": 2.0513399546983677e-06, "loss": 1.5158668756484985, "step": 4386 }, { "epoch": 1.350569405971068, "grad_norm": 12.5, "learning_rate": 2.050560061683075e-06, "loss": 1.090888261795044, "step": 4388 }, { "epoch": 1.3511849799938442, "grad_norm": 114.5, "learning_rate": 2.049780057995116e-06, "loss": 1.3082555532455444, "step": 4390 }, { "epoch": 1.3518005540166205, "grad_norm": 6.6875, "learning_rate": 2.04899994397902e-06, "loss": 1.070220947265625, "step": 4392 }, { "epoch": 1.3524161280393967, "grad_norm": 11.5, "learning_rate": 2.048219719979363e-06, "loss": 1.3319404125213623, "step": 4394 }, { "epoch": 1.353031702062173, "grad_norm": 15.6875, "learning_rate": 2.0474393863407724e-06, "loss": 1.5495846271514893, "step": 4396 }, { "epoch": 1.3536472760849492, "grad_norm": 17.625, "learning_rate": 2.046658943407921e-06, "loss": 1.338994026184082, "step": 4398 }, { "epoch": 1.3542628501077254, "grad_norm": 18.5, "learning_rate": 2.045878391525532e-06, "loss": 1.5177886486053467, "step": 4400 }, { "epoch": 1.3548784241305016, "grad_norm": 3.25, "learning_rate": 2.045097731038376e-06, "loss": 1.3279091119766235, "step": 4402 }, { "epoch": 1.3554939981532779, "grad_norm": 29.625, "learning_rate": 2.0443169622912717e-06, "loss": 1.4942060708999634, "step": 4404 }, { "epoch": 1.356109572176054, "grad_norm": 12.625, "learning_rate": 2.043536085629086e-06, "loss": 1.3356786966323853, "step": 4406 }, { "epoch": 1.3567251461988303, "grad_norm": 9.0, "learning_rate": 2.0427551013967314e-06, "loss": 1.1315312385559082, "step": 4408 }, { "epoch": 1.3573407202216066, "grad_norm": 14.1875, "learning_rate": 2.0419740099391717e-06, "loss": 1.504289150238037, "step": 4410 }, { "epoch": 1.3579562942443828, "grad_norm": 17.625, "learning_rate": 2.041192811601414e-06, "loss": 1.5331953763961792, "step": 4412 }, { "epoch": 1.358571868267159, "grad_norm": 19.625, "learning_rate": 2.0404115067285157e-06, "loss": 1.4719030857086182, "step": 4414 }, { "epoch": 1.3591874422899353, "grad_norm": 11.0625, "learning_rate": 2.0396300956655794e-06, "loss": 1.0315923690795898, "step": 4416 }, { "epoch": 1.3598030163127115, "grad_norm": 16.125, "learning_rate": 2.038848578757756e-06, "loss": 1.3465533256530762, "step": 4418 }, { "epoch": 1.3604185903354877, "grad_norm": 21.125, "learning_rate": 2.0380669563502418e-06, "loss": 1.7399990558624268, "step": 4420 }, { "epoch": 1.361034164358264, "grad_norm": 17.25, "learning_rate": 2.03728522878828e-06, "loss": 1.406071424484253, "step": 4422 }, { "epoch": 1.3616497383810402, "grad_norm": 10.5, "learning_rate": 2.036503396417162e-06, "loss": 1.495185375213623, "step": 4424 }, { "epoch": 1.3622653124038164, "grad_norm": 10.75, "learning_rate": 2.0357214595822224e-06, "loss": 1.3955354690551758, "step": 4426 }, { "epoch": 1.3628808864265927, "grad_norm": 12.375, "learning_rate": 2.0349394186288443e-06, "loss": 1.2213549613952637, "step": 4428 }, { "epoch": 1.3634964604493691, "grad_norm": 11.8125, "learning_rate": 2.034157273902456e-06, "loss": 1.4318450689315796, "step": 4430 }, { "epoch": 1.3641120344721454, "grad_norm": 14.4375, "learning_rate": 2.0333750257485317e-06, "loss": 1.503476619720459, "step": 4432 }, { "epoch": 1.3647276084949216, "grad_norm": 12.1875, "learning_rate": 2.0325926745125914e-06, "loss": 1.298352599143982, "step": 4434 }, { "epoch": 1.3653431825176978, "grad_norm": 14.6875, "learning_rate": 2.0318102205402003e-06, "loss": 1.4269036054611206, "step": 4436 }, { "epoch": 1.365958756540474, "grad_norm": 27.75, "learning_rate": 2.03102766417697e-06, "loss": 1.390794038772583, "step": 4438 }, { "epoch": 1.3665743305632503, "grad_norm": 5.84375, "learning_rate": 2.0302450057685555e-06, "loss": 1.1889770030975342, "step": 4440 }, { "epoch": 1.3671899045860265, "grad_norm": 8.75, "learning_rate": 2.0294622456606585e-06, "loss": 1.3410286903381348, "step": 4442 }, { "epoch": 1.3678054786088027, "grad_norm": 10.5, "learning_rate": 2.0286793841990247e-06, "loss": 1.4344102144241333, "step": 4444 }, { "epoch": 1.368421052631579, "grad_norm": 33.5, "learning_rate": 2.027896421729446e-06, "loss": 1.5609376430511475, "step": 4446 }, { "epoch": 1.3690366266543552, "grad_norm": 25.5, "learning_rate": 2.0271133585977562e-06, "loss": 1.7404444217681885, "step": 4448 }, { "epoch": 1.3696522006771314, "grad_norm": 33.5, "learning_rate": 2.026330195149836e-06, "loss": 1.5617144107818604, "step": 4450 }, { "epoch": 1.3702677746999077, "grad_norm": 17.375, "learning_rate": 2.02554693173161e-06, "loss": 1.167197585105896, "step": 4452 }, { "epoch": 1.370883348722684, "grad_norm": 20.125, "learning_rate": 2.0247635686890457e-06, "loss": 1.384680986404419, "step": 4454 }, { "epoch": 1.3714989227454601, "grad_norm": 55.75, "learning_rate": 2.0239801063681557e-06, "loss": 1.807742714881897, "step": 4456 }, { "epoch": 1.3721144967682364, "grad_norm": 16.375, "learning_rate": 2.023196545114996e-06, "loss": 1.3313524723052979, "step": 4458 }, { "epoch": 1.3727300707910126, "grad_norm": 20.25, "learning_rate": 2.0224128852756677e-06, "loss": 1.5227307081222534, "step": 4460 }, { "epoch": 1.3733456448137888, "grad_norm": 24.75, "learning_rate": 2.0216291271963127e-06, "loss": 0.921663224697113, "step": 4462 }, { "epoch": 1.373961218836565, "grad_norm": 16.75, "learning_rate": 2.020845271223119e-06, "loss": 1.6640578508377075, "step": 4464 }, { "epoch": 1.3745767928593413, "grad_norm": 77.0, "learning_rate": 2.020061317702316e-06, "loss": 0.973523736000061, "step": 4466 }, { "epoch": 1.3751923668821175, "grad_norm": 18.75, "learning_rate": 2.019277266980177e-06, "loss": 1.3174777030944824, "step": 4468 }, { "epoch": 1.3758079409048938, "grad_norm": 29.5, "learning_rate": 2.0184931194030174e-06, "loss": 1.2604511976242065, "step": 4470 }, { "epoch": 1.37642351492767, "grad_norm": 17.875, "learning_rate": 2.0177088753171976e-06, "loss": 1.442176103591919, "step": 4472 }, { "epoch": 1.3770390889504462, "grad_norm": 32.0, "learning_rate": 2.0169245350691186e-06, "loss": 1.22340989112854, "step": 4474 }, { "epoch": 1.3776546629732225, "grad_norm": 25.625, "learning_rate": 2.0161400990052236e-06, "loss": 1.3502919673919678, "step": 4476 }, { "epoch": 1.3782702369959987, "grad_norm": 13.5625, "learning_rate": 2.015355567472e-06, "loss": 1.2456159591674805, "step": 4478 }, { "epoch": 1.378885811018775, "grad_norm": 7.09375, "learning_rate": 2.0145709408159754e-06, "loss": 1.300649642944336, "step": 4480 }, { "epoch": 1.3795013850415512, "grad_norm": 13.9375, "learning_rate": 2.0137862193837205e-06, "loss": 1.518198847770691, "step": 4482 }, { "epoch": 1.3801169590643274, "grad_norm": 14.8125, "learning_rate": 2.013001403521848e-06, "loss": 1.3599984645843506, "step": 4484 }, { "epoch": 1.3807325330871036, "grad_norm": 16.125, "learning_rate": 2.012216493577012e-06, "loss": 1.7173393964767456, "step": 4486 }, { "epoch": 1.38134810710988, "grad_norm": 11.6875, "learning_rate": 2.011431489895907e-06, "loss": 1.530381679534912, "step": 4488 }, { "epoch": 1.3819636811326563, "grad_norm": 15.5, "learning_rate": 2.010646392825272e-06, "loss": 1.6565570831298828, "step": 4490 }, { "epoch": 1.3825792551554326, "grad_norm": 9.625, "learning_rate": 2.009861202711883e-06, "loss": 1.581078290939331, "step": 4492 }, { "epoch": 1.3831948291782088, "grad_norm": 19.125, "learning_rate": 2.009075919902561e-06, "loss": 1.5048232078552246, "step": 4494 }, { "epoch": 1.383810403200985, "grad_norm": 11.25, "learning_rate": 2.0082905447441658e-06, "loss": 1.4397077560424805, "step": 4496 }, { "epoch": 1.3844259772237613, "grad_norm": 13.8125, "learning_rate": 2.007505077583599e-06, "loss": 1.2956914901733398, "step": 4498 }, { "epoch": 1.3850415512465375, "grad_norm": 27.625, "learning_rate": 2.0067195187678015e-06, "loss": 1.5892021656036377, "step": 4500 }, { "epoch": 1.3856571252693137, "grad_norm": 10.5, "learning_rate": 2.005933868643756e-06, "loss": 1.0231890678405762, "step": 4502 }, { "epoch": 1.38627269929209, "grad_norm": 9.5, "learning_rate": 2.0051481275584847e-06, "loss": 1.1836471557617188, "step": 4504 }, { "epoch": 1.3868882733148662, "grad_norm": 14.5625, "learning_rate": 2.004362295859051e-06, "loss": 1.558052897453308, "step": 4506 }, { "epoch": 1.3875038473376424, "grad_norm": 16.125, "learning_rate": 2.0035763738925575e-06, "loss": 1.3589587211608887, "step": 4508 }, { "epoch": 1.3881194213604187, "grad_norm": 21.625, "learning_rate": 2.002790362006146e-06, "loss": 1.8019932508468628, "step": 4510 }, { "epoch": 1.388734995383195, "grad_norm": 26.25, "learning_rate": 2.0020042605469997e-06, "loss": 1.527815580368042, "step": 4512 }, { "epoch": 1.3893505694059711, "grad_norm": 17.25, "learning_rate": 2.00121806986234e-06, "loss": 1.4716898202896118, "step": 4514 }, { "epoch": 1.3899661434287474, "grad_norm": 12.1875, "learning_rate": 2.000431790299429e-06, "loss": 1.12889564037323, "step": 4516 }, { "epoch": 1.3905817174515236, "grad_norm": 39.0, "learning_rate": 1.9996454222055665e-06, "loss": 1.1350713968276978, "step": 4518 }, { "epoch": 1.3911972914742998, "grad_norm": 23.125, "learning_rate": 1.998858965928093e-06, "loss": 1.5621490478515625, "step": 4520 }, { "epoch": 1.391812865497076, "grad_norm": 27.875, "learning_rate": 1.9980724218143855e-06, "loss": 1.580711841583252, "step": 4522 }, { "epoch": 1.3924284395198523, "grad_norm": 32.5, "learning_rate": 1.997285790211864e-06, "loss": 1.6474636793136597, "step": 4524 }, { "epoch": 1.3930440135426285, "grad_norm": 48.75, "learning_rate": 1.996499071467982e-06, "loss": 1.2069621086120605, "step": 4526 }, { "epoch": 1.3936595875654048, "grad_norm": 5.9375, "learning_rate": 1.9957122659302354e-06, "loss": 1.103395700454712, "step": 4528 }, { "epoch": 1.394275161588181, "grad_norm": 42.5, "learning_rate": 1.994925373946157e-06, "loss": 1.1344338655471802, "step": 4530 }, { "epoch": 1.3948907356109572, "grad_norm": 8.3125, "learning_rate": 1.994138395863318e-06, "loss": 1.0106946229934692, "step": 4532 }, { "epoch": 1.3955063096337335, "grad_norm": 14.25, "learning_rate": 1.9933513320293267e-06, "loss": 1.0307326316833496, "step": 4534 }, { "epoch": 1.3961218836565097, "grad_norm": 21.875, "learning_rate": 1.992564182791832e-06, "loss": 1.3763636350631714, "step": 4536 }, { "epoch": 1.396737457679286, "grad_norm": 13.5, "learning_rate": 1.9917769484985157e-06, "loss": 1.2347264289855957, "step": 4538 }, { "epoch": 1.3973530317020622, "grad_norm": 17.0, "learning_rate": 1.990989629497103e-06, "loss": 1.7017972469329834, "step": 4540 }, { "epoch": 1.3979686057248384, "grad_norm": 18.0, "learning_rate": 1.9902022261353515e-06, "loss": 1.6707942485809326, "step": 4542 }, { "epoch": 1.3985841797476146, "grad_norm": 12.0625, "learning_rate": 1.989414738761059e-06, "loss": 1.6330020427703857, "step": 4544 }, { "epoch": 1.3991997537703909, "grad_norm": 11.0625, "learning_rate": 1.9886271677220603e-06, "loss": 1.3158289194107056, "step": 4546 }, { "epoch": 1.399815327793167, "grad_norm": 19.625, "learning_rate": 1.9878395133662248e-06, "loss": 1.3446176052093506, "step": 4548 }, { "epoch": 1.4004309018159433, "grad_norm": 14.875, "learning_rate": 1.987051776041462e-06, "loss": 1.260849118232727, "step": 4550 }, { "epoch": 1.4010464758387196, "grad_norm": 14.4375, "learning_rate": 1.986263956095715e-06, "loss": 1.2888896465301514, "step": 4552 }, { "epoch": 1.4016620498614958, "grad_norm": 12.5, "learning_rate": 1.9854760538769655e-06, "loss": 1.490917444229126, "step": 4554 }, { "epoch": 1.402277623884272, "grad_norm": 23.875, "learning_rate": 1.9846880697332307e-06, "loss": 1.6391327381134033, "step": 4556 }, { "epoch": 1.4028931979070483, "grad_norm": 97.0, "learning_rate": 1.9839000040125647e-06, "loss": 1.3920652866363525, "step": 4558 }, { "epoch": 1.4035087719298245, "grad_norm": 19.625, "learning_rate": 1.9831118570630555e-06, "loss": 1.3244240283966064, "step": 4560 }, { "epoch": 1.4041243459526007, "grad_norm": 9.625, "learning_rate": 1.98232362923283e-06, "loss": 1.2185591459274292, "step": 4562 }, { "epoch": 1.404739919975377, "grad_norm": 16.125, "learning_rate": 1.9815353208700483e-06, "loss": 1.27888822555542, "step": 4564 }, { "epoch": 1.4053554939981532, "grad_norm": 15.3125, "learning_rate": 1.9807469323229084e-06, "loss": 1.0425304174423218, "step": 4566 }, { "epoch": 1.4059710680209294, "grad_norm": 57.25, "learning_rate": 1.9799584639396406e-06, "loss": 1.788959264755249, "step": 4568 }, { "epoch": 1.4065866420437056, "grad_norm": 13.75, "learning_rate": 1.979169916068514e-06, "loss": 1.2876644134521484, "step": 4570 }, { "epoch": 1.4072022160664819, "grad_norm": 47.75, "learning_rate": 1.9783812890578297e-06, "loss": 1.4198131561279297, "step": 4572 }, { "epoch": 1.4078177900892581, "grad_norm": 24.625, "learning_rate": 1.9775925832559266e-06, "loss": 1.0596034526824951, "step": 4574 }, { "epoch": 1.4084333641120343, "grad_norm": 9.4375, "learning_rate": 1.976803799011176e-06, "loss": 1.3388042449951172, "step": 4576 }, { "epoch": 1.4090489381348106, "grad_norm": 8.9375, "learning_rate": 1.976014936671984e-06, "loss": 1.1961338520050049, "step": 4578 }, { "epoch": 1.4096645121575868, "grad_norm": 16.75, "learning_rate": 1.9752259965867944e-06, "loss": 1.3246583938598633, "step": 4580 }, { "epoch": 1.4102800861803633, "grad_norm": 16.75, "learning_rate": 1.97443697910408e-06, "loss": 1.446965217590332, "step": 4582 }, { "epoch": 1.4108956602031395, "grad_norm": 12.8125, "learning_rate": 1.973647884572354e-06, "loss": 1.5359292030334473, "step": 4584 }, { "epoch": 1.4115112342259157, "grad_norm": 8.6875, "learning_rate": 1.9728587133401577e-06, "loss": 1.2705938816070557, "step": 4586 }, { "epoch": 1.412126808248692, "grad_norm": 10.125, "learning_rate": 1.9720694657560695e-06, "loss": 1.2938408851623535, "step": 4588 }, { "epoch": 1.4127423822714682, "grad_norm": 14.5625, "learning_rate": 1.9712801421687013e-06, "loss": 1.3553228378295898, "step": 4590 }, { "epoch": 1.4133579562942444, "grad_norm": 8.9375, "learning_rate": 1.970490742926699e-06, "loss": 1.1003388166427612, "step": 4592 }, { "epoch": 1.4139735303170207, "grad_norm": 12.75, "learning_rate": 1.9697012683787397e-06, "loss": 1.5267784595489502, "step": 4594 }, { "epoch": 1.414589104339797, "grad_norm": 10.4375, "learning_rate": 1.9689117188735365e-06, "loss": 1.6333833932876587, "step": 4596 }, { "epoch": 1.4152046783625731, "grad_norm": 13.8125, "learning_rate": 1.9681220947598328e-06, "loss": 1.5597987174987793, "step": 4598 }, { "epoch": 1.4158202523853494, "grad_norm": 27.0, "learning_rate": 1.9673323963864084e-06, "loss": 1.5046354532241821, "step": 4600 }, { "epoch": 1.4164358264081256, "grad_norm": 20.25, "learning_rate": 1.9665426241020727e-06, "loss": 0.9578072428703308, "step": 4602 }, { "epoch": 1.4170514004309018, "grad_norm": 8.4375, "learning_rate": 1.9657527782556687e-06, "loss": 1.2864506244659424, "step": 4604 }, { "epoch": 1.417666974453678, "grad_norm": 27.875, "learning_rate": 1.964962859196073e-06, "loss": 1.6020686626434326, "step": 4606 }, { "epoch": 1.4182825484764543, "grad_norm": 15.3125, "learning_rate": 1.964172867272194e-06, "loss": 1.1308155059814453, "step": 4608 }, { "epoch": 1.4188981224992305, "grad_norm": 18.75, "learning_rate": 1.963382802832972e-06, "loss": 1.6528303623199463, "step": 4610 }, { "epoch": 1.4195136965220068, "grad_norm": 7.84375, "learning_rate": 1.962592666227378e-06, "loss": 1.3737719058990479, "step": 4612 }, { "epoch": 1.420129270544783, "grad_norm": 14.125, "learning_rate": 1.9618024578044174e-06, "loss": 0.8094227910041809, "step": 4614 }, { "epoch": 1.4207448445675592, "grad_norm": 20.875, "learning_rate": 1.9610121779131256e-06, "loss": 1.9535927772521973, "step": 4616 }, { "epoch": 1.4213604185903355, "grad_norm": 20.0, "learning_rate": 1.9602218269025713e-06, "loss": 1.4262498617172241, "step": 4618 }, { "epoch": 1.4219759926131117, "grad_norm": 12.375, "learning_rate": 1.9594314051218526e-06, "loss": 1.1243507862091064, "step": 4620 }, { "epoch": 1.422591566635888, "grad_norm": 35.0, "learning_rate": 1.9586409129200992e-06, "loss": 1.2531572580337524, "step": 4622 }, { "epoch": 1.4232071406586642, "grad_norm": 14.5625, "learning_rate": 1.9578503506464738e-06, "loss": 1.5430424213409424, "step": 4624 }, { "epoch": 1.4238227146814404, "grad_norm": 17.5, "learning_rate": 1.957059718650167e-06, "loss": 1.426865577697754, "step": 4626 }, { "epoch": 1.4244382887042166, "grad_norm": 13.1875, "learning_rate": 1.956269017280403e-06, "loss": 1.37661612033844, "step": 4628 }, { "epoch": 1.4250538627269929, "grad_norm": 24.875, "learning_rate": 1.955478246886435e-06, "loss": 1.147402286529541, "step": 4630 }, { "epoch": 1.425669436749769, "grad_norm": 48.25, "learning_rate": 1.9546874078175476e-06, "loss": 1.189788579940796, "step": 4632 }, { "epoch": 1.4262850107725453, "grad_norm": 22.0, "learning_rate": 1.9538965004230553e-06, "loss": 1.5144860744476318, "step": 4634 }, { "epoch": 1.4269005847953216, "grad_norm": 16.5, "learning_rate": 1.9531055250523026e-06, "loss": 1.277520775794983, "step": 4636 }, { "epoch": 1.427516158818098, "grad_norm": 12.875, "learning_rate": 1.952314482054663e-06, "loss": 1.2806596755981445, "step": 4638 }, { "epoch": 1.4281317328408742, "grad_norm": 17.0, "learning_rate": 1.9515233717795435e-06, "loss": 1.0641429424285889, "step": 4640 }, { "epoch": 1.4287473068636505, "grad_norm": 16.875, "learning_rate": 1.950732194576377e-06, "loss": 1.2826216220855713, "step": 4642 }, { "epoch": 1.4293628808864267, "grad_norm": 9.6875, "learning_rate": 1.9499409507946277e-06, "loss": 1.0223610401153564, "step": 4644 }, { "epoch": 1.429978454909203, "grad_norm": 108.5, "learning_rate": 1.9491496407837886e-06, "loss": 1.5407743453979492, "step": 4646 }, { "epoch": 1.4305940289319792, "grad_norm": 13.0625, "learning_rate": 1.9483582648933827e-06, "loss": 1.4629024267196655, "step": 4648 }, { "epoch": 1.4312096029547554, "grad_norm": 13.625, "learning_rate": 1.9475668234729606e-06, "loss": 1.2143301963806152, "step": 4650 }, { "epoch": 1.4318251769775316, "grad_norm": 11.25, "learning_rate": 1.9467753168721047e-06, "loss": 1.543229103088379, "step": 4652 }, { "epoch": 1.4324407510003079, "grad_norm": 14.125, "learning_rate": 1.9459837454404227e-06, "loss": 1.3003244400024414, "step": 4654 }, { "epoch": 1.433056325023084, "grad_norm": 6.28125, "learning_rate": 1.9451921095275534e-06, "loss": 1.0961573123931885, "step": 4656 }, { "epoch": 1.4336718990458603, "grad_norm": 9.125, "learning_rate": 1.944400409483163e-06, "loss": 1.4426017999649048, "step": 4658 }, { "epoch": 1.4342874730686366, "grad_norm": 11.6875, "learning_rate": 1.9436086456569463e-06, "loss": 1.3466172218322754, "step": 4660 }, { "epoch": 1.4349030470914128, "grad_norm": 11.5625, "learning_rate": 1.9428168183986265e-06, "loss": 1.3730425834655762, "step": 4662 }, { "epoch": 1.435518621114189, "grad_norm": 13.0625, "learning_rate": 1.942024928057955e-06, "loss": 1.225844144821167, "step": 4664 }, { "epoch": 1.4361341951369653, "grad_norm": 10.9375, "learning_rate": 1.9412329749847094e-06, "loss": 1.5529940128326416, "step": 4666 }, { "epoch": 1.4367497691597415, "grad_norm": 24.375, "learning_rate": 1.9404409595286978e-06, "loss": 1.7074967622756958, "step": 4668 }, { "epoch": 1.4373653431825177, "grad_norm": 11.375, "learning_rate": 1.9396488820397535e-06, "loss": 1.468512773513794, "step": 4670 }, { "epoch": 1.437980917205294, "grad_norm": 9.75, "learning_rate": 1.938856742867738e-06, "loss": 1.344146490097046, "step": 4672 }, { "epoch": 1.4385964912280702, "grad_norm": 49.5, "learning_rate": 1.93806454236254e-06, "loss": 0.8190611600875854, "step": 4674 }, { "epoch": 1.4392120652508464, "grad_norm": 22.25, "learning_rate": 1.9372722808740756e-06, "loss": 1.4838563203811646, "step": 4676 }, { "epoch": 1.4398276392736227, "grad_norm": 4.65625, "learning_rate": 1.936479958752288e-06, "loss": 0.9019955396652222, "step": 4678 }, { "epoch": 1.440443213296399, "grad_norm": 10.0, "learning_rate": 1.935687576347146e-06, "loss": 1.3104298114776611, "step": 4680 }, { "epoch": 1.4410587873191751, "grad_norm": 19.125, "learning_rate": 1.9348951340086463e-06, "loss": 1.061185359954834, "step": 4682 }, { "epoch": 1.4416743613419514, "grad_norm": 28.5, "learning_rate": 1.934102632086811e-06, "loss": 1.4017858505249023, "step": 4684 }, { "epoch": 1.4422899353647276, "grad_norm": 9.6875, "learning_rate": 1.933310070931691e-06, "loss": 1.3744781017303467, "step": 4686 }, { "epoch": 1.4429055093875038, "grad_norm": 8.6875, "learning_rate": 1.9325174508933594e-06, "loss": 1.2082910537719727, "step": 4688 }, { "epoch": 1.44352108341028, "grad_norm": 26.5, "learning_rate": 1.9317247723219176e-06, "loss": 1.7057225704193115, "step": 4690 }, { "epoch": 1.4441366574330563, "grad_norm": 16.375, "learning_rate": 1.9309320355674933e-06, "loss": 1.4047242403030396, "step": 4692 }, { "epoch": 1.4447522314558325, "grad_norm": 9.0625, "learning_rate": 1.930139240980239e-06, "loss": 1.20115327835083, "step": 4694 }, { "epoch": 1.4453678054786088, "grad_norm": 28.875, "learning_rate": 1.929346388910333e-06, "loss": 1.047264814376831, "step": 4696 }, { "epoch": 1.445983379501385, "grad_norm": 13.6875, "learning_rate": 1.928553479707979e-06, "loss": 1.3328430652618408, "step": 4698 }, { "epoch": 1.4465989535241612, "grad_norm": 17.25, "learning_rate": 1.9277605137234057e-06, "loss": 1.4096957445144653, "step": 4700 }, { "epoch": 1.4472145275469375, "grad_norm": 14.9375, "learning_rate": 1.9269674913068676e-06, "loss": 1.224859595298767, "step": 4702 }, { "epoch": 1.4478301015697137, "grad_norm": 19.25, "learning_rate": 1.9261744128086427e-06, "loss": 1.1249094009399414, "step": 4704 }, { "epoch": 1.44844567559249, "grad_norm": 12.875, "learning_rate": 1.925381278579036e-06, "loss": 1.0842483043670654, "step": 4706 }, { "epoch": 1.4490612496152662, "grad_norm": 15.0625, "learning_rate": 1.9245880889683744e-06, "loss": 1.2176549434661865, "step": 4708 }, { "epoch": 1.4496768236380424, "grad_norm": 18.125, "learning_rate": 1.9237948443270115e-06, "loss": 1.6450581550598145, "step": 4710 }, { "epoch": 1.4502923976608186, "grad_norm": 7.5625, "learning_rate": 1.9230015450053236e-06, "loss": 1.1668829917907715, "step": 4712 }, { "epoch": 1.4509079716835949, "grad_norm": 11.5, "learning_rate": 1.9222081913537135e-06, "loss": 1.2747738361358643, "step": 4714 }, { "epoch": 1.451523545706371, "grad_norm": 7.65625, "learning_rate": 1.9214147837226045e-06, "loss": 1.2668383121490479, "step": 4716 }, { "epoch": 1.4521391197291473, "grad_norm": 41.25, "learning_rate": 1.920621322462447e-06, "loss": 1.328049898147583, "step": 4718 }, { "epoch": 1.4527546937519236, "grad_norm": 20.75, "learning_rate": 1.9198278079237127e-06, "loss": 1.444204330444336, "step": 4720 }, { "epoch": 1.4533702677746998, "grad_norm": 11.8125, "learning_rate": 1.9190342404568996e-06, "loss": 1.454003095626831, "step": 4722 }, { "epoch": 1.453985841797476, "grad_norm": 18.0, "learning_rate": 1.918240620412525e-06, "loss": 1.5295004844665527, "step": 4724 }, { "epoch": 1.4546014158202523, "grad_norm": 14.8125, "learning_rate": 1.917446948141134e-06, "loss": 1.567083477973938, "step": 4726 }, { "epoch": 1.4552169898430285, "grad_norm": 13.3125, "learning_rate": 1.9166532239932906e-06, "loss": 1.5100464820861816, "step": 4728 }, { "epoch": 1.4558325638658047, "grad_norm": 11.625, "learning_rate": 1.915859448319586e-06, "loss": 1.5772287845611572, "step": 4730 }, { "epoch": 1.4564481378885812, "grad_norm": 19.25, "learning_rate": 1.9150656214706295e-06, "loss": 1.397694706916809, "step": 4732 }, { "epoch": 1.4570637119113574, "grad_norm": 7.90625, "learning_rate": 1.9142717437970564e-06, "loss": 1.155037760734558, "step": 4734 }, { "epoch": 1.4576792859341337, "grad_norm": 33.5, "learning_rate": 1.913477815649523e-06, "loss": 1.1763298511505127, "step": 4736 }, { "epoch": 1.4582948599569099, "grad_norm": 17.375, "learning_rate": 1.912683837378709e-06, "loss": 0.5639537572860718, "step": 4738 }, { "epoch": 1.4589104339796861, "grad_norm": 5.375, "learning_rate": 1.9118898093353146e-06, "loss": 0.9941329956054688, "step": 4740 }, { "epoch": 1.4595260080024623, "grad_norm": 14.625, "learning_rate": 1.9110957318700634e-06, "loss": 1.6984761953353882, "step": 4742 }, { "epoch": 1.4601415820252386, "grad_norm": 29.125, "learning_rate": 1.9103016053337004e-06, "loss": 1.4265990257263184, "step": 4744 }, { "epoch": 1.4607571560480148, "grad_norm": 51.25, "learning_rate": 1.909507430076992e-06, "loss": 1.5186126232147217, "step": 4746 }, { "epoch": 1.461372730070791, "grad_norm": 29.625, "learning_rate": 1.908713206450727e-06, "loss": 0.9814944267272949, "step": 4748 }, { "epoch": 1.4619883040935673, "grad_norm": 16.5, "learning_rate": 1.9079189348057132e-06, "loss": 1.252488613128662, "step": 4750 }, { "epoch": 1.4626038781163435, "grad_norm": 13.0, "learning_rate": 1.907124615492783e-06, "loss": 1.610899806022644, "step": 4752 }, { "epoch": 1.4632194521391197, "grad_norm": 20.75, "learning_rate": 1.9063302488627872e-06, "loss": 1.6237657070159912, "step": 4754 }, { "epoch": 1.463835026161896, "grad_norm": 13.75, "learning_rate": 1.9055358352665995e-06, "loss": 1.5839223861694336, "step": 4756 }, { "epoch": 1.4644506001846722, "grad_norm": 22.25, "learning_rate": 1.9047413750551118e-06, "loss": 1.3604011535644531, "step": 4758 }, { "epoch": 1.4650661742074484, "grad_norm": 36.5, "learning_rate": 1.9039468685792391e-06, "loss": 1.3691078424453735, "step": 4760 }, { "epoch": 1.4656817482302247, "grad_norm": 6.90625, "learning_rate": 1.9031523161899152e-06, "loss": 1.2302625179290771, "step": 4762 }, { "epoch": 1.466297322253001, "grad_norm": 16.875, "learning_rate": 1.9023577182380954e-06, "loss": 0.8660012483596802, "step": 4764 }, { "epoch": 1.4669128962757771, "grad_norm": 23.875, "learning_rate": 1.9015630750747544e-06, "loss": 1.0083507299423218, "step": 4766 }, { "epoch": 1.4675284702985534, "grad_norm": 7.96875, "learning_rate": 1.9007683870508866e-06, "loss": 1.3119466304779053, "step": 4768 }, { "epoch": 1.4681440443213296, "grad_norm": 19.75, "learning_rate": 1.899973654517507e-06, "loss": 1.646362543106079, "step": 4770 }, { "epoch": 1.4687596183441058, "grad_norm": 24.625, "learning_rate": 1.8991788778256505e-06, "loss": 1.4080818891525269, "step": 4772 }, { "epoch": 1.469375192366882, "grad_norm": 23.625, "learning_rate": 1.898384057326369e-06, "loss": 1.5112760066986084, "step": 4774 }, { "epoch": 1.4699907663896583, "grad_norm": 10.75, "learning_rate": 1.8975891933707373e-06, "loss": 1.438962459564209, "step": 4776 }, { "epoch": 1.4706063404124345, "grad_norm": 14.3125, "learning_rate": 1.8967942863098472e-06, "loss": 1.0983920097351074, "step": 4778 }, { "epoch": 1.4712219144352108, "grad_norm": 10.6875, "learning_rate": 1.8959993364948096e-06, "loss": 1.042116403579712, "step": 4780 }, { "epoch": 1.471837488457987, "grad_norm": 10.5, "learning_rate": 1.8952043442767555e-06, "loss": 1.5508229732513428, "step": 4782 }, { "epoch": 1.4724530624807632, "grad_norm": 15.75, "learning_rate": 1.8944093100068334e-06, "loss": 1.648895263671875, "step": 4784 }, { "epoch": 1.4730686365035395, "grad_norm": 23.625, "learning_rate": 1.8936142340362116e-06, "loss": 1.737051010131836, "step": 4786 }, { "epoch": 1.4736842105263157, "grad_norm": 19.875, "learning_rate": 1.8928191167160749e-06, "loss": 1.876301884651184, "step": 4788 }, { "epoch": 1.4742997845490922, "grad_norm": 6.8125, "learning_rate": 1.8920239583976283e-06, "loss": 1.2666301727294922, "step": 4790 }, { "epoch": 1.4749153585718684, "grad_norm": 12.0625, "learning_rate": 1.891228759432094e-06, "loss": 1.2881522178649902, "step": 4792 }, { "epoch": 1.4755309325946446, "grad_norm": 7.75, "learning_rate": 1.890433520170712e-06, "loss": 1.2567453384399414, "step": 4794 }, { "epoch": 1.4761465066174209, "grad_norm": 12.625, "learning_rate": 1.8896382409647403e-06, "loss": 1.103519320487976, "step": 4796 }, { "epoch": 1.476762080640197, "grad_norm": 16.25, "learning_rate": 1.8888429221654555e-06, "loss": 1.3396915197372437, "step": 4798 }, { "epoch": 1.4773776546629733, "grad_norm": 53.5, "learning_rate": 1.8880475641241502e-06, "loss": 1.0970816612243652, "step": 4800 }, { "epoch": 1.4779932286857496, "grad_norm": 18.75, "learning_rate": 1.8872521671921347e-06, "loss": 1.3329362869262695, "step": 4802 }, { "epoch": 1.4786088027085258, "grad_norm": 13.1875, "learning_rate": 1.8864567317207377e-06, "loss": 1.5142507553100586, "step": 4804 }, { "epoch": 1.479224376731302, "grad_norm": 19.125, "learning_rate": 1.8856612580613028e-06, "loss": 1.3657267093658447, "step": 4806 }, { "epoch": 1.4798399507540783, "grad_norm": 26.125, "learning_rate": 1.8848657465651925e-06, "loss": 0.9717738628387451, "step": 4808 }, { "epoch": 1.4804555247768545, "grad_norm": 17.0, "learning_rate": 1.884070197583784e-06, "loss": 1.6565666198730469, "step": 4810 }, { "epoch": 1.4810710987996307, "grad_norm": 11.6875, "learning_rate": 1.883274611468474e-06, "loss": 1.247531533241272, "step": 4812 }, { "epoch": 1.481686672822407, "grad_norm": 44.75, "learning_rate": 1.8824789885706721e-06, "loss": 1.4119795560836792, "step": 4814 }, { "epoch": 1.4823022468451832, "grad_norm": 13.125, "learning_rate": 1.8816833292418075e-06, "loss": 1.174653172492981, "step": 4816 }, { "epoch": 1.4829178208679594, "grad_norm": 138.0, "learning_rate": 1.8808876338333218e-06, "loss": 1.3747670650482178, "step": 4818 }, { "epoch": 1.4835333948907357, "grad_norm": 16.625, "learning_rate": 1.8800919026966764e-06, "loss": 1.3020074367523193, "step": 4820 }, { "epoch": 1.484148968913512, "grad_norm": 6.15625, "learning_rate": 1.8792961361833448e-06, "loss": 1.279649257659912, "step": 4822 }, { "epoch": 1.4847645429362881, "grad_norm": 30.875, "learning_rate": 1.8785003346448202e-06, "loss": 1.3550713062286377, "step": 4824 }, { "epoch": 1.4853801169590644, "grad_norm": 10.625, "learning_rate": 1.8777044984326075e-06, "loss": 1.080082893371582, "step": 4826 }, { "epoch": 1.4859956909818406, "grad_norm": 10.0, "learning_rate": 1.8769086278982287e-06, "loss": 1.4219310283660889, "step": 4828 }, { "epoch": 1.4866112650046168, "grad_norm": 7.90625, "learning_rate": 1.8761127233932209e-06, "loss": 1.1008557081222534, "step": 4830 }, { "epoch": 1.487226839027393, "grad_norm": 5.9375, "learning_rate": 1.8753167852691365e-06, "loss": 1.295001745223999, "step": 4832 }, { "epoch": 1.4878424130501693, "grad_norm": 16.5, "learning_rate": 1.8745208138775416e-06, "loss": 1.5518990755081177, "step": 4834 }, { "epoch": 1.4884579870729455, "grad_norm": 7.21875, "learning_rate": 1.8737248095700179e-06, "loss": 1.103243112564087, "step": 4836 }, { "epoch": 1.4890735610957218, "grad_norm": 25.5, "learning_rate": 1.8729287726981617e-06, "loss": 1.256454348564148, "step": 4838 }, { "epoch": 1.489689135118498, "grad_norm": 14.3125, "learning_rate": 1.8721327036135826e-06, "loss": 1.3482131958007812, "step": 4840 }, { "epoch": 1.4903047091412742, "grad_norm": 8.0, "learning_rate": 1.8713366026679064e-06, "loss": 1.1732832193374634, "step": 4842 }, { "epoch": 1.4909202831640505, "grad_norm": 19.5, "learning_rate": 1.8705404702127706e-06, "loss": 0.6500099897384644, "step": 4844 }, { "epoch": 1.4915358571868267, "grad_norm": 10.125, "learning_rate": 1.8697443065998284e-06, "loss": 1.0080634355545044, "step": 4846 }, { "epoch": 1.492151431209603, "grad_norm": 14.375, "learning_rate": 1.868948112180746e-06, "loss": 1.523182988166809, "step": 4848 }, { "epoch": 1.4927670052323792, "grad_norm": 28.875, "learning_rate": 1.8681518873072036e-06, "loss": 1.9081377983093262, "step": 4850 }, { "epoch": 1.4933825792551554, "grad_norm": 30.0, "learning_rate": 1.867355632330895e-06, "loss": 1.7250075340270996, "step": 4852 }, { "epoch": 1.4939981532779316, "grad_norm": 12.5, "learning_rate": 1.8665593476035259e-06, "loss": 1.2684253454208374, "step": 4854 }, { "epoch": 1.4946137273007079, "grad_norm": 16.75, "learning_rate": 1.8657630334768171e-06, "loss": 1.582967758178711, "step": 4856 }, { "epoch": 1.495229301323484, "grad_norm": 23.5, "learning_rate": 1.8649666903025013e-06, "loss": 1.17526113986969, "step": 4858 }, { "epoch": 1.4958448753462603, "grad_norm": 14.5, "learning_rate": 1.8641703184323235e-06, "loss": 1.4988826513290405, "step": 4860 }, { "epoch": 1.4964604493690365, "grad_norm": 7.6875, "learning_rate": 1.863373918218043e-06, "loss": 1.3049447536468506, "step": 4862 }, { "epoch": 1.4970760233918128, "grad_norm": 20.125, "learning_rate": 1.8625774900114303e-06, "loss": 1.086333990097046, "step": 4864 }, { "epoch": 1.497691597414589, "grad_norm": 14.5, "learning_rate": 1.8617810341642682e-06, "loss": 1.2647141218185425, "step": 4866 }, { "epoch": 1.4983071714373652, "grad_norm": 14.5625, "learning_rate": 1.8609845510283534e-06, "loss": 1.0968520641326904, "step": 4868 }, { "epoch": 1.4989227454601415, "grad_norm": 20.625, "learning_rate": 1.8601880409554924e-06, "loss": 1.589921236038208, "step": 4870 }, { "epoch": 1.4995383194829177, "grad_norm": 10.625, "learning_rate": 1.8593915042975043e-06, "loss": 1.0053234100341797, "step": 4872 }, { "epoch": 1.500153893505694, "grad_norm": 10.3125, "learning_rate": 1.8585949414062207e-06, "loss": 1.2411702871322632, "step": 4874 }, { "epoch": 1.5007694675284702, "grad_norm": 32.25, "learning_rate": 1.8577983526334847e-06, "loss": 1.2974989414215088, "step": 4876 }, { "epoch": 1.5013850415512464, "grad_norm": 17.5, "learning_rate": 1.8570017383311504e-06, "loss": 1.8156037330627441, "step": 4878 }, { "epoch": 1.5020006155740226, "grad_norm": 7.5625, "learning_rate": 1.8562050988510824e-06, "loss": 1.23923659324646, "step": 4880 }, { "epoch": 1.5026161895967989, "grad_norm": 21.125, "learning_rate": 1.8554084345451586e-06, "loss": 0.8623427748680115, "step": 4882 }, { "epoch": 1.5032317636195751, "grad_norm": 22.0, "learning_rate": 1.8546117457652654e-06, "loss": 1.8232307434082031, "step": 4884 }, { "epoch": 1.5038473376423513, "grad_norm": 15.6875, "learning_rate": 1.853815032863302e-06, "loss": 1.102805495262146, "step": 4886 }, { "epoch": 1.5044629116651276, "grad_norm": 9.4375, "learning_rate": 1.8530182961911766e-06, "loss": 1.3882505893707275, "step": 4888 }, { "epoch": 1.5050784856879038, "grad_norm": 13.1875, "learning_rate": 1.8522215361008086e-06, "loss": 1.0989861488342285, "step": 4890 }, { "epoch": 1.50569405971068, "grad_norm": 26.125, "learning_rate": 1.8514247529441292e-06, "loss": 1.6275972127914429, "step": 4892 }, { "epoch": 1.5063096337334565, "grad_norm": 10.375, "learning_rate": 1.8506279470730775e-06, "loss": 1.095172643661499, "step": 4894 }, { "epoch": 1.5069252077562327, "grad_norm": 9.8125, "learning_rate": 1.849831118839603e-06, "loss": 0.8544666171073914, "step": 4896 }, { "epoch": 1.507540781779009, "grad_norm": 6.875, "learning_rate": 1.8490342685956666e-06, "loss": 0.8565798997879028, "step": 4898 }, { "epoch": 1.5081563558017852, "grad_norm": 15.0, "learning_rate": 1.8482373966932377e-06, "loss": 1.2892870903015137, "step": 4900 }, { "epoch": 1.5087719298245614, "grad_norm": 13.1875, "learning_rate": 1.8474405034842957e-06, "loss": 1.41206955909729, "step": 4902 }, { "epoch": 1.5093875038473377, "grad_norm": 6.53125, "learning_rate": 1.8466435893208286e-06, "loss": 1.286069393157959, "step": 4904 }, { "epoch": 1.510003077870114, "grad_norm": 14.5, "learning_rate": 1.8458466545548348e-06, "loss": 1.253098726272583, "step": 4906 }, { "epoch": 1.5106186518928901, "grad_norm": 10.375, "learning_rate": 1.8450496995383209e-06, "loss": 1.565866231918335, "step": 4908 }, { "epoch": 1.5112342259156664, "grad_norm": 41.25, "learning_rate": 1.8442527246233041e-06, "loss": 0.7049061059951782, "step": 4910 }, { "epoch": 1.5118497999384426, "grad_norm": 16.25, "learning_rate": 1.843455730161807e-06, "loss": 1.042777419090271, "step": 4912 }, { "epoch": 1.5124653739612188, "grad_norm": 7.8125, "learning_rate": 1.8426587165058651e-06, "loss": 1.1403331756591797, "step": 4914 }, { "epoch": 1.513080947983995, "grad_norm": 5.71875, "learning_rate": 1.841861684007519e-06, "loss": 1.1445809602737427, "step": 4916 }, { "epoch": 1.5136965220067713, "grad_norm": 4.75, "learning_rate": 1.8410646330188187e-06, "loss": 1.1456329822540283, "step": 4918 }, { "epoch": 1.5143120960295475, "grad_norm": 9.4375, "learning_rate": 1.8402675638918232e-06, "loss": 1.1502320766448975, "step": 4920 }, { "epoch": 1.5149276700523238, "grad_norm": 20.0, "learning_rate": 1.839470476978599e-06, "loss": 1.4103009700775146, "step": 4922 }, { "epoch": 1.5155432440751, "grad_norm": 14.125, "learning_rate": 1.8386733726312196e-06, "loss": 1.3720169067382812, "step": 4924 }, { "epoch": 1.5161588180978762, "grad_norm": 20.0, "learning_rate": 1.8378762512017674e-06, "loss": 1.7142300605773926, "step": 4926 }, { "epoch": 1.5167743921206525, "grad_norm": 15.25, "learning_rate": 1.8370791130423314e-06, "loss": 1.574350357055664, "step": 4928 }, { "epoch": 1.5173899661434287, "grad_norm": 27.0, "learning_rate": 1.836281958505009e-06, "loss": 1.3752760887145996, "step": 4930 }, { "epoch": 1.5180055401662051, "grad_norm": 37.0, "learning_rate": 1.8354847879419038e-06, "loss": 1.4977751970291138, "step": 4932 }, { "epoch": 1.5186211141889814, "grad_norm": 22.75, "learning_rate": 1.834687601705127e-06, "loss": 1.4560470581054688, "step": 4934 }, { "epoch": 1.5192366882117576, "grad_norm": 20.75, "learning_rate": 1.8338904001467976e-06, "loss": 1.2786219120025635, "step": 4936 }, { "epoch": 1.5198522622345338, "grad_norm": 6.875, "learning_rate": 1.833093183619039e-06, "loss": 1.3729817867279053, "step": 4938 }, { "epoch": 1.52046783625731, "grad_norm": 32.5, "learning_rate": 1.8322959524739835e-06, "loss": 1.3546814918518066, "step": 4940 }, { "epoch": 1.5210834102800863, "grad_norm": 13.125, "learning_rate": 1.8314987070637687e-06, "loss": 1.3367919921875, "step": 4942 }, { "epoch": 1.5216989843028625, "grad_norm": 16.875, "learning_rate": 1.830701447740539e-06, "loss": 1.428148865699768, "step": 4944 }, { "epoch": 1.5223145583256388, "grad_norm": 16.375, "learning_rate": 1.829904174856445e-06, "loss": 1.4339628219604492, "step": 4946 }, { "epoch": 1.522930132348415, "grad_norm": 8.625, "learning_rate": 1.829106888763642e-06, "loss": 1.3263335227966309, "step": 4948 }, { "epoch": 1.5235457063711912, "grad_norm": 9.0, "learning_rate": 1.828309589814294e-06, "loss": 1.1701977252960205, "step": 4950 }, { "epoch": 1.5241612803939675, "grad_norm": 15.8125, "learning_rate": 1.8275122783605668e-06, "loss": 1.5451289415359497, "step": 4952 }, { "epoch": 1.5247768544167437, "grad_norm": 7.75, "learning_rate": 1.8267149547546353e-06, "loss": 1.0618703365325928, "step": 4954 }, { "epoch": 1.52539242843952, "grad_norm": 13.1875, "learning_rate": 1.8259176193486771e-06, "loss": 1.249274730682373, "step": 4956 }, { "epoch": 1.5260080024622962, "grad_norm": 23.5, "learning_rate": 1.825120272494877e-06, "loss": 1.2727432250976562, "step": 4958 }, { "epoch": 1.5266235764850724, "grad_norm": 14.9375, "learning_rate": 1.8243229145454242e-06, "loss": 1.678273320198059, "step": 4960 }, { "epoch": 1.5272391505078486, "grad_norm": 7.71875, "learning_rate": 1.823525545852512e-06, "loss": 1.028113842010498, "step": 4962 }, { "epoch": 1.5278547245306249, "grad_norm": 34.5, "learning_rate": 1.8227281667683392e-06, "loss": 1.322481393814087, "step": 4964 }, { "epoch": 1.528470298553401, "grad_norm": 9.625, "learning_rate": 1.821930777645109e-06, "loss": 1.3164238929748535, "step": 4966 }, { "epoch": 1.5290858725761773, "grad_norm": 10.375, "learning_rate": 1.8211333788350292e-06, "loss": 1.2079682350158691, "step": 4968 }, { "epoch": 1.5297014465989536, "grad_norm": 19.25, "learning_rate": 1.8203359706903122e-06, "loss": 1.0435426235198975, "step": 4970 }, { "epoch": 1.5303170206217298, "grad_norm": 21.625, "learning_rate": 1.8195385535631735e-06, "loss": 1.2816870212554932, "step": 4972 }, { "epoch": 1.530932594644506, "grad_norm": 4.03125, "learning_rate": 1.8187411278058333e-06, "loss": 1.0759096145629883, "step": 4974 }, { "epoch": 1.5315481686672823, "grad_norm": 28.875, "learning_rate": 1.8179436937705147e-06, "loss": 1.3759675025939941, "step": 4976 }, { "epoch": 1.5321637426900585, "grad_norm": 18.25, "learning_rate": 1.8171462518094472e-06, "loss": 1.2887439727783203, "step": 4978 }, { "epoch": 1.5327793167128347, "grad_norm": 17.5, "learning_rate": 1.8163488022748597e-06, "loss": 1.437053918838501, "step": 4980 }, { "epoch": 1.533394890735611, "grad_norm": 13.6875, "learning_rate": 1.8155513455189886e-06, "loss": 1.6811416149139404, "step": 4982 }, { "epoch": 1.5340104647583872, "grad_norm": 13.5, "learning_rate": 1.8147538818940692e-06, "loss": 1.391784906387329, "step": 4984 }, { "epoch": 1.5346260387811634, "grad_norm": 16.25, "learning_rate": 1.813956411752344e-06, "loss": 1.7641667127609253, "step": 4986 }, { "epoch": 1.5352416128039397, "grad_norm": 18.25, "learning_rate": 1.8131589354460563e-06, "loss": 1.4568517208099365, "step": 4988 }, { "epoch": 1.535857186826716, "grad_norm": 11.125, "learning_rate": 1.8123614533274505e-06, "loss": 1.0670044422149658, "step": 4990 }, { "epoch": 1.5364727608494921, "grad_norm": 4.5625, "learning_rate": 1.811563965748777e-06, "loss": 1.1624255180358887, "step": 4992 }, { "epoch": 1.5370883348722684, "grad_norm": 13.1875, "learning_rate": 1.8107664730622871e-06, "loss": 1.3043831586837769, "step": 4994 }, { "epoch": 1.5377039088950446, "grad_norm": 41.0, "learning_rate": 1.8099689756202334e-06, "loss": 1.5602350234985352, "step": 4996 }, { "epoch": 1.5383194829178208, "grad_norm": 8.5625, "learning_rate": 1.8091714737748712e-06, "loss": 1.1072698831558228, "step": 4998 }, { "epoch": 1.538935056940597, "grad_norm": 5.9375, "learning_rate": 1.8083739678784596e-06, "loss": 1.3773562908172607, "step": 5000 }, { "epoch": 1.5395506309633733, "grad_norm": 19.875, "learning_rate": 1.807576458283256e-06, "loss": 1.3957157135009766, "step": 5002 }, { "epoch": 1.5401662049861495, "grad_norm": 6.375, "learning_rate": 1.8067789453415222e-06, "loss": 0.9105068445205688, "step": 5004 }, { "epoch": 1.5407817790089258, "grad_norm": 14.6875, "learning_rate": 1.8059814294055209e-06, "loss": 1.1876349449157715, "step": 5006 }, { "epoch": 1.541397353031702, "grad_norm": 3.765625, "learning_rate": 1.8051839108275152e-06, "loss": 1.2273304462432861, "step": 5008 }, { "epoch": 1.5420129270544782, "grad_norm": 10.1875, "learning_rate": 1.8043863899597704e-06, "loss": 1.285908579826355, "step": 5010 }, { "epoch": 1.5426285010772545, "grad_norm": 14.625, "learning_rate": 1.803588867154551e-06, "loss": 1.3618907928466797, "step": 5012 }, { "epoch": 1.5432440751000307, "grad_norm": 9.1875, "learning_rate": 1.8027913427641265e-06, "loss": 1.556501865386963, "step": 5014 }, { "epoch": 1.543859649122807, "grad_norm": 21.125, "learning_rate": 1.8019938171407614e-06, "loss": 1.5120654106140137, "step": 5016 }, { "epoch": 1.5444752231455832, "grad_norm": 16.75, "learning_rate": 1.8011962906367256e-06, "loss": 1.6533551216125488, "step": 5018 }, { "epoch": 1.5450907971683594, "grad_norm": 15.5, "learning_rate": 1.8003987636042864e-06, "loss": 1.369877576828003, "step": 5020 }, { "epoch": 1.5457063711911356, "grad_norm": 10.625, "learning_rate": 1.7996012363957136e-06, "loss": 1.258755087852478, "step": 5022 }, { "epoch": 1.5463219452139119, "grad_norm": 12.125, "learning_rate": 1.798803709363275e-06, "loss": 1.3866196870803833, "step": 5024 }, { "epoch": 1.546937519236688, "grad_norm": 16.125, "learning_rate": 1.798006182859239e-06, "loss": 1.1527132987976074, "step": 5026 }, { "epoch": 1.5475530932594643, "grad_norm": 37.5, "learning_rate": 1.7972086572358742e-06, "loss": 1.7384552955627441, "step": 5028 }, { "epoch": 1.5481686672822406, "grad_norm": 18.625, "learning_rate": 1.7964111328454488e-06, "loss": 1.354884386062622, "step": 5030 }, { "epoch": 1.5487842413050168, "grad_norm": 9.75, "learning_rate": 1.7956136100402307e-06, "loss": 1.752507209777832, "step": 5032 }, { "epoch": 1.549399815327793, "grad_norm": 8.1875, "learning_rate": 1.794816089172485e-06, "loss": 1.3854937553405762, "step": 5034 }, { "epoch": 1.5500153893505693, "grad_norm": 19.25, "learning_rate": 1.7940185705944792e-06, "loss": 1.3696260452270508, "step": 5036 }, { "epoch": 1.5506309633733455, "grad_norm": 14.125, "learning_rate": 1.7932210546584777e-06, "loss": 1.2782727479934692, "step": 5038 }, { "epoch": 1.5512465373961217, "grad_norm": 22.0, "learning_rate": 1.7924235417167442e-06, "loss": 1.621812343597412, "step": 5040 }, { "epoch": 1.551862111418898, "grad_norm": 472.0, "learning_rate": 1.7916260321215409e-06, "loss": 1.1303892135620117, "step": 5042 }, { "epoch": 1.5524776854416744, "grad_norm": 34.5, "learning_rate": 1.7908285262251287e-06, "loss": 1.2478010654449463, "step": 5044 }, { "epoch": 1.5530932594644506, "grad_norm": 43.5, "learning_rate": 1.7900310243797673e-06, "loss": 1.3689146041870117, "step": 5046 }, { "epoch": 1.5537088334872269, "grad_norm": 6.84375, "learning_rate": 1.7892335269377136e-06, "loss": 1.0796773433685303, "step": 5048 }, { "epoch": 1.5543244075100031, "grad_norm": 16.0, "learning_rate": 1.7884360342512231e-06, "loss": 1.28507661819458, "step": 5050 }, { "epoch": 1.5549399815327793, "grad_norm": 32.75, "learning_rate": 1.7876385466725502e-06, "loss": 1.0493865013122559, "step": 5052 }, { "epoch": 1.5555555555555556, "grad_norm": 18.75, "learning_rate": 1.7868410645539444e-06, "loss": 1.580011010169983, "step": 5054 }, { "epoch": 1.5561711295783318, "grad_norm": 17.75, "learning_rate": 1.7860435882476564e-06, "loss": 1.2562227249145508, "step": 5056 }, { "epoch": 1.556786703601108, "grad_norm": 9.6875, "learning_rate": 1.785246118105931e-06, "loss": 1.388770580291748, "step": 5058 }, { "epoch": 1.5574022776238843, "grad_norm": 12.3125, "learning_rate": 1.7844486544810121e-06, "loss": 1.4561406373977661, "step": 5060 }, { "epoch": 1.5580178516466605, "grad_norm": 23.125, "learning_rate": 1.78365119772514e-06, "loss": 1.747645378112793, "step": 5062 }, { "epoch": 1.5586334256694367, "grad_norm": 18.375, "learning_rate": 1.782853748190553e-06, "loss": 1.6524887084960938, "step": 5064 }, { "epoch": 1.559248999692213, "grad_norm": 7.09375, "learning_rate": 1.7820563062294853e-06, "loss": 1.2352979183197021, "step": 5066 }, { "epoch": 1.5598645737149892, "grad_norm": 7.84375, "learning_rate": 1.7812588721941674e-06, "loss": 1.2969111204147339, "step": 5068 }, { "epoch": 1.5604801477377654, "grad_norm": 10.375, "learning_rate": 1.7804614464368272e-06, "loss": 1.2578907012939453, "step": 5070 }, { "epoch": 1.5610957217605417, "grad_norm": 14.5, "learning_rate": 1.779664029309688e-06, "loss": 1.5978870391845703, "step": 5072 }, { "epoch": 1.561711295783318, "grad_norm": 5.71875, "learning_rate": 1.778866621164971e-06, "loss": 1.0977182388305664, "step": 5074 }, { "epoch": 1.5623268698060941, "grad_norm": 11.5, "learning_rate": 1.7780692223548915e-06, "loss": 1.1397321224212646, "step": 5076 }, { "epoch": 1.5629424438288704, "grad_norm": 35.75, "learning_rate": 1.777271833231661e-06, "loss": 1.603751540184021, "step": 5078 }, { "epoch": 1.5635580178516466, "grad_norm": 24.0, "learning_rate": 1.7764744541474883e-06, "loss": 1.832308053970337, "step": 5080 }, { "epoch": 1.564173591874423, "grad_norm": 56.75, "learning_rate": 1.775677085454576e-06, "loss": 1.3269708156585693, "step": 5082 }, { "epoch": 1.5647891658971993, "grad_norm": 10.5625, "learning_rate": 1.774879727505123e-06, "loss": 1.3348369598388672, "step": 5084 }, { "epoch": 1.5654047399199755, "grad_norm": 16.375, "learning_rate": 1.7740823806513231e-06, "loss": 1.548821210861206, "step": 5086 }, { "epoch": 1.5660203139427518, "grad_norm": 7.5625, "learning_rate": 1.7732850452453652e-06, "loss": 1.516291856765747, "step": 5088 }, { "epoch": 1.566635887965528, "grad_norm": 44.0, "learning_rate": 1.7724877216394336e-06, "loss": 1.5322810411453247, "step": 5090 }, { "epoch": 1.5672514619883042, "grad_norm": 17.25, "learning_rate": 1.7716904101857067e-06, "loss": 1.3388553857803345, "step": 5092 }, { "epoch": 1.5678670360110805, "grad_norm": 20.0, "learning_rate": 1.770893111236358e-06, "loss": 0.8509764075279236, "step": 5094 }, { "epoch": 1.5684826100338567, "grad_norm": 18.375, "learning_rate": 1.7700958251435551e-06, "loss": 1.3260862827301025, "step": 5096 }, { "epoch": 1.569098184056633, "grad_norm": 10.5, "learning_rate": 1.7692985522594612e-06, "loss": 1.5346083641052246, "step": 5098 }, { "epoch": 1.5697137580794092, "grad_norm": 11.75, "learning_rate": 1.7685012929362318e-06, "loss": 1.3697761297225952, "step": 5100 }, { "epoch": 1.5703293321021854, "grad_norm": 6.8125, "learning_rate": 1.7677040475260166e-06, "loss": 0.9767165184020996, "step": 5102 }, { "epoch": 1.5709449061249616, "grad_norm": 5.3125, "learning_rate": 1.766906816380961e-06, "loss": 1.2552387714385986, "step": 5104 }, { "epoch": 1.5715604801477379, "grad_norm": 10.5, "learning_rate": 1.766109599853203e-06, "loss": 1.2528355121612549, "step": 5106 }, { "epoch": 1.572176054170514, "grad_norm": 12.5625, "learning_rate": 1.7653123982948729e-06, "loss": 1.2786812782287598, "step": 5108 }, { "epoch": 1.5727916281932903, "grad_norm": 16.625, "learning_rate": 1.7645152120580964e-06, "loss": 1.2018368244171143, "step": 5110 }, { "epoch": 1.5734072022160666, "grad_norm": 27.375, "learning_rate": 1.7637180414949915e-06, "loss": 1.089292287826538, "step": 5112 }, { "epoch": 1.5740227762388428, "grad_norm": 8.875, "learning_rate": 1.7629208869576693e-06, "loss": 1.360584020614624, "step": 5114 }, { "epoch": 1.574638350261619, "grad_norm": 10.0, "learning_rate": 1.762123748798233e-06, "loss": 1.3072019815444946, "step": 5116 }, { "epoch": 1.5752539242843953, "grad_norm": 9.3125, "learning_rate": 1.761326627368781e-06, "loss": 1.2890172004699707, "step": 5118 }, { "epoch": 1.5758694983071715, "grad_norm": 8.0625, "learning_rate": 1.7605295230214015e-06, "loss": 1.119748592376709, "step": 5120 }, { "epoch": 1.5764850723299477, "grad_norm": 20.5, "learning_rate": 1.759732436108177e-06, "loss": 1.0745265483856201, "step": 5122 }, { "epoch": 1.577100646352724, "grad_norm": 13.3125, "learning_rate": 1.7589353669811816e-06, "loss": 1.3376274108886719, "step": 5124 }, { "epoch": 1.5777162203755002, "grad_norm": 13.125, "learning_rate": 1.7581383159924818e-06, "loss": 0.7650940418243408, "step": 5126 }, { "epoch": 1.5783317943982764, "grad_norm": 19.375, "learning_rate": 1.7573412834941355e-06, "loss": 1.7586708068847656, "step": 5128 }, { "epoch": 1.5789473684210527, "grad_norm": 16.625, "learning_rate": 1.756544269838193e-06, "loss": 1.4873216152191162, "step": 5130 }, { "epoch": 1.5795629424438289, "grad_norm": 15.375, "learning_rate": 1.7557472753766966e-06, "loss": 1.3263533115386963, "step": 5132 }, { "epoch": 1.5801785164666051, "grad_norm": 20.75, "learning_rate": 1.7549503004616792e-06, "loss": 1.0792770385742188, "step": 5134 }, { "epoch": 1.5807940904893814, "grad_norm": 13.3125, "learning_rate": 1.7541533454451655e-06, "loss": 1.2500200271606445, "step": 5136 }, { "epoch": 1.5814096645121576, "grad_norm": 14.125, "learning_rate": 1.753356410679172e-06, "loss": 1.622786521911621, "step": 5138 }, { "epoch": 1.5820252385349338, "grad_norm": 14.9375, "learning_rate": 1.752559496515705e-06, "loss": 1.4444410800933838, "step": 5140 }, { "epoch": 1.58264081255771, "grad_norm": 15.0, "learning_rate": 1.7517626033067628e-06, "loss": 1.2542698383331299, "step": 5142 }, { "epoch": 1.5832563865804863, "grad_norm": 14.5625, "learning_rate": 1.7509657314043339e-06, "loss": 1.3525272607803345, "step": 5144 }, { "epoch": 1.5838719606032625, "grad_norm": 14.0, "learning_rate": 1.7501688811603972e-06, "loss": 1.3406755924224854, "step": 5146 }, { "epoch": 1.5844875346260388, "grad_norm": 16.5, "learning_rate": 1.7493720529269227e-06, "loss": 1.49215829372406, "step": 5148 }, { "epoch": 1.585103108648815, "grad_norm": 18.375, "learning_rate": 1.748575247055871e-06, "loss": 1.2907540798187256, "step": 5150 }, { "epoch": 1.5857186826715912, "grad_norm": 21.25, "learning_rate": 1.7477784638991915e-06, "loss": 1.827765941619873, "step": 5152 }, { "epoch": 1.5863342566943675, "grad_norm": 27.625, "learning_rate": 1.7469817038088241e-06, "loss": 1.5776755809783936, "step": 5154 }, { "epoch": 1.5869498307171437, "grad_norm": 8.25, "learning_rate": 1.7461849671366987e-06, "loss": 1.2929551601409912, "step": 5156 }, { "epoch": 1.58756540473992, "grad_norm": 19.625, "learning_rate": 1.7453882542347349e-06, "loss": 1.1364490985870361, "step": 5158 }, { "epoch": 1.5881809787626961, "grad_norm": 16.125, "learning_rate": 1.744591565454842e-06, "loss": 1.199864387512207, "step": 5160 }, { "epoch": 1.5887965527854724, "grad_norm": 12.8125, "learning_rate": 1.7437949011489179e-06, "loss": 1.3706705570220947, "step": 5162 }, { "epoch": 1.5894121268082486, "grad_norm": 12.375, "learning_rate": 1.7429982616688503e-06, "loss": 1.5815324783325195, "step": 5164 }, { "epoch": 1.5900277008310248, "grad_norm": 21.75, "learning_rate": 1.7422016473665151e-06, "loss": 1.1833311319351196, "step": 5166 }, { "epoch": 1.590643274853801, "grad_norm": 16.625, "learning_rate": 1.74140505859378e-06, "loss": 1.182098627090454, "step": 5168 }, { "epoch": 1.5912588488765773, "grad_norm": 12.125, "learning_rate": 1.7406084957024964e-06, "loss": 1.1151795387268066, "step": 5170 }, { "epoch": 1.5918744228993535, "grad_norm": 15.9375, "learning_rate": 1.7398119590445083e-06, "loss": 1.2372705936431885, "step": 5172 }, { "epoch": 1.5924899969221298, "grad_norm": 16.5, "learning_rate": 1.739015448971647e-06, "loss": 1.3048455715179443, "step": 5174 }, { "epoch": 1.593105570944906, "grad_norm": 10.375, "learning_rate": 1.7382189658357316e-06, "loss": 1.2205870151519775, "step": 5176 }, { "epoch": 1.5937211449676822, "grad_norm": 12.4375, "learning_rate": 1.73742250998857e-06, "loss": 1.3982374668121338, "step": 5178 }, { "epoch": 1.5943367189904585, "grad_norm": 22.75, "learning_rate": 1.7366260817819574e-06, "loss": 1.1613901853561401, "step": 5180 }, { "epoch": 1.5949522930132347, "grad_norm": 11.1875, "learning_rate": 1.7358296815676768e-06, "loss": 1.548307180404663, "step": 5182 }, { "epoch": 1.595567867036011, "grad_norm": 21.0, "learning_rate": 1.7350333096974992e-06, "loss": 1.2549443244934082, "step": 5184 }, { "epoch": 1.5961834410587872, "grad_norm": 11.75, "learning_rate": 1.7342369665231833e-06, "loss": 1.5804221630096436, "step": 5186 }, { "epoch": 1.5967990150815634, "grad_norm": 17.125, "learning_rate": 1.7334406523964748e-06, "loss": 1.4917235374450684, "step": 5188 }, { "epoch": 1.5974145891043396, "grad_norm": 11.3125, "learning_rate": 1.732644367669105e-06, "loss": 1.3621652126312256, "step": 5190 }, { "epoch": 1.5980301631271159, "grad_norm": 10.0625, "learning_rate": 1.7318481126927962e-06, "loss": 1.3136223554611206, "step": 5192 }, { "epoch": 1.598645737149892, "grad_norm": 9.875, "learning_rate": 1.7310518878192546e-06, "loss": 1.3015594482421875, "step": 5194 }, { "epoch": 1.5992613111726686, "grad_norm": 6.28125, "learning_rate": 1.730255693400172e-06, "loss": 1.1437548398971558, "step": 5196 }, { "epoch": 1.5998768851954448, "grad_norm": 20.375, "learning_rate": 1.7294595297872298e-06, "loss": 1.1758710145950317, "step": 5198 }, { "epoch": 1.600492459218221, "grad_norm": 12.875, "learning_rate": 1.7286633973320943e-06, "loss": 1.402280330657959, "step": 5200 }, { "epoch": 1.6011080332409973, "grad_norm": 21.875, "learning_rate": 1.7278672963864177e-06, "loss": 1.2749931812286377, "step": 5202 }, { "epoch": 1.6017236072637735, "grad_norm": 12.9375, "learning_rate": 1.727071227301839e-06, "loss": 1.0368297100067139, "step": 5204 }, { "epoch": 1.6023391812865497, "grad_norm": 12.75, "learning_rate": 1.7262751904299828e-06, "loss": 1.2086350917816162, "step": 5206 }, { "epoch": 1.602954755309326, "grad_norm": 17.0, "learning_rate": 1.7254791861224584e-06, "loss": 1.3812611103057861, "step": 5208 }, { "epoch": 1.6035703293321022, "grad_norm": 15.0625, "learning_rate": 1.724683214730864e-06, "loss": 1.6722028255462646, "step": 5210 }, { "epoch": 1.6041859033548784, "grad_norm": 36.25, "learning_rate": 1.7238872766067794e-06, "loss": 1.2400789260864258, "step": 5212 }, { "epoch": 1.6048014773776547, "grad_norm": 18.0, "learning_rate": 1.7230913721017715e-06, "loss": 1.4790221452713013, "step": 5214 }, { "epoch": 1.605417051400431, "grad_norm": 27.375, "learning_rate": 1.7222955015673927e-06, "loss": 0.6969753503799438, "step": 5216 }, { "epoch": 1.6060326254232071, "grad_norm": 11.375, "learning_rate": 1.7214996653551805e-06, "loss": 1.1425102949142456, "step": 5218 }, { "epoch": 1.6066481994459834, "grad_norm": 15.75, "learning_rate": 1.7207038638166554e-06, "loss": 1.4025464057922363, "step": 5220 }, { "epoch": 1.6072637734687596, "grad_norm": 25.625, "learning_rate": 1.7199080973033243e-06, "loss": 1.1340413093566895, "step": 5222 }, { "epoch": 1.6078793474915358, "grad_norm": 7.9375, "learning_rate": 1.7191123661666785e-06, "loss": 1.1119030714035034, "step": 5224 }, { "epoch": 1.608494921514312, "grad_norm": 29.5, "learning_rate": 1.7183166707581932e-06, "loss": 1.1708309650421143, "step": 5226 }, { "epoch": 1.6091104955370883, "grad_norm": 11.4375, "learning_rate": 1.717521011429328e-06, "loss": 0.9883697628974915, "step": 5228 }, { "epoch": 1.6097260695598645, "grad_norm": 20.375, "learning_rate": 1.7167253885315265e-06, "loss": 1.5919955968856812, "step": 5230 }, { "epoch": 1.6103416435826408, "grad_norm": 29.375, "learning_rate": 1.7159298024162164e-06, "loss": 1.1920177936553955, "step": 5232 }, { "epoch": 1.6109572176054172, "grad_norm": 23.125, "learning_rate": 1.7151342534348078e-06, "loss": 2.032461166381836, "step": 5234 }, { "epoch": 1.6115727916281934, "grad_norm": 6.8125, "learning_rate": 1.7143387419386974e-06, "loss": 1.5207996368408203, "step": 5236 }, { "epoch": 1.6121883656509697, "grad_norm": 11.6875, "learning_rate": 1.7135432682792634e-06, "loss": 1.305809736251831, "step": 5238 }, { "epoch": 1.612803939673746, "grad_norm": 5.9375, "learning_rate": 1.712747832807865e-06, "loss": 1.299574375152588, "step": 5240 }, { "epoch": 1.6134195136965221, "grad_norm": 12.6875, "learning_rate": 1.71195243587585e-06, "loss": 1.2237389087677002, "step": 5242 }, { "epoch": 1.6140350877192984, "grad_norm": 12.9375, "learning_rate": 1.711157077834545e-06, "loss": 1.2686474323272705, "step": 5244 }, { "epoch": 1.6146506617420746, "grad_norm": 15.5, "learning_rate": 1.7103617590352597e-06, "loss": 1.2688703536987305, "step": 5246 }, { "epoch": 1.6152662357648508, "grad_norm": 7.59375, "learning_rate": 1.7095664798292884e-06, "loss": 1.488532304763794, "step": 5248 }, { "epoch": 1.615881809787627, "grad_norm": 15.8125, "learning_rate": 1.7087712405679065e-06, "loss": 1.678232192993164, "step": 5250 }, { "epoch": 1.6164973838104033, "grad_norm": 15.4375, "learning_rate": 1.707976041602372e-06, "loss": 1.0275745391845703, "step": 5252 }, { "epoch": 1.6171129578331795, "grad_norm": 22.375, "learning_rate": 1.7071808832839258e-06, "loss": 1.2949142456054688, "step": 5254 }, { "epoch": 1.6177285318559558, "grad_norm": 30.75, "learning_rate": 1.7063857659637888e-06, "loss": 1.493941068649292, "step": 5256 }, { "epoch": 1.618344105878732, "grad_norm": 17.125, "learning_rate": 1.7055906899931665e-06, "loss": 0.8534493446350098, "step": 5258 }, { "epoch": 1.6189596799015082, "grad_norm": 17.5, "learning_rate": 1.7047956557232446e-06, "loss": 1.3491942882537842, "step": 5260 }, { "epoch": 1.6195752539242845, "grad_norm": 45.75, "learning_rate": 1.7040006635051904e-06, "loss": 1.4251902103424072, "step": 5262 }, { "epoch": 1.6201908279470607, "grad_norm": 24.25, "learning_rate": 1.7032057136901533e-06, "loss": 1.4901896715164185, "step": 5264 }, { "epoch": 1.620806401969837, "grad_norm": 8.25, "learning_rate": 1.7024108066292631e-06, "loss": 1.2126847505569458, "step": 5266 }, { "epoch": 1.6214219759926132, "grad_norm": 36.0, "learning_rate": 1.7016159426736315e-06, "loss": 1.3032283782958984, "step": 5268 }, { "epoch": 1.6220375500153894, "grad_norm": 17.875, "learning_rate": 1.7008211221743501e-06, "loss": 1.6620005369186401, "step": 5270 }, { "epoch": 1.6226531240381656, "grad_norm": 13.8125, "learning_rate": 1.7000263454824928e-06, "loss": 1.160904049873352, "step": 5272 }, { "epoch": 1.6232686980609419, "grad_norm": 15.625, "learning_rate": 1.6992316129491138e-06, "loss": 1.1405680179595947, "step": 5274 }, { "epoch": 1.623884272083718, "grad_norm": 27.375, "learning_rate": 1.6984369249252463e-06, "loss": 1.6693198680877686, "step": 5276 }, { "epoch": 1.6244998461064943, "grad_norm": 27.125, "learning_rate": 1.6976422817619049e-06, "loss": 1.5274195671081543, "step": 5278 }, { "epoch": 1.6251154201292706, "grad_norm": 15.5625, "learning_rate": 1.6968476838100854e-06, "loss": 1.1784543991088867, "step": 5280 }, { "epoch": 1.6257309941520468, "grad_norm": 25.875, "learning_rate": 1.6960531314207618e-06, "loss": 1.3474241495132446, "step": 5282 }, { "epoch": 1.626346568174823, "grad_norm": 10.6875, "learning_rate": 1.6952586249448885e-06, "loss": 1.2467565536499023, "step": 5284 }, { "epoch": 1.6269621421975993, "grad_norm": 9.1875, "learning_rate": 1.6944641647334012e-06, "loss": 1.2050135135650635, "step": 5286 }, { "epoch": 1.6275777162203755, "grad_norm": 23.75, "learning_rate": 1.6936697511372128e-06, "loss": 1.5005052089691162, "step": 5288 }, { "epoch": 1.6281932902431517, "grad_norm": 14.1875, "learning_rate": 1.6928753845072173e-06, "loss": 0.44793587923049927, "step": 5290 }, { "epoch": 1.628808864265928, "grad_norm": 8.3125, "learning_rate": 1.6920810651942868e-06, "loss": 1.2666034698486328, "step": 5292 }, { "epoch": 1.6294244382887042, "grad_norm": 10.5625, "learning_rate": 1.691286793549274e-06, "loss": 1.2090697288513184, "step": 5294 }, { "epoch": 1.6300400123114804, "grad_norm": 17.875, "learning_rate": 1.690492569923008e-06, "loss": 1.7023463249206543, "step": 5296 }, { "epoch": 1.6306555863342567, "grad_norm": 63.25, "learning_rate": 1.6896983946662998e-06, "loss": 1.1873985528945923, "step": 5298 }, { "epoch": 1.631271160357033, "grad_norm": 28.75, "learning_rate": 1.6889042681299366e-06, "loss": 1.5106173753738403, "step": 5300 }, { "epoch": 1.6318867343798091, "grad_norm": 45.5, "learning_rate": 1.6881101906646855e-06, "loss": 1.2140668630599976, "step": 5302 }, { "epoch": 1.6325023084025854, "grad_norm": 6.28125, "learning_rate": 1.6873161626212914e-06, "loss": 1.0440281629562378, "step": 5304 }, { "epoch": 1.6331178824253616, "grad_norm": 5.84375, "learning_rate": 1.6865221843504775e-06, "loss": 1.156790018081665, "step": 5306 }, { "epoch": 1.6337334564481378, "grad_norm": 28.375, "learning_rate": 1.685728256202944e-06, "loss": 1.4441311359405518, "step": 5308 }, { "epoch": 1.634349030470914, "grad_norm": 20.875, "learning_rate": 1.684934378529371e-06, "loss": 1.5814571380615234, "step": 5310 }, { "epoch": 1.6349646044936903, "grad_norm": 24.875, "learning_rate": 1.6841405516804147e-06, "loss": 1.757308006286621, "step": 5312 }, { "epoch": 1.6355801785164665, "grad_norm": 6.03125, "learning_rate": 1.6833467760067092e-06, "loss": 1.1893250942230225, "step": 5314 }, { "epoch": 1.6361957525392428, "grad_norm": 11.3125, "learning_rate": 1.6825530518588665e-06, "loss": 1.164353370666504, "step": 5316 }, { "epoch": 1.636811326562019, "grad_norm": 15.875, "learning_rate": 1.681759379587475e-06, "loss": 1.082535743713379, "step": 5318 }, { "epoch": 1.6374269005847952, "grad_norm": 13.4375, "learning_rate": 1.6809657595431009e-06, "loss": 1.3592793941497803, "step": 5320 }, { "epoch": 1.6380424746075715, "grad_norm": 19.125, "learning_rate": 1.6801721920762871e-06, "loss": 1.5188610553741455, "step": 5322 }, { "epoch": 1.6386580486303477, "grad_norm": 49.25, "learning_rate": 1.6793786775375532e-06, "loss": 1.9063937664031982, "step": 5324 }, { "epoch": 1.639273622653124, "grad_norm": 13.5625, "learning_rate": 1.6785852162773955e-06, "loss": 1.3931628465652466, "step": 5326 }, { "epoch": 1.6398891966759002, "grad_norm": 9.125, "learning_rate": 1.6777918086462872e-06, "loss": 1.5546140670776367, "step": 5328 }, { "epoch": 1.6405047706986764, "grad_norm": 7.0, "learning_rate": 1.6769984549946762e-06, "loss": 1.2095413208007812, "step": 5330 }, { "epoch": 1.6411203447214526, "grad_norm": 25.5, "learning_rate": 1.6762051556729892e-06, "loss": 1.5617570877075195, "step": 5332 }, { "epoch": 1.6417359187442289, "grad_norm": 12.25, "learning_rate": 1.675411911031626e-06, "loss": 1.3387706279754639, "step": 5334 }, { "epoch": 1.642351492767005, "grad_norm": 20.5, "learning_rate": 1.6746187214209649e-06, "loss": 1.4189784526824951, "step": 5336 }, { "epoch": 1.6429670667897813, "grad_norm": 8.6875, "learning_rate": 1.6738255871913576e-06, "loss": 1.34170663356781, "step": 5338 }, { "epoch": 1.6435826408125576, "grad_norm": 13.1875, "learning_rate": 1.673032508693133e-06, "loss": 1.3447788953781128, "step": 5340 }, { "epoch": 1.6441982148353338, "grad_norm": 13.5625, "learning_rate": 1.672239486276595e-06, "loss": 1.5071009397506714, "step": 5342 }, { "epoch": 1.64481378885811, "grad_norm": 21.375, "learning_rate": 1.6714465202920218e-06, "loss": 1.550859808921814, "step": 5344 }, { "epoch": 1.6454293628808865, "grad_norm": 13.6875, "learning_rate": 1.6706536110896672e-06, "loss": 1.128093957901001, "step": 5346 }, { "epoch": 1.6460449369036627, "grad_norm": 17.5, "learning_rate": 1.6698607590197614e-06, "loss": 1.2202242612838745, "step": 5348 }, { "epoch": 1.646660510926439, "grad_norm": 13.5625, "learning_rate": 1.6690679644325074e-06, "loss": 1.5327153205871582, "step": 5350 }, { "epoch": 1.6472760849492152, "grad_norm": 21.0, "learning_rate": 1.6682752276780827e-06, "loss": 1.5310816764831543, "step": 5352 }, { "epoch": 1.6478916589719914, "grad_norm": 38.5, "learning_rate": 1.6674825491066413e-06, "loss": 1.4935948848724365, "step": 5354 }, { "epoch": 1.6485072329947676, "grad_norm": 19.875, "learning_rate": 1.6666899290683097e-06, "loss": 1.5328843593597412, "step": 5356 }, { "epoch": 1.6491228070175439, "grad_norm": 21.0, "learning_rate": 1.6658973679131886e-06, "loss": 1.686381220817566, "step": 5358 }, { "epoch": 1.6497383810403201, "grad_norm": 15.0, "learning_rate": 1.6651048659913537e-06, "loss": 1.4446570873260498, "step": 5360 }, { "epoch": 1.6503539550630963, "grad_norm": 26.25, "learning_rate": 1.6643124236528544e-06, "loss": 1.2061572074890137, "step": 5362 }, { "epoch": 1.6509695290858726, "grad_norm": 14.5, "learning_rate": 1.6635200412477125e-06, "loss": 1.6465775966644287, "step": 5364 }, { "epoch": 1.6515851031086488, "grad_norm": 23.75, "learning_rate": 1.6627277191259246e-06, "loss": 1.7101616859436035, "step": 5366 }, { "epoch": 1.652200677131425, "grad_norm": 10.375, "learning_rate": 1.6619354576374604e-06, "loss": 1.0784965753555298, "step": 5368 }, { "epoch": 1.6528162511542013, "grad_norm": 34.0, "learning_rate": 1.6611432571322625e-06, "loss": 1.399306297302246, "step": 5370 }, { "epoch": 1.6534318251769775, "grad_norm": 12.8125, "learning_rate": 1.660351117960247e-06, "loss": 1.044040322303772, "step": 5372 }, { "epoch": 1.6540473991997537, "grad_norm": 13.3125, "learning_rate": 1.659559040471303e-06, "loss": 1.3308005332946777, "step": 5374 }, { "epoch": 1.65466297322253, "grad_norm": 40.0, "learning_rate": 1.6587670250152905e-06, "loss": 1.4872740507125854, "step": 5376 }, { "epoch": 1.6552785472453062, "grad_norm": 29.875, "learning_rate": 1.6579750719420454e-06, "loss": 1.3254127502441406, "step": 5378 }, { "epoch": 1.6558941212680824, "grad_norm": 22.375, "learning_rate": 1.6571831816013736e-06, "loss": 1.5545458793640137, "step": 5380 }, { "epoch": 1.6565096952908587, "grad_norm": 12.75, "learning_rate": 1.6563913543430538e-06, "loss": 1.4617619514465332, "step": 5382 }, { "epoch": 1.6571252693136351, "grad_norm": 50.25, "learning_rate": 1.6555995905168376e-06, "loss": 1.5333282947540283, "step": 5384 }, { "epoch": 1.6577408433364114, "grad_norm": 10.9375, "learning_rate": 1.654807890472447e-06, "loss": 1.2353944778442383, "step": 5386 }, { "epoch": 1.6583564173591876, "grad_norm": 11.375, "learning_rate": 1.654016254559578e-06, "loss": 1.0554957389831543, "step": 5388 }, { "epoch": 1.6589719913819638, "grad_norm": 10.8125, "learning_rate": 1.653224683127896e-06, "loss": 1.4639546871185303, "step": 5390 }, { "epoch": 1.65958756540474, "grad_norm": 11.6875, "learning_rate": 1.6524331765270395e-06, "loss": 1.2144356966018677, "step": 5392 }, { "epoch": 1.6602031394275163, "grad_norm": 21.125, "learning_rate": 1.6516417351066182e-06, "loss": 1.5362677574157715, "step": 5394 }, { "epoch": 1.6608187134502925, "grad_norm": 37.5, "learning_rate": 1.650850359216212e-06, "loss": 1.618035078048706, "step": 5396 }, { "epoch": 1.6614342874730688, "grad_norm": 16.5, "learning_rate": 1.650059049205373e-06, "loss": 1.1688024997711182, "step": 5398 }, { "epoch": 1.662049861495845, "grad_norm": 13.625, "learning_rate": 1.6492678054236234e-06, "loss": 1.240717887878418, "step": 5400 }, { "epoch": 1.6626654355186212, "grad_norm": 5.0625, "learning_rate": 1.6484766282204567e-06, "loss": 1.212373971939087, "step": 5402 }, { "epoch": 1.6632810095413975, "grad_norm": 23.125, "learning_rate": 1.647685517945337e-06, "loss": 1.596376895904541, "step": 5404 }, { "epoch": 1.6638965835641737, "grad_norm": 10.5625, "learning_rate": 1.6468944749476985e-06, "loss": 1.4453678131103516, "step": 5406 }, { "epoch": 1.66451215758695, "grad_norm": 13.375, "learning_rate": 1.6461034995769456e-06, "loss": 1.5522806644439697, "step": 5408 }, { "epoch": 1.6651277316097262, "grad_norm": 17.875, "learning_rate": 1.6453125921824527e-06, "loss": 1.395452618598938, "step": 5410 }, { "epoch": 1.6657433056325024, "grad_norm": 11.375, "learning_rate": 1.6445217531135652e-06, "loss": 1.5645806789398193, "step": 5412 }, { "epoch": 1.6663588796552786, "grad_norm": 10.0, "learning_rate": 1.6437309827195975e-06, "loss": 1.3590474128723145, "step": 5414 }, { "epoch": 1.6669744536780549, "grad_norm": 11.8125, "learning_rate": 1.6429402813498334e-06, "loss": 1.3016057014465332, "step": 5416 }, { "epoch": 1.667590027700831, "grad_norm": 11.6875, "learning_rate": 1.6421496493535271e-06, "loss": 1.5097131729125977, "step": 5418 }, { "epoch": 1.6682056017236073, "grad_norm": 17.5, "learning_rate": 1.6413590870799006e-06, "loss": 1.579138994216919, "step": 5420 }, { "epoch": 1.6688211757463836, "grad_norm": 22.625, "learning_rate": 1.6405685948781474e-06, "loss": 1.4919606447219849, "step": 5422 }, { "epoch": 1.6694367497691598, "grad_norm": 33.5, "learning_rate": 1.639778173097429e-06, "loss": 1.4293932914733887, "step": 5424 }, { "epoch": 1.670052323791936, "grad_norm": 22.375, "learning_rate": 1.6389878220868742e-06, "loss": 0.9004104137420654, "step": 5426 }, { "epoch": 1.6706678978147123, "grad_norm": 29.625, "learning_rate": 1.638197542195583e-06, "loss": 1.6715246438980103, "step": 5428 }, { "epoch": 1.6712834718374885, "grad_norm": 8.5, "learning_rate": 1.6374073337726228e-06, "loss": 1.551560878753662, "step": 5430 }, { "epoch": 1.6718990458602647, "grad_norm": 54.75, "learning_rate": 1.6366171971670287e-06, "loss": 1.8659555912017822, "step": 5432 }, { "epoch": 1.672514619883041, "grad_norm": 6.6875, "learning_rate": 1.6358271327278063e-06, "loss": 1.1660687923431396, "step": 5434 }, { "epoch": 1.6731301939058172, "grad_norm": 15.5625, "learning_rate": 1.6350371408039269e-06, "loss": 1.4193882942199707, "step": 5436 }, { "epoch": 1.6737457679285934, "grad_norm": 12.375, "learning_rate": 1.6342472217443313e-06, "loss": 1.1343913078308105, "step": 5438 }, { "epoch": 1.6743613419513697, "grad_norm": 17.0, "learning_rate": 1.6334573758979277e-06, "loss": 1.5491418838500977, "step": 5440 }, { "epoch": 1.6749769159741459, "grad_norm": 16.375, "learning_rate": 1.6326676036135919e-06, "loss": 1.5517988204956055, "step": 5442 }, { "epoch": 1.6755924899969221, "grad_norm": 22.0, "learning_rate": 1.6318779052401675e-06, "loss": 1.1505072116851807, "step": 5444 }, { "epoch": 1.6762080640196984, "grad_norm": 18.0, "learning_rate": 1.631088281126464e-06, "loss": 0.7888588905334473, "step": 5446 }, { "epoch": 1.6768236380424746, "grad_norm": 28.875, "learning_rate": 1.6302987316212604e-06, "loss": 0.8521158695220947, "step": 5448 }, { "epoch": 1.6774392120652508, "grad_norm": 26.875, "learning_rate": 1.6295092570733016e-06, "loss": 1.5754749774932861, "step": 5450 }, { "epoch": 1.678054786088027, "grad_norm": 6.59375, "learning_rate": 1.6287198578312986e-06, "loss": 1.3611557483673096, "step": 5452 }, { "epoch": 1.6786703601108033, "grad_norm": 34.0, "learning_rate": 1.6279305342439308e-06, "loss": 0.6813716888427734, "step": 5454 }, { "epoch": 1.6792859341335795, "grad_norm": 30.125, "learning_rate": 1.6271412866598432e-06, "loss": 1.5481641292572021, "step": 5456 }, { "epoch": 1.6799015081563557, "grad_norm": 12.5625, "learning_rate": 1.6263521154276469e-06, "loss": 1.5015933513641357, "step": 5458 }, { "epoch": 1.680517082179132, "grad_norm": 17.875, "learning_rate": 1.62556302089592e-06, "loss": 1.2945340871810913, "step": 5460 }, { "epoch": 1.6811326562019082, "grad_norm": 21.375, "learning_rate": 1.6247740034132063e-06, "loss": 1.0310568809509277, "step": 5462 }, { "epoch": 1.6817482302246844, "grad_norm": 9.625, "learning_rate": 1.6239850633280158e-06, "loss": 1.0926060676574707, "step": 5464 }, { "epoch": 1.6823638042474607, "grad_norm": 5.9375, "learning_rate": 1.6231962009888247e-06, "loss": 1.2937240600585938, "step": 5466 }, { "epoch": 1.682979378270237, "grad_norm": 21.75, "learning_rate": 1.6224074167440741e-06, "loss": 1.2178066968917847, "step": 5468 }, { "epoch": 1.6835949522930131, "grad_norm": 5.65625, "learning_rate": 1.6216187109421704e-06, "loss": 1.1452090740203857, "step": 5470 }, { "epoch": 1.6842105263157894, "grad_norm": 11.4375, "learning_rate": 1.6208300839314868e-06, "loss": 1.3337724208831787, "step": 5472 }, { "epoch": 1.6848261003385656, "grad_norm": 5.90625, "learning_rate": 1.6200415360603596e-06, "loss": 1.1718332767486572, "step": 5474 }, { "epoch": 1.6854416743613418, "grad_norm": 37.75, "learning_rate": 1.6192530676770923e-06, "loss": 1.3318912982940674, "step": 5476 }, { "epoch": 1.686057248384118, "grad_norm": 19.875, "learning_rate": 1.6184646791299515e-06, "loss": 1.6451802253723145, "step": 5478 }, { "epoch": 1.6866728224068943, "grad_norm": 18.5, "learning_rate": 1.6176763707671707e-06, "loss": 1.4989774227142334, "step": 5480 }, { "epoch": 1.6872883964296705, "grad_norm": 27.125, "learning_rate": 1.6168881429369443e-06, "loss": 1.7403260469436646, "step": 5482 }, { "epoch": 1.6879039704524468, "grad_norm": 18.375, "learning_rate": 1.6160999959874356e-06, "loss": 1.7129693031311035, "step": 5484 }, { "epoch": 1.688519544475223, "grad_norm": 7.1875, "learning_rate": 1.6153119302667695e-06, "loss": 1.4952833652496338, "step": 5486 }, { "epoch": 1.6891351184979992, "grad_norm": 23.75, "learning_rate": 1.6145239461230345e-06, "loss": 1.4523990154266357, "step": 5488 }, { "epoch": 1.6897506925207755, "grad_norm": 11.3125, "learning_rate": 1.6137360439042855e-06, "loss": 1.563145399093628, "step": 5490 }, { "epoch": 1.6903662665435517, "grad_norm": 7.375, "learning_rate": 1.6129482239585387e-06, "loss": 1.189328908920288, "step": 5492 }, { "epoch": 1.690981840566328, "grad_norm": 34.5, "learning_rate": 1.6121604866337753e-06, "loss": 1.478722095489502, "step": 5494 }, { "epoch": 1.6915974145891042, "grad_norm": 19.125, "learning_rate": 1.6113728322779404e-06, "loss": 1.5562196969985962, "step": 5496 }, { "epoch": 1.6922129886118806, "grad_norm": 27.625, "learning_rate": 1.6105852612389413e-06, "loss": 1.449194073677063, "step": 5498 }, { "epoch": 1.6928285626346569, "grad_norm": 19.25, "learning_rate": 1.6097977738646492e-06, "loss": 1.2681084871292114, "step": 5500 }, { "epoch": 1.693444136657433, "grad_norm": 11.125, "learning_rate": 1.6090103705028978e-06, "loss": 1.376092791557312, "step": 5502 }, { "epoch": 1.6940597106802093, "grad_norm": 18.25, "learning_rate": 1.6082230515014844e-06, "loss": 0.9993728995323181, "step": 5504 }, { "epoch": 1.6946752847029856, "grad_norm": 6.5625, "learning_rate": 1.6074358172081692e-06, "loss": 1.1781853437423706, "step": 5506 }, { "epoch": 1.6952908587257618, "grad_norm": 17.25, "learning_rate": 1.6066486679706731e-06, "loss": 1.278997778892517, "step": 5508 }, { "epoch": 1.695906432748538, "grad_norm": 6.5, "learning_rate": 1.6058616041366823e-06, "loss": 1.3280869722366333, "step": 5510 }, { "epoch": 1.6965220067713143, "grad_norm": 9.875, "learning_rate": 1.6050746260538435e-06, "loss": 1.5951027870178223, "step": 5512 }, { "epoch": 1.6971375807940905, "grad_norm": 14.4375, "learning_rate": 1.604287734069765e-06, "loss": 1.4890497922897339, "step": 5514 }, { "epoch": 1.6977531548168667, "grad_norm": 29.875, "learning_rate": 1.6035009285320186e-06, "loss": 1.2161567211151123, "step": 5516 }, { "epoch": 1.698368728839643, "grad_norm": 17.375, "learning_rate": 1.602714209788137e-06, "loss": 1.6530604362487793, "step": 5518 }, { "epoch": 1.6989843028624192, "grad_norm": 9.5, "learning_rate": 1.6019275781856147e-06, "loss": 1.4269185066223145, "step": 5520 }, { "epoch": 1.6995998768851954, "grad_norm": 11.0, "learning_rate": 1.601141034071908e-06, "loss": 1.2811148166656494, "step": 5522 }, { "epoch": 1.7002154509079717, "grad_norm": 15.0, "learning_rate": 1.600354577794434e-06, "loss": 1.6427247524261475, "step": 5524 }, { "epoch": 1.700831024930748, "grad_norm": 10.5625, "learning_rate": 1.5995682097005709e-06, "loss": 1.0762931108474731, "step": 5526 }, { "epoch": 1.7014465989535241, "grad_norm": 13.375, "learning_rate": 1.5987819301376602e-06, "loss": 1.3468595743179321, "step": 5528 }, { "epoch": 1.7020621729763004, "grad_norm": 7.09375, "learning_rate": 1.5979957394530007e-06, "loss": 1.1727588176727295, "step": 5530 }, { "epoch": 1.7026777469990766, "grad_norm": 25.0, "learning_rate": 1.5972096379938543e-06, "loss": 1.1424286365509033, "step": 5532 }, { "epoch": 1.7032933210218528, "grad_norm": 12.0, "learning_rate": 1.5964236261074432e-06, "loss": 1.0981193780899048, "step": 5534 }, { "epoch": 1.7039088950446293, "grad_norm": 8.6875, "learning_rate": 1.5956377041409495e-06, "loss": 1.2754977941513062, "step": 5536 }, { "epoch": 1.7045244690674055, "grad_norm": 10.5625, "learning_rate": 1.5948518724415154e-06, "loss": 1.2789602279663086, "step": 5538 }, { "epoch": 1.7051400430901817, "grad_norm": 10.75, "learning_rate": 1.5940661313562445e-06, "loss": 1.104224443435669, "step": 5540 }, { "epoch": 1.705755617112958, "grad_norm": 9.3125, "learning_rate": 1.5932804812321991e-06, "loss": 1.0468237400054932, "step": 5542 }, { "epoch": 1.7063711911357342, "grad_norm": 13.25, "learning_rate": 1.5924949224164016e-06, "loss": 1.2828927040100098, "step": 5544 }, { "epoch": 1.7069867651585104, "grad_norm": 12.375, "learning_rate": 1.5917094552558343e-06, "loss": 1.4367725849151611, "step": 5546 }, { "epoch": 1.7076023391812867, "grad_norm": 11.0625, "learning_rate": 1.5909240800974395e-06, "loss": 1.252647876739502, "step": 5548 }, { "epoch": 1.708217913204063, "grad_norm": 8.875, "learning_rate": 1.5901387972881177e-06, "loss": 1.0920422077178955, "step": 5550 }, { "epoch": 1.7088334872268391, "grad_norm": 11.75, "learning_rate": 1.5893536071747289e-06, "loss": 1.3846423625946045, "step": 5552 }, { "epoch": 1.7094490612496154, "grad_norm": 19.0, "learning_rate": 1.5885685101040934e-06, "loss": 1.7853939533233643, "step": 5554 }, { "epoch": 1.7100646352723916, "grad_norm": 13.0, "learning_rate": 1.587783506422989e-06, "loss": 1.0750497579574585, "step": 5556 }, { "epoch": 1.7106802092951678, "grad_norm": 10.0625, "learning_rate": 1.5869985964781524e-06, "loss": 1.1922788619995117, "step": 5558 }, { "epoch": 1.711295783317944, "grad_norm": 23.125, "learning_rate": 1.58621378061628e-06, "loss": 1.2307591438293457, "step": 5560 }, { "epoch": 1.7119113573407203, "grad_norm": 19.75, "learning_rate": 1.585429059184025e-06, "loss": 1.4407920837402344, "step": 5562 }, { "epoch": 1.7125269313634965, "grad_norm": 55.25, "learning_rate": 1.5846444325280004e-06, "loss": 1.307939887046814, "step": 5564 }, { "epoch": 1.7131425053862728, "grad_norm": 28.125, "learning_rate": 1.5838599009947765e-06, "loss": 1.2787885665893555, "step": 5566 }, { "epoch": 1.713758079409049, "grad_norm": 17.5, "learning_rate": 1.5830754649308819e-06, "loss": 1.3981409072875977, "step": 5568 }, { "epoch": 1.7143736534318252, "grad_norm": 25.625, "learning_rate": 1.5822911246828024e-06, "loss": 0.7358442544937134, "step": 5570 }, { "epoch": 1.7149892274546015, "grad_norm": 11.0, "learning_rate": 1.5815068805969829e-06, "loss": 1.253906011581421, "step": 5572 }, { "epoch": 1.7156048014773777, "grad_norm": 16.5, "learning_rate": 1.5807227330198241e-06, "loss": 1.49554443359375, "step": 5574 }, { "epoch": 1.716220375500154, "grad_norm": 21.25, "learning_rate": 1.5799386822976849e-06, "loss": 1.349895715713501, "step": 5576 }, { "epoch": 1.7168359495229302, "grad_norm": 29.0, "learning_rate": 1.5791547287768816e-06, "loss": 1.6913847923278809, "step": 5578 }, { "epoch": 1.7174515235457064, "grad_norm": 73.0, "learning_rate": 1.578370872803688e-06, "loss": 1.261209487915039, "step": 5580 }, { "epoch": 1.7180670975684826, "grad_norm": 24.625, "learning_rate": 1.577587114724333e-06, "loss": 1.4275460243225098, "step": 5582 }, { "epoch": 1.7186826715912589, "grad_norm": 17.125, "learning_rate": 1.5768034548850043e-06, "loss": 1.113776683807373, "step": 5584 }, { "epoch": 1.719298245614035, "grad_norm": 10.625, "learning_rate": 1.5760198936318447e-06, "loss": 1.2948540449142456, "step": 5586 }, { "epoch": 1.7199138196368113, "grad_norm": 10.25, "learning_rate": 1.575236431310955e-06, "loss": 1.360956072807312, "step": 5588 }, { "epoch": 1.7205293936595876, "grad_norm": 7.84375, "learning_rate": 1.5744530682683908e-06, "loss": 0.9487060308456421, "step": 5590 }, { "epoch": 1.7211449676823638, "grad_norm": 39.0, "learning_rate": 1.573669804850164e-06, "loss": 1.8299914598464966, "step": 5592 }, { "epoch": 1.72176054170514, "grad_norm": 13.0625, "learning_rate": 1.572886641402244e-06, "loss": 1.2598625421524048, "step": 5594 }, { "epoch": 1.7223761157279163, "grad_norm": 7.875, "learning_rate": 1.5721035782705546e-06, "loss": 1.1927969455718994, "step": 5596 }, { "epoch": 1.7229916897506925, "grad_norm": 7.875, "learning_rate": 1.5713206158009751e-06, "loss": 1.1537501811981201, "step": 5598 }, { "epoch": 1.7236072637734687, "grad_norm": 17.25, "learning_rate": 1.5705377543393415e-06, "loss": 1.6629645824432373, "step": 5600 }, { "epoch": 1.724222837796245, "grad_norm": 21.0, "learning_rate": 1.5697549942314443e-06, "loss": 1.2592394351959229, "step": 5602 }, { "epoch": 1.7248384118190212, "grad_norm": 12.5, "learning_rate": 1.5689723358230306e-06, "loss": 1.2869166135787964, "step": 5604 }, { "epoch": 1.7254539858417974, "grad_norm": 15.25, "learning_rate": 1.5681897794598e-06, "loss": 1.4332456588745117, "step": 5606 }, { "epoch": 1.7260695598645737, "grad_norm": 43.0, "learning_rate": 1.5674073254874092e-06, "loss": 1.5304079055786133, "step": 5608 }, { "epoch": 1.72668513388735, "grad_norm": 19.875, "learning_rate": 1.566624974251469e-06, "loss": 1.7841585874557495, "step": 5610 }, { "epoch": 1.7273007079101261, "grad_norm": 8.6875, "learning_rate": 1.5658427260975448e-06, "loss": 1.252263069152832, "step": 5612 }, { "epoch": 1.7279162819329024, "grad_norm": 10.3125, "learning_rate": 1.5650605813711564e-06, "loss": 1.31412935256958, "step": 5614 }, { "epoch": 1.7285318559556786, "grad_norm": 11.5, "learning_rate": 1.5642785404177783e-06, "loss": 1.274031162261963, "step": 5616 }, { "epoch": 1.7291474299784548, "grad_norm": 13.875, "learning_rate": 1.563496603582839e-06, "loss": 1.690305233001709, "step": 5618 }, { "epoch": 1.729763004001231, "grad_norm": 13.9375, "learning_rate": 1.56271477121172e-06, "loss": 1.4655866622924805, "step": 5620 }, { "epoch": 1.7303785780240073, "grad_norm": 5.8125, "learning_rate": 1.5619330436497587e-06, "loss": 1.5229971408843994, "step": 5622 }, { "epoch": 1.7309941520467835, "grad_norm": 15.0, "learning_rate": 1.561151421242245e-06, "loss": 1.4558359384536743, "step": 5624 }, { "epoch": 1.7316097260695598, "grad_norm": 15.1875, "learning_rate": 1.5603699043344205e-06, "loss": 1.6087075471878052, "step": 5626 }, { "epoch": 1.732225300092336, "grad_norm": 13.0, "learning_rate": 1.5595884932714848e-06, "loss": 1.607154369354248, "step": 5628 }, { "epoch": 1.7328408741151122, "grad_norm": 14.8125, "learning_rate": 1.5588071883985866e-06, "loss": 1.6805574893951416, "step": 5630 }, { "epoch": 1.7334564481378885, "grad_norm": 10.25, "learning_rate": 1.558025990060829e-06, "loss": 1.3068217039108276, "step": 5632 }, { "epoch": 1.7340720221606647, "grad_norm": 5.5625, "learning_rate": 1.557244898603269e-06, "loss": 1.0744372606277466, "step": 5634 }, { "epoch": 1.734687596183441, "grad_norm": 12.75, "learning_rate": 1.5564639143709149e-06, "loss": 1.440709114074707, "step": 5636 }, { "epoch": 1.7353031702062172, "grad_norm": 37.0, "learning_rate": 1.5556830377087283e-06, "loss": 1.4084618091583252, "step": 5638 }, { "epoch": 1.7359187442289934, "grad_norm": 17.375, "learning_rate": 1.5549022689616245e-06, "loss": 1.2655439376831055, "step": 5640 }, { "epoch": 1.7365343182517696, "grad_norm": 24.0, "learning_rate": 1.5541216084744686e-06, "loss": 1.3823131322860718, "step": 5642 }, { "epoch": 1.7371498922745459, "grad_norm": 64.0, "learning_rate": 1.5533410565920793e-06, "loss": 1.3828883171081543, "step": 5644 }, { "epoch": 1.737765466297322, "grad_norm": 56.5, "learning_rate": 1.552560613659228e-06, "loss": 1.0084829330444336, "step": 5646 }, { "epoch": 1.7383810403200985, "grad_norm": 21.875, "learning_rate": 1.5517802800206373e-06, "loss": 1.5251989364624023, "step": 5648 }, { "epoch": 1.7389966143428748, "grad_norm": 40.25, "learning_rate": 1.5510000560209802e-06, "loss": 1.2445313930511475, "step": 5650 }, { "epoch": 1.739612188365651, "grad_norm": 23.125, "learning_rate": 1.550219942004884e-06, "loss": 1.4254047870635986, "step": 5652 }, { "epoch": 1.7402277623884272, "grad_norm": 10.625, "learning_rate": 1.5494399383169256e-06, "loss": 1.275312066078186, "step": 5654 }, { "epoch": 1.7408433364112035, "grad_norm": 13.0, "learning_rate": 1.5486600453016328e-06, "loss": 1.4267611503601074, "step": 5656 }, { "epoch": 1.7414589104339797, "grad_norm": 32.0, "learning_rate": 1.5478802633034857e-06, "loss": 1.2506225109100342, "step": 5658 }, { "epoch": 1.742074484456756, "grad_norm": 10.375, "learning_rate": 1.5471005926669154e-06, "loss": 1.3239362239837646, "step": 5660 }, { "epoch": 1.7426900584795322, "grad_norm": 9.0, "learning_rate": 1.5463210337363023e-06, "loss": 1.0405347347259521, "step": 5662 }, { "epoch": 1.7433056325023084, "grad_norm": 18.875, "learning_rate": 1.5455415868559788e-06, "loss": 1.4487731456756592, "step": 5664 }, { "epoch": 1.7439212065250846, "grad_norm": 12.5, "learning_rate": 1.5447622523702285e-06, "loss": 1.560713291168213, "step": 5666 }, { "epoch": 1.7445367805478609, "grad_norm": 8.6875, "learning_rate": 1.5439830306232833e-06, "loss": 1.1130002737045288, "step": 5668 }, { "epoch": 1.745152354570637, "grad_norm": 16.125, "learning_rate": 1.5432039219593258e-06, "loss": 1.4023849964141846, "step": 5670 }, { "epoch": 1.7457679285934133, "grad_norm": 13.125, "learning_rate": 1.5424249267224906e-06, "loss": 1.2204084396362305, "step": 5672 }, { "epoch": 1.7463835026161896, "grad_norm": 6.09375, "learning_rate": 1.5416460452568602e-06, "loss": 1.0636701583862305, "step": 5674 }, { "epoch": 1.7469990766389658, "grad_norm": 32.75, "learning_rate": 1.5408672779064668e-06, "loss": 1.237810492515564, "step": 5676 }, { "epoch": 1.747614650661742, "grad_norm": 8.4375, "learning_rate": 1.5400886250152934e-06, "loss": 0.7522115707397461, "step": 5678 }, { "epoch": 1.7482302246845183, "grad_norm": 9.25, "learning_rate": 1.5393100869272726e-06, "loss": 1.1993417739868164, "step": 5680 }, { "epoch": 1.7488457987072945, "grad_norm": 36.0, "learning_rate": 1.538531663986284e-06, "loss": 1.330182433128357, "step": 5682 }, { "epoch": 1.7494613727300707, "grad_norm": 14.0, "learning_rate": 1.5377533565361593e-06, "loss": 1.4139305353164673, "step": 5684 }, { "epoch": 1.7500769467528472, "grad_norm": 8.4375, "learning_rate": 1.536975164920677e-06, "loss": 1.4003902673721313, "step": 5686 }, { "epoch": 1.7506925207756234, "grad_norm": 7.4375, "learning_rate": 1.536197089483565e-06, "loss": 1.389980435371399, "step": 5688 }, { "epoch": 1.7513080947983997, "grad_norm": 15.8125, "learning_rate": 1.5354191305685015e-06, "loss": 1.1338951587677002, "step": 5690 }, { "epoch": 1.751923668821176, "grad_norm": 110.5, "learning_rate": 1.5346412885191106e-06, "loss": 1.5165810585021973, "step": 5692 }, { "epoch": 1.7525392428439521, "grad_norm": 10.25, "learning_rate": 1.5338635636789656e-06, "loss": 1.3715205192565918, "step": 5694 }, { "epoch": 1.7531548168667284, "grad_norm": 11.5625, "learning_rate": 1.5330859563915895e-06, "loss": 1.2929373979568481, "step": 5696 }, { "epoch": 1.7537703908895046, "grad_norm": 9.5625, "learning_rate": 1.5323084670004526e-06, "loss": 1.5255780220031738, "step": 5698 }, { "epoch": 1.7543859649122808, "grad_norm": 8.0, "learning_rate": 1.5315310958489717e-06, "loss": 0.971356987953186, "step": 5700 }, { "epoch": 1.755001538935057, "grad_norm": 15.5, "learning_rate": 1.5307538432805136e-06, "loss": 1.3214330673217773, "step": 5702 }, { "epoch": 1.7556171129578333, "grad_norm": 10.1875, "learning_rate": 1.5299767096383908e-06, "loss": 1.6737107038497925, "step": 5704 }, { "epoch": 1.7562326869806095, "grad_norm": 11.375, "learning_rate": 1.5291996952658643e-06, "loss": 1.386177897453308, "step": 5706 }, { "epoch": 1.7568482610033858, "grad_norm": 13.25, "learning_rate": 1.5284228005061426e-06, "loss": 1.318868637084961, "step": 5708 }, { "epoch": 1.757463835026162, "grad_norm": 13.5625, "learning_rate": 1.527646025702381e-06, "loss": 1.4786326885223389, "step": 5710 }, { "epoch": 1.7580794090489382, "grad_norm": 16.875, "learning_rate": 1.526869371197681e-06, "loss": 1.4338879585266113, "step": 5712 }, { "epoch": 1.7586949830717145, "grad_norm": 9.125, "learning_rate": 1.5260928373350926e-06, "loss": 1.275748372077942, "step": 5714 }, { "epoch": 1.7593105570944907, "grad_norm": 16.625, "learning_rate": 1.5253164244576118e-06, "loss": 1.496471881866455, "step": 5716 }, { "epoch": 1.759926131117267, "grad_norm": 18.25, "learning_rate": 1.5245401329081808e-06, "loss": 1.175106406211853, "step": 5718 }, { "epoch": 1.7605417051400432, "grad_norm": 11.0, "learning_rate": 1.5237639630296883e-06, "loss": 1.1941828727722168, "step": 5720 }, { "epoch": 1.7611572791628194, "grad_norm": 40.25, "learning_rate": 1.5229879151649696e-06, "loss": 1.853593349456787, "step": 5722 }, { "epoch": 1.7617728531855956, "grad_norm": 20.0, "learning_rate": 1.5222119896568056e-06, "loss": 1.6410892009735107, "step": 5724 }, { "epoch": 1.7623884272083719, "grad_norm": 16.5, "learning_rate": 1.521436186847924e-06, "loss": 1.4642964601516724, "step": 5726 }, { "epoch": 1.763004001231148, "grad_norm": 14.8125, "learning_rate": 1.5206605070809976e-06, "loss": 1.4115772247314453, "step": 5728 }, { "epoch": 1.7636195752539243, "grad_norm": 36.75, "learning_rate": 1.5198849506986451e-06, "loss": 1.3137972354888916, "step": 5730 }, { "epoch": 1.7642351492767006, "grad_norm": 16.75, "learning_rate": 1.51910951804343e-06, "loss": 1.5579824447631836, "step": 5732 }, { "epoch": 1.7648507232994768, "grad_norm": 15.0, "learning_rate": 1.518334209457863e-06, "loss": 1.5074825286865234, "step": 5734 }, { "epoch": 1.765466297322253, "grad_norm": 3.703125, "learning_rate": 1.5175590252843977e-06, "loss": 1.1944538354873657, "step": 5736 }, { "epoch": 1.7660818713450293, "grad_norm": 10.0, "learning_rate": 1.516783965865434e-06, "loss": 1.349628210067749, "step": 5738 }, { "epoch": 1.7666974453678055, "grad_norm": 24.5, "learning_rate": 1.5160090315433172e-06, "loss": 1.5806326866149902, "step": 5740 }, { "epoch": 1.7673130193905817, "grad_norm": 11.8125, "learning_rate": 1.5152342226603363e-06, "loss": 0.827213704586029, "step": 5742 }, { "epoch": 1.767928593413358, "grad_norm": 12.25, "learning_rate": 1.514459539558725e-06, "loss": 1.1036474704742432, "step": 5744 }, { "epoch": 1.7685441674361342, "grad_norm": 16.375, "learning_rate": 1.5136849825806622e-06, "loss": 1.5632669925689697, "step": 5746 }, { "epoch": 1.7691597414589104, "grad_norm": 11.1875, "learning_rate": 1.5129105520682701e-06, "loss": 1.4175076484680176, "step": 5748 }, { "epoch": 1.7697753154816867, "grad_norm": 9.9375, "learning_rate": 1.5121362483636157e-06, "loss": 1.2292062044143677, "step": 5750 }, { "epoch": 1.7703908895044629, "grad_norm": 18.125, "learning_rate": 1.5113620718087104e-06, "loss": 1.5425353050231934, "step": 5752 }, { "epoch": 1.7710064635272391, "grad_norm": 13.0, "learning_rate": 1.510588022745508e-06, "loss": 1.1885464191436768, "step": 5754 }, { "epoch": 1.7716220375500153, "grad_norm": 15.625, "learning_rate": 1.5098141015159072e-06, "loss": 1.244379997253418, "step": 5756 }, { "epoch": 1.7722376115727916, "grad_norm": 5.21875, "learning_rate": 1.50904030846175e-06, "loss": 1.322556972503662, "step": 5758 }, { "epoch": 1.7728531855955678, "grad_norm": 21.125, "learning_rate": 1.5082666439248217e-06, "loss": 1.8244168758392334, "step": 5760 }, { "epoch": 1.773468759618344, "grad_norm": 42.0, "learning_rate": 1.5074931082468508e-06, "loss": 1.696357250213623, "step": 5762 }, { "epoch": 1.7740843336411203, "grad_norm": 6.40625, "learning_rate": 1.5067197017695088e-06, "loss": 1.1527273654937744, "step": 5764 }, { "epoch": 1.7746999076638965, "grad_norm": 6.375, "learning_rate": 1.5059464248344102e-06, "loss": 1.110656976699829, "step": 5766 }, { "epoch": 1.7753154816866727, "grad_norm": 9.5, "learning_rate": 1.5051732777831122e-06, "loss": 1.2908079624176025, "step": 5768 }, { "epoch": 1.775931055709449, "grad_norm": 26.0, "learning_rate": 1.5044002609571155e-06, "loss": 1.5653884410858154, "step": 5770 }, { "epoch": 1.7765466297322252, "grad_norm": 13.4375, "learning_rate": 1.5036273746978614e-06, "loss": 1.3660342693328857, "step": 5772 }, { "epoch": 1.7771622037550014, "grad_norm": 21.5, "learning_rate": 1.5028546193467355e-06, "loss": 1.4595375061035156, "step": 5774 }, { "epoch": 1.7777777777777777, "grad_norm": 9.25, "learning_rate": 1.5020819952450642e-06, "loss": 1.145453691482544, "step": 5776 }, { "epoch": 1.778393351800554, "grad_norm": 14.375, "learning_rate": 1.5013095027341166e-06, "loss": 1.477217435836792, "step": 5778 }, { "epoch": 1.7790089258233301, "grad_norm": 8.8125, "learning_rate": 1.5005371421551035e-06, "loss": 1.1808829307556152, "step": 5780 }, { "epoch": 1.7796244998461064, "grad_norm": 7.0, "learning_rate": 1.4997649138491773e-06, "loss": 1.1786186695098877, "step": 5782 }, { "epoch": 1.7802400738688826, "grad_norm": 6.21875, "learning_rate": 1.4989928181574329e-06, "loss": 1.0353856086730957, "step": 5784 }, { "epoch": 1.7808556478916588, "grad_norm": 13.5, "learning_rate": 1.498220855420905e-06, "loss": 1.128792405128479, "step": 5786 }, { "epoch": 1.781471221914435, "grad_norm": 12.0, "learning_rate": 1.4974490259805706e-06, "loss": 1.2422773838043213, "step": 5788 }, { "epoch": 1.7820867959372113, "grad_norm": 12.25, "learning_rate": 1.4966773301773479e-06, "loss": 1.050607681274414, "step": 5790 }, { "epoch": 1.7827023699599875, "grad_norm": 6.09375, "learning_rate": 1.4959057683520958e-06, "loss": 1.0194100141525269, "step": 5792 }, { "epoch": 1.7833179439827638, "grad_norm": 11.9375, "learning_rate": 1.4951343408456137e-06, "loss": 1.3442493677139282, "step": 5794 }, { "epoch": 1.78393351800554, "grad_norm": 15.3125, "learning_rate": 1.4943630479986425e-06, "loss": 1.3655450344085693, "step": 5796 }, { "epoch": 1.7845490920283162, "grad_norm": 10.75, "learning_rate": 1.4935918901518626e-06, "loss": 1.3164520263671875, "step": 5798 }, { "epoch": 1.7851646660510927, "grad_norm": 17.25, "learning_rate": 1.4928208676458954e-06, "loss": 1.3639867305755615, "step": 5800 }, { "epoch": 1.785780240073869, "grad_norm": 9.625, "learning_rate": 1.4920499808213032e-06, "loss": 1.3705554008483887, "step": 5802 }, { "epoch": 1.7863958140966452, "grad_norm": 7.21875, "learning_rate": 1.491279230018587e-06, "loss": 1.1877235174179077, "step": 5804 }, { "epoch": 1.7870113881194214, "grad_norm": 13.1875, "learning_rate": 1.4905086155781874e-06, "loss": 1.4518612623214722, "step": 5806 }, { "epoch": 1.7876269621421976, "grad_norm": 11.875, "learning_rate": 1.4897381378404874e-06, "loss": 1.3673511743545532, "step": 5808 }, { "epoch": 1.7882425361649739, "grad_norm": 12.3125, "learning_rate": 1.4889677971458068e-06, "loss": 1.3825560808181763, "step": 5810 }, { "epoch": 1.78885811018775, "grad_norm": 10.5625, "learning_rate": 1.4881975938344063e-06, "loss": 1.306952953338623, "step": 5812 }, { "epoch": 1.7894736842105263, "grad_norm": 14.0, "learning_rate": 1.4874275282464858e-06, "loss": 1.19303297996521, "step": 5814 }, { "epoch": 1.7900892582333026, "grad_norm": 44.25, "learning_rate": 1.4866576007221835e-06, "loss": 1.4067680835723877, "step": 5816 }, { "epoch": 1.7907048322560788, "grad_norm": 20.0, "learning_rate": 1.4858878116015778e-06, "loss": 1.089118480682373, "step": 5818 }, { "epoch": 1.791320406278855, "grad_norm": 8.3125, "learning_rate": 1.4851181612246851e-06, "loss": 1.4552359580993652, "step": 5820 }, { "epoch": 1.7919359803016313, "grad_norm": 27.5, "learning_rate": 1.4843486499314615e-06, "loss": 1.4873318672180176, "step": 5822 }, { "epoch": 1.7925515543244075, "grad_norm": 17.5, "learning_rate": 1.4835792780617998e-06, "loss": 1.5674926042556763, "step": 5824 }, { "epoch": 1.7931671283471837, "grad_norm": 8.9375, "learning_rate": 1.4828100459555338e-06, "loss": 1.306696891784668, "step": 5826 }, { "epoch": 1.79378270236996, "grad_norm": 4.84375, "learning_rate": 1.4820409539524335e-06, "loss": 1.0492743253707886, "step": 5828 }, { "epoch": 1.7943982763927362, "grad_norm": 15.5, "learning_rate": 1.4812720023922082e-06, "loss": 1.1006546020507812, "step": 5830 }, { "epoch": 1.7950138504155124, "grad_norm": 10.0, "learning_rate": 1.4805031916145034e-06, "loss": 1.339001178741455, "step": 5832 }, { "epoch": 1.7956294244382887, "grad_norm": 38.25, "learning_rate": 1.4797345219589052e-06, "loss": 1.3763446807861328, "step": 5834 }, { "epoch": 1.796244998461065, "grad_norm": 14.5, "learning_rate": 1.4789659937649353e-06, "loss": 1.6027696132659912, "step": 5836 }, { "epoch": 1.7968605724838413, "grad_norm": 15.3125, "learning_rate": 1.478197607372053e-06, "loss": 1.2696528434753418, "step": 5838 }, { "epoch": 1.7974761465066176, "grad_norm": 10.6875, "learning_rate": 1.4774293631196565e-06, "loss": 1.2174136638641357, "step": 5840 }, { "epoch": 1.7980917205293938, "grad_norm": 16.125, "learning_rate": 1.4766612613470795e-06, "loss": 1.2887904644012451, "step": 5842 }, { "epoch": 1.79870729455217, "grad_norm": 15.0625, "learning_rate": 1.4758933023935927e-06, "loss": 1.4577984809875488, "step": 5844 }, { "epoch": 1.7993228685749463, "grad_norm": 9.75, "learning_rate": 1.475125486598406e-06, "loss": 1.325479507446289, "step": 5846 }, { "epoch": 1.7999384425977225, "grad_norm": 13.5625, "learning_rate": 1.474357814300664e-06, "loss": 1.4405393600463867, "step": 5848 }, { "epoch": 1.8005540166204987, "grad_norm": 14.5625, "learning_rate": 1.4735902858394473e-06, "loss": 1.1311811208724976, "step": 5850 }, { "epoch": 1.801169590643275, "grad_norm": 9.4375, "learning_rate": 1.4728229015537758e-06, "loss": 1.2644745111465454, "step": 5852 }, { "epoch": 1.8017851646660512, "grad_norm": 14.0625, "learning_rate": 1.4720556617826028e-06, "loss": 0.9831246137619019, "step": 5854 }, { "epoch": 1.8024007386888274, "grad_norm": 13.8125, "learning_rate": 1.4712885668648192e-06, "loss": 1.3548238277435303, "step": 5856 }, { "epoch": 1.8030163127116037, "grad_norm": 56.75, "learning_rate": 1.4705216171392525e-06, "loss": 1.4002970457077026, "step": 5858 }, { "epoch": 1.80363188673438, "grad_norm": 11.0625, "learning_rate": 1.4697548129446644e-06, "loss": 1.233629584312439, "step": 5860 }, { "epoch": 1.8042474607571561, "grad_norm": 7.8125, "learning_rate": 1.4689881546197533e-06, "loss": 0.9660263657569885, "step": 5862 }, { "epoch": 1.8048630347799324, "grad_norm": 15.0, "learning_rate": 1.4682216425031536e-06, "loss": 0.9532240629196167, "step": 5864 }, { "epoch": 1.8054786088027086, "grad_norm": 22.0, "learning_rate": 1.467455276933434e-06, "loss": 1.2945117950439453, "step": 5866 }, { "epoch": 1.8060941828254848, "grad_norm": 14.0, "learning_rate": 1.4666890582490986e-06, "loss": 1.5546940565109253, "step": 5868 }, { "epoch": 1.806709756848261, "grad_norm": 19.75, "learning_rate": 1.4659229867885885e-06, "loss": 0.7492038011550903, "step": 5870 }, { "epoch": 1.8073253308710373, "grad_norm": 11.25, "learning_rate": 1.4651570628902779e-06, "loss": 1.2762389183044434, "step": 5872 }, { "epoch": 1.8079409048938135, "grad_norm": 13.4375, "learning_rate": 1.464391286892475e-06, "loss": 1.2377161979675293, "step": 5874 }, { "epoch": 1.8085564789165898, "grad_norm": 13.125, "learning_rate": 1.4636256591334253e-06, "loss": 1.1009962558746338, "step": 5876 }, { "epoch": 1.809172052939366, "grad_norm": 16.375, "learning_rate": 1.4628601799513072e-06, "loss": 1.4094276428222656, "step": 5878 }, { "epoch": 1.8097876269621422, "grad_norm": 3.96875, "learning_rate": 1.4620948496842332e-06, "loss": 0.9528759717941284, "step": 5880 }, { "epoch": 1.8104032009849185, "grad_norm": 5.59375, "learning_rate": 1.4613296686702512e-06, "loss": 1.1366877555847168, "step": 5882 }, { "epoch": 1.8110187750076947, "grad_norm": 16.875, "learning_rate": 1.4605646372473423e-06, "loss": 1.3106170892715454, "step": 5884 }, { "epoch": 1.811634349030471, "grad_norm": 17.375, "learning_rate": 1.4597997557534215e-06, "loss": 1.3541948795318604, "step": 5886 }, { "epoch": 1.8122499230532472, "grad_norm": 30.625, "learning_rate": 1.4590350245263377e-06, "loss": 1.4160265922546387, "step": 5888 }, { "epoch": 1.8128654970760234, "grad_norm": 18.25, "learning_rate": 1.4582704439038745e-06, "loss": 1.4113764762878418, "step": 5890 }, { "epoch": 1.8134810710987996, "grad_norm": 7.3125, "learning_rate": 1.4575060142237472e-06, "loss": 1.2803568840026855, "step": 5892 }, { "epoch": 1.8140966451215759, "grad_norm": 19.75, "learning_rate": 1.456741735823605e-06, "loss": 1.5021907091140747, "step": 5894 }, { "epoch": 1.814712219144352, "grad_norm": 13.875, "learning_rate": 1.4559776090410314e-06, "loss": 1.150805950164795, "step": 5896 }, { "epoch": 1.8153277931671283, "grad_norm": 13.5, "learning_rate": 1.4552136342135418e-06, "loss": 1.2975443601608276, "step": 5898 }, { "epoch": 1.8159433671899046, "grad_norm": 30.75, "learning_rate": 1.4544498116785845e-06, "loss": 1.4264914989471436, "step": 5900 }, { "epoch": 1.8165589412126808, "grad_norm": 10.875, "learning_rate": 1.4536861417735412e-06, "loss": 1.366098165512085, "step": 5902 }, { "epoch": 1.817174515235457, "grad_norm": 10.3125, "learning_rate": 1.4529226248357255e-06, "loss": 1.4455392360687256, "step": 5904 }, { "epoch": 1.8177900892582333, "grad_norm": 16.5, "learning_rate": 1.4521592612023832e-06, "loss": 1.1741234064102173, "step": 5906 }, { "epoch": 1.8184056632810095, "grad_norm": 17.625, "learning_rate": 1.451396051210694e-06, "loss": 1.3307926654815674, "step": 5908 }, { "epoch": 1.8190212373037857, "grad_norm": 10.625, "learning_rate": 1.4506329951977679e-06, "loss": 1.3589565753936768, "step": 5910 }, { "epoch": 1.819636811326562, "grad_norm": 10.0, "learning_rate": 1.4498700935006472e-06, "loss": 1.3831872940063477, "step": 5912 }, { "epoch": 1.8202523853493382, "grad_norm": 9.5, "learning_rate": 1.4491073464563079e-06, "loss": 1.084181308746338, "step": 5914 }, { "epoch": 1.8208679593721144, "grad_norm": 18.75, "learning_rate": 1.448344754401655e-06, "loss": 0.9600379467010498, "step": 5916 }, { "epoch": 1.8214835333948907, "grad_norm": 16.375, "learning_rate": 1.4475823176735261e-06, "loss": 1.3483178615570068, "step": 5918 }, { "epoch": 1.822099107417667, "grad_norm": 23.625, "learning_rate": 1.4468200366086915e-06, "loss": 1.416744351387024, "step": 5920 }, { "epoch": 1.8227146814404431, "grad_norm": 14.0625, "learning_rate": 1.446057911543851e-06, "loss": 0.886028528213501, "step": 5922 }, { "epoch": 1.8233302554632194, "grad_norm": 14.8125, "learning_rate": 1.4452959428156358e-06, "loss": 1.695695161819458, "step": 5924 }, { "epoch": 1.8239458294859956, "grad_norm": 14.0, "learning_rate": 1.4445341307606091e-06, "loss": 1.2986364364624023, "step": 5926 }, { "epoch": 1.8245614035087718, "grad_norm": 17.125, "learning_rate": 1.4437724757152637e-06, "loss": 1.0665688514709473, "step": 5928 }, { "epoch": 1.825176977531548, "grad_norm": 26.875, "learning_rate": 1.4430109780160231e-06, "loss": 1.1360788345336914, "step": 5930 }, { "epoch": 1.8257925515543243, "grad_norm": 19.875, "learning_rate": 1.4422496379992428e-06, "loss": 1.3271257877349854, "step": 5932 }, { "epoch": 1.8264081255771005, "grad_norm": 14.9375, "learning_rate": 1.4414884560012065e-06, "loss": 1.3400228023529053, "step": 5934 }, { "epoch": 1.8270236995998768, "grad_norm": 27.625, "learning_rate": 1.4407274323581296e-06, "loss": 1.4710955619812012, "step": 5936 }, { "epoch": 1.827639273622653, "grad_norm": 10.1875, "learning_rate": 1.439966567406157e-06, "loss": 1.0766589641571045, "step": 5938 }, { "epoch": 1.8282548476454292, "grad_norm": 9.5625, "learning_rate": 1.4392058614813643e-06, "loss": 1.5178849697113037, "step": 5940 }, { "epoch": 1.8288704216682055, "grad_norm": 13.3125, "learning_rate": 1.4384453149197553e-06, "loss": 1.1585719585418701, "step": 5942 }, { "epoch": 1.8294859956909817, "grad_norm": 39.75, "learning_rate": 1.4376849280572643e-06, "loss": 1.561208724975586, "step": 5944 }, { "epoch": 1.830101569713758, "grad_norm": 30.25, "learning_rate": 1.4369247012297557e-06, "loss": 1.2275128364562988, "step": 5946 }, { "epoch": 1.8307171437365342, "grad_norm": 10.5, "learning_rate": 1.4361646347730221e-06, "loss": 1.3385765552520752, "step": 5948 }, { "epoch": 1.8313327177593104, "grad_norm": 18.75, "learning_rate": 1.4354047290227856e-06, "loss": 1.3635698556900024, "step": 5950 }, { "epoch": 1.8319482917820868, "grad_norm": 9.5, "learning_rate": 1.4346449843146978e-06, "loss": 1.3228318691253662, "step": 5952 }, { "epoch": 1.832563865804863, "grad_norm": 10.3125, "learning_rate": 1.433885400984339e-06, "loss": 1.1687228679656982, "step": 5954 }, { "epoch": 1.8331794398276393, "grad_norm": 23.0, "learning_rate": 1.4331259793672172e-06, "loss": 1.4278203248977661, "step": 5956 }, { "epoch": 1.8337950138504155, "grad_norm": 16.625, "learning_rate": 1.4323667197987704e-06, "loss": 1.6822397708892822, "step": 5958 }, { "epoch": 1.8344105878731918, "grad_norm": 13.8125, "learning_rate": 1.4316076226143645e-06, "loss": 1.465928554534912, "step": 5960 }, { "epoch": 1.835026161895968, "grad_norm": 3.5, "learning_rate": 1.4308486881492927e-06, "loss": 1.2920657396316528, "step": 5962 }, { "epoch": 1.8356417359187442, "grad_norm": 12.125, "learning_rate": 1.4300899167387787e-06, "loss": 0.9915714859962463, "step": 5964 }, { "epoch": 1.8362573099415205, "grad_norm": 12.1875, "learning_rate": 1.4293313087179717e-06, "loss": 1.468205451965332, "step": 5966 }, { "epoch": 1.8368728839642967, "grad_norm": 31.125, "learning_rate": 1.4285728644219499e-06, "loss": 1.684161901473999, "step": 5968 }, { "epoch": 1.837488457987073, "grad_norm": 9.5, "learning_rate": 1.427814584185719e-06, "loss": 1.4519860744476318, "step": 5970 }, { "epoch": 1.8381040320098492, "grad_norm": 12.0625, "learning_rate": 1.4270564683442127e-06, "loss": 1.1766678094863892, "step": 5972 }, { "epoch": 1.8387196060326254, "grad_norm": 24.375, "learning_rate": 1.426298517232291e-06, "loss": 1.4680485725402832, "step": 5974 }, { "epoch": 1.8393351800554016, "grad_norm": 3.1875, "learning_rate": 1.425540731184742e-06, "loss": 1.1224040985107422, "step": 5976 }, { "epoch": 1.8399507540781779, "grad_norm": 17.875, "learning_rate": 1.4247831105362807e-06, "loss": 1.3447721004486084, "step": 5978 }, { "epoch": 1.840566328100954, "grad_norm": 22.125, "learning_rate": 1.4240256556215486e-06, "loss": 1.0965287685394287, "step": 5980 }, { "epoch": 1.8411819021237303, "grad_norm": 11.6875, "learning_rate": 1.4232683667751152e-06, "loss": 1.4905890226364136, "step": 5982 }, { "epoch": 1.8417974761465066, "grad_norm": 15.8125, "learning_rate": 1.422511244331476e-06, "loss": 1.76155424118042, "step": 5984 }, { "epoch": 1.8424130501692828, "grad_norm": 31.75, "learning_rate": 1.4217542886250513e-06, "loss": 1.6751346588134766, "step": 5986 }, { "epoch": 1.8430286241920593, "grad_norm": 22.0, "learning_rate": 1.420997499990191e-06, "loss": 1.5737178325653076, "step": 5988 }, { "epoch": 1.8436441982148355, "grad_norm": 11.3125, "learning_rate": 1.4202408787611686e-06, "loss": 1.3350948095321655, "step": 5990 }, { "epoch": 1.8442597722376117, "grad_norm": 30.25, "learning_rate": 1.4194844252721846e-06, "loss": 1.5872807502746582, "step": 5992 }, { "epoch": 1.844875346260388, "grad_norm": 17.5, "learning_rate": 1.4187281398573659e-06, "loss": 1.6296907663345337, "step": 5994 }, { "epoch": 1.8454909202831642, "grad_norm": 13.6875, "learning_rate": 1.417972022850764e-06, "loss": 1.1191483736038208, "step": 5996 }, { "epoch": 1.8461064943059404, "grad_norm": 14.875, "learning_rate": 1.4172160745863571e-06, "loss": 1.686267375946045, "step": 5998 }, { "epoch": 1.8467220683287167, "grad_norm": 23.75, "learning_rate": 1.416460295398048e-06, "loss": 1.6978259086608887, "step": 6000 }, { "epoch": 1.847337642351493, "grad_norm": 11.25, "learning_rate": 1.4157046856196658e-06, "loss": 1.3672219514846802, "step": 6002 }, { "epoch": 1.8479532163742691, "grad_norm": 10.6875, "learning_rate": 1.4149492455849637e-06, "loss": 0.9509298801422119, "step": 6004 }, { "epoch": 1.8485687903970454, "grad_norm": 6.125, "learning_rate": 1.4141939756276203e-06, "loss": 1.0525821447372437, "step": 6006 }, { "epoch": 1.8491843644198216, "grad_norm": 23.5, "learning_rate": 1.4134388760812399e-06, "loss": 1.6087608337402344, "step": 6008 }, { "epoch": 1.8497999384425978, "grad_norm": 19.125, "learning_rate": 1.4126839472793508e-06, "loss": 1.2136560678482056, "step": 6010 }, { "epoch": 1.850415512465374, "grad_norm": 30.25, "learning_rate": 1.4119291895554045e-06, "loss": 1.345487356185913, "step": 6012 }, { "epoch": 1.8510310864881503, "grad_norm": 11.375, "learning_rate": 1.4111746032427802e-06, "loss": 1.2782511711120605, "step": 6014 }, { "epoch": 1.8516466605109265, "grad_norm": 10.625, "learning_rate": 1.4104201886747787e-06, "loss": 0.9771016240119934, "step": 6016 }, { "epoch": 1.8522622345337028, "grad_norm": 7.75, "learning_rate": 1.4096659461846256e-06, "loss": 1.4104878902435303, "step": 6018 }, { "epoch": 1.852877808556479, "grad_norm": 18.125, "learning_rate": 1.4089118761054711e-06, "loss": 1.3528629541397095, "step": 6020 }, { "epoch": 1.8534933825792552, "grad_norm": 38.0, "learning_rate": 1.4081579787703885e-06, "loss": 1.4981791973114014, "step": 6022 }, { "epoch": 1.8541089566020315, "grad_norm": 15.0625, "learning_rate": 1.4074042545123747e-06, "loss": 1.4606707096099854, "step": 6024 }, { "epoch": 1.8547245306248077, "grad_norm": 19.875, "learning_rate": 1.406650703664352e-06, "loss": 1.4054559469223022, "step": 6026 }, { "epoch": 1.855340104647584, "grad_norm": 7.78125, "learning_rate": 1.4058973265591637e-06, "loss": 1.2006244659423828, "step": 6028 }, { "epoch": 1.8559556786703602, "grad_norm": 20.375, "learning_rate": 1.4051441235295766e-06, "loss": 1.7498140335083008, "step": 6030 }, { "epoch": 1.8565712526931364, "grad_norm": 12.875, "learning_rate": 1.404391094908283e-06, "loss": 1.2948148250579834, "step": 6032 }, { "epoch": 1.8571868267159126, "grad_norm": 46.25, "learning_rate": 1.4036382410278952e-06, "loss": 0.7961997389793396, "step": 6034 }, { "epoch": 1.8578024007386889, "grad_norm": 15.25, "learning_rate": 1.4028855622209501e-06, "loss": 1.1270966529846191, "step": 6036 }, { "epoch": 1.858417974761465, "grad_norm": 14.75, "learning_rate": 1.402133058819907e-06, "loss": 1.3352243900299072, "step": 6038 }, { "epoch": 1.8590335487842413, "grad_norm": 13.3125, "learning_rate": 1.4013807311571472e-06, "loss": 0.8288969993591309, "step": 6040 }, { "epoch": 1.8596491228070176, "grad_norm": 27.75, "learning_rate": 1.4006285795649744e-06, "loss": 1.330993413925171, "step": 6042 }, { "epoch": 1.8602646968297938, "grad_norm": 7.21875, "learning_rate": 1.3998766043756157e-06, "loss": 0.905243992805481, "step": 6044 }, { "epoch": 1.86088027085257, "grad_norm": 20.625, "learning_rate": 1.3991248059212186e-06, "loss": 1.3450603485107422, "step": 6046 }, { "epoch": 1.8614958448753463, "grad_norm": 13.625, "learning_rate": 1.3983731845338533e-06, "loss": 1.3866983652114868, "step": 6048 }, { "epoch": 1.8621114188981225, "grad_norm": 11.375, "learning_rate": 1.3976217405455126e-06, "loss": 0.7493995428085327, "step": 6050 }, { "epoch": 1.8627269929208987, "grad_norm": 10.0625, "learning_rate": 1.3968704742881097e-06, "loss": 1.3307311534881592, "step": 6052 }, { "epoch": 1.863342566943675, "grad_norm": 19.375, "learning_rate": 1.39611938609348e-06, "loss": 1.3420662879943848, "step": 6054 }, { "epoch": 1.8639581409664512, "grad_norm": 9.8125, "learning_rate": 1.3953684762933792e-06, "loss": 1.4183435440063477, "step": 6056 }, { "epoch": 1.8645737149892274, "grad_norm": 13.125, "learning_rate": 1.3946177452194858e-06, "loss": 1.4013707637786865, "step": 6058 }, { "epoch": 1.8651892890120036, "grad_norm": 18.5, "learning_rate": 1.3938671932033987e-06, "loss": 1.352440595626831, "step": 6060 }, { "epoch": 1.8658048630347799, "grad_norm": 11.6875, "learning_rate": 1.3931168205766368e-06, "loss": 1.3323051929473877, "step": 6062 }, { "epoch": 1.8664204370575561, "grad_norm": 23.125, "learning_rate": 1.3923666276706413e-06, "loss": 1.316023349761963, "step": 6064 }, { "epoch": 1.8670360110803323, "grad_norm": 10.5625, "learning_rate": 1.391616614816773e-06, "loss": 1.466226577758789, "step": 6066 }, { "epoch": 1.8676515851031086, "grad_norm": 6.8125, "learning_rate": 1.3908667823463133e-06, "loss": 1.3296306133270264, "step": 6068 }, { "epoch": 1.8682671591258848, "grad_norm": 39.75, "learning_rate": 1.3901171305904645e-06, "loss": 1.5542833805084229, "step": 6070 }, { "epoch": 1.868882733148661, "grad_norm": 8.125, "learning_rate": 1.3893676598803481e-06, "loss": 1.1805827617645264, "step": 6072 }, { "epoch": 1.8694983071714373, "grad_norm": 15.3125, "learning_rate": 1.3886183705470061e-06, "loss": 1.3519244194030762, "step": 6074 }, { "epoch": 1.8701138811942135, "grad_norm": 31.5, "learning_rate": 1.3878692629214011e-06, "loss": 1.0316970348358154, "step": 6076 }, { "epoch": 1.8707294552169897, "grad_norm": 17.625, "learning_rate": 1.3871203373344145e-06, "loss": 1.20766019821167, "step": 6078 }, { "epoch": 1.871345029239766, "grad_norm": 6.75, "learning_rate": 1.3863715941168474e-06, "loss": 1.157658576965332, "step": 6080 }, { "epoch": 1.8719606032625422, "grad_norm": 9.8125, "learning_rate": 1.3856230335994208e-06, "loss": 1.1696847677230835, "step": 6082 }, { "epoch": 1.8725761772853184, "grad_norm": 11.375, "learning_rate": 1.3848746561127744e-06, "loss": 1.259034276008606, "step": 6084 }, { "epoch": 1.8731917513080947, "grad_norm": 14.125, "learning_rate": 1.3841264619874678e-06, "loss": 1.7221574783325195, "step": 6086 }, { "epoch": 1.873807325330871, "grad_norm": 69.0, "learning_rate": 1.383378451553979e-06, "loss": 1.5460913181304932, "step": 6088 }, { "epoch": 1.8744228993536471, "grad_norm": 56.75, "learning_rate": 1.382630625142705e-06, "loss": 1.4097764492034912, "step": 6090 }, { "epoch": 1.8750384733764234, "grad_norm": 13.6875, "learning_rate": 1.3818829830839614e-06, "loss": 1.6211826801300049, "step": 6092 }, { "epoch": 1.8756540473991996, "grad_norm": 20.75, "learning_rate": 1.3811355257079834e-06, "loss": 1.6837477684020996, "step": 6094 }, { "epoch": 1.8762696214219758, "grad_norm": 24.375, "learning_rate": 1.3803882533449228e-06, "loss": 1.5443352460861206, "step": 6096 }, { "epoch": 1.876885195444752, "grad_norm": 13.75, "learning_rate": 1.3796411663248508e-06, "loss": 1.5669658184051514, "step": 6098 }, { "epoch": 1.8775007694675283, "grad_norm": 18.25, "learning_rate": 1.378894264977757e-06, "loss": 1.6158027648925781, "step": 6100 }, { "epoch": 1.8781163434903048, "grad_norm": 59.75, "learning_rate": 1.3781475496335487e-06, "loss": 1.166754961013794, "step": 6102 }, { "epoch": 1.878731917513081, "grad_norm": 18.25, "learning_rate": 1.3774010206220504e-06, "loss": 0.9254148006439209, "step": 6104 }, { "epoch": 1.8793474915358572, "grad_norm": 4.1875, "learning_rate": 1.376654678273005e-06, "loss": 1.2385072708129883, "step": 6106 }, { "epoch": 1.8799630655586335, "grad_norm": 8.5625, "learning_rate": 1.3759085229160734e-06, "loss": 1.2265651226043701, "step": 6108 }, { "epoch": 1.8805786395814097, "grad_norm": 45.75, "learning_rate": 1.3751625548808322e-06, "loss": 1.5409295558929443, "step": 6110 }, { "epoch": 1.881194213604186, "grad_norm": 21.5, "learning_rate": 1.3744167744967768e-06, "loss": 1.5468591451644897, "step": 6112 }, { "epoch": 1.8818097876269622, "grad_norm": 19.625, "learning_rate": 1.3736711820933193e-06, "loss": 1.1492195129394531, "step": 6114 }, { "epoch": 1.8824253616497384, "grad_norm": 12.375, "learning_rate": 1.3729257779997888e-06, "loss": 0.7553317546844482, "step": 6116 }, { "epoch": 1.8830409356725146, "grad_norm": 39.0, "learning_rate": 1.3721805625454307e-06, "loss": 1.6203052997589111, "step": 6118 }, { "epoch": 1.8836565096952909, "grad_norm": 44.75, "learning_rate": 1.3714355360594082e-06, "loss": 1.593339204788208, "step": 6120 }, { "epoch": 1.884272083718067, "grad_norm": 7.28125, "learning_rate": 1.3706906988708003e-06, "loss": 0.950556755065918, "step": 6122 }, { "epoch": 1.8848876577408433, "grad_norm": 14.3125, "learning_rate": 1.3699460513086015e-06, "loss": 1.2142386436462402, "step": 6124 }, { "epoch": 1.8855032317636196, "grad_norm": 16.125, "learning_rate": 1.3692015937017246e-06, "loss": 1.2912973165512085, "step": 6126 }, { "epoch": 1.8861188057863958, "grad_norm": 10.0625, "learning_rate": 1.3684573263789967e-06, "loss": 1.3026981353759766, "step": 6128 }, { "epoch": 1.886734379809172, "grad_norm": 12.75, "learning_rate": 1.3677132496691617e-06, "loss": 1.272803783416748, "step": 6130 }, { "epoch": 1.8873499538319483, "grad_norm": 7.875, "learning_rate": 1.3669693639008794e-06, "loss": 1.3544511795043945, "step": 6132 }, { "epoch": 1.8879655278547245, "grad_norm": 14.6875, "learning_rate": 1.3662256694027248e-06, "loss": 1.2876298427581787, "step": 6134 }, { "epoch": 1.8885811018775007, "grad_norm": 16.75, "learning_rate": 1.3654821665031882e-06, "loss": 0.8064992427825928, "step": 6136 }, { "epoch": 1.889196675900277, "grad_norm": 24.875, "learning_rate": 1.3647388555306766e-06, "loss": 1.1648228168487549, "step": 6138 }, { "epoch": 1.8898122499230534, "grad_norm": 15.5625, "learning_rate": 1.3639957368135105e-06, "loss": 1.1499993801116943, "step": 6140 }, { "epoch": 1.8904278239458296, "grad_norm": 6.5, "learning_rate": 1.3632528106799261e-06, "loss": 1.2055202722549438, "step": 6142 }, { "epoch": 1.8910433979686059, "grad_norm": 16.375, "learning_rate": 1.3625100774580757e-06, "loss": 0.9683582782745361, "step": 6144 }, { "epoch": 1.891658971991382, "grad_norm": 9.875, "learning_rate": 1.3617675374760248e-06, "loss": 1.428824782371521, "step": 6146 }, { "epoch": 1.8922745460141583, "grad_norm": 44.0, "learning_rate": 1.3610251910617542e-06, "loss": 1.4965403079986572, "step": 6148 }, { "epoch": 1.8928901200369346, "grad_norm": 18.125, "learning_rate": 1.3602830385431593e-06, "loss": 1.4022653102874756, "step": 6150 }, { "epoch": 1.8935056940597108, "grad_norm": 6.5, "learning_rate": 1.3595410802480496e-06, "loss": 1.2949481010437012, "step": 6152 }, { "epoch": 1.894121268082487, "grad_norm": 16.125, "learning_rate": 1.358799316504149e-06, "loss": 1.4073116779327393, "step": 6154 }, { "epoch": 1.8947368421052633, "grad_norm": 13.875, "learning_rate": 1.3580577476390954e-06, "loss": 1.3306310176849365, "step": 6156 }, { "epoch": 1.8953524161280395, "grad_norm": 28.25, "learning_rate": 1.3573163739804405e-06, "loss": 1.3141050338745117, "step": 6158 }, { "epoch": 1.8959679901508157, "grad_norm": 17.0, "learning_rate": 1.3565751958556498e-06, "loss": 1.7682037353515625, "step": 6160 }, { "epoch": 1.896583564173592, "grad_norm": 42.0, "learning_rate": 1.3558342135921026e-06, "loss": 0.986321210861206, "step": 6162 }, { "epoch": 1.8971991381963682, "grad_norm": 16.25, "learning_rate": 1.3550934275170924e-06, "loss": 1.5372295379638672, "step": 6164 }, { "epoch": 1.8978147122191444, "grad_norm": 56.25, "learning_rate": 1.3543528379578242e-06, "loss": 1.5373578071594238, "step": 6166 }, { "epoch": 1.8984302862419207, "grad_norm": 10.875, "learning_rate": 1.3536124452414173e-06, "loss": 1.3291563987731934, "step": 6168 }, { "epoch": 1.899045860264697, "grad_norm": 9.1875, "learning_rate": 1.3528722496949046e-06, "loss": 1.4090769290924072, "step": 6170 }, { "epoch": 1.8996614342874731, "grad_norm": 7.3125, "learning_rate": 1.3521322516452313e-06, "loss": 0.9713155031204224, "step": 6172 }, { "epoch": 1.9002770083102494, "grad_norm": 25.0, "learning_rate": 1.3513924514192546e-06, "loss": 1.47679603099823, "step": 6174 }, { "epoch": 1.9008925823330256, "grad_norm": 43.0, "learning_rate": 1.3506528493437459e-06, "loss": 1.8054498434066772, "step": 6176 }, { "epoch": 1.9015081563558018, "grad_norm": 13.375, "learning_rate": 1.3499134457453883e-06, "loss": 1.1690771579742432, "step": 6178 }, { "epoch": 1.902123730378578, "grad_norm": 20.25, "learning_rate": 1.3491742409507764e-06, "loss": 1.4926562309265137, "step": 6180 }, { "epoch": 1.9027393044013543, "grad_norm": 10.0625, "learning_rate": 1.3484352352864186e-06, "loss": 1.525926113128662, "step": 6182 }, { "epoch": 1.9033548784241305, "grad_norm": 14.5625, "learning_rate": 1.3476964290787346e-06, "loss": 1.117192268371582, "step": 6184 }, { "epoch": 1.9039704524469068, "grad_norm": 34.0, "learning_rate": 1.346957822654055e-06, "loss": 1.599233627319336, "step": 6186 }, { "epoch": 1.904586026469683, "grad_norm": 8.5, "learning_rate": 1.3462194163386241e-06, "loss": 1.2044343948364258, "step": 6188 }, { "epoch": 1.9052016004924592, "grad_norm": 18.875, "learning_rate": 1.3454812104585969e-06, "loss": 1.5050618648529053, "step": 6190 }, { "epoch": 1.9058171745152355, "grad_norm": 36.25, "learning_rate": 1.3447432053400386e-06, "loss": 1.3360257148742676, "step": 6192 }, { "epoch": 1.9064327485380117, "grad_norm": 11.625, "learning_rate": 1.344005401308928e-06, "loss": 1.4488022327423096, "step": 6194 }, { "epoch": 1.907048322560788, "grad_norm": 19.75, "learning_rate": 1.3432677986911537e-06, "loss": 1.5527957677841187, "step": 6196 }, { "epoch": 1.9076638965835642, "grad_norm": 17.375, "learning_rate": 1.3425303978125154e-06, "loss": 1.5068588256835938, "step": 6198 }, { "epoch": 1.9082794706063404, "grad_norm": 15.0625, "learning_rate": 1.3417931989987242e-06, "loss": 1.3681578636169434, "step": 6200 }, { "epoch": 1.9088950446291166, "grad_norm": 13.125, "learning_rate": 1.3410562025754015e-06, "loss": 0.6912654638290405, "step": 6202 }, { "epoch": 1.9095106186518929, "grad_norm": 16.875, "learning_rate": 1.340319408868079e-06, "loss": 0.8955786228179932, "step": 6204 }, { "epoch": 1.910126192674669, "grad_norm": 11.625, "learning_rate": 1.3395828182022005e-06, "loss": 1.6106204986572266, "step": 6206 }, { "epoch": 1.9107417666974453, "grad_norm": 7.90625, "learning_rate": 1.3388464309031182e-06, "loss": 1.243058681488037, "step": 6208 }, { "epoch": 1.9113573407202216, "grad_norm": 11.875, "learning_rate": 1.3381102472960947e-06, "loss": 1.4035508632659912, "step": 6210 }, { "epoch": 1.9119729147429978, "grad_norm": 9.375, "learning_rate": 1.3373742677063042e-06, "loss": 1.5388617515563965, "step": 6212 }, { "epoch": 1.912588488765774, "grad_norm": 13.5625, "learning_rate": 1.336638492458829e-06, "loss": 1.381677508354187, "step": 6214 }, { "epoch": 1.9132040627885503, "grad_norm": 17.0, "learning_rate": 1.3359029218786623e-06, "loss": 1.1878316402435303, "step": 6216 }, { "epoch": 1.9138196368113265, "grad_norm": 35.5, "learning_rate": 1.3351675562907064e-06, "loss": 1.4496146440505981, "step": 6218 }, { "epoch": 1.9144352108341027, "grad_norm": 37.0, "learning_rate": 1.3344323960197728e-06, "loss": 1.3815988302230835, "step": 6220 }, { "epoch": 1.915050784856879, "grad_norm": 22.5, "learning_rate": 1.3336974413905827e-06, "loss": 1.449317216873169, "step": 6222 }, { "epoch": 1.9156663588796552, "grad_norm": 11.375, "learning_rate": 1.3329626927277669e-06, "loss": 1.5711392164230347, "step": 6224 }, { "epoch": 1.9162819329024314, "grad_norm": 11.125, "learning_rate": 1.332228150355864e-06, "loss": 1.304990530014038, "step": 6226 }, { "epoch": 1.9168975069252077, "grad_norm": 30.0, "learning_rate": 1.3314938145993227e-06, "loss": 1.4342398643493652, "step": 6228 }, { "epoch": 1.917513080947984, "grad_norm": 5.75, "learning_rate": 1.3307596857824994e-06, "loss": 1.0260145664215088, "step": 6230 }, { "epoch": 1.9181286549707601, "grad_norm": 7.125, "learning_rate": 1.3300257642296605e-06, "loss": 1.0618666410446167, "step": 6232 }, { "epoch": 1.9187442289935364, "grad_norm": 22.375, "learning_rate": 1.3292920502649797e-06, "loss": 1.2553677558898926, "step": 6234 }, { "epoch": 1.9193598030163126, "grad_norm": 8.4375, "learning_rate": 1.3285585442125386e-06, "loss": 1.163750410079956, "step": 6236 }, { "epoch": 1.9199753770390888, "grad_norm": 15.875, "learning_rate": 1.3278252463963285e-06, "loss": 1.3896212577819824, "step": 6238 }, { "epoch": 1.920590951061865, "grad_norm": 17.5, "learning_rate": 1.3270921571402477e-06, "loss": 1.5164833068847656, "step": 6240 }, { "epoch": 1.9212065250846413, "grad_norm": 4.6875, "learning_rate": 1.3263592767681022e-06, "loss": 1.0327818393707275, "step": 6242 }, { "epoch": 1.9218220991074175, "grad_norm": 10.0, "learning_rate": 1.3256266056036064e-06, "loss": 1.2779362201690674, "step": 6244 }, { "epoch": 1.9224376731301938, "grad_norm": 21.375, "learning_rate": 1.3248941439703822e-06, "loss": 1.6328758001327515, "step": 6246 }, { "epoch": 1.92305324715297, "grad_norm": 9.5, "learning_rate": 1.324161892191958e-06, "loss": 1.6194400787353516, "step": 6248 }, { "epoch": 1.9236688211757462, "grad_norm": 10.3125, "learning_rate": 1.3234298505917713e-06, "loss": 1.5902223587036133, "step": 6250 }, { "epoch": 1.9242843951985225, "grad_norm": 29.125, "learning_rate": 1.3226980194931648e-06, "loss": 1.2746918201446533, "step": 6252 }, { "epoch": 1.924899969221299, "grad_norm": 17.5, "learning_rate": 1.3219663992193891e-06, "loss": 1.4883370399475098, "step": 6254 }, { "epoch": 1.9255155432440751, "grad_norm": 11.5, "learning_rate": 1.3212349900936027e-06, "loss": 1.228645920753479, "step": 6256 }, { "epoch": 1.9261311172668514, "grad_norm": 26.875, "learning_rate": 1.320503792438869e-06, "loss": 1.536346435546875, "step": 6258 }, { "epoch": 1.9267466912896276, "grad_norm": 35.0, "learning_rate": 1.319772806578159e-06, "loss": 1.0358887910842896, "step": 6260 }, { "epoch": 1.9273622653124038, "grad_norm": 11.5, "learning_rate": 1.3190420328343503e-06, "loss": 1.271331787109375, "step": 6262 }, { "epoch": 1.92797783933518, "grad_norm": 6.8125, "learning_rate": 1.318311471530226e-06, "loss": 1.1881802082061768, "step": 6264 }, { "epoch": 1.9285934133579563, "grad_norm": 12.875, "learning_rate": 1.3175811229884762e-06, "loss": 1.2093167304992676, "step": 6266 }, { "epoch": 1.9292089873807325, "grad_norm": 15.8125, "learning_rate": 1.3168509875316968e-06, "loss": 1.361979365348816, "step": 6268 }, { "epoch": 1.9298245614035088, "grad_norm": 22.5, "learning_rate": 1.3161210654823893e-06, "loss": 1.1650192737579346, "step": 6270 }, { "epoch": 1.930440135426285, "grad_norm": 10.9375, "learning_rate": 1.3153913571629605e-06, "loss": 1.1376789808273315, "step": 6272 }, { "epoch": 1.9310557094490612, "grad_norm": 11.5, "learning_rate": 1.3146618628957244e-06, "loss": 1.3090848922729492, "step": 6274 }, { "epoch": 1.9316712834718375, "grad_norm": 14.0, "learning_rate": 1.3139325830028996e-06, "loss": 1.6002295017242432, "step": 6276 }, { "epoch": 1.9322868574946137, "grad_norm": 9.0625, "learning_rate": 1.3132035178066089e-06, "loss": 1.232072353363037, "step": 6278 }, { "epoch": 1.93290243151739, "grad_norm": 12.6875, "learning_rate": 1.312474667628881e-06, "loss": 1.537278652191162, "step": 6280 }, { "epoch": 1.9335180055401662, "grad_norm": 14.9375, "learning_rate": 1.3117460327916512e-06, "loss": 1.2492306232452393, "step": 6282 }, { "epoch": 1.9341335795629424, "grad_norm": 15.375, "learning_rate": 1.3110176136167576e-06, "loss": 1.626798391342163, "step": 6284 }, { "epoch": 1.9347491535857186, "grad_norm": 10.1875, "learning_rate": 1.3102894104259433e-06, "loss": 1.4308114051818848, "step": 6286 }, { "epoch": 1.9353647276084949, "grad_norm": 42.75, "learning_rate": 1.3095614235408575e-06, "loss": 1.4480242729187012, "step": 6288 }, { "epoch": 1.935980301631271, "grad_norm": 22.125, "learning_rate": 1.3088336532830524e-06, "loss": 1.646793007850647, "step": 6290 }, { "epoch": 1.9365958756540476, "grad_norm": 8.1875, "learning_rate": 1.3081060999739844e-06, "loss": 1.1365125179290771, "step": 6292 }, { "epoch": 1.9372114496768238, "grad_norm": 6.375, "learning_rate": 1.3073787639350156e-06, "loss": 0.9681548476219177, "step": 6294 }, { "epoch": 1.9378270236996, "grad_norm": 26.0, "learning_rate": 1.3066516454874107e-06, "loss": 1.3221466541290283, "step": 6296 }, { "epoch": 1.9384425977223763, "grad_norm": 7.3125, "learning_rate": 1.305924744952338e-06, "loss": 1.2677980661392212, "step": 6298 }, { "epoch": 1.9390581717451525, "grad_norm": 25.625, "learning_rate": 1.3051980626508717e-06, "loss": 1.460378646850586, "step": 6300 }, { "epoch": 1.9396737457679287, "grad_norm": 104.5, "learning_rate": 1.304471598903988e-06, "loss": 1.6934248208999634, "step": 6302 }, { "epoch": 1.940289319790705, "grad_norm": 12.4375, "learning_rate": 1.3037453540325651e-06, "loss": 1.4465457201004028, "step": 6304 }, { "epoch": 1.9409048938134812, "grad_norm": 12.6875, "learning_rate": 1.3030193283573883e-06, "loss": 1.2186150550842285, "step": 6306 }, { "epoch": 1.9415204678362574, "grad_norm": 11.0, "learning_rate": 1.3022935221991428e-06, "loss": 1.6539312601089478, "step": 6308 }, { "epoch": 1.9421360418590337, "grad_norm": 12.0, "learning_rate": 1.3015679358784183e-06, "loss": 1.5259175300598145, "step": 6310 }, { "epoch": 1.94275161588181, "grad_norm": 19.0, "learning_rate": 1.300842569715707e-06, "loss": 1.3606197834014893, "step": 6312 }, { "epoch": 1.9433671899045861, "grad_norm": 30.375, "learning_rate": 1.3001174240314043e-06, "loss": 1.5610425472259521, "step": 6314 }, { "epoch": 1.9439827639273624, "grad_norm": 10.8125, "learning_rate": 1.2993924991458072e-06, "loss": 1.2968568801879883, "step": 6316 }, { "epoch": 1.9445983379501386, "grad_norm": 13.875, "learning_rate": 1.2986677953791168e-06, "loss": 1.3084802627563477, "step": 6318 }, { "epoch": 1.9452139119729148, "grad_norm": 47.25, "learning_rate": 1.297943313051435e-06, "loss": 1.3081390857696533, "step": 6320 }, { "epoch": 1.945829485995691, "grad_norm": 9.9375, "learning_rate": 1.2972190524827661e-06, "loss": 1.126495122909546, "step": 6322 }, { "epoch": 1.9464450600184673, "grad_norm": 11.875, "learning_rate": 1.296495013993018e-06, "loss": 1.385104775428772, "step": 6324 }, { "epoch": 1.9470606340412435, "grad_norm": 5.0, "learning_rate": 1.2957711979019981e-06, "loss": 1.249680757522583, "step": 6326 }, { "epoch": 1.9476762080640198, "grad_norm": 15.0625, "learning_rate": 1.2950476045294178e-06, "loss": 1.3745570182800293, "step": 6328 }, { "epoch": 1.948291782086796, "grad_norm": 21.875, "learning_rate": 1.294324234194888e-06, "loss": 1.6673004627227783, "step": 6330 }, { "epoch": 1.9489073561095722, "grad_norm": 22.125, "learning_rate": 1.2936010872179227e-06, "loss": 0.8813947439193726, "step": 6332 }, { "epoch": 1.9495229301323485, "grad_norm": 22.125, "learning_rate": 1.2928781639179377e-06, "loss": 1.6500790119171143, "step": 6334 }, { "epoch": 1.9501385041551247, "grad_norm": 5.9375, "learning_rate": 1.292155464614247e-06, "loss": 1.0126196146011353, "step": 6336 }, { "epoch": 1.950754078177901, "grad_norm": 14.375, "learning_rate": 1.29143298962607e-06, "loss": 1.327214002609253, "step": 6338 }, { "epoch": 1.9513696522006772, "grad_norm": 68.5, "learning_rate": 1.290710739272523e-06, "loss": 1.2860033512115479, "step": 6340 }, { "epoch": 1.9519852262234534, "grad_norm": 16.125, "learning_rate": 1.2899887138726256e-06, "loss": 1.706552267074585, "step": 6342 }, { "epoch": 1.9526008002462296, "grad_norm": 19.125, "learning_rate": 1.2892669137452975e-06, "loss": 1.4060251712799072, "step": 6344 }, { "epoch": 1.9532163742690059, "grad_norm": 11.9375, "learning_rate": 1.2885453392093578e-06, "loss": 1.5031074285507202, "step": 6346 }, { "epoch": 1.953831948291782, "grad_norm": 12.125, "learning_rate": 1.2878239905835275e-06, "loss": 1.1290861368179321, "step": 6348 }, { "epoch": 1.9544475223145583, "grad_norm": 6.25, "learning_rate": 1.287102868186427e-06, "loss": 1.2029154300689697, "step": 6350 }, { "epoch": 1.9550630963373345, "grad_norm": 15.875, "learning_rate": 1.2863819723365765e-06, "loss": 1.4671310186386108, "step": 6352 }, { "epoch": 1.9556786703601108, "grad_norm": 11.0, "learning_rate": 1.2856613033523968e-06, "loss": 1.035024642944336, "step": 6354 }, { "epoch": 1.956294244382887, "grad_norm": 30.25, "learning_rate": 1.284940861552208e-06, "loss": 1.1749812364578247, "step": 6356 }, { "epoch": 1.9569098184056632, "grad_norm": 15.875, "learning_rate": 1.2842206472542308e-06, "loss": 1.2916359901428223, "step": 6358 }, { "epoch": 1.9575253924284395, "grad_norm": 12.125, "learning_rate": 1.2835006607765837e-06, "loss": 1.0509839057922363, "step": 6360 }, { "epoch": 1.9581409664512157, "grad_norm": 17.0, "learning_rate": 1.2827809024372858e-06, "loss": 1.5406067371368408, "step": 6362 }, { "epoch": 1.958756540473992, "grad_norm": 20.0, "learning_rate": 1.282061372554256e-06, "loss": 1.4444975852966309, "step": 6364 }, { "epoch": 1.9593721144967682, "grad_norm": 30.375, "learning_rate": 1.2813420714453106e-06, "loss": 1.199216604232788, "step": 6366 }, { "epoch": 1.9599876885195444, "grad_norm": 3.640625, "learning_rate": 1.2806229994281656e-06, "loss": 1.1930866241455078, "step": 6368 }, { "epoch": 1.9606032625423206, "grad_norm": 20.0, "learning_rate": 1.2799041568204368e-06, "loss": 1.1290255784988403, "step": 6370 }, { "epoch": 1.9612188365650969, "grad_norm": 16.125, "learning_rate": 1.2791855439396369e-06, "loss": 1.2265934944152832, "step": 6372 }, { "epoch": 1.9618344105878731, "grad_norm": 17.0, "learning_rate": 1.2784671611031787e-06, "loss": 1.585903525352478, "step": 6374 }, { "epoch": 1.9624499846106493, "grad_norm": 11.25, "learning_rate": 1.2777490086283728e-06, "loss": 1.3189527988433838, "step": 6376 }, { "epoch": 1.9630655586334256, "grad_norm": 7.34375, "learning_rate": 1.2770310868324266e-06, "loss": 1.0683166980743408, "step": 6378 }, { "epoch": 1.9636811326562018, "grad_norm": 91.0, "learning_rate": 1.2763133960324486e-06, "loss": 1.0657436847686768, "step": 6380 }, { "epoch": 1.964296706678978, "grad_norm": 14.0, "learning_rate": 1.2755959365454433e-06, "loss": 1.1493613719940186, "step": 6382 }, { "epoch": 1.9649122807017543, "grad_norm": 17.75, "learning_rate": 1.274878708688313e-06, "loss": 1.5804072618484497, "step": 6384 }, { "epoch": 1.9655278547245305, "grad_norm": 15.75, "learning_rate": 1.2741617127778581e-06, "loss": 1.352163553237915, "step": 6386 }, { "epoch": 1.9661434287473067, "grad_norm": 19.125, "learning_rate": 1.2734449491307764e-06, "loss": 1.1006097793579102, "step": 6388 }, { "epoch": 1.966759002770083, "grad_norm": 20.375, "learning_rate": 1.2727284180636641e-06, "loss": 1.8996819257736206, "step": 6390 }, { "epoch": 1.9673745767928592, "grad_norm": 25.5, "learning_rate": 1.2720121198930123e-06, "loss": 1.290725588798523, "step": 6392 }, { "epoch": 1.9679901508156354, "grad_norm": 15.4375, "learning_rate": 1.271296054935212e-06, "loss": 1.235483169555664, "step": 6394 }, { "epoch": 1.9686057248384117, "grad_norm": 14.625, "learning_rate": 1.2705802235065495e-06, "loss": 1.2222580909729004, "step": 6396 }, { "epoch": 1.969221298861188, "grad_norm": 12.75, "learning_rate": 1.2698646259232077e-06, "loss": 1.5252647399902344, "step": 6398 }, { "epoch": 1.9698368728839641, "grad_norm": 10.9375, "learning_rate": 1.2691492625012682e-06, "loss": 1.555239200592041, "step": 6400 }, { "epoch": 1.9704524469067404, "grad_norm": 36.25, "learning_rate": 1.2684341335567074e-06, "loss": 1.5563361644744873, "step": 6402 }, { "epoch": 1.9710680209295168, "grad_norm": 12.1875, "learning_rate": 1.2677192394053971e-06, "loss": 1.6911733150482178, "step": 6404 }, { "epoch": 1.971683594952293, "grad_norm": 10.8125, "learning_rate": 1.2670045803631093e-06, "loss": 1.5041658878326416, "step": 6406 }, { "epoch": 1.9722991689750693, "grad_norm": 8.8125, "learning_rate": 1.2662901567455077e-06, "loss": 0.9709604978561401, "step": 6408 }, { "epoch": 1.9729147429978455, "grad_norm": 11.0625, "learning_rate": 1.2655759688681548e-06, "loss": 1.1826274394989014, "step": 6410 }, { "epoch": 1.9735303170206218, "grad_norm": 21.625, "learning_rate": 1.264862017046509e-06, "loss": 1.141406774520874, "step": 6412 }, { "epoch": 1.974145891043398, "grad_norm": 132.0, "learning_rate": 1.2641483015959223e-06, "loss": 1.436084270477295, "step": 6414 }, { "epoch": 1.9747614650661742, "grad_norm": 15.625, "learning_rate": 1.2634348228316442e-06, "loss": 1.4876971244812012, "step": 6416 }, { "epoch": 1.9753770390889505, "grad_norm": 20.875, "learning_rate": 1.2627215810688196e-06, "loss": 1.5110372304916382, "step": 6418 }, { "epoch": 1.9759926131117267, "grad_norm": 12.625, "learning_rate": 1.262008576622487e-06, "loss": 1.2345492839813232, "step": 6420 }, { "epoch": 1.976608187134503, "grad_norm": 33.5, "learning_rate": 1.261295809807582e-06, "loss": 1.3591794967651367, "step": 6422 }, { "epoch": 1.9772237611572792, "grad_norm": 13.75, "learning_rate": 1.2605832809389347e-06, "loss": 1.5868587493896484, "step": 6424 }, { "epoch": 1.9778393351800554, "grad_norm": 10.0625, "learning_rate": 1.25987099033127e-06, "loss": 1.4743338823318481, "step": 6426 }, { "epoch": 1.9784549092028316, "grad_norm": 12.125, "learning_rate": 1.2591589382992066e-06, "loss": 1.4161126613616943, "step": 6428 }, { "epoch": 1.9790704832256079, "grad_norm": 17.875, "learning_rate": 1.2584471251572596e-06, "loss": 2.168795108795166, "step": 6430 }, { "epoch": 1.979686057248384, "grad_norm": 19.25, "learning_rate": 1.2577355512198374e-06, "loss": 1.795354962348938, "step": 6432 }, { "epoch": 1.9803016312711603, "grad_norm": 12.5625, "learning_rate": 1.2570242168012427e-06, "loss": 1.554661750793457, "step": 6434 }, { "epoch": 1.9809172052939366, "grad_norm": 13.6875, "learning_rate": 1.256313122215673e-06, "loss": 1.3290802240371704, "step": 6436 }, { "epoch": 1.9815327793167128, "grad_norm": 14.4375, "learning_rate": 1.2556022677772202e-06, "loss": 1.7011728286743164, "step": 6438 }, { "epoch": 1.982148353339489, "grad_norm": 14.875, "learning_rate": 1.2548916537998682e-06, "loss": 1.0243396759033203, "step": 6440 }, { "epoch": 1.9827639273622655, "grad_norm": 17.375, "learning_rate": 1.2541812805974969e-06, "loss": 0.9982007741928101, "step": 6442 }, { "epoch": 1.9833795013850417, "grad_norm": 43.25, "learning_rate": 1.2534711484838791e-06, "loss": 1.570636510848999, "step": 6444 }, { "epoch": 1.983995075407818, "grad_norm": 38.75, "learning_rate": 1.2527612577726805e-06, "loss": 0.9931204915046692, "step": 6446 }, { "epoch": 1.9846106494305942, "grad_norm": 11.5, "learning_rate": 1.2520516087774603e-06, "loss": 1.2048075199127197, "step": 6448 }, { "epoch": 1.9852262234533704, "grad_norm": 11.375, "learning_rate": 1.251342201811672e-06, "loss": 1.1237223148345947, "step": 6450 }, { "epoch": 1.9858417974761466, "grad_norm": 10.5625, "learning_rate": 1.2506330371886616e-06, "loss": 1.353630781173706, "step": 6452 }, { "epoch": 1.9864573714989229, "grad_norm": 31.5, "learning_rate": 1.2499241152216673e-06, "loss": 0.7195751070976257, "step": 6454 }, { "epoch": 1.987072945521699, "grad_norm": 6.25, "learning_rate": 1.2492154362238208e-06, "loss": 1.288529872894287, "step": 6456 }, { "epoch": 1.9876885195444753, "grad_norm": 9.4375, "learning_rate": 1.2485070005081473e-06, "loss": 1.4264882802963257, "step": 6458 }, { "epoch": 1.9883040935672516, "grad_norm": 17.25, "learning_rate": 1.2477988083875625e-06, "loss": 1.44509756565094, "step": 6460 }, { "epoch": 1.9889196675900278, "grad_norm": 19.0, "learning_rate": 1.2470908601748759e-06, "loss": 1.390662670135498, "step": 6462 }, { "epoch": 1.989535241612804, "grad_norm": 44.75, "learning_rate": 1.24638315618279e-06, "loss": 1.1538774967193604, "step": 6464 }, { "epoch": 1.9901508156355803, "grad_norm": 14.625, "learning_rate": 1.2456756967238967e-06, "loss": 1.4914668798446655, "step": 6466 }, { "epoch": 1.9907663896583565, "grad_norm": 7.46875, "learning_rate": 1.2449684821106837e-06, "loss": 1.0413475036621094, "step": 6468 }, { "epoch": 1.9913819636811327, "grad_norm": 15.125, "learning_rate": 1.2442615126555275e-06, "loss": 1.5457627773284912, "step": 6470 }, { "epoch": 1.991997537703909, "grad_norm": 17.375, "learning_rate": 1.2435547886706963e-06, "loss": 1.5767290592193604, "step": 6472 }, { "epoch": 1.9926131117266852, "grad_norm": 21.75, "learning_rate": 1.242848310468353e-06, "loss": 1.1986875534057617, "step": 6474 }, { "epoch": 1.9932286857494614, "grad_norm": 10.3125, "learning_rate": 1.2421420783605481e-06, "loss": 1.5655173063278198, "step": 6476 }, { "epoch": 1.9938442597722377, "grad_norm": 11.625, "learning_rate": 1.2414360926592258e-06, "loss": 1.719822883605957, "step": 6478 }, { "epoch": 1.994459833795014, "grad_norm": 23.625, "learning_rate": 1.2407303536762217e-06, "loss": 1.5850684642791748, "step": 6480 }, { "epoch": 1.9950754078177901, "grad_norm": 248.0, "learning_rate": 1.2400248617232597e-06, "loss": 0.8525184392929077, "step": 6482 }, { "epoch": 1.9956909818405664, "grad_norm": 6.15625, "learning_rate": 1.2393196171119575e-06, "loss": 0.9294012188911438, "step": 6484 }, { "epoch": 1.9963065558633426, "grad_norm": 21.875, "learning_rate": 1.2386146201538224e-06, "loss": 1.3426337242126465, "step": 6486 }, { "epoch": 1.9969221298861188, "grad_norm": 11.25, "learning_rate": 1.237909871160252e-06, "loss": 0.5351234078407288, "step": 6488 }, { "epoch": 1.997537703908895, "grad_norm": 29.875, "learning_rate": 1.237205370442535e-06, "loss": 1.0683671236038208, "step": 6490 }, { "epoch": 1.9981532779316713, "grad_norm": 15.8125, "learning_rate": 1.23650111831185e-06, "loss": 1.2516847848892212, "step": 6492 }, { "epoch": 1.9987688519544475, "grad_norm": 14.125, "learning_rate": 1.2357971150792667e-06, "loss": 1.3247123956680298, "step": 6494 }, { "epoch": 1.9993844259772238, "grad_norm": 15.5625, "learning_rate": 1.2350933610557434e-06, "loss": 1.4294352531433105, "step": 6496 }, { "epoch": 2.0, "grad_norm": 15.875, "learning_rate": 1.2343898565521283e-06, "loss": 1.3242424726486206, "step": 6498 }, { "epoch": 2.0006155740227762, "grad_norm": 3.359375, "learning_rate": 1.233686601879162e-06, "loss": 1.1256086826324463, "step": 6500 }, { "epoch": 2.0012311480455525, "grad_norm": 11.25, "learning_rate": 1.2329835973474713e-06, "loss": 1.5124751329421997, "step": 6502 }, { "epoch": 2.0018467220683287, "grad_norm": 127.5, "learning_rate": 1.2322808432675746e-06, "loss": 1.2730624675750732, "step": 6504 }, { "epoch": 2.002462296091105, "grad_norm": 5.5, "learning_rate": 1.2315783399498802e-06, "loss": 1.5110902786254883, "step": 6506 }, { "epoch": 2.003077870113881, "grad_norm": 10.75, "learning_rate": 1.2308760877046833e-06, "loss": 1.7414963245391846, "step": 6508 }, { "epoch": 2.0036934441366574, "grad_norm": 34.75, "learning_rate": 1.2301740868421696e-06, "loss": 1.1642667055130005, "step": 6510 }, { "epoch": 2.0043090181594336, "grad_norm": 26.125, "learning_rate": 1.2294723376724145e-06, "loss": 0.8890253901481628, "step": 6512 }, { "epoch": 2.00492459218221, "grad_norm": 31.75, "learning_rate": 1.2287708405053806e-06, "loss": 1.2963333129882812, "step": 6514 }, { "epoch": 2.005540166204986, "grad_norm": 42.5, "learning_rate": 1.2280695956509205e-06, "loss": 1.245513916015625, "step": 6516 }, { "epoch": 2.0061557402277623, "grad_norm": 8.3125, "learning_rate": 1.2273686034187746e-06, "loss": 1.2022202014923096, "step": 6518 }, { "epoch": 2.0067713142505386, "grad_norm": 5.625, "learning_rate": 1.2266678641185725e-06, "loss": 1.3870012760162354, "step": 6520 }, { "epoch": 2.007386888273315, "grad_norm": 19.875, "learning_rate": 1.2259673780598306e-06, "loss": 1.3389239311218262, "step": 6522 }, { "epoch": 2.008002462296091, "grad_norm": 7.0, "learning_rate": 1.2252671455519553e-06, "loss": 1.1467727422714233, "step": 6524 }, { "epoch": 2.0086180363188673, "grad_norm": 6.71875, "learning_rate": 1.2245671669042399e-06, "loss": 1.2564332485198975, "step": 6526 }, { "epoch": 2.0092336103416435, "grad_norm": 6.15625, "learning_rate": 1.2238674424258652e-06, "loss": 1.5031843185424805, "step": 6528 }, { "epoch": 2.0098491843644197, "grad_norm": 9.5625, "learning_rate": 1.2231679724259005e-06, "loss": 1.339223861694336, "step": 6530 }, { "epoch": 2.010464758387196, "grad_norm": 8.4375, "learning_rate": 1.2224687572133034e-06, "loss": 1.743408203125, "step": 6532 }, { "epoch": 2.011080332409972, "grad_norm": 12.625, "learning_rate": 1.2217697970969164e-06, "loss": 1.2714446783065796, "step": 6534 }, { "epoch": 2.0116959064327484, "grad_norm": 27.75, "learning_rate": 1.2210710923854726e-06, "loss": 1.3177649974822998, "step": 6536 }, { "epoch": 2.0123114804555247, "grad_norm": 4.40625, "learning_rate": 1.2203726433875904e-06, "loss": 1.0545878410339355, "step": 6538 }, { "epoch": 2.012927054478301, "grad_norm": 10.4375, "learning_rate": 1.219674450411774e-06, "loss": 0.9841394424438477, "step": 6540 }, { "epoch": 2.013542628501077, "grad_norm": 26.5, "learning_rate": 1.2189765137664182e-06, "loss": 1.654037356376648, "step": 6542 }, { "epoch": 2.0141582025238534, "grad_norm": 45.25, "learning_rate": 1.2182788337598009e-06, "loss": 1.1750099658966064, "step": 6544 }, { "epoch": 2.0147737765466296, "grad_norm": 14.25, "learning_rate": 1.2175814107000885e-06, "loss": 1.3668975830078125, "step": 6546 }, { "epoch": 2.015389350569406, "grad_norm": 15.1875, "learning_rate": 1.2168842448953343e-06, "loss": 1.1165693998336792, "step": 6548 }, { "epoch": 2.016004924592182, "grad_norm": 15.0, "learning_rate": 1.216187336653476e-06, "loss": 1.4498151540756226, "step": 6550 }, { "epoch": 2.0166204986149583, "grad_norm": 52.5, "learning_rate": 1.2154906862823402e-06, "loss": 1.5345754623413086, "step": 6552 }, { "epoch": 2.0172360726377345, "grad_norm": 12.75, "learning_rate": 1.2147942940896367e-06, "loss": 1.228990077972412, "step": 6554 }, { "epoch": 2.0178516466605108, "grad_norm": 9.75, "learning_rate": 1.2140981603829635e-06, "loss": 1.6220732927322388, "step": 6556 }, { "epoch": 2.018467220683287, "grad_norm": 10.75, "learning_rate": 1.2134022854698037e-06, "loss": 1.5125510692596436, "step": 6558 }, { "epoch": 2.019082794706063, "grad_norm": 13.875, "learning_rate": 1.2127066696575252e-06, "loss": 1.2239012718200684, "step": 6560 }, { "epoch": 2.0196983687288395, "grad_norm": 21.375, "learning_rate": 1.212011313253384e-06, "loss": 1.0492621660232544, "step": 6562 }, { "epoch": 2.0203139427516157, "grad_norm": 23.75, "learning_rate": 1.211316216564519e-06, "loss": 1.2073233127593994, "step": 6564 }, { "epoch": 2.020929516774392, "grad_norm": 8.375, "learning_rate": 1.2106213798979539e-06, "loss": 1.0625733137130737, "step": 6566 }, { "epoch": 2.021545090797168, "grad_norm": 24.125, "learning_rate": 1.209926803560601e-06, "loss": 1.758016586303711, "step": 6568 }, { "epoch": 2.0221606648199444, "grad_norm": 29.5, "learning_rate": 1.2092324878592537e-06, "loss": 1.503721833229065, "step": 6570 }, { "epoch": 2.0227762388427206, "grad_norm": 16.625, "learning_rate": 1.2085384331005931e-06, "loss": 1.3319995403289795, "step": 6572 }, { "epoch": 2.023391812865497, "grad_norm": 14.75, "learning_rate": 1.2078446395911838e-06, "loss": 1.56700599193573, "step": 6574 }, { "epoch": 2.0240073868882735, "grad_norm": 16.25, "learning_rate": 1.207151107637475e-06, "loss": 1.5571670532226562, "step": 6576 }, { "epoch": 2.0246229609110498, "grad_norm": 11.625, "learning_rate": 1.2064578375458004e-06, "loss": 0.8407614231109619, "step": 6578 }, { "epoch": 2.025238534933826, "grad_norm": 14.3125, "learning_rate": 1.2057648296223788e-06, "loss": 1.2025877237319946, "step": 6580 }, { "epoch": 2.0258541089566022, "grad_norm": 8.3125, "learning_rate": 1.2050720841733117e-06, "loss": 1.1296197175979614, "step": 6582 }, { "epoch": 2.0264696829793785, "grad_norm": 10.3125, "learning_rate": 1.204379601504586e-06, "loss": 1.2144582271575928, "step": 6584 }, { "epoch": 2.0270852570021547, "grad_norm": 9.6875, "learning_rate": 1.2036873819220725e-06, "loss": 1.43599534034729, "step": 6586 }, { "epoch": 2.027700831024931, "grad_norm": 19.375, "learning_rate": 1.2029954257315252e-06, "loss": 1.6993939876556396, "step": 6588 }, { "epoch": 2.028316405047707, "grad_norm": 9.75, "learning_rate": 1.2023037332385814e-06, "loss": 1.2860901355743408, "step": 6590 }, { "epoch": 2.0289319790704834, "grad_norm": 8.625, "learning_rate": 1.2016123047487627e-06, "loss": 1.5420019626617432, "step": 6592 }, { "epoch": 2.0295475530932596, "grad_norm": 4.96875, "learning_rate": 1.2009211405674746e-06, "loss": 1.373903512954712, "step": 6594 }, { "epoch": 2.030163127116036, "grad_norm": 16.25, "learning_rate": 1.2002302410000039e-06, "loss": 1.3337182998657227, "step": 6596 }, { "epoch": 2.030778701138812, "grad_norm": 7.96875, "learning_rate": 1.1995396063515227e-06, "loss": 1.0187783241271973, "step": 6598 }, { "epoch": 2.0313942751615883, "grad_norm": 10.8125, "learning_rate": 1.1988492369270847e-06, "loss": 1.351269245147705, "step": 6600 }, { "epoch": 2.0320098491843646, "grad_norm": 13.0, "learning_rate": 1.1981591330316262e-06, "loss": 1.346450686454773, "step": 6602 }, { "epoch": 2.032625423207141, "grad_norm": 15.9375, "learning_rate": 1.1974692949699687e-06, "loss": 1.3943252563476562, "step": 6604 }, { "epoch": 2.033240997229917, "grad_norm": 4.3125, "learning_rate": 1.196779723046813e-06, "loss": 1.275787353515625, "step": 6606 }, { "epoch": 2.0338565712526933, "grad_norm": 4.65625, "learning_rate": 1.196090417566744e-06, "loss": 1.4417996406555176, "step": 6608 }, { "epoch": 2.0344721452754695, "grad_norm": 23.75, "learning_rate": 1.1954013788342285e-06, "loss": 1.6339545249938965, "step": 6610 }, { "epoch": 2.0350877192982457, "grad_norm": 13.0, "learning_rate": 1.1947126071536165e-06, "loss": 1.7662523984909058, "step": 6612 }, { "epoch": 2.035703293321022, "grad_norm": 5.71875, "learning_rate": 1.194024102829139e-06, "loss": 1.236097812652588, "step": 6614 }, { "epoch": 2.036318867343798, "grad_norm": 56.25, "learning_rate": 1.1933358661649086e-06, "loss": 1.5584526062011719, "step": 6616 }, { "epoch": 2.0369344413665744, "grad_norm": 9.0, "learning_rate": 1.1926478974649205e-06, "loss": 1.2706172466278076, "step": 6618 }, { "epoch": 2.0375500153893507, "grad_norm": 8.0625, "learning_rate": 1.191960197033052e-06, "loss": 1.143151879310608, "step": 6620 }, { "epoch": 2.038165589412127, "grad_norm": 22.75, "learning_rate": 1.1912727651730598e-06, "loss": 1.0128357410430908, "step": 6622 }, { "epoch": 2.038781163434903, "grad_norm": 20.75, "learning_rate": 1.1905856021885842e-06, "loss": 1.2151031494140625, "step": 6624 }, { "epoch": 2.0393967374576794, "grad_norm": 19.0, "learning_rate": 1.189898708383146e-06, "loss": 1.426274061203003, "step": 6626 }, { "epoch": 2.0400123114804556, "grad_norm": 29.0, "learning_rate": 1.189212084060146e-06, "loss": 0.9838099479675293, "step": 6628 }, { "epoch": 2.040627885503232, "grad_norm": 58.0, "learning_rate": 1.1885257295228685e-06, "loss": 1.4184445142745972, "step": 6630 }, { "epoch": 2.041243459526008, "grad_norm": 15.125, "learning_rate": 1.1878396450744758e-06, "loss": 1.3893468379974365, "step": 6632 }, { "epoch": 2.0418590335487843, "grad_norm": 12.0, "learning_rate": 1.1871538310180128e-06, "loss": 0.8931977152824402, "step": 6634 }, { "epoch": 2.0424746075715605, "grad_norm": 40.25, "learning_rate": 1.1864682876564044e-06, "loss": 1.477832555770874, "step": 6636 }, { "epoch": 2.0430901815943368, "grad_norm": 16.5, "learning_rate": 1.1857830152924553e-06, "loss": 1.6426715850830078, "step": 6638 }, { "epoch": 2.043705755617113, "grad_norm": 14.5625, "learning_rate": 1.1850980142288515e-06, "loss": 1.2351162433624268, "step": 6640 }, { "epoch": 2.044321329639889, "grad_norm": 11.125, "learning_rate": 1.184413284768159e-06, "loss": 1.2899537086486816, "step": 6642 }, { "epoch": 2.0449369036626655, "grad_norm": 9.3125, "learning_rate": 1.183728827212823e-06, "loss": 1.3072597980499268, "step": 6644 }, { "epoch": 2.0455524776854417, "grad_norm": 25.0, "learning_rate": 1.1830446418651694e-06, "loss": 1.6979049444198608, "step": 6646 }, { "epoch": 2.046168051708218, "grad_norm": 10.5625, "learning_rate": 1.1823607290274045e-06, "loss": 1.087388515472412, "step": 6648 }, { "epoch": 2.046783625730994, "grad_norm": 51.5, "learning_rate": 1.181677089001612e-06, "loss": 1.2746059894561768, "step": 6650 }, { "epoch": 2.0473991997537704, "grad_norm": 9.625, "learning_rate": 1.1809937220897568e-06, "loss": 1.4777030944824219, "step": 6652 }, { "epoch": 2.0480147737765466, "grad_norm": 10.9375, "learning_rate": 1.1803106285936834e-06, "loss": 1.0702719688415527, "step": 6654 }, { "epoch": 2.048630347799323, "grad_norm": 12.25, "learning_rate": 1.1796278088151152e-06, "loss": 1.3066442012786865, "step": 6656 }, { "epoch": 2.049245921822099, "grad_norm": 42.0, "learning_rate": 1.1789452630556535e-06, "loss": 1.4079071283340454, "step": 6658 }, { "epoch": 2.0498614958448753, "grad_norm": 6.53125, "learning_rate": 1.17826299161678e-06, "loss": 1.1593713760375977, "step": 6660 }, { "epoch": 2.0504770698676515, "grad_norm": 12.0, "learning_rate": 1.1775809947998552e-06, "loss": 1.1441447734832764, "step": 6662 }, { "epoch": 2.051092643890428, "grad_norm": 27.5, "learning_rate": 1.1768992729061173e-06, "loss": 1.1255943775177002, "step": 6664 }, { "epoch": 2.051708217913204, "grad_norm": 4.28125, "learning_rate": 1.1762178262366838e-06, "loss": 1.6918699741363525, "step": 6666 }, { "epoch": 2.0523237919359802, "grad_norm": 7.0625, "learning_rate": 1.175536655092551e-06, "loss": 1.2675297260284424, "step": 6668 }, { "epoch": 2.0529393659587565, "grad_norm": 24.25, "learning_rate": 1.1748557597745918e-06, "loss": 1.716539978981018, "step": 6670 }, { "epoch": 2.0535549399815327, "grad_norm": 14.1875, "learning_rate": 1.1741751405835593e-06, "loss": 0.9748501777648926, "step": 6672 }, { "epoch": 2.054170514004309, "grad_norm": 11.125, "learning_rate": 1.1734947978200835e-06, "loss": 1.3657870292663574, "step": 6674 }, { "epoch": 2.054786088027085, "grad_norm": 19.875, "learning_rate": 1.1728147317846733e-06, "loss": 1.5253214836120605, "step": 6676 }, { "epoch": 2.0554016620498614, "grad_norm": 12.5625, "learning_rate": 1.1721349427777133e-06, "loss": 1.2408127784729004, "step": 6678 }, { "epoch": 2.0560172360726376, "grad_norm": 9.8125, "learning_rate": 1.171455431099468e-06, "loss": 1.2663090229034424, "step": 6680 }, { "epoch": 2.056632810095414, "grad_norm": 9.125, "learning_rate": 1.1707761970500787e-06, "loss": 0.8668129444122314, "step": 6682 }, { "epoch": 2.05724838411819, "grad_norm": 10.875, "learning_rate": 1.1700972409295631e-06, "loss": 1.4875694513320923, "step": 6684 }, { "epoch": 2.0578639581409663, "grad_norm": 22.125, "learning_rate": 1.1694185630378171e-06, "loss": 1.2524561882019043, "step": 6686 }, { "epoch": 2.0584795321637426, "grad_norm": 17.0, "learning_rate": 1.1687401636746143e-06, "loss": 1.4323711395263672, "step": 6688 }, { "epoch": 2.059095106186519, "grad_norm": 36.25, "learning_rate": 1.1680620431396033e-06, "loss": 1.189788818359375, "step": 6690 }, { "epoch": 2.059710680209295, "grad_norm": 12.5, "learning_rate": 1.1673842017323112e-06, "loss": 1.1640629768371582, "step": 6692 }, { "epoch": 2.0603262542320713, "grad_norm": 24.0, "learning_rate": 1.1667066397521418e-06, "loss": 1.3109760284423828, "step": 6694 }, { "epoch": 2.0609418282548475, "grad_norm": 7.625, "learning_rate": 1.1660293574983739e-06, "loss": 1.2151691913604736, "step": 6696 }, { "epoch": 2.0615574022776237, "grad_norm": 7.125, "learning_rate": 1.165352355270165e-06, "loss": 1.3004823923110962, "step": 6698 }, { "epoch": 2.0621729763004, "grad_norm": 12.5625, "learning_rate": 1.1646756333665472e-06, "loss": 1.4980103969573975, "step": 6700 }, { "epoch": 2.062788550323176, "grad_norm": 5.25, "learning_rate": 1.1639991920864292e-06, "loss": 1.001665711402893, "step": 6702 }, { "epoch": 2.0634041243459524, "grad_norm": 22.625, "learning_rate": 1.1633230317285967e-06, "loss": 1.5311155319213867, "step": 6704 }, { "epoch": 2.0640196983687287, "grad_norm": 12.125, "learning_rate": 1.1626471525917093e-06, "loss": 1.5896987915039062, "step": 6706 }, { "epoch": 2.064635272391505, "grad_norm": 13.5625, "learning_rate": 1.1619715549743045e-06, "loss": 1.5665377378463745, "step": 6708 }, { "epoch": 2.065250846414281, "grad_norm": 27.25, "learning_rate": 1.1612962391747944e-06, "loss": 1.6170427799224854, "step": 6710 }, { "epoch": 2.0658664204370574, "grad_norm": 40.75, "learning_rate": 1.1606212054914662e-06, "loss": 1.57864248752594, "step": 6712 }, { "epoch": 2.0664819944598336, "grad_norm": 59.0, "learning_rate": 1.1599464542224836e-06, "loss": 1.138413906097412, "step": 6714 }, { "epoch": 2.06709756848261, "grad_norm": 13.9375, "learning_rate": 1.1592719856658846e-06, "loss": 1.4642479419708252, "step": 6716 }, { "epoch": 2.067713142505386, "grad_norm": 12.875, "learning_rate": 1.1585978001195838e-06, "loss": 1.3136050701141357, "step": 6718 }, { "epoch": 2.0683287165281623, "grad_norm": 3.21875, "learning_rate": 1.1579238978813686e-06, "loss": 1.2860009670257568, "step": 6720 }, { "epoch": 2.0689442905509385, "grad_norm": 15.0, "learning_rate": 1.1572502792489018e-06, "loss": 1.012883186340332, "step": 6722 }, { "epoch": 2.0695598645737148, "grad_norm": 5.0, "learning_rate": 1.1565769445197234e-06, "loss": 1.2962499856948853, "step": 6724 }, { "epoch": 2.0701754385964914, "grad_norm": 33.75, "learning_rate": 1.1559038939912448e-06, "loss": 1.3131591081619263, "step": 6726 }, { "epoch": 2.0707910126192677, "grad_norm": 29.875, "learning_rate": 1.1552311279607536e-06, "loss": 1.5089943408966064, "step": 6728 }, { "epoch": 2.071406586642044, "grad_norm": 23.5, "learning_rate": 1.1545586467254113e-06, "loss": 1.2558484077453613, "step": 6730 }, { "epoch": 2.07202216066482, "grad_norm": 9.6875, "learning_rate": 1.1538864505822537e-06, "loss": 1.2030107975006104, "step": 6732 }, { "epoch": 2.0726377346875964, "grad_norm": 4.15625, "learning_rate": 1.1532145398281904e-06, "loss": 1.2497684955596924, "step": 6734 }, { "epoch": 2.0732533087103726, "grad_norm": 10.25, "learning_rate": 1.1525429147600054e-06, "loss": 1.3862303495407104, "step": 6736 }, { "epoch": 2.073868882733149, "grad_norm": 13.4375, "learning_rate": 1.1518715756743558e-06, "loss": 1.1934767961502075, "step": 6738 }, { "epoch": 2.074484456755925, "grad_norm": 3.6875, "learning_rate": 1.1512005228677735e-06, "loss": 1.0756824016571045, "step": 6740 }, { "epoch": 2.0751000307787013, "grad_norm": 12.375, "learning_rate": 1.1505297566366623e-06, "loss": 1.2047674655914307, "step": 6742 }, { "epoch": 2.0757156048014775, "grad_norm": 19.75, "learning_rate": 1.149859277277302e-06, "loss": 1.3884174823760986, "step": 6744 }, { "epoch": 2.0763311788242538, "grad_norm": 12.9375, "learning_rate": 1.1491890850858426e-06, "loss": 1.3445823192596436, "step": 6746 }, { "epoch": 2.07694675284703, "grad_norm": 8.25, "learning_rate": 1.1485191803583091e-06, "loss": 1.028502345085144, "step": 6748 }, { "epoch": 2.0775623268698062, "grad_norm": 12.125, "learning_rate": 1.1478495633906003e-06, "loss": 1.3428664207458496, "step": 6750 }, { "epoch": 2.0781779008925825, "grad_norm": 10.875, "learning_rate": 1.147180234478485e-06, "loss": 1.5432965755462646, "step": 6752 }, { "epoch": 2.0787934749153587, "grad_norm": 26.125, "learning_rate": 1.1465111939176077e-06, "loss": 1.7907958030700684, "step": 6754 }, { "epoch": 2.079409048938135, "grad_norm": 9.75, "learning_rate": 1.1458424420034846e-06, "loss": 1.359710931777954, "step": 6756 }, { "epoch": 2.080024622960911, "grad_norm": 5.65625, "learning_rate": 1.1451739790315026e-06, "loss": 1.5104893445968628, "step": 6758 }, { "epoch": 2.0806401969836874, "grad_norm": 29.875, "learning_rate": 1.1445058052969246e-06, "loss": 1.3971478939056396, "step": 6760 }, { "epoch": 2.0812557710064636, "grad_norm": 16.375, "learning_rate": 1.143837921094883e-06, "loss": 1.4559658765792847, "step": 6762 }, { "epoch": 2.08187134502924, "grad_norm": 13.375, "learning_rate": 1.1431703267203817e-06, "loss": 1.188551664352417, "step": 6764 }, { "epoch": 2.082486919052016, "grad_norm": 9.1875, "learning_rate": 1.1425030224682998e-06, "loss": 1.3196886777877808, "step": 6766 }, { "epoch": 2.0831024930747923, "grad_norm": 4.84375, "learning_rate": 1.1418360086333852e-06, "loss": 1.2066409587860107, "step": 6768 }, { "epoch": 2.0837180670975686, "grad_norm": 13.875, "learning_rate": 1.141169285510259e-06, "loss": 1.4319138526916504, "step": 6770 }, { "epoch": 2.084333641120345, "grad_norm": 24.75, "learning_rate": 1.1405028533934138e-06, "loss": 0.9755879044532776, "step": 6772 }, { "epoch": 2.084949215143121, "grad_norm": 3.375, "learning_rate": 1.1398367125772132e-06, "loss": 1.4275426864624023, "step": 6774 }, { "epoch": 2.0855647891658973, "grad_norm": 11.25, "learning_rate": 1.1391708633558924e-06, "loss": 1.2716853618621826, "step": 6776 }, { "epoch": 2.0861803631886735, "grad_norm": 17.25, "learning_rate": 1.1385053060235576e-06, "loss": 1.4581801891326904, "step": 6778 }, { "epoch": 2.0867959372114497, "grad_norm": 13.6875, "learning_rate": 1.1378400408741862e-06, "loss": 1.4064788818359375, "step": 6780 }, { "epoch": 2.087411511234226, "grad_norm": 27.375, "learning_rate": 1.1371750682016272e-06, "loss": 1.2097954750061035, "step": 6782 }, { "epoch": 2.088027085257002, "grad_norm": 22.375, "learning_rate": 1.1365103882995986e-06, "loss": 1.9022862911224365, "step": 6784 }, { "epoch": 2.0886426592797784, "grad_norm": 12.0625, "learning_rate": 1.1358460014616915e-06, "loss": 0.9254089593887329, "step": 6786 }, { "epoch": 2.0892582333025547, "grad_norm": 16.5, "learning_rate": 1.135181907981366e-06, "loss": 1.5083138942718506, "step": 6788 }, { "epoch": 2.089873807325331, "grad_norm": 16.625, "learning_rate": 1.1345181081519521e-06, "loss": 0.9767194986343384, "step": 6790 }, { "epoch": 2.090489381348107, "grad_norm": 12.875, "learning_rate": 1.1338546022666525e-06, "loss": 1.3762192726135254, "step": 6792 }, { "epoch": 2.0911049553708834, "grad_norm": 8.6875, "learning_rate": 1.133191390618537e-06, "loss": 1.3783683776855469, "step": 6794 }, { "epoch": 2.0917205293936596, "grad_norm": 18.0, "learning_rate": 1.1325284735005478e-06, "loss": 0.9821135997772217, "step": 6796 }, { "epoch": 2.092336103416436, "grad_norm": 20.5, "learning_rate": 1.1318658512054961e-06, "loss": 1.3283894062042236, "step": 6798 }, { "epoch": 2.092951677439212, "grad_norm": 13.4375, "learning_rate": 1.1312035240260623e-06, "loss": 0.9971583485603333, "step": 6800 }, { "epoch": 2.0935672514619883, "grad_norm": 30.375, "learning_rate": 1.1305414922547976e-06, "loss": 1.8160250186920166, "step": 6802 }, { "epoch": 2.0941828254847645, "grad_norm": 24.875, "learning_rate": 1.129879756184123e-06, "loss": 1.6659497022628784, "step": 6804 }, { "epoch": 2.0947983995075408, "grad_norm": 7.25, "learning_rate": 1.1292183161063262e-06, "loss": 0.7340489029884338, "step": 6806 }, { "epoch": 2.095413973530317, "grad_norm": 25.125, "learning_rate": 1.1285571723135673e-06, "loss": 1.9677259922027588, "step": 6808 }, { "epoch": 2.0960295475530932, "grad_norm": 21.0, "learning_rate": 1.127896325097874e-06, "loss": 1.7345890998840332, "step": 6810 }, { "epoch": 2.0966451215758695, "grad_norm": 26.5, "learning_rate": 1.1272357747511437e-06, "loss": 1.692164659500122, "step": 6812 }, { "epoch": 2.0972606955986457, "grad_norm": 4.25, "learning_rate": 1.1265755215651414e-06, "loss": 0.6040706038475037, "step": 6814 }, { "epoch": 2.097876269621422, "grad_norm": 26.75, "learning_rate": 1.125915565831502e-06, "loss": 1.1488302946090698, "step": 6816 }, { "epoch": 2.098491843644198, "grad_norm": 10.8125, "learning_rate": 1.1252559078417293e-06, "loss": 0.9155806303024292, "step": 6818 }, { "epoch": 2.0991074176669744, "grad_norm": 9.125, "learning_rate": 1.124596547887194e-06, "loss": 1.2064423561096191, "step": 6820 }, { "epoch": 2.0997229916897506, "grad_norm": 10.4375, "learning_rate": 1.123937486259137e-06, "loss": 1.3566370010375977, "step": 6822 }, { "epoch": 2.100338565712527, "grad_norm": 32.5, "learning_rate": 1.1232787232486663e-06, "loss": 1.3902403116226196, "step": 6824 }, { "epoch": 2.100954139735303, "grad_norm": 24.5, "learning_rate": 1.1226202591467575e-06, "loss": 1.4831122159957886, "step": 6826 }, { "epoch": 2.1015697137580793, "grad_norm": 18.125, "learning_rate": 1.1219620942442565e-06, "loss": 1.1349093914031982, "step": 6828 }, { "epoch": 2.1021852877808556, "grad_norm": 9.0625, "learning_rate": 1.1213042288318747e-06, "loss": 1.1530771255493164, "step": 6830 }, { "epoch": 2.102800861803632, "grad_norm": 23.875, "learning_rate": 1.1206466632001915e-06, "loss": 1.4058551788330078, "step": 6832 }, { "epoch": 2.103416435826408, "grad_norm": 3.859375, "learning_rate": 1.1199893976396548e-06, "loss": 1.1480472087860107, "step": 6834 }, { "epoch": 2.1040320098491843, "grad_norm": 12.9375, "learning_rate": 1.1193324324405795e-06, "loss": 1.0479693412780762, "step": 6836 }, { "epoch": 2.1046475838719605, "grad_norm": 17.125, "learning_rate": 1.1186757678931484e-06, "loss": 1.5369406938552856, "step": 6838 }, { "epoch": 2.1052631578947367, "grad_norm": 15.0625, "learning_rate": 1.1180194042874104e-06, "loss": 1.1449368000030518, "step": 6840 }, { "epoch": 2.105878731917513, "grad_norm": 13.125, "learning_rate": 1.117363341913282e-06, "loss": 1.3344638347625732, "step": 6842 }, { "epoch": 2.106494305940289, "grad_norm": 4.6875, "learning_rate": 1.1167075810605473e-06, "loss": 1.231259822845459, "step": 6844 }, { "epoch": 2.1071098799630654, "grad_norm": 11.6875, "learning_rate": 1.1160521220188558e-06, "loss": 1.2796053886413574, "step": 6846 }, { "epoch": 2.1077254539858417, "grad_norm": 20.625, "learning_rate": 1.1153969650777249e-06, "loss": 0.9106309413909912, "step": 6848 }, { "epoch": 2.108341028008618, "grad_norm": 4.9375, "learning_rate": 1.114742110526539e-06, "loss": 1.1471333503723145, "step": 6850 }, { "epoch": 2.108956602031394, "grad_norm": 8.8125, "learning_rate": 1.114087558654546e-06, "loss": 1.4695062637329102, "step": 6852 }, { "epoch": 2.1095721760541704, "grad_norm": 11.25, "learning_rate": 1.1134333097508647e-06, "loss": 1.306732177734375, "step": 6854 }, { "epoch": 2.1101877500769466, "grad_norm": 12.5625, "learning_rate": 1.1127793641044763e-06, "loss": 1.2547025680541992, "step": 6856 }, { "epoch": 2.110803324099723, "grad_norm": 8.875, "learning_rate": 1.1121257220042286e-06, "loss": 1.3866357803344727, "step": 6858 }, { "epoch": 2.111418898122499, "grad_norm": 24.125, "learning_rate": 1.1114723837388378e-06, "loss": 1.4638195037841797, "step": 6860 }, { "epoch": 2.1120344721452753, "grad_norm": 10.125, "learning_rate": 1.110819349596883e-06, "loss": 1.4681785106658936, "step": 6862 }, { "epoch": 2.1126500461680515, "grad_norm": 7.09375, "learning_rate": 1.11016661986681e-06, "loss": 0.5226856470108032, "step": 6864 }, { "epoch": 2.1132656201908278, "grad_norm": 24.875, "learning_rate": 1.1095141948369316e-06, "loss": 1.569403052330017, "step": 6866 }, { "epoch": 2.113881194213604, "grad_norm": 19.5, "learning_rate": 1.108862074795423e-06, "loss": 1.3763833045959473, "step": 6868 }, { "epoch": 2.11449676823638, "grad_norm": 7.0625, "learning_rate": 1.1082102600303272e-06, "loss": 0.9807989001274109, "step": 6870 }, { "epoch": 2.1151123422591565, "grad_norm": 11.625, "learning_rate": 1.107558750829552e-06, "loss": 1.6106832027435303, "step": 6872 }, { "epoch": 2.1157279162819327, "grad_norm": 8.125, "learning_rate": 1.1069075474808685e-06, "loss": 1.1969914436340332, "step": 6874 }, { "epoch": 2.1163434903047094, "grad_norm": 18.5, "learning_rate": 1.1062566502719151e-06, "loss": 1.3149508237838745, "step": 6876 }, { "epoch": 2.116959064327485, "grad_norm": 6.09375, "learning_rate": 1.1056060594901931e-06, "loss": 1.1083767414093018, "step": 6878 }, { "epoch": 2.117574638350262, "grad_norm": 3.71875, "learning_rate": 1.1049557754230703e-06, "loss": 1.1356470584869385, "step": 6880 }, { "epoch": 2.118190212373038, "grad_norm": 23.625, "learning_rate": 1.1043057983577762e-06, "loss": 1.579628825187683, "step": 6882 }, { "epoch": 2.1188057863958143, "grad_norm": 16.625, "learning_rate": 1.1036561285814077e-06, "loss": 1.2171895503997803, "step": 6884 }, { "epoch": 2.1194213604185905, "grad_norm": 11.0625, "learning_rate": 1.1030067663809247e-06, "loss": 1.6301791667938232, "step": 6886 }, { "epoch": 2.1200369344413668, "grad_norm": 15.0, "learning_rate": 1.1023577120431505e-06, "loss": 0.9143880605697632, "step": 6888 }, { "epoch": 2.120652508464143, "grad_norm": 25.0, "learning_rate": 1.1017089658547733e-06, "loss": 1.3826243877410889, "step": 6890 }, { "epoch": 2.1212680824869192, "grad_norm": 7.03125, "learning_rate": 1.1010605281023458e-06, "loss": 1.3522517681121826, "step": 6892 }, { "epoch": 2.1218836565096955, "grad_norm": 15.3125, "learning_rate": 1.1004123990722829e-06, "loss": 1.2374582290649414, "step": 6894 }, { "epoch": 2.1224992305324717, "grad_norm": 8.3125, "learning_rate": 1.0997645790508637e-06, "loss": 1.2146995067596436, "step": 6896 }, { "epoch": 2.123114804555248, "grad_norm": 17.125, "learning_rate": 1.0991170683242324e-06, "loss": 1.3192272186279297, "step": 6898 }, { "epoch": 2.123730378578024, "grad_norm": 15.8125, "learning_rate": 1.0984698671783936e-06, "loss": 1.4727823734283447, "step": 6900 }, { "epoch": 2.1243459526008004, "grad_norm": 28.375, "learning_rate": 1.0978229758992177e-06, "loss": 0.9449286460876465, "step": 6902 }, { "epoch": 2.1249615266235766, "grad_norm": 24.5, "learning_rate": 1.097176394772437e-06, "loss": 1.1769983768463135, "step": 6904 }, { "epoch": 2.125577100646353, "grad_norm": 16.125, "learning_rate": 1.0965301240836481e-06, "loss": 1.314269781112671, "step": 6906 }, { "epoch": 2.126192674669129, "grad_norm": 8.0, "learning_rate": 1.0958841641183077e-06, "loss": 1.2756083011627197, "step": 6908 }, { "epoch": 2.1268082486919053, "grad_norm": 39.25, "learning_rate": 1.0952385151617384e-06, "loss": 1.1773420572280884, "step": 6910 }, { "epoch": 2.1274238227146816, "grad_norm": 28.75, "learning_rate": 1.094593177499124e-06, "loss": 1.4545814990997314, "step": 6912 }, { "epoch": 2.128039396737458, "grad_norm": 15.5, "learning_rate": 1.0939481514155102e-06, "loss": 1.445265769958496, "step": 6914 }, { "epoch": 2.128654970760234, "grad_norm": 35.0, "learning_rate": 1.0933034371958061e-06, "loss": 0.7057738304138184, "step": 6916 }, { "epoch": 2.1292705447830103, "grad_norm": 19.125, "learning_rate": 1.0926590351247835e-06, "loss": 1.3218157291412354, "step": 6918 }, { "epoch": 2.1298861188057865, "grad_norm": 34.25, "learning_rate": 1.0920149454870736e-06, "loss": 1.4286811351776123, "step": 6920 }, { "epoch": 2.1305016928285627, "grad_norm": 10.5, "learning_rate": 1.0913711685671738e-06, "loss": 0.9320307970046997, "step": 6922 }, { "epoch": 2.131117266851339, "grad_norm": 9.875, "learning_rate": 1.0907277046494398e-06, "loss": 1.3198941946029663, "step": 6924 }, { "epoch": 2.131732840874115, "grad_norm": 32.25, "learning_rate": 1.09008455401809e-06, "loss": 1.175795555114746, "step": 6926 }, { "epoch": 2.1323484148968914, "grad_norm": 12.1875, "learning_rate": 1.089441716957206e-06, "loss": 1.2813923358917236, "step": 6928 }, { "epoch": 2.1329639889196677, "grad_norm": 13.375, "learning_rate": 1.0887991937507287e-06, "loss": 1.3827874660491943, "step": 6930 }, { "epoch": 2.133579562942444, "grad_norm": 55.0, "learning_rate": 1.0881569846824615e-06, "loss": 1.8015201091766357, "step": 6932 }, { "epoch": 2.13419513696522, "grad_norm": 35.25, "learning_rate": 1.0875150900360695e-06, "loss": 1.8140422105789185, "step": 6934 }, { "epoch": 2.1348107109879964, "grad_norm": 12.8125, "learning_rate": 1.0868735100950775e-06, "loss": 1.3741447925567627, "step": 6936 }, { "epoch": 2.1354262850107726, "grad_norm": 10.875, "learning_rate": 1.0862322451428725e-06, "loss": 0.892223596572876, "step": 6938 }, { "epoch": 2.136041859033549, "grad_norm": 25.25, "learning_rate": 1.085591295462702e-06, "loss": 1.5447278022766113, "step": 6940 }, { "epoch": 2.136657433056325, "grad_norm": 15.3125, "learning_rate": 1.084950661337674e-06, "loss": 1.270053744316101, "step": 6942 }, { "epoch": 2.1372730070791013, "grad_norm": 17.75, "learning_rate": 1.0843103430507579e-06, "loss": 1.3592324256896973, "step": 6944 }, { "epoch": 2.1378885811018775, "grad_norm": 53.5, "learning_rate": 1.0836703408847815e-06, "loss": 1.8099262714385986, "step": 6946 }, { "epoch": 2.1385041551246537, "grad_norm": 9.5, "learning_rate": 1.0830306551224365e-06, "loss": 1.254908561706543, "step": 6948 }, { "epoch": 2.13911972914743, "grad_norm": 37.5, "learning_rate": 1.0823912860462715e-06, "loss": 1.4922285079956055, "step": 6950 }, { "epoch": 2.139735303170206, "grad_norm": 15.0625, "learning_rate": 1.0817522339386967e-06, "loss": 1.1510124206542969, "step": 6952 }, { "epoch": 2.1403508771929824, "grad_norm": 14.25, "learning_rate": 1.081113499081983e-06, "loss": 1.4475257396697998, "step": 6954 }, { "epoch": 2.1409664512157587, "grad_norm": 16.875, "learning_rate": 1.080475081758259e-06, "loss": 1.4003031253814697, "step": 6956 }, { "epoch": 2.141582025238535, "grad_norm": 13.125, "learning_rate": 1.0798369822495153e-06, "loss": 1.8019049167633057, "step": 6958 }, { "epoch": 2.142197599261311, "grad_norm": 13.1875, "learning_rate": 1.0791992008376013e-06, "loss": 1.6473685503005981, "step": 6960 }, { "epoch": 2.1428131732840874, "grad_norm": 25.125, "learning_rate": 1.078561737804225e-06, "loss": 1.3453480005264282, "step": 6962 }, { "epoch": 2.1434287473068636, "grad_norm": 11.75, "learning_rate": 1.077924593430955e-06, "loss": 1.3464794158935547, "step": 6964 }, { "epoch": 2.14404432132964, "grad_norm": 14.25, "learning_rate": 1.077287767999219e-06, "loss": 1.691328525543213, "step": 6966 }, { "epoch": 2.144659895352416, "grad_norm": 12.1875, "learning_rate": 1.0766512617903028e-06, "loss": 1.588313341140747, "step": 6968 }, { "epoch": 2.1452754693751923, "grad_norm": 17.625, "learning_rate": 1.0760150750853522e-06, "loss": 1.3188862800598145, "step": 6970 }, { "epoch": 2.1458910433979685, "grad_norm": 11.625, "learning_rate": 1.0753792081653718e-06, "loss": 1.4394432306289673, "step": 6972 }, { "epoch": 2.146506617420745, "grad_norm": 10.1875, "learning_rate": 1.0747436613112253e-06, "loss": 1.6718394756317139, "step": 6974 }, { "epoch": 2.147122191443521, "grad_norm": 8.75, "learning_rate": 1.074108434803633e-06, "loss": 0.9956381320953369, "step": 6976 }, { "epoch": 2.1477377654662972, "grad_norm": 10.4375, "learning_rate": 1.0734735289231762e-06, "loss": 1.2394037246704102, "step": 6978 }, { "epoch": 2.1483533394890735, "grad_norm": 18.75, "learning_rate": 1.0728389439502937e-06, "loss": 1.2736289501190186, "step": 6980 }, { "epoch": 2.1489689135118497, "grad_norm": 7.75, "learning_rate": 1.0722046801652819e-06, "loss": 1.2063171863555908, "step": 6982 }, { "epoch": 2.149584487534626, "grad_norm": 21.25, "learning_rate": 1.0715707378482955e-06, "loss": 1.4747381210327148, "step": 6984 }, { "epoch": 2.150200061557402, "grad_norm": 13.375, "learning_rate": 1.0709371172793488e-06, "loss": 1.5591285228729248, "step": 6986 }, { "epoch": 2.1508156355801784, "grad_norm": 26.75, "learning_rate": 1.0703038187383112e-06, "loss": 1.4576010704040527, "step": 6988 }, { "epoch": 2.1514312096029546, "grad_norm": 4.65625, "learning_rate": 1.069670842504913e-06, "loss": 1.1618280410766602, "step": 6990 }, { "epoch": 2.152046783625731, "grad_norm": 14.3125, "learning_rate": 1.069038188858739e-06, "loss": 0.8342984914779663, "step": 6992 }, { "epoch": 2.152662357648507, "grad_norm": 35.25, "learning_rate": 1.0684058580792339e-06, "loss": 1.4440381526947021, "step": 6994 }, { "epoch": 2.1532779316712833, "grad_norm": 21.5, "learning_rate": 1.0677738504456992e-06, "loss": 1.6453757286071777, "step": 6996 }, { "epoch": 2.1538935056940596, "grad_norm": 12.6875, "learning_rate": 1.0671421662372927e-06, "loss": 1.1821353435516357, "step": 6998 }, { "epoch": 2.154509079716836, "grad_norm": 11.6875, "learning_rate": 1.0665108057330306e-06, "loss": 1.4075431823730469, "step": 7000 }, { "epoch": 2.155124653739612, "grad_norm": 28.5, "learning_rate": 1.0658797692117847e-06, "loss": 1.6227091550827026, "step": 7002 }, { "epoch": 2.1557402277623883, "grad_norm": 15.125, "learning_rate": 1.065249056952285e-06, "loss": 1.389638900756836, "step": 7004 }, { "epoch": 2.1563558017851645, "grad_norm": 14.9375, "learning_rate": 1.0646186692331187e-06, "loss": 1.2804958820343018, "step": 7006 }, { "epoch": 2.1569713758079407, "grad_norm": 9.0, "learning_rate": 1.0639886063327275e-06, "loss": 1.4667779207229614, "step": 7008 }, { "epoch": 2.157586949830717, "grad_norm": 62.0, "learning_rate": 1.0633588685294114e-06, "loss": 1.7286169528961182, "step": 7010 }, { "epoch": 2.158202523853493, "grad_norm": 12.375, "learning_rate": 1.0627294561013269e-06, "loss": 1.4651620388031006, "step": 7012 }, { "epoch": 2.1588180978762694, "grad_norm": 9.375, "learning_rate": 1.0621003693264845e-06, "loss": 1.2908897399902344, "step": 7014 }, { "epoch": 2.1594336718990457, "grad_norm": 10.3125, "learning_rate": 1.0614716084827546e-06, "loss": 1.427079200744629, "step": 7016 }, { "epoch": 2.160049245921822, "grad_norm": 21.875, "learning_rate": 1.0608431738478604e-06, "loss": 1.3272745609283447, "step": 7018 }, { "epoch": 2.160664819944598, "grad_norm": 26.625, "learning_rate": 1.0602150656993821e-06, "loss": 1.7375531196594238, "step": 7020 }, { "epoch": 2.1612803939673744, "grad_norm": 17.375, "learning_rate": 1.0595872843147568e-06, "loss": 1.364192247390747, "step": 7022 }, { "epoch": 2.1618959679901506, "grad_norm": 20.375, "learning_rate": 1.0589598299712753e-06, "loss": 1.699464201927185, "step": 7024 }, { "epoch": 2.1625115420129273, "grad_norm": 10.6875, "learning_rate": 1.058332702946085e-06, "loss": 1.4647746086120605, "step": 7026 }, { "epoch": 2.163127116035703, "grad_norm": 24.625, "learning_rate": 1.0577059035161893e-06, "loss": 0.9580203294754028, "step": 7028 }, { "epoch": 2.1637426900584797, "grad_norm": 11.375, "learning_rate": 1.0570794319584451e-06, "loss": 1.3357770442962646, "step": 7030 }, { "epoch": 2.164358264081256, "grad_norm": 80.0, "learning_rate": 1.0564532885495665e-06, "loss": 1.3333418369293213, "step": 7032 }, { "epoch": 2.164973838104032, "grad_norm": 35.0, "learning_rate": 1.0558274735661214e-06, "loss": 1.3418631553649902, "step": 7034 }, { "epoch": 2.1655894121268084, "grad_norm": 26.125, "learning_rate": 1.0552019872845336e-06, "loss": 1.6847832202911377, "step": 7036 }, { "epoch": 2.1662049861495847, "grad_norm": 7.78125, "learning_rate": 1.05457682998108e-06, "loss": 1.330476999282837, "step": 7038 }, { "epoch": 2.166820560172361, "grad_norm": 10.8125, "learning_rate": 1.0539520019318943e-06, "loss": 1.4650633335113525, "step": 7040 }, { "epoch": 2.167436134195137, "grad_norm": 17.5, "learning_rate": 1.0533275034129636e-06, "loss": 1.2590150833129883, "step": 7042 }, { "epoch": 2.1680517082179134, "grad_norm": 10.3125, "learning_rate": 1.0527033347001288e-06, "loss": 0.8662137985229492, "step": 7044 }, { "epoch": 2.1686672822406896, "grad_norm": 9.5625, "learning_rate": 1.0520794960690866e-06, "loss": 1.225198745727539, "step": 7046 }, { "epoch": 2.169282856263466, "grad_norm": 11.375, "learning_rate": 1.0514559877953876e-06, "loss": 1.300356149673462, "step": 7048 }, { "epoch": 2.169898430286242, "grad_norm": 43.25, "learning_rate": 1.0508328101544355e-06, "loss": 1.5632920265197754, "step": 7050 }, { "epoch": 2.1705140043090183, "grad_norm": 44.25, "learning_rate": 1.0502099634214882e-06, "loss": 1.3846997022628784, "step": 7052 }, { "epoch": 2.1711295783317945, "grad_norm": 39.25, "learning_rate": 1.0495874478716593e-06, "loss": 1.5945847034454346, "step": 7054 }, { "epoch": 2.1717451523545708, "grad_norm": 13.9375, "learning_rate": 1.0489652637799131e-06, "loss": 1.6202548742294312, "step": 7056 }, { "epoch": 2.172360726377347, "grad_norm": 23.0, "learning_rate": 1.0483434114210694e-06, "loss": 1.3037610054016113, "step": 7058 }, { "epoch": 2.1729763004001232, "grad_norm": 8.5625, "learning_rate": 1.0477218910698017e-06, "loss": 0.9244287014007568, "step": 7060 }, { "epoch": 2.1735918744228995, "grad_norm": 17.75, "learning_rate": 1.047100703000636e-06, "loss": 1.3129549026489258, "step": 7062 }, { "epoch": 2.1742074484456757, "grad_norm": 5.90625, "learning_rate": 1.0464798474879512e-06, "loss": 0.8765146732330322, "step": 7064 }, { "epoch": 2.174823022468452, "grad_norm": 8.9375, "learning_rate": 1.0458593248059807e-06, "loss": 1.2174797058105469, "step": 7066 }, { "epoch": 2.175438596491228, "grad_norm": 11.4375, "learning_rate": 1.04523913522881e-06, "loss": 1.4736378192901611, "step": 7068 }, { "epoch": 2.1760541705140044, "grad_norm": 30.625, "learning_rate": 1.0446192790303766e-06, "loss": 1.3222408294677734, "step": 7070 }, { "epoch": 2.1766697445367806, "grad_norm": 10.125, "learning_rate": 1.0439997564844726e-06, "loss": 1.1831345558166504, "step": 7072 }, { "epoch": 2.177285318559557, "grad_norm": 16.5, "learning_rate": 1.043380567864742e-06, "loss": 1.5872745513916016, "step": 7074 }, { "epoch": 2.177900892582333, "grad_norm": 4.625, "learning_rate": 1.0427617134446797e-06, "loss": 0.9572898149490356, "step": 7076 }, { "epoch": 2.1785164666051093, "grad_norm": 23.0, "learning_rate": 1.0421431934976363e-06, "loss": 0.9428175091743469, "step": 7078 }, { "epoch": 2.1791320406278856, "grad_norm": 43.25, "learning_rate": 1.041525008296812e-06, "loss": 1.2451021671295166, "step": 7080 }, { "epoch": 2.179747614650662, "grad_norm": 25.125, "learning_rate": 1.040907158115259e-06, "loss": 1.7040786743164062, "step": 7082 }, { "epoch": 2.180363188673438, "grad_norm": 13.6875, "learning_rate": 1.0402896432258838e-06, "loss": 1.5211122035980225, "step": 7084 }, { "epoch": 2.1809787626962143, "grad_norm": 15.0, "learning_rate": 1.0396724639014427e-06, "loss": 1.5954887866973877, "step": 7086 }, { "epoch": 2.1815943367189905, "grad_norm": 17.875, "learning_rate": 1.0390556204145444e-06, "loss": 1.3188745975494385, "step": 7088 }, { "epoch": 2.1822099107417667, "grad_norm": 28.25, "learning_rate": 1.0384391130376502e-06, "loss": 1.4498337507247925, "step": 7090 }, { "epoch": 2.182825484764543, "grad_norm": 11.375, "learning_rate": 1.037822942043071e-06, "loss": 1.3745802640914917, "step": 7092 }, { "epoch": 2.183441058787319, "grad_norm": 9.1875, "learning_rate": 1.0372071077029713e-06, "loss": 1.3752477169036865, "step": 7094 }, { "epoch": 2.1840566328100954, "grad_norm": 6.09375, "learning_rate": 1.036591610289365e-06, "loss": 0.9320778846740723, "step": 7096 }, { "epoch": 2.1846722068328717, "grad_norm": 27.0, "learning_rate": 1.0359764500741184e-06, "loss": 0.8172409534454346, "step": 7098 }, { "epoch": 2.185287780855648, "grad_norm": 18.25, "learning_rate": 1.0353616273289483e-06, "loss": 1.3536324501037598, "step": 7100 }, { "epoch": 2.185903354878424, "grad_norm": 33.25, "learning_rate": 1.0347471423254226e-06, "loss": 1.423474907875061, "step": 7102 }, { "epoch": 2.1865189289012004, "grad_norm": 18.25, "learning_rate": 1.0341329953349606e-06, "loss": 1.3694900274276733, "step": 7104 }, { "epoch": 2.1871345029239766, "grad_norm": 11.125, "learning_rate": 1.0335191866288312e-06, "loss": 1.5513331890106201, "step": 7106 }, { "epoch": 2.187750076946753, "grad_norm": 23.125, "learning_rate": 1.032905716478154e-06, "loss": 1.5492780208587646, "step": 7108 }, { "epoch": 2.188365650969529, "grad_norm": 15.5, "learning_rate": 1.0322925851539005e-06, "loss": 0.5792900919914246, "step": 7110 }, { "epoch": 2.1889812249923053, "grad_norm": 6.65625, "learning_rate": 1.0316797929268905e-06, "loss": 1.271028995513916, "step": 7112 }, { "epoch": 2.1895967990150815, "grad_norm": 25.125, "learning_rate": 1.0310673400677957e-06, "loss": 1.341614007949829, "step": 7114 }, { "epoch": 2.1902123730378578, "grad_norm": 18.375, "learning_rate": 1.0304552268471373e-06, "loss": 1.2016503810882568, "step": 7116 }, { "epoch": 2.190827947060634, "grad_norm": 14.1875, "learning_rate": 1.029843453535286e-06, "loss": 0.934708297252655, "step": 7118 }, { "epoch": 2.1914435210834102, "grad_norm": 36.25, "learning_rate": 1.0292320204024623e-06, "loss": 1.4819412231445312, "step": 7120 }, { "epoch": 2.1920590951061865, "grad_norm": 24.5, "learning_rate": 1.0286209277187384e-06, "loss": 1.5816731452941895, "step": 7122 }, { "epoch": 2.1926746691289627, "grad_norm": 16.625, "learning_rate": 1.028010175754033e-06, "loss": 0.9610680341720581, "step": 7124 }, { "epoch": 2.193290243151739, "grad_norm": 22.375, "learning_rate": 1.027399764778117e-06, "loss": 1.6568622589111328, "step": 7126 }, { "epoch": 2.193905817174515, "grad_norm": 24.75, "learning_rate": 1.026789695060609e-06, "loss": 1.8416197299957275, "step": 7128 }, { "epoch": 2.1945213911972914, "grad_norm": 15.8125, "learning_rate": 1.0261799668709785e-06, "loss": 1.2987570762634277, "step": 7130 }, { "epoch": 2.1951369652200676, "grad_norm": 10.5, "learning_rate": 1.025570580478542e-06, "loss": 1.2055429220199585, "step": 7132 }, { "epoch": 2.195752539242844, "grad_norm": 8.5, "learning_rate": 1.0249615361524663e-06, "loss": 1.4445521831512451, "step": 7134 }, { "epoch": 2.19636811326562, "grad_norm": 16.375, "learning_rate": 1.0243528341617681e-06, "loss": 1.2120137214660645, "step": 7136 }, { "epoch": 2.1969836872883963, "grad_norm": 16.625, "learning_rate": 1.0237444747753104e-06, "loss": 1.4288749694824219, "step": 7138 }, { "epoch": 2.1975992613111726, "grad_norm": 13.5625, "learning_rate": 1.023136458261807e-06, "loss": 1.5977098941802979, "step": 7140 }, { "epoch": 2.198214835333949, "grad_norm": 7.28125, "learning_rate": 1.0225287848898193e-06, "loss": 1.297478437423706, "step": 7142 }, { "epoch": 2.198830409356725, "grad_norm": 52.75, "learning_rate": 1.021921454927757e-06, "loss": 1.3651518821716309, "step": 7144 }, { "epoch": 2.1994459833795013, "grad_norm": 19.5, "learning_rate": 1.0213144686438791e-06, "loss": 1.0678225755691528, "step": 7146 }, { "epoch": 2.2000615574022775, "grad_norm": 24.375, "learning_rate": 1.0207078263062918e-06, "loss": 1.4905214309692383, "step": 7148 }, { "epoch": 2.2006771314250537, "grad_norm": 25.75, "learning_rate": 1.0201015281829492e-06, "loss": 1.4449894428253174, "step": 7150 }, { "epoch": 2.20129270544783, "grad_norm": 13.3125, "learning_rate": 1.0194955745416549e-06, "loss": 1.0292598009109497, "step": 7152 }, { "epoch": 2.201908279470606, "grad_norm": 32.25, "learning_rate": 1.0188899656500588e-06, "loss": 1.6224634647369385, "step": 7154 }, { "epoch": 2.2025238534933824, "grad_norm": 8.125, "learning_rate": 1.0182847017756585e-06, "loss": 1.441828966140747, "step": 7156 }, { "epoch": 2.2031394275161587, "grad_norm": 12.5625, "learning_rate": 1.0176797831858012e-06, "loss": 1.4349195957183838, "step": 7158 }, { "epoch": 2.203755001538935, "grad_norm": 8.3125, "learning_rate": 1.0170752101476783e-06, "loss": 1.3309807777404785, "step": 7160 }, { "epoch": 2.204370575561711, "grad_norm": 14.375, "learning_rate": 1.0164709829283315e-06, "loss": 1.332626223564148, "step": 7162 }, { "epoch": 2.2049861495844874, "grad_norm": 9.9375, "learning_rate": 1.0158671017946491e-06, "loss": 0.7435551285743713, "step": 7164 }, { "epoch": 2.2056017236072636, "grad_norm": 19.5, "learning_rate": 1.0152635670133648e-06, "loss": 1.1366829872131348, "step": 7166 }, { "epoch": 2.20621729763004, "grad_norm": 8.8125, "learning_rate": 1.0146603788510617e-06, "loss": 1.3661516904830933, "step": 7168 }, { "epoch": 2.206832871652816, "grad_norm": 8.5, "learning_rate": 1.0140575375741676e-06, "loss": 1.4987188577651978, "step": 7170 }, { "epoch": 2.2074484456755923, "grad_norm": 20.25, "learning_rate": 1.0134550434489594e-06, "loss": 1.2765529155731201, "step": 7172 }, { "epoch": 2.2080640196983685, "grad_norm": 13.0, "learning_rate": 1.0128528967415594e-06, "loss": 1.1727826595306396, "step": 7174 }, { "epoch": 2.208679593721145, "grad_norm": 17.5, "learning_rate": 1.0122510977179347e-06, "loss": 1.641790747642517, "step": 7176 }, { "epoch": 2.209295167743921, "grad_norm": 21.125, "learning_rate": 1.0116496466439029e-06, "loss": 1.4589368104934692, "step": 7178 }, { "epoch": 2.2099107417666977, "grad_norm": 12.25, "learning_rate": 1.0110485437851243e-06, "loss": 1.2559046745300293, "step": 7180 }, { "epoch": 2.2105263157894735, "grad_norm": 11.8125, "learning_rate": 1.0104477894071066e-06, "loss": 1.2748613357543945, "step": 7182 }, { "epoch": 2.21114188981225, "grad_norm": 12.5, "learning_rate": 1.0098473837752049e-06, "loss": 1.525190830230713, "step": 7184 }, { "epoch": 2.2117574638350264, "grad_norm": 14.6875, "learning_rate": 1.009247327154618e-06, "loss": 1.805044174194336, "step": 7186 }, { "epoch": 2.2123730378578026, "grad_norm": 16.625, "learning_rate": 1.0086476198103918e-06, "loss": 1.4728741645812988, "step": 7188 }, { "epoch": 2.212988611880579, "grad_norm": 29.875, "learning_rate": 1.008048262007418e-06, "loss": 1.122135877609253, "step": 7190 }, { "epoch": 2.213604185903355, "grad_norm": 22.125, "learning_rate": 1.0074492540104334e-06, "loss": 1.3765050172805786, "step": 7192 }, { "epoch": 2.2142197599261313, "grad_norm": 8.5, "learning_rate": 1.0068505960840204e-06, "loss": 1.5011130571365356, "step": 7194 }, { "epoch": 2.2148353339489075, "grad_norm": 6.28125, "learning_rate": 1.0062522884926068e-06, "loss": 1.3109992742538452, "step": 7196 }, { "epoch": 2.2154509079716838, "grad_norm": 9.375, "learning_rate": 1.0056543315004669e-06, "loss": 1.220149278640747, "step": 7198 }, { "epoch": 2.21606648199446, "grad_norm": 11.0, "learning_rate": 1.0050567253717172e-06, "loss": 1.2714948654174805, "step": 7200 }, { "epoch": 2.2166820560172362, "grad_norm": 21.125, "learning_rate": 1.0044594703703222e-06, "loss": 1.0933783054351807, "step": 7202 }, { "epoch": 2.2172976300400125, "grad_norm": 18.25, "learning_rate": 1.0038625667600903e-06, "loss": 1.6369684934616089, "step": 7204 }, { "epoch": 2.2179132040627887, "grad_norm": 9.5625, "learning_rate": 1.0032660148046734e-06, "loss": 1.3350924253463745, "step": 7206 }, { "epoch": 2.218528778085565, "grad_norm": 13.9375, "learning_rate": 1.00266981476757e-06, "loss": 1.3873803615570068, "step": 7208 }, { "epoch": 2.219144352108341, "grad_norm": 20.375, "learning_rate": 1.0020739669121223e-06, "loss": 1.4078598022460938, "step": 7210 }, { "epoch": 2.2197599261311174, "grad_norm": 38.5, "learning_rate": 1.0014784715015161e-06, "loss": 1.376378059387207, "step": 7212 }, { "epoch": 2.2203755001538936, "grad_norm": 14.5, "learning_rate": 1.0008833287987842e-06, "loss": 1.2546615600585938, "step": 7214 }, { "epoch": 2.22099107417667, "grad_norm": 17.625, "learning_rate": 1.0002885390668002e-06, "loss": 1.1415352821350098, "step": 7216 }, { "epoch": 2.221606648199446, "grad_norm": 15.1875, "learning_rate": 9.996941025682843e-07, "loss": 1.455315113067627, "step": 7218 }, { "epoch": 2.2222222222222223, "grad_norm": 31.875, "learning_rate": 9.991000195657988e-07, "loss": 1.6557023525238037, "step": 7220 }, { "epoch": 2.2228377962449986, "grad_norm": 10.375, "learning_rate": 9.985062903217516e-07, "loss": 0.691315770149231, "step": 7222 }, { "epoch": 2.223453370267775, "grad_norm": 16.0, "learning_rate": 9.97912915098394e-07, "loss": 1.4408737421035767, "step": 7224 }, { "epoch": 2.224068944290551, "grad_norm": 20.0, "learning_rate": 9.973198941578195e-07, "loss": 1.5671606063842773, "step": 7226 }, { "epoch": 2.2246845183133273, "grad_norm": 11.1875, "learning_rate": 9.967272277619666e-07, "loss": 1.2749438285827637, "step": 7228 }, { "epoch": 2.2253000923361035, "grad_norm": 11.9375, "learning_rate": 9.961349161726172e-07, "loss": 1.1752612590789795, "step": 7230 }, { "epoch": 2.2259156663588797, "grad_norm": 6.03125, "learning_rate": 9.955429596513951e-07, "loss": 1.1268336772918701, "step": 7232 }, { "epoch": 2.226531240381656, "grad_norm": 29.0, "learning_rate": 9.949513584597688e-07, "loss": 1.4443705081939697, "step": 7234 }, { "epoch": 2.227146814404432, "grad_norm": 11.625, "learning_rate": 9.943601128590489e-07, "loss": 1.3743427991867065, "step": 7236 }, { "epoch": 2.2277623884272084, "grad_norm": 71.0, "learning_rate": 9.93769223110389e-07, "loss": 1.7574753761291504, "step": 7238 }, { "epoch": 2.2283779624499847, "grad_norm": 17.25, "learning_rate": 9.93178689474787e-07, "loss": 1.6704533100128174, "step": 7240 }, { "epoch": 2.228993536472761, "grad_norm": 14.4375, "learning_rate": 9.925885122130812e-07, "loss": 0.9240731000900269, "step": 7242 }, { "epoch": 2.229609110495537, "grad_norm": 12.5625, "learning_rate": 9.919986915859533e-07, "loss": 1.3428912162780762, "step": 7244 }, { "epoch": 2.2302246845183133, "grad_norm": 18.5, "learning_rate": 9.914092278539291e-07, "loss": 1.1740977764129639, "step": 7246 }, { "epoch": 2.2308402585410896, "grad_norm": 6.65625, "learning_rate": 9.90820121277374e-07, "loss": 1.195859670639038, "step": 7248 }, { "epoch": 2.231455832563866, "grad_norm": 15.375, "learning_rate": 9.902313721164975e-07, "loss": 1.3590707778930664, "step": 7250 }, { "epoch": 2.232071406586642, "grad_norm": 14.5625, "learning_rate": 9.896429806313515e-07, "loss": 1.4215617179870605, "step": 7252 }, { "epoch": 2.2326869806094183, "grad_norm": 20.0, "learning_rate": 9.890549470818282e-07, "loss": 1.3735675811767578, "step": 7254 }, { "epoch": 2.2333025546321945, "grad_norm": 12.5, "learning_rate": 9.88467271727663e-07, "loss": 1.3217053413391113, "step": 7256 }, { "epoch": 2.2339181286549707, "grad_norm": 9.25, "learning_rate": 9.878799548284332e-07, "loss": 1.4295120239257812, "step": 7258 }, { "epoch": 2.234533702677747, "grad_norm": 11.75, "learning_rate": 9.872929966435569e-07, "loss": 1.1404094696044922, "step": 7260 }, { "epoch": 2.235149276700523, "grad_norm": 14.0625, "learning_rate": 9.86706397432294e-07, "loss": 1.7106270790100098, "step": 7262 }, { "epoch": 2.2357648507232994, "grad_norm": 25.0, "learning_rate": 9.861201574537466e-07, "loss": 1.638358235359192, "step": 7264 }, { "epoch": 2.2363804247460757, "grad_norm": 62.75, "learning_rate": 9.855342769668576e-07, "loss": 1.517430067062378, "step": 7266 }, { "epoch": 2.236995998768852, "grad_norm": 10.4375, "learning_rate": 9.849487562304105e-07, "loss": 1.170607089996338, "step": 7268 }, { "epoch": 2.237611572791628, "grad_norm": 13.0625, "learning_rate": 9.843635955030307e-07, "loss": 1.1815297603607178, "step": 7270 }, { "epoch": 2.2382271468144044, "grad_norm": 21.625, "learning_rate": 9.837787950431848e-07, "loss": 1.6441622972488403, "step": 7272 }, { "epoch": 2.2388427208371806, "grad_norm": 17.875, "learning_rate": 9.831943551091793e-07, "loss": 1.020333170890808, "step": 7274 }, { "epoch": 2.239458294859957, "grad_norm": 19.125, "learning_rate": 9.82610275959162e-07, "loss": 1.3882975578308105, "step": 7276 }, { "epoch": 2.240073868882733, "grad_norm": 18.125, "learning_rate": 9.820265578511218e-07, "loss": 1.380478024482727, "step": 7278 }, { "epoch": 2.2406894429055093, "grad_norm": 16.375, "learning_rate": 9.814432010428871e-07, "loss": 1.0519275665283203, "step": 7280 }, { "epoch": 2.2413050169282855, "grad_norm": 8.75, "learning_rate": 9.808602057921276e-07, "loss": 1.449505090713501, "step": 7282 }, { "epoch": 2.2419205909510618, "grad_norm": 20.375, "learning_rate": 9.80277572356353e-07, "loss": 1.6460387706756592, "step": 7284 }, { "epoch": 2.242536164973838, "grad_norm": 13.5625, "learning_rate": 9.79695300992913e-07, "loss": 1.3300621509552002, "step": 7286 }, { "epoch": 2.2431517389966142, "grad_norm": 14.6875, "learning_rate": 9.791133919589974e-07, "loss": 1.4139244556427002, "step": 7288 }, { "epoch": 2.2437673130193905, "grad_norm": 12.5625, "learning_rate": 9.785318455116363e-07, "loss": 1.2685773372650146, "step": 7290 }, { "epoch": 2.2443828870421667, "grad_norm": 18.125, "learning_rate": 9.779506619076998e-07, "loss": 1.2302950620651245, "step": 7292 }, { "epoch": 2.244998461064943, "grad_norm": 15.375, "learning_rate": 9.773698414038966e-07, "loss": 1.5846174955368042, "step": 7294 }, { "epoch": 2.245614035087719, "grad_norm": 10.5625, "learning_rate": 9.767893842567763e-07, "loss": 1.4596608877182007, "step": 7296 }, { "epoch": 2.2462296091104954, "grad_norm": 14.375, "learning_rate": 9.762092907227272e-07, "loss": 1.5452144145965576, "step": 7298 }, { "epoch": 2.2468451831332716, "grad_norm": 16.125, "learning_rate": 9.756295610579775e-07, "loss": 1.6285851001739502, "step": 7300 }, { "epoch": 2.247460757156048, "grad_norm": 12.375, "learning_rate": 9.750501955185943e-07, "loss": 1.5760045051574707, "step": 7302 }, { "epoch": 2.248076331178824, "grad_norm": 18.125, "learning_rate": 9.744711943604844e-07, "loss": 1.7773367166519165, "step": 7304 }, { "epoch": 2.2486919052016003, "grad_norm": 17.625, "learning_rate": 9.738925578393922e-07, "loss": 1.3781275749206543, "step": 7306 }, { "epoch": 2.2493074792243766, "grad_norm": 20.5, "learning_rate": 9.733142862109036e-07, "loss": 0.925908088684082, "step": 7308 }, { "epoch": 2.249923053247153, "grad_norm": 15.125, "learning_rate": 9.727363797304407e-07, "loss": 1.5970616340637207, "step": 7310 }, { "epoch": 2.250538627269929, "grad_norm": 6.15625, "learning_rate": 9.721588386532659e-07, "loss": 1.098137617111206, "step": 7312 }, { "epoch": 2.2511542012927053, "grad_norm": 8.5625, "learning_rate": 9.715816632344803e-07, "loss": 1.1556612253189087, "step": 7314 }, { "epoch": 2.2517697753154815, "grad_norm": 20.75, "learning_rate": 9.710048537290218e-07, "loss": 0.9828079342842102, "step": 7316 }, { "epoch": 2.2523853493382577, "grad_norm": 14.4375, "learning_rate": 9.704284103916682e-07, "loss": 1.13887619972229, "step": 7318 }, { "epoch": 2.253000923361034, "grad_norm": 17.375, "learning_rate": 9.69852333477036e-07, "loss": 1.549184799194336, "step": 7320 }, { "epoch": 2.2536164973838106, "grad_norm": 5.9375, "learning_rate": 9.69276623239578e-07, "loss": 1.257533311843872, "step": 7322 }, { "epoch": 2.2542320714065864, "grad_norm": 11.4375, "learning_rate": 9.687012799335864e-07, "loss": 1.1815621852874756, "step": 7324 }, { "epoch": 2.254847645429363, "grad_norm": 17.75, "learning_rate": 9.681263038131916e-07, "loss": 0.6288884878158569, "step": 7326 }, { "epoch": 2.255463219452139, "grad_norm": 11.4375, "learning_rate": 9.675516951323602e-07, "loss": 1.4890342950820923, "step": 7328 }, { "epoch": 2.2560787934749156, "grad_norm": 9.5, "learning_rate": 9.669774541448983e-07, "loss": 1.2622697353363037, "step": 7330 }, { "epoch": 2.2566943674976914, "grad_norm": 16.125, "learning_rate": 9.664035811044478e-07, "loss": 1.391549825668335, "step": 7332 }, { "epoch": 2.257309941520468, "grad_norm": 15.75, "learning_rate": 9.658300762644905e-07, "loss": 0.9622032642364502, "step": 7334 }, { "epoch": 2.257925515543244, "grad_norm": 36.0, "learning_rate": 9.652569398783432e-07, "loss": 0.6518381237983704, "step": 7336 }, { "epoch": 2.2585410895660205, "grad_norm": 35.5, "learning_rate": 9.64684172199161e-07, "loss": 1.6143358945846558, "step": 7338 }, { "epoch": 2.2591566635887967, "grad_norm": 10.9375, "learning_rate": 9.641117734799363e-07, "loss": 1.3510743379592896, "step": 7340 }, { "epoch": 2.259772237611573, "grad_norm": 14.375, "learning_rate": 9.635397439734982e-07, "loss": 1.3647127151489258, "step": 7342 }, { "epoch": 2.260387811634349, "grad_norm": 18.5, "learning_rate": 9.629680839325124e-07, "loss": 1.4263982772827148, "step": 7344 }, { "epoch": 2.2610033856571254, "grad_norm": 12.9375, "learning_rate": 9.623967936094823e-07, "loss": 1.775465488433838, "step": 7346 }, { "epoch": 2.2616189596799017, "grad_norm": 6.96875, "learning_rate": 9.618258732567473e-07, "loss": 1.0765156745910645, "step": 7348 }, { "epoch": 2.262234533702678, "grad_norm": 21.125, "learning_rate": 9.612553231264832e-07, "loss": 1.3329508304595947, "step": 7350 }, { "epoch": 2.262850107725454, "grad_norm": 8.5, "learning_rate": 9.606851434707034e-07, "loss": 1.1238059997558594, "step": 7352 }, { "epoch": 2.2634656817482304, "grad_norm": 30.25, "learning_rate": 9.601153345412567e-07, "loss": 1.6501717567443848, "step": 7354 }, { "epoch": 2.2640812557710066, "grad_norm": 13.6875, "learning_rate": 9.595458965898277e-07, "loss": 1.6731393337249756, "step": 7356 }, { "epoch": 2.264696829793783, "grad_norm": 2.953125, "learning_rate": 9.589768298679387e-07, "loss": 1.5364469289779663, "step": 7358 }, { "epoch": 2.265312403816559, "grad_norm": 18.125, "learning_rate": 9.584081346269465e-07, "loss": 1.5376877784729004, "step": 7360 }, { "epoch": 2.2659279778393353, "grad_norm": 80.0, "learning_rate": 9.578398111180447e-07, "loss": 1.4920644760131836, "step": 7362 }, { "epoch": 2.2665435518621115, "grad_norm": 14.0, "learning_rate": 9.572718595922622e-07, "loss": 0.7458437085151672, "step": 7364 }, { "epoch": 2.2671591258848878, "grad_norm": 9.0625, "learning_rate": 9.567042803004643e-07, "loss": 1.1495895385742188, "step": 7366 }, { "epoch": 2.267774699907664, "grad_norm": 9.9375, "learning_rate": 9.56137073493351e-07, "loss": 1.1583331823349, "step": 7368 }, { "epoch": 2.2683902739304402, "grad_norm": 8.125, "learning_rate": 9.555702394214581e-07, "loss": 1.0664770603179932, "step": 7370 }, { "epoch": 2.2690058479532165, "grad_norm": 39.25, "learning_rate": 9.550037783351575e-07, "loss": 1.0306137800216675, "step": 7372 }, { "epoch": 2.2696214219759927, "grad_norm": 11.0625, "learning_rate": 9.544376904846547e-07, "loss": 0.9307693839073181, "step": 7374 }, { "epoch": 2.270236995998769, "grad_norm": 16.125, "learning_rate": 9.538719761199927e-07, "loss": 1.4178649187088013, "step": 7376 }, { "epoch": 2.270852570021545, "grad_norm": 25.75, "learning_rate": 9.533066354910469e-07, "loss": 1.4406909942626953, "step": 7378 }, { "epoch": 2.2714681440443214, "grad_norm": 11.625, "learning_rate": 9.527416688475295e-07, "loss": 0.8210102319717407, "step": 7380 }, { "epoch": 2.2720837180670976, "grad_norm": 40.75, "learning_rate": 9.521770764389873e-07, "loss": 1.2936944961547852, "step": 7382 }, { "epoch": 2.272699292089874, "grad_norm": 6.84375, "learning_rate": 9.516128585148006e-07, "loss": 1.1711663007736206, "step": 7384 }, { "epoch": 2.27331486611265, "grad_norm": 14.8125, "learning_rate": 9.510490153241858e-07, "loss": 1.367745041847229, "step": 7386 }, { "epoch": 2.2739304401354263, "grad_norm": 13.875, "learning_rate": 9.504855471161922e-07, "loss": 1.5758328437805176, "step": 7388 }, { "epoch": 2.2745460141582026, "grad_norm": 13.25, "learning_rate": 9.499224541397051e-07, "loss": 1.3060379028320312, "step": 7390 }, { "epoch": 2.275161588180979, "grad_norm": 11.3125, "learning_rate": 9.493597366434435e-07, "loss": 1.6739366054534912, "step": 7392 }, { "epoch": 2.275777162203755, "grad_norm": 21.125, "learning_rate": 9.487973948759593e-07, "loss": 1.3423973321914673, "step": 7394 }, { "epoch": 2.2763927362265313, "grad_norm": 12.5625, "learning_rate": 9.482354290856407e-07, "loss": 1.2677085399627686, "step": 7396 }, { "epoch": 2.2770083102493075, "grad_norm": 23.25, "learning_rate": 9.476738395207082e-07, "loss": 1.1683954000473022, "step": 7398 }, { "epoch": 2.2776238842720837, "grad_norm": 24.25, "learning_rate": 9.471126264292158e-07, "loss": 1.2260327339172363, "step": 7400 }, { "epoch": 2.27823945829486, "grad_norm": 13.375, "learning_rate": 9.465517900590535e-07, "loss": 1.0855873823165894, "step": 7402 }, { "epoch": 2.278855032317636, "grad_norm": 11.625, "learning_rate": 9.459913306579419e-07, "loss": 1.4089841842651367, "step": 7404 }, { "epoch": 2.2794706063404124, "grad_norm": 8.3125, "learning_rate": 9.454312484734374e-07, "loss": 1.1654994487762451, "step": 7406 }, { "epoch": 2.2800861803631887, "grad_norm": 11.3125, "learning_rate": 9.448715437529287e-07, "loss": 1.3815590143203735, "step": 7408 }, { "epoch": 2.280701754385965, "grad_norm": 22.25, "learning_rate": 9.44312216743638e-07, "loss": 0.8755576610565186, "step": 7410 }, { "epoch": 2.281317328408741, "grad_norm": 10.75, "learning_rate": 9.437532676926205e-07, "loss": 0.9427816867828369, "step": 7412 }, { "epoch": 2.2819329024315174, "grad_norm": 6.4375, "learning_rate": 9.431946968467651e-07, "loss": 0.912126898765564, "step": 7414 }, { "epoch": 2.2825484764542936, "grad_norm": 17.0, "learning_rate": 9.426365044527928e-07, "loss": 1.6929936408996582, "step": 7416 }, { "epoch": 2.28316405047707, "grad_norm": 15.8125, "learning_rate": 9.420786907572579e-07, "loss": 1.2102103233337402, "step": 7418 }, { "epoch": 2.283779624499846, "grad_norm": 10.1875, "learning_rate": 9.415212560065473e-07, "loss": 1.3206998109817505, "step": 7420 }, { "epoch": 2.2843951985226223, "grad_norm": 22.5, "learning_rate": 9.40964200446881e-07, "loss": 0.8749468326568604, "step": 7422 }, { "epoch": 2.2850107725453985, "grad_norm": 36.5, "learning_rate": 9.404075243243105e-07, "loss": 1.5027368068695068, "step": 7424 }, { "epoch": 2.2856263465681748, "grad_norm": 26.875, "learning_rate": 9.398512278847203e-07, "loss": 1.502715826034546, "step": 7426 }, { "epoch": 2.286241920590951, "grad_norm": 22.0, "learning_rate": 9.392953113738278e-07, "loss": 1.255620002746582, "step": 7428 }, { "epoch": 2.2868574946137272, "grad_norm": 6.53125, "learning_rate": 9.387397750371812e-07, "loss": 1.1149355173110962, "step": 7430 }, { "epoch": 2.2874730686365035, "grad_norm": 16.25, "learning_rate": 9.381846191201622e-07, "loss": 1.1521387100219727, "step": 7432 }, { "epoch": 2.2880886426592797, "grad_norm": 35.5, "learning_rate": 9.376298438679835e-07, "loss": 1.296518087387085, "step": 7434 }, { "epoch": 2.288704216682056, "grad_norm": 6.0, "learning_rate": 9.370754495256893e-07, "loss": 1.2158961296081543, "step": 7436 }, { "epoch": 2.289319790704832, "grad_norm": 9.625, "learning_rate": 9.365214363381575e-07, "loss": 1.3488669395446777, "step": 7438 }, { "epoch": 2.2899353647276084, "grad_norm": 5.96875, "learning_rate": 9.359678045500958e-07, "loss": 1.019263744354248, "step": 7440 }, { "epoch": 2.2905509387503846, "grad_norm": 10.1875, "learning_rate": 9.354145544060435e-07, "loss": 0.9132190942764282, "step": 7442 }, { "epoch": 2.291166512773161, "grad_norm": 12.75, "learning_rate": 9.348616861503722e-07, "loss": 1.2218446731567383, "step": 7444 }, { "epoch": 2.291782086795937, "grad_norm": 19.25, "learning_rate": 9.343092000272847e-07, "loss": 1.7379786968231201, "step": 7446 }, { "epoch": 2.2923976608187133, "grad_norm": 12.5625, "learning_rate": 9.337570962808148e-07, "loss": 1.300768494606018, "step": 7448 }, { "epoch": 2.2930132348414896, "grad_norm": 16.5, "learning_rate": 9.332053751548265e-07, "loss": 1.4459890127182007, "step": 7450 }, { "epoch": 2.293628808864266, "grad_norm": 9.625, "learning_rate": 9.326540368930166e-07, "loss": 1.1389377117156982, "step": 7452 }, { "epoch": 2.294244382887042, "grad_norm": 6.15625, "learning_rate": 9.321030817389115e-07, "loss": 1.1465227603912354, "step": 7454 }, { "epoch": 2.2948599569098183, "grad_norm": 14.625, "learning_rate": 9.315525099358687e-07, "loss": 1.3622372150421143, "step": 7456 }, { "epoch": 2.2954755309325945, "grad_norm": 17.125, "learning_rate": 9.31002321727076e-07, "loss": 1.5798914432525635, "step": 7458 }, { "epoch": 2.2960911049553707, "grad_norm": 27.625, "learning_rate": 9.304525173555531e-07, "loss": 1.3255949020385742, "step": 7460 }, { "epoch": 2.296706678978147, "grad_norm": 12.75, "learning_rate": 9.299030970641481e-07, "loss": 1.316671371459961, "step": 7462 }, { "epoch": 2.297322253000923, "grad_norm": 13.1875, "learning_rate": 9.293540610955418e-07, "loss": 1.4738574028015137, "step": 7464 }, { "epoch": 2.2979378270236994, "grad_norm": 15.625, "learning_rate": 9.288054096922433e-07, "loss": 1.0142216682434082, "step": 7466 }, { "epoch": 2.2985534010464757, "grad_norm": 14.1875, "learning_rate": 9.282571430965921e-07, "loss": 1.0364811420440674, "step": 7468 }, { "epoch": 2.299168975069252, "grad_norm": 17.5, "learning_rate": 9.277092615507592e-07, "loss": 0.938054621219635, "step": 7470 }, { "epoch": 2.2997845490920286, "grad_norm": 13.4375, "learning_rate": 9.271617652967437e-07, "loss": 1.342480182647705, "step": 7472 }, { "epoch": 2.3004001231148044, "grad_norm": 16.25, "learning_rate": 9.266146545763756e-07, "loss": 1.4579970836639404, "step": 7474 }, { "epoch": 2.301015697137581, "grad_norm": 16.375, "learning_rate": 9.26067929631315e-07, "loss": 1.1322877407073975, "step": 7476 }, { "epoch": 2.301631271160357, "grad_norm": 6.34375, "learning_rate": 9.255215907030496e-07, "loss": 1.4794162511825562, "step": 7478 }, { "epoch": 2.3022468451831335, "grad_norm": 25.625, "learning_rate": 9.249756380328987e-07, "loss": 1.5662634372711182, "step": 7480 }, { "epoch": 2.3028624192059093, "grad_norm": 19.25, "learning_rate": 9.244300718620106e-07, "loss": 1.1814448833465576, "step": 7482 }, { "epoch": 2.303477993228686, "grad_norm": 9.375, "learning_rate": 9.238848924313618e-07, "loss": 1.393389344215393, "step": 7484 }, { "epoch": 2.3040935672514617, "grad_norm": 13.25, "learning_rate": 9.233400999817587e-07, "loss": 1.3429032564163208, "step": 7486 }, { "epoch": 2.3047091412742384, "grad_norm": 50.75, "learning_rate": 9.227956947538371e-07, "loss": 1.1032218933105469, "step": 7488 }, { "epoch": 2.3053247152970147, "grad_norm": 9.8125, "learning_rate": 9.222516769880616e-07, "loss": 1.412358283996582, "step": 7490 }, { "epoch": 2.305940289319791, "grad_norm": 26.25, "learning_rate": 9.217080469247248e-07, "loss": 1.5782816410064697, "step": 7492 }, { "epoch": 2.306555863342567, "grad_norm": 13.4375, "learning_rate": 9.211648048039491e-07, "loss": 1.264474868774414, "step": 7494 }, { "epoch": 2.3071714373653434, "grad_norm": 9.0625, "learning_rate": 9.206219508656857e-07, "loss": 1.283721923828125, "step": 7496 }, { "epoch": 2.3077870113881196, "grad_norm": 28.625, "learning_rate": 9.200794853497126e-07, "loss": 1.184441089630127, "step": 7498 }, { "epoch": 2.308402585410896, "grad_norm": 47.5, "learning_rate": 9.195374084956382e-07, "loss": 1.6521155834197998, "step": 7500 }, { "epoch": 2.309018159433672, "grad_norm": 15.6875, "learning_rate": 9.189957205428987e-07, "loss": 1.0816607475280762, "step": 7502 }, { "epoch": 2.3096337334564483, "grad_norm": 19.0, "learning_rate": 9.184544217307577e-07, "loss": 1.6764347553253174, "step": 7504 }, { "epoch": 2.3102493074792245, "grad_norm": 4.625, "learning_rate": 9.179135122983076e-07, "loss": 1.303048849105835, "step": 7506 }, { "epoch": 2.3108648815020008, "grad_norm": 10.0, "learning_rate": 9.173729924844692e-07, "loss": 1.2569313049316406, "step": 7508 }, { "epoch": 2.311480455524777, "grad_norm": 9.875, "learning_rate": 9.168328625279903e-07, "loss": 1.2223122119903564, "step": 7510 }, { "epoch": 2.312096029547553, "grad_norm": 8.0625, "learning_rate": 9.162931226674469e-07, "loss": 1.1510555744171143, "step": 7512 }, { "epoch": 2.3127116035703295, "grad_norm": 28.25, "learning_rate": 9.157537731412427e-07, "loss": 1.366336464881897, "step": 7514 }, { "epoch": 2.3133271775931057, "grad_norm": 15.5625, "learning_rate": 9.152148141876096e-07, "loss": 1.4099074602127075, "step": 7516 }, { "epoch": 2.313942751615882, "grad_norm": 11.875, "learning_rate": 9.146762460446054e-07, "loss": 1.520900011062622, "step": 7518 }, { "epoch": 2.314558325638658, "grad_norm": 10.0, "learning_rate": 9.141380689501168e-07, "loss": 1.2113454341888428, "step": 7520 }, { "epoch": 2.3151738996614344, "grad_norm": 13.9375, "learning_rate": 9.136002831418578e-07, "loss": 1.0119948387145996, "step": 7522 }, { "epoch": 2.3157894736842106, "grad_norm": 86.5, "learning_rate": 9.13062888857368e-07, "loss": 0.6641063094139099, "step": 7524 }, { "epoch": 2.316405047706987, "grad_norm": 15.25, "learning_rate": 9.125258863340153e-07, "loss": 1.3526802062988281, "step": 7526 }, { "epoch": 2.317020621729763, "grad_norm": 25.375, "learning_rate": 9.119892758089951e-07, "loss": 1.0443966388702393, "step": 7528 }, { "epoch": 2.3176361957525393, "grad_norm": 10.625, "learning_rate": 9.114530575193276e-07, "loss": 1.4699032306671143, "step": 7530 }, { "epoch": 2.3182517697753156, "grad_norm": 15.875, "learning_rate": 9.109172317018624e-07, "loss": 1.4337401390075684, "step": 7532 }, { "epoch": 2.318867343798092, "grad_norm": 13.5, "learning_rate": 9.103817985932737e-07, "loss": 1.5521501302719116, "step": 7534 }, { "epoch": 2.319482917820868, "grad_norm": 9.1875, "learning_rate": 9.098467584300621e-07, "loss": 1.0651452541351318, "step": 7536 }, { "epoch": 2.3200984918436443, "grad_norm": 11.9375, "learning_rate": 9.093121114485572e-07, "loss": 1.4797773361206055, "step": 7538 }, { "epoch": 2.3207140658664205, "grad_norm": 37.5, "learning_rate": 9.087778578849118e-07, "loss": 1.4951937198638916, "step": 7540 }, { "epoch": 2.3213296398891967, "grad_norm": 18.5, "learning_rate": 9.082439979751068e-07, "loss": 1.453412652015686, "step": 7542 }, { "epoch": 2.321945213911973, "grad_norm": 10.625, "learning_rate": 9.077105319549492e-07, "loss": 1.2274169921875, "step": 7544 }, { "epoch": 2.322560787934749, "grad_norm": 8.4375, "learning_rate": 9.071774600600706e-07, "loss": 1.292219638824463, "step": 7546 }, { "epoch": 2.3231763619575254, "grad_norm": 14.1875, "learning_rate": 9.066447825259298e-07, "loss": 1.2093820571899414, "step": 7548 }, { "epoch": 2.3237919359803016, "grad_norm": 13.5, "learning_rate": 9.061124995878117e-07, "loss": 1.073814034461975, "step": 7550 }, { "epoch": 2.324407510003078, "grad_norm": 23.25, "learning_rate": 9.055806114808254e-07, "loss": 0.8666530251502991, "step": 7552 }, { "epoch": 2.325023084025854, "grad_norm": 13.6875, "learning_rate": 9.050491184399073e-07, "loss": 1.2789653539657593, "step": 7554 }, { "epoch": 2.3256386580486303, "grad_norm": 9.5625, "learning_rate": 9.045180206998174e-07, "loss": 1.3404902219772339, "step": 7556 }, { "epoch": 2.3262542320714066, "grad_norm": 14.4375, "learning_rate": 9.039873184951435e-07, "loss": 1.2336769104003906, "step": 7558 }, { "epoch": 2.326869806094183, "grad_norm": 50.5, "learning_rate": 9.034570120602964e-07, "loss": 1.631737232208252, "step": 7560 }, { "epoch": 2.327485380116959, "grad_norm": 24.375, "learning_rate": 9.029271016295132e-07, "loss": 1.5029709339141846, "step": 7562 }, { "epoch": 2.3281009541397353, "grad_norm": 50.5, "learning_rate": 9.023975874368567e-07, "loss": 1.0878214836120605, "step": 7564 }, { "epoch": 2.3287165281625115, "grad_norm": 75.5, "learning_rate": 9.018684697162127e-07, "loss": 0.8998363614082336, "step": 7566 }, { "epoch": 2.3293321021852877, "grad_norm": 15.5625, "learning_rate": 9.013397487012941e-07, "loss": 1.1986682415008545, "step": 7568 }, { "epoch": 2.329947676208064, "grad_norm": 23.75, "learning_rate": 9.008114246256374e-07, "loss": 1.7010976076126099, "step": 7570 }, { "epoch": 2.33056325023084, "grad_norm": 26.125, "learning_rate": 9.002834977226034e-07, "loss": 1.6871237754821777, "step": 7572 }, { "epoch": 2.3311788242536164, "grad_norm": 65.5, "learning_rate": 8.997559682253784e-07, "loss": 1.5429918766021729, "step": 7574 }, { "epoch": 2.3317943982763927, "grad_norm": 26.5, "learning_rate": 8.992288363669732e-07, "loss": 1.3595178127288818, "step": 7576 }, { "epoch": 2.332409972299169, "grad_norm": 5.09375, "learning_rate": 8.987021023802215e-07, "loss": 1.4431962966918945, "step": 7578 }, { "epoch": 2.333025546321945, "grad_norm": 19.75, "learning_rate": 8.981757664977833e-07, "loss": 1.348787546157837, "step": 7580 }, { "epoch": 2.3336411203447214, "grad_norm": 15.375, "learning_rate": 8.976498289521412e-07, "loss": 1.2389013767242432, "step": 7582 }, { "epoch": 2.3342566943674976, "grad_norm": 8.125, "learning_rate": 8.97124289975603e-07, "loss": 1.3180859088897705, "step": 7584 }, { "epoch": 2.334872268390274, "grad_norm": 31.5, "learning_rate": 8.965991498002991e-07, "loss": 0.9535354375839233, "step": 7586 }, { "epoch": 2.33548784241305, "grad_norm": 5.8125, "learning_rate": 8.960744086581849e-07, "loss": 1.2119085788726807, "step": 7588 }, { "epoch": 2.3361034164358263, "grad_norm": 15.8125, "learning_rate": 8.955500667810395e-07, "loss": 1.145918369293213, "step": 7590 }, { "epoch": 2.3367189904586025, "grad_norm": 13.9375, "learning_rate": 8.950261244004645e-07, "loss": 0.48710447549819946, "step": 7592 }, { "epoch": 2.3373345644813788, "grad_norm": 17.125, "learning_rate": 8.945025817478864e-07, "loss": 1.31890869140625, "step": 7594 }, { "epoch": 2.337950138504155, "grad_norm": 7.15625, "learning_rate": 8.939794390545547e-07, "loss": 1.133885383605957, "step": 7596 }, { "epoch": 2.3385657125269312, "grad_norm": 10.9375, "learning_rate": 8.934566965515415e-07, "loss": 1.3523964881896973, "step": 7598 }, { "epoch": 2.3391812865497075, "grad_norm": 17.625, "learning_rate": 8.929343544697438e-07, "loss": 1.5850661993026733, "step": 7600 }, { "epoch": 2.3397968605724837, "grad_norm": 23.75, "learning_rate": 8.924124130398796e-07, "loss": 1.057510256767273, "step": 7602 }, { "epoch": 2.34041243459526, "grad_norm": 13.625, "learning_rate": 8.918908724924915e-07, "loss": 1.1042832136154175, "step": 7604 }, { "epoch": 2.341028008618036, "grad_norm": 19.0, "learning_rate": 8.913697330579452e-07, "loss": 1.1760351657867432, "step": 7606 }, { "epoch": 2.3416435826408124, "grad_norm": 12.125, "learning_rate": 8.908489949664272e-07, "loss": 1.384007453918457, "step": 7608 }, { "epoch": 2.3422591566635886, "grad_norm": 23.625, "learning_rate": 8.903286584479493e-07, "loss": 1.2748414278030396, "step": 7610 }, { "epoch": 2.342874730686365, "grad_norm": 12.4375, "learning_rate": 8.898087237323441e-07, "loss": 1.4564826488494873, "step": 7612 }, { "epoch": 2.343490304709141, "grad_norm": 25.125, "learning_rate": 8.892891910492675e-07, "loss": 1.7585818767547607, "step": 7614 }, { "epoch": 2.3441058787319173, "grad_norm": 13.75, "learning_rate": 8.88770060628198e-07, "loss": 1.3288085460662842, "step": 7616 }, { "epoch": 2.3447214527546936, "grad_norm": 9.5, "learning_rate": 8.882513326984355e-07, "loss": 1.4811022281646729, "step": 7618 }, { "epoch": 2.34533702677747, "grad_norm": 4.5625, "learning_rate": 8.877330074891032e-07, "loss": 0.993120551109314, "step": 7620 }, { "epoch": 2.3459526008002465, "grad_norm": 20.375, "learning_rate": 8.872150852291461e-07, "loss": 1.3954640626907349, "step": 7622 }, { "epoch": 2.3465681748230223, "grad_norm": 17.625, "learning_rate": 8.866975661473299e-07, "loss": 1.5178502798080444, "step": 7624 }, { "epoch": 2.347183748845799, "grad_norm": 8.3125, "learning_rate": 8.86180450472245e-07, "loss": 1.3272488117218018, "step": 7626 }, { "epoch": 2.3477993228685747, "grad_norm": 39.75, "learning_rate": 8.856637384323009e-07, "loss": 1.7186293601989746, "step": 7628 }, { "epoch": 2.3484148968913514, "grad_norm": 24.5, "learning_rate": 8.8514743025573e-07, "loss": 1.2998948097229004, "step": 7630 }, { "epoch": 2.349030470914127, "grad_norm": 24.375, "learning_rate": 8.846315261705871e-07, "loss": 1.6087599992752075, "step": 7632 }, { "epoch": 2.349646044936904, "grad_norm": 15.75, "learning_rate": 8.841160264047466e-07, "loss": 1.5249056816101074, "step": 7634 }, { "epoch": 2.3502616189596797, "grad_norm": 5.84375, "learning_rate": 8.836009311859053e-07, "loss": 1.2703745365142822, "step": 7636 }, { "epoch": 2.3508771929824563, "grad_norm": 25.0, "learning_rate": 8.830862407415824e-07, "loss": 1.3457015752792358, "step": 7638 }, { "epoch": 2.351492767005232, "grad_norm": 3.0, "learning_rate": 8.82571955299116e-07, "loss": 1.0862663984298706, "step": 7640 }, { "epoch": 2.352108341028009, "grad_norm": 53.5, "learning_rate": 8.820580750856673e-07, "loss": 1.2190375328063965, "step": 7642 }, { "epoch": 2.352723915050785, "grad_norm": 29.5, "learning_rate": 8.815446003282177e-07, "loss": 1.4536583423614502, "step": 7644 }, { "epoch": 2.3533394890735613, "grad_norm": 36.5, "learning_rate": 8.810315312535697e-07, "loss": 1.399013638496399, "step": 7646 }, { "epoch": 2.3539550630963375, "grad_norm": 18.375, "learning_rate": 8.80518868088346e-07, "loss": 1.4619370698928833, "step": 7648 }, { "epoch": 2.3545706371191137, "grad_norm": 36.25, "learning_rate": 8.80006611058991e-07, "loss": 1.626915693283081, "step": 7650 }, { "epoch": 2.35518621114189, "grad_norm": 30.75, "learning_rate": 8.794947603917691e-07, "loss": 1.156236171722412, "step": 7652 }, { "epoch": 2.355801785164666, "grad_norm": 15.0, "learning_rate": 8.789833163127652e-07, "loss": 1.5183656215667725, "step": 7654 }, { "epoch": 2.3564173591874424, "grad_norm": 7.8125, "learning_rate": 8.784722790478847e-07, "loss": 1.1837083101272583, "step": 7656 }, { "epoch": 2.3570329332102187, "grad_norm": 22.0, "learning_rate": 8.779616488228538e-07, "loss": 1.3680181503295898, "step": 7658 }, { "epoch": 2.357648507232995, "grad_norm": 18.25, "learning_rate": 8.774514258632178e-07, "loss": 1.4081780910491943, "step": 7660 }, { "epoch": 2.358264081255771, "grad_norm": 34.25, "learning_rate": 8.769416103943428e-07, "loss": 1.5171761512756348, "step": 7662 }, { "epoch": 2.3588796552785474, "grad_norm": 15.25, "learning_rate": 8.764322026414157e-07, "loss": 1.1231598854064941, "step": 7664 }, { "epoch": 2.3594952293013236, "grad_norm": 8.5, "learning_rate": 8.759232028294418e-07, "loss": 1.3174912929534912, "step": 7666 }, { "epoch": 2.3601108033241, "grad_norm": 15.8125, "learning_rate": 8.754146111832467e-07, "loss": 1.4191495180130005, "step": 7668 }, { "epoch": 2.360726377346876, "grad_norm": 13.0, "learning_rate": 8.749064279274764e-07, "loss": 1.5959736108779907, "step": 7670 }, { "epoch": 2.3613419513696523, "grad_norm": 38.0, "learning_rate": 8.743986532865962e-07, "loss": 1.6755528450012207, "step": 7672 }, { "epoch": 2.3619575253924285, "grad_norm": 8.75, "learning_rate": 8.7389128748489e-07, "loss": 1.21274995803833, "step": 7674 }, { "epoch": 2.3625730994152048, "grad_norm": 23.5, "learning_rate": 8.733843307464623e-07, "loss": 1.5054268836975098, "step": 7676 }, { "epoch": 2.363188673437981, "grad_norm": 14.625, "learning_rate": 8.728777832952366e-07, "loss": 1.2082719802856445, "step": 7678 }, { "epoch": 2.3638042474607572, "grad_norm": 52.25, "learning_rate": 8.723716453549553e-07, "loss": 1.5130014419555664, "step": 7680 }, { "epoch": 2.3644198214835335, "grad_norm": 14.6875, "learning_rate": 8.7186591714918e-07, "loss": 1.3520900011062622, "step": 7682 }, { "epoch": 2.3650353955063097, "grad_norm": 27.75, "learning_rate": 8.713605989012918e-07, "loss": 1.385072946548462, "step": 7684 }, { "epoch": 2.365650969529086, "grad_norm": 12.75, "learning_rate": 8.708556908344895e-07, "loss": 1.2833045721054077, "step": 7686 }, { "epoch": 2.366266543551862, "grad_norm": 9.625, "learning_rate": 8.703511931717929e-07, "loss": 1.4542651176452637, "step": 7688 }, { "epoch": 2.3668821175746384, "grad_norm": 18.25, "learning_rate": 8.698471061360385e-07, "loss": 1.1327033042907715, "step": 7690 }, { "epoch": 2.3674976915974146, "grad_norm": 8.1875, "learning_rate": 8.693434299498812e-07, "loss": 1.386979103088379, "step": 7692 }, { "epoch": 2.368113265620191, "grad_norm": 14.4375, "learning_rate": 8.688401648357972e-07, "loss": 1.348421335220337, "step": 7694 }, { "epoch": 2.368728839642967, "grad_norm": 18.125, "learning_rate": 8.683373110160779e-07, "loss": 1.8261079788208008, "step": 7696 }, { "epoch": 2.3693444136657433, "grad_norm": 24.25, "learning_rate": 8.678348687128348e-07, "loss": 1.5496175289154053, "step": 7698 }, { "epoch": 2.3699599876885196, "grad_norm": 5.34375, "learning_rate": 8.673328381479978e-07, "loss": 1.3678693771362305, "step": 7700 }, { "epoch": 2.370575561711296, "grad_norm": 22.125, "learning_rate": 8.668312195433132e-07, "loss": 1.2740004062652588, "step": 7702 }, { "epoch": 2.371191135734072, "grad_norm": 20.0, "learning_rate": 8.663300131203472e-07, "loss": 1.5466177463531494, "step": 7704 }, { "epoch": 2.3718067097568483, "grad_norm": 12.9375, "learning_rate": 8.658292191004838e-07, "loss": 1.6574368476867676, "step": 7706 }, { "epoch": 2.3724222837796245, "grad_norm": 12.0, "learning_rate": 8.653288377049229e-07, "loss": 1.3180005550384521, "step": 7708 }, { "epoch": 2.3730378578024007, "grad_norm": 23.75, "learning_rate": 8.648288691546848e-07, "loss": 1.4729022979736328, "step": 7710 }, { "epoch": 2.373653431825177, "grad_norm": 13.4375, "learning_rate": 8.643293136706055e-07, "loss": 0.969083309173584, "step": 7712 }, { "epoch": 2.374269005847953, "grad_norm": 15.1875, "learning_rate": 8.638301714733399e-07, "loss": 1.605223298072815, "step": 7714 }, { "epoch": 2.3748845798707294, "grad_norm": 11.6875, "learning_rate": 8.633314427833587e-07, "loss": 0.9919534921646118, "step": 7716 }, { "epoch": 2.3755001538935057, "grad_norm": 5.8125, "learning_rate": 8.628331278209516e-07, "loss": 1.044752597808838, "step": 7718 }, { "epoch": 2.376115727916282, "grad_norm": 11.1875, "learning_rate": 8.62335226806225e-07, "loss": 1.4849684238433838, "step": 7720 }, { "epoch": 2.376731301939058, "grad_norm": 10.5625, "learning_rate": 8.618377399591017e-07, "loss": 1.4038259983062744, "step": 7722 }, { "epoch": 2.3773468759618344, "grad_norm": 17.25, "learning_rate": 8.613406674993228e-07, "loss": 1.2761948108673096, "step": 7724 }, { "epoch": 2.3779624499846106, "grad_norm": 10.4375, "learning_rate": 8.608440096464458e-07, "loss": 1.2625563144683838, "step": 7726 }, { "epoch": 2.378578024007387, "grad_norm": 10.8125, "learning_rate": 8.603477666198445e-07, "loss": 1.325385570526123, "step": 7728 }, { "epoch": 2.379193598030163, "grad_norm": 14.3125, "learning_rate": 8.598519386387104e-07, "loss": 1.3754222393035889, "step": 7730 }, { "epoch": 2.3798091720529393, "grad_norm": 11.5, "learning_rate": 8.593565259220514e-07, "loss": 1.410444736480713, "step": 7732 }, { "epoch": 2.3804247460757155, "grad_norm": 16.75, "learning_rate": 8.588615286886914e-07, "loss": 1.3901515007019043, "step": 7734 }, { "epoch": 2.3810403200984918, "grad_norm": 16.375, "learning_rate": 8.583669471572716e-07, "loss": 1.8944385051727295, "step": 7736 }, { "epoch": 2.381655894121268, "grad_norm": 12.5, "learning_rate": 8.578727815462492e-07, "loss": 1.2769474983215332, "step": 7738 }, { "epoch": 2.3822714681440442, "grad_norm": 11.25, "learning_rate": 8.573790320738979e-07, "loss": 1.8802108764648438, "step": 7740 }, { "epoch": 2.3828870421668205, "grad_norm": 11.0, "learning_rate": 8.568856989583068e-07, "loss": 1.3289381265640259, "step": 7742 }, { "epoch": 2.3835026161895967, "grad_norm": 14.4375, "learning_rate": 8.563927824173822e-07, "loss": 1.67485511302948, "step": 7744 }, { "epoch": 2.384118190212373, "grad_norm": 16.625, "learning_rate": 8.559002826688462e-07, "loss": 1.2186708450317383, "step": 7746 }, { "epoch": 2.384733764235149, "grad_norm": 12.375, "learning_rate": 8.554081999302356e-07, "loss": 1.433382272720337, "step": 7748 }, { "epoch": 2.3853493382579254, "grad_norm": 55.75, "learning_rate": 8.549165344189045e-07, "loss": 1.1944999694824219, "step": 7750 }, { "epoch": 2.3859649122807016, "grad_norm": 5.875, "learning_rate": 8.544252863520221e-07, "loss": 1.154907464981079, "step": 7752 }, { "epoch": 2.386580486303478, "grad_norm": 15.0, "learning_rate": 8.539344559465728e-07, "loss": 1.3341572284698486, "step": 7754 }, { "epoch": 2.387196060326254, "grad_norm": 10.625, "learning_rate": 8.534440434193579e-07, "loss": 1.5109237432479858, "step": 7756 }, { "epoch": 2.3878116343490303, "grad_norm": 31.25, "learning_rate": 8.529540489869925e-07, "loss": 1.6490485668182373, "step": 7758 }, { "epoch": 2.3884272083718066, "grad_norm": 36.75, "learning_rate": 8.524644728659071e-07, "loss": 1.447040319442749, "step": 7760 }, { "epoch": 2.389042782394583, "grad_norm": 18.75, "learning_rate": 8.519753152723493e-07, "loss": 1.6449151039123535, "step": 7762 }, { "epoch": 2.389658356417359, "grad_norm": 6.6875, "learning_rate": 8.514865764223799e-07, "loss": 1.2321791648864746, "step": 7764 }, { "epoch": 2.3902739304401353, "grad_norm": 5.96875, "learning_rate": 8.509982565318752e-07, "loss": 1.1312897205352783, "step": 7766 }, { "epoch": 2.3908895044629115, "grad_norm": 14.0, "learning_rate": 8.505103558165274e-07, "loss": 1.233027696609497, "step": 7768 }, { "epoch": 2.3915050784856877, "grad_norm": 22.5, "learning_rate": 8.50022874491842e-07, "loss": 1.7808541059494019, "step": 7770 }, { "epoch": 2.392120652508464, "grad_norm": 15.1875, "learning_rate": 8.495358127731406e-07, "loss": 1.5311319828033447, "step": 7772 }, { "epoch": 2.39273622653124, "grad_norm": 6.90625, "learning_rate": 8.490491708755588e-07, "loss": 1.3701417446136475, "step": 7774 }, { "epoch": 2.393351800554017, "grad_norm": 55.0, "learning_rate": 8.485629490140465e-07, "loss": 1.1161890029907227, "step": 7776 }, { "epoch": 2.3939673745767927, "grad_norm": 9.625, "learning_rate": 8.480771474033691e-07, "loss": 1.2349655628204346, "step": 7778 }, { "epoch": 2.3945829485995693, "grad_norm": 20.5, "learning_rate": 8.47591766258105e-07, "loss": 0.8640923500061035, "step": 7780 }, { "epoch": 2.395198522622345, "grad_norm": 8.5, "learning_rate": 8.471068057926483e-07, "loss": 1.1073216199874878, "step": 7782 }, { "epoch": 2.395814096645122, "grad_norm": 18.875, "learning_rate": 8.466222662212063e-07, "loss": 1.2677631378173828, "step": 7784 }, { "epoch": 2.3964296706678976, "grad_norm": 17.875, "learning_rate": 8.461381477578003e-07, "loss": 1.2740721702575684, "step": 7786 }, { "epoch": 2.3970452446906743, "grad_norm": 10.625, "learning_rate": 8.456544506162668e-07, "loss": 1.4284172058105469, "step": 7788 }, { "epoch": 2.39766081871345, "grad_norm": 14.4375, "learning_rate": 8.451711750102546e-07, "loss": 1.7935305833816528, "step": 7790 }, { "epoch": 2.3982763927362267, "grad_norm": 20.375, "learning_rate": 8.446883211532275e-07, "loss": 1.6979771852493286, "step": 7792 }, { "epoch": 2.398891966759003, "grad_norm": 11.25, "learning_rate": 8.442058892584629e-07, "loss": 1.3462008237838745, "step": 7794 }, { "epoch": 2.399507540781779, "grad_norm": 10.4375, "learning_rate": 8.437238795390507e-07, "loss": 1.3277199268341064, "step": 7796 }, { "epoch": 2.4001231148045554, "grad_norm": 14.6875, "learning_rate": 8.432422922078955e-07, "loss": 1.2554333209991455, "step": 7798 }, { "epoch": 2.4007386888273317, "grad_norm": 25.5, "learning_rate": 8.427611274777156e-07, "loss": 1.3627779483795166, "step": 7800 }, { "epoch": 2.401354262850108, "grad_norm": 11.375, "learning_rate": 8.422803855610411e-07, "loss": 1.3388714790344238, "step": 7802 }, { "epoch": 2.401969836872884, "grad_norm": 17.625, "learning_rate": 8.418000666702166e-07, "loss": 1.5131101608276367, "step": 7804 }, { "epoch": 2.4025854108956604, "grad_norm": 13.4375, "learning_rate": 8.413201710173996e-07, "loss": 1.553926944732666, "step": 7806 }, { "epoch": 2.4032009849184366, "grad_norm": 10.9375, "learning_rate": 8.408406988145607e-07, "loss": 1.2082258462905884, "step": 7808 }, { "epoch": 2.403816558941213, "grad_norm": 17.5, "learning_rate": 8.403616502734828e-07, "loss": 1.420758605003357, "step": 7810 }, { "epoch": 2.404432132963989, "grad_norm": 20.0, "learning_rate": 8.398830256057625e-07, "loss": 1.002287745475769, "step": 7812 }, { "epoch": 2.4050477069867653, "grad_norm": 15.0625, "learning_rate": 8.394048250228093e-07, "loss": 1.3062329292297363, "step": 7814 }, { "epoch": 2.4056632810095415, "grad_norm": 33.0, "learning_rate": 8.38927048735844e-07, "loss": 1.532996416091919, "step": 7816 }, { "epoch": 2.4062788550323178, "grad_norm": 14.25, "learning_rate": 8.384496969559016e-07, "loss": 1.4776278734207153, "step": 7818 }, { "epoch": 2.406894429055094, "grad_norm": 12.25, "learning_rate": 8.37972769893829e-07, "loss": 1.2780346870422363, "step": 7820 }, { "epoch": 2.40751000307787, "grad_norm": 5.59375, "learning_rate": 8.374962677602847e-07, "loss": 1.1072016954421997, "step": 7822 }, { "epoch": 2.4081255771006465, "grad_norm": 20.125, "learning_rate": 8.370201907657415e-07, "loss": 1.4812865257263184, "step": 7824 }, { "epoch": 2.4087411511234227, "grad_norm": 6.84375, "learning_rate": 8.36544539120482e-07, "loss": 1.0782959461212158, "step": 7826 }, { "epoch": 2.409356725146199, "grad_norm": 22.375, "learning_rate": 8.360693130346021e-07, "loss": 1.2231228351593018, "step": 7828 }, { "epoch": 2.409972299168975, "grad_norm": 8.875, "learning_rate": 8.355945127180107e-07, "loss": 1.3597500324249268, "step": 7830 }, { "epoch": 2.4105878731917514, "grad_norm": 34.5, "learning_rate": 8.351201383804266e-07, "loss": 1.655010461807251, "step": 7832 }, { "epoch": 2.4112034472145276, "grad_norm": 20.125, "learning_rate": 8.346461902313823e-07, "loss": 1.4868947267532349, "step": 7834 }, { "epoch": 2.411819021237304, "grad_norm": 16.5, "learning_rate": 8.341726684802205e-07, "loss": 1.2523350715637207, "step": 7836 }, { "epoch": 2.41243459526008, "grad_norm": 11.6875, "learning_rate": 8.336995733360966e-07, "loss": 1.1832252740859985, "step": 7838 }, { "epoch": 2.4130501692828563, "grad_norm": 23.875, "learning_rate": 8.332269050079777e-07, "loss": 1.2065129280090332, "step": 7840 }, { "epoch": 2.4136657433056325, "grad_norm": 23.0, "learning_rate": 8.327546637046411e-07, "loss": 1.335740089416504, "step": 7842 }, { "epoch": 2.414281317328409, "grad_norm": 13.5625, "learning_rate": 8.32282849634677e-07, "loss": 1.565500020980835, "step": 7844 }, { "epoch": 2.414896891351185, "grad_norm": 20.375, "learning_rate": 8.318114630064861e-07, "loss": 1.4919871091842651, "step": 7846 }, { "epoch": 2.4155124653739612, "grad_norm": 19.0, "learning_rate": 8.313405040282797e-07, "loss": 1.5719404220581055, "step": 7848 }, { "epoch": 2.4161280393967375, "grad_norm": 16.625, "learning_rate": 8.308699729080822e-07, "loss": 1.315718650817871, "step": 7850 }, { "epoch": 2.4167436134195137, "grad_norm": 7.84375, "learning_rate": 8.303998698537272e-07, "loss": 1.0368592739105225, "step": 7852 }, { "epoch": 2.41735918744229, "grad_norm": 12.625, "learning_rate": 8.29930195072859e-07, "loss": 1.2762839794158936, "step": 7854 }, { "epoch": 2.417974761465066, "grad_norm": 15.1875, "learning_rate": 8.294609487729346e-07, "loss": 1.638676404953003, "step": 7856 }, { "epoch": 2.4185903354878424, "grad_norm": 24.125, "learning_rate": 8.289921311612198e-07, "loss": 1.1454219818115234, "step": 7858 }, { "epoch": 2.4192059095106186, "grad_norm": 19.875, "learning_rate": 8.285237424447923e-07, "loss": 1.719085693359375, "step": 7860 }, { "epoch": 2.419821483533395, "grad_norm": 12.0, "learning_rate": 8.2805578283054e-07, "loss": 1.0720081329345703, "step": 7862 }, { "epoch": 2.420437057556171, "grad_norm": 10.875, "learning_rate": 8.275882525251607e-07, "loss": 1.2760975360870361, "step": 7864 }, { "epoch": 2.4210526315789473, "grad_norm": 18.125, "learning_rate": 8.271211517351636e-07, "loss": 1.639174222946167, "step": 7866 }, { "epoch": 2.4216682056017236, "grad_norm": 37.25, "learning_rate": 8.266544806668678e-07, "loss": 1.2773809432983398, "step": 7868 }, { "epoch": 2.4222837796245, "grad_norm": 10.1875, "learning_rate": 8.261882395264017e-07, "loss": 1.1791555881500244, "step": 7870 }, { "epoch": 2.422899353647276, "grad_norm": 21.375, "learning_rate": 8.257224285197049e-07, "loss": 1.487235188484192, "step": 7872 }, { "epoch": 2.4235149276700523, "grad_norm": 11.9375, "learning_rate": 8.252570478525268e-07, "loss": 1.3929115533828735, "step": 7874 }, { "epoch": 2.4241305016928285, "grad_norm": 12.25, "learning_rate": 8.247920977304267e-07, "loss": 1.374371886253357, "step": 7876 }, { "epoch": 2.4247460757156047, "grad_norm": 7.65625, "learning_rate": 8.243275783587732e-07, "loss": 1.2207591533660889, "step": 7878 }, { "epoch": 2.425361649738381, "grad_norm": 18.25, "learning_rate": 8.238634899427451e-07, "loss": 0.96770840883255, "step": 7880 }, { "epoch": 2.425977223761157, "grad_norm": 16.75, "learning_rate": 8.233998326873314e-07, "loss": 1.5982918739318848, "step": 7882 }, { "epoch": 2.4265927977839334, "grad_norm": 8.25, "learning_rate": 8.229366067973291e-07, "loss": 1.2492954730987549, "step": 7884 }, { "epoch": 2.4272083718067097, "grad_norm": 20.875, "learning_rate": 8.22473812477346e-07, "loss": 1.3705569505691528, "step": 7886 }, { "epoch": 2.427823945829486, "grad_norm": 23.75, "learning_rate": 8.220114499317994e-07, "loss": 1.0353972911834717, "step": 7888 }, { "epoch": 2.428439519852262, "grad_norm": 11.0, "learning_rate": 8.215495193649144e-07, "loss": 1.1741468906402588, "step": 7890 }, { "epoch": 2.4290550938750384, "grad_norm": 48.0, "learning_rate": 8.21088020980727e-07, "loss": 1.2422951459884644, "step": 7892 }, { "epoch": 2.4296706678978146, "grad_norm": 45.0, "learning_rate": 8.206269549830813e-07, "loss": 1.3556740283966064, "step": 7894 }, { "epoch": 2.430286241920591, "grad_norm": 17.5, "learning_rate": 8.201663215756308e-07, "loss": 1.5279185771942139, "step": 7896 }, { "epoch": 2.430901815943367, "grad_norm": 6.15625, "learning_rate": 8.197061209618374e-07, "loss": 1.1852245330810547, "step": 7898 }, { "epoch": 2.4315173899661433, "grad_norm": 23.5, "learning_rate": 8.192463533449727e-07, "loss": 1.5237150192260742, "step": 7900 }, { "epoch": 2.4321329639889195, "grad_norm": 10.6875, "learning_rate": 8.187870189281167e-07, "loss": 1.355332612991333, "step": 7902 }, { "epoch": 2.4327485380116958, "grad_norm": 6.375, "learning_rate": 8.183281179141573e-07, "loss": 1.1575570106506348, "step": 7904 }, { "epoch": 2.433364112034472, "grad_norm": 14.375, "learning_rate": 8.178696505057921e-07, "loss": 1.236515998840332, "step": 7906 }, { "epoch": 2.4339796860572482, "grad_norm": 9.8125, "learning_rate": 8.174116169055268e-07, "loss": 1.3578482866287231, "step": 7908 }, { "epoch": 2.4345952600800245, "grad_norm": 15.3125, "learning_rate": 8.169540173156746e-07, "loss": 1.4025132656097412, "step": 7910 }, { "epoch": 2.4352108341028007, "grad_norm": 5.0625, "learning_rate": 8.164968519383585e-07, "loss": 1.1161717176437378, "step": 7912 }, { "epoch": 2.435826408125577, "grad_norm": 13.75, "learning_rate": 8.16040120975509e-07, "loss": 1.6636682748794556, "step": 7914 }, { "epoch": 2.436441982148353, "grad_norm": 27.25, "learning_rate": 8.155838246288638e-07, "loss": 1.5401861667633057, "step": 7916 }, { "epoch": 2.4370575561711294, "grad_norm": 12.0, "learning_rate": 8.151279630999709e-07, "loss": 1.5521032810211182, "step": 7918 }, { "epoch": 2.4376731301939056, "grad_norm": 90.5, "learning_rate": 8.146725365901836e-07, "loss": 1.5973589420318604, "step": 7920 }, { "epoch": 2.438288704216682, "grad_norm": 15.125, "learning_rate": 8.142175453006649e-07, "loss": 1.0958237648010254, "step": 7922 }, { "epoch": 2.438904278239458, "grad_norm": 12.3125, "learning_rate": 8.137629894323854e-07, "loss": 0.8826869130134583, "step": 7924 }, { "epoch": 2.439519852262235, "grad_norm": 18.625, "learning_rate": 8.133088691861219e-07, "loss": 1.2588679790496826, "step": 7926 }, { "epoch": 2.4401354262850106, "grad_norm": 8.6875, "learning_rate": 8.128551847624605e-07, "loss": 1.0845204591751099, "step": 7928 }, { "epoch": 2.4407510003077872, "grad_norm": 12.3125, "learning_rate": 8.12401936361794e-07, "loss": 1.260317087173462, "step": 7930 }, { "epoch": 2.441366574330563, "grad_norm": 23.75, "learning_rate": 8.119491241843229e-07, "loss": 1.0685640573501587, "step": 7932 }, { "epoch": 2.4419821483533397, "grad_norm": 12.75, "learning_rate": 8.114967484300544e-07, "loss": 1.4659960269927979, "step": 7934 }, { "epoch": 2.4425977223761155, "grad_norm": 31.625, "learning_rate": 8.110448092988041e-07, "loss": 1.2615103721618652, "step": 7936 }, { "epoch": 2.443213296398892, "grad_norm": 29.625, "learning_rate": 8.105933069901935e-07, "loss": 1.3865262269973755, "step": 7938 }, { "epoch": 2.443828870421668, "grad_norm": 6.8125, "learning_rate": 8.101422417036516e-07, "loss": 1.39396071434021, "step": 7940 }, { "epoch": 2.4444444444444446, "grad_norm": 5.21875, "learning_rate": 8.096916136384146e-07, "loss": 1.4983265399932861, "step": 7942 }, { "epoch": 2.445060018467221, "grad_norm": 11.5, "learning_rate": 8.09241422993526e-07, "loss": 1.1928825378417969, "step": 7944 }, { "epoch": 2.445675592489997, "grad_norm": 8.5625, "learning_rate": 8.087916699678345e-07, "loss": 1.1064355373382568, "step": 7946 }, { "epoch": 2.4462911665127733, "grad_norm": 21.0, "learning_rate": 8.08342354759997e-07, "loss": 1.461334466934204, "step": 7948 }, { "epoch": 2.4469067405355496, "grad_norm": 15.1875, "learning_rate": 8.078934775684771e-07, "loss": 1.3841646909713745, "step": 7950 }, { "epoch": 2.447522314558326, "grad_norm": 6.65625, "learning_rate": 8.074450385915435e-07, "loss": 0.9941138625144958, "step": 7952 }, { "epoch": 2.448137888581102, "grad_norm": 39.75, "learning_rate": 8.069970380272727e-07, "loss": 0.9700048565864563, "step": 7954 }, { "epoch": 2.4487534626038783, "grad_norm": 8.625, "learning_rate": 8.065494760735468e-07, "loss": 1.2525842189788818, "step": 7956 }, { "epoch": 2.4493690366266545, "grad_norm": 32.75, "learning_rate": 8.061023529280546e-07, "loss": 1.5355992317199707, "step": 7958 }, { "epoch": 2.4499846106494307, "grad_norm": 9.875, "learning_rate": 8.056556687882909e-07, "loss": 1.3056560754776, "step": 7960 }, { "epoch": 2.450600184672207, "grad_norm": 11.1875, "learning_rate": 8.052094238515563e-07, "loss": 1.2505466938018799, "step": 7962 }, { "epoch": 2.451215758694983, "grad_norm": 16.75, "learning_rate": 8.047636183149584e-07, "loss": 1.2493584156036377, "step": 7964 }, { "epoch": 2.4518313327177594, "grad_norm": 7.3125, "learning_rate": 8.043182523754092e-07, "loss": 1.2162585258483887, "step": 7966 }, { "epoch": 2.4524469067405357, "grad_norm": 15.625, "learning_rate": 8.038733262296278e-07, "loss": 1.416115403175354, "step": 7968 }, { "epoch": 2.453062480763312, "grad_norm": 12.625, "learning_rate": 8.034288400741388e-07, "loss": 1.5428014993667603, "step": 7970 }, { "epoch": 2.453678054786088, "grad_norm": 28.125, "learning_rate": 8.029847941052717e-07, "loss": 1.3966515064239502, "step": 7972 }, { "epoch": 2.4542936288088644, "grad_norm": 47.75, "learning_rate": 8.025411885191622e-07, "loss": 1.602909803390503, "step": 7974 }, { "epoch": 2.4549092028316406, "grad_norm": 20.125, "learning_rate": 8.020980235117518e-07, "loss": 1.5091824531555176, "step": 7976 }, { "epoch": 2.455524776854417, "grad_norm": 32.5, "learning_rate": 8.016552992787867e-07, "loss": 1.4437615871429443, "step": 7978 }, { "epoch": 2.456140350877193, "grad_norm": 25.625, "learning_rate": 8.012130160158187e-07, "loss": 1.4464353322982788, "step": 7980 }, { "epoch": 2.4567559248999693, "grad_norm": 14.8125, "learning_rate": 8.007711739182051e-07, "loss": 1.307931661605835, "step": 7982 }, { "epoch": 2.4573714989227455, "grad_norm": 28.875, "learning_rate": 8.003297731811072e-07, "loss": 1.121088981628418, "step": 7984 }, { "epoch": 2.4579870729455218, "grad_norm": 100.0, "learning_rate": 7.998888139994933e-07, "loss": 0.8891786336898804, "step": 7986 }, { "epoch": 2.458602646968298, "grad_norm": 7.5, "learning_rate": 7.994482965681352e-07, "loss": 0.7378986477851868, "step": 7988 }, { "epoch": 2.4592182209910742, "grad_norm": 26.0, "learning_rate": 7.990082210816096e-07, "loss": 1.3136024475097656, "step": 7990 }, { "epoch": 2.4598337950138505, "grad_norm": 18.625, "learning_rate": 7.985685877342989e-07, "loss": 1.7769261598587036, "step": 7992 }, { "epoch": 2.4604493690366267, "grad_norm": 39.0, "learning_rate": 7.981293967203893e-07, "loss": 1.4590258598327637, "step": 7994 }, { "epoch": 2.461064943059403, "grad_norm": 32.75, "learning_rate": 7.976906482338718e-07, "loss": 1.2280614376068115, "step": 7996 }, { "epoch": 2.461680517082179, "grad_norm": 7.75, "learning_rate": 7.972523424685428e-07, "loss": 0.8923991322517395, "step": 7998 }, { "epoch": 2.4622960911049554, "grad_norm": 23.625, "learning_rate": 7.968144796180014e-07, "loss": 1.5898621082305908, "step": 8000 }, { "epoch": 2.4629116651277316, "grad_norm": 30.125, "learning_rate": 7.963770598756535e-07, "loss": 1.3274173736572266, "step": 8002 }, { "epoch": 2.463527239150508, "grad_norm": 21.25, "learning_rate": 7.959400834347062e-07, "loss": 1.9697543382644653, "step": 8004 }, { "epoch": 2.464142813173284, "grad_norm": 16.0, "learning_rate": 7.955035504881741e-07, "loss": 1.3735530376434326, "step": 8006 }, { "epoch": 2.4647583871960603, "grad_norm": 15.0625, "learning_rate": 7.950674612288737e-07, "loss": 1.231592059135437, "step": 8008 }, { "epoch": 2.4653739612188366, "grad_norm": 35.5, "learning_rate": 7.946318158494255e-07, "loss": 1.4949934482574463, "step": 8010 }, { "epoch": 2.465989535241613, "grad_norm": 161.0, "learning_rate": 7.941966145422555e-07, "loss": 0.8490093946456909, "step": 8012 }, { "epoch": 2.466605109264389, "grad_norm": 59.25, "learning_rate": 7.93761857499592e-07, "loss": 0.8294261693954468, "step": 8014 }, { "epoch": 2.4672206832871653, "grad_norm": 13.75, "learning_rate": 7.93327544913468e-07, "loss": 1.3439993858337402, "step": 8016 }, { "epoch": 2.4678362573099415, "grad_norm": 23.25, "learning_rate": 7.928936769757201e-07, "loss": 1.3793857097625732, "step": 8018 }, { "epoch": 2.4684518313327177, "grad_norm": 12.5625, "learning_rate": 7.924602538779877e-07, "loss": 1.548597812652588, "step": 8020 }, { "epoch": 2.469067405355494, "grad_norm": 22.625, "learning_rate": 7.920272758117145e-07, "loss": 1.4939284324645996, "step": 8022 }, { "epoch": 2.46968297937827, "grad_norm": 16.625, "learning_rate": 7.915947429681476e-07, "loss": 1.4503446817398071, "step": 8024 }, { "epoch": 2.4702985534010464, "grad_norm": 6.6875, "learning_rate": 7.911626555383368e-07, "loss": 1.2970399856567383, "step": 8026 }, { "epoch": 2.4709141274238227, "grad_norm": 5.34375, "learning_rate": 7.907310137131358e-07, "loss": 0.9716742634773254, "step": 8028 }, { "epoch": 2.471529701446599, "grad_norm": 23.5, "learning_rate": 7.902998176832018e-07, "loss": 1.3309000730514526, "step": 8030 }, { "epoch": 2.472145275469375, "grad_norm": 28.125, "learning_rate": 7.89869067638994e-07, "loss": 1.6835633516311646, "step": 8032 }, { "epoch": 2.4727608494921514, "grad_norm": 25.75, "learning_rate": 7.894387637707753e-07, "loss": 1.668080449104309, "step": 8034 }, { "epoch": 2.4733764235149276, "grad_norm": 14.625, "learning_rate": 7.890089062686115e-07, "loss": 1.832417607307434, "step": 8036 }, { "epoch": 2.473991997537704, "grad_norm": 10.5625, "learning_rate": 7.885794953223713e-07, "loss": 1.4618122577667236, "step": 8038 }, { "epoch": 2.47460757156048, "grad_norm": 8.6875, "learning_rate": 7.881505311217255e-07, "loss": 1.3789515495300293, "step": 8040 }, { "epoch": 2.4752231455832563, "grad_norm": 12.875, "learning_rate": 7.877220138561485e-07, "loss": 1.2313289642333984, "step": 8042 }, { "epoch": 2.4758387196060325, "grad_norm": 4.0, "learning_rate": 7.87293943714917e-07, "loss": 1.0658435821533203, "step": 8044 }, { "epoch": 2.4764542936288088, "grad_norm": 15.3125, "learning_rate": 7.868663208871092e-07, "loss": 1.407973051071167, "step": 8046 }, { "epoch": 2.477069867651585, "grad_norm": 12.75, "learning_rate": 7.864391455616078e-07, "loss": 1.230506181716919, "step": 8048 }, { "epoch": 2.477685441674361, "grad_norm": 10.375, "learning_rate": 7.860124179270963e-07, "loss": 0.9987541437149048, "step": 8050 }, { "epoch": 2.4783010156971375, "grad_norm": 14.0625, "learning_rate": 7.855861381720601e-07, "loss": 1.5021973848342896, "step": 8052 }, { "epoch": 2.4789165897199137, "grad_norm": 17.125, "learning_rate": 7.851603064847879e-07, "loss": 1.5795702934265137, "step": 8054 }, { "epoch": 2.47953216374269, "grad_norm": 15.75, "learning_rate": 7.8473492305337e-07, "loss": 1.112412929534912, "step": 8056 }, { "epoch": 2.480147737765466, "grad_norm": 14.25, "learning_rate": 7.843099880656992e-07, "loss": 1.1409666538238525, "step": 8058 }, { "epoch": 2.4807633117882424, "grad_norm": 11.4375, "learning_rate": 7.83885501709469e-07, "loss": 1.554758071899414, "step": 8060 }, { "epoch": 2.4813788858110186, "grad_norm": 16.0, "learning_rate": 7.834614641721759e-07, "loss": 1.277086853981018, "step": 8062 }, { "epoch": 2.481994459833795, "grad_norm": 7.625, "learning_rate": 7.83037875641118e-07, "loss": 1.2530086040496826, "step": 8064 }, { "epoch": 2.482610033856571, "grad_norm": 12.5625, "learning_rate": 7.826147363033943e-07, "loss": 1.2744269371032715, "step": 8066 }, { "epoch": 2.4832256078793473, "grad_norm": 11.25, "learning_rate": 7.821920463459062e-07, "loss": 1.3146778345108032, "step": 8068 }, { "epoch": 2.4838411819021236, "grad_norm": 13.125, "learning_rate": 7.817698059553566e-07, "loss": 1.4526034593582153, "step": 8070 }, { "epoch": 2.4844567559249, "grad_norm": 19.0, "learning_rate": 7.813480153182487e-07, "loss": 1.1881377696990967, "step": 8072 }, { "epoch": 2.485072329947676, "grad_norm": 2.9375, "learning_rate": 7.809266746208894e-07, "loss": 1.0977604389190674, "step": 8074 }, { "epoch": 2.4856879039704527, "grad_norm": 23.125, "learning_rate": 7.805057840493841e-07, "loss": 1.4201725721359253, "step": 8076 }, { "epoch": 2.4863034779932285, "grad_norm": 42.25, "learning_rate": 7.800853437896407e-07, "loss": 1.129119873046875, "step": 8078 }, { "epoch": 2.486919052016005, "grad_norm": 18.0, "learning_rate": 7.796653540273689e-07, "loss": 1.2863776683807373, "step": 8080 }, { "epoch": 2.487534626038781, "grad_norm": 24.75, "learning_rate": 7.792458149480781e-07, "loss": 1.3503018617630005, "step": 8082 }, { "epoch": 2.4881502000615576, "grad_norm": 9.8125, "learning_rate": 7.788267267370792e-07, "loss": 1.3690378665924072, "step": 8084 }, { "epoch": 2.4887657740843334, "grad_norm": 10.4375, "learning_rate": 7.784080895794845e-07, "loss": 1.1764789819717407, "step": 8086 }, { "epoch": 2.48938134810711, "grad_norm": 21.75, "learning_rate": 7.779899036602055e-07, "loss": 1.2622982263565063, "step": 8088 }, { "epoch": 2.489996922129886, "grad_norm": 19.0, "learning_rate": 7.775721691639563e-07, "loss": 1.3501579761505127, "step": 8090 }, { "epoch": 2.4906124961526626, "grad_norm": 10.6875, "learning_rate": 7.771548862752504e-07, "loss": 0.9969066381454468, "step": 8092 }, { "epoch": 2.4912280701754383, "grad_norm": 6.25, "learning_rate": 7.767380551784021e-07, "loss": 0.5417050123214722, "step": 8094 }, { "epoch": 2.491843644198215, "grad_norm": 14.1875, "learning_rate": 7.76321676057526e-07, "loss": 1.3599417209625244, "step": 8096 }, { "epoch": 2.4924592182209913, "grad_norm": 24.5, "learning_rate": 7.759057490965375e-07, "loss": 1.6254465579986572, "step": 8098 }, { "epoch": 2.4930747922437675, "grad_norm": 132.0, "learning_rate": 7.754902744791523e-07, "loss": 1.958270788192749, "step": 8100 }, { "epoch": 2.4936903662665437, "grad_norm": 12.1875, "learning_rate": 7.750752523888852e-07, "loss": 1.4026708602905273, "step": 8102 }, { "epoch": 2.49430594028932, "grad_norm": 94.5, "learning_rate": 7.746606830090525e-07, "loss": 1.394837498664856, "step": 8104 }, { "epoch": 2.494921514312096, "grad_norm": 9.0, "learning_rate": 7.742465665227702e-07, "loss": 1.3615858554840088, "step": 8106 }, { "epoch": 2.4955370883348724, "grad_norm": 65.5, "learning_rate": 7.738329031129533e-07, "loss": 1.4655237197875977, "step": 8108 }, { "epoch": 2.4961526623576487, "grad_norm": 10.0625, "learning_rate": 7.734196929623177e-07, "loss": 1.4283119440078735, "step": 8110 }, { "epoch": 2.496768236380425, "grad_norm": 6.90625, "learning_rate": 7.730069362533791e-07, "loss": 1.05420982837677, "step": 8112 }, { "epoch": 2.497383810403201, "grad_norm": 11.4375, "learning_rate": 7.725946331684523e-07, "loss": 1.1853525638580322, "step": 8114 }, { "epoch": 2.4979993844259774, "grad_norm": 4.625, "learning_rate": 7.721827838896523e-07, "loss": 1.0965955257415771, "step": 8116 }, { "epoch": 2.4986149584487536, "grad_norm": 19.75, "learning_rate": 7.717713885988933e-07, "loss": 1.4483702182769775, "step": 8118 }, { "epoch": 2.49923053247153, "grad_norm": 23.5, "learning_rate": 7.713604474778886e-07, "loss": 1.3182878494262695, "step": 8120 }, { "epoch": 2.499846106494306, "grad_norm": 15.375, "learning_rate": 7.709499607081519e-07, "loss": 1.0512882471084595, "step": 8122 }, { "epoch": 2.5004616805170823, "grad_norm": 17.875, "learning_rate": 7.705399284709955e-07, "loss": 1.2965221405029297, "step": 8124 }, { "epoch": 2.5010772545398585, "grad_norm": 40.75, "learning_rate": 7.701303509475315e-07, "loss": 1.5295205116271973, "step": 8126 }, { "epoch": 2.5016928285626348, "grad_norm": 27.0, "learning_rate": 7.6972122831867e-07, "loss": 1.6842844486236572, "step": 8128 }, { "epoch": 2.502308402585411, "grad_norm": 7.625, "learning_rate": 7.693125607651216e-07, "loss": 0.8428970575332642, "step": 8130 }, { "epoch": 2.502923976608187, "grad_norm": 15.3125, "learning_rate": 7.68904348467395e-07, "loss": 1.4775192737579346, "step": 8132 }, { "epoch": 2.5035395506309635, "grad_norm": 22.75, "learning_rate": 7.684965916057978e-07, "loss": 1.201244831085205, "step": 8134 }, { "epoch": 2.5041551246537397, "grad_norm": 14.0625, "learning_rate": 7.680892903604369e-07, "loss": 1.4047563076019287, "step": 8136 }, { "epoch": 2.504770698676516, "grad_norm": 12.375, "learning_rate": 7.676824449112181e-07, "loss": 1.0803864002227783, "step": 8138 }, { "epoch": 2.505386272699292, "grad_norm": 13.6875, "learning_rate": 7.672760554378444e-07, "loss": 1.3757635354995728, "step": 8140 }, { "epoch": 2.5060018467220684, "grad_norm": 28.875, "learning_rate": 7.668701221198197e-07, "loss": 1.7247374057769775, "step": 8142 }, { "epoch": 2.5066174207448446, "grad_norm": 9.0, "learning_rate": 7.664646451364448e-07, "loss": 0.49472346901893616, "step": 8144 }, { "epoch": 2.507232994767621, "grad_norm": 4.5625, "learning_rate": 7.660596246668188e-07, "loss": 1.024422526359558, "step": 8146 }, { "epoch": 2.507848568790397, "grad_norm": 21.75, "learning_rate": 7.656550608898407e-07, "loss": 1.0848078727722168, "step": 8148 }, { "epoch": 2.5084641428131733, "grad_norm": 17.0, "learning_rate": 7.652509539842058e-07, "loss": 1.2989914417266846, "step": 8150 }, { "epoch": 2.5090797168359495, "grad_norm": 11.5625, "learning_rate": 7.648473041284093e-07, "loss": 1.4980051517486572, "step": 8152 }, { "epoch": 2.509695290858726, "grad_norm": 11.0625, "learning_rate": 7.644441115007437e-07, "loss": 1.068181037902832, "step": 8154 }, { "epoch": 2.510310864881502, "grad_norm": 13.8125, "learning_rate": 7.640413762792991e-07, "loss": 1.6641740798950195, "step": 8156 }, { "epoch": 2.5109264389042782, "grad_norm": 20.0, "learning_rate": 7.636390986419646e-07, "loss": 1.0981097221374512, "step": 8158 }, { "epoch": 2.5115420129270545, "grad_norm": 131.0, "learning_rate": 7.632372787664268e-07, "loss": 0.4805665910243988, "step": 8160 }, { "epoch": 2.5121575869498307, "grad_norm": 8.625, "learning_rate": 7.628359168301697e-07, "loss": 1.3994219303131104, "step": 8162 }, { "epoch": 2.512773160972607, "grad_norm": 11.375, "learning_rate": 7.624350130104754e-07, "loss": 1.0327104330062866, "step": 8164 }, { "epoch": 2.513388734995383, "grad_norm": 10.625, "learning_rate": 7.620345674844232e-07, "loss": 1.2655659914016724, "step": 8166 }, { "epoch": 2.5140043090181594, "grad_norm": 17.125, "learning_rate": 7.616345804288912e-07, "loss": 0.9777556657791138, "step": 8168 }, { "epoch": 2.5146198830409356, "grad_norm": 24.125, "learning_rate": 7.612350520205537e-07, "loss": 1.3112051486968994, "step": 8170 }, { "epoch": 2.515235457063712, "grad_norm": 17.5, "learning_rate": 7.608359824358824e-07, "loss": 1.42042076587677, "step": 8172 }, { "epoch": 2.515851031086488, "grad_norm": 21.5, "learning_rate": 7.604373718511477e-07, "loss": 1.642563819885254, "step": 8174 }, { "epoch": 2.5164666051092643, "grad_norm": 20.625, "learning_rate": 7.600392204424156e-07, "loss": 1.469331979751587, "step": 8176 }, { "epoch": 2.5170821791320406, "grad_norm": 12.75, "learning_rate": 7.596415283855503e-07, "loss": 1.5045514106750488, "step": 8178 }, { "epoch": 2.517697753154817, "grad_norm": 19.875, "learning_rate": 7.592442958562132e-07, "loss": 1.5238573551177979, "step": 8180 }, { "epoch": 2.518313327177593, "grad_norm": 25.625, "learning_rate": 7.588475230298616e-07, "loss": 1.4592609405517578, "step": 8182 }, { "epoch": 2.5189289012003693, "grad_norm": 15.125, "learning_rate": 7.584512100817509e-07, "loss": 1.4213488101959229, "step": 8184 }, { "epoch": 2.5195444752231455, "grad_norm": 94.5, "learning_rate": 7.580553571869333e-07, "loss": 1.2677407264709473, "step": 8186 }, { "epoch": 2.5201600492459217, "grad_norm": 16.75, "learning_rate": 7.576599645202571e-07, "loss": 1.285733699798584, "step": 8188 }, { "epoch": 2.520775623268698, "grad_norm": 15.125, "learning_rate": 7.572650322563676e-07, "loss": 1.280822515487671, "step": 8190 }, { "epoch": 2.521391197291474, "grad_norm": 24.875, "learning_rate": 7.568705605697071e-07, "loss": 1.3867173194885254, "step": 8192 }, { "epoch": 2.5220067713142504, "grad_norm": 16.125, "learning_rate": 7.564765496345142e-07, "loss": 1.411359190940857, "step": 8194 }, { "epoch": 2.5226223453370267, "grad_norm": 13.9375, "learning_rate": 7.560829996248237e-07, "loss": 1.5883018970489502, "step": 8196 }, { "epoch": 2.523237919359803, "grad_norm": 18.75, "learning_rate": 7.556899107144672e-07, "loss": 1.0765776634216309, "step": 8198 }, { "epoch": 2.523853493382579, "grad_norm": 11.4375, "learning_rate": 7.55297283077073e-07, "loss": 1.353743553161621, "step": 8200 }, { "epoch": 2.5244690674053554, "grad_norm": 5.25, "learning_rate": 7.549051168860643e-07, "loss": 1.3110792636871338, "step": 8202 }, { "epoch": 2.5250846414281316, "grad_norm": 9.1875, "learning_rate": 7.545134123146621e-07, "loss": 1.0713155269622803, "step": 8204 }, { "epoch": 2.525700215450908, "grad_norm": 8.6875, "learning_rate": 7.541221695358827e-07, "loss": 1.2455883026123047, "step": 8206 }, { "epoch": 2.526315789473684, "grad_norm": 16.875, "learning_rate": 7.537313887225374e-07, "loss": 1.690041184425354, "step": 8208 }, { "epoch": 2.5269313634964603, "grad_norm": 10.5, "learning_rate": 7.533410700472362e-07, "loss": 1.1286089420318604, "step": 8210 }, { "epoch": 2.5275469375192365, "grad_norm": 27.5, "learning_rate": 7.529512136823826e-07, "loss": 1.151153802871704, "step": 8212 }, { "epoch": 2.5281625115420128, "grad_norm": 15.125, "learning_rate": 7.525618198001758e-07, "loss": 1.4475476741790771, "step": 8214 }, { "epoch": 2.528778085564789, "grad_norm": 6.125, "learning_rate": 7.521728885726129e-07, "loss": 1.101869821548462, "step": 8216 }, { "epoch": 2.5293936595875657, "grad_norm": 9.0625, "learning_rate": 7.517844201714842e-07, "loss": 0.9757754802703857, "step": 8218 }, { "epoch": 2.5300092336103415, "grad_norm": 9.5625, "learning_rate": 7.513964147683775e-07, "loss": 1.3237364292144775, "step": 8220 }, { "epoch": 2.530624807633118, "grad_norm": 12.0, "learning_rate": 7.510088725346742e-07, "loss": 1.3525787591934204, "step": 8222 }, { "epoch": 2.531240381655894, "grad_norm": 12.3125, "learning_rate": 7.506217936415528e-07, "loss": 1.103072166442871, "step": 8224 }, { "epoch": 2.5318559556786706, "grad_norm": 16.875, "learning_rate": 7.50235178259987e-07, "loss": 1.3378674983978271, "step": 8226 }, { "epoch": 2.5324715297014464, "grad_norm": 15.125, "learning_rate": 7.49849026560744e-07, "loss": 1.2925280332565308, "step": 8228 }, { "epoch": 2.533087103724223, "grad_norm": 16.0, "learning_rate": 7.494633387143883e-07, "loss": 1.5961421728134155, "step": 8230 }, { "epoch": 2.533702677746999, "grad_norm": 10.125, "learning_rate": 7.490781148912786e-07, "loss": 1.464501142501831, "step": 8232 }, { "epoch": 2.5343182517697755, "grad_norm": 24.625, "learning_rate": 7.486933552615682e-07, "loss": 1.707505702972412, "step": 8234 }, { "epoch": 2.5349338257925513, "grad_norm": 21.75, "learning_rate": 7.483090599952067e-07, "loss": 1.503312110900879, "step": 8236 }, { "epoch": 2.535549399815328, "grad_norm": 6.15625, "learning_rate": 7.479252292619371e-07, "loss": 1.1280899047851562, "step": 8238 }, { "epoch": 2.536164973838104, "grad_norm": 81.0, "learning_rate": 7.47541863231298e-07, "loss": 1.3761932849884033, "step": 8240 }, { "epoch": 2.5367805478608805, "grad_norm": 12.0625, "learning_rate": 7.47158962072623e-07, "loss": 1.0959453582763672, "step": 8242 }, { "epoch": 2.5373961218836563, "grad_norm": 14.5, "learning_rate": 7.467765259550394e-07, "loss": 1.3974474668502808, "step": 8244 }, { "epoch": 2.538011695906433, "grad_norm": 7.46875, "learning_rate": 7.463945550474699e-07, "loss": 1.3391976356506348, "step": 8246 }, { "epoch": 2.5386272699292087, "grad_norm": 17.75, "learning_rate": 7.460130495186319e-07, "loss": 1.3113188743591309, "step": 8248 }, { "epoch": 2.5392428439519854, "grad_norm": 13.375, "learning_rate": 7.45632009537036e-07, "loss": 1.2737629413604736, "step": 8250 }, { "epoch": 2.539858417974761, "grad_norm": 6.375, "learning_rate": 7.452514352709887e-07, "loss": 1.1970089673995972, "step": 8252 }, { "epoch": 2.540473991997538, "grad_norm": 29.5, "learning_rate": 7.4487132688859e-07, "loss": 1.0147309303283691, "step": 8254 }, { "epoch": 2.541089566020314, "grad_norm": 46.5, "learning_rate": 7.444916845577338e-07, "loss": 1.3090674877166748, "step": 8256 }, { "epoch": 2.5417051400430903, "grad_norm": 10.75, "learning_rate": 7.441125084461088e-07, "loss": 1.1613578796386719, "step": 8258 }, { "epoch": 2.5423207140658666, "grad_norm": 20.125, "learning_rate": 7.437337987211975e-07, "loss": 1.4671497344970703, "step": 8260 }, { "epoch": 2.542936288088643, "grad_norm": 15.875, "learning_rate": 7.433555555502766e-07, "loss": 1.3811712265014648, "step": 8262 }, { "epoch": 2.543551862111419, "grad_norm": 19.25, "learning_rate": 7.429777791004164e-07, "loss": 1.6986422538757324, "step": 8264 }, { "epoch": 2.5441674361341953, "grad_norm": 56.5, "learning_rate": 7.42600469538481e-07, "loss": 1.27402925491333, "step": 8266 }, { "epoch": 2.5447830101569715, "grad_norm": 20.875, "learning_rate": 7.42223627031129e-07, "loss": 1.6471729278564453, "step": 8268 }, { "epoch": 2.5453985841797477, "grad_norm": 14.6875, "learning_rate": 7.418472517448114e-07, "loss": 1.2318017482757568, "step": 8270 }, { "epoch": 2.546014158202524, "grad_norm": 12.4375, "learning_rate": 7.414713438457741e-07, "loss": 1.270857810974121, "step": 8272 }, { "epoch": 2.5466297322253, "grad_norm": 26.625, "learning_rate": 7.410959035000563e-07, "loss": 1.4487571716308594, "step": 8274 }, { "epoch": 2.5472453062480764, "grad_norm": 14.8125, "learning_rate": 7.407209308734898e-07, "loss": 1.3470463752746582, "step": 8276 }, { "epoch": 2.5478608802708527, "grad_norm": 9.4375, "learning_rate": 7.403464261317005e-07, "loss": 1.4863656759262085, "step": 8278 }, { "epoch": 2.548476454293629, "grad_norm": 31.5, "learning_rate": 7.399723894401081e-07, "loss": 1.744840383529663, "step": 8280 }, { "epoch": 2.549092028316405, "grad_norm": 21.375, "learning_rate": 7.395988209639248e-07, "loss": 1.571589708328247, "step": 8282 }, { "epoch": 2.5497076023391814, "grad_norm": 15.5, "learning_rate": 7.392257208681559e-07, "loss": 1.1173479557037354, "step": 8284 }, { "epoch": 2.5503231763619576, "grad_norm": 25.75, "learning_rate": 7.388530893176005e-07, "loss": 1.352543830871582, "step": 8286 }, { "epoch": 2.550938750384734, "grad_norm": 32.25, "learning_rate": 7.384809264768504e-07, "loss": 1.4264599084854126, "step": 8288 }, { "epoch": 2.55155432440751, "grad_norm": 9.0, "learning_rate": 7.381092325102902e-07, "loss": 1.4546525478363037, "step": 8290 }, { "epoch": 2.5521698984302863, "grad_norm": 11.4375, "learning_rate": 7.377380075820974e-07, "loss": 0.9854034781455994, "step": 8292 }, { "epoch": 2.5527854724530625, "grad_norm": 13.6875, "learning_rate": 7.37367251856243e-07, "loss": 1.3239917755126953, "step": 8294 }, { "epoch": 2.5534010464758388, "grad_norm": 9.75, "learning_rate": 7.369969654964895e-07, "loss": 1.4116442203521729, "step": 8296 }, { "epoch": 2.554016620498615, "grad_norm": 16.875, "learning_rate": 7.366271486663933e-07, "loss": 1.1564812660217285, "step": 8298 }, { "epoch": 2.5546321945213912, "grad_norm": 21.0, "learning_rate": 7.36257801529303e-07, "loss": 1.0999027490615845, "step": 8300 }, { "epoch": 2.5552477685441675, "grad_norm": 18.0, "learning_rate": 7.35888924248359e-07, "loss": 1.4925274848937988, "step": 8302 }, { "epoch": 2.5558633425669437, "grad_norm": 20.0, "learning_rate": 7.355205169864957e-07, "loss": 0.9652723073959351, "step": 8304 }, { "epoch": 2.55647891658972, "grad_norm": 27.75, "learning_rate": 7.351525799064384e-07, "loss": 1.7413601875305176, "step": 8306 }, { "epoch": 2.557094490612496, "grad_norm": 9.3125, "learning_rate": 7.347851131707057e-07, "loss": 1.0957928895950317, "step": 8308 }, { "epoch": 2.5577100646352724, "grad_norm": 30.75, "learning_rate": 7.34418116941608e-07, "loss": 1.7662091255187988, "step": 8310 }, { "epoch": 2.5583256386580486, "grad_norm": 29.625, "learning_rate": 7.340515913812476e-07, "loss": 1.799209713935852, "step": 8312 }, { "epoch": 2.558941212680825, "grad_norm": 20.5, "learning_rate": 7.336855366515195e-07, "loss": 1.5272533893585205, "step": 8314 }, { "epoch": 2.559556786703601, "grad_norm": 24.625, "learning_rate": 7.333199529141107e-07, "loss": 1.0781242847442627, "step": 8316 }, { "epoch": 2.5601723607263773, "grad_norm": 13.9375, "learning_rate": 7.329548403304996e-07, "loss": 1.262157678604126, "step": 8318 }, { "epoch": 2.5607879347491536, "grad_norm": 65.5, "learning_rate": 7.32590199061957e-07, "loss": 1.4290770292282104, "step": 8320 }, { "epoch": 2.56140350877193, "grad_norm": 10.125, "learning_rate": 7.322260292695454e-07, "loss": 1.4656684398651123, "step": 8322 }, { "epoch": 2.562019082794706, "grad_norm": 9.25, "learning_rate": 7.318623311141191e-07, "loss": 1.0915682315826416, "step": 8324 }, { "epoch": 2.5626346568174823, "grad_norm": 85.5, "learning_rate": 7.314991047563237e-07, "loss": 1.1750187873840332, "step": 8326 }, { "epoch": 2.5632502308402585, "grad_norm": 9.1875, "learning_rate": 7.311363503565969e-07, "loss": 1.7010297775268555, "step": 8328 }, { "epoch": 2.5638658048630347, "grad_norm": 15.0625, "learning_rate": 7.30774068075168e-07, "loss": 1.6195107698440552, "step": 8330 }, { "epoch": 2.564481378885811, "grad_norm": 17.625, "learning_rate": 7.304122580720569e-07, "loss": 1.3422554731369019, "step": 8332 }, { "epoch": 2.565096952908587, "grad_norm": 13.25, "learning_rate": 7.300509205070758e-07, "loss": 1.464439868927002, "step": 8334 }, { "epoch": 2.5657125269313634, "grad_norm": 17.75, "learning_rate": 7.296900555398282e-07, "loss": 1.6185365915298462, "step": 8336 }, { "epoch": 2.5663281009541397, "grad_norm": 10.625, "learning_rate": 7.293296633297081e-07, "loss": 1.3132727146148682, "step": 8338 }, { "epoch": 2.566943674976916, "grad_norm": 19.875, "learning_rate": 7.289697440359012e-07, "loss": 1.5001401901245117, "step": 8340 }, { "epoch": 2.567559248999692, "grad_norm": 19.125, "learning_rate": 7.286102978173847e-07, "loss": 1.0073649883270264, "step": 8342 }, { "epoch": 2.5681748230224684, "grad_norm": 27.5, "learning_rate": 7.282513248329258e-07, "loss": 1.0872913599014282, "step": 8344 }, { "epoch": 2.5687903970452446, "grad_norm": 15.375, "learning_rate": 7.278928252410838e-07, "loss": 1.535001516342163, "step": 8346 }, { "epoch": 2.569405971068021, "grad_norm": 11.8125, "learning_rate": 7.275347992002079e-07, "loss": 1.4017884731292725, "step": 8348 }, { "epoch": 2.570021545090797, "grad_norm": 18.25, "learning_rate": 7.271772468684393e-07, "loss": 1.2841901779174805, "step": 8350 }, { "epoch": 2.5706371191135733, "grad_norm": 41.0, "learning_rate": 7.268201684037085e-07, "loss": 1.235544204711914, "step": 8352 }, { "epoch": 2.5712526931363495, "grad_norm": 16.625, "learning_rate": 7.26463563963738e-07, "loss": 0.9660817980766296, "step": 8354 }, { "epoch": 2.5718682671591258, "grad_norm": 9.6875, "learning_rate": 7.261074337060402e-07, "loss": 1.301021933555603, "step": 8356 }, { "epoch": 2.572483841181902, "grad_norm": 8.125, "learning_rate": 7.257517777879182e-07, "loss": 1.1702752113342285, "step": 8358 }, { "epoch": 2.573099415204678, "grad_norm": 10.375, "learning_rate": 7.253965963664656e-07, "loss": 1.286567211151123, "step": 8360 }, { "epoch": 2.5737149892274545, "grad_norm": 11.5625, "learning_rate": 7.250418895985668e-07, "loss": 1.185631513595581, "step": 8362 }, { "epoch": 2.5743305632502307, "grad_norm": 8.4375, "learning_rate": 7.246876576408954e-07, "loss": 1.2492417097091675, "step": 8364 }, { "epoch": 2.574946137273007, "grad_norm": 9.1875, "learning_rate": 7.243339006499171e-07, "loss": 1.3682482242584229, "step": 8366 }, { "epoch": 2.575561711295783, "grad_norm": 22.5, "learning_rate": 7.239806187818861e-07, "loss": 1.232422113418579, "step": 8368 }, { "epoch": 2.5761772853185594, "grad_norm": 31.125, "learning_rate": 7.236278121928472e-07, "loss": 0.8495137691497803, "step": 8370 }, { "epoch": 2.576792859341336, "grad_norm": 12.4375, "learning_rate": 7.232754810386362e-07, "loss": 1.4019091129302979, "step": 8372 }, { "epoch": 2.577408433364112, "grad_norm": 6.15625, "learning_rate": 7.229236254748776e-07, "loss": 1.1350939273834229, "step": 8374 }, { "epoch": 2.5780240073868885, "grad_norm": 27.0, "learning_rate": 7.225722456569866e-07, "loss": 1.1491904258728027, "step": 8376 }, { "epoch": 2.5786395814096643, "grad_norm": 7.46875, "learning_rate": 7.222213417401682e-07, "loss": 1.5071659088134766, "step": 8378 }, { "epoch": 2.579255155432441, "grad_norm": 13.4375, "learning_rate": 7.218709138794167e-07, "loss": 1.4240700006484985, "step": 8380 }, { "epoch": 2.579870729455217, "grad_norm": 23.625, "learning_rate": 7.215209622295168e-07, "loss": 1.2526869773864746, "step": 8382 }, { "epoch": 2.5804863034779935, "grad_norm": 26.25, "learning_rate": 7.211714869450427e-07, "loss": 1.2095661163330078, "step": 8384 }, { "epoch": 2.5811018775007692, "grad_norm": 25.5, "learning_rate": 7.208224881803574e-07, "loss": 1.530590534210205, "step": 8386 }, { "epoch": 2.581717451523546, "grad_norm": 14.0625, "learning_rate": 7.204739660896148e-07, "loss": 1.5323766469955444, "step": 8388 }, { "epoch": 2.5823330255463217, "grad_norm": 29.375, "learning_rate": 7.201259208267567e-07, "loss": 0.969746470451355, "step": 8390 }, { "epoch": 2.5829485995690984, "grad_norm": 6.875, "learning_rate": 7.197783525455159e-07, "loss": 1.546391248703003, "step": 8392 }, { "epoch": 2.583564173591874, "grad_norm": 12.0625, "learning_rate": 7.194312613994134e-07, "loss": 1.3328742980957031, "step": 8394 }, { "epoch": 2.584179747614651, "grad_norm": 12.5625, "learning_rate": 7.190846475417593e-07, "loss": 1.2772825956344604, "step": 8396 }, { "epoch": 2.5847953216374266, "grad_norm": 12.5, "learning_rate": 7.187385111256541e-07, "loss": 1.4215928316116333, "step": 8398 }, { "epoch": 2.5854108956602033, "grad_norm": 22.125, "learning_rate": 7.183928523039861e-07, "loss": 1.6627047061920166, "step": 8400 }, { "epoch": 2.586026469682979, "grad_norm": 24.75, "learning_rate": 7.180476712294335e-07, "loss": 1.6336177587509155, "step": 8402 }, { "epoch": 2.586642043705756, "grad_norm": 18.125, "learning_rate": 7.17702968054463e-07, "loss": 1.4483904838562012, "step": 8404 }, { "epoch": 2.587257617728532, "grad_norm": 15.375, "learning_rate": 7.173587429313306e-07, "loss": 1.3190503120422363, "step": 8406 }, { "epoch": 2.5878731917513083, "grad_norm": 11.6875, "learning_rate": 7.170149960120804e-07, "loss": 1.1253911256790161, "step": 8408 }, { "epoch": 2.5884887657740845, "grad_norm": 12.0, "learning_rate": 7.166717274485467e-07, "loss": 1.256879448890686, "step": 8410 }, { "epoch": 2.5891043397968607, "grad_norm": 33.25, "learning_rate": 7.163289373923507e-07, "loss": 1.4488470554351807, "step": 8412 }, { "epoch": 2.589719913819637, "grad_norm": 10.75, "learning_rate": 7.159866259949036e-07, "loss": 1.2682024240493774, "step": 8414 }, { "epoch": 2.590335487842413, "grad_norm": 9.8125, "learning_rate": 7.156447934074048e-07, "loss": 1.371632695198059, "step": 8416 }, { "epoch": 2.5909510618651894, "grad_norm": 71.5, "learning_rate": 7.153034397808421e-07, "loss": 0.9261094927787781, "step": 8418 }, { "epoch": 2.5915666358879657, "grad_norm": 12.625, "learning_rate": 7.149625652659918e-07, "loss": 1.3142518997192383, "step": 8420 }, { "epoch": 2.592182209910742, "grad_norm": 9.9375, "learning_rate": 7.146221700134182e-07, "loss": 1.2637038230895996, "step": 8422 }, { "epoch": 2.592797783933518, "grad_norm": 8.5, "learning_rate": 7.142822541734751e-07, "loss": 1.1678962707519531, "step": 8424 }, { "epoch": 2.5934133579562944, "grad_norm": 19.0, "learning_rate": 7.139428178963027e-07, "loss": 1.3438940048217773, "step": 8426 }, { "epoch": 2.5940289319790706, "grad_norm": 9.375, "learning_rate": 7.136038613318309e-07, "loss": 1.3192870616912842, "step": 8428 }, { "epoch": 2.594644506001847, "grad_norm": 15.875, "learning_rate": 7.132653846297776e-07, "loss": 1.523959755897522, "step": 8430 }, { "epoch": 2.595260080024623, "grad_norm": 18.25, "learning_rate": 7.129273879396473e-07, "loss": 1.1334865093231201, "step": 8432 }, { "epoch": 2.5958756540473993, "grad_norm": 11.75, "learning_rate": 7.125898714107347e-07, "loss": 1.4075385332107544, "step": 8434 }, { "epoch": 2.5964912280701755, "grad_norm": 15.125, "learning_rate": 7.122528351921207e-07, "loss": 1.591729998588562, "step": 8436 }, { "epoch": 2.5971068020929517, "grad_norm": 13.6875, "learning_rate": 7.11916279432674e-07, "loss": 1.4569002389907837, "step": 8438 }, { "epoch": 2.597722376115728, "grad_norm": 11.25, "learning_rate": 7.115802042810529e-07, "loss": 1.2565406560897827, "step": 8440 }, { "epoch": 2.598337950138504, "grad_norm": 13.75, "learning_rate": 7.112446098857011e-07, "loss": 1.421222448348999, "step": 8442 }, { "epoch": 2.5989535241612804, "grad_norm": 4.59375, "learning_rate": 7.109094963948518e-07, "loss": 1.1741873025894165, "step": 8444 }, { "epoch": 2.5995690981840567, "grad_norm": 6.9375, "learning_rate": 7.105748639565243e-07, "loss": 0.9959874749183655, "step": 8446 }, { "epoch": 2.600184672206833, "grad_norm": 23.125, "learning_rate": 7.102407127185266e-07, "loss": 1.4912631511688232, "step": 8448 }, { "epoch": 2.600800246229609, "grad_norm": 18.125, "learning_rate": 7.099070428284537e-07, "loss": 1.0366930961608887, "step": 8450 }, { "epoch": 2.6014158202523854, "grad_norm": 27.625, "learning_rate": 7.095738544336877e-07, "loss": 1.302032709121704, "step": 8452 }, { "epoch": 2.6020313942751616, "grad_norm": 15.9375, "learning_rate": 7.092411476813984e-07, "loss": 1.2757986783981323, "step": 8454 }, { "epoch": 2.602646968297938, "grad_norm": 11.0, "learning_rate": 7.089089227185432e-07, "loss": 1.0776348114013672, "step": 8456 }, { "epoch": 2.603262542320714, "grad_norm": 15.5625, "learning_rate": 7.085771796918652e-07, "loss": 1.6991130113601685, "step": 8458 }, { "epoch": 2.6038781163434903, "grad_norm": 7.9375, "learning_rate": 7.082459187478967e-07, "loss": 1.3396224975585938, "step": 8460 }, { "epoch": 2.6044936903662665, "grad_norm": 23.25, "learning_rate": 7.079151400329557e-07, "loss": 1.3427355289459229, "step": 8462 }, { "epoch": 2.605109264389043, "grad_norm": 36.5, "learning_rate": 7.075848436931472e-07, "loss": 1.116722822189331, "step": 8464 }, { "epoch": 2.605724838411819, "grad_norm": 15.9375, "learning_rate": 7.072550298743641e-07, "loss": 0.7307620048522949, "step": 8466 }, { "epoch": 2.6063404124345952, "grad_norm": 14.25, "learning_rate": 7.06925698722285e-07, "loss": 1.4221217632293701, "step": 8468 }, { "epoch": 2.6069559864573715, "grad_norm": 13.1875, "learning_rate": 7.065968503823761e-07, "loss": 1.3831367492675781, "step": 8470 }, { "epoch": 2.6075715604801477, "grad_norm": 17.0, "learning_rate": 7.062684849998903e-07, "loss": 0.9629683494567871, "step": 8472 }, { "epoch": 2.608187134502924, "grad_norm": 11.625, "learning_rate": 7.059406027198662e-07, "loss": 1.333807349205017, "step": 8474 }, { "epoch": 2.6088027085257, "grad_norm": 33.5, "learning_rate": 7.056132036871306e-07, "loss": 0.8511074781417847, "step": 8476 }, { "epoch": 2.6094182825484764, "grad_norm": 21.5, "learning_rate": 7.052862880462958e-07, "loss": 1.3947702646255493, "step": 8478 }, { "epoch": 2.6100338565712526, "grad_norm": 26.5, "learning_rate": 7.049598559417604e-07, "loss": 1.2550370693206787, "step": 8480 }, { "epoch": 2.610649430594029, "grad_norm": 33.25, "learning_rate": 7.046339075177104e-07, "loss": 1.7725454568862915, "step": 8482 }, { "epoch": 2.611265004616805, "grad_norm": 17.0, "learning_rate": 7.043084429181172e-07, "loss": 1.7614102363586426, "step": 8484 }, { "epoch": 2.6118805786395813, "grad_norm": 15.6875, "learning_rate": 7.039834622867393e-07, "loss": 1.2160773277282715, "step": 8486 }, { "epoch": 2.6124961526623576, "grad_norm": 39.75, "learning_rate": 7.036589657671205e-07, "loss": 1.4206631183624268, "step": 8488 }, { "epoch": 2.613111726685134, "grad_norm": 30.5, "learning_rate": 7.033349535025915e-07, "loss": 1.267873764038086, "step": 8490 }, { "epoch": 2.61372730070791, "grad_norm": 12.875, "learning_rate": 7.030114256362693e-07, "loss": 1.347649335861206, "step": 8492 }, { "epoch": 2.6143428747306863, "grad_norm": 35.75, "learning_rate": 7.026883823110557e-07, "loss": 0.9955426454544067, "step": 8494 }, { "epoch": 2.6149584487534625, "grad_norm": 13.5, "learning_rate": 7.023658236696399e-07, "loss": 1.5194344520568848, "step": 8496 }, { "epoch": 2.6155740227762387, "grad_norm": 19.0, "learning_rate": 7.020437498544965e-07, "loss": 1.7742666006088257, "step": 8498 }, { "epoch": 2.616189596799015, "grad_norm": 17.0, "learning_rate": 7.017221610078855e-07, "loss": 1.0506432056427002, "step": 8500 }, { "epoch": 2.616805170821791, "grad_norm": 18.25, "learning_rate": 7.014010572718532e-07, "loss": 1.3107993602752686, "step": 8502 }, { "epoch": 2.6174207448445674, "grad_norm": 19.875, "learning_rate": 7.010804387882316e-07, "loss": 1.4073238372802734, "step": 8504 }, { "epoch": 2.6180363188673437, "grad_norm": 13.375, "learning_rate": 7.00760305698638e-07, "loss": 1.240088701248169, "step": 8506 }, { "epoch": 2.61865189289012, "grad_norm": 7.375, "learning_rate": 7.004406581444758e-07, "loss": 0.9746329188346863, "step": 8508 }, { "epoch": 2.619267466912896, "grad_norm": 12.4375, "learning_rate": 7.001214962669335e-07, "loss": 1.2753421068191528, "step": 8510 }, { "epoch": 2.6198830409356724, "grad_norm": 16.625, "learning_rate": 6.998028202069855e-07, "loss": 1.495098352432251, "step": 8512 }, { "epoch": 2.6204986149584486, "grad_norm": 10.75, "learning_rate": 6.994846301053912e-07, "loss": 1.4813196659088135, "step": 8514 }, { "epoch": 2.621114188981225, "grad_norm": 11.0, "learning_rate": 6.991669261026955e-07, "loss": 1.1549345254898071, "step": 8516 }, { "epoch": 2.621729763004001, "grad_norm": 16.0, "learning_rate": 6.988497083392288e-07, "loss": 1.3601171970367432, "step": 8518 }, { "epoch": 2.6223453370267773, "grad_norm": 14.125, "learning_rate": 6.985329769551065e-07, "loss": 1.7009246349334717, "step": 8520 }, { "epoch": 2.622960911049554, "grad_norm": 20.0, "learning_rate": 6.98216732090229e-07, "loss": 0.9509875774383545, "step": 8522 }, { "epoch": 2.6235764850723298, "grad_norm": 8.0625, "learning_rate": 6.979009738842824e-07, "loss": 1.3136813640594482, "step": 8524 }, { "epoch": 2.6241920590951064, "grad_norm": 10.9375, "learning_rate": 6.975857024767368e-07, "loss": 1.6199955940246582, "step": 8526 }, { "epoch": 2.6248076331178822, "grad_norm": 45.0, "learning_rate": 6.972709180068488e-07, "loss": 1.7706856727600098, "step": 8528 }, { "epoch": 2.625423207140659, "grad_norm": 23.625, "learning_rate": 6.969566206136588e-07, "loss": 1.009418249130249, "step": 8530 }, { "epoch": 2.6260387811634347, "grad_norm": 16.25, "learning_rate": 6.966428104359916e-07, "loss": 1.1988286972045898, "step": 8532 }, { "epoch": 2.6266543551862114, "grad_norm": 10.9375, "learning_rate": 6.963294876124585e-07, "loss": 1.2532944679260254, "step": 8534 }, { "epoch": 2.627269929208987, "grad_norm": 23.375, "learning_rate": 6.96016652281454e-07, "loss": 1.2428243160247803, "step": 8536 }, { "epoch": 2.627885503231764, "grad_norm": 25.75, "learning_rate": 6.957043045811579e-07, "loss": 0.9903680682182312, "step": 8538 }, { "epoch": 2.6285010772545396, "grad_norm": 41.5, "learning_rate": 6.953924446495348e-07, "loss": 0.9027482867240906, "step": 8540 }, { "epoch": 2.6291166512773163, "grad_norm": 3.578125, "learning_rate": 6.950810726243332e-07, "loss": 1.1158403158187866, "step": 8542 }, { "epoch": 2.629732225300092, "grad_norm": 23.25, "learning_rate": 6.947701886430868e-07, "loss": 1.6318439245224, "step": 8544 }, { "epoch": 2.6303477993228688, "grad_norm": 13.25, "learning_rate": 6.944597928431132e-07, "loss": 1.3490636348724365, "step": 8546 }, { "epoch": 2.6309633733456446, "grad_norm": 13.5, "learning_rate": 6.941498853615145e-07, "loss": 1.3829448223114014, "step": 8548 }, { "epoch": 2.6315789473684212, "grad_norm": 24.5, "learning_rate": 6.938404663351774e-07, "loss": 1.3896286487579346, "step": 8550 }, { "epoch": 2.632194521391197, "grad_norm": 33.5, "learning_rate": 6.935315359007725e-07, "loss": 1.0280698537826538, "step": 8552 }, { "epoch": 2.6328100954139737, "grad_norm": 11.1875, "learning_rate": 6.932230941947551e-07, "loss": 1.194199562072754, "step": 8554 }, { "epoch": 2.6334256694367495, "grad_norm": 19.875, "learning_rate": 6.929151413533638e-07, "loss": 1.0795966386795044, "step": 8556 }, { "epoch": 2.634041243459526, "grad_norm": 21.75, "learning_rate": 6.926076775126217e-07, "loss": 1.6833020448684692, "step": 8558 }, { "epoch": 2.6346568174823024, "grad_norm": 18.75, "learning_rate": 6.923007028083365e-07, "loss": 1.5397007465362549, "step": 8560 }, { "epoch": 2.6352723915050786, "grad_norm": 16.125, "learning_rate": 6.919942173760986e-07, "loss": 1.5429842472076416, "step": 8562 }, { "epoch": 2.635887965527855, "grad_norm": 6.75, "learning_rate": 6.916882213512831e-07, "loss": 1.1548995971679688, "step": 8564 }, { "epoch": 2.636503539550631, "grad_norm": 20.125, "learning_rate": 6.913827148690494e-07, "loss": 1.2357516288757324, "step": 8566 }, { "epoch": 2.6371191135734073, "grad_norm": 25.5, "learning_rate": 6.910776980643394e-07, "loss": 1.1196115016937256, "step": 8568 }, { "epoch": 2.6377346875961836, "grad_norm": 12.125, "learning_rate": 6.907731710718794e-07, "loss": 1.3863110542297363, "step": 8570 }, { "epoch": 2.63835026161896, "grad_norm": 17.125, "learning_rate": 6.904691340261799e-07, "loss": 1.6990160942077637, "step": 8572 }, { "epoch": 2.638965835641736, "grad_norm": 9.5, "learning_rate": 6.901655870615336e-07, "loss": 1.697874665260315, "step": 8574 }, { "epoch": 2.6395814096645123, "grad_norm": 59.25, "learning_rate": 6.898625303120182e-07, "loss": 1.6032500267028809, "step": 8576 }, { "epoch": 2.6401969836872885, "grad_norm": 10.125, "learning_rate": 6.895599639114939e-07, "loss": 1.2693076133728027, "step": 8578 }, { "epoch": 2.6408125577100647, "grad_norm": 11.25, "learning_rate": 6.892578879936048e-07, "loss": 1.2726253271102905, "step": 8580 }, { "epoch": 2.641428131732841, "grad_norm": 12.5625, "learning_rate": 6.88956302691778e-07, "loss": 1.5192341804504395, "step": 8582 }, { "epoch": 2.642043705755617, "grad_norm": 26.0, "learning_rate": 6.88655208139224e-07, "loss": 1.3771846294403076, "step": 8584 }, { "epoch": 2.6426592797783934, "grad_norm": 18.5, "learning_rate": 6.88354604468937e-07, "loss": 1.4578754901885986, "step": 8586 }, { "epoch": 2.6432748538011697, "grad_norm": 11.4375, "learning_rate": 6.880544918136936e-07, "loss": 1.217280387878418, "step": 8588 }, { "epoch": 2.643890427823946, "grad_norm": 24.0, "learning_rate": 6.877548703060541e-07, "loss": 1.3406977653503418, "step": 8590 }, { "epoch": 2.644506001846722, "grad_norm": 14.5625, "learning_rate": 6.874557400783616e-07, "loss": 1.7917208671569824, "step": 8592 }, { "epoch": 2.6451215758694984, "grad_norm": 19.25, "learning_rate": 6.871571012627421e-07, "loss": 1.2604199647903442, "step": 8594 }, { "epoch": 2.6457371498922746, "grad_norm": 13.375, "learning_rate": 6.868589539911052e-07, "loss": 1.0737535953521729, "step": 8596 }, { "epoch": 2.646352723915051, "grad_norm": 25.125, "learning_rate": 6.865612983951423e-07, "loss": 1.3047271966934204, "step": 8598 }, { "epoch": 2.646968297937827, "grad_norm": 25.25, "learning_rate": 6.862641346063285e-07, "loss": 1.7620556354522705, "step": 8600 }, { "epoch": 2.6475838719606033, "grad_norm": 19.25, "learning_rate": 6.859674627559217e-07, "loss": 1.283871054649353, "step": 8602 }, { "epoch": 2.6481994459833795, "grad_norm": 12.0, "learning_rate": 6.856712829749616e-07, "loss": 1.4960191249847412, "step": 8604 }, { "epoch": 2.6488150200061558, "grad_norm": 22.125, "learning_rate": 6.853755953942714e-07, "loss": 1.9348828792572021, "step": 8606 }, { "epoch": 2.649430594028932, "grad_norm": 19.75, "learning_rate": 6.85080400144457e-07, "loss": 1.135761022567749, "step": 8608 }, { "epoch": 2.6500461680517082, "grad_norm": 109.5, "learning_rate": 6.847856973559057e-07, "loss": 1.211793303489685, "step": 8610 }, { "epoch": 2.6506617420744845, "grad_norm": 24.375, "learning_rate": 6.844914871587888e-07, "loss": 1.6693403720855713, "step": 8612 }, { "epoch": 2.6512773160972607, "grad_norm": 25.25, "learning_rate": 6.841977696830587e-07, "loss": 1.7439048290252686, "step": 8614 }, { "epoch": 2.651892890120037, "grad_norm": 5.0625, "learning_rate": 6.839045450584512e-07, "loss": 1.3092360496520996, "step": 8616 }, { "epoch": 2.652508464142813, "grad_norm": 10.1875, "learning_rate": 6.836118134144839e-07, "loss": 1.1425950527191162, "step": 8618 }, { "epoch": 2.6531240381655894, "grad_norm": 3.609375, "learning_rate": 6.833195748804561e-07, "loss": 1.2227166891098022, "step": 8620 }, { "epoch": 2.6537396121883656, "grad_norm": 14.125, "learning_rate": 6.830278295854509e-07, "loss": 1.398766279220581, "step": 8622 }, { "epoch": 2.654355186211142, "grad_norm": 13.3125, "learning_rate": 6.827365776583319e-07, "loss": 1.239612102508545, "step": 8624 }, { "epoch": 2.654970760233918, "grad_norm": 12.5, "learning_rate": 6.824458192277451e-07, "loss": 1.468193531036377, "step": 8626 }, { "epoch": 2.6555863342566943, "grad_norm": 10.8125, "learning_rate": 6.821555544221199e-07, "loss": 1.3009378910064697, "step": 8628 }, { "epoch": 2.6562019082794706, "grad_norm": 15.5, "learning_rate": 6.818657833696655e-07, "loss": 1.7035462856292725, "step": 8630 }, { "epoch": 2.656817482302247, "grad_norm": 8.1875, "learning_rate": 6.815765061983747e-07, "loss": 1.3358898162841797, "step": 8632 }, { "epoch": 2.657433056325023, "grad_norm": 32.0, "learning_rate": 6.812877230360214e-07, "loss": 1.3851983547210693, "step": 8634 }, { "epoch": 2.6580486303477993, "grad_norm": 9.5625, "learning_rate": 6.809994340101614e-07, "loss": 1.2320975065231323, "step": 8636 }, { "epoch": 2.6586642043705755, "grad_norm": 27.5, "learning_rate": 6.807116392481322e-07, "loss": 1.1153748035430908, "step": 8638 }, { "epoch": 2.6592797783933517, "grad_norm": 6.75, "learning_rate": 6.804243388770534e-07, "loss": 1.4036579132080078, "step": 8640 }, { "epoch": 2.659895352416128, "grad_norm": 18.5, "learning_rate": 6.801375330238259e-07, "loss": 1.3697552680969238, "step": 8642 }, { "epoch": 2.660510926438904, "grad_norm": 20.125, "learning_rate": 6.798512218151318e-07, "loss": 1.6686692237854004, "step": 8644 }, { "epoch": 2.6611265004616804, "grad_norm": 15.25, "learning_rate": 6.795654053774355e-07, "loss": 1.219560146331787, "step": 8646 }, { "epoch": 2.6617420744844567, "grad_norm": 14.875, "learning_rate": 6.792800838369821e-07, "loss": 1.338904619216919, "step": 8648 }, { "epoch": 2.662357648507233, "grad_norm": 15.6875, "learning_rate": 6.789952573197986e-07, "loss": 1.3034480810165405, "step": 8650 }, { "epoch": 2.662973222530009, "grad_norm": 20.75, "learning_rate": 6.787109259516932e-07, "loss": 1.2775514125823975, "step": 8652 }, { "epoch": 2.6635887965527854, "grad_norm": 25.125, "learning_rate": 6.784270898582553e-07, "loss": 1.7190017700195312, "step": 8654 }, { "epoch": 2.6642043705755616, "grad_norm": 12.0625, "learning_rate": 6.781437491648559e-07, "loss": 1.3352466821670532, "step": 8656 }, { "epoch": 2.664819944598338, "grad_norm": 11.5625, "learning_rate": 6.778609039966464e-07, "loss": 1.4454190731048584, "step": 8658 }, { "epoch": 2.665435518621114, "grad_norm": 22.125, "learning_rate": 6.775785544785606e-07, "loss": 1.3824318647384644, "step": 8660 }, { "epoch": 2.6660510926438903, "grad_norm": 19.875, "learning_rate": 6.772967007353117e-07, "loss": 1.6892662048339844, "step": 8662 }, { "epoch": 2.6666666666666665, "grad_norm": 19.25, "learning_rate": 6.770153428913956e-07, "loss": 1.223496437072754, "step": 8664 }, { "epoch": 2.6672822406894428, "grad_norm": 19.25, "learning_rate": 6.767344810710878e-07, "loss": 1.4526058435440063, "step": 8666 }, { "epoch": 2.667897814712219, "grad_norm": 38.0, "learning_rate": 6.764541153984458e-07, "loss": 1.5432841777801514, "step": 8668 }, { "epoch": 2.668513388734995, "grad_norm": 12.6875, "learning_rate": 6.76174245997307e-07, "loss": 1.3734015226364136, "step": 8670 }, { "epoch": 2.669128962757772, "grad_norm": 18.75, "learning_rate": 6.758948729912904e-07, "loss": 1.566282033920288, "step": 8672 }, { "epoch": 2.6697445367805477, "grad_norm": 10.875, "learning_rate": 6.756159965037951e-07, "loss": 1.2576236724853516, "step": 8674 }, { "epoch": 2.6703601108033244, "grad_norm": 11.3125, "learning_rate": 6.753376166580013e-07, "loss": 1.0591974258422852, "step": 8676 }, { "epoch": 2.6709756848261, "grad_norm": 37.25, "learning_rate": 6.750597335768695e-07, "loss": 1.7590951919555664, "step": 8678 }, { "epoch": 2.671591258848877, "grad_norm": 17.25, "learning_rate": 6.747823473831416e-07, "loss": 1.7607227563858032, "step": 8680 }, { "epoch": 2.6722068328716526, "grad_norm": 17.5, "learning_rate": 6.745054581993382e-07, "loss": 1.4102087020874023, "step": 8682 }, { "epoch": 2.6728224068944293, "grad_norm": 12.0625, "learning_rate": 6.742290661477629e-07, "loss": 1.2666304111480713, "step": 8684 }, { "epoch": 2.673437980917205, "grad_norm": 30.625, "learning_rate": 6.739531713504978e-07, "loss": 1.2427082061767578, "step": 8686 }, { "epoch": 2.6740535549399818, "grad_norm": 18.75, "learning_rate": 6.736777739294055e-07, "loss": 1.1650917530059814, "step": 8688 }, { "epoch": 2.6746691289627575, "grad_norm": 22.125, "learning_rate": 6.734028740061301e-07, "loss": 1.6606533527374268, "step": 8690 }, { "epoch": 2.6752847029855342, "grad_norm": 11.8125, "learning_rate": 6.731284717020948e-07, "loss": 1.676476240158081, "step": 8692 }, { "epoch": 2.67590027700831, "grad_norm": 23.125, "learning_rate": 6.728545671385035e-07, "loss": 0.9003405570983887, "step": 8694 }, { "epoch": 2.6765158510310867, "grad_norm": 8.6875, "learning_rate": 6.725811604363403e-07, "loss": 0.7444573640823364, "step": 8696 }, { "epoch": 2.6771314250538625, "grad_norm": 14.75, "learning_rate": 6.72308251716369e-07, "loss": 1.0186790227890015, "step": 8698 }, { "epoch": 2.677746999076639, "grad_norm": 13.9375, "learning_rate": 6.720358410991337e-07, "loss": 1.7139232158660889, "step": 8700 }, { "epoch": 2.678362573099415, "grad_norm": 17.5, "learning_rate": 6.717639287049589e-07, "loss": 0.9571051597595215, "step": 8702 }, { "epoch": 2.6789781471221916, "grad_norm": 13.4375, "learning_rate": 6.714925146539483e-07, "loss": 0.839079737663269, "step": 8704 }, { "epoch": 2.6795937211449674, "grad_norm": 27.75, "learning_rate": 6.712215990659856e-07, "loss": 1.8214905261993408, "step": 8706 }, { "epoch": 2.680209295167744, "grad_norm": 23.375, "learning_rate": 6.709511820607348e-07, "loss": 1.3192217350006104, "step": 8708 }, { "epoch": 2.6808248691905203, "grad_norm": 12.875, "learning_rate": 6.706812637576395e-07, "loss": 0.8749622106552124, "step": 8710 }, { "epoch": 2.6814404432132966, "grad_norm": 7.5625, "learning_rate": 6.704118442759226e-07, "loss": 1.2919583320617676, "step": 8712 }, { "epoch": 2.682056017236073, "grad_norm": 20.25, "learning_rate": 6.701429237345872e-07, "loss": 1.2938746213912964, "step": 8714 }, { "epoch": 2.682671591258849, "grad_norm": 11.3125, "learning_rate": 6.698745022524159e-07, "loss": 1.1630654335021973, "step": 8716 }, { "epoch": 2.6832871652816253, "grad_norm": 4.5, "learning_rate": 6.696065799479704e-07, "loss": 1.2157087326049805, "step": 8718 }, { "epoch": 2.6839027393044015, "grad_norm": 19.625, "learning_rate": 6.693391569395927e-07, "loss": 1.2119028568267822, "step": 8720 }, { "epoch": 2.6845183133271777, "grad_norm": 13.875, "learning_rate": 6.690722333454038e-07, "loss": 1.262474536895752, "step": 8722 }, { "epoch": 2.685133887349954, "grad_norm": 11.125, "learning_rate": 6.688058092833038e-07, "loss": 1.2290127277374268, "step": 8724 }, { "epoch": 2.68574946137273, "grad_norm": 11.1875, "learning_rate": 6.685398848709727e-07, "loss": 1.341980218887329, "step": 8726 }, { "epoch": 2.6863650353955064, "grad_norm": 17.875, "learning_rate": 6.6827446022587e-07, "loss": 1.7965751886367798, "step": 8728 }, { "epoch": 2.6869806094182827, "grad_norm": 18.5, "learning_rate": 6.680095354652336e-07, "loss": 1.4040262699127197, "step": 8730 }, { "epoch": 2.687596183441059, "grad_norm": 26.25, "learning_rate": 6.67745110706081e-07, "loss": 1.7785351276397705, "step": 8732 }, { "epoch": 2.688211757463835, "grad_norm": 18.625, "learning_rate": 6.674811860652094e-07, "loss": 1.7813286781311035, "step": 8734 }, { "epoch": 2.6888273314866113, "grad_norm": 22.75, "learning_rate": 6.672177616591943e-07, "loss": 1.3169972896575928, "step": 8736 }, { "epoch": 2.6894429055093876, "grad_norm": 10.8125, "learning_rate": 6.669548376043905e-07, "loss": 1.4833617210388184, "step": 8738 }, { "epoch": 2.690058479532164, "grad_norm": 11.625, "learning_rate": 6.666924140169319e-07, "loss": 1.3738125562667847, "step": 8740 }, { "epoch": 2.69067405355494, "grad_norm": 27.875, "learning_rate": 6.664304910127317e-07, "loss": 1.2362579107284546, "step": 8742 }, { "epoch": 2.6912896275777163, "grad_norm": 28.625, "learning_rate": 6.66169068707481e-07, "loss": 1.728048324584961, "step": 8744 }, { "epoch": 2.6919052016004925, "grad_norm": 30.875, "learning_rate": 6.659081472166506e-07, "loss": 1.4855546951293945, "step": 8746 }, { "epoch": 2.6925207756232687, "grad_norm": 8.25, "learning_rate": 6.656477266554898e-07, "loss": 1.1835436820983887, "step": 8748 }, { "epoch": 2.693136349646045, "grad_norm": 16.625, "learning_rate": 6.653878071390264e-07, "loss": 1.6319580078125, "step": 8750 }, { "epoch": 2.693751923668821, "grad_norm": 11.4375, "learning_rate": 6.651283887820678e-07, "loss": 0.6635792255401611, "step": 8752 }, { "epoch": 2.6943674976915974, "grad_norm": 5.375, "learning_rate": 6.648694716991992e-07, "loss": 1.3232951164245605, "step": 8754 }, { "epoch": 2.6949830717143737, "grad_norm": 11.6875, "learning_rate": 6.646110560047838e-07, "loss": 1.319361686706543, "step": 8756 }, { "epoch": 2.69559864573715, "grad_norm": 8.375, "learning_rate": 6.643531418129651e-07, "loss": 1.2317544221878052, "step": 8758 }, { "epoch": 2.696214219759926, "grad_norm": 57.0, "learning_rate": 6.640957292376635e-07, "loss": 1.5650432109832764, "step": 8760 }, { "epoch": 2.6968297937827024, "grad_norm": 23.875, "learning_rate": 6.638388183925786e-07, "loss": 1.5568079948425293, "step": 8762 }, { "epoch": 2.6974453678054786, "grad_norm": 4.75, "learning_rate": 6.635824093911883e-07, "loss": 1.2287406921386719, "step": 8764 }, { "epoch": 2.698060941828255, "grad_norm": 16.0, "learning_rate": 6.633265023467485e-07, "loss": 1.466149091720581, "step": 8766 }, { "epoch": 2.698676515851031, "grad_norm": 18.25, "learning_rate": 6.63071097372294e-07, "loss": 1.5374802350997925, "step": 8768 }, { "epoch": 2.6992920898738073, "grad_norm": 12.4375, "learning_rate": 6.628161945806372e-07, "loss": 1.3909275531768799, "step": 8770 }, { "epoch": 2.6999076638965835, "grad_norm": 17.375, "learning_rate": 6.625617940843691e-07, "loss": 1.3820092678070068, "step": 8772 }, { "epoch": 2.7005232379193598, "grad_norm": 12.75, "learning_rate": 6.623078959958584e-07, "loss": 1.3533204793930054, "step": 8774 }, { "epoch": 2.701138811942136, "grad_norm": 18.125, "learning_rate": 6.620545004272524e-07, "loss": 1.3313978910446167, "step": 8776 }, { "epoch": 2.7017543859649122, "grad_norm": 11.0625, "learning_rate": 6.618016074904765e-07, "loss": 1.130128026008606, "step": 8778 }, { "epoch": 2.7023699599876885, "grad_norm": 13.625, "learning_rate": 6.615492172972335e-07, "loss": 1.2231190204620361, "step": 8780 }, { "epoch": 2.7029855340104647, "grad_norm": 16.625, "learning_rate": 6.612973299590039e-07, "loss": 0.885326623916626, "step": 8782 }, { "epoch": 2.703601108033241, "grad_norm": 11.6875, "learning_rate": 6.610459455870476e-07, "loss": 1.4117552042007446, "step": 8784 }, { "epoch": 2.704216682056017, "grad_norm": 5.03125, "learning_rate": 6.607950642924009e-07, "loss": 1.1439400911331177, "step": 8786 }, { "epoch": 2.7048322560787934, "grad_norm": 5.875, "learning_rate": 6.605446861858783e-07, "loss": 1.2332112789154053, "step": 8788 }, { "epoch": 2.7054478301015696, "grad_norm": 3.453125, "learning_rate": 6.602948113780724e-07, "loss": 1.0315492153167725, "step": 8790 }, { "epoch": 2.706063404124346, "grad_norm": 24.125, "learning_rate": 6.600454399793526e-07, "loss": 1.2301273345947266, "step": 8792 }, { "epoch": 2.706678978147122, "grad_norm": 14.5, "learning_rate": 6.597965720998672e-07, "loss": 1.4328529834747314, "step": 8794 }, { "epoch": 2.7072945521698983, "grad_norm": 23.875, "learning_rate": 6.595482078495411e-07, "loss": 1.2243239879608154, "step": 8796 }, { "epoch": 2.7079101261926746, "grad_norm": 11.125, "learning_rate": 6.59300347338077e-07, "loss": 1.1748735904693604, "step": 8798 }, { "epoch": 2.708525700215451, "grad_norm": 13.5625, "learning_rate": 6.590529906749554e-07, "loss": 1.195931077003479, "step": 8800 }, { "epoch": 2.709141274238227, "grad_norm": 16.375, "learning_rate": 6.588061379694336e-07, "loss": 1.6373540163040161, "step": 8802 }, { "epoch": 2.7097568482610033, "grad_norm": 29.25, "learning_rate": 6.585597893305473e-07, "loss": 1.38240647315979, "step": 8804 }, { "epoch": 2.7103724222837795, "grad_norm": 4.6875, "learning_rate": 6.583139448671087e-07, "loss": 1.1793478727340698, "step": 8806 }, { "epoch": 2.7109879963065557, "grad_norm": 10.25, "learning_rate": 6.580686046877075e-07, "loss": 1.1160082817077637, "step": 8808 }, { "epoch": 2.711603570329332, "grad_norm": 12.0, "learning_rate": 6.578237689007108e-07, "loss": 1.3985621929168701, "step": 8810 }, { "epoch": 2.712219144352108, "grad_norm": 9.4375, "learning_rate": 6.575794376142629e-07, "loss": 1.3924959897994995, "step": 8812 }, { "epoch": 2.7128347183748844, "grad_norm": 8.6875, "learning_rate": 6.573356109362851e-07, "loss": 1.2795300483703613, "step": 8814 }, { "epoch": 2.7134502923976607, "grad_norm": 28.25, "learning_rate": 6.570922889744762e-07, "loss": 1.362342357635498, "step": 8816 }, { "epoch": 2.714065866420437, "grad_norm": 27.125, "learning_rate": 6.568494718363112e-07, "loss": 1.2380298376083374, "step": 8818 }, { "epoch": 2.714681440443213, "grad_norm": 18.875, "learning_rate": 6.566071596290434e-07, "loss": 0.7231048941612244, "step": 8820 }, { "epoch": 2.71529701446599, "grad_norm": 18.375, "learning_rate": 6.563653524597021e-07, "loss": 1.4576283693313599, "step": 8822 }, { "epoch": 2.7159125884887656, "grad_norm": 31.875, "learning_rate": 6.561240504350935e-07, "loss": 1.336194396018982, "step": 8824 }, { "epoch": 2.7165281625115423, "grad_norm": 18.5, "learning_rate": 6.558832536618015e-07, "loss": 1.5954232215881348, "step": 8826 }, { "epoch": 2.717143736534318, "grad_norm": 17.625, "learning_rate": 6.55642962246186e-07, "loss": 1.5177388191223145, "step": 8828 }, { "epoch": 2.7177593105570947, "grad_norm": 9.25, "learning_rate": 6.55403176294384e-07, "loss": 1.209438443183899, "step": 8830 }, { "epoch": 2.7183748845798705, "grad_norm": 7.1875, "learning_rate": 6.551638959123095e-07, "loss": 1.2927244901657104, "step": 8832 }, { "epoch": 2.718990458602647, "grad_norm": 75.5, "learning_rate": 6.549251212056525e-07, "loss": 1.129912257194519, "step": 8834 }, { "epoch": 2.719606032625423, "grad_norm": 13.375, "learning_rate": 6.546868522798803e-07, "loss": 1.4281249046325684, "step": 8836 }, { "epoch": 2.7202216066481997, "grad_norm": 9.8125, "learning_rate": 6.544490892402366e-07, "loss": 1.14664626121521, "step": 8838 }, { "epoch": 2.7208371806709755, "grad_norm": 20.625, "learning_rate": 6.542118321917414e-07, "loss": 1.4425570964813232, "step": 8840 }, { "epoch": 2.721452754693752, "grad_norm": 13.5, "learning_rate": 6.539750812391918e-07, "loss": 1.6932718753814697, "step": 8842 }, { "epoch": 2.722068328716528, "grad_norm": 20.75, "learning_rate": 6.537388364871601e-07, "loss": 1.110339641571045, "step": 8844 }, { "epoch": 2.7226839027393046, "grad_norm": 13.3125, "learning_rate": 6.535030980399971e-07, "loss": 1.1894402503967285, "step": 8846 }, { "epoch": 2.7232994767620804, "grad_norm": 11.0625, "learning_rate": 6.532678660018281e-07, "loss": 1.2754297256469727, "step": 8848 }, { "epoch": 2.723915050784857, "grad_norm": 12.6875, "learning_rate": 6.53033140476555e-07, "loss": 1.4751468896865845, "step": 8850 }, { "epoch": 2.724530624807633, "grad_norm": 10.5625, "learning_rate": 6.527989215678571e-07, "loss": 1.2689716815948486, "step": 8852 }, { "epoch": 2.7251461988304095, "grad_norm": 38.75, "learning_rate": 6.525652093791885e-07, "loss": 1.395056962966919, "step": 8854 }, { "epoch": 2.7257617728531853, "grad_norm": 9.1875, "learning_rate": 6.523320040137805e-07, "loss": 1.2616653442382812, "step": 8856 }, { "epoch": 2.726377346875962, "grad_norm": 27.0, "learning_rate": 6.520993055746403e-07, "loss": 1.917528510093689, "step": 8858 }, { "epoch": 2.7269929208987382, "grad_norm": 21.125, "learning_rate": 6.518671141645506e-07, "loss": 1.4512934684753418, "step": 8860 }, { "epoch": 2.7276084949215145, "grad_norm": 14.875, "learning_rate": 6.516354298860711e-07, "loss": 1.3462598323822021, "step": 8862 }, { "epoch": 2.7282240689442907, "grad_norm": 10.0, "learning_rate": 6.514042528415366e-07, "loss": 1.181436538696289, "step": 8864 }, { "epoch": 2.728839642967067, "grad_norm": 30.375, "learning_rate": 6.511735831330586e-07, "loss": 1.5524072647094727, "step": 8866 }, { "epoch": 2.729455216989843, "grad_norm": 10.625, "learning_rate": 6.50943420862524e-07, "loss": 1.4597262144088745, "step": 8868 }, { "epoch": 2.7300707910126194, "grad_norm": 26.25, "learning_rate": 6.507137661315956e-07, "loss": 1.7072659730911255, "step": 8870 }, { "epoch": 2.7306863650353956, "grad_norm": 12.0, "learning_rate": 6.504846190417125e-07, "loss": 1.2412455081939697, "step": 8872 }, { "epoch": 2.731301939058172, "grad_norm": 15.875, "learning_rate": 6.50255979694089e-07, "loss": 1.5315024852752686, "step": 8874 }, { "epoch": 2.731917513080948, "grad_norm": 45.0, "learning_rate": 6.500278481897154e-07, "loss": 1.7796781063079834, "step": 8876 }, { "epoch": 2.7325330871037243, "grad_norm": 22.25, "learning_rate": 6.498002246293578e-07, "loss": 1.4514446258544922, "step": 8878 }, { "epoch": 2.7331486611265006, "grad_norm": 16.0, "learning_rate": 6.495731091135575e-07, "loss": 1.5924737453460693, "step": 8880 }, { "epoch": 2.733764235149277, "grad_norm": 10.75, "learning_rate": 6.493465017426318e-07, "loss": 1.1855311393737793, "step": 8882 }, { "epoch": 2.734379809172053, "grad_norm": 12.1875, "learning_rate": 6.491204026166737e-07, "loss": 1.1828142404556274, "step": 8884 }, { "epoch": 2.7349953831948293, "grad_norm": 23.375, "learning_rate": 6.488948118355509e-07, "loss": 1.6172746419906616, "step": 8886 }, { "epoch": 2.7356109572176055, "grad_norm": 7.09375, "learning_rate": 6.486697294989078e-07, "loss": 1.1457502841949463, "step": 8888 }, { "epoch": 2.7362265312403817, "grad_norm": 10.75, "learning_rate": 6.48445155706163e-07, "loss": 1.4178847074508667, "step": 8890 }, { "epoch": 2.736842105263158, "grad_norm": 32.25, "learning_rate": 6.482210905565111e-07, "loss": 1.2971978187561035, "step": 8892 }, { "epoch": 2.737457679285934, "grad_norm": 23.75, "learning_rate": 6.479975341489219e-07, "loss": 1.2564046382904053, "step": 8894 }, { "epoch": 2.7380732533087104, "grad_norm": 16.75, "learning_rate": 6.477744865821406e-07, "loss": 1.1586273908615112, "step": 8896 }, { "epoch": 2.7386888273314867, "grad_norm": 7.875, "learning_rate": 6.475519479546876e-07, "loss": 1.404249906539917, "step": 8898 }, { "epoch": 2.739304401354263, "grad_norm": 9.25, "learning_rate": 6.473299183648585e-07, "loss": 1.3985319137573242, "step": 8900 }, { "epoch": 2.739919975377039, "grad_norm": 10.875, "learning_rate": 6.471083979107239e-07, "loss": 1.315010666847229, "step": 8902 }, { "epoch": 2.7405355493998154, "grad_norm": 28.375, "learning_rate": 6.468873866901298e-07, "loss": 1.347687005996704, "step": 8904 }, { "epoch": 2.7411511234225916, "grad_norm": 7.09375, "learning_rate": 6.466668848006969e-07, "loss": 1.2556222677230835, "step": 8906 }, { "epoch": 2.741766697445368, "grad_norm": 16.625, "learning_rate": 6.464468923398212e-07, "loss": 1.4205504655838013, "step": 8908 }, { "epoch": 2.742382271468144, "grad_norm": 4.65625, "learning_rate": 6.462274094046739e-07, "loss": 1.0294299125671387, "step": 8910 }, { "epoch": 2.7429978454909203, "grad_norm": 10.9375, "learning_rate": 6.460084360922002e-07, "loss": 1.1584376096725464, "step": 8912 }, { "epoch": 2.7436134195136965, "grad_norm": 22.5, "learning_rate": 6.457899724991216e-07, "loss": 1.7459558248519897, "step": 8914 }, { "epoch": 2.7442289935364728, "grad_norm": 9.6875, "learning_rate": 6.455720187219333e-07, "loss": 1.2811490297317505, "step": 8916 }, { "epoch": 2.744844567559249, "grad_norm": 11.4375, "learning_rate": 6.453545748569058e-07, "loss": 1.325385332107544, "step": 8918 }, { "epoch": 2.7454601415820252, "grad_norm": 18.375, "learning_rate": 6.451376410000846e-07, "loss": 1.186486005783081, "step": 8920 }, { "epoch": 2.7460757156048015, "grad_norm": 19.25, "learning_rate": 6.449212172472891e-07, "loss": 1.2240514755249023, "step": 8922 }, { "epoch": 2.7466912896275777, "grad_norm": 22.375, "learning_rate": 6.447053036941143e-07, "loss": 0.8598315119743347, "step": 8924 }, { "epoch": 2.747306863650354, "grad_norm": 13.3125, "learning_rate": 6.444899004359295e-07, "loss": 1.358074426651001, "step": 8926 }, { "epoch": 2.74792243767313, "grad_norm": 10.4375, "learning_rate": 6.442750075678782e-07, "loss": 0.8594173192977905, "step": 8928 }, { "epoch": 2.7485380116959064, "grad_norm": 13.875, "learning_rate": 6.440606251848792e-07, "loss": 1.2267684936523438, "step": 8930 }, { "epoch": 2.7491535857186826, "grad_norm": 23.375, "learning_rate": 6.438467533816253e-07, "loss": 1.3598570823669434, "step": 8932 }, { "epoch": 2.749769159741459, "grad_norm": 32.0, "learning_rate": 6.436333922525838e-07, "loss": 1.4652607440948486, "step": 8934 }, { "epoch": 2.750384733764235, "grad_norm": 13.0, "learning_rate": 6.434205418919967e-07, "loss": 1.2682214975357056, "step": 8936 }, { "epoch": 2.7510003077870113, "grad_norm": 7.25, "learning_rate": 6.432082023938802e-07, "loss": 1.2484195232391357, "step": 8938 }, { "epoch": 2.7516158818097876, "grad_norm": 40.75, "learning_rate": 6.429963738520251e-07, "loss": 1.449970006942749, "step": 8940 }, { "epoch": 2.752231455832564, "grad_norm": 20.125, "learning_rate": 6.427850563599959e-07, "loss": 1.4970884323120117, "step": 8942 }, { "epoch": 2.75284702985534, "grad_norm": 19.75, "learning_rate": 6.425742500111322e-07, "loss": 1.2655609846115112, "step": 8944 }, { "epoch": 2.7534626038781163, "grad_norm": 14.875, "learning_rate": 6.423639548985471e-07, "loss": 1.5221806764602661, "step": 8946 }, { "epoch": 2.7540781779008925, "grad_norm": 3.1875, "learning_rate": 6.421541711151284e-07, "loss": 1.1110451221466064, "step": 8948 }, { "epoch": 2.7546937519236687, "grad_norm": 4.6875, "learning_rate": 6.419448987535376e-07, "loss": 1.0029635429382324, "step": 8950 }, { "epoch": 2.755309325946445, "grad_norm": 22.875, "learning_rate": 6.417361379062107e-07, "loss": 1.7326197624206543, "step": 8952 }, { "epoch": 2.755924899969221, "grad_norm": 7.75, "learning_rate": 6.415278886653577e-07, "loss": 1.3342669010162354, "step": 8954 }, { "epoch": 2.7565404739919974, "grad_norm": 16.625, "learning_rate": 6.413201511229622e-07, "loss": 1.4437904357910156, "step": 8956 }, { "epoch": 2.7571560480147737, "grad_norm": 19.125, "learning_rate": 6.411129253707827e-07, "loss": 1.2913410663604736, "step": 8958 }, { "epoch": 2.75777162203755, "grad_norm": 11.4375, "learning_rate": 6.409062115003505e-07, "loss": 1.433046817779541, "step": 8960 }, { "epoch": 2.758387196060326, "grad_norm": 22.0, "learning_rate": 6.407000096029715e-07, "loss": 1.4304602146148682, "step": 8962 }, { "epoch": 2.7590027700831024, "grad_norm": 19.875, "learning_rate": 6.404943197697252e-07, "loss": 1.5041424036026, "step": 8964 }, { "epoch": 2.7596183441058786, "grad_norm": 45.0, "learning_rate": 6.402891420914655e-07, "loss": 1.2554550170898438, "step": 8966 }, { "epoch": 2.760233918128655, "grad_norm": 5.40625, "learning_rate": 6.40084476658819e-07, "loss": 1.1710494756698608, "step": 8968 }, { "epoch": 2.760849492151431, "grad_norm": 27.25, "learning_rate": 6.398803235621871e-07, "loss": 1.3762147426605225, "step": 8970 }, { "epoch": 2.7614650661742073, "grad_norm": 32.25, "learning_rate": 6.396766828917444e-07, "loss": 1.7743159532546997, "step": 8972 }, { "epoch": 2.7620806401969835, "grad_norm": 19.0, "learning_rate": 6.39473554737439e-07, "loss": 1.6314376592636108, "step": 8974 }, { "epoch": 2.76269621421976, "grad_norm": 11.6875, "learning_rate": 6.39270939188993e-07, "loss": 1.3375153541564941, "step": 8976 }, { "epoch": 2.763311788242536, "grad_norm": 9.25, "learning_rate": 6.390688363359018e-07, "loss": 1.412973403930664, "step": 8978 }, { "epoch": 2.7639273622653127, "grad_norm": 15.3125, "learning_rate": 6.388672462674345e-07, "loss": 1.4405186176300049, "step": 8980 }, { "epoch": 2.7645429362880884, "grad_norm": 42.75, "learning_rate": 6.386661690726338e-07, "loss": 1.7845385074615479, "step": 8982 }, { "epoch": 2.765158510310865, "grad_norm": 13.1875, "learning_rate": 6.384656048403156e-07, "loss": 1.1434121131896973, "step": 8984 }, { "epoch": 2.765774084333641, "grad_norm": 27.0, "learning_rate": 6.382655536590695e-07, "loss": 1.0858327150344849, "step": 8986 }, { "epoch": 2.7663896583564176, "grad_norm": 21.0, "learning_rate": 6.380660156172584e-07, "loss": 1.6220226287841797, "step": 8988 }, { "epoch": 2.7670052323791934, "grad_norm": 23.0, "learning_rate": 6.378669908030179e-07, "loss": 1.24774169921875, "step": 8990 }, { "epoch": 2.76762080640197, "grad_norm": 11.625, "learning_rate": 6.376684793042581e-07, "loss": 0.8773247599601746, "step": 8992 }, { "epoch": 2.768236380424746, "grad_norm": 14.5, "learning_rate": 6.374704812086616e-07, "loss": 1.4489095211029053, "step": 8994 }, { "epoch": 2.7688519544475225, "grad_norm": 18.875, "learning_rate": 6.372729966036841e-07, "loss": 1.2934106588363647, "step": 8996 }, { "epoch": 2.7694675284702983, "grad_norm": 5.0625, "learning_rate": 6.370760255765553e-07, "loss": 1.3208156824111938, "step": 8998 }, { "epoch": 2.770083102493075, "grad_norm": 34.75, "learning_rate": 6.368795682142769e-07, "loss": 1.487781047821045, "step": 9000 }, { "epoch": 2.7706986765158508, "grad_norm": 76.0, "learning_rate": 6.36683624603625e-07, "loss": 1.2479522228240967, "step": 9002 }, { "epoch": 2.7713142505386275, "grad_norm": 25.875, "learning_rate": 6.364881948311478e-07, "loss": 1.1628947257995605, "step": 9004 }, { "epoch": 2.7719298245614032, "grad_norm": 18.75, "learning_rate": 6.362932789831665e-07, "loss": 1.6086757183074951, "step": 9006 }, { "epoch": 2.77254539858418, "grad_norm": 11.8125, "learning_rate": 6.360988771457763e-07, "loss": 1.2544701099395752, "step": 9008 }, { "epoch": 2.7731609726069557, "grad_norm": 25.0, "learning_rate": 6.359049894048445e-07, "loss": 1.9421390295028687, "step": 9010 }, { "epoch": 2.7737765466297324, "grad_norm": 20.125, "learning_rate": 6.357116158460112e-07, "loss": 1.416395664215088, "step": 9012 }, { "epoch": 2.7743921206525086, "grad_norm": 8.5, "learning_rate": 6.355187565546902e-07, "loss": 1.0506492853164673, "step": 9014 }, { "epoch": 2.775007694675285, "grad_norm": 8.75, "learning_rate": 6.353264116160672e-07, "loss": 1.2236146926879883, "step": 9016 }, { "epoch": 2.775623268698061, "grad_norm": 31.5, "learning_rate": 6.351345811151017e-07, "loss": 1.3423714637756348, "step": 9018 }, { "epoch": 2.7762388427208373, "grad_norm": 13.0, "learning_rate": 6.349432651365252e-07, "loss": 1.5946636199951172, "step": 9020 }, { "epoch": 2.7768544167436136, "grad_norm": 12.3125, "learning_rate": 6.347524637648418e-07, "loss": 1.2847610712051392, "step": 9022 }, { "epoch": 2.77746999076639, "grad_norm": 8.4375, "learning_rate": 6.345621770843293e-07, "loss": 1.281554102897644, "step": 9024 }, { "epoch": 2.778085564789166, "grad_norm": 24.875, "learning_rate": 6.343724051790371e-07, "loss": 1.3122029304504395, "step": 9026 }, { "epoch": 2.7787011388119423, "grad_norm": 7.5, "learning_rate": 6.341831481327882e-07, "loss": 1.4820513725280762, "step": 9028 }, { "epoch": 2.7793167128347185, "grad_norm": 12.75, "learning_rate": 6.339944060291772e-07, "loss": 1.0853147506713867, "step": 9030 }, { "epoch": 2.7799322868574947, "grad_norm": 13.8125, "learning_rate": 6.338061789515717e-07, "loss": 1.1276772022247314, "step": 9032 }, { "epoch": 2.780547860880271, "grad_norm": 7.03125, "learning_rate": 6.336184669831121e-07, "loss": 1.0151764154434204, "step": 9034 }, { "epoch": 2.781163434903047, "grad_norm": 15.125, "learning_rate": 6.334312702067106e-07, "loss": 1.247842788696289, "step": 9036 }, { "epoch": 2.7817790089258234, "grad_norm": 13.25, "learning_rate": 6.332445887050527e-07, "loss": 0.9100526571273804, "step": 9038 }, { "epoch": 2.7823945829485996, "grad_norm": 5.90625, "learning_rate": 6.330584225605955e-07, "loss": 1.2452267408370972, "step": 9040 }, { "epoch": 2.783010156971376, "grad_norm": 9.8125, "learning_rate": 6.328727718555688e-07, "loss": 0.924896240234375, "step": 9042 }, { "epoch": 2.783625730994152, "grad_norm": 9.6875, "learning_rate": 6.326876366719749e-07, "loss": 1.6312079429626465, "step": 9044 }, { "epoch": 2.7842413050169283, "grad_norm": 20.0, "learning_rate": 6.325030170915882e-07, "loss": 1.1611487865447998, "step": 9046 }, { "epoch": 2.7848568790397046, "grad_norm": 39.5, "learning_rate": 6.323189131959552e-07, "loss": 1.4214301109313965, "step": 9048 }, { "epoch": 2.785472453062481, "grad_norm": 70.0, "learning_rate": 6.321353250663951e-07, "loss": 1.5114094018936157, "step": 9050 }, { "epoch": 2.786088027085257, "grad_norm": 11.0, "learning_rate": 6.319522527839986e-07, "loss": 1.2237645387649536, "step": 9052 }, { "epoch": 2.7867036011080333, "grad_norm": 15.6875, "learning_rate": 6.317696964296293e-07, "loss": 1.2461700439453125, "step": 9054 }, { "epoch": 2.7873191751308095, "grad_norm": 16.5, "learning_rate": 6.315876560839224e-07, "loss": 1.46170973777771, "step": 9056 }, { "epoch": 2.7879347491535857, "grad_norm": 17.625, "learning_rate": 6.314061318272852e-07, "loss": 1.203120231628418, "step": 9058 }, { "epoch": 2.788550323176362, "grad_norm": 27.0, "learning_rate": 6.312251237398975e-07, "loss": 1.5163331031799316, "step": 9060 }, { "epoch": 2.789165897199138, "grad_norm": 11.75, "learning_rate": 6.310446319017104e-07, "loss": 1.1536353826522827, "step": 9062 }, { "epoch": 2.7897814712219144, "grad_norm": 16.875, "learning_rate": 6.308646563924475e-07, "loss": 1.4147756099700928, "step": 9064 }, { "epoch": 2.7903970452446907, "grad_norm": 5.8125, "learning_rate": 6.306851972916045e-07, "loss": 1.1038388013839722, "step": 9066 }, { "epoch": 2.791012619267467, "grad_norm": 15.0, "learning_rate": 6.30506254678448e-07, "loss": 1.3057680130004883, "step": 9068 }, { "epoch": 2.791628193290243, "grad_norm": 45.25, "learning_rate": 6.303278286320177e-07, "loss": 1.4038832187652588, "step": 9070 }, { "epoch": 2.7922437673130194, "grad_norm": 16.375, "learning_rate": 6.301499192311246e-07, "loss": 1.6461620330810547, "step": 9072 }, { "epoch": 2.7928593413357956, "grad_norm": 34.0, "learning_rate": 6.299725265543512e-07, "loss": 1.4873437881469727, "step": 9074 }, { "epoch": 2.793474915358572, "grad_norm": 16.75, "learning_rate": 6.297956506800524e-07, "loss": 1.1311352252960205, "step": 9076 }, { "epoch": 2.794090489381348, "grad_norm": 9.375, "learning_rate": 6.29619291686354e-07, "loss": 0.941398024559021, "step": 9078 }, { "epoch": 2.7947060634041243, "grad_norm": 13.0625, "learning_rate": 6.294434496511543e-07, "loss": 1.284990668296814, "step": 9080 }, { "epoch": 2.7953216374269005, "grad_norm": 9.5, "learning_rate": 6.292681246521231e-07, "loss": 1.284984827041626, "step": 9082 }, { "epoch": 2.7959372114496768, "grad_norm": 19.125, "learning_rate": 6.290933167667011e-07, "loss": 1.6055502891540527, "step": 9084 }, { "epoch": 2.796552785472453, "grad_norm": 25.875, "learning_rate": 6.289190260721016e-07, "loss": 1.4686188697814941, "step": 9086 }, { "epoch": 2.7971683594952292, "grad_norm": 10.9375, "learning_rate": 6.287452526453089e-07, "loss": 1.1350395679473877, "step": 9088 }, { "epoch": 2.7977839335180055, "grad_norm": 11.0, "learning_rate": 6.285719965630789e-07, "loss": 1.3151172399520874, "step": 9090 }, { "epoch": 2.7983995075407817, "grad_norm": 20.625, "learning_rate": 6.283992579019388e-07, "loss": 1.334385633468628, "step": 9092 }, { "epoch": 2.799015081563558, "grad_norm": 13.75, "learning_rate": 6.282270367381877e-07, "loss": 1.3577499389648438, "step": 9094 }, { "epoch": 2.799630655586334, "grad_norm": 13.625, "learning_rate": 6.280553331478961e-07, "loss": 1.3364747762680054, "step": 9096 }, { "epoch": 2.8002462296091104, "grad_norm": 15.3125, "learning_rate": 6.278841472069051e-07, "loss": 1.4921326637268066, "step": 9098 }, { "epoch": 2.8008618036318866, "grad_norm": 8.5, "learning_rate": 6.277134789908282e-07, "loss": 1.0777859687805176, "step": 9100 }, { "epoch": 2.801477377654663, "grad_norm": 20.75, "learning_rate": 6.275433285750497e-07, "loss": 1.0497572422027588, "step": 9102 }, { "epoch": 2.802092951677439, "grad_norm": 17.5, "learning_rate": 6.273736960347249e-07, "loss": 1.194733738899231, "step": 9104 }, { "epoch": 2.8027085257002153, "grad_norm": 11.0625, "learning_rate": 6.272045814447808e-07, "loss": 1.3191816806793213, "step": 9106 }, { "epoch": 2.8033240997229916, "grad_norm": 16.625, "learning_rate": 6.27035984879916e-07, "loss": 1.4231816530227661, "step": 9108 }, { "epoch": 2.803939673745768, "grad_norm": 17.625, "learning_rate": 6.268679064145992e-07, "loss": 0.9059617519378662, "step": 9110 }, { "epoch": 2.804555247768544, "grad_norm": 7.09375, "learning_rate": 6.267003461230709e-07, "loss": 1.060996413230896, "step": 9112 }, { "epoch": 2.8051708217913203, "grad_norm": 8.75, "learning_rate": 6.26533304079343e-07, "loss": 1.1279702186584473, "step": 9114 }, { "epoch": 2.8057863958140965, "grad_norm": 17.5, "learning_rate": 6.263667803571981e-07, "loss": 1.3636348247528076, "step": 9116 }, { "epoch": 2.8064019698368727, "grad_norm": 12.4375, "learning_rate": 6.262007750301896e-07, "loss": 1.2173731327056885, "step": 9118 }, { "epoch": 2.807017543859649, "grad_norm": 10.1875, "learning_rate": 6.260352881716425e-07, "loss": 1.0125764608383179, "step": 9120 }, { "epoch": 2.807633117882425, "grad_norm": 3.9375, "learning_rate": 6.258703198546526e-07, "loss": 1.1348586082458496, "step": 9122 }, { "epoch": 2.8082486919052014, "grad_norm": 243.0, "learning_rate": 6.257058701520865e-07, "loss": 1.2904644012451172, "step": 9124 }, { "epoch": 2.808864265927978, "grad_norm": 27.5, "learning_rate": 6.255419391365816e-07, "loss": 1.3715291023254395, "step": 9126 }, { "epoch": 2.809479839950754, "grad_norm": 11.25, "learning_rate": 6.253785268805469e-07, "loss": 0.9206017255783081, "step": 9128 }, { "epoch": 2.8100954139735306, "grad_norm": 5.65625, "learning_rate": 6.252156334561611e-07, "loss": 1.1874823570251465, "step": 9130 }, { "epoch": 2.8107109879963064, "grad_norm": 31.75, "learning_rate": 6.250532589353752e-07, "loss": 1.0900100469589233, "step": 9132 }, { "epoch": 2.811326562019083, "grad_norm": 9.875, "learning_rate": 6.248914033899098e-07, "loss": 1.2578953504562378, "step": 9134 }, { "epoch": 2.811942136041859, "grad_norm": 10.375, "learning_rate": 6.247300668912565e-07, "loss": 1.4572657346725464, "step": 9136 }, { "epoch": 2.8125577100646355, "grad_norm": 14.1875, "learning_rate": 6.24569249510678e-07, "loss": 1.3846606016159058, "step": 9138 }, { "epoch": 2.8131732840874113, "grad_norm": 14.5, "learning_rate": 6.244089513192079e-07, "loss": 1.4546737670898438, "step": 9140 }, { "epoch": 2.813788858110188, "grad_norm": 14.0625, "learning_rate": 6.242491723876493e-07, "loss": 1.3725277185440063, "step": 9142 }, { "epoch": 2.8144044321329638, "grad_norm": 9.375, "learning_rate": 6.240899127865775e-07, "loss": 1.3328849077224731, "step": 9144 }, { "epoch": 2.8150200061557404, "grad_norm": 22.5, "learning_rate": 6.239311725863371e-07, "loss": 1.1928489208221436, "step": 9146 }, { "epoch": 2.8156355801785162, "grad_norm": 12.0, "learning_rate": 6.237729518570442e-07, "loss": 1.4977320432662964, "step": 9148 }, { "epoch": 2.816251154201293, "grad_norm": 22.625, "learning_rate": 6.236152506685849e-07, "loss": 1.2532992362976074, "step": 9150 }, { "epoch": 2.8168667282240687, "grad_norm": 17.25, "learning_rate": 6.23458069090616e-07, "loss": 1.485316514968872, "step": 9152 }, { "epoch": 2.8174823022468454, "grad_norm": 29.875, "learning_rate": 6.233014071925648e-07, "loss": 1.481433629989624, "step": 9154 }, { "epoch": 2.818097876269621, "grad_norm": 26.625, "learning_rate": 6.231452650436291e-07, "loss": 0.9959707260131836, "step": 9156 }, { "epoch": 2.818713450292398, "grad_norm": 9.875, "learning_rate": 6.22989642712777e-07, "loss": 1.2736601829528809, "step": 9158 }, { "epoch": 2.8193290243151736, "grad_norm": 22.0, "learning_rate": 6.228345402687471e-07, "loss": 1.4581197500228882, "step": 9160 }, { "epoch": 2.8199445983379503, "grad_norm": 8.0625, "learning_rate": 6.226799577800481e-07, "loss": 1.2403430938720703, "step": 9162 }, { "epoch": 2.8205601723607265, "grad_norm": 19.875, "learning_rate": 6.225258953149598e-07, "loss": 1.0801653861999512, "step": 9164 }, { "epoch": 2.8211757463835028, "grad_norm": 10.625, "learning_rate": 6.223723529415314e-07, "loss": 1.0728917121887207, "step": 9166 }, { "epoch": 2.821791320406279, "grad_norm": 30.375, "learning_rate": 6.222193307275824e-07, "loss": 1.2684574127197266, "step": 9168 }, { "epoch": 2.8224068944290552, "grad_norm": 13.5625, "learning_rate": 6.220668287407036e-07, "loss": 1.4267816543579102, "step": 9170 }, { "epoch": 2.8230224684518315, "grad_norm": 16.25, "learning_rate": 6.219148470482549e-07, "loss": 1.0929862260818481, "step": 9172 }, { "epoch": 2.8236380424746077, "grad_norm": 7.375, "learning_rate": 6.217633857173668e-07, "loss": 1.4870045185089111, "step": 9174 }, { "epoch": 2.824253616497384, "grad_norm": 9.125, "learning_rate": 6.2161244481494e-07, "loss": 1.235878348350525, "step": 9176 }, { "epoch": 2.82486919052016, "grad_norm": 13.9375, "learning_rate": 6.214620244076452e-07, "loss": 1.0306167602539062, "step": 9178 }, { "epoch": 2.8254847645429364, "grad_norm": 32.75, "learning_rate": 6.213121245619233e-07, "loss": 1.281254529953003, "step": 9180 }, { "epoch": 2.8261003385657126, "grad_norm": 14.9375, "learning_rate": 6.211627453439856e-07, "loss": 1.2573875188827515, "step": 9182 }, { "epoch": 2.826715912588489, "grad_norm": 16.375, "learning_rate": 6.210138868198124e-07, "loss": 1.3871865272521973, "step": 9184 }, { "epoch": 2.827331486611265, "grad_norm": 11.9375, "learning_rate": 6.208655490551551e-07, "loss": 1.324197769165039, "step": 9186 }, { "epoch": 2.8279470606340413, "grad_norm": 13.0625, "learning_rate": 6.207177321155345e-07, "loss": 1.4072306156158447, "step": 9188 }, { "epoch": 2.8285626346568176, "grad_norm": 6.03125, "learning_rate": 6.205704360662417e-07, "loss": 1.0662217140197754, "step": 9190 }, { "epoch": 2.829178208679594, "grad_norm": 20.375, "learning_rate": 6.204236609723375e-07, "loss": 1.5592257976531982, "step": 9192 }, { "epoch": 2.82979378270237, "grad_norm": 13.3125, "learning_rate": 6.202774068986524e-07, "loss": 1.4568991661071777, "step": 9194 }, { "epoch": 2.8304093567251463, "grad_norm": 20.0, "learning_rate": 6.201316739097872e-07, "loss": 1.1466706991195679, "step": 9196 }, { "epoch": 2.8310249307479225, "grad_norm": 14.0625, "learning_rate": 6.199864620701123e-07, "loss": 1.2321534156799316, "step": 9198 }, { "epoch": 2.8316405047706987, "grad_norm": 36.5, "learning_rate": 6.198417714437677e-07, "loss": 1.470842719078064, "step": 9200 }, { "epoch": 2.832256078793475, "grad_norm": 12.25, "learning_rate": 6.19697602094664e-07, "loss": 1.2441885471343994, "step": 9202 }, { "epoch": 2.832871652816251, "grad_norm": 11.4375, "learning_rate": 6.195539540864802e-07, "loss": 1.274795651435852, "step": 9204 }, { "epoch": 2.8334872268390274, "grad_norm": 34.75, "learning_rate": 6.194108274826663e-07, "loss": 1.6561574935913086, "step": 9206 }, { "epoch": 2.8341028008618037, "grad_norm": 22.75, "learning_rate": 6.192682223464415e-07, "loss": 1.5058321952819824, "step": 9208 }, { "epoch": 2.83471837488458, "grad_norm": 60.5, "learning_rate": 6.191261387407942e-07, "loss": 1.4805090427398682, "step": 9210 }, { "epoch": 2.835333948907356, "grad_norm": 22.625, "learning_rate": 6.189845767284836e-07, "loss": 0.9961310625076294, "step": 9212 }, { "epoch": 2.8359495229301324, "grad_norm": 23.0, "learning_rate": 6.188435363720372e-07, "loss": 1.1998114585876465, "step": 9214 }, { "epoch": 2.8365650969529086, "grad_norm": 21.375, "learning_rate": 6.187030177337529e-07, "loss": 1.7376620769500732, "step": 9216 }, { "epoch": 2.837180670975685, "grad_norm": 15.5625, "learning_rate": 6.18563020875698e-07, "loss": 1.4271830320358276, "step": 9218 }, { "epoch": 2.837796244998461, "grad_norm": 12.9375, "learning_rate": 6.184235458597091e-07, "loss": 1.3464505672454834, "step": 9220 }, { "epoch": 2.8384118190212373, "grad_norm": 17.25, "learning_rate": 6.182845927473927e-07, "loss": 1.2641328573226929, "step": 9222 }, { "epoch": 2.8390273930440135, "grad_norm": 4.5, "learning_rate": 6.181461616001242e-07, "loss": 1.2309045791625977, "step": 9224 }, { "epoch": 2.8396429670667898, "grad_norm": 20.875, "learning_rate": 6.180082524790492e-07, "loss": 1.2517509460449219, "step": 9226 }, { "epoch": 2.840258541089566, "grad_norm": 8.75, "learning_rate": 6.178708654450823e-07, "loss": 1.2566943168640137, "step": 9228 }, { "epoch": 2.8408741151123422, "grad_norm": 26.375, "learning_rate": 6.17734000558907e-07, "loss": 1.3818330764770508, "step": 9230 }, { "epoch": 2.8414896891351185, "grad_norm": 56.0, "learning_rate": 6.175976578809773e-07, "loss": 1.668084979057312, "step": 9232 }, { "epoch": 2.8421052631578947, "grad_norm": 13.0, "learning_rate": 6.174618374715157e-07, "loss": 1.6795376539230347, "step": 9234 }, { "epoch": 2.842720837180671, "grad_norm": 16.25, "learning_rate": 6.173265393905139e-07, "loss": 1.54259192943573, "step": 9236 }, { "epoch": 2.843336411203447, "grad_norm": 10.9375, "learning_rate": 6.171917636977337e-07, "loss": 1.4081530570983887, "step": 9238 }, { "epoch": 2.8439519852262234, "grad_norm": 15.1875, "learning_rate": 6.170575104527053e-07, "loss": 1.540208339691162, "step": 9240 }, { "epoch": 2.8445675592489996, "grad_norm": 22.75, "learning_rate": 6.169237797147289e-07, "loss": 1.5846127271652222, "step": 9242 }, { "epoch": 2.845183133271776, "grad_norm": 17.0, "learning_rate": 6.167905715428731e-07, "loss": 1.472456693649292, "step": 9244 }, { "epoch": 2.845798707294552, "grad_norm": 26.875, "learning_rate": 6.166578859959766e-07, "loss": 1.312475323677063, "step": 9246 }, { "epoch": 2.8464142813173283, "grad_norm": 28.75, "learning_rate": 6.165257231326462e-07, "loss": 1.7437453269958496, "step": 9248 }, { "epoch": 2.8470298553401046, "grad_norm": 16.375, "learning_rate": 6.163940830112587e-07, "loss": 1.4031589031219482, "step": 9250 }, { "epoch": 2.847645429362881, "grad_norm": 7.96875, "learning_rate": 6.162629656899597e-07, "loss": 1.2629880905151367, "step": 9252 }, { "epoch": 2.848261003385657, "grad_norm": 4.25, "learning_rate": 6.161323712266637e-07, "loss": 0.9520352482795715, "step": 9254 }, { "epoch": 2.8488765774084333, "grad_norm": 77.5, "learning_rate": 6.160022996790547e-07, "loss": 1.3207604885101318, "step": 9256 }, { "epoch": 2.8494921514312095, "grad_norm": 9.5625, "learning_rate": 6.158727511045853e-07, "loss": 1.3743228912353516, "step": 9258 }, { "epoch": 2.8501077254539857, "grad_norm": 20.75, "learning_rate": 6.157437255604771e-07, "loss": 1.3018256425857544, "step": 9260 }, { "epoch": 2.850723299476762, "grad_norm": 13.5625, "learning_rate": 6.15615223103721e-07, "loss": 1.3166005611419678, "step": 9262 }, { "epoch": 2.851338873499538, "grad_norm": 4.34375, "learning_rate": 6.154872437910768e-07, "loss": 1.047378420829773, "step": 9264 }, { "epoch": 2.8519544475223144, "grad_norm": 15.3125, "learning_rate": 6.153597876790729e-07, "loss": 1.4092975854873657, "step": 9266 }, { "epoch": 2.8525700215450907, "grad_norm": 15.6875, "learning_rate": 6.152328548240068e-07, "loss": 1.2166848182678223, "step": 9268 }, { "epoch": 2.853185595567867, "grad_norm": 21.0, "learning_rate": 6.15106445281945e-07, "loss": 1.451936960220337, "step": 9270 }, { "epoch": 2.853801169590643, "grad_norm": 20.75, "learning_rate": 6.149805591087226e-07, "loss": 1.4809799194335938, "step": 9272 }, { "epoch": 2.8544167436134193, "grad_norm": 10.25, "learning_rate": 6.14855196359944e-07, "loss": 1.3496973514556885, "step": 9274 }, { "epoch": 2.855032317636196, "grad_norm": 18.125, "learning_rate": 6.147303570909818e-07, "loss": 1.3943860530853271, "step": 9276 }, { "epoch": 2.855647891658972, "grad_norm": 36.0, "learning_rate": 6.146060413569776e-07, "loss": 1.413816213607788, "step": 9278 }, { "epoch": 2.8562634656817485, "grad_norm": 22.625, "learning_rate": 6.14482249212842e-07, "loss": 1.5197443962097168, "step": 9280 }, { "epoch": 2.8568790397045243, "grad_norm": 8.0625, "learning_rate": 6.143589807132539e-07, "loss": 1.0143325328826904, "step": 9282 }, { "epoch": 2.857494613727301, "grad_norm": 13.625, "learning_rate": 6.142362359126613e-07, "loss": 1.0320184230804443, "step": 9284 }, { "epoch": 2.8581101877500767, "grad_norm": 11.6875, "learning_rate": 6.141140148652807e-07, "loss": 1.1835293769836426, "step": 9286 }, { "epoch": 2.8587257617728534, "grad_norm": 20.25, "learning_rate": 6.139923176250972e-07, "loss": 1.2464087009429932, "step": 9288 }, { "epoch": 2.859341335795629, "grad_norm": 11.875, "learning_rate": 6.138711442458648e-07, "loss": 0.8897020816802979, "step": 9290 }, { "epoch": 2.859956909818406, "grad_norm": 52.25, "learning_rate": 6.137504947811057e-07, "loss": 1.041160225868225, "step": 9292 }, { "epoch": 2.8605724838411817, "grad_norm": 12.875, "learning_rate": 6.136303692841109e-07, "loss": 1.2199876308441162, "step": 9294 }, { "epoch": 2.8611880578639584, "grad_norm": 9.875, "learning_rate": 6.135107678079403e-07, "loss": 1.2565871477127075, "step": 9296 }, { "epoch": 2.861803631886734, "grad_norm": 11.875, "learning_rate": 6.133916904054215e-07, "loss": 1.2938958406448364, "step": 9298 }, { "epoch": 2.862419205909511, "grad_norm": 13.375, "learning_rate": 6.132731371291515e-07, "loss": 0.8824082612991333, "step": 9300 }, { "epoch": 2.8630347799322866, "grad_norm": 12.6875, "learning_rate": 6.131551080314953e-07, "loss": 1.3084430694580078, "step": 9302 }, { "epoch": 2.8636503539550633, "grad_norm": 13.1875, "learning_rate": 6.130376031645865e-07, "loss": 1.4461159706115723, "step": 9304 }, { "epoch": 2.864265927977839, "grad_norm": 13.6875, "learning_rate": 6.129206225803272e-07, "loss": 1.3158795833587646, "step": 9306 }, { "epoch": 2.8648815020006158, "grad_norm": 8.875, "learning_rate": 6.128041663303877e-07, "loss": 1.3527281284332275, "step": 9308 }, { "epoch": 2.8654970760233915, "grad_norm": 9.3125, "learning_rate": 6.126882344662072e-07, "loss": 1.3457539081573486, "step": 9310 }, { "epoch": 2.866112650046168, "grad_norm": 14.25, "learning_rate": 6.125728270389925e-07, "loss": 1.2900372743606567, "step": 9312 }, { "epoch": 2.8667282240689445, "grad_norm": 12.5, "learning_rate": 6.124579440997193e-07, "loss": 1.3647406101226807, "step": 9314 }, { "epoch": 2.8673437980917207, "grad_norm": 20.375, "learning_rate": 6.123435856991316e-07, "loss": 1.5858960151672363, "step": 9316 }, { "epoch": 2.867959372114497, "grad_norm": 33.5, "learning_rate": 6.122297518877417e-07, "loss": 1.2413525581359863, "step": 9318 }, { "epoch": 2.868574946137273, "grad_norm": 7.21875, "learning_rate": 6.121164427158303e-07, "loss": 1.42106294631958, "step": 9320 }, { "epoch": 2.8691905201600494, "grad_norm": 10.0625, "learning_rate": 6.120036582334457e-07, "loss": 1.2894312143325806, "step": 9322 }, { "epoch": 2.8698060941828256, "grad_norm": 6.375, "learning_rate": 6.118913984904053e-07, "loss": 1.0894622802734375, "step": 9324 }, { "epoch": 2.870421668205602, "grad_norm": 9.0625, "learning_rate": 6.117796635362942e-07, "loss": 1.2433956861495972, "step": 9326 }, { "epoch": 2.871037242228378, "grad_norm": 21.25, "learning_rate": 6.116684534204659e-07, "loss": 1.2344213724136353, "step": 9328 }, { "epoch": 2.8716528162511543, "grad_norm": 16.25, "learning_rate": 6.115577681920423e-07, "loss": 1.0639562606811523, "step": 9330 }, { "epoch": 2.8722683902739305, "grad_norm": 9.1875, "learning_rate": 6.114476078999126e-07, "loss": 1.2135043144226074, "step": 9332 }, { "epoch": 2.872883964296707, "grad_norm": 14.375, "learning_rate": 6.113379725927352e-07, "loss": 1.5391733646392822, "step": 9334 }, { "epoch": 2.873499538319483, "grad_norm": 14.25, "learning_rate": 6.112288623189359e-07, "loss": 1.6590864658355713, "step": 9336 }, { "epoch": 2.8741151123422592, "grad_norm": 15.375, "learning_rate": 6.111202771267092e-07, "loss": 1.3919843435287476, "step": 9338 }, { "epoch": 2.8747306863650355, "grad_norm": 21.25, "learning_rate": 6.110122170640168e-07, "loss": 1.5578798055648804, "step": 9340 }, { "epoch": 2.8753462603878117, "grad_norm": 16.75, "learning_rate": 6.109046821785893e-07, "loss": 1.5761566162109375, "step": 9342 }, { "epoch": 2.875961834410588, "grad_norm": 33.5, "learning_rate": 6.107976725179247e-07, "loss": 1.8075366020202637, "step": 9344 }, { "epoch": 2.876577408433364, "grad_norm": 18.5, "learning_rate": 6.106911881292898e-07, "loss": 1.3936195373535156, "step": 9346 }, { "epoch": 2.8771929824561404, "grad_norm": 18.75, "learning_rate": 6.105852290597182e-07, "loss": 1.6210025548934937, "step": 9348 }, { "epoch": 2.8778085564789166, "grad_norm": 30.125, "learning_rate": 6.104797953560127e-07, "loss": 1.3459267616271973, "step": 9350 }, { "epoch": 2.878424130501693, "grad_norm": 22.625, "learning_rate": 6.103748870647431e-07, "loss": 1.0890836715698242, "step": 9352 }, { "epoch": 2.879039704524469, "grad_norm": 9.625, "learning_rate": 6.102705042322478e-07, "loss": 1.0718843936920166, "step": 9354 }, { "epoch": 2.8796552785472453, "grad_norm": 4.03125, "learning_rate": 6.101666469046329e-07, "loss": 1.2060925960540771, "step": 9356 }, { "epoch": 2.8802708525700216, "grad_norm": 14.625, "learning_rate": 6.100633151277719e-07, "loss": 1.3895275592803955, "step": 9358 }, { "epoch": 2.880886426592798, "grad_norm": 19.75, "learning_rate": 6.099605089473066e-07, "loss": 1.490225076675415, "step": 9360 }, { "epoch": 2.881502000615574, "grad_norm": 36.0, "learning_rate": 6.098582284086471e-07, "loss": 1.4311065673828125, "step": 9362 }, { "epoch": 2.8821175746383503, "grad_norm": 48.0, "learning_rate": 6.097564735569703e-07, "loss": 0.726864218711853, "step": 9364 }, { "epoch": 2.8827331486611265, "grad_norm": 20.0, "learning_rate": 6.096552444372216e-07, "loss": 1.4224977493286133, "step": 9366 }, { "epoch": 2.8833487226839027, "grad_norm": 27.125, "learning_rate": 6.095545410941143e-07, "loss": 1.629457712173462, "step": 9368 }, { "epoch": 2.883964296706679, "grad_norm": 23.75, "learning_rate": 6.094543635721287e-07, "loss": 1.1574146747589111, "step": 9370 }, { "epoch": 2.884579870729455, "grad_norm": 18.5, "learning_rate": 6.093547119155136e-07, "loss": 1.085228681564331, "step": 9372 }, { "epoch": 2.8851954447522314, "grad_norm": 12.5625, "learning_rate": 6.092555861682855e-07, "loss": 1.32918119430542, "step": 9374 }, { "epoch": 2.8858110187750077, "grad_norm": 12.6875, "learning_rate": 6.091569863742277e-07, "loss": 1.2604478597640991, "step": 9376 }, { "epoch": 2.886426592797784, "grad_norm": 9.9375, "learning_rate": 6.090589125768923e-07, "loss": 1.2702546119689941, "step": 9378 }, { "epoch": 2.88704216682056, "grad_norm": 13.125, "learning_rate": 6.089613648195987e-07, "loss": 1.282178521156311, "step": 9380 }, { "epoch": 2.8876577408433364, "grad_norm": 24.0, "learning_rate": 6.088643431454336e-07, "loss": 1.0952709913253784, "step": 9382 }, { "epoch": 2.8882733148661126, "grad_norm": 9.4375, "learning_rate": 6.087678475972516e-07, "loss": 1.4014630317687988, "step": 9384 }, { "epoch": 2.888888888888889, "grad_norm": 31.25, "learning_rate": 6.086718782176749e-07, "loss": 1.0754542350769043, "step": 9386 }, { "epoch": 2.889504462911665, "grad_norm": 21.625, "learning_rate": 6.085764350490934e-07, "loss": 0.689826488494873, "step": 9388 }, { "epoch": 2.8901200369344413, "grad_norm": 8.5625, "learning_rate": 6.084815181336646e-07, "loss": 1.3719182014465332, "step": 9390 }, { "epoch": 2.8907356109572175, "grad_norm": 11.75, "learning_rate": 6.083871275133129e-07, "loss": 1.2801945209503174, "step": 9392 }, { "epoch": 2.8913511849799938, "grad_norm": 34.75, "learning_rate": 6.082932632297312e-07, "loss": 1.1227266788482666, "step": 9394 }, { "epoch": 2.89196675900277, "grad_norm": 17.5, "learning_rate": 6.081999253243793e-07, "loss": 1.4343689680099487, "step": 9396 }, { "epoch": 2.8925823330255462, "grad_norm": 24.125, "learning_rate": 6.081071138384848e-07, "loss": 1.5909650325775146, "step": 9398 }, { "epoch": 2.8931979070483225, "grad_norm": 10.0625, "learning_rate": 6.080148288130424e-07, "loss": 1.1403911113739014, "step": 9400 }, { "epoch": 2.8938134810710987, "grad_norm": 12.5625, "learning_rate": 6.079230702888147e-07, "loss": 1.381251335144043, "step": 9402 }, { "epoch": 2.894429055093875, "grad_norm": 17.75, "learning_rate": 6.078318383063312e-07, "loss": 1.3560043573379517, "step": 9404 }, { "epoch": 2.895044629116651, "grad_norm": 11.6875, "learning_rate": 6.077411329058897e-07, "loss": 1.2616386413574219, "step": 9406 }, { "epoch": 2.8956602031394274, "grad_norm": 19.375, "learning_rate": 6.076509541275545e-07, "loss": 1.6452581882476807, "step": 9408 }, { "epoch": 2.8962757771622036, "grad_norm": 16.125, "learning_rate": 6.075613020111578e-07, "loss": 1.3793082237243652, "step": 9410 }, { "epoch": 2.89689135118498, "grad_norm": 21.75, "learning_rate": 6.074721765962991e-07, "loss": 1.2782071828842163, "step": 9412 }, { "epoch": 2.897506925207756, "grad_norm": 22.125, "learning_rate": 6.073835779223451e-07, "loss": 1.5189945697784424, "step": 9414 }, { "epoch": 2.8981224992305323, "grad_norm": 5.125, "learning_rate": 6.072955060284299e-07, "loss": 1.2915692329406738, "step": 9416 }, { "epoch": 2.8987380732533086, "grad_norm": 16.5, "learning_rate": 6.072079609534549e-07, "loss": 1.546412706375122, "step": 9418 }, { "epoch": 2.899353647276085, "grad_norm": 14.875, "learning_rate": 6.071209427360892e-07, "loss": 1.0842649936676025, "step": 9420 }, { "epoch": 2.899969221298861, "grad_norm": 15.625, "learning_rate": 6.070344514147685e-07, "loss": 1.0759096145629883, "step": 9422 }, { "epoch": 2.9005847953216373, "grad_norm": 15.8125, "learning_rate": 6.069484870276964e-07, "loss": 1.7201030254364014, "step": 9424 }, { "epoch": 2.901200369344414, "grad_norm": 5.375, "learning_rate": 6.068630496128431e-07, "loss": 1.4501621723175049, "step": 9426 }, { "epoch": 2.9018159433671897, "grad_norm": 20.25, "learning_rate": 6.067781392079465e-07, "loss": 1.4068629741668701, "step": 9428 }, { "epoch": 2.9024315173899664, "grad_norm": 37.0, "learning_rate": 6.066937558505121e-07, "loss": 1.6690633296966553, "step": 9430 }, { "epoch": 2.903047091412742, "grad_norm": 6.40625, "learning_rate": 6.066098995778116e-07, "loss": 1.0934585332870483, "step": 9432 }, { "epoch": 2.903662665435519, "grad_norm": 10.375, "learning_rate": 6.065265704268845e-07, "loss": 1.3146367073059082, "step": 9434 }, { "epoch": 2.9042782394582947, "grad_norm": 11.625, "learning_rate": 6.064437684345375e-07, "loss": 1.6005584001541138, "step": 9436 }, { "epoch": 2.9048938134810713, "grad_norm": 9.8125, "learning_rate": 6.063614936373442e-07, "loss": 1.2471882104873657, "step": 9438 }, { "epoch": 2.905509387503847, "grad_norm": 17.125, "learning_rate": 6.062797460716457e-07, "loss": 1.5874648094177246, "step": 9440 }, { "epoch": 2.906124961526624, "grad_norm": 18.875, "learning_rate": 6.061985257735498e-07, "loss": 1.3863563537597656, "step": 9442 }, { "epoch": 2.9067405355493996, "grad_norm": 11.5, "learning_rate": 6.061178327789316e-07, "loss": 1.272346019744873, "step": 9444 }, { "epoch": 2.9073561095721763, "grad_norm": 28.625, "learning_rate": 6.060376671234333e-07, "loss": 1.717216968536377, "step": 9446 }, { "epoch": 2.907971683594952, "grad_norm": 10.1875, "learning_rate": 6.059580288424644e-07, "loss": 1.3084156513214111, "step": 9448 }, { "epoch": 2.9085872576177287, "grad_norm": 14.1875, "learning_rate": 6.058789179712007e-07, "loss": 0.8683636784553528, "step": 9450 }, { "epoch": 2.9092028316405045, "grad_norm": 17.25, "learning_rate": 6.058003345445861e-07, "loss": 0.7504647970199585, "step": 9452 }, { "epoch": 2.909818405663281, "grad_norm": 32.75, "learning_rate": 6.057222785973308e-07, "loss": 1.600705623626709, "step": 9454 }, { "epoch": 2.910433979686057, "grad_norm": 12.125, "learning_rate": 6.056447501639121e-07, "loss": 1.3672009706497192, "step": 9456 }, { "epoch": 2.9110495537088337, "grad_norm": 12.8125, "learning_rate": 6.055677492785745e-07, "loss": 1.138643741607666, "step": 9458 }, { "epoch": 2.9116651277316095, "grad_norm": 22.875, "learning_rate": 6.054912759753293e-07, "loss": 1.6278893947601318, "step": 9460 }, { "epoch": 2.912280701754386, "grad_norm": 14.375, "learning_rate": 6.05415330287955e-07, "loss": 1.4144954681396484, "step": 9462 }, { "epoch": 2.9128962757771624, "grad_norm": 12.5, "learning_rate": 6.053399122499966e-07, "loss": 1.3152668476104736, "step": 9464 }, { "epoch": 2.9135118497999386, "grad_norm": 16.875, "learning_rate": 6.052650218947665e-07, "loss": 1.298598289489746, "step": 9466 }, { "epoch": 2.914127423822715, "grad_norm": 11.0, "learning_rate": 6.051906592553442e-07, "loss": 1.3542667627334595, "step": 9468 }, { "epoch": 2.914742997845491, "grad_norm": 16.375, "learning_rate": 6.051168243645752e-07, "loss": 1.3643980026245117, "step": 9470 }, { "epoch": 2.9153585718682673, "grad_norm": 28.75, "learning_rate": 6.050435172550727e-07, "loss": 1.6208703517913818, "step": 9472 }, { "epoch": 2.9159741458910435, "grad_norm": 23.0, "learning_rate": 6.049707379592166e-07, "loss": 1.3265775442123413, "step": 9474 }, { "epoch": 2.9165897199138198, "grad_norm": 19.625, "learning_rate": 6.048984865091536e-07, "loss": 1.5905539989471436, "step": 9476 }, { "epoch": 2.917205293936596, "grad_norm": 6.8125, "learning_rate": 6.048267629367971e-07, "loss": 1.0480585098266602, "step": 9478 }, { "epoch": 2.9178208679593722, "grad_norm": 4.90625, "learning_rate": 6.047555672738275e-07, "loss": 1.0062479972839355, "step": 9480 }, { "epoch": 2.9184364419821485, "grad_norm": 7.40625, "learning_rate": 6.046848995516922e-07, "loss": 1.1649580001831055, "step": 9482 }, { "epoch": 2.9190520160049247, "grad_norm": 4.25, "learning_rate": 6.046147598016049e-07, "loss": 1.1874901056289673, "step": 9484 }, { "epoch": 2.919667590027701, "grad_norm": 56.25, "learning_rate": 6.045451480545469e-07, "loss": 1.3118739128112793, "step": 9486 }, { "epoch": 2.920283164050477, "grad_norm": 15.625, "learning_rate": 6.044760643412653e-07, "loss": 1.6022027730941772, "step": 9488 }, { "epoch": 2.9208987380732534, "grad_norm": 36.0, "learning_rate": 6.044075086922746e-07, "loss": 1.4660375118255615, "step": 9490 }, { "epoch": 2.9215143120960296, "grad_norm": 73.0, "learning_rate": 6.043394811378559e-07, "loss": 0.815237283706665, "step": 9492 }, { "epoch": 2.922129886118806, "grad_norm": 15.875, "learning_rate": 6.042719817080573e-07, "loss": 1.4905991554260254, "step": 9494 }, { "epoch": 2.922745460141582, "grad_norm": 85.5, "learning_rate": 6.042050104326928e-07, "loss": 1.7456505298614502, "step": 9496 }, { "epoch": 2.9233610341643583, "grad_norm": 41.75, "learning_rate": 6.041385673413444e-07, "loss": 1.542734980583191, "step": 9498 }, { "epoch": 2.9239766081871346, "grad_norm": 10.6875, "learning_rate": 6.040726524633597e-07, "loss": 1.2516852617263794, "step": 9500 }, { "epoch": 2.924592182209911, "grad_norm": 13.3125, "learning_rate": 6.040072658278534e-07, "loss": 1.4482108354568481, "step": 9502 }, { "epoch": 2.925207756232687, "grad_norm": 11.0, "learning_rate": 6.039424074637067e-07, "loss": 1.4160391092300415, "step": 9504 }, { "epoch": 2.9258233302554633, "grad_norm": 19.75, "learning_rate": 6.038780773995679e-07, "loss": 1.4620568752288818, "step": 9506 }, { "epoch": 2.9264389042782395, "grad_norm": 5.0, "learning_rate": 6.038142756638518e-07, "loss": 1.1898398399353027, "step": 9508 }, { "epoch": 2.9270544783010157, "grad_norm": 6.71875, "learning_rate": 6.037510022847392e-07, "loss": 1.1003707647323608, "step": 9510 }, { "epoch": 2.927670052323792, "grad_norm": 15.75, "learning_rate": 6.036882572901782e-07, "loss": 1.2336218357086182, "step": 9512 }, { "epoch": 2.928285626346568, "grad_norm": 97.0, "learning_rate": 6.036260407078835e-07, "loss": 1.258795976638794, "step": 9514 }, { "epoch": 2.9289012003693444, "grad_norm": 17.25, "learning_rate": 6.035643525653363e-07, "loss": 1.35211980342865, "step": 9516 }, { "epoch": 2.9295167743921207, "grad_norm": 15.625, "learning_rate": 6.035031928897839e-07, "loss": 1.0009329319000244, "step": 9518 }, { "epoch": 2.930132348414897, "grad_norm": 12.0625, "learning_rate": 6.03442561708241e-07, "loss": 1.1892321109771729, "step": 9520 }, { "epoch": 2.930747922437673, "grad_norm": 9.375, "learning_rate": 6.033824590474884e-07, "loss": 1.3555978536605835, "step": 9522 }, { "epoch": 2.9313634964604494, "grad_norm": 49.75, "learning_rate": 6.033228849340733e-07, "loss": 1.5087881088256836, "step": 9524 }, { "epoch": 2.9319790704832256, "grad_norm": 23.875, "learning_rate": 6.032638393943101e-07, "loss": 1.3452261686325073, "step": 9526 }, { "epoch": 2.932594644506002, "grad_norm": 13.3125, "learning_rate": 6.032053224542786e-07, "loss": 1.3585171699523926, "step": 9528 }, { "epoch": 2.933210218528778, "grad_norm": 10.1875, "learning_rate": 6.031473341398264e-07, "loss": 1.3980622291564941, "step": 9530 }, { "epoch": 2.9338257925515543, "grad_norm": 27.625, "learning_rate": 6.030898744765667e-07, "loss": 1.542459487915039, "step": 9532 }, { "epoch": 2.9344413665743305, "grad_norm": 34.25, "learning_rate": 6.030329434898795e-07, "loss": 1.4522576332092285, "step": 9534 }, { "epoch": 2.9350569405971068, "grad_norm": 18.625, "learning_rate": 6.029765412049117e-07, "loss": 1.3313522338867188, "step": 9536 }, { "epoch": 2.935672514619883, "grad_norm": 29.125, "learning_rate": 6.029206676465756e-07, "loss": 1.4171909093856812, "step": 9538 }, { "epoch": 2.936288088642659, "grad_norm": 11.5, "learning_rate": 6.02865322839551e-07, "loss": 1.6026504039764404, "step": 9540 }, { "epoch": 2.9369036626654355, "grad_norm": 3.90625, "learning_rate": 6.028105068082838e-07, "loss": 1.0158360004425049, "step": 9542 }, { "epoch": 2.9375192366882117, "grad_norm": 12.3125, "learning_rate": 6.027562195769862e-07, "loss": 1.1536140441894531, "step": 9544 }, { "epoch": 2.938134810710988, "grad_norm": 21.375, "learning_rate": 6.027024611696368e-07, "loss": 1.3445171117782593, "step": 9546 }, { "epoch": 2.938750384733764, "grad_norm": 9.625, "learning_rate": 6.02649231609981e-07, "loss": 1.2725205421447754, "step": 9548 }, { "epoch": 2.9393659587565404, "grad_norm": 32.5, "learning_rate": 6.025965309215302e-07, "loss": 1.693610429763794, "step": 9550 }, { "epoch": 2.9399815327793166, "grad_norm": 15.75, "learning_rate": 6.025443591275625e-07, "loss": 1.5363895893096924, "step": 9552 }, { "epoch": 2.940597106802093, "grad_norm": 10.1875, "learning_rate": 6.02492716251122e-07, "loss": 1.305220365524292, "step": 9554 }, { "epoch": 2.941212680824869, "grad_norm": 23.375, "learning_rate": 6.024416023150197e-07, "loss": 1.430680274963379, "step": 9556 }, { "epoch": 2.9418282548476453, "grad_norm": 13.625, "learning_rate": 6.023910173418323e-07, "loss": 1.56490159034729, "step": 9558 }, { "epoch": 2.9424438288704216, "grad_norm": 9.0, "learning_rate": 6.023409613539036e-07, "loss": 1.1676526069641113, "step": 9560 }, { "epoch": 2.943059402893198, "grad_norm": 9.6875, "learning_rate": 6.022914343733434e-07, "loss": 1.792161226272583, "step": 9562 }, { "epoch": 2.943674976915974, "grad_norm": 18.75, "learning_rate": 6.022424364220275e-07, "loss": 1.4854860305786133, "step": 9564 }, { "epoch": 2.9442905509387503, "grad_norm": 10.5, "learning_rate": 6.021939675215987e-07, "loss": 1.162688970565796, "step": 9566 }, { "epoch": 2.9449061249615265, "grad_norm": 13.6875, "learning_rate": 6.021460276934656e-07, "loss": 1.3605610132217407, "step": 9568 }, { "epoch": 2.9455216989843027, "grad_norm": 14.75, "learning_rate": 6.020986169588032e-07, "loss": 1.1231822967529297, "step": 9570 }, { "epoch": 2.946137273007079, "grad_norm": 14.9375, "learning_rate": 6.02051735338553e-07, "loss": 1.1760200262069702, "step": 9572 }, { "epoch": 2.946752847029855, "grad_norm": 11.6875, "learning_rate": 6.020053828534226e-07, "loss": 1.4490975141525269, "step": 9574 }, { "epoch": 2.9473684210526314, "grad_norm": 18.625, "learning_rate": 6.019595595238861e-07, "loss": 1.256340503692627, "step": 9576 }, { "epoch": 2.9479839950754076, "grad_norm": 13.9375, "learning_rate": 6.019142653701834e-07, "loss": 1.6551717519760132, "step": 9578 }, { "epoch": 2.9485995690981843, "grad_norm": 24.0, "learning_rate": 6.018695004123214e-07, "loss": 1.2904531955718994, "step": 9580 }, { "epoch": 2.94921514312096, "grad_norm": 35.5, "learning_rate": 6.018252646700724e-07, "loss": 0.9481006860733032, "step": 9582 }, { "epoch": 2.949830717143737, "grad_norm": 18.875, "learning_rate": 6.017815581629757e-07, "loss": 1.5369739532470703, "step": 9584 }, { "epoch": 2.9504462911665126, "grad_norm": 16.875, "learning_rate": 6.017383809103363e-07, "loss": 1.1765402555465698, "step": 9586 }, { "epoch": 2.9510618651892893, "grad_norm": 13.5625, "learning_rate": 6.01695732931226e-07, "loss": 1.171614170074463, "step": 9588 }, { "epoch": 2.951677439212065, "grad_norm": 43.25, "learning_rate": 6.01653614244482e-07, "loss": 1.760496735572815, "step": 9590 }, { "epoch": 2.9522930132348417, "grad_norm": 14.5, "learning_rate": 6.016120248687082e-07, "loss": 1.387416124343872, "step": 9592 }, { "epoch": 2.9529085872576175, "grad_norm": 9.5625, "learning_rate": 6.01570964822275e-07, "loss": 1.355863332748413, "step": 9594 }, { "epoch": 2.953524161280394, "grad_norm": 11.25, "learning_rate": 6.015304341233187e-07, "loss": 1.4700431823730469, "step": 9596 }, { "epoch": 2.95413973530317, "grad_norm": 10.1875, "learning_rate": 6.014904327897414e-07, "loss": 1.1266649961471558, "step": 9598 }, { "epoch": 2.9547553093259467, "grad_norm": 8.6875, "learning_rate": 6.01450960839212e-07, "loss": 1.219506859779358, "step": 9600 }, { "epoch": 2.9553708833487224, "grad_norm": 56.25, "learning_rate": 6.014120182891651e-07, "loss": 1.198972463607788, "step": 9602 }, { "epoch": 2.955986457371499, "grad_norm": 9.375, "learning_rate": 6.013736051568018e-07, "loss": 1.1335132122039795, "step": 9604 }, { "epoch": 2.956602031394275, "grad_norm": 9.875, "learning_rate": 6.013357214590893e-07, "loss": 1.3339592218399048, "step": 9606 }, { "epoch": 2.9572176054170516, "grad_norm": 6.28125, "learning_rate": 6.012983672127608e-07, "loss": 1.0773075819015503, "step": 9608 }, { "epoch": 2.9578331794398274, "grad_norm": 33.5, "learning_rate": 6.012615424343155e-07, "loss": 1.5172120332717896, "step": 9610 }, { "epoch": 2.958448753462604, "grad_norm": 16.625, "learning_rate": 6.012252471400194e-07, "loss": 1.3709399700164795, "step": 9612 }, { "epoch": 2.95906432748538, "grad_norm": 8.5625, "learning_rate": 6.011894813459037e-07, "loss": 1.2132893800735474, "step": 9614 }, { "epoch": 2.9596799015081565, "grad_norm": 12.0, "learning_rate": 6.011542450677664e-07, "loss": 1.3026639223098755, "step": 9616 }, { "epoch": 2.9602954755309328, "grad_norm": 14.125, "learning_rate": 6.011195383211716e-07, "loss": 1.0850412845611572, "step": 9618 }, { "epoch": 2.960911049553709, "grad_norm": 22.875, "learning_rate": 6.01085361121449e-07, "loss": 1.24189293384552, "step": 9620 }, { "epoch": 2.961526623576485, "grad_norm": 11.4375, "learning_rate": 6.010517134836948e-07, "loss": 1.3805148601531982, "step": 9622 }, { "epoch": 2.9621421975992615, "grad_norm": 17.0, "learning_rate": 6.010185954227712e-07, "loss": 1.5809800624847412, "step": 9624 }, { "epoch": 2.9627577716220377, "grad_norm": 19.625, "learning_rate": 6.009860069533068e-07, "loss": 1.1562623977661133, "step": 9626 }, { "epoch": 2.963373345644814, "grad_norm": 5.6875, "learning_rate": 6.009539480896955e-07, "loss": 0.9422524571418762, "step": 9628 }, { "epoch": 2.96398891966759, "grad_norm": 16.375, "learning_rate": 6.009224188460978e-07, "loss": 1.0297799110412598, "step": 9630 }, { "epoch": 2.9646044936903664, "grad_norm": 17.25, "learning_rate": 6.008914192364404e-07, "loss": 1.4425406455993652, "step": 9632 }, { "epoch": 2.9652200677131426, "grad_norm": 7.46875, "learning_rate": 6.008609492744159e-07, "loss": 1.2678622007369995, "step": 9634 }, { "epoch": 2.965835641735919, "grad_norm": 12.125, "learning_rate": 6.008310089734825e-07, "loss": 1.4951027631759644, "step": 9636 }, { "epoch": 2.966451215758695, "grad_norm": 23.875, "learning_rate": 6.008015983468653e-07, "loss": 1.4566600322723389, "step": 9638 }, { "epoch": 2.9670667897814713, "grad_norm": 7.875, "learning_rate": 6.007727174075549e-07, "loss": 1.620081901550293, "step": 9640 }, { "epoch": 2.9676823638042475, "grad_norm": 80.0, "learning_rate": 6.007443661683082e-07, "loss": 1.240918755531311, "step": 9642 }, { "epoch": 2.968297937827024, "grad_norm": 11.1875, "learning_rate": 6.007165446416476e-07, "loss": 1.126937747001648, "step": 9644 }, { "epoch": 2.9689135118498, "grad_norm": 10.125, "learning_rate": 6.006892528398622e-07, "loss": 1.3524675369262695, "step": 9646 }, { "epoch": 2.9695290858725762, "grad_norm": 35.25, "learning_rate": 6.006624907750065e-07, "loss": 1.562439203262329, "step": 9648 }, { "epoch": 2.9701446598953525, "grad_norm": 32.0, "learning_rate": 6.006362584589017e-07, "loss": 1.5702564716339111, "step": 9650 }, { "epoch": 2.9707602339181287, "grad_norm": 15.625, "learning_rate": 6.006105559031345e-07, "loss": 1.7766485214233398, "step": 9652 }, { "epoch": 2.971375807940905, "grad_norm": 13.5625, "learning_rate": 6.005853831190577e-07, "loss": 1.5016021728515625, "step": 9654 }, { "epoch": 2.971991381963681, "grad_norm": 8.5, "learning_rate": 6.005607401177901e-07, "loss": 1.2089016437530518, "step": 9656 }, { "epoch": 2.9726069559864574, "grad_norm": 5.09375, "learning_rate": 6.005366269102167e-07, "loss": 1.0317680835723877, "step": 9658 }, { "epoch": 2.9732225300092336, "grad_norm": 18.5, "learning_rate": 6.005130435069883e-07, "loss": 1.2018183469772339, "step": 9660 }, { "epoch": 2.97383810403201, "grad_norm": 17.75, "learning_rate": 6.004899899185216e-07, "loss": 1.211350917816162, "step": 9662 }, { "epoch": 2.974453678054786, "grad_norm": 21.0, "learning_rate": 6.004674661549996e-07, "loss": 1.4988772869110107, "step": 9664 }, { "epoch": 2.9750692520775623, "grad_norm": 13.75, "learning_rate": 6.004454722263708e-07, "loss": 1.623139500617981, "step": 9666 }, { "epoch": 2.9756848261003386, "grad_norm": 24.75, "learning_rate": 6.004240081423502e-07, "loss": 1.3296325206756592, "step": 9668 }, { "epoch": 2.976300400123115, "grad_norm": 15.875, "learning_rate": 6.004030739124183e-07, "loss": 1.125887155532837, "step": 9670 }, { "epoch": 2.976915974145891, "grad_norm": 21.75, "learning_rate": 6.003826695458218e-07, "loss": 1.6624093055725098, "step": 9672 }, { "epoch": 2.9775315481686673, "grad_norm": 48.25, "learning_rate": 6.003627950515737e-07, "loss": 1.484586238861084, "step": 9674 }, { "epoch": 2.9781471221914435, "grad_norm": 11.125, "learning_rate": 6.003434504384521e-07, "loss": 1.4605937004089355, "step": 9676 }, { "epoch": 2.9787626962142197, "grad_norm": 41.5, "learning_rate": 6.003246357150016e-07, "loss": 1.7345576286315918, "step": 9678 }, { "epoch": 2.979378270236996, "grad_norm": 17.5, "learning_rate": 6.00306350889533e-07, "loss": 1.9778789281845093, "step": 9680 }, { "epoch": 2.979993844259772, "grad_norm": 35.5, "learning_rate": 6.002885959701225e-07, "loss": 1.897291660308838, "step": 9682 }, { "epoch": 2.9806094182825484, "grad_norm": 25.875, "learning_rate": 6.002713709646125e-07, "loss": 1.3435924053192139, "step": 9684 }, { "epoch": 2.9812249923053247, "grad_norm": 23.5, "learning_rate": 6.002546758806114e-07, "loss": 1.4819591045379639, "step": 9686 }, { "epoch": 2.981840566328101, "grad_norm": 12.375, "learning_rate": 6.002385107254931e-07, "loss": 1.3283945322036743, "step": 9688 }, { "epoch": 2.982456140350877, "grad_norm": 13.5, "learning_rate": 6.002228755063982e-07, "loss": 0.7335997819900513, "step": 9690 }, { "epoch": 2.9830717143736534, "grad_norm": 21.0, "learning_rate": 6.002077702302326e-07, "loss": 1.5746386051177979, "step": 9692 }, { "epoch": 2.9836872883964296, "grad_norm": 34.75, "learning_rate": 6.001931949036683e-07, "loss": 1.1658272743225098, "step": 9694 }, { "epoch": 2.984302862419206, "grad_norm": 7.90625, "learning_rate": 6.001791495331432e-07, "loss": 1.0803534984588623, "step": 9696 }, { "epoch": 2.984918436441982, "grad_norm": 12.625, "learning_rate": 6.001656341248613e-07, "loss": 1.301381230354309, "step": 9698 }, { "epoch": 2.9855340104647583, "grad_norm": 21.25, "learning_rate": 6.001526486847923e-07, "loss": 1.188427209854126, "step": 9700 }, { "epoch": 2.9861495844875345, "grad_norm": 7.90625, "learning_rate": 6.00140193218672e-07, "loss": 0.9816202521324158, "step": 9702 }, { "epoch": 2.9867651585103108, "grad_norm": 19.25, "learning_rate": 6.001282677320017e-07, "loss": 1.194831132888794, "step": 9704 }, { "epoch": 2.987380732533087, "grad_norm": 14.25, "learning_rate": 6.001168722300492e-07, "loss": 1.100862979888916, "step": 9706 }, { "epoch": 2.9879963065558632, "grad_norm": 9.9375, "learning_rate": 6.001060067178477e-07, "loss": 1.4596848487854004, "step": 9708 }, { "epoch": 2.9886118805786395, "grad_norm": 11.5625, "learning_rate": 6.000956712001966e-07, "loss": 1.4575676918029785, "step": 9710 }, { "epoch": 2.9892274546014157, "grad_norm": 3.921875, "learning_rate": 6.000858656816612e-07, "loss": 1.2540020942687988, "step": 9712 }, { "epoch": 2.989843028624192, "grad_norm": 16.0, "learning_rate": 6.000765901665723e-07, "loss": 1.4371435642242432, "step": 9714 }, { "epoch": 2.990458602646968, "grad_norm": 9.375, "learning_rate": 6.000678446590272e-07, "loss": 1.2159063816070557, "step": 9716 }, { "epoch": 2.9910741766697444, "grad_norm": 12.25, "learning_rate": 6.000596291628887e-07, "loss": 1.1678316593170166, "step": 9718 }, { "epoch": 2.9916897506925206, "grad_norm": 23.25, "learning_rate": 6.000519436817857e-07, "loss": 1.6382416486740112, "step": 9720 }, { "epoch": 2.992305324715297, "grad_norm": 24.625, "learning_rate": 6.000447882191127e-07, "loss": 1.2942144870758057, "step": 9722 }, { "epoch": 2.992920898738073, "grad_norm": 11.125, "learning_rate": 6.000381627780304e-07, "loss": 1.5667628049850464, "step": 9724 }, { "epoch": 2.9935364727608493, "grad_norm": 15.5625, "learning_rate": 6.000320673614653e-07, "loss": 1.5609714984893799, "step": 9726 }, { "epoch": 2.9941520467836256, "grad_norm": 18.625, "learning_rate": 6.000265019721097e-07, "loss": 1.583195686340332, "step": 9728 }, { "epoch": 2.9947676208064022, "grad_norm": 17.75, "learning_rate": 6.000214666124217e-07, "loss": 1.3528969287872314, "step": 9730 }, { "epoch": 2.995383194829178, "grad_norm": 27.5, "learning_rate": 6.000169612846257e-07, "loss": 0.8428146839141846, "step": 9732 }, { "epoch": 2.9959987688519547, "grad_norm": 8.5, "learning_rate": 6.000129859907115e-07, "loss": 0.9794246554374695, "step": 9734 }, { "epoch": 2.9966143428747305, "grad_norm": 12.125, "learning_rate": 6.00009540732435e-07, "loss": 0.9260591864585876, "step": 9736 }, { "epoch": 2.997229916897507, "grad_norm": 7.28125, "learning_rate": 6.000066255113181e-07, "loss": 0.8063700795173645, "step": 9738 }, { "epoch": 2.997845490920283, "grad_norm": 25.875, "learning_rate": 6.000042403286483e-07, "loss": 1.2744035720825195, "step": 9740 }, { "epoch": 2.9984610649430596, "grad_norm": 14.9375, "learning_rate": 6.000023851854793e-07, "loss": 1.2116272449493408, "step": 9742 }, { "epoch": 2.9990766389658354, "grad_norm": 12.125, "learning_rate": 6.000010600826304e-07, "loss": 1.378058671951294, "step": 9744 }, { "epoch": 2.999692212988612, "grad_norm": 14.6875, "learning_rate": 6.00000265020687e-07, "loss": 1.3891949653625488, "step": 9746 }, { "epoch": 3.0, "step": 9747, "total_flos": 3.8653261653884273e+18, "train_loss": 1.3544263215239651, "train_runtime": 26899.9771, "train_samples_per_second": 1.449, "train_steps_per_second": 0.362 } ], "logging_steps": 2, "max_steps": 9747, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 9999999, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.8653261653884273e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }