{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 20922, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00014338973329509606, "grad_norm": 6.028634548187256, "learning_rate": 4.777830864787387e-09, "loss": 0.7763, "step": 1 }, { "epoch": 0.00028677946659019213, "grad_norm": 6.167516231536865, "learning_rate": 9.555661729574773e-09, "loss": 0.7982, "step": 2 }, { "epoch": 0.0004301691998852882, "grad_norm": 5.843057155609131, "learning_rate": 1.4333492594362162e-08, "loss": 0.7715, "step": 3 }, { "epoch": 0.0005735589331803843, "grad_norm": 5.952945709228516, "learning_rate": 1.9111323459149547e-08, "loss": 0.7926, "step": 4 }, { "epoch": 0.0007169486664754804, "grad_norm": 5.992730617523193, "learning_rate": 2.3889154323936937e-08, "loss": 0.7802, "step": 5 }, { "epoch": 0.0008603383997705764, "grad_norm": 5.986807823181152, "learning_rate": 2.8666985188724324e-08, "loss": 0.7863, "step": 6 }, { "epoch": 0.0010037281330656726, "grad_norm": 6.379634380340576, "learning_rate": 3.344481605351171e-08, "loss": 0.8187, "step": 7 }, { "epoch": 0.0011471178663607685, "grad_norm": 6.017271518707275, "learning_rate": 3.8222646918299094e-08, "loss": 0.7858, "step": 8 }, { "epoch": 0.0012905075996558647, "grad_norm": 6.156959056854248, "learning_rate": 4.3000477783086484e-08, "loss": 0.7863, "step": 9 }, { "epoch": 0.0014338973329509608, "grad_norm": 6.039450645446777, "learning_rate": 4.7778308647873874e-08, "loss": 0.7853, "step": 10 }, { "epoch": 0.0015772870662460567, "grad_norm": 5.861743450164795, "learning_rate": 5.255613951266126e-08, "loss": 0.7668, "step": 11 }, { "epoch": 0.0017206767995411529, "grad_norm": 5.980361461639404, "learning_rate": 5.733397037744865e-08, "loss": 0.7859, "step": 12 }, { "epoch": 0.001864066532836249, "grad_norm": 5.9112396240234375, "learning_rate": 6.211180124223603e-08, "loss": 0.7631, "step": 13 }, { "epoch": 0.002007456266131345, "grad_norm": 6.006254196166992, "learning_rate": 6.688963210702342e-08, "loss": 0.7812, "step": 14 }, { "epoch": 0.002150845999426441, "grad_norm": 6.352158069610596, "learning_rate": 7.166746297181081e-08, "loss": 0.8052, "step": 15 }, { "epoch": 0.002294235732721537, "grad_norm": 5.985874176025391, "learning_rate": 7.644529383659819e-08, "loss": 0.7911, "step": 16 }, { "epoch": 0.0024376254660166334, "grad_norm": 6.068044662475586, "learning_rate": 8.122312470138559e-08, "loss": 0.7752, "step": 17 }, { "epoch": 0.0025810151993117293, "grad_norm": 6.030412673950195, "learning_rate": 8.600095556617297e-08, "loss": 0.8179, "step": 18 }, { "epoch": 0.0027244049326068252, "grad_norm": 5.8618245124816895, "learning_rate": 9.077878643096034e-08, "loss": 0.7713, "step": 19 }, { "epoch": 0.0028677946659019216, "grad_norm": 5.959840297698975, "learning_rate": 9.555661729574775e-08, "loss": 0.7877, "step": 20 }, { "epoch": 0.0030111843991970175, "grad_norm": 5.938329219818115, "learning_rate": 1.0033444816053512e-07, "loss": 0.7819, "step": 21 }, { "epoch": 0.0031545741324921135, "grad_norm": 5.683435916900635, "learning_rate": 1.0511227902532251e-07, "loss": 0.7652, "step": 22 }, { "epoch": 0.00329796386578721, "grad_norm": 5.829231262207031, "learning_rate": 1.098901098901099e-07, "loss": 0.7799, "step": 23 }, { "epoch": 0.0034413535990823058, "grad_norm": 5.755517482757568, "learning_rate": 1.146679407548973e-07, "loss": 0.7822, "step": 24 }, { "epoch": 0.0035847433323774017, "grad_norm": 5.995002269744873, "learning_rate": 1.1944577161968466e-07, "loss": 0.7917, "step": 25 }, { "epoch": 0.003728133065672498, "grad_norm": 5.9278388023376465, "learning_rate": 1.2422360248447206e-07, "loss": 0.7657, "step": 26 }, { "epoch": 0.003871522798967594, "grad_norm": 5.855252742767334, "learning_rate": 1.2900143334925944e-07, "loss": 0.7651, "step": 27 }, { "epoch": 0.00401491253226269, "grad_norm": 6.124769687652588, "learning_rate": 1.3377926421404684e-07, "loss": 0.7945, "step": 28 }, { "epoch": 0.004158302265557786, "grad_norm": 5.980734825134277, "learning_rate": 1.3855709507883422e-07, "loss": 0.7867, "step": 29 }, { "epoch": 0.004301691998852882, "grad_norm": 6.014309883117676, "learning_rate": 1.4333492594362162e-07, "loss": 0.8021, "step": 30 }, { "epoch": 0.0044450817321479786, "grad_norm": 5.626038551330566, "learning_rate": 1.48112756808409e-07, "loss": 0.7765, "step": 31 }, { "epoch": 0.004588471465443074, "grad_norm": 5.797970771789551, "learning_rate": 1.5289058767319638e-07, "loss": 0.7887, "step": 32 }, { "epoch": 0.00473186119873817, "grad_norm": 5.808217525482178, "learning_rate": 1.5766841853798378e-07, "loss": 0.7769, "step": 33 }, { "epoch": 0.004875250932033267, "grad_norm": 5.694373607635498, "learning_rate": 1.6244624940277118e-07, "loss": 0.7891, "step": 34 }, { "epoch": 0.005018640665328362, "grad_norm": 5.553695201873779, "learning_rate": 1.6722408026755853e-07, "loss": 0.7575, "step": 35 }, { "epoch": 0.005162030398623459, "grad_norm": 5.700619697570801, "learning_rate": 1.7200191113234594e-07, "loss": 0.7665, "step": 36 }, { "epoch": 0.005305420131918555, "grad_norm": 5.355234146118164, "learning_rate": 1.7677974199713334e-07, "loss": 0.7329, "step": 37 }, { "epoch": 0.0054488098652136505, "grad_norm": 5.763507843017578, "learning_rate": 1.815575728619207e-07, "loss": 0.7646, "step": 38 }, { "epoch": 0.005592199598508747, "grad_norm": 5.752255439758301, "learning_rate": 1.863354037267081e-07, "loss": 0.7966, "step": 39 }, { "epoch": 0.005735589331803843, "grad_norm": 5.678107261657715, "learning_rate": 1.911132345914955e-07, "loss": 0.7812, "step": 40 }, { "epoch": 0.005878979065098939, "grad_norm": 5.69219970703125, "learning_rate": 1.9589106545628285e-07, "loss": 0.7924, "step": 41 }, { "epoch": 0.006022368798394035, "grad_norm": 4.9842047691345215, "learning_rate": 2.0066889632107025e-07, "loss": 0.7379, "step": 42 }, { "epoch": 0.0061657585316891314, "grad_norm": 4.889655590057373, "learning_rate": 2.0544672718585765e-07, "loss": 0.7532, "step": 43 }, { "epoch": 0.006309148264984227, "grad_norm": 4.873232364654541, "learning_rate": 2.1022455805064503e-07, "loss": 0.7431, "step": 44 }, { "epoch": 0.006452537998279323, "grad_norm": 4.532863616943359, "learning_rate": 2.150023889154324e-07, "loss": 0.724, "step": 45 }, { "epoch": 0.00659592773157442, "grad_norm": 4.627618789672852, "learning_rate": 2.197802197802198e-07, "loss": 0.7078, "step": 46 }, { "epoch": 0.006739317464869515, "grad_norm": 4.583193302154541, "learning_rate": 2.2455805064500719e-07, "loss": 0.7289, "step": 47 }, { "epoch": 0.0068827071981646115, "grad_norm": 4.680521488189697, "learning_rate": 2.293358815097946e-07, "loss": 0.7185, "step": 48 }, { "epoch": 0.007026096931459708, "grad_norm": 4.618570327758789, "learning_rate": 2.3411371237458194e-07, "loss": 0.7426, "step": 49 }, { "epoch": 0.007169486664754803, "grad_norm": 4.405033111572266, "learning_rate": 2.388915432393693e-07, "loss": 0.7114, "step": 50 }, { "epoch": 0.0073128763980499, "grad_norm": 4.341869354248047, "learning_rate": 2.4366937410415675e-07, "loss": 0.7225, "step": 51 }, { "epoch": 0.007456266131344996, "grad_norm": 4.413589000701904, "learning_rate": 2.484472049689441e-07, "loss": 0.7204, "step": 52 }, { "epoch": 0.007599655864640092, "grad_norm": 4.486782073974609, "learning_rate": 2.532250358337315e-07, "loss": 0.7343, "step": 53 }, { "epoch": 0.007743045597935188, "grad_norm": 4.2955002784729, "learning_rate": 2.580028666985189e-07, "loss": 0.7224, "step": 54 }, { "epoch": 0.007886435331230283, "grad_norm": 4.477858543395996, "learning_rate": 2.6278069756330625e-07, "loss": 0.7157, "step": 55 }, { "epoch": 0.00802982506452538, "grad_norm": 4.124090194702148, "learning_rate": 2.675585284280937e-07, "loss": 0.7101, "step": 56 }, { "epoch": 0.008173214797820476, "grad_norm": 3.9871675968170166, "learning_rate": 2.7233635929288106e-07, "loss": 0.7191, "step": 57 }, { "epoch": 0.008316604531115572, "grad_norm": 3.3673300743103027, "learning_rate": 2.7711419015766844e-07, "loss": 0.7128, "step": 58 }, { "epoch": 0.008459994264410669, "grad_norm": 2.9155972003936768, "learning_rate": 2.818920210224558e-07, "loss": 0.6904, "step": 59 }, { "epoch": 0.008603383997705764, "grad_norm": 2.666625738143921, "learning_rate": 2.8666985188724324e-07, "loss": 0.6498, "step": 60 }, { "epoch": 0.00874677373100086, "grad_norm": 2.560509204864502, "learning_rate": 2.9144768275203057e-07, "loss": 0.6828, "step": 61 }, { "epoch": 0.008890163464295957, "grad_norm": 2.581599473953247, "learning_rate": 2.96225513616818e-07, "loss": 0.6664, "step": 62 }, { "epoch": 0.009033553197591053, "grad_norm": 2.394120931625366, "learning_rate": 3.010033444816054e-07, "loss": 0.6616, "step": 63 }, { "epoch": 0.009176942930886148, "grad_norm": 2.3619909286499023, "learning_rate": 3.0578117534639275e-07, "loss": 0.6695, "step": 64 }, { "epoch": 0.009320332664181245, "grad_norm": 2.5576770305633545, "learning_rate": 3.1055900621118013e-07, "loss": 0.6923, "step": 65 }, { "epoch": 0.00946372239747634, "grad_norm": 2.4052374362945557, "learning_rate": 3.1533683707596756e-07, "loss": 0.6571, "step": 66 }, { "epoch": 0.009607112130771436, "grad_norm": 2.184839963912964, "learning_rate": 3.2011466794075493e-07, "loss": 0.6653, "step": 67 }, { "epoch": 0.009750501864066534, "grad_norm": 2.406169891357422, "learning_rate": 3.2489249880554236e-07, "loss": 0.6761, "step": 68 }, { "epoch": 0.009893891597361629, "grad_norm": 2.271517515182495, "learning_rate": 3.296703296703297e-07, "loss": 0.6691, "step": 69 }, { "epoch": 0.010037281330656725, "grad_norm": 2.377732753753662, "learning_rate": 3.3444816053511706e-07, "loss": 0.6759, "step": 70 }, { "epoch": 0.010180671063951822, "grad_norm": 2.1668784618377686, "learning_rate": 3.392259913999045e-07, "loss": 0.6539, "step": 71 }, { "epoch": 0.010324060797246917, "grad_norm": 2.105522871017456, "learning_rate": 3.4400382226469187e-07, "loss": 0.6883, "step": 72 }, { "epoch": 0.010467450530542013, "grad_norm": 2.151083469390869, "learning_rate": 3.4878165312947925e-07, "loss": 0.6829, "step": 73 }, { "epoch": 0.01061084026383711, "grad_norm": 1.967842936515808, "learning_rate": 3.535594839942667e-07, "loss": 0.6333, "step": 74 }, { "epoch": 0.010754229997132205, "grad_norm": 1.8523740768432617, "learning_rate": 3.5833731485905405e-07, "loss": 0.6389, "step": 75 }, { "epoch": 0.010897619730427301, "grad_norm": 1.8196122646331787, "learning_rate": 3.631151457238414e-07, "loss": 0.6563, "step": 76 }, { "epoch": 0.011041009463722398, "grad_norm": 1.7281718254089355, "learning_rate": 3.678929765886288e-07, "loss": 0.6562, "step": 77 }, { "epoch": 0.011184399197017494, "grad_norm": 1.5873019695281982, "learning_rate": 3.726708074534162e-07, "loss": 0.6463, "step": 78 }, { "epoch": 0.01132778893031259, "grad_norm": 1.4941132068634033, "learning_rate": 3.7744863831820356e-07, "loss": 0.6385, "step": 79 }, { "epoch": 0.011471178663607686, "grad_norm": 1.4701769351959229, "learning_rate": 3.82226469182991e-07, "loss": 0.6301, "step": 80 }, { "epoch": 0.011614568396902782, "grad_norm": 1.4356547594070435, "learning_rate": 3.8700430004777837e-07, "loss": 0.6193, "step": 81 }, { "epoch": 0.011757958130197877, "grad_norm": 1.5084929466247559, "learning_rate": 3.917821309125657e-07, "loss": 0.6166, "step": 82 }, { "epoch": 0.011901347863492975, "grad_norm": 1.5354865789413452, "learning_rate": 3.965599617773531e-07, "loss": 0.64, "step": 83 }, { "epoch": 0.01204473759678807, "grad_norm": 1.5567268133163452, "learning_rate": 4.013377926421405e-07, "loss": 0.6157, "step": 84 }, { "epoch": 0.012188127330083166, "grad_norm": 1.4497002363204956, "learning_rate": 4.061156235069279e-07, "loss": 0.5956, "step": 85 }, { "epoch": 0.012331517063378263, "grad_norm": 1.4199219942092896, "learning_rate": 4.108934543717153e-07, "loss": 0.6257, "step": 86 }, { "epoch": 0.012474906796673358, "grad_norm": 1.454196572303772, "learning_rate": 4.156712852365027e-07, "loss": 0.6207, "step": 87 }, { "epoch": 0.012618296529968454, "grad_norm": 1.3367615938186646, "learning_rate": 4.2044911610129006e-07, "loss": 0.6197, "step": 88 }, { "epoch": 0.012761686263263551, "grad_norm": 1.2546721696853638, "learning_rate": 4.252269469660775e-07, "loss": 0.5987, "step": 89 }, { "epoch": 0.012905075996558647, "grad_norm": 1.2531626224517822, "learning_rate": 4.300047778308648e-07, "loss": 0.6225, "step": 90 }, { "epoch": 0.013048465729853742, "grad_norm": 1.2551312446594238, "learning_rate": 4.347826086956522e-07, "loss": 0.6003, "step": 91 }, { "epoch": 0.01319185546314884, "grad_norm": 1.1666094064712524, "learning_rate": 4.395604395604396e-07, "loss": 0.6021, "step": 92 }, { "epoch": 0.013335245196443935, "grad_norm": 1.1166093349456787, "learning_rate": 4.44338270425227e-07, "loss": 0.5915, "step": 93 }, { "epoch": 0.01347863492973903, "grad_norm": 1.088452935218811, "learning_rate": 4.4911610129001437e-07, "loss": 0.5677, "step": 94 }, { "epoch": 0.013622024663034128, "grad_norm": 1.0351313352584839, "learning_rate": 4.538939321548018e-07, "loss": 0.6216, "step": 95 }, { "epoch": 0.013765414396329223, "grad_norm": 1.0149279832839966, "learning_rate": 4.586717630195892e-07, "loss": 0.5635, "step": 96 }, { "epoch": 0.013908804129624319, "grad_norm": 1.013447880744934, "learning_rate": 4.634495938843765e-07, "loss": 0.592, "step": 97 }, { "epoch": 0.014052193862919416, "grad_norm": 1.0440495014190674, "learning_rate": 4.682274247491639e-07, "loss": 0.5674, "step": 98 }, { "epoch": 0.014195583596214511, "grad_norm": 0.9620437026023865, "learning_rate": 4.730052556139513e-07, "loss": 0.5675, "step": 99 }, { "epoch": 0.014338973329509607, "grad_norm": 0.8889033198356628, "learning_rate": 4.777830864787386e-07, "loss": 0.5662, "step": 100 }, { "epoch": 0.014482363062804704, "grad_norm": 0.8752585649490356, "learning_rate": 4.82560917343526e-07, "loss": 0.5595, "step": 101 }, { "epoch": 0.0146257527960998, "grad_norm": 0.8275120258331299, "learning_rate": 4.873387482083135e-07, "loss": 0.528, "step": 102 }, { "epoch": 0.014769142529394895, "grad_norm": 0.8541390299797058, "learning_rate": 4.921165790731009e-07, "loss": 0.5632, "step": 103 }, { "epoch": 0.014912532262689992, "grad_norm": 0.803570568561554, "learning_rate": 4.968944099378882e-07, "loss": 0.5481, "step": 104 }, { "epoch": 0.015055921995985088, "grad_norm": 0.7431790828704834, "learning_rate": 5.016722408026756e-07, "loss": 0.5554, "step": 105 }, { "epoch": 0.015199311729280183, "grad_norm": 0.69652259349823, "learning_rate": 5.06450071667463e-07, "loss": 0.5543, "step": 106 }, { "epoch": 0.01534270146257528, "grad_norm": 0.69214928150177, "learning_rate": 5.112279025322504e-07, "loss": 0.5554, "step": 107 }, { "epoch": 0.015486091195870376, "grad_norm": 0.6878805756568909, "learning_rate": 5.160057333970378e-07, "loss": 0.5546, "step": 108 }, { "epoch": 0.01562948092916547, "grad_norm": 0.6724877953529358, "learning_rate": 5.207835642618251e-07, "loss": 0.5703, "step": 109 }, { "epoch": 0.015772870662460567, "grad_norm": 0.6325406432151794, "learning_rate": 5.255613951266125e-07, "loss": 0.5446, "step": 110 }, { "epoch": 0.015916260395755662, "grad_norm": 0.615695595741272, "learning_rate": 5.303392259914e-07, "loss": 0.544, "step": 111 }, { "epoch": 0.01605965012905076, "grad_norm": 0.6021168828010559, "learning_rate": 5.351170568561874e-07, "loss": 0.5406, "step": 112 }, { "epoch": 0.016203039862345857, "grad_norm": 0.618527889251709, "learning_rate": 5.398948877209746e-07, "loss": 0.5153, "step": 113 }, { "epoch": 0.016346429595640952, "grad_norm": 0.5751671195030212, "learning_rate": 5.446727185857621e-07, "loss": 0.5484, "step": 114 }, { "epoch": 0.016489819328936048, "grad_norm": 0.5957533717155457, "learning_rate": 5.494505494505495e-07, "loss": 0.5231, "step": 115 }, { "epoch": 0.016633209062231143, "grad_norm": 0.6097672581672668, "learning_rate": 5.542283803153369e-07, "loss": 0.5382, "step": 116 }, { "epoch": 0.01677659879552624, "grad_norm": 0.6183931827545166, "learning_rate": 5.590062111801243e-07, "loss": 0.5469, "step": 117 }, { "epoch": 0.016919988528821338, "grad_norm": 0.5669873356819153, "learning_rate": 5.637840420449116e-07, "loss": 0.517, "step": 118 }, { "epoch": 0.017063378262116433, "grad_norm": 0.5960133075714111, "learning_rate": 5.68561872909699e-07, "loss": 0.5288, "step": 119 }, { "epoch": 0.01720676799541153, "grad_norm": 0.5382276773452759, "learning_rate": 5.733397037744865e-07, "loss": 0.5314, "step": 120 }, { "epoch": 0.017350157728706624, "grad_norm": 0.5399843454360962, "learning_rate": 5.781175346392738e-07, "loss": 0.491, "step": 121 }, { "epoch": 0.01749354746200172, "grad_norm": 0.5232928991317749, "learning_rate": 5.828953655040611e-07, "loss": 0.555, "step": 122 }, { "epoch": 0.017636937195296815, "grad_norm": 0.5447924733161926, "learning_rate": 5.876731963688486e-07, "loss": 0.529, "step": 123 }, { "epoch": 0.017780326928591914, "grad_norm": 0.5111520886421204, "learning_rate": 5.92451027233636e-07, "loss": 0.5187, "step": 124 }, { "epoch": 0.01792371666188701, "grad_norm": 0.5126971006393433, "learning_rate": 5.972288580984234e-07, "loss": 0.5248, "step": 125 }, { "epoch": 0.018067106395182105, "grad_norm": 0.48042604327201843, "learning_rate": 6.020066889632107e-07, "loss": 0.5087, "step": 126 }, { "epoch": 0.0182104961284772, "grad_norm": 0.5316042900085449, "learning_rate": 6.067845198279981e-07, "loss": 0.5232, "step": 127 }, { "epoch": 0.018353885861772296, "grad_norm": 0.5057587027549744, "learning_rate": 6.115623506927855e-07, "loss": 0.5291, "step": 128 }, { "epoch": 0.01849727559506739, "grad_norm": 0.48996207118034363, "learning_rate": 6.163401815575729e-07, "loss": 0.5171, "step": 129 }, { "epoch": 0.01864066532836249, "grad_norm": 0.5271071195602417, "learning_rate": 6.211180124223603e-07, "loss": 0.5378, "step": 130 }, { "epoch": 0.018784055061657586, "grad_norm": 0.4905247688293457, "learning_rate": 6.258958432871477e-07, "loss": 0.5146, "step": 131 }, { "epoch": 0.01892744479495268, "grad_norm": 0.4546985924243927, "learning_rate": 6.306736741519351e-07, "loss": 0.4934, "step": 132 }, { "epoch": 0.019070834528247777, "grad_norm": 0.4815942049026489, "learning_rate": 6.354515050167225e-07, "loss": 0.5073, "step": 133 }, { "epoch": 0.019214224261542873, "grad_norm": 0.5150583982467651, "learning_rate": 6.402293358815099e-07, "loss": 0.5009, "step": 134 }, { "epoch": 0.019357613994837968, "grad_norm": 0.519619882106781, "learning_rate": 6.450071667462971e-07, "loss": 0.5411, "step": 135 }, { "epoch": 0.019501003728133067, "grad_norm": 0.4960572421550751, "learning_rate": 6.497849976110847e-07, "loss": 0.5259, "step": 136 }, { "epoch": 0.019644393461428163, "grad_norm": 0.4669974446296692, "learning_rate": 6.54562828475872e-07, "loss": 0.5106, "step": 137 }, { "epoch": 0.019787783194723258, "grad_norm": 0.4946674704551697, "learning_rate": 6.593406593406594e-07, "loss": 0.4953, "step": 138 }, { "epoch": 0.019931172928018354, "grad_norm": 0.4778884947299957, "learning_rate": 6.641184902054468e-07, "loss": 0.5182, "step": 139 }, { "epoch": 0.02007456266131345, "grad_norm": 0.4686369299888611, "learning_rate": 6.688963210702341e-07, "loss": 0.4917, "step": 140 }, { "epoch": 0.020217952394608545, "grad_norm": 0.5070465803146362, "learning_rate": 6.736741519350215e-07, "loss": 0.5311, "step": 141 }, { "epoch": 0.020361342127903644, "grad_norm": 0.5422459244728088, "learning_rate": 6.78451982799809e-07, "loss": 0.5111, "step": 142 }, { "epoch": 0.02050473186119874, "grad_norm": 0.46077972650527954, "learning_rate": 6.832298136645964e-07, "loss": 0.5054, "step": 143 }, { "epoch": 0.020648121594493835, "grad_norm": 0.4735383689403534, "learning_rate": 6.880076445293837e-07, "loss": 0.5109, "step": 144 }, { "epoch": 0.02079151132778893, "grad_norm": 0.4796519875526428, "learning_rate": 6.927854753941711e-07, "loss": 0.5231, "step": 145 }, { "epoch": 0.020934901061084026, "grad_norm": 0.4753569960594177, "learning_rate": 6.975633062589585e-07, "loss": 0.5021, "step": 146 }, { "epoch": 0.02107829079437912, "grad_norm": 0.4694702625274658, "learning_rate": 7.023411371237459e-07, "loss": 0.5069, "step": 147 }, { "epoch": 0.02122168052767422, "grad_norm": 0.4532809853553772, "learning_rate": 7.071189679885334e-07, "loss": 0.4929, "step": 148 }, { "epoch": 0.021365070260969315, "grad_norm": 0.5124008059501648, "learning_rate": 7.118967988533207e-07, "loss": 0.5066, "step": 149 }, { "epoch": 0.02150845999426441, "grad_norm": 0.4257228970527649, "learning_rate": 7.166746297181081e-07, "loss": 0.5075, "step": 150 }, { "epoch": 0.021651849727559506, "grad_norm": 0.46333765983581543, "learning_rate": 7.214524605828954e-07, "loss": 0.508, "step": 151 }, { "epoch": 0.021795239460854602, "grad_norm": 0.5027212500572205, "learning_rate": 7.262302914476828e-07, "loss": 0.5185, "step": 152 }, { "epoch": 0.021938629194149697, "grad_norm": 0.4431443512439728, "learning_rate": 7.310081223124701e-07, "loss": 0.4977, "step": 153 }, { "epoch": 0.022082018927444796, "grad_norm": 0.44776979088783264, "learning_rate": 7.357859531772576e-07, "loss": 0.5039, "step": 154 }, { "epoch": 0.022225408660739892, "grad_norm": 0.4554808735847473, "learning_rate": 7.40563784042045e-07, "loss": 0.5017, "step": 155 }, { "epoch": 0.022368798394034987, "grad_norm": 0.4229060709476471, "learning_rate": 7.453416149068324e-07, "loss": 0.4864, "step": 156 }, { "epoch": 0.022512188127330083, "grad_norm": 0.4470514953136444, "learning_rate": 7.501194457716197e-07, "loss": 0.5099, "step": 157 }, { "epoch": 0.02265557786062518, "grad_norm": 0.4945819079875946, "learning_rate": 7.548972766364071e-07, "loss": 0.5034, "step": 158 }, { "epoch": 0.022798967593920274, "grad_norm": 0.4704129099845886, "learning_rate": 7.596751075011945e-07, "loss": 0.5018, "step": 159 }, { "epoch": 0.022942357327215373, "grad_norm": 0.4608142375946045, "learning_rate": 7.64452938365982e-07, "loss": 0.4687, "step": 160 }, { "epoch": 0.02308574706051047, "grad_norm": 0.4644317626953125, "learning_rate": 7.692307692307694e-07, "loss": 0.5248, "step": 161 }, { "epoch": 0.023229136793805564, "grad_norm": 0.4665786027908325, "learning_rate": 7.740086000955567e-07, "loss": 0.4725, "step": 162 }, { "epoch": 0.02337252652710066, "grad_norm": 0.415009081363678, "learning_rate": 7.787864309603441e-07, "loss": 0.4822, "step": 163 }, { "epoch": 0.023515916260395755, "grad_norm": 0.48512864112854004, "learning_rate": 7.835642618251314e-07, "loss": 0.4854, "step": 164 }, { "epoch": 0.02365930599369085, "grad_norm": 0.4552495777606964, "learning_rate": 7.883420926899188e-07, "loss": 0.4891, "step": 165 }, { "epoch": 0.02380269572698595, "grad_norm": 0.4327952265739441, "learning_rate": 7.931199235547062e-07, "loss": 0.4718, "step": 166 }, { "epoch": 0.023946085460281045, "grad_norm": 0.4359220862388611, "learning_rate": 7.978977544194936e-07, "loss": 0.5002, "step": 167 }, { "epoch": 0.02408947519357614, "grad_norm": 0.41264739632606506, "learning_rate": 8.02675585284281e-07, "loss": 0.4637, "step": 168 }, { "epoch": 0.024232864926871236, "grad_norm": 0.4436875283718109, "learning_rate": 8.074534161490684e-07, "loss": 0.4874, "step": 169 }, { "epoch": 0.02437625466016633, "grad_norm": 0.45102614164352417, "learning_rate": 8.122312470138558e-07, "loss": 0.4888, "step": 170 }, { "epoch": 0.024519644393461427, "grad_norm": 0.4451000988483429, "learning_rate": 8.170090778786431e-07, "loss": 0.4658, "step": 171 }, { "epoch": 0.024663034126756526, "grad_norm": 0.44829684495925903, "learning_rate": 8.217869087434306e-07, "loss": 0.4996, "step": 172 }, { "epoch": 0.02480642386005162, "grad_norm": 0.43226850032806396, "learning_rate": 8.26564739608218e-07, "loss": 0.4658, "step": 173 }, { "epoch": 0.024949813593346717, "grad_norm": 0.4133026897907257, "learning_rate": 8.313425704730054e-07, "loss": 0.4998, "step": 174 }, { "epoch": 0.025093203326641812, "grad_norm": 0.42531493306159973, "learning_rate": 8.361204013377927e-07, "loss": 0.4842, "step": 175 }, { "epoch": 0.025236593059936908, "grad_norm": 0.3979494869709015, "learning_rate": 8.408982322025801e-07, "loss": 0.4735, "step": 176 }, { "epoch": 0.025379982793232003, "grad_norm": 0.4533659517765045, "learning_rate": 8.456760630673674e-07, "loss": 0.4708, "step": 177 }, { "epoch": 0.025523372526527102, "grad_norm": 0.4357227385044098, "learning_rate": 8.50453893932155e-07, "loss": 0.4717, "step": 178 }, { "epoch": 0.025666762259822198, "grad_norm": 0.4299195110797882, "learning_rate": 8.552317247969424e-07, "loss": 0.4993, "step": 179 }, { "epoch": 0.025810151993117293, "grad_norm": 0.4300176501274109, "learning_rate": 8.600095556617296e-07, "loss": 0.49, "step": 180 }, { "epoch": 0.02595354172641239, "grad_norm": 0.4106995463371277, "learning_rate": 8.64787386526517e-07, "loss": 0.4849, "step": 181 }, { "epoch": 0.026096931459707484, "grad_norm": 0.4395071268081665, "learning_rate": 8.695652173913044e-07, "loss": 0.4889, "step": 182 }, { "epoch": 0.02624032119300258, "grad_norm": 0.4198647737503052, "learning_rate": 8.743430482560918e-07, "loss": 0.4867, "step": 183 }, { "epoch": 0.02638371092629768, "grad_norm": 0.42592862248420715, "learning_rate": 8.791208791208792e-07, "loss": 0.4949, "step": 184 }, { "epoch": 0.026527100659592774, "grad_norm": 0.4151221513748169, "learning_rate": 8.838987099856666e-07, "loss": 0.4908, "step": 185 }, { "epoch": 0.02667049039288787, "grad_norm": 0.46594250202178955, "learning_rate": 8.88676540850454e-07, "loss": 0.4898, "step": 186 }, { "epoch": 0.026813880126182965, "grad_norm": 0.40869638323783875, "learning_rate": 8.934543717152414e-07, "loss": 0.5118, "step": 187 }, { "epoch": 0.02695726985947806, "grad_norm": 0.4391607642173767, "learning_rate": 8.982322025800287e-07, "loss": 0.4658, "step": 188 }, { "epoch": 0.027100659592773156, "grad_norm": 0.45085304975509644, "learning_rate": 9.030100334448161e-07, "loss": 0.4661, "step": 189 }, { "epoch": 0.027244049326068255, "grad_norm": 0.43021684885025024, "learning_rate": 9.077878643096036e-07, "loss": 0.4592, "step": 190 }, { "epoch": 0.02738743905936335, "grad_norm": 0.4369358420372009, "learning_rate": 9.12565695174391e-07, "loss": 0.4836, "step": 191 }, { "epoch": 0.027530828792658446, "grad_norm": 0.4720838665962219, "learning_rate": 9.173435260391784e-07, "loss": 0.4946, "step": 192 }, { "epoch": 0.02767421852595354, "grad_norm": 0.432171493768692, "learning_rate": 9.221213569039656e-07, "loss": 0.4986, "step": 193 }, { "epoch": 0.027817608259248637, "grad_norm": 0.45830875635147095, "learning_rate": 9.26899187768753e-07, "loss": 0.4615, "step": 194 }, { "epoch": 0.027960997992543733, "grad_norm": 0.4186354875564575, "learning_rate": 9.316770186335404e-07, "loss": 0.4782, "step": 195 }, { "epoch": 0.02810438772583883, "grad_norm": 0.42422762513160706, "learning_rate": 9.364548494983278e-07, "loss": 0.4522, "step": 196 }, { "epoch": 0.028247777459133927, "grad_norm": 0.4357244372367859, "learning_rate": 9.412326803631152e-07, "loss": 0.4588, "step": 197 }, { "epoch": 0.028391167192429023, "grad_norm": 0.45871660113334656, "learning_rate": 9.460105112279026e-07, "loss": 0.4881, "step": 198 }, { "epoch": 0.028534556925724118, "grad_norm": 0.4193512201309204, "learning_rate": 9.5078834209269e-07, "loss": 0.5114, "step": 199 }, { "epoch": 0.028677946659019213, "grad_norm": 0.42306530475616455, "learning_rate": 9.555661729574773e-07, "loss": 0.4475, "step": 200 }, { "epoch": 0.02882133639231431, "grad_norm": 0.3912521004676819, "learning_rate": 9.603440038222646e-07, "loss": 0.4568, "step": 201 }, { "epoch": 0.028964726125609408, "grad_norm": 0.42355775833129883, "learning_rate": 9.65121834687052e-07, "loss": 0.4413, "step": 202 }, { "epoch": 0.029108115858904503, "grad_norm": 0.39640262722969055, "learning_rate": 9.698996655518396e-07, "loss": 0.5048, "step": 203 }, { "epoch": 0.0292515055921996, "grad_norm": 0.3877943754196167, "learning_rate": 9.74677496416627e-07, "loss": 0.4644, "step": 204 }, { "epoch": 0.029394895325494694, "grad_norm": 0.43417301774024963, "learning_rate": 9.794553272814144e-07, "loss": 0.4649, "step": 205 }, { "epoch": 0.02953828505878979, "grad_norm": 0.4340231120586395, "learning_rate": 9.842331581462017e-07, "loss": 0.4692, "step": 206 }, { "epoch": 0.029681674792084885, "grad_norm": 0.399745374917984, "learning_rate": 9.890109890109891e-07, "loss": 0.4835, "step": 207 }, { "epoch": 0.029825064525379984, "grad_norm": 0.4989771842956543, "learning_rate": 9.937888198757765e-07, "loss": 0.4527, "step": 208 }, { "epoch": 0.02996845425867508, "grad_norm": 0.4386262893676758, "learning_rate": 9.985666507405639e-07, "loss": 0.4759, "step": 209 }, { "epoch": 0.030111843991970175, "grad_norm": 0.4274125397205353, "learning_rate": 1.0033444816053512e-06, "loss": 0.4779, "step": 210 }, { "epoch": 0.03025523372526527, "grad_norm": 0.48396772146224976, "learning_rate": 1.0081223124701386e-06, "loss": 0.4854, "step": 211 }, { "epoch": 0.030398623458560366, "grad_norm": 0.43887975811958313, "learning_rate": 1.012900143334926e-06, "loss": 0.4726, "step": 212 }, { "epoch": 0.030542013191855462, "grad_norm": 0.4352761209011078, "learning_rate": 1.0176779741997134e-06, "loss": 0.4816, "step": 213 }, { "epoch": 0.03068540292515056, "grad_norm": 0.39477789402008057, "learning_rate": 1.0224558050645008e-06, "loss": 0.4637, "step": 214 }, { "epoch": 0.030828792658445656, "grad_norm": 0.4478986859321594, "learning_rate": 1.0272336359292883e-06, "loss": 0.4774, "step": 215 }, { "epoch": 0.030972182391740752, "grad_norm": 0.4261312484741211, "learning_rate": 1.0320114667940755e-06, "loss": 0.4692, "step": 216 }, { "epoch": 0.031115572125035847, "grad_norm": 0.43289992213249207, "learning_rate": 1.0367892976588629e-06, "loss": 0.4851, "step": 217 }, { "epoch": 0.03125896185833094, "grad_norm": 0.5260618329048157, "learning_rate": 1.0415671285236503e-06, "loss": 0.4425, "step": 218 }, { "epoch": 0.03140235159162604, "grad_norm": 0.386227548122406, "learning_rate": 1.0463449593884376e-06, "loss": 0.4513, "step": 219 }, { "epoch": 0.031545741324921134, "grad_norm": 0.44717657566070557, "learning_rate": 1.051122790253225e-06, "loss": 0.4738, "step": 220 }, { "epoch": 0.03168913105821623, "grad_norm": 0.4286089837551117, "learning_rate": 1.0559006211180126e-06, "loss": 0.4594, "step": 221 }, { "epoch": 0.031832520791511325, "grad_norm": 0.4687623679637909, "learning_rate": 1.0606784519828e-06, "loss": 0.4485, "step": 222 }, { "epoch": 0.031975910524806424, "grad_norm": 0.4722347855567932, "learning_rate": 1.0654562828475874e-06, "loss": 0.4707, "step": 223 }, { "epoch": 0.03211930025810152, "grad_norm": 0.5014804601669312, "learning_rate": 1.0702341137123747e-06, "loss": 0.4606, "step": 224 }, { "epoch": 0.032262689991396615, "grad_norm": 0.4408886432647705, "learning_rate": 1.0750119445771621e-06, "loss": 0.4447, "step": 225 }, { "epoch": 0.032406079724691714, "grad_norm": 0.44438886642456055, "learning_rate": 1.0797897754419493e-06, "loss": 0.4595, "step": 226 }, { "epoch": 0.032549469457986806, "grad_norm": 0.4047393500804901, "learning_rate": 1.0845676063067369e-06, "loss": 0.4642, "step": 227 }, { "epoch": 0.032692859191281905, "grad_norm": 0.42597487568855286, "learning_rate": 1.0893454371715242e-06, "loss": 0.4932, "step": 228 }, { "epoch": 0.032836248924577004, "grad_norm": 0.44515809416770935, "learning_rate": 1.0941232680363116e-06, "loss": 0.4633, "step": 229 }, { "epoch": 0.032979638657872096, "grad_norm": 0.4234674870967865, "learning_rate": 1.098901098901099e-06, "loss": 0.446, "step": 230 }, { "epoch": 0.033123028391167195, "grad_norm": 0.43257036805152893, "learning_rate": 1.1036789297658864e-06, "loss": 0.4766, "step": 231 }, { "epoch": 0.03326641812446229, "grad_norm": 0.44261181354522705, "learning_rate": 1.1084567606306737e-06, "loss": 0.4712, "step": 232 }, { "epoch": 0.033409807857757386, "grad_norm": 0.45818892121315, "learning_rate": 1.1132345914954611e-06, "loss": 0.4431, "step": 233 }, { "epoch": 0.03355319759105248, "grad_norm": 0.4187541902065277, "learning_rate": 1.1180124223602485e-06, "loss": 0.5068, "step": 234 }, { "epoch": 0.03369658732434758, "grad_norm": 0.42136791348457336, "learning_rate": 1.1227902532250359e-06, "loss": 0.4692, "step": 235 }, { "epoch": 0.033839977057642676, "grad_norm": 0.459249883890152, "learning_rate": 1.1275680840898233e-06, "loss": 0.4423, "step": 236 }, { "epoch": 0.03398336679093777, "grad_norm": 0.5066389441490173, "learning_rate": 1.1323459149546106e-06, "loss": 0.4422, "step": 237 }, { "epoch": 0.03412675652423287, "grad_norm": 0.4259575605392456, "learning_rate": 1.137123745819398e-06, "loss": 0.4489, "step": 238 }, { "epoch": 0.03427014625752796, "grad_norm": 0.40305036306381226, "learning_rate": 1.1419015766841856e-06, "loss": 0.4345, "step": 239 }, { "epoch": 0.03441353599082306, "grad_norm": 0.4270547032356262, "learning_rate": 1.146679407548973e-06, "loss": 0.4587, "step": 240 }, { "epoch": 0.03455692572411816, "grad_norm": 0.4526289701461792, "learning_rate": 1.1514572384137603e-06, "loss": 0.4588, "step": 241 }, { "epoch": 0.03470031545741325, "grad_norm": 0.44372305274009705, "learning_rate": 1.1562350692785475e-06, "loss": 0.48, "step": 242 }, { "epoch": 0.03484370519070835, "grad_norm": 0.41631028056144714, "learning_rate": 1.1610129001433349e-06, "loss": 0.4605, "step": 243 }, { "epoch": 0.03498709492400344, "grad_norm": 0.45371344685554504, "learning_rate": 1.1657907310081223e-06, "loss": 0.4701, "step": 244 }, { "epoch": 0.03513048465729854, "grad_norm": 0.41568514704704285, "learning_rate": 1.1705685618729099e-06, "loss": 0.452, "step": 245 }, { "epoch": 0.03527387439059363, "grad_norm": 0.436805784702301, "learning_rate": 1.1753463927376972e-06, "loss": 0.4343, "step": 246 }, { "epoch": 0.03541726412388873, "grad_norm": 0.4636583924293518, "learning_rate": 1.1801242236024846e-06, "loss": 0.4541, "step": 247 }, { "epoch": 0.03556065385718383, "grad_norm": 0.4389295279979706, "learning_rate": 1.184902054467272e-06, "loss": 0.4667, "step": 248 }, { "epoch": 0.03570404359047892, "grad_norm": 0.454166442155838, "learning_rate": 1.1896798853320594e-06, "loss": 0.4818, "step": 249 }, { "epoch": 0.03584743332377402, "grad_norm": 0.4729958176612854, "learning_rate": 1.1944577161968467e-06, "loss": 0.4639, "step": 250 }, { "epoch": 0.03599082305706911, "grad_norm": 0.40169984102249146, "learning_rate": 1.1992355470616341e-06, "loss": 0.4392, "step": 251 }, { "epoch": 0.03613421279036421, "grad_norm": 0.43964219093322754, "learning_rate": 1.2040133779264215e-06, "loss": 0.4637, "step": 252 }, { "epoch": 0.03627760252365931, "grad_norm": 0.513671338558197, "learning_rate": 1.2087912087912089e-06, "loss": 0.444, "step": 253 }, { "epoch": 0.0364209922569544, "grad_norm": 0.42345699667930603, "learning_rate": 1.2135690396559962e-06, "loss": 0.4598, "step": 254 }, { "epoch": 0.0365643819902495, "grad_norm": 0.44038814306259155, "learning_rate": 1.2183468705207836e-06, "loss": 0.4599, "step": 255 }, { "epoch": 0.03670777172354459, "grad_norm": 0.41485434770584106, "learning_rate": 1.223124701385571e-06, "loss": 0.465, "step": 256 }, { "epoch": 0.03685116145683969, "grad_norm": 0.4193460941314697, "learning_rate": 1.2279025322503586e-06, "loss": 0.4536, "step": 257 }, { "epoch": 0.03699455119013478, "grad_norm": 0.4450029730796814, "learning_rate": 1.2326803631151458e-06, "loss": 0.4487, "step": 258 }, { "epoch": 0.03713794092342988, "grad_norm": 0.4495091736316681, "learning_rate": 1.2374581939799331e-06, "loss": 0.4558, "step": 259 }, { "epoch": 0.03728133065672498, "grad_norm": 0.44838210940361023, "learning_rate": 1.2422360248447205e-06, "loss": 0.4394, "step": 260 }, { "epoch": 0.03742472039002007, "grad_norm": 0.4257270097732544, "learning_rate": 1.2470138557095079e-06, "loss": 0.4428, "step": 261 }, { "epoch": 0.03756811012331517, "grad_norm": 0.4147776663303375, "learning_rate": 1.2517916865742955e-06, "loss": 0.4409, "step": 262 }, { "epoch": 0.037711499856610264, "grad_norm": 0.41867902874946594, "learning_rate": 1.2565695174390826e-06, "loss": 0.4423, "step": 263 }, { "epoch": 0.03785488958990536, "grad_norm": 0.43563783168792725, "learning_rate": 1.2613473483038702e-06, "loss": 0.4562, "step": 264 }, { "epoch": 0.03799827932320046, "grad_norm": 0.4598829448223114, "learning_rate": 1.2661251791686574e-06, "loss": 0.4315, "step": 265 }, { "epoch": 0.038141669056495554, "grad_norm": 0.4121547043323517, "learning_rate": 1.270903010033445e-06, "loss": 0.4539, "step": 266 }, { "epoch": 0.03828505878979065, "grad_norm": 0.4052758812904358, "learning_rate": 1.2756808408982324e-06, "loss": 0.4422, "step": 267 }, { "epoch": 0.038428448523085745, "grad_norm": 0.44913482666015625, "learning_rate": 1.2804586717630197e-06, "loss": 0.4573, "step": 268 }, { "epoch": 0.038571838256380844, "grad_norm": 0.4067220985889435, "learning_rate": 1.2852365026278071e-06, "loss": 0.4475, "step": 269 }, { "epoch": 0.038715227989675936, "grad_norm": 0.5043574571609497, "learning_rate": 1.2900143334925943e-06, "loss": 0.4671, "step": 270 }, { "epoch": 0.038858617722971035, "grad_norm": 0.42863473296165466, "learning_rate": 1.2947921643573819e-06, "loss": 0.4297, "step": 271 }, { "epoch": 0.039002007456266134, "grad_norm": 0.4293617904186249, "learning_rate": 1.2995699952221695e-06, "loss": 0.4523, "step": 272 }, { "epoch": 0.039145397189561226, "grad_norm": 0.4304468035697937, "learning_rate": 1.3043478260869566e-06, "loss": 0.4514, "step": 273 }, { "epoch": 0.039288786922856325, "grad_norm": 0.4146033823490143, "learning_rate": 1.309125656951744e-06, "loss": 0.4372, "step": 274 }, { "epoch": 0.03943217665615142, "grad_norm": 0.4076017439365387, "learning_rate": 1.3139034878165314e-06, "loss": 0.4545, "step": 275 }, { "epoch": 0.039575566389446516, "grad_norm": 0.4040968716144562, "learning_rate": 1.3186813186813187e-06, "loss": 0.4628, "step": 276 }, { "epoch": 0.03971895612274161, "grad_norm": 0.38086366653442383, "learning_rate": 1.3234591495461061e-06, "loss": 0.4586, "step": 277 }, { "epoch": 0.03986234585603671, "grad_norm": 0.4418119788169861, "learning_rate": 1.3282369804108935e-06, "loss": 0.4337, "step": 278 }, { "epoch": 0.040005735589331806, "grad_norm": 0.40053918957710266, "learning_rate": 1.333014811275681e-06, "loss": 0.4527, "step": 279 }, { "epoch": 0.0401491253226269, "grad_norm": 0.43677809834480286, "learning_rate": 1.3377926421404683e-06, "loss": 0.436, "step": 280 }, { "epoch": 0.040292515055922, "grad_norm": 0.46457439661026, "learning_rate": 1.3425704730052558e-06, "loss": 0.4285, "step": 281 }, { "epoch": 0.04043590478921709, "grad_norm": 0.48762670159339905, "learning_rate": 1.347348303870043e-06, "loss": 0.4218, "step": 282 }, { "epoch": 0.04057929452251219, "grad_norm": 0.4313371777534485, "learning_rate": 1.3521261347348306e-06, "loss": 0.428, "step": 283 }, { "epoch": 0.04072268425580729, "grad_norm": 0.4453345239162445, "learning_rate": 1.356903965599618e-06, "loss": 0.467, "step": 284 }, { "epoch": 0.04086607398910238, "grad_norm": 0.4695543944835663, "learning_rate": 1.3616817964644051e-06, "loss": 0.4455, "step": 285 }, { "epoch": 0.04100946372239748, "grad_norm": 0.4121398329734802, "learning_rate": 1.3664596273291927e-06, "loss": 0.4513, "step": 286 }, { "epoch": 0.04115285345569257, "grad_norm": 0.38506877422332764, "learning_rate": 1.37123745819398e-06, "loss": 0.4536, "step": 287 }, { "epoch": 0.04129624318898767, "grad_norm": 0.4122912883758545, "learning_rate": 1.3760152890587675e-06, "loss": 0.4766, "step": 288 }, { "epoch": 0.04143963292228276, "grad_norm": 0.3973139524459839, "learning_rate": 1.3807931199235546e-06, "loss": 0.4351, "step": 289 }, { "epoch": 0.04158302265557786, "grad_norm": 0.4444577693939209, "learning_rate": 1.3855709507883422e-06, "loss": 0.4468, "step": 290 }, { "epoch": 0.04172641238887296, "grad_norm": 0.39293935894966125, "learning_rate": 1.3903487816531296e-06, "loss": 0.4314, "step": 291 }, { "epoch": 0.04186980212216805, "grad_norm": 0.43519213795661926, "learning_rate": 1.395126612517917e-06, "loss": 0.4453, "step": 292 }, { "epoch": 0.04201319185546315, "grad_norm": 0.41744735836982727, "learning_rate": 1.3999044433827044e-06, "loss": 0.4426, "step": 293 }, { "epoch": 0.04215658158875824, "grad_norm": 0.3726487159729004, "learning_rate": 1.4046822742474917e-06, "loss": 0.428, "step": 294 }, { "epoch": 0.04229997132205334, "grad_norm": 0.43438297510147095, "learning_rate": 1.4094601051122791e-06, "loss": 0.4237, "step": 295 }, { "epoch": 0.04244336105534844, "grad_norm": 0.40679749846458435, "learning_rate": 1.4142379359770667e-06, "loss": 0.4448, "step": 296 }, { "epoch": 0.04258675078864353, "grad_norm": 0.4375452399253845, "learning_rate": 1.4190157668418539e-06, "loss": 0.4365, "step": 297 }, { "epoch": 0.04273014052193863, "grad_norm": 0.4182201027870178, "learning_rate": 1.4237935977066415e-06, "loss": 0.4277, "step": 298 }, { "epoch": 0.04287353025523372, "grad_norm": 0.4639587700366974, "learning_rate": 1.4285714285714286e-06, "loss": 0.4651, "step": 299 }, { "epoch": 0.04301691998852882, "grad_norm": 0.40477877855300903, "learning_rate": 1.4333492594362162e-06, "loss": 0.4205, "step": 300 }, { "epoch": 0.043160309721823914, "grad_norm": 0.4110063314437866, "learning_rate": 1.4381270903010034e-06, "loss": 0.4565, "step": 301 }, { "epoch": 0.04330369945511901, "grad_norm": 0.39684218168258667, "learning_rate": 1.4429049211657908e-06, "loss": 0.4108, "step": 302 }, { "epoch": 0.04344708918841411, "grad_norm": 0.4527221918106079, "learning_rate": 1.4476827520305783e-06, "loss": 0.4267, "step": 303 }, { "epoch": 0.043590478921709204, "grad_norm": 0.411278635263443, "learning_rate": 1.4524605828953655e-06, "loss": 0.4535, "step": 304 }, { "epoch": 0.0437338686550043, "grad_norm": 0.383398175239563, "learning_rate": 1.457238413760153e-06, "loss": 0.4611, "step": 305 }, { "epoch": 0.043877258388299395, "grad_norm": 0.43985581398010254, "learning_rate": 1.4620162446249403e-06, "loss": 0.479, "step": 306 }, { "epoch": 0.044020648121594494, "grad_norm": 0.4120394289493561, "learning_rate": 1.4667940754897279e-06, "loss": 0.4256, "step": 307 }, { "epoch": 0.04416403785488959, "grad_norm": 0.4376498758792877, "learning_rate": 1.4715719063545152e-06, "loss": 0.4467, "step": 308 }, { "epoch": 0.044307427588184685, "grad_norm": 0.39410045742988586, "learning_rate": 1.4763497372193026e-06, "loss": 0.4298, "step": 309 }, { "epoch": 0.044450817321479784, "grad_norm": 0.4148866534233093, "learning_rate": 1.48112756808409e-06, "loss": 0.4336, "step": 310 }, { "epoch": 0.044594207054774876, "grad_norm": 0.4292428493499756, "learning_rate": 1.4859053989488771e-06, "loss": 0.4459, "step": 311 }, { "epoch": 0.044737596788069975, "grad_norm": 0.45921286940574646, "learning_rate": 1.4906832298136647e-06, "loss": 0.4286, "step": 312 }, { "epoch": 0.04488098652136507, "grad_norm": 0.47576582431793213, "learning_rate": 1.495461060678452e-06, "loss": 0.4351, "step": 313 }, { "epoch": 0.045024376254660166, "grad_norm": 0.45134374499320984, "learning_rate": 1.5002388915432395e-06, "loss": 0.4232, "step": 314 }, { "epoch": 0.045167765987955265, "grad_norm": 0.4750058352947235, "learning_rate": 1.505016722408027e-06, "loss": 0.4285, "step": 315 }, { "epoch": 0.04531115572125036, "grad_norm": 0.4534226655960083, "learning_rate": 1.5097945532728142e-06, "loss": 0.434, "step": 316 }, { "epoch": 0.045454545454545456, "grad_norm": 0.4164750277996063, "learning_rate": 1.5145723841376016e-06, "loss": 0.4574, "step": 317 }, { "epoch": 0.04559793518784055, "grad_norm": 0.4116749167442322, "learning_rate": 1.519350215002389e-06, "loss": 0.4263, "step": 318 }, { "epoch": 0.04574132492113565, "grad_norm": 0.4298824965953827, "learning_rate": 1.5241280458671764e-06, "loss": 0.4627, "step": 319 }, { "epoch": 0.045884714654430746, "grad_norm": 0.48798868060112, "learning_rate": 1.528905876731964e-06, "loss": 0.4249, "step": 320 }, { "epoch": 0.04602810438772584, "grad_norm": 0.4098011553287506, "learning_rate": 1.5336837075967511e-06, "loss": 0.4704, "step": 321 }, { "epoch": 0.04617149412102094, "grad_norm": 0.4362853467464447, "learning_rate": 1.5384615384615387e-06, "loss": 0.4432, "step": 322 }, { "epoch": 0.04631488385431603, "grad_norm": 0.4353121519088745, "learning_rate": 1.5432393693263259e-06, "loss": 0.4418, "step": 323 }, { "epoch": 0.04645827358761113, "grad_norm": 0.40414947271347046, "learning_rate": 1.5480172001911135e-06, "loss": 0.4602, "step": 324 }, { "epoch": 0.04660166332090622, "grad_norm": 0.4541720151901245, "learning_rate": 1.5527950310559006e-06, "loss": 0.4347, "step": 325 }, { "epoch": 0.04674505305420132, "grad_norm": 0.44399768114089966, "learning_rate": 1.5575728619206882e-06, "loss": 0.4387, "step": 326 }, { "epoch": 0.04688844278749642, "grad_norm": 0.4162309765815735, "learning_rate": 1.5623506927854756e-06, "loss": 0.4565, "step": 327 }, { "epoch": 0.04703183252079151, "grad_norm": 0.39398741722106934, "learning_rate": 1.5671285236502628e-06, "loss": 0.4399, "step": 328 }, { "epoch": 0.04717522225408661, "grad_norm": 0.42681610584259033, "learning_rate": 1.5719063545150504e-06, "loss": 0.4268, "step": 329 }, { "epoch": 0.0473186119873817, "grad_norm": 0.44244739413261414, "learning_rate": 1.5766841853798375e-06, "loss": 0.4144, "step": 330 }, { "epoch": 0.0474620017206768, "grad_norm": 0.400931715965271, "learning_rate": 1.5814620162446251e-06, "loss": 0.4403, "step": 331 }, { "epoch": 0.0476053914539719, "grad_norm": 0.4325736463069916, "learning_rate": 1.5862398471094125e-06, "loss": 0.4467, "step": 332 }, { "epoch": 0.04774878118726699, "grad_norm": 0.4484155476093292, "learning_rate": 1.5910176779741999e-06, "loss": 0.4427, "step": 333 }, { "epoch": 0.04789217092056209, "grad_norm": 0.435223788022995, "learning_rate": 1.5957955088389872e-06, "loss": 0.4329, "step": 334 }, { "epoch": 0.04803556065385718, "grad_norm": 0.4281010329723358, "learning_rate": 1.6005733397037746e-06, "loss": 0.4342, "step": 335 }, { "epoch": 0.04817895038715228, "grad_norm": 0.3889048099517822, "learning_rate": 1.605351170568562e-06, "loss": 0.4152, "step": 336 }, { "epoch": 0.04832234012044737, "grad_norm": 0.4039011597633362, "learning_rate": 1.6101290014333494e-06, "loss": 0.4325, "step": 337 }, { "epoch": 0.04846572985374247, "grad_norm": 0.4045931398868561, "learning_rate": 1.6149068322981367e-06, "loss": 0.4476, "step": 338 }, { "epoch": 0.04860911958703757, "grad_norm": 0.4177647531032562, "learning_rate": 1.6196846631629243e-06, "loss": 0.4242, "step": 339 }, { "epoch": 0.04875250932033266, "grad_norm": 0.4547858238220215, "learning_rate": 1.6244624940277115e-06, "loss": 0.444, "step": 340 }, { "epoch": 0.04889589905362776, "grad_norm": 0.5265015363693237, "learning_rate": 1.629240324892499e-06, "loss": 0.449, "step": 341 }, { "epoch": 0.049039288786922854, "grad_norm": 0.4339706599712372, "learning_rate": 1.6340181557572863e-06, "loss": 0.422, "step": 342 }, { "epoch": 0.04918267852021795, "grad_norm": 0.4380543529987335, "learning_rate": 1.6387959866220736e-06, "loss": 0.4427, "step": 343 }, { "epoch": 0.04932606825351305, "grad_norm": 0.4217549264431, "learning_rate": 1.6435738174868612e-06, "loss": 0.4577, "step": 344 }, { "epoch": 0.049469457986808144, "grad_norm": 0.40103834867477417, "learning_rate": 1.6483516483516484e-06, "loss": 0.4076, "step": 345 }, { "epoch": 0.04961284772010324, "grad_norm": 0.4409862160682678, "learning_rate": 1.653129479216436e-06, "loss": 0.4585, "step": 346 }, { "epoch": 0.049756237453398335, "grad_norm": 0.4110385775566101, "learning_rate": 1.6579073100812231e-06, "loss": 0.4335, "step": 347 }, { "epoch": 0.049899627186693434, "grad_norm": 0.4329816699028015, "learning_rate": 1.6626851409460107e-06, "loss": 0.4546, "step": 348 }, { "epoch": 0.050043016919988526, "grad_norm": 0.3997075855731964, "learning_rate": 1.6674629718107979e-06, "loss": 0.4115, "step": 349 }, { "epoch": 0.050186406653283624, "grad_norm": 0.430449903011322, "learning_rate": 1.6722408026755855e-06, "loss": 0.4469, "step": 350 }, { "epoch": 0.05032979638657872, "grad_norm": 0.4550170302391052, "learning_rate": 1.6770186335403729e-06, "loss": 0.4523, "step": 351 }, { "epoch": 0.050473186119873815, "grad_norm": 0.4847877025604248, "learning_rate": 1.6817964644051602e-06, "loss": 0.4605, "step": 352 }, { "epoch": 0.050616575853168914, "grad_norm": 0.3849944770336151, "learning_rate": 1.6865742952699476e-06, "loss": 0.4272, "step": 353 }, { "epoch": 0.050759965586464006, "grad_norm": 0.4354095458984375, "learning_rate": 1.6913521261347348e-06, "loss": 0.4397, "step": 354 }, { "epoch": 0.050903355319759105, "grad_norm": 0.4369865655899048, "learning_rate": 1.6961299569995224e-06, "loss": 0.4238, "step": 355 }, { "epoch": 0.051046745053054204, "grad_norm": 0.44846364855766296, "learning_rate": 1.70090778786431e-06, "loss": 0.4522, "step": 356 }, { "epoch": 0.051190134786349296, "grad_norm": 0.4241853654384613, "learning_rate": 1.7056856187290971e-06, "loss": 0.4601, "step": 357 }, { "epoch": 0.051333524519644395, "grad_norm": 0.4473690986633301, "learning_rate": 1.7104634495938847e-06, "loss": 0.4462, "step": 358 }, { "epoch": 0.05147691425293949, "grad_norm": 0.4430094063282013, "learning_rate": 1.7152412804586719e-06, "loss": 0.4275, "step": 359 }, { "epoch": 0.051620303986234586, "grad_norm": 0.4413195252418518, "learning_rate": 1.7200191113234592e-06, "loss": 0.4075, "step": 360 }, { "epoch": 0.05176369371952968, "grad_norm": 0.39212125539779663, "learning_rate": 1.7247969421882466e-06, "loss": 0.4462, "step": 361 }, { "epoch": 0.05190708345282478, "grad_norm": 0.42425990104675293, "learning_rate": 1.729574773053034e-06, "loss": 0.4274, "step": 362 }, { "epoch": 0.052050473186119876, "grad_norm": 0.3951784670352936, "learning_rate": 1.7343526039178216e-06, "loss": 0.4213, "step": 363 }, { "epoch": 0.05219386291941497, "grad_norm": 0.42663267254829407, "learning_rate": 1.7391304347826088e-06, "loss": 0.4494, "step": 364 }, { "epoch": 0.05233725265271007, "grad_norm": 0.40690192580223083, "learning_rate": 1.7439082656473963e-06, "loss": 0.429, "step": 365 }, { "epoch": 0.05248064238600516, "grad_norm": 0.39707618951797485, "learning_rate": 1.7486860965121835e-06, "loss": 0.4317, "step": 366 }, { "epoch": 0.05262403211930026, "grad_norm": 0.4143345355987549, "learning_rate": 1.753463927376971e-06, "loss": 0.4026, "step": 367 }, { "epoch": 0.05276742185259536, "grad_norm": 0.38786688446998596, "learning_rate": 1.7582417582417585e-06, "loss": 0.4399, "step": 368 }, { "epoch": 0.05291081158589045, "grad_norm": 0.40986666083335876, "learning_rate": 1.7630195891065459e-06, "loss": 0.4141, "step": 369 }, { "epoch": 0.05305420131918555, "grad_norm": 0.43294888734817505, "learning_rate": 1.7677974199713332e-06, "loss": 0.4209, "step": 370 }, { "epoch": 0.05319759105248064, "grad_norm": 0.3747105300426483, "learning_rate": 1.7725752508361204e-06, "loss": 0.421, "step": 371 }, { "epoch": 0.05334098078577574, "grad_norm": 0.42783597111701965, "learning_rate": 1.777353081700908e-06, "loss": 0.4216, "step": 372 }, { "epoch": 0.05348437051907083, "grad_norm": 0.4117274880409241, "learning_rate": 1.7821309125656951e-06, "loss": 0.436, "step": 373 }, { "epoch": 0.05362776025236593, "grad_norm": 0.4322548806667328, "learning_rate": 1.7869087434304827e-06, "loss": 0.4427, "step": 374 }, { "epoch": 0.05377114998566103, "grad_norm": 0.4207087457180023, "learning_rate": 1.7916865742952701e-06, "loss": 0.3924, "step": 375 }, { "epoch": 0.05391453971895612, "grad_norm": 0.40603622794151306, "learning_rate": 1.7964644051600575e-06, "loss": 0.4359, "step": 376 }, { "epoch": 0.05405792945225122, "grad_norm": 0.3955737054347992, "learning_rate": 1.8012422360248449e-06, "loss": 0.4422, "step": 377 }, { "epoch": 0.05420131918554631, "grad_norm": 0.4163385331630707, "learning_rate": 1.8060200668896322e-06, "loss": 0.4458, "step": 378 }, { "epoch": 0.05434470891884141, "grad_norm": 0.39683443307876587, "learning_rate": 1.8107978977544196e-06, "loss": 0.4248, "step": 379 }, { "epoch": 0.05448809865213651, "grad_norm": 0.4441393315792084, "learning_rate": 1.8155757286192072e-06, "loss": 0.4334, "step": 380 }, { "epoch": 0.0546314883854316, "grad_norm": 0.4332062005996704, "learning_rate": 1.8203535594839944e-06, "loss": 0.4338, "step": 381 }, { "epoch": 0.0547748781187267, "grad_norm": 0.4167422652244568, "learning_rate": 1.825131390348782e-06, "loss": 0.4336, "step": 382 }, { "epoch": 0.05491826785202179, "grad_norm": 0.4269276261329651, "learning_rate": 1.8299092212135691e-06, "loss": 0.4563, "step": 383 }, { "epoch": 0.05506165758531689, "grad_norm": 0.4061259925365448, "learning_rate": 1.8346870520783567e-06, "loss": 0.4114, "step": 384 }, { "epoch": 0.055205047318611984, "grad_norm": 0.5081895589828491, "learning_rate": 1.8394648829431439e-06, "loss": 0.4272, "step": 385 }, { "epoch": 0.05534843705190708, "grad_norm": 0.4222818911075592, "learning_rate": 1.8442427138079313e-06, "loss": 0.4079, "step": 386 }, { "epoch": 0.05549182678520218, "grad_norm": 0.4648245573043823, "learning_rate": 1.8490205446727188e-06, "loss": 0.4388, "step": 387 }, { "epoch": 0.055635216518497274, "grad_norm": 0.46335330605506897, "learning_rate": 1.853798375537506e-06, "loss": 0.4387, "step": 388 }, { "epoch": 0.05577860625179237, "grad_norm": 0.4439956247806549, "learning_rate": 1.8585762064022936e-06, "loss": 0.445, "step": 389 }, { "epoch": 0.055921995985087465, "grad_norm": 0.4590660333633423, "learning_rate": 1.8633540372670808e-06, "loss": 0.4339, "step": 390 }, { "epoch": 0.056065385718382564, "grad_norm": 0.40664857625961304, "learning_rate": 1.8681318681318684e-06, "loss": 0.4225, "step": 391 }, { "epoch": 0.05620877545167766, "grad_norm": 0.44877973198890686, "learning_rate": 1.8729096989966555e-06, "loss": 0.4174, "step": 392 }, { "epoch": 0.056352165184972755, "grad_norm": 0.4167042374610901, "learning_rate": 1.877687529861443e-06, "loss": 0.4462, "step": 393 }, { "epoch": 0.056495554918267854, "grad_norm": 0.4376240670681, "learning_rate": 1.8824653607262305e-06, "loss": 0.4283, "step": 394 }, { "epoch": 0.056638944651562946, "grad_norm": 0.4044860601425171, "learning_rate": 1.8872431915910179e-06, "loss": 0.4145, "step": 395 }, { "epoch": 0.056782334384858045, "grad_norm": 0.45643875002861023, "learning_rate": 1.8920210224558052e-06, "loss": 0.4281, "step": 396 }, { "epoch": 0.05692572411815314, "grad_norm": 0.418647438287735, "learning_rate": 1.8967988533205924e-06, "loss": 0.4089, "step": 397 }, { "epoch": 0.057069113851448236, "grad_norm": 0.3991043269634247, "learning_rate": 1.90157668418538e-06, "loss": 0.4049, "step": 398 }, { "epoch": 0.057212503584743335, "grad_norm": 0.4037700891494751, "learning_rate": 1.9063545150501676e-06, "loss": 0.4021, "step": 399 }, { "epoch": 0.05735589331803843, "grad_norm": 0.45786890387535095, "learning_rate": 1.9111323459149545e-06, "loss": 0.4135, "step": 400 }, { "epoch": 0.057499283051333526, "grad_norm": 0.4014211893081665, "learning_rate": 1.9159101767797423e-06, "loss": 0.408, "step": 401 }, { "epoch": 0.05764267278462862, "grad_norm": 0.4350392520427704, "learning_rate": 1.9206880076445293e-06, "loss": 0.4198, "step": 402 }, { "epoch": 0.05778606251792372, "grad_norm": 0.41374728083610535, "learning_rate": 1.925465838509317e-06, "loss": 0.4044, "step": 403 }, { "epoch": 0.057929452251218816, "grad_norm": 0.4240091145038605, "learning_rate": 1.930243669374104e-06, "loss": 0.4279, "step": 404 }, { "epoch": 0.05807284198451391, "grad_norm": 0.39792096614837646, "learning_rate": 1.935021500238892e-06, "loss": 0.4358, "step": 405 }, { "epoch": 0.05821623171780901, "grad_norm": 0.4362461268901825, "learning_rate": 1.9397993311036792e-06, "loss": 0.4098, "step": 406 }, { "epoch": 0.0583596214511041, "grad_norm": 0.41836869716644287, "learning_rate": 1.9445771619684666e-06, "loss": 0.4114, "step": 407 }, { "epoch": 0.0585030111843992, "grad_norm": 0.41542482376098633, "learning_rate": 1.949354992833254e-06, "loss": 0.4241, "step": 408 }, { "epoch": 0.05864640091769429, "grad_norm": 0.4455432593822479, "learning_rate": 1.9541328236980413e-06, "loss": 0.4135, "step": 409 }, { "epoch": 0.05878979065098939, "grad_norm": 0.41773781180381775, "learning_rate": 1.9589106545628287e-06, "loss": 0.4393, "step": 410 }, { "epoch": 0.05893318038428449, "grad_norm": 0.39659908413887024, "learning_rate": 1.963688485427616e-06, "loss": 0.4056, "step": 411 }, { "epoch": 0.05907657011757958, "grad_norm": 0.43154019117355347, "learning_rate": 1.9684663162924035e-06, "loss": 0.416, "step": 412 }, { "epoch": 0.05921995985087468, "grad_norm": 0.4476464092731476, "learning_rate": 1.973244147157191e-06, "loss": 0.4093, "step": 413 }, { "epoch": 0.05936334958416977, "grad_norm": 0.44274675846099854, "learning_rate": 1.9780219780219782e-06, "loss": 0.4473, "step": 414 }, { "epoch": 0.05950673931746487, "grad_norm": 0.44476714730262756, "learning_rate": 1.9827998088867656e-06, "loss": 0.4413, "step": 415 }, { "epoch": 0.05965012905075997, "grad_norm": 0.4475274682044983, "learning_rate": 1.987577639751553e-06, "loss": 0.442, "step": 416 }, { "epoch": 0.05979351878405506, "grad_norm": 0.39417219161987305, "learning_rate": 1.9923554706163404e-06, "loss": 0.4149, "step": 417 }, { "epoch": 0.05993690851735016, "grad_norm": 0.4379570782184601, "learning_rate": 1.9971333014811277e-06, "loss": 0.4231, "step": 418 }, { "epoch": 0.06008029825064525, "grad_norm": 0.43737784028053284, "learning_rate": 2.001911132345915e-06, "loss": 0.4127, "step": 419 }, { "epoch": 0.06022368798394035, "grad_norm": 0.44695180654525757, "learning_rate": 2.0066889632107025e-06, "loss": 0.4013, "step": 420 }, { "epoch": 0.06036707771723544, "grad_norm": 0.38856741786003113, "learning_rate": 2.01146679407549e-06, "loss": 0.4279, "step": 421 }, { "epoch": 0.06051046745053054, "grad_norm": 0.44257909059524536, "learning_rate": 2.0162446249402772e-06, "loss": 0.4187, "step": 422 }, { "epoch": 0.06065385718382564, "grad_norm": 0.44098225235939026, "learning_rate": 2.0210224558050646e-06, "loss": 0.4328, "step": 423 }, { "epoch": 0.06079724691712073, "grad_norm": 0.4434906244277954, "learning_rate": 2.025800286669852e-06, "loss": 0.4134, "step": 424 }, { "epoch": 0.06094063665041583, "grad_norm": 0.423033207654953, "learning_rate": 2.0305781175346394e-06, "loss": 0.4123, "step": 425 }, { "epoch": 0.061084026383710924, "grad_norm": 0.42290300130844116, "learning_rate": 2.0353559483994268e-06, "loss": 0.436, "step": 426 }, { "epoch": 0.06122741611700602, "grad_norm": 0.4646404981613159, "learning_rate": 2.040133779264214e-06, "loss": 0.4408, "step": 427 }, { "epoch": 0.06137080585030112, "grad_norm": 0.4046838879585266, "learning_rate": 2.0449116101290015e-06, "loss": 0.4467, "step": 428 }, { "epoch": 0.061514195583596214, "grad_norm": 0.3881036043167114, "learning_rate": 2.049689440993789e-06, "loss": 0.4445, "step": 429 }, { "epoch": 0.06165758531689131, "grad_norm": 0.4732971787452698, "learning_rate": 2.0544672718585767e-06, "loss": 0.4694, "step": 430 }, { "epoch": 0.061800975050186405, "grad_norm": 0.4396432936191559, "learning_rate": 2.0592451027233636e-06, "loss": 0.4358, "step": 431 }, { "epoch": 0.061944364783481504, "grad_norm": 0.42229217290878296, "learning_rate": 2.064022933588151e-06, "loss": 0.452, "step": 432 }, { "epoch": 0.062087754516776596, "grad_norm": 0.41892439126968384, "learning_rate": 2.0688007644529384e-06, "loss": 0.4279, "step": 433 }, { "epoch": 0.062231144250071695, "grad_norm": 0.42503371834754944, "learning_rate": 2.0735785953177258e-06, "loss": 0.4262, "step": 434 }, { "epoch": 0.062374533983366794, "grad_norm": 0.4197878837585449, "learning_rate": 2.0783564261825136e-06, "loss": 0.4141, "step": 435 }, { "epoch": 0.06251792371666189, "grad_norm": 0.44894540309906006, "learning_rate": 2.0831342570473005e-06, "loss": 0.4041, "step": 436 }, { "epoch": 0.06266131344995698, "grad_norm": 0.518524706363678, "learning_rate": 2.0879120879120883e-06, "loss": 0.4206, "step": 437 }, { "epoch": 0.06280470318325208, "grad_norm": 0.4308348000049591, "learning_rate": 2.0926899187768753e-06, "loss": 0.4321, "step": 438 }, { "epoch": 0.06294809291654717, "grad_norm": 0.4009954631328583, "learning_rate": 2.097467749641663e-06, "loss": 0.4408, "step": 439 }, { "epoch": 0.06309148264984227, "grad_norm": 0.4058317542076111, "learning_rate": 2.10224558050645e-06, "loss": 0.416, "step": 440 }, { "epoch": 0.06323487238313737, "grad_norm": 0.4677073359489441, "learning_rate": 2.1070234113712374e-06, "loss": 0.4271, "step": 441 }, { "epoch": 0.06337826211643247, "grad_norm": 0.40982145071029663, "learning_rate": 2.111801242236025e-06, "loss": 0.4161, "step": 442 }, { "epoch": 0.06352165184972756, "grad_norm": 0.3956904709339142, "learning_rate": 2.116579073100812e-06, "loss": 0.4311, "step": 443 }, { "epoch": 0.06366504158302265, "grad_norm": 0.450579971075058, "learning_rate": 2.1213569039656e-06, "loss": 0.4051, "step": 444 }, { "epoch": 0.06380843131631775, "grad_norm": 0.38845548033714294, "learning_rate": 2.126134734830387e-06, "loss": 0.4233, "step": 445 }, { "epoch": 0.06395182104961285, "grad_norm": 0.44508248567581177, "learning_rate": 2.1309125656951747e-06, "loss": 0.4072, "step": 446 }, { "epoch": 0.06409521078290795, "grad_norm": 0.48271647095680237, "learning_rate": 2.135690396559962e-06, "loss": 0.4466, "step": 447 }, { "epoch": 0.06423860051620305, "grad_norm": 0.43437904119491577, "learning_rate": 2.1404682274247495e-06, "loss": 0.4145, "step": 448 }, { "epoch": 0.06438199024949813, "grad_norm": 0.41747409105300903, "learning_rate": 2.145246058289537e-06, "loss": 0.4155, "step": 449 }, { "epoch": 0.06452537998279323, "grad_norm": 0.3893345892429352, "learning_rate": 2.1500238891543242e-06, "loss": 0.4206, "step": 450 }, { "epoch": 0.06466876971608833, "grad_norm": 0.4247410297393799, "learning_rate": 2.1548017200191116e-06, "loss": 0.4213, "step": 451 }, { "epoch": 0.06481215944938343, "grad_norm": 0.38226476311683655, "learning_rate": 2.1595795508838985e-06, "loss": 0.4055, "step": 452 }, { "epoch": 0.06495554918267853, "grad_norm": 0.395211786031723, "learning_rate": 2.1643573817486863e-06, "loss": 0.4546, "step": 453 }, { "epoch": 0.06509893891597361, "grad_norm": 0.4189876317977905, "learning_rate": 2.1691352126134737e-06, "loss": 0.4283, "step": 454 }, { "epoch": 0.06524232864926871, "grad_norm": 0.4228185713291168, "learning_rate": 2.173913043478261e-06, "loss": 0.4419, "step": 455 }, { "epoch": 0.06538571838256381, "grad_norm": 0.4228114187717438, "learning_rate": 2.1786908743430485e-06, "loss": 0.4044, "step": 456 }, { "epoch": 0.06552910811585891, "grad_norm": 0.4126686453819275, "learning_rate": 2.183468705207836e-06, "loss": 0.4378, "step": 457 }, { "epoch": 0.06567249784915401, "grad_norm": 0.4204348921775818, "learning_rate": 2.1882465360726232e-06, "loss": 0.3841, "step": 458 }, { "epoch": 0.06581588758244909, "grad_norm": 0.4231877028942108, "learning_rate": 2.1930243669374106e-06, "loss": 0.4209, "step": 459 }, { "epoch": 0.06595927731574419, "grad_norm": 0.40147915482521057, "learning_rate": 2.197802197802198e-06, "loss": 0.4296, "step": 460 }, { "epoch": 0.06610266704903929, "grad_norm": 0.41045770049095154, "learning_rate": 2.2025800286669854e-06, "loss": 0.401, "step": 461 }, { "epoch": 0.06624605678233439, "grad_norm": 0.4383613169193268, "learning_rate": 2.2073578595317727e-06, "loss": 0.4184, "step": 462 }, { "epoch": 0.06638944651562947, "grad_norm": 0.4145072400569916, "learning_rate": 2.21213569039656e-06, "loss": 0.4119, "step": 463 }, { "epoch": 0.06653283624892457, "grad_norm": 0.3849464952945709, "learning_rate": 2.2169135212613475e-06, "loss": 0.4091, "step": 464 }, { "epoch": 0.06667622598221967, "grad_norm": 0.40296173095703125, "learning_rate": 2.221691352126135e-06, "loss": 0.4129, "step": 465 }, { "epoch": 0.06681961571551477, "grad_norm": 0.4523398280143738, "learning_rate": 2.2264691829909222e-06, "loss": 0.4278, "step": 466 }, { "epoch": 0.06696300544880987, "grad_norm": 0.41321316361427307, "learning_rate": 2.2312470138557096e-06, "loss": 0.427, "step": 467 }, { "epoch": 0.06710639518210496, "grad_norm": 0.43052178621292114, "learning_rate": 2.236024844720497e-06, "loss": 0.4079, "step": 468 }, { "epoch": 0.06724978491540005, "grad_norm": 0.3738011121749878, "learning_rate": 2.2408026755852844e-06, "loss": 0.4203, "step": 469 }, { "epoch": 0.06739317464869515, "grad_norm": 0.4366662800312042, "learning_rate": 2.2455805064500718e-06, "loss": 0.4132, "step": 470 }, { "epoch": 0.06753656438199025, "grad_norm": 0.40574538707733154, "learning_rate": 2.2503583373148596e-06, "loss": 0.43, "step": 471 }, { "epoch": 0.06767995411528535, "grad_norm": 0.39714065194129944, "learning_rate": 2.2551361681796465e-06, "loss": 0.4444, "step": 472 }, { "epoch": 0.06782334384858044, "grad_norm": 0.4052039682865143, "learning_rate": 2.259913999044434e-06, "loss": 0.4178, "step": 473 }, { "epoch": 0.06796673358187554, "grad_norm": 0.40494081377983093, "learning_rate": 2.2646918299092213e-06, "loss": 0.4059, "step": 474 }, { "epoch": 0.06811012331517063, "grad_norm": 0.44805672764778137, "learning_rate": 2.2694696607740086e-06, "loss": 0.4285, "step": 475 }, { "epoch": 0.06825351304846573, "grad_norm": 0.3917527496814728, "learning_rate": 2.274247491638796e-06, "loss": 0.4175, "step": 476 }, { "epoch": 0.06839690278176083, "grad_norm": 0.45071274042129517, "learning_rate": 2.2790253225035834e-06, "loss": 0.4285, "step": 477 }, { "epoch": 0.06854029251505592, "grad_norm": 0.41474685072898865, "learning_rate": 2.283803153368371e-06, "loss": 0.4299, "step": 478 }, { "epoch": 0.06868368224835102, "grad_norm": 0.4518735110759735, "learning_rate": 2.288580984233158e-06, "loss": 0.4131, "step": 479 }, { "epoch": 0.06882707198164612, "grad_norm": 0.41988855600357056, "learning_rate": 2.293358815097946e-06, "loss": 0.4218, "step": 480 }, { "epoch": 0.06897046171494121, "grad_norm": 0.3596690595149994, "learning_rate": 2.298136645962733e-06, "loss": 0.4109, "step": 481 }, { "epoch": 0.06911385144823631, "grad_norm": 0.4425627291202545, "learning_rate": 2.3029144768275207e-06, "loss": 0.4123, "step": 482 }, { "epoch": 0.0692572411815314, "grad_norm": 0.44860756397247314, "learning_rate": 2.307692307692308e-06, "loss": 0.4279, "step": 483 }, { "epoch": 0.0694006309148265, "grad_norm": 0.41639256477355957, "learning_rate": 2.312470138557095e-06, "loss": 0.4298, "step": 484 }, { "epoch": 0.0695440206481216, "grad_norm": 0.42664507031440735, "learning_rate": 2.317247969421883e-06, "loss": 0.4166, "step": 485 }, { "epoch": 0.0696874103814167, "grad_norm": 0.46340757608413696, "learning_rate": 2.3220258002866698e-06, "loss": 0.4278, "step": 486 }, { "epoch": 0.06983080011471178, "grad_norm": 0.48882848024368286, "learning_rate": 2.3268036311514576e-06, "loss": 0.4233, "step": 487 }, { "epoch": 0.06997418984800688, "grad_norm": 0.5195527672767639, "learning_rate": 2.3315814620162445e-06, "loss": 0.3962, "step": 488 }, { "epoch": 0.07011757958130198, "grad_norm": 0.4009092152118683, "learning_rate": 2.3363592928810323e-06, "loss": 0.4288, "step": 489 }, { "epoch": 0.07026096931459708, "grad_norm": 0.45002293586730957, "learning_rate": 2.3411371237458197e-06, "loss": 0.4195, "step": 490 }, { "epoch": 0.07040435904789218, "grad_norm": 0.40892815589904785, "learning_rate": 2.345914954610607e-06, "loss": 0.4403, "step": 491 }, { "epoch": 0.07054774878118726, "grad_norm": 0.4428500533103943, "learning_rate": 2.3506927854753945e-06, "loss": 0.4115, "step": 492 }, { "epoch": 0.07069113851448236, "grad_norm": 0.45920243859291077, "learning_rate": 2.355470616340182e-06, "loss": 0.4233, "step": 493 }, { "epoch": 0.07083452824777746, "grad_norm": 0.4332737624645233, "learning_rate": 2.3602484472049692e-06, "loss": 0.4301, "step": 494 }, { "epoch": 0.07097791798107256, "grad_norm": 0.4351244568824768, "learning_rate": 2.3650262780697566e-06, "loss": 0.416, "step": 495 }, { "epoch": 0.07112130771436766, "grad_norm": 0.38797521591186523, "learning_rate": 2.369804108934544e-06, "loss": 0.4434, "step": 496 }, { "epoch": 0.07126469744766274, "grad_norm": 0.4650706648826599, "learning_rate": 2.3745819397993314e-06, "loss": 0.4087, "step": 497 }, { "epoch": 0.07140808718095784, "grad_norm": 0.512516975402832, "learning_rate": 2.3793597706641187e-06, "loss": 0.431, "step": 498 }, { "epoch": 0.07155147691425294, "grad_norm": 0.4032112658023834, "learning_rate": 2.384137601528906e-06, "loss": 0.4146, "step": 499 }, { "epoch": 0.07169486664754804, "grad_norm": 0.38711443543434143, "learning_rate": 2.3889154323936935e-06, "loss": 0.4234, "step": 500 }, { "epoch": 0.07183825638084314, "grad_norm": 0.43057963252067566, "learning_rate": 2.393693263258481e-06, "loss": 0.4141, "step": 501 }, { "epoch": 0.07198164611413822, "grad_norm": 0.4497792422771454, "learning_rate": 2.3984710941232682e-06, "loss": 0.3944, "step": 502 }, { "epoch": 0.07212503584743332, "grad_norm": 0.4256892800331116, "learning_rate": 2.4032489249880556e-06, "loss": 0.4015, "step": 503 }, { "epoch": 0.07226842558072842, "grad_norm": 0.44316554069519043, "learning_rate": 2.408026755852843e-06, "loss": 0.408, "step": 504 }, { "epoch": 0.07241181531402352, "grad_norm": 0.45546749234199524, "learning_rate": 2.4128045867176304e-06, "loss": 0.4471, "step": 505 }, { "epoch": 0.07255520504731862, "grad_norm": 0.4576176702976227, "learning_rate": 2.4175824175824177e-06, "loss": 0.4308, "step": 506 }, { "epoch": 0.0726985947806137, "grad_norm": 0.43147575855255127, "learning_rate": 2.422360248447205e-06, "loss": 0.4393, "step": 507 }, { "epoch": 0.0728419845139088, "grad_norm": 0.44858717918395996, "learning_rate": 2.4271380793119925e-06, "loss": 0.4306, "step": 508 }, { "epoch": 0.0729853742472039, "grad_norm": 0.3845183253288269, "learning_rate": 2.43191591017678e-06, "loss": 0.4021, "step": 509 }, { "epoch": 0.073128763980499, "grad_norm": 0.4160864055156708, "learning_rate": 2.4366937410415673e-06, "loss": 0.4034, "step": 510 }, { "epoch": 0.07327215371379409, "grad_norm": 0.5189532041549683, "learning_rate": 2.4414715719063546e-06, "loss": 0.4104, "step": 511 }, { "epoch": 0.07341554344708918, "grad_norm": 0.4292583167552948, "learning_rate": 2.446249402771142e-06, "loss": 0.4058, "step": 512 }, { "epoch": 0.07355893318038428, "grad_norm": 0.4218714237213135, "learning_rate": 2.4510272336359294e-06, "loss": 0.415, "step": 513 }, { "epoch": 0.07370232291367938, "grad_norm": 0.47920626401901245, "learning_rate": 2.455805064500717e-06, "loss": 0.455, "step": 514 }, { "epoch": 0.07384571264697448, "grad_norm": 0.4262942969799042, "learning_rate": 2.460582895365504e-06, "loss": 0.3977, "step": 515 }, { "epoch": 0.07398910238026957, "grad_norm": 0.4554016888141632, "learning_rate": 2.4653607262302915e-06, "loss": 0.4214, "step": 516 }, { "epoch": 0.07413249211356467, "grad_norm": 0.4300241768360138, "learning_rate": 2.470138557095079e-06, "loss": 0.4227, "step": 517 }, { "epoch": 0.07427588184685976, "grad_norm": 0.4159088134765625, "learning_rate": 2.4749163879598663e-06, "loss": 0.4203, "step": 518 }, { "epoch": 0.07441927158015486, "grad_norm": 0.4203924834728241, "learning_rate": 2.479694218824654e-06, "loss": 0.4244, "step": 519 }, { "epoch": 0.07456266131344996, "grad_norm": 0.4518815875053406, "learning_rate": 2.484472049689441e-06, "loss": 0.4116, "step": 520 }, { "epoch": 0.07470605104674505, "grad_norm": 0.41244634985923767, "learning_rate": 2.489249880554229e-06, "loss": 0.4374, "step": 521 }, { "epoch": 0.07484944078004015, "grad_norm": 0.4124402403831482, "learning_rate": 2.4940277114190158e-06, "loss": 0.4036, "step": 522 }, { "epoch": 0.07499283051333525, "grad_norm": 0.46547138690948486, "learning_rate": 2.4988055422838036e-06, "loss": 0.4322, "step": 523 }, { "epoch": 0.07513622024663034, "grad_norm": 0.40864700078964233, "learning_rate": 2.503583373148591e-06, "loss": 0.4071, "step": 524 }, { "epoch": 0.07527960997992544, "grad_norm": 0.417971134185791, "learning_rate": 2.5083612040133783e-06, "loss": 0.4128, "step": 525 }, { "epoch": 0.07542299971322053, "grad_norm": 0.4228958785533905, "learning_rate": 2.5131390348781653e-06, "loss": 0.4277, "step": 526 }, { "epoch": 0.07556638944651563, "grad_norm": 0.3820919096469879, "learning_rate": 2.517916865742953e-06, "loss": 0.4482, "step": 527 }, { "epoch": 0.07570977917981073, "grad_norm": 0.4200609624385834, "learning_rate": 2.5226946966077405e-06, "loss": 0.4292, "step": 528 }, { "epoch": 0.07585316891310583, "grad_norm": 0.4656864404678345, "learning_rate": 2.5274725274725274e-06, "loss": 0.4154, "step": 529 }, { "epoch": 0.07599655864640092, "grad_norm": 0.4206996262073517, "learning_rate": 2.5322503583373148e-06, "loss": 0.4343, "step": 530 }, { "epoch": 0.07613994837969601, "grad_norm": 0.4333719313144684, "learning_rate": 2.5370281892021026e-06, "loss": 0.406, "step": 531 }, { "epoch": 0.07628333811299111, "grad_norm": 0.4823293387889862, "learning_rate": 2.54180602006689e-06, "loss": 0.4134, "step": 532 }, { "epoch": 0.07642672784628621, "grad_norm": 0.4119727611541748, "learning_rate": 2.546583850931677e-06, "loss": 0.4223, "step": 533 }, { "epoch": 0.0765701175795813, "grad_norm": 0.3915899395942688, "learning_rate": 2.5513616817964647e-06, "loss": 0.4118, "step": 534 }, { "epoch": 0.07671350731287639, "grad_norm": 0.4477192759513855, "learning_rate": 2.556139512661252e-06, "loss": 0.4015, "step": 535 }, { "epoch": 0.07685689704617149, "grad_norm": 0.42800256609916687, "learning_rate": 2.5609173435260395e-06, "loss": 0.4192, "step": 536 }, { "epoch": 0.07700028677946659, "grad_norm": 0.43287456035614014, "learning_rate": 2.565695174390827e-06, "loss": 0.4258, "step": 537 }, { "epoch": 0.07714367651276169, "grad_norm": 0.4318499267101288, "learning_rate": 2.5704730052556142e-06, "loss": 0.4265, "step": 538 }, { "epoch": 0.07728706624605679, "grad_norm": 0.4842379093170166, "learning_rate": 2.5752508361204016e-06, "loss": 0.4303, "step": 539 }, { "epoch": 0.07743045597935187, "grad_norm": 0.43355792760849, "learning_rate": 2.5800286669851886e-06, "loss": 0.4094, "step": 540 }, { "epoch": 0.07757384571264697, "grad_norm": 0.39383140206336975, "learning_rate": 2.5848064978499764e-06, "loss": 0.4067, "step": 541 }, { "epoch": 0.07771723544594207, "grad_norm": 0.40863391757011414, "learning_rate": 2.5895843287147637e-06, "loss": 0.3939, "step": 542 }, { "epoch": 0.07786062517923717, "grad_norm": 0.39390134811401367, "learning_rate": 2.594362159579551e-06, "loss": 0.4113, "step": 543 }, { "epoch": 0.07800401491253227, "grad_norm": 0.44182297587394714, "learning_rate": 2.599139990444339e-06, "loss": 0.4268, "step": 544 }, { "epoch": 0.07814740464582735, "grad_norm": 0.4552352726459503, "learning_rate": 2.603917821309126e-06, "loss": 0.4282, "step": 545 }, { "epoch": 0.07829079437912245, "grad_norm": 0.46876299381256104, "learning_rate": 2.6086956521739132e-06, "loss": 0.4179, "step": 546 }, { "epoch": 0.07843418411241755, "grad_norm": 0.4158574044704437, "learning_rate": 2.6134734830387006e-06, "loss": 0.4031, "step": 547 }, { "epoch": 0.07857757384571265, "grad_norm": 0.42124736309051514, "learning_rate": 2.618251313903488e-06, "loss": 0.41, "step": 548 }, { "epoch": 0.07872096357900775, "grad_norm": 0.42534536123275757, "learning_rate": 2.6230291447682754e-06, "loss": 0.4204, "step": 549 }, { "epoch": 0.07886435331230283, "grad_norm": 0.39944928884506226, "learning_rate": 2.6278069756330627e-06, "loss": 0.4139, "step": 550 }, { "epoch": 0.07900774304559793, "grad_norm": 0.44766965508461, "learning_rate": 2.6325848064978505e-06, "loss": 0.4568, "step": 551 }, { "epoch": 0.07915113277889303, "grad_norm": 0.4391150176525116, "learning_rate": 2.6373626373626375e-06, "loss": 0.4202, "step": 552 }, { "epoch": 0.07929452251218813, "grad_norm": 0.458577036857605, "learning_rate": 2.642140468227425e-06, "loss": 0.4152, "step": 553 }, { "epoch": 0.07943791224548322, "grad_norm": 0.4055730402469635, "learning_rate": 2.6469182990922123e-06, "loss": 0.416, "step": 554 }, { "epoch": 0.07958130197877832, "grad_norm": 0.49307048320770264, "learning_rate": 2.651696129957e-06, "loss": 0.4209, "step": 555 }, { "epoch": 0.07972469171207341, "grad_norm": 0.4455513656139374, "learning_rate": 2.656473960821787e-06, "loss": 0.3929, "step": 556 }, { "epoch": 0.07986808144536851, "grad_norm": 0.4032212793827057, "learning_rate": 2.6612517916865744e-06, "loss": 0.4052, "step": 557 }, { "epoch": 0.08001147117866361, "grad_norm": 0.39877480268478394, "learning_rate": 2.666029622551362e-06, "loss": 0.3941, "step": 558 }, { "epoch": 0.0801548609119587, "grad_norm": 0.41375768184661865, "learning_rate": 2.670807453416149e-06, "loss": 0.4229, "step": 559 }, { "epoch": 0.0802982506452538, "grad_norm": 0.4340943992137909, "learning_rate": 2.6755852842809365e-06, "loss": 0.4348, "step": 560 }, { "epoch": 0.0804416403785489, "grad_norm": 0.4836515486240387, "learning_rate": 2.6803631151457243e-06, "loss": 0.4019, "step": 561 }, { "epoch": 0.080585030111844, "grad_norm": 0.3684757947921753, "learning_rate": 2.6851409460105117e-06, "loss": 0.4185, "step": 562 }, { "epoch": 0.0807284198451391, "grad_norm": 0.42034488916397095, "learning_rate": 2.6899187768752986e-06, "loss": 0.405, "step": 563 }, { "epoch": 0.08087180957843418, "grad_norm": 0.4582155644893646, "learning_rate": 2.694696607740086e-06, "loss": 0.4186, "step": 564 }, { "epoch": 0.08101519931172928, "grad_norm": 0.4493630528450012, "learning_rate": 2.699474438604874e-06, "loss": 0.4259, "step": 565 }, { "epoch": 0.08115858904502438, "grad_norm": 0.4363219141960144, "learning_rate": 2.704252269469661e-06, "loss": 0.4188, "step": 566 }, { "epoch": 0.08130197877831948, "grad_norm": 0.42728203535079956, "learning_rate": 2.709030100334448e-06, "loss": 0.4202, "step": 567 }, { "epoch": 0.08144536851161457, "grad_norm": 0.41654646396636963, "learning_rate": 2.713807931199236e-06, "loss": 0.4103, "step": 568 }, { "epoch": 0.08158875824490966, "grad_norm": 0.3892410397529602, "learning_rate": 2.7185857620640233e-06, "loss": 0.3951, "step": 569 }, { "epoch": 0.08173214797820476, "grad_norm": 0.4003753960132599, "learning_rate": 2.7233635929288103e-06, "loss": 0.3927, "step": 570 }, { "epoch": 0.08187553771149986, "grad_norm": 0.44716495275497437, "learning_rate": 2.7281414237935977e-06, "loss": 0.4011, "step": 571 }, { "epoch": 0.08201892744479496, "grad_norm": 0.47411924600601196, "learning_rate": 2.7329192546583855e-06, "loss": 0.418, "step": 572 }, { "epoch": 0.08216231717809006, "grad_norm": 0.41892051696777344, "learning_rate": 2.737697085523173e-06, "loss": 0.417, "step": 573 }, { "epoch": 0.08230570691138514, "grad_norm": 0.41015905141830444, "learning_rate": 2.74247491638796e-06, "loss": 0.3971, "step": 574 }, { "epoch": 0.08244909664468024, "grad_norm": 0.42465639114379883, "learning_rate": 2.7472527472527476e-06, "loss": 0.4182, "step": 575 }, { "epoch": 0.08259248637797534, "grad_norm": 0.40051981806755066, "learning_rate": 2.752030578117535e-06, "loss": 0.4145, "step": 576 }, { "epoch": 0.08273587611127044, "grad_norm": 0.4416466951370239, "learning_rate": 2.7568084089823223e-06, "loss": 0.4194, "step": 577 }, { "epoch": 0.08287926584456552, "grad_norm": 0.4188959300518036, "learning_rate": 2.7615862398471093e-06, "loss": 0.4102, "step": 578 }, { "epoch": 0.08302265557786062, "grad_norm": 0.4086974859237671, "learning_rate": 2.766364070711897e-06, "loss": 0.416, "step": 579 }, { "epoch": 0.08316604531115572, "grad_norm": 0.4410012364387512, "learning_rate": 2.7711419015766845e-06, "loss": 0.4107, "step": 580 }, { "epoch": 0.08330943504445082, "grad_norm": 0.45567649602890015, "learning_rate": 2.7759197324414714e-06, "loss": 0.4072, "step": 581 }, { "epoch": 0.08345282477774592, "grad_norm": 0.43852946162223816, "learning_rate": 2.7806975633062592e-06, "loss": 0.3775, "step": 582 }, { "epoch": 0.083596214511041, "grad_norm": 0.4992862641811371, "learning_rate": 2.7854753941710466e-06, "loss": 0.4325, "step": 583 }, { "epoch": 0.0837396042443361, "grad_norm": 0.44361019134521484, "learning_rate": 2.790253225035834e-06, "loss": 0.4277, "step": 584 }, { "epoch": 0.0838829939776312, "grad_norm": 0.48667627573013306, "learning_rate": 2.795031055900621e-06, "loss": 0.408, "step": 585 }, { "epoch": 0.0840263837109263, "grad_norm": 0.3970353603363037, "learning_rate": 2.7998088867654087e-06, "loss": 0.4139, "step": 586 }, { "epoch": 0.0841697734442214, "grad_norm": 0.4742043912410736, "learning_rate": 2.804586717630196e-06, "loss": 0.3989, "step": 587 }, { "epoch": 0.08431316317751648, "grad_norm": 0.46628984808921814, "learning_rate": 2.8093645484949835e-06, "loss": 0.398, "step": 588 }, { "epoch": 0.08445655291081158, "grad_norm": 0.46139729022979736, "learning_rate": 2.8141423793597713e-06, "loss": 0.4106, "step": 589 }, { "epoch": 0.08459994264410668, "grad_norm": 0.42371413111686707, "learning_rate": 2.8189202102245582e-06, "loss": 0.4016, "step": 590 }, { "epoch": 0.08474333237740178, "grad_norm": 0.4515853226184845, "learning_rate": 2.8236980410893456e-06, "loss": 0.4274, "step": 591 }, { "epoch": 0.08488672211069688, "grad_norm": 0.45756131410598755, "learning_rate": 2.8284758719541334e-06, "loss": 0.4407, "step": 592 }, { "epoch": 0.08503011184399197, "grad_norm": 0.5069389343261719, "learning_rate": 2.8332537028189204e-06, "loss": 0.4105, "step": 593 }, { "epoch": 0.08517350157728706, "grad_norm": 0.4132387638092041, "learning_rate": 2.8380315336837077e-06, "loss": 0.4116, "step": 594 }, { "epoch": 0.08531689131058216, "grad_norm": 0.41934841871261597, "learning_rate": 2.842809364548495e-06, "loss": 0.396, "step": 595 }, { "epoch": 0.08546028104387726, "grad_norm": 0.4583408236503601, "learning_rate": 2.847587195413283e-06, "loss": 0.4092, "step": 596 }, { "epoch": 0.08560367077717236, "grad_norm": 0.4437278211116791, "learning_rate": 2.85236502627807e-06, "loss": 0.4081, "step": 597 }, { "epoch": 0.08574706051046745, "grad_norm": 0.42886775732040405, "learning_rate": 2.8571428571428573e-06, "loss": 0.4046, "step": 598 }, { "epoch": 0.08589045024376255, "grad_norm": 0.49595576524734497, "learning_rate": 2.861920688007645e-06, "loss": 0.4396, "step": 599 }, { "epoch": 0.08603383997705764, "grad_norm": 0.4697168469429016, "learning_rate": 2.8666985188724324e-06, "loss": 0.4066, "step": 600 }, { "epoch": 0.08617722971035274, "grad_norm": 0.4191589653491974, "learning_rate": 2.8714763497372194e-06, "loss": 0.3842, "step": 601 }, { "epoch": 0.08632061944364783, "grad_norm": 0.43608611822128296, "learning_rate": 2.8762541806020068e-06, "loss": 0.4019, "step": 602 }, { "epoch": 0.08646400917694293, "grad_norm": 0.4432092607021332, "learning_rate": 2.8810320114667946e-06, "loss": 0.413, "step": 603 }, { "epoch": 0.08660739891023803, "grad_norm": 0.4279719889163971, "learning_rate": 2.8858098423315815e-06, "loss": 0.4186, "step": 604 }, { "epoch": 0.08675078864353312, "grad_norm": 0.48219409584999084, "learning_rate": 2.890587673196369e-06, "loss": 0.4071, "step": 605 }, { "epoch": 0.08689417837682822, "grad_norm": 0.49527212977409363, "learning_rate": 2.8953655040611567e-06, "loss": 0.4196, "step": 606 }, { "epoch": 0.08703756811012331, "grad_norm": 0.4107043147087097, "learning_rate": 2.900143334925944e-06, "loss": 0.4079, "step": 607 }, { "epoch": 0.08718095784341841, "grad_norm": 0.42692166566848755, "learning_rate": 2.904921165790731e-06, "loss": 0.4329, "step": 608 }, { "epoch": 0.0873243475767135, "grad_norm": 0.40613120794296265, "learning_rate": 2.9096989966555184e-06, "loss": 0.3763, "step": 609 }, { "epoch": 0.0874677373100086, "grad_norm": 0.41631942987442017, "learning_rate": 2.914476827520306e-06, "loss": 0.402, "step": 610 }, { "epoch": 0.0876111270433037, "grad_norm": 0.47599563002586365, "learning_rate": 2.919254658385093e-06, "loss": 0.4298, "step": 611 }, { "epoch": 0.08775451677659879, "grad_norm": 0.4580989480018616, "learning_rate": 2.9240324892498805e-06, "loss": 0.4004, "step": 612 }, { "epoch": 0.08789790650989389, "grad_norm": 0.5037564039230347, "learning_rate": 2.9288103201146683e-06, "loss": 0.4237, "step": 613 }, { "epoch": 0.08804129624318899, "grad_norm": 0.40711531043052673, "learning_rate": 2.9335881509794557e-06, "loss": 0.3921, "step": 614 }, { "epoch": 0.08818468597648409, "grad_norm": 0.49606284499168396, "learning_rate": 2.9383659818442427e-06, "loss": 0.4174, "step": 615 }, { "epoch": 0.08832807570977919, "grad_norm": 0.4314693808555603, "learning_rate": 2.9431438127090305e-06, "loss": 0.4098, "step": 616 }, { "epoch": 0.08847146544307427, "grad_norm": 0.4651407301425934, "learning_rate": 2.947921643573818e-06, "loss": 0.4199, "step": 617 }, { "epoch": 0.08861485517636937, "grad_norm": 0.4656417965888977, "learning_rate": 2.9526994744386052e-06, "loss": 0.3932, "step": 618 }, { "epoch": 0.08875824490966447, "grad_norm": 0.4246296286582947, "learning_rate": 2.957477305303392e-06, "loss": 0.4221, "step": 619 }, { "epoch": 0.08890163464295957, "grad_norm": 0.4665874242782593, "learning_rate": 2.96225513616818e-06, "loss": 0.4237, "step": 620 }, { "epoch": 0.08904502437625467, "grad_norm": 0.5468493700027466, "learning_rate": 2.9670329670329673e-06, "loss": 0.4439, "step": 621 }, { "epoch": 0.08918841410954975, "grad_norm": 0.4400111436843872, "learning_rate": 2.9718107978977543e-06, "loss": 0.3758, "step": 622 }, { "epoch": 0.08933180384284485, "grad_norm": 0.4071016311645508, "learning_rate": 2.976588628762542e-06, "loss": 0.4105, "step": 623 }, { "epoch": 0.08947519357613995, "grad_norm": 0.44660863280296326, "learning_rate": 2.9813664596273295e-06, "loss": 0.4041, "step": 624 }, { "epoch": 0.08961858330943505, "grad_norm": 0.5024822950363159, "learning_rate": 2.986144290492117e-06, "loss": 0.4325, "step": 625 }, { "epoch": 0.08976197304273013, "grad_norm": 0.38979029655456543, "learning_rate": 2.990922121356904e-06, "loss": 0.409, "step": 626 }, { "epoch": 0.08990536277602523, "grad_norm": 0.4494381546974182, "learning_rate": 2.9956999522216916e-06, "loss": 0.4334, "step": 627 }, { "epoch": 0.09004875250932033, "grad_norm": 0.4176464378833771, "learning_rate": 3.000477783086479e-06, "loss": 0.4012, "step": 628 }, { "epoch": 0.09019214224261543, "grad_norm": 0.4656321406364441, "learning_rate": 3.0052556139512664e-06, "loss": 0.3833, "step": 629 }, { "epoch": 0.09033553197591053, "grad_norm": 0.4820505678653717, "learning_rate": 3.010033444816054e-06, "loss": 0.4339, "step": 630 }, { "epoch": 0.09047892170920561, "grad_norm": 0.4271308183670044, "learning_rate": 3.014811275680841e-06, "loss": 0.4325, "step": 631 }, { "epoch": 0.09062231144250071, "grad_norm": 0.4280604124069214, "learning_rate": 3.0195891065456285e-06, "loss": 0.4158, "step": 632 }, { "epoch": 0.09076570117579581, "grad_norm": 0.448805034160614, "learning_rate": 3.0243669374104154e-06, "loss": 0.4325, "step": 633 }, { "epoch": 0.09090909090909091, "grad_norm": 0.417185515165329, "learning_rate": 3.0291447682752032e-06, "loss": 0.4106, "step": 634 }, { "epoch": 0.09105248064238601, "grad_norm": 0.46786636114120483, "learning_rate": 3.0339225991399906e-06, "loss": 0.4345, "step": 635 }, { "epoch": 0.0911958703756811, "grad_norm": 0.4068709909915924, "learning_rate": 3.038700430004778e-06, "loss": 0.4123, "step": 636 }, { "epoch": 0.0913392601089762, "grad_norm": 0.4252001941204071, "learning_rate": 3.043478260869566e-06, "loss": 0.3916, "step": 637 }, { "epoch": 0.0914826498422713, "grad_norm": 0.44264039397239685, "learning_rate": 3.0482560917343528e-06, "loss": 0.4284, "step": 638 }, { "epoch": 0.09162603957556639, "grad_norm": 0.39616289734840393, "learning_rate": 3.05303392259914e-06, "loss": 0.3953, "step": 639 }, { "epoch": 0.09176942930886149, "grad_norm": 0.4659471809864044, "learning_rate": 3.057811753463928e-06, "loss": 0.4145, "step": 640 }, { "epoch": 0.09191281904215658, "grad_norm": 0.4131932854652405, "learning_rate": 3.0625895843287153e-06, "loss": 0.3987, "step": 641 }, { "epoch": 0.09205620877545168, "grad_norm": 0.4444846212863922, "learning_rate": 3.0673674151935023e-06, "loss": 0.4277, "step": 642 }, { "epoch": 0.09219959850874677, "grad_norm": 0.4252930283546448, "learning_rate": 3.0721452460582896e-06, "loss": 0.4292, "step": 643 }, { "epoch": 0.09234298824204187, "grad_norm": 0.3985830545425415, "learning_rate": 3.0769230769230774e-06, "loss": 0.4248, "step": 644 }, { "epoch": 0.09248637797533697, "grad_norm": 0.3890540301799774, "learning_rate": 3.0817009077878644e-06, "loss": 0.3815, "step": 645 }, { "epoch": 0.09262976770863206, "grad_norm": 0.4266330897808075, "learning_rate": 3.0864787386526518e-06, "loss": 0.4134, "step": 646 }, { "epoch": 0.09277315744192716, "grad_norm": 0.42544111609458923, "learning_rate": 3.0912565695174396e-06, "loss": 0.4053, "step": 647 }, { "epoch": 0.09291654717522226, "grad_norm": 0.4102191627025604, "learning_rate": 3.096034400382227e-06, "loss": 0.4063, "step": 648 }, { "epoch": 0.09305993690851735, "grad_norm": 0.3919582664966583, "learning_rate": 3.100812231247014e-06, "loss": 0.4039, "step": 649 }, { "epoch": 0.09320332664181244, "grad_norm": 0.4161120355129242, "learning_rate": 3.1055900621118013e-06, "loss": 0.4048, "step": 650 }, { "epoch": 0.09334671637510754, "grad_norm": 0.4080949127674103, "learning_rate": 3.110367892976589e-06, "loss": 0.4158, "step": 651 }, { "epoch": 0.09349010610840264, "grad_norm": 0.42725878953933716, "learning_rate": 3.1151457238413764e-06, "loss": 0.4476, "step": 652 }, { "epoch": 0.09363349584169774, "grad_norm": 0.4240025579929352, "learning_rate": 3.1199235547061634e-06, "loss": 0.432, "step": 653 }, { "epoch": 0.09377688557499284, "grad_norm": 0.4937463104724884, "learning_rate": 3.124701385570951e-06, "loss": 0.4146, "step": 654 }, { "epoch": 0.09392027530828792, "grad_norm": 0.4261866509914398, "learning_rate": 3.1294792164357386e-06, "loss": 0.4029, "step": 655 }, { "epoch": 0.09406366504158302, "grad_norm": 0.45206791162490845, "learning_rate": 3.1342570473005255e-06, "loss": 0.3936, "step": 656 }, { "epoch": 0.09420705477487812, "grad_norm": 0.4456377327442169, "learning_rate": 3.139034878165313e-06, "loss": 0.3933, "step": 657 }, { "epoch": 0.09435044450817322, "grad_norm": 0.4239141345024109, "learning_rate": 3.1438127090301007e-06, "loss": 0.4159, "step": 658 }, { "epoch": 0.09449383424146832, "grad_norm": 0.46134787797927856, "learning_rate": 3.148590539894888e-06, "loss": 0.424, "step": 659 }, { "epoch": 0.0946372239747634, "grad_norm": 0.43277043104171753, "learning_rate": 3.153368370759675e-06, "loss": 0.4265, "step": 660 }, { "epoch": 0.0947806137080585, "grad_norm": 0.4231414198875427, "learning_rate": 3.158146201624463e-06, "loss": 0.426, "step": 661 }, { "epoch": 0.0949240034413536, "grad_norm": 0.4871426224708557, "learning_rate": 3.1629240324892502e-06, "loss": 0.4412, "step": 662 }, { "epoch": 0.0950673931746487, "grad_norm": 0.4350969195365906, "learning_rate": 3.1677018633540376e-06, "loss": 0.3928, "step": 663 }, { "epoch": 0.0952107829079438, "grad_norm": 0.43367066979408264, "learning_rate": 3.172479694218825e-06, "loss": 0.3967, "step": 664 }, { "epoch": 0.09535417264123888, "grad_norm": 0.48286890983581543, "learning_rate": 3.1772575250836123e-06, "loss": 0.3987, "step": 665 }, { "epoch": 0.09549756237453398, "grad_norm": 0.4420829117298126, "learning_rate": 3.1820353559483997e-06, "loss": 0.4268, "step": 666 }, { "epoch": 0.09564095210782908, "grad_norm": 0.39967936277389526, "learning_rate": 3.1868131868131867e-06, "loss": 0.4188, "step": 667 }, { "epoch": 0.09578434184112418, "grad_norm": 0.458270788192749, "learning_rate": 3.1915910176779745e-06, "loss": 0.4024, "step": 668 }, { "epoch": 0.09592773157441928, "grad_norm": 0.4376903772354126, "learning_rate": 3.196368848542762e-06, "loss": 0.4116, "step": 669 }, { "epoch": 0.09607112130771436, "grad_norm": 0.397243857383728, "learning_rate": 3.2011466794075492e-06, "loss": 0.4157, "step": 670 }, { "epoch": 0.09621451104100946, "grad_norm": 0.41517409682273865, "learning_rate": 3.205924510272337e-06, "loss": 0.4143, "step": 671 }, { "epoch": 0.09635790077430456, "grad_norm": 0.43626296520233154, "learning_rate": 3.210702341137124e-06, "loss": 0.4309, "step": 672 }, { "epoch": 0.09650129050759966, "grad_norm": 0.42889535427093506, "learning_rate": 3.2154801720019114e-06, "loss": 0.4065, "step": 673 }, { "epoch": 0.09664468024089475, "grad_norm": 0.4467771053314209, "learning_rate": 3.2202580028666987e-06, "loss": 0.4065, "step": 674 }, { "epoch": 0.09678806997418984, "grad_norm": 0.4206111431121826, "learning_rate": 3.225035833731486e-06, "loss": 0.3881, "step": 675 }, { "epoch": 0.09693145970748494, "grad_norm": 0.4152800142765045, "learning_rate": 3.2298136645962735e-06, "loss": 0.4055, "step": 676 }, { "epoch": 0.09707484944078004, "grad_norm": 0.43108251690864563, "learning_rate": 3.234591495461061e-06, "loss": 0.45, "step": 677 }, { "epoch": 0.09721823917407514, "grad_norm": 0.4153777062892914, "learning_rate": 3.2393693263258487e-06, "loss": 0.4036, "step": 678 }, { "epoch": 0.09736162890737023, "grad_norm": 0.40517711639404297, "learning_rate": 3.2441471571906356e-06, "loss": 0.4073, "step": 679 }, { "epoch": 0.09750501864066533, "grad_norm": 0.41947609186172485, "learning_rate": 3.248924988055423e-06, "loss": 0.4021, "step": 680 }, { "epoch": 0.09764840837396042, "grad_norm": 0.3921623229980469, "learning_rate": 3.2537028189202104e-06, "loss": 0.4149, "step": 681 }, { "epoch": 0.09779179810725552, "grad_norm": 0.3731326460838318, "learning_rate": 3.258480649784998e-06, "loss": 0.3818, "step": 682 }, { "epoch": 0.09793518784055062, "grad_norm": 0.48253703117370605, "learning_rate": 3.263258480649785e-06, "loss": 0.4053, "step": 683 }, { "epoch": 0.09807857757384571, "grad_norm": 0.4528331160545349, "learning_rate": 3.2680363115145725e-06, "loss": 0.4145, "step": 684 }, { "epoch": 0.0982219673071408, "grad_norm": 0.40649229288101196, "learning_rate": 3.2728141423793603e-06, "loss": 0.3827, "step": 685 }, { "epoch": 0.0983653570404359, "grad_norm": 0.4303116798400879, "learning_rate": 3.2775919732441473e-06, "loss": 0.3947, "step": 686 }, { "epoch": 0.098508746773731, "grad_norm": 0.44978493452072144, "learning_rate": 3.2823698041089346e-06, "loss": 0.4289, "step": 687 }, { "epoch": 0.0986521365070261, "grad_norm": 0.41825225949287415, "learning_rate": 3.2871476349737224e-06, "loss": 0.3866, "step": 688 }, { "epoch": 0.09879552624032119, "grad_norm": 0.4466347098350525, "learning_rate": 3.29192546583851e-06, "loss": 0.3969, "step": 689 }, { "epoch": 0.09893891597361629, "grad_norm": 0.44084030389785767, "learning_rate": 3.2967032967032968e-06, "loss": 0.3955, "step": 690 }, { "epoch": 0.09908230570691139, "grad_norm": 0.39919179677963257, "learning_rate": 3.301481127568084e-06, "loss": 0.3956, "step": 691 }, { "epoch": 0.09922569544020649, "grad_norm": 0.4657234251499176, "learning_rate": 3.306258958432872e-06, "loss": 0.4042, "step": 692 }, { "epoch": 0.09936908517350158, "grad_norm": 0.4306391179561615, "learning_rate": 3.3110367892976593e-06, "loss": 0.4146, "step": 693 }, { "epoch": 0.09951247490679667, "grad_norm": 0.4098949730396271, "learning_rate": 3.3158146201624463e-06, "loss": 0.4106, "step": 694 }, { "epoch": 0.09965586464009177, "grad_norm": 0.40571486949920654, "learning_rate": 3.320592451027234e-06, "loss": 0.3866, "step": 695 }, { "epoch": 0.09979925437338687, "grad_norm": 0.47751736640930176, "learning_rate": 3.3253702818920215e-06, "loss": 0.4041, "step": 696 }, { "epoch": 0.09994264410668197, "grad_norm": 0.43455490469932556, "learning_rate": 3.3301481127568084e-06, "loss": 0.4118, "step": 697 }, { "epoch": 0.10008603383997705, "grad_norm": 0.47757431864738464, "learning_rate": 3.3349259436215958e-06, "loss": 0.403, "step": 698 }, { "epoch": 0.10022942357327215, "grad_norm": 0.4342787265777588, "learning_rate": 3.3397037744863836e-06, "loss": 0.4092, "step": 699 }, { "epoch": 0.10037281330656725, "grad_norm": 0.4219911992549896, "learning_rate": 3.344481605351171e-06, "loss": 0.4071, "step": 700 }, { "epoch": 0.10051620303986235, "grad_norm": 0.48932838439941406, "learning_rate": 3.349259436215958e-06, "loss": 0.4083, "step": 701 }, { "epoch": 0.10065959277315745, "grad_norm": 0.4193122088909149, "learning_rate": 3.3540372670807457e-06, "loss": 0.3926, "step": 702 }, { "epoch": 0.10080298250645253, "grad_norm": 0.40390923619270325, "learning_rate": 3.358815097945533e-06, "loss": 0.4043, "step": 703 }, { "epoch": 0.10094637223974763, "grad_norm": 0.46761244535446167, "learning_rate": 3.3635929288103205e-06, "loss": 0.4349, "step": 704 }, { "epoch": 0.10108976197304273, "grad_norm": 0.43384528160095215, "learning_rate": 3.3683707596751074e-06, "loss": 0.3951, "step": 705 }, { "epoch": 0.10123315170633783, "grad_norm": 0.4396335184574127, "learning_rate": 3.3731485905398952e-06, "loss": 0.3983, "step": 706 }, { "epoch": 0.10137654143963293, "grad_norm": 0.4240514039993286, "learning_rate": 3.3779264214046826e-06, "loss": 0.3995, "step": 707 }, { "epoch": 0.10151993117292801, "grad_norm": 0.41472455859184265, "learning_rate": 3.3827042522694696e-06, "loss": 0.4193, "step": 708 }, { "epoch": 0.10166332090622311, "grad_norm": 0.4022922217845917, "learning_rate": 3.3874820831342574e-06, "loss": 0.3896, "step": 709 }, { "epoch": 0.10180671063951821, "grad_norm": 0.4217979609966278, "learning_rate": 3.3922599139990447e-06, "loss": 0.4254, "step": 710 }, { "epoch": 0.10195010037281331, "grad_norm": 0.431034117937088, "learning_rate": 3.397037744863832e-06, "loss": 0.4125, "step": 711 }, { "epoch": 0.10209349010610841, "grad_norm": 0.4361136853694916, "learning_rate": 3.40181557572862e-06, "loss": 0.3845, "step": 712 }, { "epoch": 0.1022368798394035, "grad_norm": 0.39060303568840027, "learning_rate": 3.406593406593407e-06, "loss": 0.4001, "step": 713 }, { "epoch": 0.10238026957269859, "grad_norm": 0.39032623171806335, "learning_rate": 3.4113712374581942e-06, "loss": 0.384, "step": 714 }, { "epoch": 0.10252365930599369, "grad_norm": 0.4111459255218506, "learning_rate": 3.4161490683229816e-06, "loss": 0.4023, "step": 715 }, { "epoch": 0.10266704903928879, "grad_norm": 0.4365268647670746, "learning_rate": 3.4209268991877694e-06, "loss": 0.3753, "step": 716 }, { "epoch": 0.10281043877258389, "grad_norm": 0.43067654967308044, "learning_rate": 3.4257047300525564e-06, "loss": 0.4058, "step": 717 }, { "epoch": 0.10295382850587897, "grad_norm": 0.42751544713974, "learning_rate": 3.4304825609173437e-06, "loss": 0.3996, "step": 718 }, { "epoch": 0.10309721823917407, "grad_norm": 0.5022243857383728, "learning_rate": 3.4352603917821315e-06, "loss": 0.4133, "step": 719 }, { "epoch": 0.10324060797246917, "grad_norm": 0.4936974048614502, "learning_rate": 3.4400382226469185e-06, "loss": 0.4174, "step": 720 }, { "epoch": 0.10338399770576427, "grad_norm": 0.4336369037628174, "learning_rate": 3.444816053511706e-06, "loss": 0.3774, "step": 721 }, { "epoch": 0.10352738743905936, "grad_norm": 0.444049209356308, "learning_rate": 3.4495938843764932e-06, "loss": 0.4093, "step": 722 }, { "epoch": 0.10367077717235446, "grad_norm": 0.5079161524772644, "learning_rate": 3.454371715241281e-06, "loss": 0.4265, "step": 723 }, { "epoch": 0.10381416690564955, "grad_norm": 0.4381064474582672, "learning_rate": 3.459149546106068e-06, "loss": 0.3949, "step": 724 }, { "epoch": 0.10395755663894465, "grad_norm": 0.42629122734069824, "learning_rate": 3.4639273769708554e-06, "loss": 0.3971, "step": 725 }, { "epoch": 0.10410094637223975, "grad_norm": 0.414164274930954, "learning_rate": 3.468705207835643e-06, "loss": 0.3827, "step": 726 }, { "epoch": 0.10424433610553484, "grad_norm": 0.4328790605068207, "learning_rate": 3.4734830387004306e-06, "loss": 0.4127, "step": 727 }, { "epoch": 0.10438772583882994, "grad_norm": 0.4485488533973694, "learning_rate": 3.4782608695652175e-06, "loss": 0.3973, "step": 728 }, { "epoch": 0.10453111557212504, "grad_norm": 0.501205325126648, "learning_rate": 3.483038700430005e-06, "loss": 0.4006, "step": 729 }, { "epoch": 0.10467450530542013, "grad_norm": 0.41754084825515747, "learning_rate": 3.4878165312947927e-06, "loss": 0.3999, "step": 730 }, { "epoch": 0.10481789503871523, "grad_norm": 0.45675909519195557, "learning_rate": 3.4925943621595796e-06, "loss": 0.4002, "step": 731 }, { "epoch": 0.10496128477201032, "grad_norm": 0.4709968864917755, "learning_rate": 3.497372193024367e-06, "loss": 0.4138, "step": 732 }, { "epoch": 0.10510467450530542, "grad_norm": 0.4702642261981964, "learning_rate": 3.502150023889155e-06, "loss": 0.4034, "step": 733 }, { "epoch": 0.10524806423860052, "grad_norm": 0.3933655619621277, "learning_rate": 3.506927854753942e-06, "loss": 0.383, "step": 734 }, { "epoch": 0.10539145397189562, "grad_norm": 0.41106700897216797, "learning_rate": 3.511705685618729e-06, "loss": 0.3901, "step": 735 }, { "epoch": 0.10553484370519071, "grad_norm": 0.45376405119895935, "learning_rate": 3.516483516483517e-06, "loss": 0.3997, "step": 736 }, { "epoch": 0.1056782334384858, "grad_norm": 0.44965600967407227, "learning_rate": 3.5212613473483043e-06, "loss": 0.4308, "step": 737 }, { "epoch": 0.1058216231717809, "grad_norm": 0.4399315416812897, "learning_rate": 3.5260391782130917e-06, "loss": 0.4077, "step": 738 }, { "epoch": 0.105965012905076, "grad_norm": 0.3765484690666199, "learning_rate": 3.5308170090778787e-06, "loss": 0.4004, "step": 739 }, { "epoch": 0.1061084026383711, "grad_norm": 0.39465993642807007, "learning_rate": 3.5355948399426665e-06, "loss": 0.3891, "step": 740 }, { "epoch": 0.1062517923716662, "grad_norm": 0.3946310877799988, "learning_rate": 3.540372670807454e-06, "loss": 0.4, "step": 741 }, { "epoch": 0.10639518210496128, "grad_norm": 0.4090716242790222, "learning_rate": 3.5451505016722408e-06, "loss": 0.3918, "step": 742 }, { "epoch": 0.10653857183825638, "grad_norm": 0.43817877769470215, "learning_rate": 3.5499283325370286e-06, "loss": 0.4173, "step": 743 }, { "epoch": 0.10668196157155148, "grad_norm": 0.4322904348373413, "learning_rate": 3.554706163401816e-06, "loss": 0.3852, "step": 744 }, { "epoch": 0.10682535130484658, "grad_norm": 0.4315958023071289, "learning_rate": 3.5594839942666033e-06, "loss": 0.4086, "step": 745 }, { "epoch": 0.10696874103814166, "grad_norm": 0.3880349099636078, "learning_rate": 3.5642618251313903e-06, "loss": 0.3989, "step": 746 }, { "epoch": 0.10711213077143676, "grad_norm": 0.4620588421821594, "learning_rate": 3.569039655996178e-06, "loss": 0.3795, "step": 747 }, { "epoch": 0.10725552050473186, "grad_norm": 0.4299677312374115, "learning_rate": 3.5738174868609655e-06, "loss": 0.3811, "step": 748 }, { "epoch": 0.10739891023802696, "grad_norm": 0.43389788269996643, "learning_rate": 3.578595317725753e-06, "loss": 0.3962, "step": 749 }, { "epoch": 0.10754229997132206, "grad_norm": 0.4043819010257721, "learning_rate": 3.5833731485905402e-06, "loss": 0.3865, "step": 750 }, { "epoch": 0.10768568970461714, "grad_norm": 0.46450871229171753, "learning_rate": 3.5881509794553276e-06, "loss": 0.4011, "step": 751 }, { "epoch": 0.10782907943791224, "grad_norm": 0.422861784696579, "learning_rate": 3.592928810320115e-06, "loss": 0.3677, "step": 752 }, { "epoch": 0.10797246917120734, "grad_norm": 0.49186986684799194, "learning_rate": 3.597706641184902e-06, "loss": 0.3996, "step": 753 }, { "epoch": 0.10811585890450244, "grad_norm": 0.44871020317077637, "learning_rate": 3.6024844720496897e-06, "loss": 0.4339, "step": 754 }, { "epoch": 0.10825924863779754, "grad_norm": 0.4395062029361725, "learning_rate": 3.607262302914477e-06, "loss": 0.3933, "step": 755 }, { "epoch": 0.10840263837109262, "grad_norm": 0.4289090931415558, "learning_rate": 3.6120401337792645e-06, "loss": 0.3885, "step": 756 }, { "epoch": 0.10854602810438772, "grad_norm": 0.4623865783214569, "learning_rate": 3.6168179646440523e-06, "loss": 0.4046, "step": 757 }, { "epoch": 0.10868941783768282, "grad_norm": 0.4657321274280548, "learning_rate": 3.6215957955088392e-06, "loss": 0.4433, "step": 758 }, { "epoch": 0.10883280757097792, "grad_norm": 0.41100746393203735, "learning_rate": 3.6263736263736266e-06, "loss": 0.3963, "step": 759 }, { "epoch": 0.10897619730427302, "grad_norm": 0.43357399106025696, "learning_rate": 3.6311514572384144e-06, "loss": 0.4089, "step": 760 }, { "epoch": 0.1091195870375681, "grad_norm": 0.425292432308197, "learning_rate": 3.6359292881032014e-06, "loss": 0.4021, "step": 761 }, { "epoch": 0.1092629767708632, "grad_norm": 0.3985533118247986, "learning_rate": 3.6407071189679887e-06, "loss": 0.3813, "step": 762 }, { "epoch": 0.1094063665041583, "grad_norm": 0.4914790987968445, "learning_rate": 3.645484949832776e-06, "loss": 0.3801, "step": 763 }, { "epoch": 0.1095497562374534, "grad_norm": 0.47894370555877686, "learning_rate": 3.650262780697564e-06, "loss": 0.3804, "step": 764 }, { "epoch": 0.1096931459707485, "grad_norm": 0.43361034989356995, "learning_rate": 3.655040611562351e-06, "loss": 0.4141, "step": 765 }, { "epoch": 0.10983653570404359, "grad_norm": 0.4319232106208801, "learning_rate": 3.6598184424271383e-06, "loss": 0.3979, "step": 766 }, { "epoch": 0.10997992543733869, "grad_norm": 0.5123934149742126, "learning_rate": 3.664596273291926e-06, "loss": 0.4259, "step": 767 }, { "epoch": 0.11012331517063378, "grad_norm": 0.4227287471294403, "learning_rate": 3.6693741041567134e-06, "loss": 0.3903, "step": 768 }, { "epoch": 0.11026670490392888, "grad_norm": 0.4069305658340454, "learning_rate": 3.6741519350215004e-06, "loss": 0.3998, "step": 769 }, { "epoch": 0.11041009463722397, "grad_norm": 0.4542251527309418, "learning_rate": 3.6789297658862878e-06, "loss": 0.3801, "step": 770 }, { "epoch": 0.11055348437051907, "grad_norm": 0.44094032049179077, "learning_rate": 3.6837075967510756e-06, "loss": 0.3986, "step": 771 }, { "epoch": 0.11069687410381417, "grad_norm": 0.3803381323814392, "learning_rate": 3.6884854276158625e-06, "loss": 0.4036, "step": 772 }, { "epoch": 0.11084026383710927, "grad_norm": 0.4526727497577667, "learning_rate": 3.69326325848065e-06, "loss": 0.4172, "step": 773 }, { "epoch": 0.11098365357040436, "grad_norm": 0.4484894573688507, "learning_rate": 3.6980410893454377e-06, "loss": 0.4037, "step": 774 }, { "epoch": 0.11112704330369945, "grad_norm": 0.4098954498767853, "learning_rate": 3.702818920210225e-06, "loss": 0.3969, "step": 775 }, { "epoch": 0.11127043303699455, "grad_norm": 0.44703102111816406, "learning_rate": 3.707596751075012e-06, "loss": 0.409, "step": 776 }, { "epoch": 0.11141382277028965, "grad_norm": 0.4135681390762329, "learning_rate": 3.7123745819397994e-06, "loss": 0.4036, "step": 777 }, { "epoch": 0.11155721250358475, "grad_norm": 0.44620946049690247, "learning_rate": 3.717152412804587e-06, "loss": 0.4124, "step": 778 }, { "epoch": 0.11170060223687985, "grad_norm": 0.4365106225013733, "learning_rate": 3.7219302436693746e-06, "loss": 0.3952, "step": 779 }, { "epoch": 0.11184399197017493, "grad_norm": 0.4441297948360443, "learning_rate": 3.7267080745341615e-06, "loss": 0.3925, "step": 780 }, { "epoch": 0.11198738170347003, "grad_norm": 0.46263566613197327, "learning_rate": 3.7314859053989493e-06, "loss": 0.3878, "step": 781 }, { "epoch": 0.11213077143676513, "grad_norm": 0.43078482151031494, "learning_rate": 3.7362637362637367e-06, "loss": 0.4154, "step": 782 }, { "epoch": 0.11227416117006023, "grad_norm": 0.4279409646987915, "learning_rate": 3.7410415671285237e-06, "loss": 0.4168, "step": 783 }, { "epoch": 0.11241755090335533, "grad_norm": 0.44091591238975525, "learning_rate": 3.745819397993311e-06, "loss": 0.4033, "step": 784 }, { "epoch": 0.11256094063665041, "grad_norm": 0.4824831187725067, "learning_rate": 3.750597228858099e-06, "loss": 0.418, "step": 785 }, { "epoch": 0.11270433036994551, "grad_norm": 0.4327097535133362, "learning_rate": 3.755375059722886e-06, "loss": 0.3897, "step": 786 }, { "epoch": 0.11284772010324061, "grad_norm": 0.4563843905925751, "learning_rate": 3.760152890587673e-06, "loss": 0.3732, "step": 787 }, { "epoch": 0.11299110983653571, "grad_norm": 0.41379815340042114, "learning_rate": 3.764930721452461e-06, "loss": 0.3781, "step": 788 }, { "epoch": 0.11313449956983081, "grad_norm": 0.4325787425041199, "learning_rate": 3.7697085523172483e-06, "loss": 0.3856, "step": 789 }, { "epoch": 0.11327788930312589, "grad_norm": 0.3971291780471802, "learning_rate": 3.7744863831820357e-06, "loss": 0.3876, "step": 790 }, { "epoch": 0.11342127903642099, "grad_norm": 0.4502171576023102, "learning_rate": 3.7792642140468235e-06, "loss": 0.4128, "step": 791 }, { "epoch": 0.11356466876971609, "grad_norm": 0.44110625982284546, "learning_rate": 3.7840420449116105e-06, "loss": 0.3938, "step": 792 }, { "epoch": 0.11370805850301119, "grad_norm": 0.42840322852134705, "learning_rate": 3.788819875776398e-06, "loss": 0.3877, "step": 793 }, { "epoch": 0.11385144823630627, "grad_norm": 0.4201824963092804, "learning_rate": 3.793597706641185e-06, "loss": 0.3991, "step": 794 }, { "epoch": 0.11399483796960137, "grad_norm": 0.4970066547393799, "learning_rate": 3.7983755375059726e-06, "loss": 0.4126, "step": 795 }, { "epoch": 0.11413822770289647, "grad_norm": 0.418597549200058, "learning_rate": 3.80315336837076e-06, "loss": 0.3791, "step": 796 }, { "epoch": 0.11428161743619157, "grad_norm": 0.403983473777771, "learning_rate": 3.8079311992355474e-06, "loss": 0.3816, "step": 797 }, { "epoch": 0.11442500716948667, "grad_norm": 0.42417675256729126, "learning_rate": 3.812709030100335e-06, "loss": 0.3807, "step": 798 }, { "epoch": 0.11456839690278176, "grad_norm": 0.4430795907974243, "learning_rate": 3.8174868609651225e-06, "loss": 0.3947, "step": 799 }, { "epoch": 0.11471178663607685, "grad_norm": 0.4005560278892517, "learning_rate": 3.822264691829909e-06, "loss": 0.3943, "step": 800 }, { "epoch": 0.11485517636937195, "grad_norm": 0.4238700270652771, "learning_rate": 3.8270425226946964e-06, "loss": 0.4, "step": 801 }, { "epoch": 0.11499856610266705, "grad_norm": 0.49992635846138, "learning_rate": 3.831820353559485e-06, "loss": 0.3993, "step": 802 }, { "epoch": 0.11514195583596215, "grad_norm": 0.3877616226673126, "learning_rate": 3.836598184424272e-06, "loss": 0.4169, "step": 803 }, { "epoch": 0.11528534556925724, "grad_norm": 0.4549158215522766, "learning_rate": 3.8413760152890586e-06, "loss": 0.3959, "step": 804 }, { "epoch": 0.11542873530255233, "grad_norm": 0.45848798751831055, "learning_rate": 3.846153846153847e-06, "loss": 0.3946, "step": 805 }, { "epoch": 0.11557212503584743, "grad_norm": 0.4199373424053192, "learning_rate": 3.850931677018634e-06, "loss": 0.3845, "step": 806 }, { "epoch": 0.11571551476914253, "grad_norm": 0.42193061113357544, "learning_rate": 3.8557095078834215e-06, "loss": 0.3865, "step": 807 }, { "epoch": 0.11585890450243763, "grad_norm": 0.4753974676132202, "learning_rate": 3.860487338748208e-06, "loss": 0.406, "step": 808 }, { "epoch": 0.11600229423573272, "grad_norm": 0.44753196835517883, "learning_rate": 3.865265169612996e-06, "loss": 0.3949, "step": 809 }, { "epoch": 0.11614568396902782, "grad_norm": 0.47079169750213623, "learning_rate": 3.870043000477784e-06, "loss": 0.3831, "step": 810 }, { "epoch": 0.11628907370232291, "grad_norm": 0.4422566592693329, "learning_rate": 3.87482083134257e-06, "loss": 0.4026, "step": 811 }, { "epoch": 0.11643246343561801, "grad_norm": 0.42028477787971497, "learning_rate": 3.8795986622073584e-06, "loss": 0.4104, "step": 812 }, { "epoch": 0.1165758531689131, "grad_norm": 0.44426247477531433, "learning_rate": 3.884376493072146e-06, "loss": 0.3799, "step": 813 }, { "epoch": 0.1167192429022082, "grad_norm": 0.4159654974937439, "learning_rate": 3.889154323936933e-06, "loss": 0.373, "step": 814 }, { "epoch": 0.1168626326355033, "grad_norm": 0.4468208849430084, "learning_rate": 3.8939321548017206e-06, "loss": 0.3953, "step": 815 }, { "epoch": 0.1170060223687984, "grad_norm": 0.4790559411048889, "learning_rate": 3.898709985666508e-06, "loss": 0.3837, "step": 816 }, { "epoch": 0.1171494121020935, "grad_norm": 0.43412554264068604, "learning_rate": 3.903487816531295e-06, "loss": 0.4057, "step": 817 }, { "epoch": 0.11729280183538858, "grad_norm": 0.4262566566467285, "learning_rate": 3.908265647396083e-06, "loss": 0.409, "step": 818 }, { "epoch": 0.11743619156868368, "grad_norm": 0.46137651801109314, "learning_rate": 3.91304347826087e-06, "loss": 0.3935, "step": 819 }, { "epoch": 0.11757958130197878, "grad_norm": 0.4779674708843231, "learning_rate": 3.9178213091256574e-06, "loss": 0.387, "step": 820 }, { "epoch": 0.11772297103527388, "grad_norm": 0.46147245168685913, "learning_rate": 3.922599139990445e-06, "loss": 0.4109, "step": 821 }, { "epoch": 0.11786636076856898, "grad_norm": 0.4325077533721924, "learning_rate": 3.927376970855232e-06, "loss": 0.3998, "step": 822 }, { "epoch": 0.11800975050186406, "grad_norm": 0.41539284586906433, "learning_rate": 3.93215480172002e-06, "loss": 0.3849, "step": 823 }, { "epoch": 0.11815314023515916, "grad_norm": 0.48611024022102356, "learning_rate": 3.936932632584807e-06, "loss": 0.4161, "step": 824 }, { "epoch": 0.11829652996845426, "grad_norm": 0.46580442786216736, "learning_rate": 3.941710463449594e-06, "loss": 0.4124, "step": 825 }, { "epoch": 0.11843991970174936, "grad_norm": 0.45285797119140625, "learning_rate": 3.946488294314382e-06, "loss": 0.3659, "step": 826 }, { "epoch": 0.11858330943504446, "grad_norm": 0.43489864468574524, "learning_rate": 3.951266125179169e-06, "loss": 0.3864, "step": 827 }, { "epoch": 0.11872669916833954, "grad_norm": 0.5107983946800232, "learning_rate": 3.9560439560439565e-06, "loss": 0.4006, "step": 828 }, { "epoch": 0.11887008890163464, "grad_norm": 0.43279507756233215, "learning_rate": 3.960821786908744e-06, "loss": 0.3791, "step": 829 }, { "epoch": 0.11901347863492974, "grad_norm": 0.4442771077156067, "learning_rate": 3.965599617773531e-06, "loss": 0.402, "step": 830 }, { "epoch": 0.11915686836822484, "grad_norm": 0.4360503852367401, "learning_rate": 3.970377448638319e-06, "loss": 0.3918, "step": 831 }, { "epoch": 0.11930025810151994, "grad_norm": 0.4597717225551605, "learning_rate": 3.975155279503106e-06, "loss": 0.3817, "step": 832 }, { "epoch": 0.11944364783481502, "grad_norm": 0.43274131417274475, "learning_rate": 3.979933110367893e-06, "loss": 0.4042, "step": 833 }, { "epoch": 0.11958703756811012, "grad_norm": 0.43287041783332825, "learning_rate": 3.984710941232681e-06, "loss": 0.3993, "step": 834 }, { "epoch": 0.11973042730140522, "grad_norm": 0.46249639987945557, "learning_rate": 3.989488772097468e-06, "loss": 0.4057, "step": 835 }, { "epoch": 0.11987381703470032, "grad_norm": 0.4314408004283905, "learning_rate": 3.9942666029622555e-06, "loss": 0.381, "step": 836 }, { "epoch": 0.1200172067679954, "grad_norm": 0.4481728971004486, "learning_rate": 3.999044433827043e-06, "loss": 0.3683, "step": 837 }, { "epoch": 0.1201605965012905, "grad_norm": 0.42803749442100525, "learning_rate": 4.00382226469183e-06, "loss": 0.3981, "step": 838 }, { "epoch": 0.1203039862345856, "grad_norm": 0.46911656856536865, "learning_rate": 4.008600095556618e-06, "loss": 0.3918, "step": 839 }, { "epoch": 0.1204473759678807, "grad_norm": 0.4771193265914917, "learning_rate": 4.013377926421405e-06, "loss": 0.3761, "step": 840 }, { "epoch": 0.1205907657011758, "grad_norm": 0.4326167702674866, "learning_rate": 4.018155757286192e-06, "loss": 0.4019, "step": 841 }, { "epoch": 0.12073415543447089, "grad_norm": 0.3927064538002014, "learning_rate": 4.02293358815098e-06, "loss": 0.3753, "step": 842 }, { "epoch": 0.12087754516776598, "grad_norm": 0.41561686992645264, "learning_rate": 4.027711419015767e-06, "loss": 0.413, "step": 843 }, { "epoch": 0.12102093490106108, "grad_norm": 0.4274291694164276, "learning_rate": 4.0324892498805545e-06, "loss": 0.3901, "step": 844 }, { "epoch": 0.12116432463435618, "grad_norm": 0.45132482051849365, "learning_rate": 4.037267080745342e-06, "loss": 0.4188, "step": 845 }, { "epoch": 0.12130771436765128, "grad_norm": 0.4499838054180145, "learning_rate": 4.042044911610129e-06, "loss": 0.4215, "step": 846 }, { "epoch": 0.12145110410094637, "grad_norm": 0.4797186553478241, "learning_rate": 4.046822742474917e-06, "loss": 0.4109, "step": 847 }, { "epoch": 0.12159449383424147, "grad_norm": 0.4640295207500458, "learning_rate": 4.051600573339704e-06, "loss": 0.3862, "step": 848 }, { "epoch": 0.12173788356753656, "grad_norm": 0.45631951093673706, "learning_rate": 4.056378404204491e-06, "loss": 0.3939, "step": 849 }, { "epoch": 0.12188127330083166, "grad_norm": 0.3925289809703827, "learning_rate": 4.061156235069279e-06, "loss": 0.3959, "step": 850 }, { "epoch": 0.12202466303412676, "grad_norm": 0.5018851161003113, "learning_rate": 4.065934065934066e-06, "loss": 0.4073, "step": 851 }, { "epoch": 0.12216805276742185, "grad_norm": 0.4487796425819397, "learning_rate": 4.0707118967988535e-06, "loss": 0.4053, "step": 852 }, { "epoch": 0.12231144250071695, "grad_norm": 0.4132881164550781, "learning_rate": 4.075489727663641e-06, "loss": 0.3861, "step": 853 }, { "epoch": 0.12245483223401205, "grad_norm": 0.4497639536857605, "learning_rate": 4.080267558528428e-06, "loss": 0.4015, "step": 854 }, { "epoch": 0.12259822196730714, "grad_norm": 0.46188515424728394, "learning_rate": 4.085045389393216e-06, "loss": 0.4008, "step": 855 }, { "epoch": 0.12274161170060224, "grad_norm": 0.419737845659256, "learning_rate": 4.089823220258003e-06, "loss": 0.3732, "step": 856 }, { "epoch": 0.12288500143389733, "grad_norm": 0.43291398882865906, "learning_rate": 4.09460105112279e-06, "loss": 0.3777, "step": 857 }, { "epoch": 0.12302839116719243, "grad_norm": 0.4314952492713928, "learning_rate": 4.099378881987578e-06, "loss": 0.4283, "step": 858 }, { "epoch": 0.12317178090048753, "grad_norm": 0.38394054770469666, "learning_rate": 4.104156712852365e-06, "loss": 0.3901, "step": 859 }, { "epoch": 0.12331517063378263, "grad_norm": 0.42708608508110046, "learning_rate": 4.108934543717153e-06, "loss": 0.3747, "step": 860 }, { "epoch": 0.12345856036707771, "grad_norm": 0.42382434010505676, "learning_rate": 4.11371237458194e-06, "loss": 0.3915, "step": 861 }, { "epoch": 0.12360195010037281, "grad_norm": 0.3971651792526245, "learning_rate": 4.118490205446727e-06, "loss": 0.3944, "step": 862 }, { "epoch": 0.12374533983366791, "grad_norm": 0.4283129572868347, "learning_rate": 4.1232680363115155e-06, "loss": 0.3897, "step": 863 }, { "epoch": 0.12388872956696301, "grad_norm": 0.4539901614189148, "learning_rate": 4.128045867176302e-06, "loss": 0.3818, "step": 864 }, { "epoch": 0.1240321193002581, "grad_norm": 0.43984556198120117, "learning_rate": 4.132823698041089e-06, "loss": 0.3919, "step": 865 }, { "epoch": 0.12417550903355319, "grad_norm": 0.4545941948890686, "learning_rate": 4.137601528905877e-06, "loss": 0.3985, "step": 866 }, { "epoch": 0.12431889876684829, "grad_norm": 0.5157697796821594, "learning_rate": 4.142379359770665e-06, "loss": 0.4111, "step": 867 }, { "epoch": 0.12446228850014339, "grad_norm": 0.4831155836582184, "learning_rate": 4.1471571906354515e-06, "loss": 0.3758, "step": 868 }, { "epoch": 0.12460567823343849, "grad_norm": 0.4335170388221741, "learning_rate": 4.151935021500239e-06, "loss": 0.399, "step": 869 }, { "epoch": 0.12474906796673359, "grad_norm": 0.4954366683959961, "learning_rate": 4.156712852365027e-06, "loss": 0.39, "step": 870 }, { "epoch": 0.12489245770002867, "grad_norm": 0.47669360041618347, "learning_rate": 4.1614906832298145e-06, "loss": 0.4088, "step": 871 }, { "epoch": 0.12503584743332377, "grad_norm": 0.4811387360095978, "learning_rate": 4.166268514094601e-06, "loss": 0.4055, "step": 872 }, { "epoch": 0.12517923716661886, "grad_norm": 0.4691310226917267, "learning_rate": 4.171046344959388e-06, "loss": 0.4242, "step": 873 }, { "epoch": 0.12532262689991397, "grad_norm": 0.5142088532447815, "learning_rate": 4.175824175824177e-06, "loss": 0.3795, "step": 874 }, { "epoch": 0.12546601663320905, "grad_norm": 0.44620323181152344, "learning_rate": 4.180602006688963e-06, "loss": 0.3945, "step": 875 }, { "epoch": 0.12560940636650417, "grad_norm": 0.4545617699623108, "learning_rate": 4.1853798375537505e-06, "loss": 0.3889, "step": 876 }, { "epoch": 0.12575279609979925, "grad_norm": 0.5249200463294983, "learning_rate": 4.190157668418539e-06, "loss": 0.3776, "step": 877 }, { "epoch": 0.12589618583309434, "grad_norm": 0.4900038540363312, "learning_rate": 4.194935499283326e-06, "loss": 0.4012, "step": 878 }, { "epoch": 0.12603957556638945, "grad_norm": 0.4194788932800293, "learning_rate": 4.199713330148113e-06, "loss": 0.3904, "step": 879 }, { "epoch": 0.12618296529968454, "grad_norm": 0.506332278251648, "learning_rate": 4.2044911610129e-06, "loss": 0.412, "step": 880 }, { "epoch": 0.12632635503297965, "grad_norm": 0.4645387828350067, "learning_rate": 4.209268991877688e-06, "loss": 0.3883, "step": 881 }, { "epoch": 0.12646974476627473, "grad_norm": 0.4593797028064728, "learning_rate": 4.214046822742475e-06, "loss": 0.389, "step": 882 }, { "epoch": 0.12661313449956982, "grad_norm": 0.5262132883071899, "learning_rate": 4.218824653607262e-06, "loss": 0.3938, "step": 883 }, { "epoch": 0.12675652423286493, "grad_norm": 0.45310285687446594, "learning_rate": 4.22360248447205e-06, "loss": 0.4195, "step": 884 }, { "epoch": 0.12689991396616002, "grad_norm": 0.5164772868156433, "learning_rate": 4.228380315336838e-06, "loss": 0.3983, "step": 885 }, { "epoch": 0.12704330369945513, "grad_norm": 0.5451394319534302, "learning_rate": 4.233158146201624e-06, "loss": 0.3778, "step": 886 }, { "epoch": 0.12718669343275021, "grad_norm": 0.4647102355957031, "learning_rate": 4.2379359770664125e-06, "loss": 0.3865, "step": 887 }, { "epoch": 0.1273300831660453, "grad_norm": 0.5132599472999573, "learning_rate": 4.2427138079312e-06, "loss": 0.3882, "step": 888 }, { "epoch": 0.1274734728993404, "grad_norm": 0.5000318884849548, "learning_rate": 4.247491638795987e-06, "loss": 0.4111, "step": 889 }, { "epoch": 0.1276168626326355, "grad_norm": 0.42954105138778687, "learning_rate": 4.252269469660774e-06, "loss": 0.3951, "step": 890 }, { "epoch": 0.1277602523659306, "grad_norm": 0.4791671633720398, "learning_rate": 4.257047300525562e-06, "loss": 0.3859, "step": 891 }, { "epoch": 0.1279036420992257, "grad_norm": 0.4769665002822876, "learning_rate": 4.261825131390349e-06, "loss": 0.396, "step": 892 }, { "epoch": 0.12804703183252078, "grad_norm": 0.48658135533332825, "learning_rate": 4.266602962255136e-06, "loss": 0.3908, "step": 893 }, { "epoch": 0.1281904215658159, "grad_norm": 0.39769473671913147, "learning_rate": 4.271380793119924e-06, "loss": 0.4177, "step": 894 }, { "epoch": 0.12833381129911098, "grad_norm": 0.44253504276275635, "learning_rate": 4.2761586239847116e-06, "loss": 0.4031, "step": 895 }, { "epoch": 0.1284772010324061, "grad_norm": 0.5004390478134155, "learning_rate": 4.280936454849499e-06, "loss": 0.3905, "step": 896 }, { "epoch": 0.12862059076570118, "grad_norm": 0.41896721720695496, "learning_rate": 4.2857142857142855e-06, "loss": 0.3976, "step": 897 }, { "epoch": 0.12876398049899626, "grad_norm": 0.43737974762916565, "learning_rate": 4.290492116579074e-06, "loss": 0.3937, "step": 898 }, { "epoch": 0.12890737023229137, "grad_norm": 0.4542475938796997, "learning_rate": 4.295269947443861e-06, "loss": 0.3781, "step": 899 }, { "epoch": 0.12905075996558646, "grad_norm": 0.4626382291316986, "learning_rate": 4.3000477783086484e-06, "loss": 0.3651, "step": 900 }, { "epoch": 0.12919414969888157, "grad_norm": 0.4537074863910675, "learning_rate": 4.304825609173436e-06, "loss": 0.4182, "step": 901 }, { "epoch": 0.12933753943217666, "grad_norm": 0.4040357172489166, "learning_rate": 4.309603440038223e-06, "loss": 0.3969, "step": 902 }, { "epoch": 0.12948092916547174, "grad_norm": 0.42849457263946533, "learning_rate": 4.3143812709030106e-06, "loss": 0.3918, "step": 903 }, { "epoch": 0.12962431889876685, "grad_norm": 0.4345065951347351, "learning_rate": 4.319159101767797e-06, "loss": 0.4134, "step": 904 }, { "epoch": 0.12976770863206194, "grad_norm": 0.39830002188682556, "learning_rate": 4.323936932632585e-06, "loss": 0.3749, "step": 905 }, { "epoch": 0.12991109836535705, "grad_norm": 0.460793137550354, "learning_rate": 4.328714763497373e-06, "loss": 0.3641, "step": 906 }, { "epoch": 0.13005448809865214, "grad_norm": 0.46484261751174927, "learning_rate": 4.33349259436216e-06, "loss": 0.3837, "step": 907 }, { "epoch": 0.13019787783194722, "grad_norm": 0.4062252342700958, "learning_rate": 4.3382704252269475e-06, "loss": 0.4005, "step": 908 }, { "epoch": 0.13034126756524234, "grad_norm": 0.4397974908351898, "learning_rate": 4.343048256091735e-06, "loss": 0.3976, "step": 909 }, { "epoch": 0.13048465729853742, "grad_norm": 0.45573779940605164, "learning_rate": 4.347826086956522e-06, "loss": 0.3924, "step": 910 }, { "epoch": 0.13062804703183253, "grad_norm": 0.44763630628585815, "learning_rate": 4.35260391782131e-06, "loss": 0.3859, "step": 911 }, { "epoch": 0.13077143676512762, "grad_norm": 0.4208945631980896, "learning_rate": 4.357381748686097e-06, "loss": 0.4118, "step": 912 }, { "epoch": 0.1309148264984227, "grad_norm": 0.41296136379241943, "learning_rate": 4.362159579550884e-06, "loss": 0.3822, "step": 913 }, { "epoch": 0.13105821623171782, "grad_norm": 0.44359850883483887, "learning_rate": 4.366937410415672e-06, "loss": 0.4008, "step": 914 }, { "epoch": 0.1312016059650129, "grad_norm": 0.3974003195762634, "learning_rate": 4.371715241280459e-06, "loss": 0.384, "step": 915 }, { "epoch": 0.13134499569830801, "grad_norm": 0.4091705083847046, "learning_rate": 4.3764930721452465e-06, "loss": 0.372, "step": 916 }, { "epoch": 0.1314883854316031, "grad_norm": 0.40441644191741943, "learning_rate": 4.381270903010034e-06, "loss": 0.3726, "step": 917 }, { "epoch": 0.13163177516489818, "grad_norm": 0.44380316138267517, "learning_rate": 4.386048733874821e-06, "loss": 0.4105, "step": 918 }, { "epoch": 0.1317751648981933, "grad_norm": 0.4441313147544861, "learning_rate": 4.390826564739609e-06, "loss": 0.4049, "step": 919 }, { "epoch": 0.13191855463148838, "grad_norm": 0.42199310660362244, "learning_rate": 4.395604395604396e-06, "loss": 0.3989, "step": 920 }, { "epoch": 0.13206194436478347, "grad_norm": 0.47646042704582214, "learning_rate": 4.400382226469183e-06, "loss": 0.3976, "step": 921 }, { "epoch": 0.13220533409807858, "grad_norm": 0.4049639403820038, "learning_rate": 4.405160057333971e-06, "loss": 0.3919, "step": 922 }, { "epoch": 0.13234872383137367, "grad_norm": 0.4535678029060364, "learning_rate": 4.409937888198758e-06, "loss": 0.3945, "step": 923 }, { "epoch": 0.13249211356466878, "grad_norm": 0.4459519684314728, "learning_rate": 4.4147157190635455e-06, "loss": 0.3842, "step": 924 }, { "epoch": 0.13263550329796386, "grad_norm": 0.43236321210861206, "learning_rate": 4.419493549928333e-06, "loss": 0.383, "step": 925 }, { "epoch": 0.13277889303125895, "grad_norm": 0.4286075532436371, "learning_rate": 4.42427138079312e-06, "loss": 0.3868, "step": 926 }, { "epoch": 0.13292228276455406, "grad_norm": 0.4449140131473541, "learning_rate": 4.429049211657908e-06, "loss": 0.3914, "step": 927 }, { "epoch": 0.13306567249784915, "grad_norm": 0.5036810040473938, "learning_rate": 4.433827042522695e-06, "loss": 0.4061, "step": 928 }, { "epoch": 0.13320906223114426, "grad_norm": 0.44687193632125854, "learning_rate": 4.438604873387482e-06, "loss": 0.3862, "step": 929 }, { "epoch": 0.13335245196443934, "grad_norm": 0.43281808495521545, "learning_rate": 4.44338270425227e-06, "loss": 0.3609, "step": 930 }, { "epoch": 0.13349584169773443, "grad_norm": 0.4374484419822693, "learning_rate": 4.448160535117057e-06, "loss": 0.4014, "step": 931 }, { "epoch": 0.13363923143102954, "grad_norm": 0.467790424823761, "learning_rate": 4.4529383659818445e-06, "loss": 0.3916, "step": 932 }, { "epoch": 0.13378262116432463, "grad_norm": 0.43290746212005615, "learning_rate": 4.457716196846632e-06, "loss": 0.4009, "step": 933 }, { "epoch": 0.13392601089761974, "grad_norm": 0.45729976892471313, "learning_rate": 4.462494027711419e-06, "loss": 0.3923, "step": 934 }, { "epoch": 0.13406940063091483, "grad_norm": 0.450764924287796, "learning_rate": 4.467271858576207e-06, "loss": 0.4028, "step": 935 }, { "epoch": 0.1342127903642099, "grad_norm": 0.46315622329711914, "learning_rate": 4.472049689440994e-06, "loss": 0.3987, "step": 936 }, { "epoch": 0.13435618009750502, "grad_norm": 0.4970240294933319, "learning_rate": 4.476827520305781e-06, "loss": 0.3988, "step": 937 }, { "epoch": 0.1344995698308001, "grad_norm": 0.4993778169155121, "learning_rate": 4.481605351170569e-06, "loss": 0.3741, "step": 938 }, { "epoch": 0.13464295956409522, "grad_norm": 0.45249253511428833, "learning_rate": 4.486383182035356e-06, "loss": 0.3853, "step": 939 }, { "epoch": 0.1347863492973903, "grad_norm": 0.4636417329311371, "learning_rate": 4.4911610129001435e-06, "loss": 0.406, "step": 940 }, { "epoch": 0.1349297390306854, "grad_norm": 0.45962512493133545, "learning_rate": 4.495938843764931e-06, "loss": 0.3805, "step": 941 }, { "epoch": 0.1350731287639805, "grad_norm": 0.4506915211677551, "learning_rate": 4.500716674629719e-06, "loss": 0.4115, "step": 942 }, { "epoch": 0.1352165184972756, "grad_norm": 0.4405030608177185, "learning_rate": 4.505494505494506e-06, "loss": 0.4031, "step": 943 }, { "epoch": 0.1353599082305707, "grad_norm": 0.4554212689399719, "learning_rate": 4.510272336359293e-06, "loss": 0.3769, "step": 944 }, { "epoch": 0.1355032979638658, "grad_norm": 0.4299309551715851, "learning_rate": 4.51505016722408e-06, "loss": 0.3912, "step": 945 }, { "epoch": 0.13564668769716087, "grad_norm": 0.47947975993156433, "learning_rate": 4.519827998088868e-06, "loss": 0.3895, "step": 946 }, { "epoch": 0.13579007743045599, "grad_norm": 0.43655458092689514, "learning_rate": 4.524605828953655e-06, "loss": 0.3952, "step": 947 }, { "epoch": 0.13593346716375107, "grad_norm": 0.4064379334449768, "learning_rate": 4.5293836598184425e-06, "loss": 0.4084, "step": 948 }, { "epoch": 0.13607685689704618, "grad_norm": 0.47959643602371216, "learning_rate": 4.534161490683231e-06, "loss": 0.4111, "step": 949 }, { "epoch": 0.13622024663034127, "grad_norm": 0.4733428955078125, "learning_rate": 4.538939321548017e-06, "loss": 0.3749, "step": 950 }, { "epoch": 0.13636363636363635, "grad_norm": 0.4044030010700226, "learning_rate": 4.543717152412805e-06, "loss": 0.3846, "step": 951 }, { "epoch": 0.13650702609693147, "grad_norm": 0.4367164373397827, "learning_rate": 4.548494983277592e-06, "loss": 0.3935, "step": 952 }, { "epoch": 0.13665041583022655, "grad_norm": 0.44493257999420166, "learning_rate": 4.55327281414238e-06, "loss": 0.3703, "step": 953 }, { "epoch": 0.13679380556352166, "grad_norm": 0.42878761887550354, "learning_rate": 4.558050645007167e-06, "loss": 0.3887, "step": 954 }, { "epoch": 0.13693719529681675, "grad_norm": 0.4457035958766937, "learning_rate": 4.562828475871954e-06, "loss": 0.4005, "step": 955 }, { "epoch": 0.13708058503011183, "grad_norm": 0.488151878118515, "learning_rate": 4.567606306736742e-06, "loss": 0.3995, "step": 956 }, { "epoch": 0.13722397476340695, "grad_norm": 0.4812062084674835, "learning_rate": 4.572384137601529e-06, "loss": 0.3874, "step": 957 }, { "epoch": 0.13736736449670203, "grad_norm": 0.4836706519126892, "learning_rate": 4.577161968466316e-06, "loss": 0.3965, "step": 958 }, { "epoch": 0.13751075422999715, "grad_norm": 0.4379911422729492, "learning_rate": 4.581939799331104e-06, "loss": 0.384, "step": 959 }, { "epoch": 0.13765414396329223, "grad_norm": 0.4768490791320801, "learning_rate": 4.586717630195892e-06, "loss": 0.3961, "step": 960 }, { "epoch": 0.13779753369658732, "grad_norm": 0.4367746114730835, "learning_rate": 4.591495461060678e-06, "loss": 0.3917, "step": 961 }, { "epoch": 0.13794092342988243, "grad_norm": 0.4048379361629486, "learning_rate": 4.596273291925466e-06, "loss": 0.4075, "step": 962 }, { "epoch": 0.1380843131631775, "grad_norm": 0.4896012544631958, "learning_rate": 4.601051122790254e-06, "loss": 0.3864, "step": 963 }, { "epoch": 0.13822770289647263, "grad_norm": 0.45842331647872925, "learning_rate": 4.605828953655041e-06, "loss": 0.3973, "step": 964 }, { "epoch": 0.1383710926297677, "grad_norm": 0.46148237586021423, "learning_rate": 4.610606784519828e-06, "loss": 0.3761, "step": 965 }, { "epoch": 0.1385144823630628, "grad_norm": 0.49286648631095886, "learning_rate": 4.615384615384616e-06, "loss": 0.4265, "step": 966 }, { "epoch": 0.1386578720963579, "grad_norm": 0.42665740847587585, "learning_rate": 4.6201624462494035e-06, "loss": 0.3971, "step": 967 }, { "epoch": 0.138801261829653, "grad_norm": 0.40738850831985474, "learning_rate": 4.62494027711419e-06, "loss": 0.3772, "step": 968 }, { "epoch": 0.13894465156294808, "grad_norm": 0.5083932876586914, "learning_rate": 4.6297181079789774e-06, "loss": 0.394, "step": 969 }, { "epoch": 0.1390880412962432, "grad_norm": 0.47188571095466614, "learning_rate": 4.634495938843766e-06, "loss": 0.377, "step": 970 }, { "epoch": 0.13923143102953828, "grad_norm": 0.4600543975830078, "learning_rate": 4.639273769708553e-06, "loss": 0.4065, "step": 971 }, { "epoch": 0.1393748207628334, "grad_norm": 0.5460575222969055, "learning_rate": 4.6440516005733396e-06, "loss": 0.3883, "step": 972 }, { "epoch": 0.13951821049612848, "grad_norm": 0.4898830056190491, "learning_rate": 4.648829431438128e-06, "loss": 0.3888, "step": 973 }, { "epoch": 0.13966160022942356, "grad_norm": 0.4367707371711731, "learning_rate": 4.653607262302915e-06, "loss": 0.3905, "step": 974 }, { "epoch": 0.13980498996271867, "grad_norm": 0.46885696053504944, "learning_rate": 4.6583850931677025e-06, "loss": 0.3842, "step": 975 }, { "epoch": 0.13994837969601376, "grad_norm": 0.512488842010498, "learning_rate": 4.663162924032489e-06, "loss": 0.399, "step": 976 }, { "epoch": 0.14009176942930887, "grad_norm": 0.47474411129951477, "learning_rate": 4.667940754897277e-06, "loss": 0.3942, "step": 977 }, { "epoch": 0.14023515916260396, "grad_norm": 0.41927918791770935, "learning_rate": 4.672718585762065e-06, "loss": 0.3894, "step": 978 }, { "epoch": 0.14037854889589904, "grad_norm": 0.4532061517238617, "learning_rate": 4.677496416626851e-06, "loss": 0.3959, "step": 979 }, { "epoch": 0.14052193862919415, "grad_norm": 0.4335079789161682, "learning_rate": 4.6822742474916394e-06, "loss": 0.3934, "step": 980 }, { "epoch": 0.14066532836248924, "grad_norm": 0.47505831718444824, "learning_rate": 4.687052078356427e-06, "loss": 0.3983, "step": 981 }, { "epoch": 0.14080871809578435, "grad_norm": 0.4627453684806824, "learning_rate": 4.691829909221214e-06, "loss": 0.402, "step": 982 }, { "epoch": 0.14095210782907944, "grad_norm": 0.41030922532081604, "learning_rate": 4.696607740086001e-06, "loss": 0.3717, "step": 983 }, { "epoch": 0.14109549756237452, "grad_norm": 0.5056600570678711, "learning_rate": 4.701385570950789e-06, "loss": 0.4086, "step": 984 }, { "epoch": 0.14123888729566964, "grad_norm": 0.46023380756378174, "learning_rate": 4.706163401815576e-06, "loss": 0.3843, "step": 985 }, { "epoch": 0.14138227702896472, "grad_norm": 0.4608609974384308, "learning_rate": 4.710941232680364e-06, "loss": 0.3904, "step": 986 }, { "epoch": 0.14152566676225983, "grad_norm": 0.4688347280025482, "learning_rate": 4.715719063545151e-06, "loss": 0.3749, "step": 987 }, { "epoch": 0.14166905649555492, "grad_norm": 0.46998393535614014, "learning_rate": 4.7204968944099384e-06, "loss": 0.4012, "step": 988 }, { "epoch": 0.14181244622885, "grad_norm": 0.45446592569351196, "learning_rate": 4.725274725274726e-06, "loss": 0.4029, "step": 989 }, { "epoch": 0.14195583596214512, "grad_norm": 0.491315096616745, "learning_rate": 4.730052556139513e-06, "loss": 0.4016, "step": 990 }, { "epoch": 0.1420992256954402, "grad_norm": 0.4348723590373993, "learning_rate": 4.7348303870043006e-06, "loss": 0.3764, "step": 991 }, { "epoch": 0.14224261542873531, "grad_norm": 0.4851716160774231, "learning_rate": 4.739608217869088e-06, "loss": 0.4108, "step": 992 }, { "epoch": 0.1423860051620304, "grad_norm": 0.4400936961174011, "learning_rate": 4.744386048733875e-06, "loss": 0.3988, "step": 993 }, { "epoch": 0.14252939489532548, "grad_norm": 0.4942076504230499, "learning_rate": 4.749163879598663e-06, "loss": 0.3796, "step": 994 }, { "epoch": 0.1426727846286206, "grad_norm": 0.4189963638782501, "learning_rate": 4.75394171046345e-06, "loss": 0.385, "step": 995 }, { "epoch": 0.14281617436191568, "grad_norm": 0.4588637948036194, "learning_rate": 4.7587195413282375e-06, "loss": 0.4026, "step": 996 }, { "epoch": 0.1429595640952108, "grad_norm": 0.45723649859428406, "learning_rate": 4.763497372193025e-06, "loss": 0.3847, "step": 997 }, { "epoch": 0.14310295382850588, "grad_norm": 0.4426797926425934, "learning_rate": 4.768275203057812e-06, "loss": 0.3802, "step": 998 }, { "epoch": 0.14324634356180097, "grad_norm": 0.4276224374771118, "learning_rate": 4.7730530339226e-06, "loss": 0.3991, "step": 999 }, { "epoch": 0.14338973329509608, "grad_norm": 0.463000625371933, "learning_rate": 4.777830864787387e-06, "loss": 0.4028, "step": 1000 }, { "epoch": 0.14353312302839116, "grad_norm": 0.42596033215522766, "learning_rate": 4.782608695652174e-06, "loss": 0.3908, "step": 1001 }, { "epoch": 0.14367651276168628, "grad_norm": 0.45944398641586304, "learning_rate": 4.787386526516962e-06, "loss": 0.4018, "step": 1002 }, { "epoch": 0.14381990249498136, "grad_norm": 0.4399135410785675, "learning_rate": 4.792164357381749e-06, "loss": 0.3769, "step": 1003 }, { "epoch": 0.14396329222827645, "grad_norm": 0.4633922576904297, "learning_rate": 4.7969421882465365e-06, "loss": 0.3941, "step": 1004 }, { "epoch": 0.14410668196157156, "grad_norm": 0.4417799413204193, "learning_rate": 4.801720019111324e-06, "loss": 0.3831, "step": 1005 }, { "epoch": 0.14425007169486664, "grad_norm": 0.45594415068626404, "learning_rate": 4.806497849976111e-06, "loss": 0.3841, "step": 1006 }, { "epoch": 0.14439346142816176, "grad_norm": 0.46405908465385437, "learning_rate": 4.811275680840899e-06, "loss": 0.3964, "step": 1007 }, { "epoch": 0.14453685116145684, "grad_norm": 0.4503626525402069, "learning_rate": 4.816053511705686e-06, "loss": 0.3779, "step": 1008 }, { "epoch": 0.14468024089475193, "grad_norm": 0.43592068552970886, "learning_rate": 4.820831342570473e-06, "loss": 0.3891, "step": 1009 }, { "epoch": 0.14482363062804704, "grad_norm": 0.43003931641578674, "learning_rate": 4.825609173435261e-06, "loss": 0.3811, "step": 1010 }, { "epoch": 0.14496702036134212, "grad_norm": 0.39590317010879517, "learning_rate": 4.830387004300048e-06, "loss": 0.3788, "step": 1011 }, { "epoch": 0.14511041009463724, "grad_norm": 0.4590090215206146, "learning_rate": 4.8351648351648355e-06, "loss": 0.3818, "step": 1012 }, { "epoch": 0.14525379982793232, "grad_norm": 0.4588252902030945, "learning_rate": 4.839942666029623e-06, "loss": 0.4169, "step": 1013 }, { "epoch": 0.1453971895612274, "grad_norm": 0.5157479643821716, "learning_rate": 4.84472049689441e-06, "loss": 0.3975, "step": 1014 }, { "epoch": 0.14554057929452252, "grad_norm": 0.3933117687702179, "learning_rate": 4.849498327759198e-06, "loss": 0.3847, "step": 1015 }, { "epoch": 0.1456839690278176, "grad_norm": 0.42902445793151855, "learning_rate": 4.854276158623985e-06, "loss": 0.3828, "step": 1016 }, { "epoch": 0.1458273587611127, "grad_norm": 0.4217897951602936, "learning_rate": 4.859053989488772e-06, "loss": 0.3826, "step": 1017 }, { "epoch": 0.1459707484944078, "grad_norm": 0.4951091408729553, "learning_rate": 4.86383182035356e-06, "loss": 0.3892, "step": 1018 }, { "epoch": 0.1461141382277029, "grad_norm": 0.48037052154541016, "learning_rate": 4.868609651218347e-06, "loss": 0.376, "step": 1019 }, { "epoch": 0.146257527960998, "grad_norm": 0.4388693869113922, "learning_rate": 4.8733874820831345e-06, "loss": 0.3905, "step": 1020 }, { "epoch": 0.1464009176942931, "grad_norm": 0.4912484288215637, "learning_rate": 4.878165312947922e-06, "loss": 0.3968, "step": 1021 }, { "epoch": 0.14654430742758817, "grad_norm": 0.45503267645835876, "learning_rate": 4.882943143812709e-06, "loss": 0.3982, "step": 1022 }, { "epoch": 0.14668769716088328, "grad_norm": 0.4696691632270813, "learning_rate": 4.887720974677497e-06, "loss": 0.3889, "step": 1023 }, { "epoch": 0.14683108689417837, "grad_norm": 0.4061465859413147, "learning_rate": 4.892498805542284e-06, "loss": 0.3658, "step": 1024 }, { "epoch": 0.14697447662747348, "grad_norm": 0.4994473457336426, "learning_rate": 4.897276636407071e-06, "loss": 0.3789, "step": 1025 }, { "epoch": 0.14711786636076857, "grad_norm": 0.49315693974494934, "learning_rate": 4.902054467271859e-06, "loss": 0.3864, "step": 1026 }, { "epoch": 0.14726125609406365, "grad_norm": 0.4608769416809082, "learning_rate": 4.906832298136646e-06, "loss": 0.3879, "step": 1027 }, { "epoch": 0.14740464582735877, "grad_norm": 0.46062028408050537, "learning_rate": 4.911610129001434e-06, "loss": 0.4405, "step": 1028 }, { "epoch": 0.14754803556065385, "grad_norm": 0.446956604719162, "learning_rate": 4.916387959866221e-06, "loss": 0.3811, "step": 1029 }, { "epoch": 0.14769142529394896, "grad_norm": 0.44149166345596313, "learning_rate": 4.921165790731008e-06, "loss": 0.3636, "step": 1030 }, { "epoch": 0.14783481502724405, "grad_norm": 0.46144115924835205, "learning_rate": 4.925943621595796e-06, "loss": 0.3993, "step": 1031 }, { "epoch": 0.14797820476053913, "grad_norm": 0.391132116317749, "learning_rate": 4.930721452460583e-06, "loss": 0.3941, "step": 1032 }, { "epoch": 0.14812159449383425, "grad_norm": 0.46045756340026855, "learning_rate": 4.93549928332537e-06, "loss": 0.4005, "step": 1033 }, { "epoch": 0.14826498422712933, "grad_norm": 0.4023682773113251, "learning_rate": 4.940277114190158e-06, "loss": 0.3775, "step": 1034 }, { "epoch": 0.14840837396042444, "grad_norm": 0.4315963089466095, "learning_rate": 4.945054945054946e-06, "loss": 0.3835, "step": 1035 }, { "epoch": 0.14855176369371953, "grad_norm": 0.4270118474960327, "learning_rate": 4.9498327759197325e-06, "loss": 0.4019, "step": 1036 }, { "epoch": 0.14869515342701461, "grad_norm": 0.467256635427475, "learning_rate": 4.95461060678452e-06, "loss": 0.3928, "step": 1037 }, { "epoch": 0.14883854316030973, "grad_norm": 0.4644683599472046, "learning_rate": 4.959388437649308e-06, "loss": 0.4, "step": 1038 }, { "epoch": 0.1489819328936048, "grad_norm": 0.4533044993877411, "learning_rate": 4.9641662685140955e-06, "loss": 0.3935, "step": 1039 }, { "epoch": 0.14912532262689993, "grad_norm": 0.41713884472846985, "learning_rate": 4.968944099378882e-06, "loss": 0.3757, "step": 1040 }, { "epoch": 0.149268712360195, "grad_norm": 0.4757533669471741, "learning_rate": 4.973721930243669e-06, "loss": 0.383, "step": 1041 }, { "epoch": 0.1494121020934901, "grad_norm": 0.5144376158714294, "learning_rate": 4.978499761108458e-06, "loss": 0.3837, "step": 1042 }, { "epoch": 0.1495554918267852, "grad_norm": 0.4127426743507385, "learning_rate": 4.983277591973244e-06, "loss": 0.3858, "step": 1043 }, { "epoch": 0.1496988815600803, "grad_norm": 0.4449688494205475, "learning_rate": 4.9880554228380315e-06, "loss": 0.388, "step": 1044 }, { "epoch": 0.1498422712933754, "grad_norm": 0.4226646423339844, "learning_rate": 4.99283325370282e-06, "loss": 0.4308, "step": 1045 }, { "epoch": 0.1499856610266705, "grad_norm": 0.41197919845581055, "learning_rate": 4.997611084567607e-06, "loss": 0.4112, "step": 1046 }, { "epoch": 0.15012905075996558, "grad_norm": 0.4152630567550659, "learning_rate": 5.0023889154323945e-06, "loss": 0.3724, "step": 1047 }, { "epoch": 0.1502724404932607, "grad_norm": 0.44092950224876404, "learning_rate": 5.007166746297182e-06, "loss": 0.3816, "step": 1048 }, { "epoch": 0.15041583022655577, "grad_norm": 0.4327300190925598, "learning_rate": 5.011944577161969e-06, "loss": 0.4123, "step": 1049 }, { "epoch": 0.1505592199598509, "grad_norm": 0.4557746648788452, "learning_rate": 5.016722408026757e-06, "loss": 0.4078, "step": 1050 }, { "epoch": 0.15070260969314597, "grad_norm": 0.43229439854621887, "learning_rate": 5.021500238891543e-06, "loss": 0.4097, "step": 1051 }, { "epoch": 0.15084599942644106, "grad_norm": 0.41067931056022644, "learning_rate": 5.0262780697563306e-06, "loss": 0.3794, "step": 1052 }, { "epoch": 0.15098938915973617, "grad_norm": 0.393530935049057, "learning_rate": 5.031055900621118e-06, "loss": 0.3726, "step": 1053 }, { "epoch": 0.15113277889303126, "grad_norm": 0.40745216608047485, "learning_rate": 5.035833731485906e-06, "loss": 0.3975, "step": 1054 }, { "epoch": 0.15127616862632637, "grad_norm": 0.455058217048645, "learning_rate": 5.0406115623506935e-06, "loss": 0.3763, "step": 1055 }, { "epoch": 0.15141955835962145, "grad_norm": 0.40792617201805115, "learning_rate": 5.045389393215481e-06, "loss": 0.4022, "step": 1056 }, { "epoch": 0.15156294809291654, "grad_norm": 0.42219749093055725, "learning_rate": 5.050167224080268e-06, "loss": 0.3731, "step": 1057 }, { "epoch": 0.15170633782621165, "grad_norm": 0.4228120446205139, "learning_rate": 5.054945054945055e-06, "loss": 0.3727, "step": 1058 }, { "epoch": 0.15184972755950674, "grad_norm": 0.3950308561325073, "learning_rate": 5.059722885809842e-06, "loss": 0.3734, "step": 1059 }, { "epoch": 0.15199311729280185, "grad_norm": 0.46840590238571167, "learning_rate": 5.0645007166746296e-06, "loss": 0.4112, "step": 1060 }, { "epoch": 0.15213650702609693, "grad_norm": 0.41905975341796875, "learning_rate": 5.069278547539418e-06, "loss": 0.4019, "step": 1061 }, { "epoch": 0.15227989675939202, "grad_norm": 0.41460323333740234, "learning_rate": 5.074056378404205e-06, "loss": 0.3907, "step": 1062 }, { "epoch": 0.15242328649268713, "grad_norm": 0.4091598391532898, "learning_rate": 5.0788342092689925e-06, "loss": 0.3984, "step": 1063 }, { "epoch": 0.15256667622598222, "grad_norm": 0.4206342399120331, "learning_rate": 5.08361204013378e-06, "loss": 0.3679, "step": 1064 }, { "epoch": 0.1527100659592773, "grad_norm": 0.48984846472740173, "learning_rate": 5.0883898709985665e-06, "loss": 0.3711, "step": 1065 }, { "epoch": 0.15285345569257242, "grad_norm": 0.4157397150993347, "learning_rate": 5.093167701863354e-06, "loss": 0.3774, "step": 1066 }, { "epoch": 0.1529968454258675, "grad_norm": 0.46816733479499817, "learning_rate": 5.097945532728141e-06, "loss": 0.3998, "step": 1067 }, { "epoch": 0.1531402351591626, "grad_norm": 0.4696803689002991, "learning_rate": 5.1027233635929294e-06, "loss": 0.4031, "step": 1068 }, { "epoch": 0.1532836248924577, "grad_norm": 0.48127222061157227, "learning_rate": 5.107501194457717e-06, "loss": 0.3765, "step": 1069 }, { "epoch": 0.15342701462575278, "grad_norm": 0.43453338742256165, "learning_rate": 5.112279025322504e-06, "loss": 0.3807, "step": 1070 }, { "epoch": 0.1535704043590479, "grad_norm": 0.5091345310211182, "learning_rate": 5.1170568561872916e-06, "loss": 0.3843, "step": 1071 }, { "epoch": 0.15371379409234298, "grad_norm": 0.42094457149505615, "learning_rate": 5.121834687052079e-06, "loss": 0.4012, "step": 1072 }, { "epoch": 0.1538571838256381, "grad_norm": 0.41333484649658203, "learning_rate": 5.1266125179168655e-06, "loss": 0.3995, "step": 1073 }, { "epoch": 0.15400057355893318, "grad_norm": 0.4596073627471924, "learning_rate": 5.131390348781654e-06, "loss": 0.3869, "step": 1074 }, { "epoch": 0.15414396329222826, "grad_norm": 0.4475536048412323, "learning_rate": 5.136168179646441e-06, "loss": 0.3822, "step": 1075 }, { "epoch": 0.15428735302552338, "grad_norm": 0.4505835473537445, "learning_rate": 5.1409460105112284e-06, "loss": 0.355, "step": 1076 }, { "epoch": 0.15443074275881846, "grad_norm": 0.5019168257713318, "learning_rate": 5.145723841376016e-06, "loss": 0.3809, "step": 1077 }, { "epoch": 0.15457413249211358, "grad_norm": 0.4574154019355774, "learning_rate": 5.150501672240803e-06, "loss": 0.3976, "step": 1078 }, { "epoch": 0.15471752222540866, "grad_norm": 0.4789392948150635, "learning_rate": 5.155279503105591e-06, "loss": 0.407, "step": 1079 }, { "epoch": 0.15486091195870375, "grad_norm": 0.46424373984336853, "learning_rate": 5.160057333970377e-06, "loss": 0.3651, "step": 1080 }, { "epoch": 0.15500430169199886, "grad_norm": 0.42303624749183655, "learning_rate": 5.164835164835166e-06, "loss": 0.3778, "step": 1081 }, { "epoch": 0.15514769142529394, "grad_norm": 0.43955492973327637, "learning_rate": 5.169612995699953e-06, "loss": 0.4082, "step": 1082 }, { "epoch": 0.15529108115858906, "grad_norm": 0.5030077695846558, "learning_rate": 5.17439082656474e-06, "loss": 0.3837, "step": 1083 }, { "epoch": 0.15543447089188414, "grad_norm": 0.38028788566589355, "learning_rate": 5.1791686574295275e-06, "loss": 0.3712, "step": 1084 }, { "epoch": 0.15557786062517923, "grad_norm": 0.48417770862579346, "learning_rate": 5.183946488294315e-06, "loss": 0.3821, "step": 1085 }, { "epoch": 0.15572125035847434, "grad_norm": 0.4707304537296295, "learning_rate": 5.188724319159102e-06, "loss": 0.3861, "step": 1086 }, { "epoch": 0.15586464009176942, "grad_norm": 0.45278578996658325, "learning_rate": 5.193502150023889e-06, "loss": 0.3751, "step": 1087 }, { "epoch": 0.15600802982506454, "grad_norm": 0.46716687083244324, "learning_rate": 5.198279980888678e-06, "loss": 0.3855, "step": 1088 }, { "epoch": 0.15615141955835962, "grad_norm": 0.5046566128730774, "learning_rate": 5.203057811753464e-06, "loss": 0.3666, "step": 1089 }, { "epoch": 0.1562948092916547, "grad_norm": 0.46024081110954285, "learning_rate": 5.207835642618252e-06, "loss": 0.3807, "step": 1090 }, { "epoch": 0.15643819902494982, "grad_norm": 0.6011648774147034, "learning_rate": 5.212613473483039e-06, "loss": 0.3952, "step": 1091 }, { "epoch": 0.1565815887582449, "grad_norm": 0.56218022108078, "learning_rate": 5.2173913043478265e-06, "loss": 0.3892, "step": 1092 }, { "epoch": 0.15672497849154002, "grad_norm": 0.4409030079841614, "learning_rate": 5.222169135212614e-06, "loss": 0.3787, "step": 1093 }, { "epoch": 0.1568683682248351, "grad_norm": 0.43391403555870056, "learning_rate": 5.226946966077401e-06, "loss": 0.3995, "step": 1094 }, { "epoch": 0.1570117579581302, "grad_norm": 0.5069836974143982, "learning_rate": 5.2317247969421895e-06, "loss": 0.3871, "step": 1095 }, { "epoch": 0.1571551476914253, "grad_norm": 0.46838515996932983, "learning_rate": 5.236502627806976e-06, "loss": 0.3917, "step": 1096 }, { "epoch": 0.15729853742472039, "grad_norm": 0.5026601552963257, "learning_rate": 5.241280458671763e-06, "loss": 0.3963, "step": 1097 }, { "epoch": 0.1574419271580155, "grad_norm": 0.42475587129592896, "learning_rate": 5.246058289536551e-06, "loss": 0.3704, "step": 1098 }, { "epoch": 0.15758531689131058, "grad_norm": 0.47846537828445435, "learning_rate": 5.250836120401338e-06, "loss": 0.3775, "step": 1099 }, { "epoch": 0.15772870662460567, "grad_norm": 0.5642855763435364, "learning_rate": 5.2556139512661255e-06, "loss": 0.3997, "step": 1100 }, { "epoch": 0.15787209635790078, "grad_norm": 0.43668827414512634, "learning_rate": 5.260391782130913e-06, "loss": 0.3779, "step": 1101 }, { "epoch": 0.15801548609119587, "grad_norm": 0.5378737449645996, "learning_rate": 5.265169612995701e-06, "loss": 0.3767, "step": 1102 }, { "epoch": 0.15815887582449098, "grad_norm": 0.5607795119285583, "learning_rate": 5.2699474438604885e-06, "loss": 0.3822, "step": 1103 }, { "epoch": 0.15830226555778606, "grad_norm": 0.5058465003967285, "learning_rate": 5.274725274725275e-06, "loss": 0.4029, "step": 1104 }, { "epoch": 0.15844565529108115, "grad_norm": 0.5670028328895569, "learning_rate": 5.279503105590062e-06, "loss": 0.3996, "step": 1105 }, { "epoch": 0.15858904502437626, "grad_norm": 0.44802793860435486, "learning_rate": 5.28428093645485e-06, "loss": 0.3585, "step": 1106 }, { "epoch": 0.15873243475767135, "grad_norm": 0.5151559710502625, "learning_rate": 5.289058767319637e-06, "loss": 0.3767, "step": 1107 }, { "epoch": 0.15887582449096643, "grad_norm": 0.4316193163394928, "learning_rate": 5.2938365981844245e-06, "loss": 0.3911, "step": 1108 }, { "epoch": 0.15901921422426155, "grad_norm": 0.4102732241153717, "learning_rate": 5.298614429049213e-06, "loss": 0.4058, "step": 1109 }, { "epoch": 0.15916260395755663, "grad_norm": 0.4328915476799011, "learning_rate": 5.303392259914e-06, "loss": 0.4, "step": 1110 }, { "epoch": 0.15930599369085174, "grad_norm": 0.4141583740711212, "learning_rate": 5.308170090778787e-06, "loss": 0.3883, "step": 1111 }, { "epoch": 0.15944938342414683, "grad_norm": 0.41999346017837524, "learning_rate": 5.312947921643574e-06, "loss": 0.3768, "step": 1112 }, { "epoch": 0.15959277315744191, "grad_norm": 0.4801800549030304, "learning_rate": 5.317725752508361e-06, "loss": 0.394, "step": 1113 }, { "epoch": 0.15973616289073703, "grad_norm": 0.4339461326599121, "learning_rate": 5.322503583373149e-06, "loss": 0.3822, "step": 1114 }, { "epoch": 0.1598795526240321, "grad_norm": 0.4156497120857239, "learning_rate": 5.327281414237936e-06, "loss": 0.3784, "step": 1115 }, { "epoch": 0.16002294235732722, "grad_norm": 0.46161162853240967, "learning_rate": 5.332059245102724e-06, "loss": 0.3718, "step": 1116 }, { "epoch": 0.1601663320906223, "grad_norm": 0.4262489080429077, "learning_rate": 5.336837075967512e-06, "loss": 0.3608, "step": 1117 }, { "epoch": 0.1603097218239174, "grad_norm": 0.4718317687511444, "learning_rate": 5.341614906832298e-06, "loss": 0.3865, "step": 1118 }, { "epoch": 0.1604531115572125, "grad_norm": 0.4562208652496338, "learning_rate": 5.346392737697086e-06, "loss": 0.3824, "step": 1119 }, { "epoch": 0.1605965012905076, "grad_norm": 0.42801544070243835, "learning_rate": 5.351170568561873e-06, "loss": 0.3556, "step": 1120 }, { "epoch": 0.1607398910238027, "grad_norm": 0.4027671217918396, "learning_rate": 5.35594839942666e-06, "loss": 0.376, "step": 1121 }, { "epoch": 0.1608832807570978, "grad_norm": 0.43310362100601196, "learning_rate": 5.360726230291449e-06, "loss": 0.3998, "step": 1122 }, { "epoch": 0.16102667049039288, "grad_norm": 0.44468069076538086, "learning_rate": 5.365504061156236e-06, "loss": 0.3883, "step": 1123 }, { "epoch": 0.161170060223688, "grad_norm": 0.40758347511291504, "learning_rate": 5.370281892021023e-06, "loss": 0.3862, "step": 1124 }, { "epoch": 0.16131344995698307, "grad_norm": 0.4212578237056732, "learning_rate": 5.375059722885811e-06, "loss": 0.3934, "step": 1125 }, { "epoch": 0.1614568396902782, "grad_norm": 0.46301525831222534, "learning_rate": 5.379837553750597e-06, "loss": 0.3992, "step": 1126 }, { "epoch": 0.16160022942357327, "grad_norm": 0.4452148377895355, "learning_rate": 5.384615384615385e-06, "loss": 0.4094, "step": 1127 }, { "epoch": 0.16174361915686836, "grad_norm": 0.4173849821090698, "learning_rate": 5.389393215480172e-06, "loss": 0.3766, "step": 1128 }, { "epoch": 0.16188700889016347, "grad_norm": 0.491195946931839, "learning_rate": 5.39417104634496e-06, "loss": 0.4095, "step": 1129 }, { "epoch": 0.16203039862345855, "grad_norm": 0.3893331289291382, "learning_rate": 5.398948877209748e-06, "loss": 0.3848, "step": 1130 }, { "epoch": 0.16217378835675367, "grad_norm": 0.48556894063949585, "learning_rate": 5.403726708074535e-06, "loss": 0.3918, "step": 1131 }, { "epoch": 0.16231717809004875, "grad_norm": 0.4068482220172882, "learning_rate": 5.408504538939322e-06, "loss": 0.3755, "step": 1132 }, { "epoch": 0.16246056782334384, "grad_norm": 0.46307793259620667, "learning_rate": 5.413282369804109e-06, "loss": 0.4027, "step": 1133 }, { "epoch": 0.16260395755663895, "grad_norm": 0.40113022923469543, "learning_rate": 5.418060200668896e-06, "loss": 0.3838, "step": 1134 }, { "epoch": 0.16274734728993404, "grad_norm": 0.43495047092437744, "learning_rate": 5.422838031533684e-06, "loss": 0.3992, "step": 1135 }, { "epoch": 0.16289073702322915, "grad_norm": 0.40363946557044983, "learning_rate": 5.427615862398472e-06, "loss": 0.3758, "step": 1136 }, { "epoch": 0.16303412675652423, "grad_norm": 0.43621131777763367, "learning_rate": 5.432393693263259e-06, "loss": 0.3693, "step": 1137 }, { "epoch": 0.16317751648981932, "grad_norm": 0.4340543746948242, "learning_rate": 5.437171524128047e-06, "loss": 0.4034, "step": 1138 }, { "epoch": 0.16332090622311443, "grad_norm": 0.3955646753311157, "learning_rate": 5.441949354992834e-06, "loss": 0.3943, "step": 1139 }, { "epoch": 0.16346429595640952, "grad_norm": 0.447169691324234, "learning_rate": 5.4467271858576206e-06, "loss": 0.3656, "step": 1140 }, { "epoch": 0.16360768568970463, "grad_norm": 0.49277549982070923, "learning_rate": 5.451505016722408e-06, "loss": 0.3749, "step": 1141 }, { "epoch": 0.16375107542299971, "grad_norm": 0.39907801151275635, "learning_rate": 5.456282847587195e-06, "loss": 0.3907, "step": 1142 }, { "epoch": 0.1638944651562948, "grad_norm": 0.45925238728523254, "learning_rate": 5.4610606784519835e-06, "loss": 0.372, "step": 1143 }, { "epoch": 0.1640378548895899, "grad_norm": 0.5378758311271667, "learning_rate": 5.465838509316771e-06, "loss": 0.4159, "step": 1144 }, { "epoch": 0.164181244622885, "grad_norm": 0.48579296469688416, "learning_rate": 5.470616340181558e-06, "loss": 0.3971, "step": 1145 }, { "epoch": 0.1643246343561801, "grad_norm": 0.4388284981250763, "learning_rate": 5.475394171046346e-06, "loss": 0.3776, "step": 1146 }, { "epoch": 0.1644680240894752, "grad_norm": 0.4180465042591095, "learning_rate": 5.480172001911133e-06, "loss": 0.4019, "step": 1147 }, { "epoch": 0.16461141382277028, "grad_norm": 0.4474366307258606, "learning_rate": 5.48494983277592e-06, "loss": 0.3965, "step": 1148 }, { "epoch": 0.1647548035560654, "grad_norm": 0.46336933970451355, "learning_rate": 5.489727663640707e-06, "loss": 0.41, "step": 1149 }, { "epoch": 0.16489819328936048, "grad_norm": 0.4311912953853607, "learning_rate": 5.494505494505495e-06, "loss": 0.3899, "step": 1150 }, { "epoch": 0.1650415830226556, "grad_norm": 0.4894212484359741, "learning_rate": 5.4992833253702826e-06, "loss": 0.3603, "step": 1151 }, { "epoch": 0.16518497275595068, "grad_norm": 0.49225157499313354, "learning_rate": 5.50406115623507e-06, "loss": 0.3876, "step": 1152 }, { "epoch": 0.16532836248924576, "grad_norm": 0.4616071581840515, "learning_rate": 5.508838987099857e-06, "loss": 0.3933, "step": 1153 }, { "epoch": 0.16547175222254087, "grad_norm": 0.49545958638191223, "learning_rate": 5.513616817964645e-06, "loss": 0.3835, "step": 1154 }, { "epoch": 0.16561514195583596, "grad_norm": 0.47406697273254395, "learning_rate": 5.518394648829431e-06, "loss": 0.3945, "step": 1155 }, { "epoch": 0.16575853168913104, "grad_norm": 0.5064722299575806, "learning_rate": 5.523172479694219e-06, "loss": 0.3795, "step": 1156 }, { "epoch": 0.16590192142242616, "grad_norm": 0.46109840273857117, "learning_rate": 5.527950310559007e-06, "loss": 0.4001, "step": 1157 }, { "epoch": 0.16604531115572124, "grad_norm": 0.5146347284317017, "learning_rate": 5.532728141423794e-06, "loss": 0.3771, "step": 1158 }, { "epoch": 0.16618870088901636, "grad_norm": 0.49374523758888245, "learning_rate": 5.5375059722885816e-06, "loss": 0.3951, "step": 1159 }, { "epoch": 0.16633209062231144, "grad_norm": 0.44056129455566406, "learning_rate": 5.542283803153369e-06, "loss": 0.3844, "step": 1160 }, { "epoch": 0.16647548035560653, "grad_norm": 0.4645235538482666, "learning_rate": 5.547061634018156e-06, "loss": 0.3753, "step": 1161 }, { "epoch": 0.16661887008890164, "grad_norm": 0.5059448480606079, "learning_rate": 5.551839464882943e-06, "loss": 0.4191, "step": 1162 }, { "epoch": 0.16676225982219672, "grad_norm": 0.45605409145355225, "learning_rate": 5.55661729574773e-06, "loss": 0.3707, "step": 1163 }, { "epoch": 0.16690564955549184, "grad_norm": 0.44967079162597656, "learning_rate": 5.5613951266125185e-06, "loss": 0.36, "step": 1164 }, { "epoch": 0.16704903928878692, "grad_norm": 0.49384552240371704, "learning_rate": 5.566172957477306e-06, "loss": 0.3908, "step": 1165 }, { "epoch": 0.167192429022082, "grad_norm": 0.4802095293998718, "learning_rate": 5.570950788342093e-06, "loss": 0.3906, "step": 1166 }, { "epoch": 0.16733581875537712, "grad_norm": 0.4294726550579071, "learning_rate": 5.575728619206881e-06, "loss": 0.383, "step": 1167 }, { "epoch": 0.1674792084886722, "grad_norm": 0.4965922236442566, "learning_rate": 5.580506450071668e-06, "loss": 0.3717, "step": 1168 }, { "epoch": 0.16762259822196732, "grad_norm": 0.45615068078041077, "learning_rate": 5.585284280936455e-06, "loss": 0.357, "step": 1169 }, { "epoch": 0.1677659879552624, "grad_norm": 0.46221864223480225, "learning_rate": 5.590062111801242e-06, "loss": 0.3741, "step": 1170 }, { "epoch": 0.1679093776885575, "grad_norm": 0.43376511335372925, "learning_rate": 5.59483994266603e-06, "loss": 0.3983, "step": 1171 }, { "epoch": 0.1680527674218526, "grad_norm": 0.5382615327835083, "learning_rate": 5.5996177735308175e-06, "loss": 0.3871, "step": 1172 }, { "epoch": 0.16819615715514769, "grad_norm": 0.44459062814712524, "learning_rate": 5.604395604395605e-06, "loss": 0.3889, "step": 1173 }, { "epoch": 0.1683395468884428, "grad_norm": 0.45489081740379333, "learning_rate": 5.609173435260392e-06, "loss": 0.4079, "step": 1174 }, { "epoch": 0.16848293662173788, "grad_norm": 0.56861811876297, "learning_rate": 5.61395126612518e-06, "loss": 0.415, "step": 1175 }, { "epoch": 0.16862632635503297, "grad_norm": 0.4476411044597626, "learning_rate": 5.618729096989967e-06, "loss": 0.4059, "step": 1176 }, { "epoch": 0.16876971608832808, "grad_norm": 0.5008382797241211, "learning_rate": 5.623506927854755e-06, "loss": 0.4013, "step": 1177 }, { "epoch": 0.16891310582162317, "grad_norm": 0.4688461422920227, "learning_rate": 5.6282847587195426e-06, "loss": 0.3656, "step": 1178 }, { "epoch": 0.16905649555491828, "grad_norm": 0.4485182464122772, "learning_rate": 5.633062589584329e-06, "loss": 0.3642, "step": 1179 }, { "epoch": 0.16919988528821336, "grad_norm": 0.45979562401771545, "learning_rate": 5.6378404204491165e-06, "loss": 0.3982, "step": 1180 }, { "epoch": 0.16934327502150845, "grad_norm": 0.4647439122200012, "learning_rate": 5.642618251313904e-06, "loss": 0.3716, "step": 1181 }, { "epoch": 0.16948666475480356, "grad_norm": 0.4885733723640442, "learning_rate": 5.647396082178691e-06, "loss": 0.3913, "step": 1182 }, { "epoch": 0.16963005448809865, "grad_norm": 0.47020047903060913, "learning_rate": 5.652173913043479e-06, "loss": 0.363, "step": 1183 }, { "epoch": 0.16977344422139376, "grad_norm": 0.46070513129234314, "learning_rate": 5.656951743908267e-06, "loss": 0.3757, "step": 1184 }, { "epoch": 0.16991683395468885, "grad_norm": 0.46859872341156006, "learning_rate": 5.661729574773054e-06, "loss": 0.3709, "step": 1185 }, { "epoch": 0.17006022368798393, "grad_norm": 0.4446485936641693, "learning_rate": 5.666507405637841e-06, "loss": 0.3669, "step": 1186 }, { "epoch": 0.17020361342127904, "grad_norm": 0.4416781961917877, "learning_rate": 5.671285236502628e-06, "loss": 0.3977, "step": 1187 }, { "epoch": 0.17034700315457413, "grad_norm": 0.5016521215438843, "learning_rate": 5.6760630673674155e-06, "loss": 0.4004, "step": 1188 }, { "epoch": 0.17049039288786924, "grad_norm": 0.4403558671474457, "learning_rate": 5.680840898232203e-06, "loss": 0.3993, "step": 1189 }, { "epoch": 0.17063378262116433, "grad_norm": 0.43410658836364746, "learning_rate": 5.68561872909699e-06, "loss": 0.3897, "step": 1190 }, { "epoch": 0.1707771723544594, "grad_norm": 0.4449644982814789, "learning_rate": 5.6903965599617785e-06, "loss": 0.3946, "step": 1191 }, { "epoch": 0.17092056208775452, "grad_norm": 0.4190562963485718, "learning_rate": 5.695174390826566e-06, "loss": 0.3805, "step": 1192 }, { "epoch": 0.1710639518210496, "grad_norm": 0.47592973709106445, "learning_rate": 5.699952221691352e-06, "loss": 0.3866, "step": 1193 }, { "epoch": 0.17120734155434472, "grad_norm": 0.4577448070049286, "learning_rate": 5.70473005255614e-06, "loss": 0.3802, "step": 1194 }, { "epoch": 0.1713507312876398, "grad_norm": 0.4541190564632416, "learning_rate": 5.709507883420927e-06, "loss": 0.3699, "step": 1195 }, { "epoch": 0.1714941210209349, "grad_norm": 0.47089409828186035, "learning_rate": 5.7142857142857145e-06, "loss": 0.3749, "step": 1196 }, { "epoch": 0.17163751075423, "grad_norm": 0.42929109930992126, "learning_rate": 5.719063545150502e-06, "loss": 0.3625, "step": 1197 }, { "epoch": 0.1717809004875251, "grad_norm": 0.45559000968933105, "learning_rate": 5.72384137601529e-06, "loss": 0.392, "step": 1198 }, { "epoch": 0.1719242902208202, "grad_norm": 0.5414654016494751, "learning_rate": 5.7286192068800775e-06, "loss": 0.402, "step": 1199 }, { "epoch": 0.1720676799541153, "grad_norm": 0.4466353952884674, "learning_rate": 5.733397037744865e-06, "loss": 0.4044, "step": 1200 }, { "epoch": 0.17221106968741037, "grad_norm": 0.4874142110347748, "learning_rate": 5.738174868609651e-06, "loss": 0.3652, "step": 1201 }, { "epoch": 0.17235445942070549, "grad_norm": 0.4351606070995331, "learning_rate": 5.742952699474439e-06, "loss": 0.4003, "step": 1202 }, { "epoch": 0.17249784915400057, "grad_norm": 0.4393514096736908, "learning_rate": 5.747730530339226e-06, "loss": 0.3794, "step": 1203 }, { "epoch": 0.17264123888729566, "grad_norm": 0.4832441806793213, "learning_rate": 5.7525083612040135e-06, "loss": 0.398, "step": 1204 }, { "epoch": 0.17278462862059077, "grad_norm": 0.4328100085258484, "learning_rate": 5.757286192068802e-06, "loss": 0.3735, "step": 1205 }, { "epoch": 0.17292801835388585, "grad_norm": 0.45275774598121643, "learning_rate": 5.762064022933589e-06, "loss": 0.3832, "step": 1206 }, { "epoch": 0.17307140808718097, "grad_norm": 0.43210309743881226, "learning_rate": 5.7668418537983765e-06, "loss": 0.3763, "step": 1207 }, { "epoch": 0.17321479782047605, "grad_norm": 0.43472790718078613, "learning_rate": 5.771619684663163e-06, "loss": 0.3604, "step": 1208 }, { "epoch": 0.17335818755377114, "grad_norm": 0.4123733639717102, "learning_rate": 5.77639751552795e-06, "loss": 0.3894, "step": 1209 }, { "epoch": 0.17350157728706625, "grad_norm": 0.4663380980491638, "learning_rate": 5.781175346392738e-06, "loss": 0.3813, "step": 1210 }, { "epoch": 0.17364496702036133, "grad_norm": 0.4824031889438629, "learning_rate": 5.785953177257525e-06, "loss": 0.3799, "step": 1211 }, { "epoch": 0.17378835675365645, "grad_norm": 0.46525266766548157, "learning_rate": 5.790731008122313e-06, "loss": 0.384, "step": 1212 }, { "epoch": 0.17393174648695153, "grad_norm": 0.47444167733192444, "learning_rate": 5.795508838987101e-06, "loss": 0.3686, "step": 1213 }, { "epoch": 0.17407513622024662, "grad_norm": 0.4967861473560333, "learning_rate": 5.800286669851888e-06, "loss": 0.4012, "step": 1214 }, { "epoch": 0.17421852595354173, "grad_norm": 0.4370676279067993, "learning_rate": 5.805064500716675e-06, "loss": 0.3701, "step": 1215 }, { "epoch": 0.17436191568683682, "grad_norm": 0.43237176537513733, "learning_rate": 5.809842331581462e-06, "loss": 0.3846, "step": 1216 }, { "epoch": 0.17450530542013193, "grad_norm": 0.490978866815567, "learning_rate": 5.8146201624462494e-06, "loss": 0.3628, "step": 1217 }, { "epoch": 0.174648695153427, "grad_norm": 0.5041152834892273, "learning_rate": 5.819397993311037e-06, "loss": 0.3709, "step": 1218 }, { "epoch": 0.1747920848867221, "grad_norm": 0.45472466945648193, "learning_rate": 5.824175824175825e-06, "loss": 0.3894, "step": 1219 }, { "epoch": 0.1749354746200172, "grad_norm": 0.48182615637779236, "learning_rate": 5.828953655040612e-06, "loss": 0.4022, "step": 1220 }, { "epoch": 0.1750788643533123, "grad_norm": 0.43898752331733704, "learning_rate": 5.8337314859054e-06, "loss": 0.3937, "step": 1221 }, { "epoch": 0.1752222540866074, "grad_norm": 0.43140944838523865, "learning_rate": 5.838509316770186e-06, "loss": 0.3534, "step": 1222 }, { "epoch": 0.1753656438199025, "grad_norm": 0.4472249150276184, "learning_rate": 5.843287147634974e-06, "loss": 0.3969, "step": 1223 }, { "epoch": 0.17550903355319758, "grad_norm": 0.4089515507221222, "learning_rate": 5.848064978499761e-06, "loss": 0.3596, "step": 1224 }, { "epoch": 0.1756524232864927, "grad_norm": 0.47042256593704224, "learning_rate": 5.852842809364549e-06, "loss": 0.3785, "step": 1225 }, { "epoch": 0.17579581301978778, "grad_norm": 0.45822998881340027, "learning_rate": 5.857620640229337e-06, "loss": 0.3969, "step": 1226 }, { "epoch": 0.1759392027530829, "grad_norm": 0.4309653043746948, "learning_rate": 5.862398471094124e-06, "loss": 0.3783, "step": 1227 }, { "epoch": 0.17608259248637798, "grad_norm": 0.41645702719688416, "learning_rate": 5.867176301958911e-06, "loss": 0.3585, "step": 1228 }, { "epoch": 0.17622598221967306, "grad_norm": 0.4592890441417694, "learning_rate": 5.871954132823699e-06, "loss": 0.4055, "step": 1229 }, { "epoch": 0.17636937195296817, "grad_norm": 0.5066605806350708, "learning_rate": 5.876731963688485e-06, "loss": 0.3988, "step": 1230 }, { "epoch": 0.17651276168626326, "grad_norm": 0.46880683302879333, "learning_rate": 5.881509794553273e-06, "loss": 0.3735, "step": 1231 }, { "epoch": 0.17665615141955837, "grad_norm": 0.5219554901123047, "learning_rate": 5.886287625418061e-06, "loss": 0.3901, "step": 1232 }, { "epoch": 0.17679954115285346, "grad_norm": 0.4718014597892761, "learning_rate": 5.891065456282848e-06, "loss": 0.3896, "step": 1233 }, { "epoch": 0.17694293088614854, "grad_norm": 0.4310028553009033, "learning_rate": 5.895843287147636e-06, "loss": 0.3812, "step": 1234 }, { "epoch": 0.17708632061944365, "grad_norm": 0.4433060586452484, "learning_rate": 5.900621118012423e-06, "loss": 0.3735, "step": 1235 }, { "epoch": 0.17722971035273874, "grad_norm": 0.5056569576263428, "learning_rate": 5.9053989488772104e-06, "loss": 0.3724, "step": 1236 }, { "epoch": 0.17737310008603385, "grad_norm": 0.4243239164352417, "learning_rate": 5.910176779741997e-06, "loss": 0.3945, "step": 1237 }, { "epoch": 0.17751648981932894, "grad_norm": 0.48037832975387573, "learning_rate": 5.914954610606784e-06, "loss": 0.3713, "step": 1238 }, { "epoch": 0.17765987955262402, "grad_norm": 0.47119566798210144, "learning_rate": 5.9197324414715726e-06, "loss": 0.3845, "step": 1239 }, { "epoch": 0.17780326928591914, "grad_norm": 0.5055485367774963, "learning_rate": 5.92451027233636e-06, "loss": 0.3791, "step": 1240 }, { "epoch": 0.17794665901921422, "grad_norm": 0.49925440549850464, "learning_rate": 5.929288103201147e-06, "loss": 0.3934, "step": 1241 }, { "epoch": 0.17809004875250933, "grad_norm": 0.41872623562812805, "learning_rate": 5.934065934065935e-06, "loss": 0.3782, "step": 1242 }, { "epoch": 0.17823343848580442, "grad_norm": 0.44944101572036743, "learning_rate": 5.938843764930722e-06, "loss": 0.3578, "step": 1243 }, { "epoch": 0.1783768282190995, "grad_norm": 0.441725492477417, "learning_rate": 5.943621595795509e-06, "loss": 0.3706, "step": 1244 }, { "epoch": 0.17852021795239462, "grad_norm": 0.455474853515625, "learning_rate": 5.948399426660296e-06, "loss": 0.3808, "step": 1245 }, { "epoch": 0.1786636076856897, "grad_norm": 0.44228413701057434, "learning_rate": 5.953177257525084e-06, "loss": 0.3884, "step": 1246 }, { "epoch": 0.17880699741898481, "grad_norm": 0.572123646736145, "learning_rate": 5.9579550883898716e-06, "loss": 0.3813, "step": 1247 }, { "epoch": 0.1789503871522799, "grad_norm": 0.4474240243434906, "learning_rate": 5.962732919254659e-06, "loss": 0.3853, "step": 1248 }, { "epoch": 0.17909377688557498, "grad_norm": 0.4591982662677765, "learning_rate": 5.967510750119446e-06, "loss": 0.3686, "step": 1249 }, { "epoch": 0.1792371666188701, "grad_norm": 0.5100600719451904, "learning_rate": 5.972288580984234e-06, "loss": 0.3742, "step": 1250 }, { "epoch": 0.17938055635216518, "grad_norm": 0.4247656762599945, "learning_rate": 5.977066411849021e-06, "loss": 0.3687, "step": 1251 }, { "epoch": 0.17952394608546027, "grad_norm": 0.47841426730155945, "learning_rate": 5.981844242713808e-06, "loss": 0.3813, "step": 1252 }, { "epoch": 0.17966733581875538, "grad_norm": 0.478655606508255, "learning_rate": 5.986622073578597e-06, "loss": 0.3963, "step": 1253 }, { "epoch": 0.17981072555205047, "grad_norm": 0.4898900091648102, "learning_rate": 5.991399904443383e-06, "loss": 0.3814, "step": 1254 }, { "epoch": 0.17995411528534558, "grad_norm": 0.5093713402748108, "learning_rate": 5.996177735308171e-06, "loss": 0.3846, "step": 1255 }, { "epoch": 0.18009750501864066, "grad_norm": 0.4501628875732422, "learning_rate": 6.000955566172958e-06, "loss": 0.3778, "step": 1256 }, { "epoch": 0.18024089475193575, "grad_norm": 0.475628525018692, "learning_rate": 6.005733397037745e-06, "loss": 0.3541, "step": 1257 }, { "epoch": 0.18038428448523086, "grad_norm": 0.44187167286872864, "learning_rate": 6.010511227902533e-06, "loss": 0.3759, "step": 1258 }, { "epoch": 0.18052767421852595, "grad_norm": 0.47522953152656555, "learning_rate": 6.015289058767319e-06, "loss": 0.3668, "step": 1259 }, { "epoch": 0.18067106395182106, "grad_norm": 0.4410911500453949, "learning_rate": 6.020066889632108e-06, "loss": 0.3649, "step": 1260 }, { "epoch": 0.18081445368511614, "grad_norm": 0.4674330949783325, "learning_rate": 6.024844720496895e-06, "loss": 0.4041, "step": 1261 }, { "epoch": 0.18095784341841123, "grad_norm": 0.5771647095680237, "learning_rate": 6.029622551361682e-06, "loss": 0.3944, "step": 1262 }, { "epoch": 0.18110123315170634, "grad_norm": 0.4286360740661621, "learning_rate": 6.03440038222647e-06, "loss": 0.3719, "step": 1263 }, { "epoch": 0.18124462288500143, "grad_norm": 0.5701044201850891, "learning_rate": 6.039178213091257e-06, "loss": 0.3671, "step": 1264 }, { "epoch": 0.18138801261829654, "grad_norm": 0.4316636025905609, "learning_rate": 6.043956043956044e-06, "loss": 0.3777, "step": 1265 }, { "epoch": 0.18153140235159163, "grad_norm": 0.4328753650188446, "learning_rate": 6.048733874820831e-06, "loss": 0.3831, "step": 1266 }, { "epoch": 0.1816747920848867, "grad_norm": 0.552888035774231, "learning_rate": 6.05351170568562e-06, "loss": 0.3881, "step": 1267 }, { "epoch": 0.18181818181818182, "grad_norm": 0.4658913016319275, "learning_rate": 6.0582895365504065e-06, "loss": 0.4011, "step": 1268 }, { "epoch": 0.1819615715514769, "grad_norm": 0.4826608896255493, "learning_rate": 6.063067367415194e-06, "loss": 0.4028, "step": 1269 }, { "epoch": 0.18210496128477202, "grad_norm": 0.4866369962692261, "learning_rate": 6.067845198279981e-06, "loss": 0.367, "step": 1270 }, { "epoch": 0.1822483510180671, "grad_norm": 0.45106449723243713, "learning_rate": 6.072623029144769e-06, "loss": 0.3832, "step": 1271 }, { "epoch": 0.1823917407513622, "grad_norm": 0.6062861084938049, "learning_rate": 6.077400860009556e-06, "loss": 0.3751, "step": 1272 }, { "epoch": 0.1825351304846573, "grad_norm": 0.4934801161289215, "learning_rate": 6.082178690874344e-06, "loss": 0.3662, "step": 1273 }, { "epoch": 0.1826785202179524, "grad_norm": 0.48454055190086365, "learning_rate": 6.086956521739132e-06, "loss": 0.3877, "step": 1274 }, { "epoch": 0.1828219099512475, "grad_norm": 0.49516576528549194, "learning_rate": 6.091734352603918e-06, "loss": 0.3788, "step": 1275 }, { "epoch": 0.1829652996845426, "grad_norm": 0.6056767702102661, "learning_rate": 6.0965121834687055e-06, "loss": 0.3836, "step": 1276 }, { "epoch": 0.18310868941783767, "grad_norm": 0.46464043855667114, "learning_rate": 6.101290014333493e-06, "loss": 0.3844, "step": 1277 }, { "epoch": 0.18325207915113279, "grad_norm": 0.4612119793891907, "learning_rate": 6.10606784519828e-06, "loss": 0.3916, "step": 1278 }, { "epoch": 0.18339546888442787, "grad_norm": 0.44133713841438293, "learning_rate": 6.110845676063068e-06, "loss": 0.3762, "step": 1279 }, { "epoch": 0.18353885861772298, "grad_norm": 0.49016812443733215, "learning_rate": 6.115623506927856e-06, "loss": 0.3739, "step": 1280 }, { "epoch": 0.18368224835101807, "grad_norm": 0.43667078018188477, "learning_rate": 6.120401337792643e-06, "loss": 0.3719, "step": 1281 }, { "epoch": 0.18382563808431315, "grad_norm": 0.4712618589401245, "learning_rate": 6.125179168657431e-06, "loss": 0.3726, "step": 1282 }, { "epoch": 0.18396902781760827, "grad_norm": 0.4462161362171173, "learning_rate": 6.129956999522217e-06, "loss": 0.362, "step": 1283 }, { "epoch": 0.18411241755090335, "grad_norm": 0.4359181523323059, "learning_rate": 6.1347348303870045e-06, "loss": 0.3671, "step": 1284 }, { "epoch": 0.18425580728419846, "grad_norm": 0.4275704026222229, "learning_rate": 6.139512661251792e-06, "loss": 0.3768, "step": 1285 }, { "epoch": 0.18439919701749355, "grad_norm": 0.440907746553421, "learning_rate": 6.144290492116579e-06, "loss": 0.3679, "step": 1286 }, { "epoch": 0.18454258675078863, "grad_norm": 0.460388720035553, "learning_rate": 6.1490683229813675e-06, "loss": 0.3737, "step": 1287 }, { "epoch": 0.18468597648408375, "grad_norm": 0.444875568151474, "learning_rate": 6.153846153846155e-06, "loss": 0.3756, "step": 1288 }, { "epoch": 0.18482936621737883, "grad_norm": 0.5319907069206238, "learning_rate": 6.158623984710942e-06, "loss": 0.3871, "step": 1289 }, { "epoch": 0.18497275595067394, "grad_norm": 0.4826795160770416, "learning_rate": 6.163401815575729e-06, "loss": 0.3619, "step": 1290 }, { "epoch": 0.18511614568396903, "grad_norm": 0.46090999245643616, "learning_rate": 6.168179646440516e-06, "loss": 0.3733, "step": 1291 }, { "epoch": 0.18525953541726412, "grad_norm": 0.48642265796661377, "learning_rate": 6.1729574773053035e-06, "loss": 0.3859, "step": 1292 }, { "epoch": 0.18540292515055923, "grad_norm": 0.48362040519714355, "learning_rate": 6.177735308170091e-06, "loss": 0.3861, "step": 1293 }, { "epoch": 0.1855463148838543, "grad_norm": 0.5431490540504456, "learning_rate": 6.182513139034879e-06, "loss": 0.399, "step": 1294 }, { "epoch": 0.18568970461714943, "grad_norm": 0.47129306197166443, "learning_rate": 6.1872909698996665e-06, "loss": 0.3732, "step": 1295 }, { "epoch": 0.1858330943504445, "grad_norm": 0.5064027309417725, "learning_rate": 6.192068800764454e-06, "loss": 0.3952, "step": 1296 }, { "epoch": 0.1859764840837396, "grad_norm": 0.5908556580543518, "learning_rate": 6.19684663162924e-06, "loss": 0.3908, "step": 1297 }, { "epoch": 0.1861198738170347, "grad_norm": 0.4568788707256317, "learning_rate": 6.201624462494028e-06, "loss": 0.3668, "step": 1298 }, { "epoch": 0.1862632635503298, "grad_norm": 0.5126976370811462, "learning_rate": 6.206402293358815e-06, "loss": 0.3789, "step": 1299 }, { "epoch": 0.18640665328362488, "grad_norm": 0.4458827078342438, "learning_rate": 6.2111801242236025e-06, "loss": 0.4108, "step": 1300 }, { "epoch": 0.18655004301692, "grad_norm": 0.4373418092727661, "learning_rate": 6.215957955088391e-06, "loss": 0.38, "step": 1301 }, { "epoch": 0.18669343275021508, "grad_norm": 0.5250642895698547, "learning_rate": 6.220735785953178e-06, "loss": 0.3999, "step": 1302 }, { "epoch": 0.1868368224835102, "grad_norm": 0.4774230122566223, "learning_rate": 6.2255136168179655e-06, "loss": 0.3769, "step": 1303 }, { "epoch": 0.18698021221680527, "grad_norm": 0.48131054639816284, "learning_rate": 6.230291447682753e-06, "loss": 0.3763, "step": 1304 }, { "epoch": 0.18712360195010036, "grad_norm": 0.5054311752319336, "learning_rate": 6.2350692785475394e-06, "loss": 0.3697, "step": 1305 }, { "epoch": 0.18726699168339547, "grad_norm": 0.47778499126434326, "learning_rate": 6.239847109412327e-06, "loss": 0.3784, "step": 1306 }, { "epoch": 0.18741038141669056, "grad_norm": 0.4764214754104614, "learning_rate": 6.244624940277114e-06, "loss": 0.4119, "step": 1307 }, { "epoch": 0.18755377114998567, "grad_norm": 0.4829418361186981, "learning_rate": 6.249402771141902e-06, "loss": 0.3679, "step": 1308 }, { "epoch": 0.18769716088328076, "grad_norm": 0.4468190670013428, "learning_rate": 6.25418060200669e-06, "loss": 0.3754, "step": 1309 }, { "epoch": 0.18784055061657584, "grad_norm": 0.3883448541164398, "learning_rate": 6.258958432871477e-06, "loss": 0.3682, "step": 1310 }, { "epoch": 0.18798394034987095, "grad_norm": 0.459577739238739, "learning_rate": 6.2637362637362645e-06, "loss": 0.3849, "step": 1311 }, { "epoch": 0.18812733008316604, "grad_norm": 0.43735504150390625, "learning_rate": 6.268514094601051e-06, "loss": 0.3773, "step": 1312 }, { "epoch": 0.18827071981646115, "grad_norm": 0.4546605944633484, "learning_rate": 6.2732919254658384e-06, "loss": 0.3484, "step": 1313 }, { "epoch": 0.18841410954975624, "grad_norm": 0.41068774461746216, "learning_rate": 6.278069756330626e-06, "loss": 0.353, "step": 1314 }, { "epoch": 0.18855749928305132, "grad_norm": 0.43890446424484253, "learning_rate": 6.282847587195414e-06, "loss": 0.3653, "step": 1315 }, { "epoch": 0.18870088901634643, "grad_norm": 0.42807960510253906, "learning_rate": 6.287625418060201e-06, "loss": 0.3968, "step": 1316 }, { "epoch": 0.18884427874964152, "grad_norm": 0.45587489008903503, "learning_rate": 6.292403248924989e-06, "loss": 0.3676, "step": 1317 }, { "epoch": 0.18898766848293663, "grad_norm": 0.44921204447746277, "learning_rate": 6.297181079789776e-06, "loss": 0.3542, "step": 1318 }, { "epoch": 0.18913105821623172, "grad_norm": 0.41493532061576843, "learning_rate": 6.301958910654563e-06, "loss": 0.3935, "step": 1319 }, { "epoch": 0.1892744479495268, "grad_norm": 0.41581931710243225, "learning_rate": 6.30673674151935e-06, "loss": 0.3732, "step": 1320 }, { "epoch": 0.18941783768282192, "grad_norm": 0.42618751525878906, "learning_rate": 6.311514572384138e-06, "loss": 0.3763, "step": 1321 }, { "epoch": 0.189561227416117, "grad_norm": 0.4076145887374878, "learning_rate": 6.316292403248926e-06, "loss": 0.3783, "step": 1322 }, { "epoch": 0.1897046171494121, "grad_norm": 0.4302264451980591, "learning_rate": 6.321070234113713e-06, "loss": 0.3601, "step": 1323 }, { "epoch": 0.1898480068827072, "grad_norm": 0.4241527318954468, "learning_rate": 6.3258480649785004e-06, "loss": 0.3828, "step": 1324 }, { "epoch": 0.18999139661600228, "grad_norm": 0.42800506949424744, "learning_rate": 6.330625895843288e-06, "loss": 0.3728, "step": 1325 }, { "epoch": 0.1901347863492974, "grad_norm": 0.43013715744018555, "learning_rate": 6.335403726708075e-06, "loss": 0.3829, "step": 1326 }, { "epoch": 0.19027817608259248, "grad_norm": 0.4530632495880127, "learning_rate": 6.340181557572862e-06, "loss": 0.4085, "step": 1327 }, { "epoch": 0.1904215658158876, "grad_norm": 0.4213431477546692, "learning_rate": 6.34495938843765e-06, "loss": 0.3852, "step": 1328 }, { "epoch": 0.19056495554918268, "grad_norm": 0.4449235796928406, "learning_rate": 6.349737219302437e-06, "loss": 0.3657, "step": 1329 }, { "epoch": 0.19070834528247776, "grad_norm": 0.4628525972366333, "learning_rate": 6.354515050167225e-06, "loss": 0.3966, "step": 1330 }, { "epoch": 0.19085173501577288, "grad_norm": 0.3885108530521393, "learning_rate": 6.359292881032012e-06, "loss": 0.3716, "step": 1331 }, { "epoch": 0.19099512474906796, "grad_norm": 0.5338886976242065, "learning_rate": 6.3640707118967995e-06, "loss": 0.3708, "step": 1332 }, { "epoch": 0.19113851448236308, "grad_norm": 0.43460696935653687, "learning_rate": 6.368848542761587e-06, "loss": 0.3553, "step": 1333 }, { "epoch": 0.19128190421565816, "grad_norm": 0.45119357109069824, "learning_rate": 6.373626373626373e-06, "loss": 0.3733, "step": 1334 }, { "epoch": 0.19142529394895325, "grad_norm": 0.6359733939170837, "learning_rate": 6.3784042044911624e-06, "loss": 0.391, "step": 1335 }, { "epoch": 0.19156868368224836, "grad_norm": 0.4451442062854767, "learning_rate": 6.383182035355949e-06, "loss": 0.3899, "step": 1336 }, { "epoch": 0.19171207341554344, "grad_norm": 0.45038652420043945, "learning_rate": 6.387959866220736e-06, "loss": 0.3453, "step": 1337 }, { "epoch": 0.19185546314883856, "grad_norm": 0.4836343228816986, "learning_rate": 6.392737697085524e-06, "loss": 0.3772, "step": 1338 }, { "epoch": 0.19199885288213364, "grad_norm": 0.42347121238708496, "learning_rate": 6.397515527950311e-06, "loss": 0.378, "step": 1339 }, { "epoch": 0.19214224261542873, "grad_norm": 0.4338068962097168, "learning_rate": 6.4022933588150985e-06, "loss": 0.3935, "step": 1340 }, { "epoch": 0.19228563234872384, "grad_norm": 0.4450666904449463, "learning_rate": 6.407071189679885e-06, "loss": 0.3931, "step": 1341 }, { "epoch": 0.19242902208201892, "grad_norm": 0.4545268714427948, "learning_rate": 6.411849020544674e-06, "loss": 0.3706, "step": 1342 }, { "epoch": 0.192572411815314, "grad_norm": 0.436441034078598, "learning_rate": 6.416626851409461e-06, "loss": 0.4112, "step": 1343 }, { "epoch": 0.19271580154860912, "grad_norm": 0.4471248388290405, "learning_rate": 6.421404682274248e-06, "loss": 0.3713, "step": 1344 }, { "epoch": 0.1928591912819042, "grad_norm": 0.4920189678668976, "learning_rate": 6.426182513139035e-06, "loss": 0.3903, "step": 1345 }, { "epoch": 0.19300258101519932, "grad_norm": 0.42923519015312195, "learning_rate": 6.430960344003823e-06, "loss": 0.3696, "step": 1346 }, { "epoch": 0.1931459707484944, "grad_norm": 0.47068294882774353, "learning_rate": 6.43573817486861e-06, "loss": 0.3893, "step": 1347 }, { "epoch": 0.1932893604817895, "grad_norm": 0.43979567289352417, "learning_rate": 6.4405160057333975e-06, "loss": 0.3728, "step": 1348 }, { "epoch": 0.1934327502150846, "grad_norm": 0.4598051607608795, "learning_rate": 6.445293836598186e-06, "loss": 0.3767, "step": 1349 }, { "epoch": 0.1935761399483797, "grad_norm": 0.43485257029533386, "learning_rate": 6.450071667462972e-06, "loss": 0.3642, "step": 1350 }, { "epoch": 0.1937195296816748, "grad_norm": 0.4531552195549011, "learning_rate": 6.45484949832776e-06, "loss": 0.4001, "step": 1351 }, { "epoch": 0.1938629194149699, "grad_norm": 0.4796309471130371, "learning_rate": 6.459627329192547e-06, "loss": 0.3895, "step": 1352 }, { "epoch": 0.19400630914826497, "grad_norm": 0.47837549448013306, "learning_rate": 6.464405160057334e-06, "loss": 0.3668, "step": 1353 }, { "epoch": 0.19414969888156008, "grad_norm": 0.47343921661376953, "learning_rate": 6.469182990922122e-06, "loss": 0.3694, "step": 1354 }, { "epoch": 0.19429308861485517, "grad_norm": 0.4143138825893402, "learning_rate": 6.473960821786909e-06, "loss": 0.3922, "step": 1355 }, { "epoch": 0.19443647834815028, "grad_norm": 0.41690999269485474, "learning_rate": 6.478738652651697e-06, "loss": 0.3656, "step": 1356 }, { "epoch": 0.19457986808144537, "grad_norm": 0.4179277718067169, "learning_rate": 6.483516483516485e-06, "loss": 0.3579, "step": 1357 }, { "epoch": 0.19472325781474045, "grad_norm": 0.4629082679748535, "learning_rate": 6.488294314381271e-06, "loss": 0.3928, "step": 1358 }, { "epoch": 0.19486664754803557, "grad_norm": 0.49849414825439453, "learning_rate": 6.493072145246059e-06, "loss": 0.3668, "step": 1359 }, { "epoch": 0.19501003728133065, "grad_norm": 0.4141697883605957, "learning_rate": 6.497849976110846e-06, "loss": 0.391, "step": 1360 }, { "epoch": 0.19515342701462576, "grad_norm": 0.46692514419555664, "learning_rate": 6.502627806975633e-06, "loss": 0.3959, "step": 1361 }, { "epoch": 0.19529681674792085, "grad_norm": 0.40710049867630005, "learning_rate": 6.507405637840421e-06, "loss": 0.3642, "step": 1362 }, { "epoch": 0.19544020648121593, "grad_norm": 0.417940616607666, "learning_rate": 6.512183468705209e-06, "loss": 0.3628, "step": 1363 }, { "epoch": 0.19558359621451105, "grad_norm": 0.40157344937324524, "learning_rate": 6.516961299569996e-06, "loss": 0.3841, "step": 1364 }, { "epoch": 0.19572698594780613, "grad_norm": 0.44488298892974854, "learning_rate": 6.521739130434783e-06, "loss": 0.3813, "step": 1365 }, { "epoch": 0.19587037568110124, "grad_norm": 0.433841735124588, "learning_rate": 6.52651696129957e-06, "loss": 0.3685, "step": 1366 }, { "epoch": 0.19601376541439633, "grad_norm": 0.4973697066307068, "learning_rate": 6.531294792164358e-06, "loss": 0.377, "step": 1367 }, { "epoch": 0.19615715514769141, "grad_norm": 0.44415566325187683, "learning_rate": 6.536072623029145e-06, "loss": 0.3751, "step": 1368 }, { "epoch": 0.19630054488098653, "grad_norm": 0.4732089042663574, "learning_rate": 6.540850453893932e-06, "loss": 0.3776, "step": 1369 }, { "epoch": 0.1964439346142816, "grad_norm": 0.4443933963775635, "learning_rate": 6.545628284758721e-06, "loss": 0.3462, "step": 1370 }, { "epoch": 0.19658732434757673, "grad_norm": 0.45637211203575134, "learning_rate": 6.550406115623508e-06, "loss": 0.3765, "step": 1371 }, { "epoch": 0.1967307140808718, "grad_norm": 0.40503716468811035, "learning_rate": 6.5551839464882945e-06, "loss": 0.3929, "step": 1372 }, { "epoch": 0.1968741038141669, "grad_norm": 0.43170472979545593, "learning_rate": 6.559961777353082e-06, "loss": 0.4016, "step": 1373 }, { "epoch": 0.197017493547462, "grad_norm": 0.43391332030296326, "learning_rate": 6.564739608217869e-06, "loss": 0.3891, "step": 1374 }, { "epoch": 0.1971608832807571, "grad_norm": 0.40735697746276855, "learning_rate": 6.569517439082657e-06, "loss": 0.3606, "step": 1375 }, { "epoch": 0.1973042730140522, "grad_norm": 0.4358409643173218, "learning_rate": 6.574295269947445e-06, "loss": 0.3733, "step": 1376 }, { "epoch": 0.1974476627473473, "grad_norm": 0.46097609400749207, "learning_rate": 6.579073100812232e-06, "loss": 0.3394, "step": 1377 }, { "epoch": 0.19759105248064238, "grad_norm": 0.42589133977890015, "learning_rate": 6.58385093167702e-06, "loss": 0.4067, "step": 1378 }, { "epoch": 0.1977344422139375, "grad_norm": 0.5024645924568176, "learning_rate": 6.588628762541807e-06, "loss": 0.3811, "step": 1379 }, { "epoch": 0.19787783194723257, "grad_norm": 0.45240721106529236, "learning_rate": 6.5934065934065935e-06, "loss": 0.364, "step": 1380 }, { "epoch": 0.1980212216805277, "grad_norm": 0.3930383324623108, "learning_rate": 6.598184424271381e-06, "loss": 0.3838, "step": 1381 }, { "epoch": 0.19816461141382277, "grad_norm": 0.5150833129882812, "learning_rate": 6.602962255136168e-06, "loss": 0.374, "step": 1382 }, { "epoch": 0.19830800114711786, "grad_norm": 0.3993018865585327, "learning_rate": 6.6077400860009565e-06, "loss": 0.379, "step": 1383 }, { "epoch": 0.19845139088041297, "grad_norm": 0.39134106040000916, "learning_rate": 6.612517916865744e-06, "loss": 0.3778, "step": 1384 }, { "epoch": 0.19859478061370806, "grad_norm": 0.5182577967643738, "learning_rate": 6.617295747730531e-06, "loss": 0.3705, "step": 1385 }, { "epoch": 0.19873817034700317, "grad_norm": 0.52330482006073, "learning_rate": 6.622073578595319e-06, "loss": 0.4053, "step": 1386 }, { "epoch": 0.19888156008029825, "grad_norm": 0.4110066592693329, "learning_rate": 6.626851409460105e-06, "loss": 0.3592, "step": 1387 }, { "epoch": 0.19902494981359334, "grad_norm": 0.4921492338180542, "learning_rate": 6.6316292403248926e-06, "loss": 0.3723, "step": 1388 }, { "epoch": 0.19916833954688845, "grad_norm": 0.5132524967193604, "learning_rate": 6.63640707118968e-06, "loss": 0.3692, "step": 1389 }, { "epoch": 0.19931172928018354, "grad_norm": 0.42605239152908325, "learning_rate": 6.641184902054468e-06, "loss": 0.3897, "step": 1390 }, { "epoch": 0.19945511901347862, "grad_norm": 0.43105584383010864, "learning_rate": 6.6459627329192555e-06, "loss": 0.3705, "step": 1391 }, { "epoch": 0.19959850874677373, "grad_norm": 0.46601778268814087, "learning_rate": 6.650740563784043e-06, "loss": 0.3564, "step": 1392 }, { "epoch": 0.19974189848006882, "grad_norm": 0.4343614876270294, "learning_rate": 6.65551839464883e-06, "loss": 0.378, "step": 1393 }, { "epoch": 0.19988528821336393, "grad_norm": 0.43924397230148315, "learning_rate": 6.660296225513617e-06, "loss": 0.3762, "step": 1394 }, { "epoch": 0.20002867794665902, "grad_norm": 0.4759392738342285, "learning_rate": 6.665074056378404e-06, "loss": 0.3711, "step": 1395 }, { "epoch": 0.2001720676799541, "grad_norm": 0.4320766031742096, "learning_rate": 6.6698518872431916e-06, "loss": 0.3775, "step": 1396 }, { "epoch": 0.20031545741324921, "grad_norm": 0.4265623092651367, "learning_rate": 6.67462971810798e-06, "loss": 0.3645, "step": 1397 }, { "epoch": 0.2004588471465443, "grad_norm": 0.4199308156967163, "learning_rate": 6.679407548972767e-06, "loss": 0.396, "step": 1398 }, { "epoch": 0.2006022368798394, "grad_norm": 0.4123990535736084, "learning_rate": 6.6841853798375545e-06, "loss": 0.3831, "step": 1399 }, { "epoch": 0.2007456266131345, "grad_norm": 0.4311105012893677, "learning_rate": 6.688963210702342e-06, "loss": 0.3852, "step": 1400 }, { "epoch": 0.20088901634642958, "grad_norm": 0.4260129928588867, "learning_rate": 6.693741041567129e-06, "loss": 0.3721, "step": 1401 }, { "epoch": 0.2010324060797247, "grad_norm": 0.41041630506515503, "learning_rate": 6.698518872431916e-06, "loss": 0.3951, "step": 1402 }, { "epoch": 0.20117579581301978, "grad_norm": 0.3906494081020355, "learning_rate": 6.703296703296703e-06, "loss": 0.3652, "step": 1403 }, { "epoch": 0.2013191855463149, "grad_norm": 0.4359085261821747, "learning_rate": 6.7080745341614914e-06, "loss": 0.3556, "step": 1404 }, { "epoch": 0.20146257527960998, "grad_norm": 0.4352289140224457, "learning_rate": 6.712852365026279e-06, "loss": 0.3627, "step": 1405 }, { "epoch": 0.20160596501290506, "grad_norm": 0.41784390807151794, "learning_rate": 6.717630195891066e-06, "loss": 0.3759, "step": 1406 }, { "epoch": 0.20174935474620018, "grad_norm": 0.4895695745944977, "learning_rate": 6.7224080267558536e-06, "loss": 0.3801, "step": 1407 }, { "epoch": 0.20189274447949526, "grad_norm": 0.44313937425613403, "learning_rate": 6.727185857620641e-06, "loss": 0.3885, "step": 1408 }, { "epoch": 0.20203613421279037, "grad_norm": 0.46529263257980347, "learning_rate": 6.7319636884854275e-06, "loss": 0.3917, "step": 1409 }, { "epoch": 0.20217952394608546, "grad_norm": 0.4205860197544098, "learning_rate": 6.736741519350215e-06, "loss": 0.3772, "step": 1410 }, { "epoch": 0.20232291367938054, "grad_norm": 0.4408707320690155, "learning_rate": 6.741519350215003e-06, "loss": 0.3727, "step": 1411 }, { "epoch": 0.20246630341267566, "grad_norm": 0.4271821975708008, "learning_rate": 6.7462971810797904e-06, "loss": 0.3805, "step": 1412 }, { "epoch": 0.20260969314597074, "grad_norm": 0.38085129857063293, "learning_rate": 6.751075011944578e-06, "loss": 0.3779, "step": 1413 }, { "epoch": 0.20275308287926586, "grad_norm": 0.442579060792923, "learning_rate": 6.755852842809365e-06, "loss": 0.3763, "step": 1414 }, { "epoch": 0.20289647261256094, "grad_norm": 0.458117812871933, "learning_rate": 6.7606306736741526e-06, "loss": 0.3557, "step": 1415 }, { "epoch": 0.20303986234585603, "grad_norm": 0.42345383763313293, "learning_rate": 6.765408504538939e-06, "loss": 0.3832, "step": 1416 }, { "epoch": 0.20318325207915114, "grad_norm": 0.5358066558837891, "learning_rate": 6.7701863354037265e-06, "loss": 0.3801, "step": 1417 }, { "epoch": 0.20332664181244622, "grad_norm": 0.43665000796318054, "learning_rate": 6.774964166268515e-06, "loss": 0.3716, "step": 1418 }, { "epoch": 0.20347003154574134, "grad_norm": 0.4489595890045166, "learning_rate": 6.779741997133302e-06, "loss": 0.3868, "step": 1419 }, { "epoch": 0.20361342127903642, "grad_norm": 0.4140758216381073, "learning_rate": 6.7845198279980895e-06, "loss": 0.3697, "step": 1420 }, { "epoch": 0.2037568110123315, "grad_norm": 0.4686461091041565, "learning_rate": 6.789297658862877e-06, "loss": 0.3634, "step": 1421 }, { "epoch": 0.20390020074562662, "grad_norm": 0.43578919768333435, "learning_rate": 6.794075489727664e-06, "loss": 0.3735, "step": 1422 }, { "epoch": 0.2040435904789217, "grad_norm": 0.4569375514984131, "learning_rate": 6.798853320592452e-06, "loss": 0.3581, "step": 1423 }, { "epoch": 0.20418698021221682, "grad_norm": 0.4251011908054352, "learning_rate": 6.80363115145724e-06, "loss": 0.3652, "step": 1424 }, { "epoch": 0.2043303699455119, "grad_norm": 0.4713062644004822, "learning_rate": 6.808408982322026e-06, "loss": 0.3658, "step": 1425 }, { "epoch": 0.204473759678807, "grad_norm": 0.45271021127700806, "learning_rate": 6.813186813186814e-06, "loss": 0.405, "step": 1426 }, { "epoch": 0.2046171494121021, "grad_norm": 0.5151425004005432, "learning_rate": 6.817964644051601e-06, "loss": 0.3788, "step": 1427 }, { "epoch": 0.20476053914539719, "grad_norm": 0.4217826724052429, "learning_rate": 6.8227424749163885e-06, "loss": 0.3876, "step": 1428 }, { "epoch": 0.2049039288786923, "grad_norm": 0.45592859387397766, "learning_rate": 6.827520305781176e-06, "loss": 0.3663, "step": 1429 }, { "epoch": 0.20504731861198738, "grad_norm": 0.47955718636512756, "learning_rate": 6.832298136645963e-06, "loss": 0.3726, "step": 1430 }, { "epoch": 0.20519070834528247, "grad_norm": 0.4346364736557007, "learning_rate": 6.8370759675107514e-06, "loss": 0.3851, "step": 1431 }, { "epoch": 0.20533409807857758, "grad_norm": 0.40311843156814575, "learning_rate": 6.841853798375539e-06, "loss": 0.3661, "step": 1432 }, { "epoch": 0.20547748781187267, "grad_norm": 0.441511869430542, "learning_rate": 6.846631629240325e-06, "loss": 0.3704, "step": 1433 }, { "epoch": 0.20562087754516778, "grad_norm": 0.4635191857814789, "learning_rate": 6.851409460105113e-06, "loss": 0.3675, "step": 1434 }, { "epoch": 0.20576426727846286, "grad_norm": 0.4086994528770447, "learning_rate": 6.8561872909699e-06, "loss": 0.3695, "step": 1435 }, { "epoch": 0.20590765701175795, "grad_norm": 0.48896855115890503, "learning_rate": 6.8609651218346875e-06, "loss": 0.3963, "step": 1436 }, { "epoch": 0.20605104674505306, "grad_norm": 0.4702501595020294, "learning_rate": 6.865742952699475e-06, "loss": 0.3805, "step": 1437 }, { "epoch": 0.20619443647834815, "grad_norm": 0.4283773601055145, "learning_rate": 6.870520783564263e-06, "loss": 0.3652, "step": 1438 }, { "epoch": 0.20633782621164323, "grad_norm": 0.44715702533721924, "learning_rate": 6.8752986144290505e-06, "loss": 0.3745, "step": 1439 }, { "epoch": 0.20648121594493835, "grad_norm": 0.5152228474617004, "learning_rate": 6.880076445293837e-06, "loss": 0.3798, "step": 1440 }, { "epoch": 0.20662460567823343, "grad_norm": 0.42797279357910156, "learning_rate": 6.884854276158624e-06, "loss": 0.3792, "step": 1441 }, { "epoch": 0.20676799541152854, "grad_norm": 0.4428170621395111, "learning_rate": 6.889632107023412e-06, "loss": 0.4066, "step": 1442 }, { "epoch": 0.20691138514482363, "grad_norm": 0.4557800590991974, "learning_rate": 6.894409937888199e-06, "loss": 0.3774, "step": 1443 }, { "epoch": 0.2070547748781187, "grad_norm": 0.49240657687187195, "learning_rate": 6.8991877687529865e-06, "loss": 0.387, "step": 1444 }, { "epoch": 0.20719816461141383, "grad_norm": 0.44219082593917847, "learning_rate": 6.903965599617775e-06, "loss": 0.3573, "step": 1445 }, { "epoch": 0.2073415543447089, "grad_norm": 0.4712727665901184, "learning_rate": 6.908743430482562e-06, "loss": 0.3874, "step": 1446 }, { "epoch": 0.20748494407800402, "grad_norm": 0.45401468873023987, "learning_rate": 6.913521261347349e-06, "loss": 0.358, "step": 1447 }, { "epoch": 0.2076283338112991, "grad_norm": 0.5300387144088745, "learning_rate": 6.918299092212136e-06, "loss": 0.3899, "step": 1448 }, { "epoch": 0.2077717235445942, "grad_norm": 0.5309650897979736, "learning_rate": 6.923076923076923e-06, "loss": 0.3715, "step": 1449 }, { "epoch": 0.2079151132778893, "grad_norm": 0.4449687600135803, "learning_rate": 6.927854753941711e-06, "loss": 0.3812, "step": 1450 }, { "epoch": 0.2080585030111844, "grad_norm": 0.4277084767818451, "learning_rate": 6.932632584806498e-06, "loss": 0.3727, "step": 1451 }, { "epoch": 0.2082018927444795, "grad_norm": 0.5455196499824524, "learning_rate": 6.937410415671286e-06, "loss": 0.3733, "step": 1452 }, { "epoch": 0.2083452824777746, "grad_norm": 0.43570268154144287, "learning_rate": 6.942188246536074e-06, "loss": 0.3694, "step": 1453 }, { "epoch": 0.20848867221106968, "grad_norm": 0.4932262897491455, "learning_rate": 6.946966077400861e-06, "loss": 0.3802, "step": 1454 }, { "epoch": 0.2086320619443648, "grad_norm": 0.4871554970741272, "learning_rate": 6.951743908265648e-06, "loss": 0.3737, "step": 1455 }, { "epoch": 0.20877545167765987, "grad_norm": 0.42310163378715515, "learning_rate": 6.956521739130435e-06, "loss": 0.378, "step": 1456 }, { "epoch": 0.208918841410955, "grad_norm": 0.48620161414146423, "learning_rate": 6.961299569995222e-06, "loss": 0.3521, "step": 1457 }, { "epoch": 0.20906223114425007, "grad_norm": 0.48781248927116394, "learning_rate": 6.96607740086001e-06, "loss": 0.3829, "step": 1458 }, { "epoch": 0.20920562087754516, "grad_norm": 0.4586291015148163, "learning_rate": 6.970855231724798e-06, "loss": 0.3882, "step": 1459 }, { "epoch": 0.20934901061084027, "grad_norm": 0.49115198850631714, "learning_rate": 6.975633062589585e-06, "loss": 0.3695, "step": 1460 }, { "epoch": 0.20949240034413535, "grad_norm": 0.42652979493141174, "learning_rate": 6.980410893454373e-06, "loss": 0.3766, "step": 1461 }, { "epoch": 0.20963579007743047, "grad_norm": 0.42927104234695435, "learning_rate": 6.985188724319159e-06, "loss": 0.3761, "step": 1462 }, { "epoch": 0.20977917981072555, "grad_norm": 0.5059802532196045, "learning_rate": 6.989966555183947e-06, "loss": 0.3602, "step": 1463 }, { "epoch": 0.20992256954402064, "grad_norm": 0.4677756130695343, "learning_rate": 6.994744386048734e-06, "loss": 0.345, "step": 1464 }, { "epoch": 0.21006595927731575, "grad_norm": 0.4061398506164551, "learning_rate": 6.999522216913521e-06, "loss": 0.3751, "step": 1465 }, { "epoch": 0.21020934901061084, "grad_norm": 0.4376729428768158, "learning_rate": 7.00430004777831e-06, "loss": 0.3696, "step": 1466 }, { "epoch": 0.21035273874390595, "grad_norm": 0.45138850808143616, "learning_rate": 7.009077878643097e-06, "loss": 0.3738, "step": 1467 }, { "epoch": 0.21049612847720103, "grad_norm": 0.5008721351623535, "learning_rate": 7.013855709507884e-06, "loss": 0.3904, "step": 1468 }, { "epoch": 0.21063951821049612, "grad_norm": 0.3989749252796173, "learning_rate": 7.018633540372671e-06, "loss": 0.3541, "step": 1469 }, { "epoch": 0.21078290794379123, "grad_norm": 0.42141467332839966, "learning_rate": 7.023411371237458e-06, "loss": 0.3817, "step": 1470 }, { "epoch": 0.21092629767708632, "grad_norm": 0.4459751546382904, "learning_rate": 7.028189202102246e-06, "loss": 0.3668, "step": 1471 }, { "epoch": 0.21106968741038143, "grad_norm": 0.47751855850219727, "learning_rate": 7.032967032967034e-06, "loss": 0.3729, "step": 1472 }, { "epoch": 0.21121307714367651, "grad_norm": 0.39864057302474976, "learning_rate": 7.037744863831821e-06, "loss": 0.3577, "step": 1473 }, { "epoch": 0.2113564668769716, "grad_norm": 0.44668370485305786, "learning_rate": 7.042522694696609e-06, "loss": 0.3767, "step": 1474 }, { "epoch": 0.2114998566102667, "grad_norm": 0.43273642659187317, "learning_rate": 7.047300525561396e-06, "loss": 0.3699, "step": 1475 }, { "epoch": 0.2116432463435618, "grad_norm": 0.41433483362197876, "learning_rate": 7.052078356426183e-06, "loss": 0.3597, "step": 1476 }, { "epoch": 0.2117866360768569, "grad_norm": 0.40974193811416626, "learning_rate": 7.05685618729097e-06, "loss": 0.3755, "step": 1477 }, { "epoch": 0.211930025810152, "grad_norm": 0.5043480396270752, "learning_rate": 7.061634018155757e-06, "loss": 0.3948, "step": 1478 }, { "epoch": 0.21207341554344708, "grad_norm": 0.3964613676071167, "learning_rate": 7.0664118490205455e-06, "loss": 0.3761, "step": 1479 }, { "epoch": 0.2122168052767422, "grad_norm": 0.45512813329696655, "learning_rate": 7.071189679885333e-06, "loss": 0.3815, "step": 1480 }, { "epoch": 0.21236019501003728, "grad_norm": 0.4278451204299927, "learning_rate": 7.07596751075012e-06, "loss": 0.3898, "step": 1481 }, { "epoch": 0.2125035847433324, "grad_norm": 0.4161425828933716, "learning_rate": 7.080745341614908e-06, "loss": 0.3532, "step": 1482 }, { "epoch": 0.21264697447662748, "grad_norm": 0.45625463128089905, "learning_rate": 7.085523172479695e-06, "loss": 0.3754, "step": 1483 }, { "epoch": 0.21279036420992256, "grad_norm": 0.45565181970596313, "learning_rate": 7.0903010033444816e-06, "loss": 0.3866, "step": 1484 }, { "epoch": 0.21293375394321767, "grad_norm": 0.44929441809654236, "learning_rate": 7.095078834209269e-06, "loss": 0.3639, "step": 1485 }, { "epoch": 0.21307714367651276, "grad_norm": 0.45541179180145264, "learning_rate": 7.099856665074057e-06, "loss": 0.3761, "step": 1486 }, { "epoch": 0.21322053340980784, "grad_norm": 0.45103949308395386, "learning_rate": 7.1046344959388445e-06, "loss": 0.3679, "step": 1487 }, { "epoch": 0.21336392314310296, "grad_norm": 0.4885012209415436, "learning_rate": 7.109412326803632e-06, "loss": 0.3842, "step": 1488 }, { "epoch": 0.21350731287639804, "grad_norm": 0.44871068000793457, "learning_rate": 7.114190157668419e-06, "loss": 0.3648, "step": 1489 }, { "epoch": 0.21365070260969315, "grad_norm": 0.4524807631969452, "learning_rate": 7.118967988533207e-06, "loss": 0.3734, "step": 1490 }, { "epoch": 0.21379409234298824, "grad_norm": 0.4226072132587433, "learning_rate": 7.123745819397993e-06, "loss": 0.3644, "step": 1491 }, { "epoch": 0.21393748207628333, "grad_norm": 0.42903390526771545, "learning_rate": 7.128523650262781e-06, "loss": 0.3725, "step": 1492 }, { "epoch": 0.21408087180957844, "grad_norm": 0.4299779534339905, "learning_rate": 7.133301481127569e-06, "loss": 0.3555, "step": 1493 }, { "epoch": 0.21422426154287352, "grad_norm": 0.4907664656639099, "learning_rate": 7.138079311992356e-06, "loss": 0.3834, "step": 1494 }, { "epoch": 0.21436765127616864, "grad_norm": 0.42705196142196655, "learning_rate": 7.1428571428571436e-06, "loss": 0.3815, "step": 1495 }, { "epoch": 0.21451104100946372, "grad_norm": 0.4532662630081177, "learning_rate": 7.147634973721931e-06, "loss": 0.3877, "step": 1496 }, { "epoch": 0.2146544307427588, "grad_norm": 0.4719238579273224, "learning_rate": 7.152412804586718e-06, "loss": 0.3877, "step": 1497 }, { "epoch": 0.21479782047605392, "grad_norm": 0.4170296788215637, "learning_rate": 7.157190635451506e-06, "loss": 0.3681, "step": 1498 }, { "epoch": 0.214941210209349, "grad_norm": 0.49160000681877136, "learning_rate": 7.161968466316292e-06, "loss": 0.3579, "step": 1499 }, { "epoch": 0.21508459994264412, "grad_norm": 0.5051781535148621, "learning_rate": 7.1667462971810804e-06, "loss": 0.3764, "step": 1500 }, { "epoch": 0.2152279896759392, "grad_norm": 0.43869248032569885, "learning_rate": 7.171524128045868e-06, "loss": 0.3512, "step": 1501 }, { "epoch": 0.2153713794092343, "grad_norm": 0.4647137224674225, "learning_rate": 7.176301958910655e-06, "loss": 0.3794, "step": 1502 }, { "epoch": 0.2155147691425294, "grad_norm": 0.4148513972759247, "learning_rate": 7.181079789775443e-06, "loss": 0.3805, "step": 1503 }, { "epoch": 0.21565815887582448, "grad_norm": 0.4874074161052704, "learning_rate": 7.18585762064023e-06, "loss": 0.3636, "step": 1504 }, { "epoch": 0.2158015486091196, "grad_norm": 0.4398317337036133, "learning_rate": 7.190635451505017e-06, "loss": 0.3847, "step": 1505 }, { "epoch": 0.21594493834241468, "grad_norm": 0.5111578702926636, "learning_rate": 7.195413282369804e-06, "loss": 0.3841, "step": 1506 }, { "epoch": 0.21608832807570977, "grad_norm": 0.4605599045753479, "learning_rate": 7.200191113234593e-06, "loss": 0.3847, "step": 1507 }, { "epoch": 0.21623171780900488, "grad_norm": 0.45424899458885193, "learning_rate": 7.2049689440993795e-06, "loss": 0.3809, "step": 1508 }, { "epoch": 0.21637510754229997, "grad_norm": 0.5354922413825989, "learning_rate": 7.209746774964167e-06, "loss": 0.4172, "step": 1509 }, { "epoch": 0.21651849727559508, "grad_norm": 0.5124530792236328, "learning_rate": 7.214524605828954e-06, "loss": 0.3735, "step": 1510 }, { "epoch": 0.21666188700889016, "grad_norm": 0.49364468455314636, "learning_rate": 7.219302436693742e-06, "loss": 0.373, "step": 1511 }, { "epoch": 0.21680527674218525, "grad_norm": 0.41040700674057007, "learning_rate": 7.224080267558529e-06, "loss": 0.3525, "step": 1512 }, { "epoch": 0.21694866647548036, "grad_norm": 0.5162869095802307, "learning_rate": 7.2288580984233155e-06, "loss": 0.3873, "step": 1513 }, { "epoch": 0.21709205620877545, "grad_norm": 0.5015899538993835, "learning_rate": 7.2336359292881046e-06, "loss": 0.3717, "step": 1514 }, { "epoch": 0.21723544594207056, "grad_norm": 0.4458504021167755, "learning_rate": 7.238413760152891e-06, "loss": 0.3501, "step": 1515 }, { "epoch": 0.21737883567536564, "grad_norm": 0.47839587926864624, "learning_rate": 7.2431915910176785e-06, "loss": 0.3886, "step": 1516 }, { "epoch": 0.21752222540866073, "grad_norm": 0.45824742317199707, "learning_rate": 7.247969421882466e-06, "loss": 0.3905, "step": 1517 }, { "epoch": 0.21766561514195584, "grad_norm": 0.4307498335838318, "learning_rate": 7.252747252747253e-06, "loss": 0.3743, "step": 1518 }, { "epoch": 0.21780900487525093, "grad_norm": 0.4740462005138397, "learning_rate": 7.257525083612041e-06, "loss": 0.3989, "step": 1519 }, { "epoch": 0.21795239460854604, "grad_norm": 0.495839387178421, "learning_rate": 7.262302914476829e-06, "loss": 0.3899, "step": 1520 }, { "epoch": 0.21809578434184113, "grad_norm": 0.43013957142829895, "learning_rate": 7.267080745341616e-06, "loss": 0.347, "step": 1521 }, { "epoch": 0.2182391740751362, "grad_norm": 0.44757941365242004, "learning_rate": 7.271858576206403e-06, "loss": 0.3757, "step": 1522 }, { "epoch": 0.21838256380843132, "grad_norm": 0.45852747559547424, "learning_rate": 7.27663640707119e-06, "loss": 0.3792, "step": 1523 }, { "epoch": 0.2185259535417264, "grad_norm": 0.4418584406375885, "learning_rate": 7.2814142379359775e-06, "loss": 0.3664, "step": 1524 }, { "epoch": 0.21866934327502152, "grad_norm": 0.4746224284172058, "learning_rate": 7.286192068800765e-06, "loss": 0.3551, "step": 1525 }, { "epoch": 0.2188127330083166, "grad_norm": 0.46426406502723694, "learning_rate": 7.290969899665552e-06, "loss": 0.3911, "step": 1526 }, { "epoch": 0.2189561227416117, "grad_norm": 0.49683964252471924, "learning_rate": 7.2957477305303405e-06, "loss": 0.3861, "step": 1527 }, { "epoch": 0.2190995124749068, "grad_norm": 0.4875771403312683, "learning_rate": 7.300525561395128e-06, "loss": 0.3802, "step": 1528 }, { "epoch": 0.2192429022082019, "grad_norm": 0.46237021684646606, "learning_rate": 7.305303392259915e-06, "loss": 0.3543, "step": 1529 }, { "epoch": 0.219386291941497, "grad_norm": 0.4770874083042145, "learning_rate": 7.310081223124702e-06, "loss": 0.3924, "step": 1530 }, { "epoch": 0.2195296816747921, "grad_norm": 0.4351493716239929, "learning_rate": 7.314859053989489e-06, "loss": 0.3696, "step": 1531 }, { "epoch": 0.21967307140808717, "grad_norm": 0.5091522932052612, "learning_rate": 7.3196368848542765e-06, "loss": 0.3845, "step": 1532 }, { "epoch": 0.21981646114138229, "grad_norm": 0.450244665145874, "learning_rate": 7.324414715719064e-06, "loss": 0.3838, "step": 1533 }, { "epoch": 0.21995985087467737, "grad_norm": 0.43645811080932617, "learning_rate": 7.329192546583852e-06, "loss": 0.3486, "step": 1534 }, { "epoch": 0.22010324060797246, "grad_norm": 0.4958837628364563, "learning_rate": 7.3339703774486395e-06, "loss": 0.3803, "step": 1535 }, { "epoch": 0.22024663034126757, "grad_norm": 0.412943959236145, "learning_rate": 7.338748208313427e-06, "loss": 0.3726, "step": 1536 }, { "epoch": 0.22039002007456265, "grad_norm": 0.49953392148017883, "learning_rate": 7.343526039178213e-06, "loss": 0.3907, "step": 1537 }, { "epoch": 0.22053340980785777, "grad_norm": 0.4355343282222748, "learning_rate": 7.348303870043001e-06, "loss": 0.3696, "step": 1538 }, { "epoch": 0.22067679954115285, "grad_norm": 0.44190114736557007, "learning_rate": 7.353081700907788e-06, "loss": 0.3714, "step": 1539 }, { "epoch": 0.22082018927444794, "grad_norm": 0.467342346906662, "learning_rate": 7.3578595317725755e-06, "loss": 0.3545, "step": 1540 }, { "epoch": 0.22096357900774305, "grad_norm": 0.4805123805999756, "learning_rate": 7.362637362637364e-06, "loss": 0.3604, "step": 1541 }, { "epoch": 0.22110696874103813, "grad_norm": 0.4413623511791229, "learning_rate": 7.367415193502151e-06, "loss": 0.3821, "step": 1542 }, { "epoch": 0.22125035847433325, "grad_norm": 0.4746721088886261, "learning_rate": 7.3721930243669385e-06, "loss": 0.394, "step": 1543 }, { "epoch": 0.22139374820762833, "grad_norm": 0.41187772154808044, "learning_rate": 7.376970855231725e-06, "loss": 0.3645, "step": 1544 }, { "epoch": 0.22153713794092342, "grad_norm": 0.5002423524856567, "learning_rate": 7.381748686096512e-06, "loss": 0.372, "step": 1545 }, { "epoch": 0.22168052767421853, "grad_norm": 0.45193395018577576, "learning_rate": 7.3865265169613e-06, "loss": 0.384, "step": 1546 }, { "epoch": 0.22182391740751362, "grad_norm": 0.38809001445770264, "learning_rate": 7.391304347826087e-06, "loss": 0.3923, "step": 1547 }, { "epoch": 0.22196730714080873, "grad_norm": 0.4773957431316376, "learning_rate": 7.396082178690875e-06, "loss": 0.354, "step": 1548 }, { "epoch": 0.2221106968741038, "grad_norm": 0.511002779006958, "learning_rate": 7.400860009555663e-06, "loss": 0.3797, "step": 1549 }, { "epoch": 0.2222540866073989, "grad_norm": 0.44124579429626465, "learning_rate": 7.40563784042045e-06, "loss": 0.3568, "step": 1550 }, { "epoch": 0.222397476340694, "grad_norm": 0.4393945038318634, "learning_rate": 7.4104156712852375e-06, "loss": 0.3639, "step": 1551 }, { "epoch": 0.2225408660739891, "grad_norm": 0.4926522374153137, "learning_rate": 7.415193502150024e-06, "loss": 0.3602, "step": 1552 }, { "epoch": 0.2226842558072842, "grad_norm": 0.4429084360599518, "learning_rate": 7.419971333014811e-06, "loss": 0.3766, "step": 1553 }, { "epoch": 0.2228276455405793, "grad_norm": 0.48856332898139954, "learning_rate": 7.424749163879599e-06, "loss": 0.3759, "step": 1554 }, { "epoch": 0.22297103527387438, "grad_norm": 0.49555885791778564, "learning_rate": 7.429526994744387e-06, "loss": 0.361, "step": 1555 }, { "epoch": 0.2231144250071695, "grad_norm": 0.4779684543609619, "learning_rate": 7.434304825609174e-06, "loss": 0.3632, "step": 1556 }, { "epoch": 0.22325781474046458, "grad_norm": 0.5479699969291687, "learning_rate": 7.439082656473962e-06, "loss": 0.3747, "step": 1557 }, { "epoch": 0.2234012044737597, "grad_norm": 0.5180535316467285, "learning_rate": 7.443860487338749e-06, "loss": 0.3702, "step": 1558 }, { "epoch": 0.22354459420705478, "grad_norm": 0.42599251866340637, "learning_rate": 7.448638318203536e-06, "loss": 0.381, "step": 1559 }, { "epoch": 0.22368798394034986, "grad_norm": 0.5026025176048279, "learning_rate": 7.453416149068323e-06, "loss": 0.3685, "step": 1560 }, { "epoch": 0.22383137367364497, "grad_norm": 0.439705491065979, "learning_rate": 7.4581939799331104e-06, "loss": 0.3717, "step": 1561 }, { "epoch": 0.22397476340694006, "grad_norm": 0.4334481358528137, "learning_rate": 7.462971810797899e-06, "loss": 0.3576, "step": 1562 }, { "epoch": 0.22411815314023517, "grad_norm": 0.40316030383110046, "learning_rate": 7.467749641662686e-06, "loss": 0.3611, "step": 1563 }, { "epoch": 0.22426154287353026, "grad_norm": 0.394503653049469, "learning_rate": 7.472527472527473e-06, "loss": 0.3649, "step": 1564 }, { "epoch": 0.22440493260682534, "grad_norm": 0.41843587160110474, "learning_rate": 7.477305303392261e-06, "loss": 0.3676, "step": 1565 }, { "epoch": 0.22454832234012045, "grad_norm": 0.433586061000824, "learning_rate": 7.482083134257047e-06, "loss": 0.3812, "step": 1566 }, { "epoch": 0.22469171207341554, "grad_norm": 0.4737284779548645, "learning_rate": 7.486860965121835e-06, "loss": 0.3763, "step": 1567 }, { "epoch": 0.22483510180671065, "grad_norm": 0.4175371825695038, "learning_rate": 7.491638795986622e-06, "loss": 0.3557, "step": 1568 }, { "epoch": 0.22497849154000574, "grad_norm": 0.4467008411884308, "learning_rate": 7.49641662685141e-06, "loss": 0.373, "step": 1569 }, { "epoch": 0.22512188127330082, "grad_norm": 0.472811758518219, "learning_rate": 7.501194457716198e-06, "loss": 0.3804, "step": 1570 }, { "epoch": 0.22526527100659594, "grad_norm": 0.42887958884239197, "learning_rate": 7.505972288580985e-06, "loss": 0.3644, "step": 1571 }, { "epoch": 0.22540866073989102, "grad_norm": 0.4614815413951874, "learning_rate": 7.510750119445772e-06, "loss": 0.3585, "step": 1572 }, { "epoch": 0.22555205047318613, "grad_norm": 0.43703001737594604, "learning_rate": 7.515527950310559e-06, "loss": 0.3939, "step": 1573 }, { "epoch": 0.22569544020648122, "grad_norm": 0.42869827151298523, "learning_rate": 7.520305781175346e-06, "loss": 0.3605, "step": 1574 }, { "epoch": 0.2258388299397763, "grad_norm": 0.40858691930770874, "learning_rate": 7.5250836120401346e-06, "loss": 0.3773, "step": 1575 }, { "epoch": 0.22598221967307142, "grad_norm": 0.450340211391449, "learning_rate": 7.529861442904922e-06, "loss": 0.3484, "step": 1576 }, { "epoch": 0.2261256094063665, "grad_norm": 0.39984962344169617, "learning_rate": 7.534639273769709e-06, "loss": 0.3783, "step": 1577 }, { "epoch": 0.22626899913966161, "grad_norm": 0.46079352498054504, "learning_rate": 7.539417104634497e-06, "loss": 0.3926, "step": 1578 }, { "epoch": 0.2264123888729567, "grad_norm": 0.4250757098197937, "learning_rate": 7.544194935499284e-06, "loss": 0.379, "step": 1579 }, { "epoch": 0.22655577860625178, "grad_norm": 0.41187140345573425, "learning_rate": 7.5489727663640714e-06, "loss": 0.3518, "step": 1580 }, { "epoch": 0.2266991683395469, "grad_norm": 0.4272836744785309, "learning_rate": 7.553750597228858e-06, "loss": 0.3705, "step": 1581 }, { "epoch": 0.22684255807284198, "grad_norm": 0.4154178202152252, "learning_rate": 7.558528428093647e-06, "loss": 0.3734, "step": 1582 }, { "epoch": 0.22698594780613707, "grad_norm": 0.4180789291858673, "learning_rate": 7.5633062589584336e-06, "loss": 0.3824, "step": 1583 }, { "epoch": 0.22712933753943218, "grad_norm": 0.3947543203830719, "learning_rate": 7.568084089823221e-06, "loss": 0.3644, "step": 1584 }, { "epoch": 0.22727272727272727, "grad_norm": 0.5197968482971191, "learning_rate": 7.572861920688008e-06, "loss": 0.3962, "step": 1585 }, { "epoch": 0.22741611700602238, "grad_norm": 0.4466851055622101, "learning_rate": 7.577639751552796e-06, "loss": 0.3841, "step": 1586 }, { "epoch": 0.22755950673931746, "grad_norm": 0.4250853955745697, "learning_rate": 7.582417582417583e-06, "loss": 0.354, "step": 1587 }, { "epoch": 0.22770289647261255, "grad_norm": 0.5210568904876709, "learning_rate": 7.58719541328237e-06, "loss": 0.3847, "step": 1588 }, { "epoch": 0.22784628620590766, "grad_norm": 0.4992692470550537, "learning_rate": 7.591973244147159e-06, "loss": 0.3954, "step": 1589 }, { "epoch": 0.22798967593920275, "grad_norm": 0.4442298412322998, "learning_rate": 7.596751075011945e-06, "loss": 0.372, "step": 1590 }, { "epoch": 0.22813306567249786, "grad_norm": 0.4404946267604828, "learning_rate": 7.601528905876733e-06, "loss": 0.3619, "step": 1591 }, { "epoch": 0.22827645540579294, "grad_norm": 0.448819100856781, "learning_rate": 7.60630673674152e-06, "loss": 0.3712, "step": 1592 }, { "epoch": 0.22841984513908803, "grad_norm": 0.4498179256916046, "learning_rate": 7.611084567606307e-06, "loss": 0.3838, "step": 1593 }, { "epoch": 0.22856323487238314, "grad_norm": 0.4130793809890747, "learning_rate": 7.615862398471095e-06, "loss": 0.3625, "step": 1594 }, { "epoch": 0.22870662460567823, "grad_norm": 0.4674473702907562, "learning_rate": 7.620640229335881e-06, "loss": 0.3609, "step": 1595 }, { "epoch": 0.22885001433897334, "grad_norm": 0.47749876976013184, "learning_rate": 7.62541806020067e-06, "loss": 0.3805, "step": 1596 }, { "epoch": 0.22899340407226842, "grad_norm": 0.4319297969341278, "learning_rate": 7.630195891065457e-06, "loss": 0.3619, "step": 1597 }, { "epoch": 0.2291367938055635, "grad_norm": 0.414181649684906, "learning_rate": 7.634973721930245e-06, "loss": 0.3721, "step": 1598 }, { "epoch": 0.22928018353885862, "grad_norm": 0.469927579164505, "learning_rate": 7.639751552795032e-06, "loss": 0.3672, "step": 1599 }, { "epoch": 0.2294235732721537, "grad_norm": 0.44379723072052, "learning_rate": 7.644529383659818e-06, "loss": 0.3833, "step": 1600 }, { "epoch": 0.22956696300544882, "grad_norm": 0.4661206007003784, "learning_rate": 7.649307214524606e-06, "loss": 0.3615, "step": 1601 }, { "epoch": 0.2297103527387439, "grad_norm": 0.4791105389595032, "learning_rate": 7.654085045389393e-06, "loss": 0.3535, "step": 1602 }, { "epoch": 0.229853742472039, "grad_norm": 0.5007043480873108, "learning_rate": 7.658862876254181e-06, "loss": 0.3743, "step": 1603 }, { "epoch": 0.2299971322053341, "grad_norm": 0.4661611318588257, "learning_rate": 7.66364070711897e-06, "loss": 0.3623, "step": 1604 }, { "epoch": 0.2301405219386292, "grad_norm": 0.48821550607681274, "learning_rate": 7.668418537983756e-06, "loss": 0.3645, "step": 1605 }, { "epoch": 0.2302839116719243, "grad_norm": 0.48505187034606934, "learning_rate": 7.673196368848544e-06, "loss": 0.3694, "step": 1606 }, { "epoch": 0.2304273014052194, "grad_norm": 0.41280755400657654, "learning_rate": 7.67797419971333e-06, "loss": 0.3724, "step": 1607 }, { "epoch": 0.23057069113851447, "grad_norm": 0.427351713180542, "learning_rate": 7.682752030578117e-06, "loss": 0.3447, "step": 1608 }, { "epoch": 0.23071408087180958, "grad_norm": 0.5135732889175415, "learning_rate": 7.687529861442905e-06, "loss": 0.3689, "step": 1609 }, { "epoch": 0.23085747060510467, "grad_norm": 0.44516825675964355, "learning_rate": 7.692307692307694e-06, "loss": 0.3763, "step": 1610 }, { "epoch": 0.23100086033839978, "grad_norm": 0.48955675959587097, "learning_rate": 7.69708552317248e-06, "loss": 0.3858, "step": 1611 }, { "epoch": 0.23114425007169487, "grad_norm": 0.45770764350891113, "learning_rate": 7.701863354037268e-06, "loss": 0.3608, "step": 1612 }, { "epoch": 0.23128763980498995, "grad_norm": 0.3890368938446045, "learning_rate": 7.706641184902055e-06, "loss": 0.3564, "step": 1613 }, { "epoch": 0.23143102953828507, "grad_norm": 0.5490550398826599, "learning_rate": 7.711419015766843e-06, "loss": 0.3705, "step": 1614 }, { "epoch": 0.23157441927158015, "grad_norm": 0.4464072287082672, "learning_rate": 7.71619684663163e-06, "loss": 0.3684, "step": 1615 }, { "epoch": 0.23171780900487526, "grad_norm": 0.44563260674476624, "learning_rate": 7.720974677496416e-06, "loss": 0.3988, "step": 1616 }, { "epoch": 0.23186119873817035, "grad_norm": 0.45887917280197144, "learning_rate": 7.725752508361204e-06, "loss": 0.363, "step": 1617 }, { "epoch": 0.23200458847146543, "grad_norm": 0.41697683930397034, "learning_rate": 7.730530339225993e-06, "loss": 0.3635, "step": 1618 }, { "epoch": 0.23214797820476055, "grad_norm": 0.4207721948623657, "learning_rate": 7.735308170090779e-06, "loss": 0.3631, "step": 1619 }, { "epoch": 0.23229136793805563, "grad_norm": 0.5263991355895996, "learning_rate": 7.740086000955567e-06, "loss": 0.3708, "step": 1620 }, { "epoch": 0.23243475767135074, "grad_norm": 0.3888052701950073, "learning_rate": 7.744863831820354e-06, "loss": 0.3635, "step": 1621 }, { "epoch": 0.23257814740464583, "grad_norm": 0.44838470220565796, "learning_rate": 7.74964166268514e-06, "loss": 0.3893, "step": 1622 }, { "epoch": 0.23272153713794091, "grad_norm": 0.48187169432640076, "learning_rate": 7.75441949354993e-06, "loss": 0.3923, "step": 1623 }, { "epoch": 0.23286492687123603, "grad_norm": 0.42343413829803467, "learning_rate": 7.759197324414717e-06, "loss": 0.3774, "step": 1624 }, { "epoch": 0.2330083166045311, "grad_norm": 0.45696309208869934, "learning_rate": 7.763975155279503e-06, "loss": 0.3848, "step": 1625 }, { "epoch": 0.2331517063378262, "grad_norm": 0.4151493012905121, "learning_rate": 7.768752986144292e-06, "loss": 0.3765, "step": 1626 }, { "epoch": 0.2332950960711213, "grad_norm": 0.4303080141544342, "learning_rate": 7.773530817009078e-06, "loss": 0.3611, "step": 1627 }, { "epoch": 0.2334384858044164, "grad_norm": 0.40218788385391235, "learning_rate": 7.778308647873866e-06, "loss": 0.3964, "step": 1628 }, { "epoch": 0.2335818755377115, "grad_norm": 0.4351252317428589, "learning_rate": 7.783086478738653e-06, "loss": 0.3764, "step": 1629 }, { "epoch": 0.2337252652710066, "grad_norm": 0.49753695726394653, "learning_rate": 7.787864309603441e-06, "loss": 0.369, "step": 1630 }, { "epoch": 0.23386865500430168, "grad_norm": 0.4068184494972229, "learning_rate": 7.792642140468228e-06, "loss": 0.3798, "step": 1631 }, { "epoch": 0.2340120447375968, "grad_norm": 0.49043700098991394, "learning_rate": 7.797419971333016e-06, "loss": 0.3466, "step": 1632 }, { "epoch": 0.23415543447089188, "grad_norm": 0.4791688323020935, "learning_rate": 7.802197802197802e-06, "loss": 0.3567, "step": 1633 }, { "epoch": 0.234298824204187, "grad_norm": 0.463570773601532, "learning_rate": 7.80697563306259e-06, "loss": 0.3759, "step": 1634 }, { "epoch": 0.23444221393748207, "grad_norm": 0.473326176404953, "learning_rate": 7.811753463927377e-06, "loss": 0.3705, "step": 1635 }, { "epoch": 0.23458560367077716, "grad_norm": 0.4886380732059479, "learning_rate": 7.816531294792165e-06, "loss": 0.3549, "step": 1636 }, { "epoch": 0.23472899340407227, "grad_norm": 0.4612898826599121, "learning_rate": 7.821309125656954e-06, "loss": 0.3766, "step": 1637 }, { "epoch": 0.23487238313736736, "grad_norm": 0.5876926779747009, "learning_rate": 7.82608695652174e-06, "loss": 0.3747, "step": 1638 }, { "epoch": 0.23501577287066247, "grad_norm": 0.4092480540275574, "learning_rate": 7.830864787386527e-06, "loss": 0.3404, "step": 1639 }, { "epoch": 0.23515916260395756, "grad_norm": 0.49136656522750854, "learning_rate": 7.835642618251315e-06, "loss": 0.3841, "step": 1640 }, { "epoch": 0.23530255233725264, "grad_norm": 0.5258575081825256, "learning_rate": 7.840420449116101e-06, "loss": 0.3803, "step": 1641 }, { "epoch": 0.23544594207054775, "grad_norm": 0.49016568064689636, "learning_rate": 7.84519827998089e-06, "loss": 0.3831, "step": 1642 }, { "epoch": 0.23558933180384284, "grad_norm": 0.5272740721702576, "learning_rate": 7.849976110845676e-06, "loss": 0.368, "step": 1643 }, { "epoch": 0.23573272153713795, "grad_norm": 0.4969545304775238, "learning_rate": 7.854753941710464e-06, "loss": 0.3777, "step": 1644 }, { "epoch": 0.23587611127043304, "grad_norm": 0.47710418701171875, "learning_rate": 7.859531772575253e-06, "loss": 0.3864, "step": 1645 }, { "epoch": 0.23601950100372812, "grad_norm": 0.4413526952266693, "learning_rate": 7.86430960344004e-06, "loss": 0.3743, "step": 1646 }, { "epoch": 0.23616289073702323, "grad_norm": 0.5162209272384644, "learning_rate": 7.869087434304826e-06, "loss": 0.3868, "step": 1647 }, { "epoch": 0.23630628047031832, "grad_norm": 0.43263524770736694, "learning_rate": 7.873865265169614e-06, "loss": 0.3775, "step": 1648 }, { "epoch": 0.23644967020361343, "grad_norm": 0.5207833051681519, "learning_rate": 7.8786430960344e-06, "loss": 0.3744, "step": 1649 }, { "epoch": 0.23659305993690852, "grad_norm": 0.4607938528060913, "learning_rate": 7.883420926899189e-06, "loss": 0.3966, "step": 1650 }, { "epoch": 0.2367364496702036, "grad_norm": 0.5347193479537964, "learning_rate": 7.888198757763977e-06, "loss": 0.3847, "step": 1651 }, { "epoch": 0.23687983940349872, "grad_norm": 0.4725794792175293, "learning_rate": 7.892976588628763e-06, "loss": 0.3616, "step": 1652 }, { "epoch": 0.2370232291367938, "grad_norm": 0.45896270871162415, "learning_rate": 7.89775441949355e-06, "loss": 0.3755, "step": 1653 }, { "epoch": 0.2371666188700889, "grad_norm": 0.5192121267318726, "learning_rate": 7.902532250358338e-06, "loss": 0.3646, "step": 1654 }, { "epoch": 0.237310008603384, "grad_norm": 0.4927304983139038, "learning_rate": 7.907310081223125e-06, "loss": 0.3798, "step": 1655 }, { "epoch": 0.23745339833667908, "grad_norm": 0.4512249827384949, "learning_rate": 7.912087912087913e-06, "loss": 0.3576, "step": 1656 }, { "epoch": 0.2375967880699742, "grad_norm": 0.4892967939376831, "learning_rate": 7.9168657429527e-06, "loss": 0.3785, "step": 1657 }, { "epoch": 0.23774017780326928, "grad_norm": 0.46328696608543396, "learning_rate": 7.921643573817488e-06, "loss": 0.3699, "step": 1658 }, { "epoch": 0.2378835675365644, "grad_norm": 0.4420988857746124, "learning_rate": 7.926421404682276e-06, "loss": 0.3722, "step": 1659 }, { "epoch": 0.23802695726985948, "grad_norm": 0.4239669740200043, "learning_rate": 7.931199235547062e-06, "loss": 0.3904, "step": 1660 }, { "epoch": 0.23817034700315456, "grad_norm": 0.41210079193115234, "learning_rate": 7.935977066411849e-06, "loss": 0.364, "step": 1661 }, { "epoch": 0.23831373673644968, "grad_norm": 0.46386563777923584, "learning_rate": 7.940754897276637e-06, "loss": 0.3657, "step": 1662 }, { "epoch": 0.23845712646974476, "grad_norm": 0.42796364426612854, "learning_rate": 7.945532728141424e-06, "loss": 0.3687, "step": 1663 }, { "epoch": 0.23860051620303988, "grad_norm": 0.4415016174316406, "learning_rate": 7.950310559006212e-06, "loss": 0.3607, "step": 1664 }, { "epoch": 0.23874390593633496, "grad_norm": 0.4294861853122711, "learning_rate": 7.955088389871e-06, "loss": 0.3478, "step": 1665 }, { "epoch": 0.23888729566963005, "grad_norm": 0.5015684962272644, "learning_rate": 7.959866220735787e-06, "loss": 0.3769, "step": 1666 }, { "epoch": 0.23903068540292516, "grad_norm": 0.4255385994911194, "learning_rate": 7.964644051600575e-06, "loss": 0.408, "step": 1667 }, { "epoch": 0.23917407513622024, "grad_norm": 0.4902265667915344, "learning_rate": 7.969421882465361e-06, "loss": 0.385, "step": 1668 }, { "epoch": 0.23931746486951536, "grad_norm": 0.4428371489048004, "learning_rate": 7.974199713330148e-06, "loss": 0.3743, "step": 1669 }, { "epoch": 0.23946085460281044, "grad_norm": 0.4464103877544403, "learning_rate": 7.978977544194936e-06, "loss": 0.376, "step": 1670 }, { "epoch": 0.23960424433610553, "grad_norm": 0.5140626430511475, "learning_rate": 7.983755375059724e-06, "loss": 0.3835, "step": 1671 }, { "epoch": 0.23974763406940064, "grad_norm": 0.46799996495246887, "learning_rate": 7.988533205924511e-06, "loss": 0.372, "step": 1672 }, { "epoch": 0.23989102380269572, "grad_norm": 0.4816042482852936, "learning_rate": 7.9933110367893e-06, "loss": 0.381, "step": 1673 }, { "epoch": 0.2400344135359908, "grad_norm": 0.41891300678253174, "learning_rate": 7.998088867654086e-06, "loss": 0.3592, "step": 1674 }, { "epoch": 0.24017780326928592, "grad_norm": 0.43712007999420166, "learning_rate": 8.002866698518872e-06, "loss": 0.3764, "step": 1675 }, { "epoch": 0.240321193002581, "grad_norm": 0.46534788608551025, "learning_rate": 8.00764452938366e-06, "loss": 0.3582, "step": 1676 }, { "epoch": 0.24046458273587612, "grad_norm": 0.47892966866493225, "learning_rate": 8.012422360248447e-06, "loss": 0.3689, "step": 1677 }, { "epoch": 0.2406079724691712, "grad_norm": 0.4838455021381378, "learning_rate": 8.017200191113235e-06, "loss": 0.3921, "step": 1678 }, { "epoch": 0.2407513622024663, "grad_norm": 0.46921324729919434, "learning_rate": 8.021978021978023e-06, "loss": 0.3677, "step": 1679 }, { "epoch": 0.2408947519357614, "grad_norm": 0.5512990355491638, "learning_rate": 8.02675585284281e-06, "loss": 0.3778, "step": 1680 }, { "epoch": 0.2410381416690565, "grad_norm": 0.486714631319046, "learning_rate": 8.031533683707598e-06, "loss": 0.3806, "step": 1681 }, { "epoch": 0.2411815314023516, "grad_norm": 0.42422589659690857, "learning_rate": 8.036311514572385e-06, "loss": 0.3606, "step": 1682 }, { "epoch": 0.24132492113564669, "grad_norm": 0.5089232325553894, "learning_rate": 8.041089345437171e-06, "loss": 0.3711, "step": 1683 }, { "epoch": 0.24146831086894177, "grad_norm": 0.4206063449382782, "learning_rate": 8.04586717630196e-06, "loss": 0.3827, "step": 1684 }, { "epoch": 0.24161170060223688, "grad_norm": 0.43890833854675293, "learning_rate": 8.050645007166748e-06, "loss": 0.3711, "step": 1685 }, { "epoch": 0.24175509033553197, "grad_norm": 0.4370131194591522, "learning_rate": 8.055422838031534e-06, "loss": 0.3755, "step": 1686 }, { "epoch": 0.24189848006882708, "grad_norm": 0.4477868974208832, "learning_rate": 8.060200668896322e-06, "loss": 0.385, "step": 1687 }, { "epoch": 0.24204186980212217, "grad_norm": 0.452921986579895, "learning_rate": 8.064978499761109e-06, "loss": 0.3956, "step": 1688 }, { "epoch": 0.24218525953541725, "grad_norm": 0.40544596314430237, "learning_rate": 8.069756330625897e-06, "loss": 0.3597, "step": 1689 }, { "epoch": 0.24232864926871236, "grad_norm": 0.44086727499961853, "learning_rate": 8.074534161490684e-06, "loss": 0.3876, "step": 1690 }, { "epoch": 0.24247203900200745, "grad_norm": 0.4558696150779724, "learning_rate": 8.07931199235547e-06, "loss": 0.372, "step": 1691 }, { "epoch": 0.24261542873530256, "grad_norm": 0.4369545578956604, "learning_rate": 8.084089823220258e-06, "loss": 0.3529, "step": 1692 }, { "epoch": 0.24275881846859765, "grad_norm": 0.4629763960838318, "learning_rate": 8.088867654085047e-06, "loss": 0.3588, "step": 1693 }, { "epoch": 0.24290220820189273, "grad_norm": 0.5091299414634705, "learning_rate": 8.093645484949833e-06, "loss": 0.3665, "step": 1694 }, { "epoch": 0.24304559793518785, "grad_norm": 0.5261697769165039, "learning_rate": 8.098423315814621e-06, "loss": 0.3797, "step": 1695 }, { "epoch": 0.24318898766848293, "grad_norm": 0.4748220443725586, "learning_rate": 8.103201146679408e-06, "loss": 0.355, "step": 1696 }, { "epoch": 0.24333237740177804, "grad_norm": 0.4543932378292084, "learning_rate": 8.107978977544195e-06, "loss": 0.3641, "step": 1697 }, { "epoch": 0.24347576713507313, "grad_norm": 0.47362300753593445, "learning_rate": 8.112756808408983e-06, "loss": 0.3674, "step": 1698 }, { "epoch": 0.24361915686836821, "grad_norm": 0.4236856698989868, "learning_rate": 8.117534639273771e-06, "loss": 0.3709, "step": 1699 }, { "epoch": 0.24376254660166333, "grad_norm": 0.44597169756889343, "learning_rate": 8.122312470138558e-06, "loss": 0.3668, "step": 1700 }, { "epoch": 0.2439059363349584, "grad_norm": 0.5235735177993774, "learning_rate": 8.127090301003346e-06, "loss": 0.3772, "step": 1701 }, { "epoch": 0.24404932606825352, "grad_norm": 0.3871288597583771, "learning_rate": 8.131868131868132e-06, "loss": 0.3576, "step": 1702 }, { "epoch": 0.2441927158015486, "grad_norm": 0.46205422282218933, "learning_rate": 8.13664596273292e-06, "loss": 0.3715, "step": 1703 }, { "epoch": 0.2443361055348437, "grad_norm": 0.3874845802783966, "learning_rate": 8.141423793597707e-06, "loss": 0.3797, "step": 1704 }, { "epoch": 0.2444794952681388, "grad_norm": 0.4390513598918915, "learning_rate": 8.146201624462494e-06, "loss": 0.3492, "step": 1705 }, { "epoch": 0.2446228850014339, "grad_norm": 0.4323108494281769, "learning_rate": 8.150979455327282e-06, "loss": 0.371, "step": 1706 }, { "epoch": 0.244766274734729, "grad_norm": 0.4333050549030304, "learning_rate": 8.15575728619207e-06, "loss": 0.3892, "step": 1707 }, { "epoch": 0.2449096644680241, "grad_norm": 0.41442862153053284, "learning_rate": 8.160535117056857e-06, "loss": 0.3895, "step": 1708 }, { "epoch": 0.24505305420131918, "grad_norm": 0.4834401309490204, "learning_rate": 8.165312947921645e-06, "loss": 0.3584, "step": 1709 }, { "epoch": 0.2451964439346143, "grad_norm": 0.5074530243873596, "learning_rate": 8.170090778786431e-06, "loss": 0.3915, "step": 1710 }, { "epoch": 0.24533983366790937, "grad_norm": 0.3803073763847351, "learning_rate": 8.17486860965122e-06, "loss": 0.3521, "step": 1711 }, { "epoch": 0.2454832234012045, "grad_norm": 0.45156776905059814, "learning_rate": 8.179646440516006e-06, "loss": 0.3782, "step": 1712 }, { "epoch": 0.24562661313449957, "grad_norm": 0.4934155344963074, "learning_rate": 8.184424271380794e-06, "loss": 0.3921, "step": 1713 }, { "epoch": 0.24577000286779466, "grad_norm": 0.44989898800849915, "learning_rate": 8.18920210224558e-06, "loss": 0.3761, "step": 1714 }, { "epoch": 0.24591339260108977, "grad_norm": 0.41986632347106934, "learning_rate": 8.193979933110369e-06, "loss": 0.3629, "step": 1715 }, { "epoch": 0.24605678233438485, "grad_norm": 0.4182621240615845, "learning_rate": 8.198757763975156e-06, "loss": 0.3861, "step": 1716 }, { "epoch": 0.24620017206767997, "grad_norm": 0.5090156197547913, "learning_rate": 8.203535594839944e-06, "loss": 0.3833, "step": 1717 }, { "epoch": 0.24634356180097505, "grad_norm": 0.46085554361343384, "learning_rate": 8.20831342570473e-06, "loss": 0.3859, "step": 1718 }, { "epoch": 0.24648695153427014, "grad_norm": 0.48233312368392944, "learning_rate": 8.213091256569517e-06, "loss": 0.3907, "step": 1719 }, { "epoch": 0.24663034126756525, "grad_norm": 0.4634850323200226, "learning_rate": 8.217869087434307e-06, "loss": 0.347, "step": 1720 }, { "epoch": 0.24677373100086034, "grad_norm": 0.46842727065086365, "learning_rate": 8.222646918299093e-06, "loss": 0.3884, "step": 1721 }, { "epoch": 0.24691712073415542, "grad_norm": 0.41680043935775757, "learning_rate": 8.22742474916388e-06, "loss": 0.3702, "step": 1722 }, { "epoch": 0.24706051046745053, "grad_norm": 0.45629870891571045, "learning_rate": 8.232202580028668e-06, "loss": 0.36, "step": 1723 }, { "epoch": 0.24720390020074562, "grad_norm": 0.46806856989860535, "learning_rate": 8.236980410893455e-06, "loss": 0.3652, "step": 1724 }, { "epoch": 0.24734728993404073, "grad_norm": 0.42973792552948, "learning_rate": 8.241758241758243e-06, "loss": 0.3586, "step": 1725 }, { "epoch": 0.24749067966733582, "grad_norm": 0.4266152083873749, "learning_rate": 8.246536072623031e-06, "loss": 0.3687, "step": 1726 }, { "epoch": 0.2476340694006309, "grad_norm": 0.40559229254722595, "learning_rate": 8.251313903487818e-06, "loss": 0.3886, "step": 1727 }, { "epoch": 0.24777745913392601, "grad_norm": 0.47660350799560547, "learning_rate": 8.256091734352604e-06, "loss": 0.3546, "step": 1728 }, { "epoch": 0.2479208488672211, "grad_norm": 0.417982816696167, "learning_rate": 8.260869565217392e-06, "loss": 0.3858, "step": 1729 }, { "epoch": 0.2480642386005162, "grad_norm": 0.4631959795951843, "learning_rate": 8.265647396082179e-06, "loss": 0.3652, "step": 1730 }, { "epoch": 0.2482076283338113, "grad_norm": 0.4079807698726654, "learning_rate": 8.270425226946967e-06, "loss": 0.3523, "step": 1731 }, { "epoch": 0.24835101806710638, "grad_norm": 0.4670834541320801, "learning_rate": 8.275203057811754e-06, "loss": 0.374, "step": 1732 }, { "epoch": 0.2484944078004015, "grad_norm": 0.4189283847808838, "learning_rate": 8.279980888676542e-06, "loss": 0.3694, "step": 1733 }, { "epoch": 0.24863779753369658, "grad_norm": 0.457340806722641, "learning_rate": 8.28475871954133e-06, "loss": 0.3867, "step": 1734 }, { "epoch": 0.2487811872669917, "grad_norm": 0.4407598078250885, "learning_rate": 8.289536550406117e-06, "loss": 0.3693, "step": 1735 }, { "epoch": 0.24892457700028678, "grad_norm": 0.41914185881614685, "learning_rate": 8.294314381270903e-06, "loss": 0.3801, "step": 1736 }, { "epoch": 0.24906796673358186, "grad_norm": 0.4643949568271637, "learning_rate": 8.299092212135691e-06, "loss": 0.3884, "step": 1737 }, { "epoch": 0.24921135646687698, "grad_norm": 0.37124738097190857, "learning_rate": 8.303870043000478e-06, "loss": 0.3571, "step": 1738 }, { "epoch": 0.24935474620017206, "grad_norm": 0.4948292374610901, "learning_rate": 8.308647873865266e-06, "loss": 0.3551, "step": 1739 }, { "epoch": 0.24949813593346717, "grad_norm": 0.45507577061653137, "learning_rate": 8.313425704730054e-06, "loss": 0.3836, "step": 1740 }, { "epoch": 0.24964152566676226, "grad_norm": 0.4279938042163849, "learning_rate": 8.31820353559484e-06, "loss": 0.3639, "step": 1741 }, { "epoch": 0.24978491540005734, "grad_norm": 0.4978596568107605, "learning_rate": 8.322981366459629e-06, "loss": 0.3655, "step": 1742 }, { "epoch": 0.24992830513335246, "grad_norm": 0.4938512146472931, "learning_rate": 8.327759197324416e-06, "loss": 0.3877, "step": 1743 }, { "epoch": 0.25007169486664754, "grad_norm": 0.40730997920036316, "learning_rate": 8.332537028189202e-06, "loss": 0.3596, "step": 1744 }, { "epoch": 0.2502150845999426, "grad_norm": 0.4757990539073944, "learning_rate": 8.33731485905399e-06, "loss": 0.396, "step": 1745 }, { "epoch": 0.2503584743332377, "grad_norm": 0.4712980091571808, "learning_rate": 8.342092689918777e-06, "loss": 0.3877, "step": 1746 }, { "epoch": 0.25050186406653285, "grad_norm": 0.44228145480155945, "learning_rate": 8.346870520783565e-06, "loss": 0.35, "step": 1747 }, { "epoch": 0.25064525379982794, "grad_norm": 0.4977753162384033, "learning_rate": 8.351648351648353e-06, "loss": 0.3705, "step": 1748 }, { "epoch": 0.250788643533123, "grad_norm": 0.5130085349082947, "learning_rate": 8.35642618251314e-06, "loss": 0.3848, "step": 1749 }, { "epoch": 0.2509320332664181, "grad_norm": 0.46044793725013733, "learning_rate": 8.361204013377926e-06, "loss": 0.3819, "step": 1750 }, { "epoch": 0.2510754229997132, "grad_norm": 0.40633541345596313, "learning_rate": 8.365981844242715e-06, "loss": 0.3798, "step": 1751 }, { "epoch": 0.25121881273300833, "grad_norm": 0.49420779943466187, "learning_rate": 8.370759675107501e-06, "loss": 0.3575, "step": 1752 }, { "epoch": 0.2513622024663034, "grad_norm": 0.4056288003921509, "learning_rate": 8.37553750597229e-06, "loss": 0.3585, "step": 1753 }, { "epoch": 0.2515055921995985, "grad_norm": 0.4648582339286804, "learning_rate": 8.380315336837078e-06, "loss": 0.361, "step": 1754 }, { "epoch": 0.2516489819328936, "grad_norm": 0.47775042057037354, "learning_rate": 8.385093167701864e-06, "loss": 0.3889, "step": 1755 }, { "epoch": 0.2517923716661887, "grad_norm": 0.458341509103775, "learning_rate": 8.389870998566652e-06, "loss": 0.3945, "step": 1756 }, { "epoch": 0.2519357613994838, "grad_norm": 0.457218736410141, "learning_rate": 8.394648829431439e-06, "loss": 0.3578, "step": 1757 }, { "epoch": 0.2520791511327789, "grad_norm": 0.4876303970813751, "learning_rate": 8.399426660296225e-06, "loss": 0.3551, "step": 1758 }, { "epoch": 0.252222540866074, "grad_norm": 0.4961831569671631, "learning_rate": 8.404204491161014e-06, "loss": 0.37, "step": 1759 }, { "epoch": 0.25236593059936907, "grad_norm": 0.4845241904258728, "learning_rate": 8.4089823220258e-06, "loss": 0.343, "step": 1760 }, { "epoch": 0.25250932033266416, "grad_norm": 0.41406679153442383, "learning_rate": 8.413760152890588e-06, "loss": 0.3628, "step": 1761 }, { "epoch": 0.2526527100659593, "grad_norm": 0.5190113186836243, "learning_rate": 8.418537983755377e-06, "loss": 0.3741, "step": 1762 }, { "epoch": 0.2527960997992544, "grad_norm": 0.45407891273498535, "learning_rate": 8.423315814620163e-06, "loss": 0.3833, "step": 1763 }, { "epoch": 0.25293948953254947, "grad_norm": 0.5370807647705078, "learning_rate": 8.42809364548495e-06, "loss": 0.3647, "step": 1764 }, { "epoch": 0.25308287926584455, "grad_norm": 0.521007239818573, "learning_rate": 8.432871476349738e-06, "loss": 0.4002, "step": 1765 }, { "epoch": 0.25322626899913964, "grad_norm": 0.4266258180141449, "learning_rate": 8.437649307214524e-06, "loss": 0.3437, "step": 1766 }, { "epoch": 0.2533696587324348, "grad_norm": 0.4224201440811157, "learning_rate": 8.442427138079313e-06, "loss": 0.3542, "step": 1767 }, { "epoch": 0.25351304846572986, "grad_norm": 0.48650234937667847, "learning_rate": 8.4472049689441e-06, "loss": 0.3881, "step": 1768 }, { "epoch": 0.25365643819902495, "grad_norm": 0.44874057173728943, "learning_rate": 8.451982799808887e-06, "loss": 0.3786, "step": 1769 }, { "epoch": 0.25379982793232003, "grad_norm": 0.5103297233581543, "learning_rate": 8.456760630673676e-06, "loss": 0.393, "step": 1770 }, { "epoch": 0.2539432176656151, "grad_norm": 0.44068336486816406, "learning_rate": 8.461538461538462e-06, "loss": 0.4002, "step": 1771 }, { "epoch": 0.25408660739891026, "grad_norm": 0.43872129917144775, "learning_rate": 8.466316292403249e-06, "loss": 0.3816, "step": 1772 }, { "epoch": 0.25422999713220534, "grad_norm": 0.45881375670433044, "learning_rate": 8.471094123268037e-06, "loss": 0.3608, "step": 1773 }, { "epoch": 0.25437338686550043, "grad_norm": 0.42369529604911804, "learning_rate": 8.475871954132825e-06, "loss": 0.373, "step": 1774 }, { "epoch": 0.2545167765987955, "grad_norm": 0.5245718359947205, "learning_rate": 8.480649784997612e-06, "loss": 0.391, "step": 1775 }, { "epoch": 0.2546601663320906, "grad_norm": 0.4236152470111847, "learning_rate": 8.4854276158624e-06, "loss": 0.3905, "step": 1776 }, { "epoch": 0.25480355606538574, "grad_norm": 0.4138684570789337, "learning_rate": 8.490205446727186e-06, "loss": 0.3629, "step": 1777 }, { "epoch": 0.2549469457986808, "grad_norm": 0.41229161620140076, "learning_rate": 8.494983277591975e-06, "loss": 0.346, "step": 1778 }, { "epoch": 0.2550903355319759, "grad_norm": 0.4247377812862396, "learning_rate": 8.499761108456761e-06, "loss": 0.3797, "step": 1779 }, { "epoch": 0.255233725265271, "grad_norm": 0.4466143250465393, "learning_rate": 8.504538939321548e-06, "loss": 0.3755, "step": 1780 }, { "epoch": 0.2553771149985661, "grad_norm": 0.4577440023422241, "learning_rate": 8.509316770186336e-06, "loss": 0.3699, "step": 1781 }, { "epoch": 0.2555205047318612, "grad_norm": 0.4262188673019409, "learning_rate": 8.514094601051124e-06, "loss": 0.4075, "step": 1782 }, { "epoch": 0.2556638944651563, "grad_norm": 0.4471311867237091, "learning_rate": 8.51887243191591e-06, "loss": 0.3717, "step": 1783 }, { "epoch": 0.2558072841984514, "grad_norm": 0.4510149657726288, "learning_rate": 8.523650262780699e-06, "loss": 0.3669, "step": 1784 }, { "epoch": 0.2559506739317465, "grad_norm": 0.42455706000328064, "learning_rate": 8.528428093645485e-06, "loss": 0.387, "step": 1785 }, { "epoch": 0.25609406366504156, "grad_norm": 0.4091707170009613, "learning_rate": 8.533205924510272e-06, "loss": 0.369, "step": 1786 }, { "epoch": 0.2562374533983367, "grad_norm": 0.47120556235313416, "learning_rate": 8.53798375537506e-06, "loss": 0.395, "step": 1787 }, { "epoch": 0.2563808431316318, "grad_norm": 0.3965267539024353, "learning_rate": 8.542761586239848e-06, "loss": 0.3694, "step": 1788 }, { "epoch": 0.25652423286492687, "grad_norm": 0.46222376823425293, "learning_rate": 8.547539417104635e-06, "loss": 0.369, "step": 1789 }, { "epoch": 0.25666762259822196, "grad_norm": 0.49581286311149597, "learning_rate": 8.552317247969423e-06, "loss": 0.39, "step": 1790 }, { "epoch": 0.25681101233151704, "grad_norm": 0.4144819676876068, "learning_rate": 8.55709507883421e-06, "loss": 0.3513, "step": 1791 }, { "epoch": 0.2569544020648122, "grad_norm": 0.4746030867099762, "learning_rate": 8.561872909698998e-06, "loss": 0.3447, "step": 1792 }, { "epoch": 0.25709779179810727, "grad_norm": 0.4640180766582489, "learning_rate": 8.566650740563784e-06, "loss": 0.3512, "step": 1793 }, { "epoch": 0.25724118153140235, "grad_norm": 0.45336663722991943, "learning_rate": 8.571428571428571e-06, "loss": 0.3573, "step": 1794 }, { "epoch": 0.25738457126469744, "grad_norm": 0.4788835644721985, "learning_rate": 8.576206402293359e-06, "loss": 0.381, "step": 1795 }, { "epoch": 0.2575279609979925, "grad_norm": 0.41398388147354126, "learning_rate": 8.580984233158147e-06, "loss": 0.3699, "step": 1796 }, { "epoch": 0.25767135073128766, "grad_norm": 0.48076796531677246, "learning_rate": 8.585762064022934e-06, "loss": 0.3504, "step": 1797 }, { "epoch": 0.25781474046458275, "grad_norm": 0.4352067708969116, "learning_rate": 8.590539894887722e-06, "loss": 0.3779, "step": 1798 }, { "epoch": 0.25795813019787783, "grad_norm": 0.441303551197052, "learning_rate": 8.595317725752509e-06, "loss": 0.3761, "step": 1799 }, { "epoch": 0.2581015199311729, "grad_norm": 0.4589645266532898, "learning_rate": 8.600095556617297e-06, "loss": 0.3609, "step": 1800 }, { "epoch": 0.258244909664468, "grad_norm": 0.43888357281684875, "learning_rate": 8.604873387482083e-06, "loss": 0.3574, "step": 1801 }, { "epoch": 0.25838829939776314, "grad_norm": 0.4538017809391022, "learning_rate": 8.609651218346872e-06, "loss": 0.3464, "step": 1802 }, { "epoch": 0.25853168913105823, "grad_norm": 0.4234298765659332, "learning_rate": 8.614429049211658e-06, "loss": 0.3935, "step": 1803 }, { "epoch": 0.2586750788643533, "grad_norm": 0.43970903754234314, "learning_rate": 8.619206880076446e-06, "loss": 0.3633, "step": 1804 }, { "epoch": 0.2588184685976484, "grad_norm": 0.4567807614803314, "learning_rate": 8.623984710941233e-06, "loss": 0.3804, "step": 1805 }, { "epoch": 0.2589618583309435, "grad_norm": 0.5120955109596252, "learning_rate": 8.628762541806021e-06, "loss": 0.3492, "step": 1806 }, { "epoch": 0.2591052480642386, "grad_norm": 0.4637034833431244, "learning_rate": 8.633540372670808e-06, "loss": 0.4088, "step": 1807 }, { "epoch": 0.2592486377975337, "grad_norm": 0.4737495481967926, "learning_rate": 8.638318203535594e-06, "loss": 0.3826, "step": 1808 }, { "epoch": 0.2593920275308288, "grad_norm": 0.4981021285057068, "learning_rate": 8.643096034400384e-06, "loss": 0.3547, "step": 1809 }, { "epoch": 0.2595354172641239, "grad_norm": 0.3860042989253998, "learning_rate": 8.64787386526517e-06, "loss": 0.3631, "step": 1810 }, { "epoch": 0.25967880699741896, "grad_norm": 0.48321622610092163, "learning_rate": 8.652651696129957e-06, "loss": 0.3945, "step": 1811 }, { "epoch": 0.2598221967307141, "grad_norm": 0.5176047682762146, "learning_rate": 8.657429526994745e-06, "loss": 0.3567, "step": 1812 }, { "epoch": 0.2599655864640092, "grad_norm": 0.40631330013275146, "learning_rate": 8.662207357859532e-06, "loss": 0.3627, "step": 1813 }, { "epoch": 0.2601089761973043, "grad_norm": 0.42377206683158875, "learning_rate": 8.66698518872432e-06, "loss": 0.3768, "step": 1814 }, { "epoch": 0.26025236593059936, "grad_norm": 0.42842844128608704, "learning_rate": 8.671763019589107e-06, "loss": 0.3688, "step": 1815 }, { "epoch": 0.26039575566389445, "grad_norm": 0.5066654682159424, "learning_rate": 8.676540850453895e-06, "loss": 0.3727, "step": 1816 }, { "epoch": 0.2605391453971896, "grad_norm": 0.41575372219085693, "learning_rate": 8.681318681318681e-06, "loss": 0.3909, "step": 1817 }, { "epoch": 0.26068253513048467, "grad_norm": 0.5434616208076477, "learning_rate": 8.68609651218347e-06, "loss": 0.3778, "step": 1818 }, { "epoch": 0.26082592486377976, "grad_norm": 0.40841224789619446, "learning_rate": 8.690874343048256e-06, "loss": 0.3583, "step": 1819 }, { "epoch": 0.26096931459707484, "grad_norm": 0.4266430735588074, "learning_rate": 8.695652173913044e-06, "loss": 0.3529, "step": 1820 }, { "epoch": 0.2611127043303699, "grad_norm": 0.43282121419906616, "learning_rate": 8.700430004777831e-06, "loss": 0.359, "step": 1821 }, { "epoch": 0.26125609406366507, "grad_norm": 0.3825700581073761, "learning_rate": 8.70520783564262e-06, "loss": 0.357, "step": 1822 }, { "epoch": 0.26139948379696015, "grad_norm": 0.4203316271305084, "learning_rate": 8.709985666507407e-06, "loss": 0.3684, "step": 1823 }, { "epoch": 0.26154287353025524, "grad_norm": 0.42799344658851624, "learning_rate": 8.714763497372194e-06, "loss": 0.3747, "step": 1824 }, { "epoch": 0.2616862632635503, "grad_norm": 0.3995625376701355, "learning_rate": 8.71954132823698e-06, "loss": 0.3875, "step": 1825 }, { "epoch": 0.2618296529968454, "grad_norm": 0.4451453685760498, "learning_rate": 8.724319159101769e-06, "loss": 0.3775, "step": 1826 }, { "epoch": 0.26197304273014055, "grad_norm": 0.44751641154289246, "learning_rate": 8.729096989966555e-06, "loss": 0.3697, "step": 1827 }, { "epoch": 0.26211643246343563, "grad_norm": 0.4180363118648529, "learning_rate": 8.733874820831343e-06, "loss": 0.361, "step": 1828 }, { "epoch": 0.2622598221967307, "grad_norm": 0.47495338320732117, "learning_rate": 8.738652651696132e-06, "loss": 0.3705, "step": 1829 }, { "epoch": 0.2624032119300258, "grad_norm": 0.4242611527442932, "learning_rate": 8.743430482560918e-06, "loss": 0.3731, "step": 1830 }, { "epoch": 0.2625466016633209, "grad_norm": 0.40864241123199463, "learning_rate": 8.748208313425706e-06, "loss": 0.3624, "step": 1831 }, { "epoch": 0.26268999139661603, "grad_norm": 0.4623742699623108, "learning_rate": 8.752986144290493e-06, "loss": 0.3635, "step": 1832 }, { "epoch": 0.2628333811299111, "grad_norm": 0.4230354428291321, "learning_rate": 8.75776397515528e-06, "loss": 0.3667, "step": 1833 }, { "epoch": 0.2629767708632062, "grad_norm": 0.5192683935165405, "learning_rate": 8.762541806020068e-06, "loss": 0.3745, "step": 1834 }, { "epoch": 0.2631201605965013, "grad_norm": 0.446217805147171, "learning_rate": 8.767319636884854e-06, "loss": 0.3565, "step": 1835 }, { "epoch": 0.26326355032979637, "grad_norm": 0.47277218103408813, "learning_rate": 8.772097467749642e-06, "loss": 0.3863, "step": 1836 }, { "epoch": 0.2634069400630915, "grad_norm": 0.4859637916088104, "learning_rate": 8.77687529861443e-06, "loss": 0.3703, "step": 1837 }, { "epoch": 0.2635503297963866, "grad_norm": 0.4816749393939972, "learning_rate": 8.781653129479217e-06, "loss": 0.3899, "step": 1838 }, { "epoch": 0.2636937195296817, "grad_norm": 0.4440545439720154, "learning_rate": 8.786430960344004e-06, "loss": 0.3619, "step": 1839 }, { "epoch": 0.26383710926297677, "grad_norm": 0.44472959637641907, "learning_rate": 8.791208791208792e-06, "loss": 0.3839, "step": 1840 }, { "epoch": 0.26398049899627185, "grad_norm": 0.477713406085968, "learning_rate": 8.795986622073578e-06, "loss": 0.3623, "step": 1841 }, { "epoch": 0.26412388872956694, "grad_norm": 0.5298186540603638, "learning_rate": 8.800764452938367e-06, "loss": 0.3754, "step": 1842 }, { "epoch": 0.2642672784628621, "grad_norm": 0.4835379421710968, "learning_rate": 8.805542283803155e-06, "loss": 0.3663, "step": 1843 }, { "epoch": 0.26441066819615716, "grad_norm": 0.4604148268699646, "learning_rate": 8.810320114667941e-06, "loss": 0.3555, "step": 1844 }, { "epoch": 0.26455405792945225, "grad_norm": 0.5545218586921692, "learning_rate": 8.81509794553273e-06, "loss": 0.3626, "step": 1845 }, { "epoch": 0.26469744766274733, "grad_norm": 0.4465009868144989, "learning_rate": 8.819875776397516e-06, "loss": 0.3555, "step": 1846 }, { "epoch": 0.2648408373960424, "grad_norm": 0.5187569856643677, "learning_rate": 8.824653607262303e-06, "loss": 0.3635, "step": 1847 }, { "epoch": 0.26498422712933756, "grad_norm": 0.49001920223236084, "learning_rate": 8.829431438127091e-06, "loss": 0.3718, "step": 1848 }, { "epoch": 0.26512761686263264, "grad_norm": 0.4818209707736969, "learning_rate": 8.834209268991877e-06, "loss": 0.3717, "step": 1849 }, { "epoch": 0.2652710065959277, "grad_norm": 0.42791858315467834, "learning_rate": 8.838987099856666e-06, "loss": 0.3568, "step": 1850 }, { "epoch": 0.2654143963292228, "grad_norm": 0.45800626277923584, "learning_rate": 8.843764930721454e-06, "loss": 0.3689, "step": 1851 }, { "epoch": 0.2655577860625179, "grad_norm": 0.4656262993812561, "learning_rate": 8.84854276158624e-06, "loss": 0.3654, "step": 1852 }, { "epoch": 0.26570117579581304, "grad_norm": 0.45368829369544983, "learning_rate": 8.853320592451029e-06, "loss": 0.375, "step": 1853 }, { "epoch": 0.2658445655291081, "grad_norm": 0.48402947187423706, "learning_rate": 8.858098423315815e-06, "loss": 0.3747, "step": 1854 }, { "epoch": 0.2659879552624032, "grad_norm": 0.4672616422176361, "learning_rate": 8.862876254180602e-06, "loss": 0.3711, "step": 1855 }, { "epoch": 0.2661313449956983, "grad_norm": 0.4318965673446655, "learning_rate": 8.86765408504539e-06, "loss": 0.3484, "step": 1856 }, { "epoch": 0.2662747347289934, "grad_norm": 0.4463658034801483, "learning_rate": 8.872431915910178e-06, "loss": 0.3553, "step": 1857 }, { "epoch": 0.2664181244622885, "grad_norm": 0.43443846702575684, "learning_rate": 8.877209746774965e-06, "loss": 0.3725, "step": 1858 }, { "epoch": 0.2665615141955836, "grad_norm": 0.4349210262298584, "learning_rate": 8.881987577639753e-06, "loss": 0.3674, "step": 1859 }, { "epoch": 0.2667049039288787, "grad_norm": 0.4270308017730713, "learning_rate": 8.88676540850454e-06, "loss": 0.3573, "step": 1860 }, { "epoch": 0.2668482936621738, "grad_norm": 0.4114382266998291, "learning_rate": 8.891543239369326e-06, "loss": 0.359, "step": 1861 }, { "epoch": 0.26699168339546886, "grad_norm": 0.41596195101737976, "learning_rate": 8.896321070234114e-06, "loss": 0.3577, "step": 1862 }, { "epoch": 0.267135073128764, "grad_norm": 0.4635672867298126, "learning_rate": 8.9010989010989e-06, "loss": 0.3782, "step": 1863 }, { "epoch": 0.2672784628620591, "grad_norm": 0.42168593406677246, "learning_rate": 8.905876731963689e-06, "loss": 0.3721, "step": 1864 }, { "epoch": 0.26742185259535417, "grad_norm": 0.45551326870918274, "learning_rate": 8.910654562828477e-06, "loss": 0.3705, "step": 1865 }, { "epoch": 0.26756524232864926, "grad_norm": 0.4467550218105316, "learning_rate": 8.915432393693264e-06, "loss": 0.3604, "step": 1866 }, { "epoch": 0.26770863206194434, "grad_norm": 0.4521734416484833, "learning_rate": 8.920210224558052e-06, "loss": 0.3602, "step": 1867 }, { "epoch": 0.2678520217952395, "grad_norm": 0.45988747477531433, "learning_rate": 8.924988055422838e-06, "loss": 0.3533, "step": 1868 }, { "epoch": 0.26799541152853457, "grad_norm": 0.4367242753505707, "learning_rate": 8.929765886287625e-06, "loss": 0.3814, "step": 1869 }, { "epoch": 0.26813880126182965, "grad_norm": 0.5020619630813599, "learning_rate": 8.934543717152413e-06, "loss": 0.3577, "step": 1870 }, { "epoch": 0.26828219099512474, "grad_norm": 0.48550671339035034, "learning_rate": 8.939321548017201e-06, "loss": 0.3884, "step": 1871 }, { "epoch": 0.2684255807284198, "grad_norm": 0.4527984857559204, "learning_rate": 8.944099378881988e-06, "loss": 0.3892, "step": 1872 }, { "epoch": 0.26856897046171496, "grad_norm": 0.5352456569671631, "learning_rate": 8.948877209746776e-06, "loss": 0.3728, "step": 1873 }, { "epoch": 0.26871236019501005, "grad_norm": 0.44590914249420166, "learning_rate": 8.953655040611563e-06, "loss": 0.359, "step": 1874 }, { "epoch": 0.26885574992830513, "grad_norm": 0.4533917009830475, "learning_rate": 8.958432871476351e-06, "loss": 0.3527, "step": 1875 }, { "epoch": 0.2689991396616002, "grad_norm": 0.5729706883430481, "learning_rate": 8.963210702341138e-06, "loss": 0.3564, "step": 1876 }, { "epoch": 0.2691425293948953, "grad_norm": 0.5272822976112366, "learning_rate": 8.967988533205926e-06, "loss": 0.3828, "step": 1877 }, { "epoch": 0.26928591912819044, "grad_norm": 0.4594517648220062, "learning_rate": 8.972766364070712e-06, "loss": 0.3982, "step": 1878 }, { "epoch": 0.26942930886148553, "grad_norm": 0.45483583211898804, "learning_rate": 8.9775441949355e-06, "loss": 0.364, "step": 1879 }, { "epoch": 0.2695726985947806, "grad_norm": 0.4840465486049652, "learning_rate": 8.982322025800287e-06, "loss": 0.3825, "step": 1880 }, { "epoch": 0.2697160883280757, "grad_norm": 0.4294186532497406, "learning_rate": 8.987099856665075e-06, "loss": 0.3754, "step": 1881 }, { "epoch": 0.2698594780613708, "grad_norm": 0.5186020135879517, "learning_rate": 8.991877687529862e-06, "loss": 0.3596, "step": 1882 }, { "epoch": 0.2700028677946659, "grad_norm": 0.4443545639514923, "learning_rate": 8.996655518394648e-06, "loss": 0.3621, "step": 1883 }, { "epoch": 0.270146257527961, "grad_norm": 0.5292108058929443, "learning_rate": 9.001433349259438e-06, "loss": 0.3697, "step": 1884 }, { "epoch": 0.2702896472612561, "grad_norm": 0.4549960196018219, "learning_rate": 9.006211180124225e-06, "loss": 0.376, "step": 1885 }, { "epoch": 0.2704330369945512, "grad_norm": 0.531855046749115, "learning_rate": 9.010989010989011e-06, "loss": 0.3511, "step": 1886 }, { "epoch": 0.27057642672784626, "grad_norm": 0.5707675814628601, "learning_rate": 9.0157668418538e-06, "loss": 0.4166, "step": 1887 }, { "epoch": 0.2707198164611414, "grad_norm": 0.4393141567707062, "learning_rate": 9.020544672718586e-06, "loss": 0.3621, "step": 1888 }, { "epoch": 0.2708632061944365, "grad_norm": 0.4640008211135864, "learning_rate": 9.025322503583374e-06, "loss": 0.3631, "step": 1889 }, { "epoch": 0.2710065959277316, "grad_norm": 0.5251232981681824, "learning_rate": 9.03010033444816e-06, "loss": 0.3511, "step": 1890 }, { "epoch": 0.27114998566102666, "grad_norm": 0.5167267918586731, "learning_rate": 9.034878165312949e-06, "loss": 0.3859, "step": 1891 }, { "epoch": 0.27129337539432175, "grad_norm": 0.4643188416957855, "learning_rate": 9.039655996177736e-06, "loss": 0.3643, "step": 1892 }, { "epoch": 0.2714367651276169, "grad_norm": 0.48544055223464966, "learning_rate": 9.044433827042524e-06, "loss": 0.3782, "step": 1893 }, { "epoch": 0.27158015486091197, "grad_norm": 0.5083060264587402, "learning_rate": 9.04921165790731e-06, "loss": 0.372, "step": 1894 }, { "epoch": 0.27172354459420706, "grad_norm": 0.4365752041339874, "learning_rate": 9.053989488772099e-06, "loss": 0.3856, "step": 1895 }, { "epoch": 0.27186693432750214, "grad_norm": 0.43001341819763184, "learning_rate": 9.058767319636885e-06, "loss": 0.3778, "step": 1896 }, { "epoch": 0.2720103240607972, "grad_norm": 0.4899238348007202, "learning_rate": 9.063545150501673e-06, "loss": 0.3954, "step": 1897 }, { "epoch": 0.27215371379409237, "grad_norm": 0.45318812131881714, "learning_rate": 9.068322981366461e-06, "loss": 0.3687, "step": 1898 }, { "epoch": 0.27229710352738745, "grad_norm": 0.45191800594329834, "learning_rate": 9.073100812231248e-06, "loss": 0.3601, "step": 1899 }, { "epoch": 0.27244049326068254, "grad_norm": 0.4567171633243561, "learning_rate": 9.077878643096035e-06, "loss": 0.3781, "step": 1900 }, { "epoch": 0.2725838829939776, "grad_norm": 0.48387017846107483, "learning_rate": 9.082656473960823e-06, "loss": 0.3616, "step": 1901 }, { "epoch": 0.2727272727272727, "grad_norm": 0.4374629557132721, "learning_rate": 9.08743430482561e-06, "loss": 0.3677, "step": 1902 }, { "epoch": 0.27287066246056785, "grad_norm": 0.44717419147491455, "learning_rate": 9.092212135690398e-06, "loss": 0.3349, "step": 1903 }, { "epoch": 0.27301405219386293, "grad_norm": 0.4415036737918854, "learning_rate": 9.096989966555184e-06, "loss": 0.3693, "step": 1904 }, { "epoch": 0.273157441927158, "grad_norm": 0.4353375732898712, "learning_rate": 9.101767797419972e-06, "loss": 0.3508, "step": 1905 }, { "epoch": 0.2733008316604531, "grad_norm": 0.46583181619644165, "learning_rate": 9.10654562828476e-06, "loss": 0.3976, "step": 1906 }, { "epoch": 0.2734442213937482, "grad_norm": 0.5427085757255554, "learning_rate": 9.111323459149547e-06, "loss": 0.3642, "step": 1907 }, { "epoch": 0.27358761112704333, "grad_norm": 0.4540790319442749, "learning_rate": 9.116101290014334e-06, "loss": 0.3602, "step": 1908 }, { "epoch": 0.2737310008603384, "grad_norm": 0.4960634410381317, "learning_rate": 9.120879120879122e-06, "loss": 0.3783, "step": 1909 }, { "epoch": 0.2738743905936335, "grad_norm": 0.4404428005218506, "learning_rate": 9.125656951743908e-06, "loss": 0.3594, "step": 1910 }, { "epoch": 0.2740177803269286, "grad_norm": 0.4450056552886963, "learning_rate": 9.130434782608697e-06, "loss": 0.3966, "step": 1911 }, { "epoch": 0.27416117006022367, "grad_norm": 0.5422041416168213, "learning_rate": 9.135212613473485e-06, "loss": 0.3755, "step": 1912 }, { "epoch": 0.2743045597935188, "grad_norm": 0.4233554005622864, "learning_rate": 9.139990444338271e-06, "loss": 0.3699, "step": 1913 }, { "epoch": 0.2744479495268139, "grad_norm": 0.4982956051826477, "learning_rate": 9.144768275203058e-06, "loss": 0.388, "step": 1914 }, { "epoch": 0.274591339260109, "grad_norm": 0.42662370204925537, "learning_rate": 9.149546106067846e-06, "loss": 0.3564, "step": 1915 }, { "epoch": 0.27473472899340406, "grad_norm": 0.5069640874862671, "learning_rate": 9.154323936932633e-06, "loss": 0.3784, "step": 1916 }, { "epoch": 0.27487811872669915, "grad_norm": 0.4204384684562683, "learning_rate": 9.15910176779742e-06, "loss": 0.3851, "step": 1917 }, { "epoch": 0.2750215084599943, "grad_norm": 0.42519277334213257, "learning_rate": 9.163879598662207e-06, "loss": 0.3593, "step": 1918 }, { "epoch": 0.2751648981932894, "grad_norm": 0.46244290471076965, "learning_rate": 9.168657429526996e-06, "loss": 0.3769, "step": 1919 }, { "epoch": 0.27530828792658446, "grad_norm": 0.44356778264045715, "learning_rate": 9.173435260391784e-06, "loss": 0.3724, "step": 1920 }, { "epoch": 0.27545167765987955, "grad_norm": 0.4202917516231537, "learning_rate": 9.17821309125657e-06, "loss": 0.3719, "step": 1921 }, { "epoch": 0.27559506739317463, "grad_norm": 0.4357379376888275, "learning_rate": 9.182990922121357e-06, "loss": 0.3784, "step": 1922 }, { "epoch": 0.27573845712646977, "grad_norm": 0.45150986313819885, "learning_rate": 9.187768752986145e-06, "loss": 0.365, "step": 1923 }, { "epoch": 0.27588184685976486, "grad_norm": 0.42806315422058105, "learning_rate": 9.192546583850932e-06, "loss": 0.3658, "step": 1924 }, { "epoch": 0.27602523659305994, "grad_norm": 0.4356747567653656, "learning_rate": 9.19732441471572e-06, "loss": 0.3565, "step": 1925 }, { "epoch": 0.276168626326355, "grad_norm": 0.5413413643836975, "learning_rate": 9.202102245580508e-06, "loss": 0.3723, "step": 1926 }, { "epoch": 0.2763120160596501, "grad_norm": 0.4238128960132599, "learning_rate": 9.206880076445295e-06, "loss": 0.3507, "step": 1927 }, { "epoch": 0.27645540579294525, "grad_norm": 0.570306122303009, "learning_rate": 9.211657907310083e-06, "loss": 0.3708, "step": 1928 }, { "epoch": 0.27659879552624034, "grad_norm": 0.4287761151790619, "learning_rate": 9.21643573817487e-06, "loss": 0.3653, "step": 1929 }, { "epoch": 0.2767421852595354, "grad_norm": 0.45376351475715637, "learning_rate": 9.221213569039656e-06, "loss": 0.3826, "step": 1930 }, { "epoch": 0.2768855749928305, "grad_norm": 0.4754523038864136, "learning_rate": 9.225991399904444e-06, "loss": 0.3831, "step": 1931 }, { "epoch": 0.2770289647261256, "grad_norm": 0.43439263105392456, "learning_rate": 9.230769230769232e-06, "loss": 0.3505, "step": 1932 }, { "epoch": 0.2771723544594207, "grad_norm": 0.42943868041038513, "learning_rate": 9.235547061634019e-06, "loss": 0.3431, "step": 1933 }, { "epoch": 0.2773157441927158, "grad_norm": 0.43677133321762085, "learning_rate": 9.240324892498807e-06, "loss": 0.397, "step": 1934 }, { "epoch": 0.2774591339260109, "grad_norm": 0.4301722049713135, "learning_rate": 9.245102723363594e-06, "loss": 0.3602, "step": 1935 }, { "epoch": 0.277602523659306, "grad_norm": 0.524488627910614, "learning_rate": 9.24988055422838e-06, "loss": 0.3772, "step": 1936 }, { "epoch": 0.2777459133926011, "grad_norm": 0.4212287962436676, "learning_rate": 9.254658385093168e-06, "loss": 0.3848, "step": 1937 }, { "epoch": 0.27788930312589616, "grad_norm": 0.4244895875453949, "learning_rate": 9.259436215957955e-06, "loss": 0.3426, "step": 1938 }, { "epoch": 0.2780326928591913, "grad_norm": 0.46717047691345215, "learning_rate": 9.264214046822743e-06, "loss": 0.3654, "step": 1939 }, { "epoch": 0.2781760825924864, "grad_norm": 0.5090760588645935, "learning_rate": 9.268991877687531e-06, "loss": 0.3754, "step": 1940 }, { "epoch": 0.27831947232578147, "grad_norm": 0.4232420325279236, "learning_rate": 9.273769708552318e-06, "loss": 0.3499, "step": 1941 }, { "epoch": 0.27846286205907655, "grad_norm": 0.4148833751678467, "learning_rate": 9.278547539417106e-06, "loss": 0.3631, "step": 1942 }, { "epoch": 0.27860625179237164, "grad_norm": 0.42756661772727966, "learning_rate": 9.283325370281893e-06, "loss": 0.3668, "step": 1943 }, { "epoch": 0.2787496415256668, "grad_norm": 0.4056905508041382, "learning_rate": 9.288103201146679e-06, "loss": 0.3412, "step": 1944 }, { "epoch": 0.27889303125896187, "grad_norm": 0.41892313957214355, "learning_rate": 9.292881032011467e-06, "loss": 0.357, "step": 1945 }, { "epoch": 0.27903642099225695, "grad_norm": 0.43490415811538696, "learning_rate": 9.297658862876256e-06, "loss": 0.3766, "step": 1946 }, { "epoch": 0.27917981072555204, "grad_norm": 0.3853394091129303, "learning_rate": 9.302436693741042e-06, "loss": 0.3804, "step": 1947 }, { "epoch": 0.2793232004588471, "grad_norm": 0.4043779671192169, "learning_rate": 9.30721452460583e-06, "loss": 0.3835, "step": 1948 }, { "epoch": 0.27946659019214226, "grad_norm": 0.45452025532722473, "learning_rate": 9.311992355470617e-06, "loss": 0.351, "step": 1949 }, { "epoch": 0.27960997992543735, "grad_norm": 0.4035901427268982, "learning_rate": 9.316770186335405e-06, "loss": 0.3451, "step": 1950 }, { "epoch": 0.27975336965873243, "grad_norm": 0.43111932277679443, "learning_rate": 9.321548017200192e-06, "loss": 0.3732, "step": 1951 }, { "epoch": 0.2798967593920275, "grad_norm": 0.466788113117218, "learning_rate": 9.326325848064978e-06, "loss": 0.3518, "step": 1952 }, { "epoch": 0.2800401491253226, "grad_norm": 0.4269797205924988, "learning_rate": 9.331103678929766e-06, "loss": 0.3828, "step": 1953 }, { "epoch": 0.28018353885861774, "grad_norm": 0.4658278226852417, "learning_rate": 9.335881509794555e-06, "loss": 0.3928, "step": 1954 }, { "epoch": 0.2803269285919128, "grad_norm": 0.4574398994445801, "learning_rate": 9.340659340659341e-06, "loss": 0.3681, "step": 1955 }, { "epoch": 0.2804703183252079, "grad_norm": 0.42527827620506287, "learning_rate": 9.34543717152413e-06, "loss": 0.3689, "step": 1956 }, { "epoch": 0.280613708058503, "grad_norm": 0.43985864520072937, "learning_rate": 9.350215002388916e-06, "loss": 0.3496, "step": 1957 }, { "epoch": 0.2807570977917981, "grad_norm": 0.45800039172172546, "learning_rate": 9.354992833253702e-06, "loss": 0.3622, "step": 1958 }, { "epoch": 0.2809004875250932, "grad_norm": 0.3691648542881012, "learning_rate": 9.35977066411849e-06, "loss": 0.3616, "step": 1959 }, { "epoch": 0.2810438772583883, "grad_norm": 0.44581320881843567, "learning_rate": 9.364548494983279e-06, "loss": 0.3513, "step": 1960 }, { "epoch": 0.2811872669916834, "grad_norm": 0.47160351276397705, "learning_rate": 9.369326325848065e-06, "loss": 0.3827, "step": 1961 }, { "epoch": 0.2813306567249785, "grad_norm": 0.4222598075866699, "learning_rate": 9.374104156712854e-06, "loss": 0.3686, "step": 1962 }, { "epoch": 0.28147404645827356, "grad_norm": 0.42785656452178955, "learning_rate": 9.37888198757764e-06, "loss": 0.3656, "step": 1963 }, { "epoch": 0.2816174361915687, "grad_norm": 0.442342072725296, "learning_rate": 9.383659818442428e-06, "loss": 0.3686, "step": 1964 }, { "epoch": 0.2817608259248638, "grad_norm": 0.5147120356559753, "learning_rate": 9.388437649307215e-06, "loss": 0.3766, "step": 1965 }, { "epoch": 0.2819042156581589, "grad_norm": 0.48474496603012085, "learning_rate": 9.393215480172001e-06, "loss": 0.3672, "step": 1966 }, { "epoch": 0.28204760539145396, "grad_norm": 0.4645819067955017, "learning_rate": 9.39799331103679e-06, "loss": 0.366, "step": 1967 }, { "epoch": 0.28219099512474904, "grad_norm": 0.4780043661594391, "learning_rate": 9.402771141901578e-06, "loss": 0.3795, "step": 1968 }, { "epoch": 0.2823343848580442, "grad_norm": 0.5187668800354004, "learning_rate": 9.407548972766364e-06, "loss": 0.3756, "step": 1969 }, { "epoch": 0.28247777459133927, "grad_norm": 0.5125405192375183, "learning_rate": 9.412326803631153e-06, "loss": 0.3619, "step": 1970 }, { "epoch": 0.28262116432463436, "grad_norm": 0.520746648311615, "learning_rate": 9.41710463449594e-06, "loss": 0.3635, "step": 1971 }, { "epoch": 0.28276455405792944, "grad_norm": 0.4640556275844574, "learning_rate": 9.421882465360727e-06, "loss": 0.3341, "step": 1972 }, { "epoch": 0.2829079437912245, "grad_norm": 0.5022608637809753, "learning_rate": 9.426660296225516e-06, "loss": 0.3653, "step": 1973 }, { "epoch": 0.28305133352451967, "grad_norm": 0.5430384874343872, "learning_rate": 9.431438127090302e-06, "loss": 0.3937, "step": 1974 }, { "epoch": 0.28319472325781475, "grad_norm": 0.5088059902191162, "learning_rate": 9.436215957955089e-06, "loss": 0.3604, "step": 1975 }, { "epoch": 0.28333811299110984, "grad_norm": 0.44809281826019287, "learning_rate": 9.440993788819877e-06, "loss": 0.3783, "step": 1976 }, { "epoch": 0.2834815027244049, "grad_norm": 0.4757801592350006, "learning_rate": 9.445771619684663e-06, "loss": 0.3499, "step": 1977 }, { "epoch": 0.2836248924577, "grad_norm": 0.5225322246551514, "learning_rate": 9.450549450549452e-06, "loss": 0.3686, "step": 1978 }, { "epoch": 0.28376828219099515, "grad_norm": 0.4999322295188904, "learning_rate": 9.455327281414238e-06, "loss": 0.3545, "step": 1979 }, { "epoch": 0.28391167192429023, "grad_norm": 0.4312398135662079, "learning_rate": 9.460105112279026e-06, "loss": 0.3842, "step": 1980 }, { "epoch": 0.2840550616575853, "grad_norm": 0.5600888133049011, "learning_rate": 9.464882943143815e-06, "loss": 0.3666, "step": 1981 }, { "epoch": 0.2841984513908804, "grad_norm": 0.5524774193763733, "learning_rate": 9.469660774008601e-06, "loss": 0.3729, "step": 1982 }, { "epoch": 0.2843418411241755, "grad_norm": 0.4506416320800781, "learning_rate": 9.474438604873388e-06, "loss": 0.3656, "step": 1983 }, { "epoch": 0.28448523085747063, "grad_norm": 0.500836193561554, "learning_rate": 9.479216435738176e-06, "loss": 0.3602, "step": 1984 }, { "epoch": 0.2846286205907657, "grad_norm": 0.4953542649745941, "learning_rate": 9.483994266602962e-06, "loss": 0.364, "step": 1985 }, { "epoch": 0.2847720103240608, "grad_norm": 0.3880102038383484, "learning_rate": 9.48877209746775e-06, "loss": 0.3707, "step": 1986 }, { "epoch": 0.2849154000573559, "grad_norm": 0.5006004571914673, "learning_rate": 9.493549928332539e-06, "loss": 0.3662, "step": 1987 }, { "epoch": 0.28505878979065097, "grad_norm": 0.4481354355812073, "learning_rate": 9.498327759197325e-06, "loss": 0.354, "step": 1988 }, { "epoch": 0.2852021795239461, "grad_norm": 0.3775734007358551, "learning_rate": 9.503105590062112e-06, "loss": 0.3647, "step": 1989 }, { "epoch": 0.2853455692572412, "grad_norm": 0.4556729197502136, "learning_rate": 9.5078834209269e-06, "loss": 0.3679, "step": 1990 }, { "epoch": 0.2854889589905363, "grad_norm": 0.4200204014778137, "learning_rate": 9.512661251791687e-06, "loss": 0.3712, "step": 1991 }, { "epoch": 0.28563234872383136, "grad_norm": 0.4111267328262329, "learning_rate": 9.517439082656475e-06, "loss": 0.3514, "step": 1992 }, { "epoch": 0.28577573845712645, "grad_norm": 0.44637131690979004, "learning_rate": 9.522216913521261e-06, "loss": 0.3716, "step": 1993 }, { "epoch": 0.2859191281904216, "grad_norm": 0.42514607310295105, "learning_rate": 9.52699474438605e-06, "loss": 0.366, "step": 1994 }, { "epoch": 0.2860625179237167, "grad_norm": 0.4723575711250305, "learning_rate": 9.531772575250838e-06, "loss": 0.3469, "step": 1995 }, { "epoch": 0.28620590765701176, "grad_norm": 0.41261157393455505, "learning_rate": 9.536550406115624e-06, "loss": 0.3672, "step": 1996 }, { "epoch": 0.28634929739030685, "grad_norm": 0.46687397360801697, "learning_rate": 9.541328236980411e-06, "loss": 0.3714, "step": 1997 }, { "epoch": 0.28649268712360193, "grad_norm": 0.4241957366466522, "learning_rate": 9.5461060678452e-06, "loss": 0.3614, "step": 1998 }, { "epoch": 0.28663607685689707, "grad_norm": 0.46306824684143066, "learning_rate": 9.550883898709986e-06, "loss": 0.3803, "step": 1999 }, { "epoch": 0.28677946659019216, "grad_norm": 0.4347645044326782, "learning_rate": 9.555661729574774e-06, "loss": 0.3638, "step": 2000 }, { "epoch": 0.28692285632348724, "grad_norm": 0.4644116759300232, "learning_rate": 9.560439560439562e-06, "loss": 0.3815, "step": 2001 }, { "epoch": 0.2870662460567823, "grad_norm": 0.4583599865436554, "learning_rate": 9.565217391304349e-06, "loss": 0.3561, "step": 2002 }, { "epoch": 0.2872096357900774, "grad_norm": 0.48582369089126587, "learning_rate": 9.569995222169137e-06, "loss": 0.3877, "step": 2003 }, { "epoch": 0.28735302552337255, "grad_norm": 0.4374374449253082, "learning_rate": 9.574773053033923e-06, "loss": 0.3652, "step": 2004 }, { "epoch": 0.28749641525666764, "grad_norm": 0.4581700265407562, "learning_rate": 9.57955088389871e-06, "loss": 0.3684, "step": 2005 }, { "epoch": 0.2876398049899627, "grad_norm": 0.4625348150730133, "learning_rate": 9.584328714763498e-06, "loss": 0.3586, "step": 2006 }, { "epoch": 0.2877831947232578, "grad_norm": 0.41239526867866516, "learning_rate": 9.589106545628285e-06, "loss": 0.3727, "step": 2007 }, { "epoch": 0.2879265844565529, "grad_norm": 0.5265212059020996, "learning_rate": 9.593884376493073e-06, "loss": 0.3712, "step": 2008 }, { "epoch": 0.28806997418984803, "grad_norm": 0.47496917843818665, "learning_rate": 9.598662207357861e-06, "loss": 0.3645, "step": 2009 }, { "epoch": 0.2882133639231431, "grad_norm": 0.43523675203323364, "learning_rate": 9.603440038222648e-06, "loss": 0.3517, "step": 2010 }, { "epoch": 0.2883567536564382, "grad_norm": 0.448670357465744, "learning_rate": 9.608217869087434e-06, "loss": 0.3622, "step": 2011 }, { "epoch": 0.2885001433897333, "grad_norm": 0.39282655715942383, "learning_rate": 9.612995699952222e-06, "loss": 0.3464, "step": 2012 }, { "epoch": 0.2886435331230284, "grad_norm": 0.5063558220863342, "learning_rate": 9.617773530817009e-06, "loss": 0.3764, "step": 2013 }, { "epoch": 0.2887869228563235, "grad_norm": 0.43550023436546326, "learning_rate": 9.622551361681797e-06, "loss": 0.3526, "step": 2014 }, { "epoch": 0.2889303125896186, "grad_norm": 0.4811650812625885, "learning_rate": 9.627329192546585e-06, "loss": 0.3568, "step": 2015 }, { "epoch": 0.2890737023229137, "grad_norm": 0.4340903162956238, "learning_rate": 9.632107023411372e-06, "loss": 0.3593, "step": 2016 }, { "epoch": 0.28921709205620877, "grad_norm": 0.45140552520751953, "learning_rate": 9.63688485427616e-06, "loss": 0.375, "step": 2017 }, { "epoch": 0.28936048178950385, "grad_norm": 0.4223378896713257, "learning_rate": 9.641662685140947e-06, "loss": 0.3696, "step": 2018 }, { "epoch": 0.289503871522799, "grad_norm": 0.46365511417388916, "learning_rate": 9.646440516005733e-06, "loss": 0.3786, "step": 2019 }, { "epoch": 0.2896472612560941, "grad_norm": 0.45209798216819763, "learning_rate": 9.651218346870521e-06, "loss": 0.3544, "step": 2020 }, { "epoch": 0.28979065098938916, "grad_norm": 0.38499507308006287, "learning_rate": 9.65599617773531e-06, "loss": 0.3583, "step": 2021 }, { "epoch": 0.28993404072268425, "grad_norm": 0.4213894307613373, "learning_rate": 9.660774008600096e-06, "loss": 0.3563, "step": 2022 }, { "epoch": 0.29007743045597933, "grad_norm": 0.5667498707771301, "learning_rate": 9.665551839464884e-06, "loss": 0.3665, "step": 2023 }, { "epoch": 0.2902208201892745, "grad_norm": 0.4106259346008301, "learning_rate": 9.670329670329671e-06, "loss": 0.3528, "step": 2024 }, { "epoch": 0.29036420992256956, "grad_norm": 0.4764072597026825, "learning_rate": 9.67510750119446e-06, "loss": 0.3514, "step": 2025 }, { "epoch": 0.29050759965586465, "grad_norm": 0.5214059948921204, "learning_rate": 9.679885332059246e-06, "loss": 0.3756, "step": 2026 }, { "epoch": 0.29065098938915973, "grad_norm": 0.44092512130737305, "learning_rate": 9.684663162924032e-06, "loss": 0.3676, "step": 2027 }, { "epoch": 0.2907943791224548, "grad_norm": 0.46354514360427856, "learning_rate": 9.68944099378882e-06, "loss": 0.3699, "step": 2028 }, { "epoch": 0.2909377688557499, "grad_norm": 0.4928266406059265, "learning_rate": 9.694218824653609e-06, "loss": 0.3893, "step": 2029 }, { "epoch": 0.29108115858904504, "grad_norm": 0.433186411857605, "learning_rate": 9.698996655518395e-06, "loss": 0.3508, "step": 2030 }, { "epoch": 0.2912245483223401, "grad_norm": 0.4398455321788788, "learning_rate": 9.703774486383183e-06, "loss": 0.3597, "step": 2031 }, { "epoch": 0.2913679380556352, "grad_norm": 0.42357826232910156, "learning_rate": 9.70855231724797e-06, "loss": 0.3458, "step": 2032 }, { "epoch": 0.2915113277889303, "grad_norm": 0.420491099357605, "learning_rate": 9.713330148112757e-06, "loss": 0.3809, "step": 2033 }, { "epoch": 0.2916547175222254, "grad_norm": 0.46691110730171204, "learning_rate": 9.718107978977545e-06, "loss": 0.3713, "step": 2034 }, { "epoch": 0.2917981072555205, "grad_norm": 0.4174390137195587, "learning_rate": 9.722885809842333e-06, "loss": 0.364, "step": 2035 }, { "epoch": 0.2919414969888156, "grad_norm": 0.4228973388671875, "learning_rate": 9.72766364070712e-06, "loss": 0.3543, "step": 2036 }, { "epoch": 0.2920848867221107, "grad_norm": 0.3879827558994293, "learning_rate": 9.732441471571908e-06, "loss": 0.3627, "step": 2037 }, { "epoch": 0.2922282764554058, "grad_norm": 0.420666366815567, "learning_rate": 9.737219302436694e-06, "loss": 0.3598, "step": 2038 }, { "epoch": 0.29237166618870086, "grad_norm": 0.44431552290916443, "learning_rate": 9.741997133301482e-06, "loss": 0.3895, "step": 2039 }, { "epoch": 0.292515055921996, "grad_norm": 0.4275117814540863, "learning_rate": 9.746774964166269e-06, "loss": 0.3499, "step": 2040 }, { "epoch": 0.2926584456552911, "grad_norm": 0.48493492603302, "learning_rate": 9.751552795031056e-06, "loss": 0.3694, "step": 2041 }, { "epoch": 0.2928018353885862, "grad_norm": 0.401681512594223, "learning_rate": 9.756330625895844e-06, "loss": 0.3726, "step": 2042 }, { "epoch": 0.29294522512188126, "grad_norm": 0.4581857919692993, "learning_rate": 9.761108456760632e-06, "loss": 0.3583, "step": 2043 }, { "epoch": 0.29308861485517634, "grad_norm": 0.4414709508419037, "learning_rate": 9.765886287625419e-06, "loss": 0.358, "step": 2044 }, { "epoch": 0.2932320045884715, "grad_norm": 0.4256233870983124, "learning_rate": 9.770664118490207e-06, "loss": 0.3497, "step": 2045 }, { "epoch": 0.29337539432176657, "grad_norm": 0.5207164883613586, "learning_rate": 9.775441949354993e-06, "loss": 0.3828, "step": 2046 }, { "epoch": 0.29351878405506165, "grad_norm": 0.4909336566925049, "learning_rate": 9.780219780219781e-06, "loss": 0.3461, "step": 2047 }, { "epoch": 0.29366217378835674, "grad_norm": 0.4292454123497009, "learning_rate": 9.784997611084568e-06, "loss": 0.3656, "step": 2048 }, { "epoch": 0.2938055635216518, "grad_norm": 0.5129491686820984, "learning_rate": 9.789775441949356e-06, "loss": 0.3553, "step": 2049 }, { "epoch": 0.29394895325494697, "grad_norm": 0.49006783962249756, "learning_rate": 9.794553272814143e-06, "loss": 0.3848, "step": 2050 }, { "epoch": 0.29409234298824205, "grad_norm": 0.4012763798236847, "learning_rate": 9.799331103678931e-06, "loss": 0.3717, "step": 2051 }, { "epoch": 0.29423573272153714, "grad_norm": 0.4709794521331787, "learning_rate": 9.804108934543718e-06, "loss": 0.3693, "step": 2052 }, { "epoch": 0.2943791224548322, "grad_norm": 0.47466254234313965, "learning_rate": 9.808886765408506e-06, "loss": 0.3896, "step": 2053 }, { "epoch": 0.2945225121881273, "grad_norm": 0.43538469076156616, "learning_rate": 9.813664596273292e-06, "loss": 0.3679, "step": 2054 }, { "epoch": 0.29466590192142245, "grad_norm": 0.4538669288158417, "learning_rate": 9.818442427138079e-06, "loss": 0.3435, "step": 2055 }, { "epoch": 0.29480929165471753, "grad_norm": 0.4081132709980011, "learning_rate": 9.823220258002869e-06, "loss": 0.3583, "step": 2056 }, { "epoch": 0.2949526813880126, "grad_norm": 0.4823303818702698, "learning_rate": 9.827998088867655e-06, "loss": 0.3734, "step": 2057 }, { "epoch": 0.2950960711213077, "grad_norm": 0.43532466888427734, "learning_rate": 9.832775919732442e-06, "loss": 0.3621, "step": 2058 }, { "epoch": 0.2952394608546028, "grad_norm": 0.40442684292793274, "learning_rate": 9.83755375059723e-06, "loss": 0.3671, "step": 2059 }, { "epoch": 0.2953828505878979, "grad_norm": 0.45969510078430176, "learning_rate": 9.842331581462017e-06, "loss": 0.3716, "step": 2060 }, { "epoch": 0.295526240321193, "grad_norm": 0.38909146189689636, "learning_rate": 9.847109412326805e-06, "loss": 0.3495, "step": 2061 }, { "epoch": 0.2956696300544881, "grad_norm": 0.4745708703994751, "learning_rate": 9.851887243191591e-06, "loss": 0.3652, "step": 2062 }, { "epoch": 0.2958130197877832, "grad_norm": 0.4664292335510254, "learning_rate": 9.85666507405638e-06, "loss": 0.3682, "step": 2063 }, { "epoch": 0.29595640952107827, "grad_norm": 0.4429015517234802, "learning_rate": 9.861442904921166e-06, "loss": 0.3727, "step": 2064 }, { "epoch": 0.2960997992543734, "grad_norm": 0.4600830376148224, "learning_rate": 9.866220735785954e-06, "loss": 0.356, "step": 2065 }, { "epoch": 0.2962431889876685, "grad_norm": 0.41489750146865845, "learning_rate": 9.87099856665074e-06, "loss": 0.3673, "step": 2066 }, { "epoch": 0.2963865787209636, "grad_norm": 0.49784141778945923, "learning_rate": 9.875776397515529e-06, "loss": 0.3768, "step": 2067 }, { "epoch": 0.29652996845425866, "grad_norm": 0.40346720814704895, "learning_rate": 9.880554228380316e-06, "loss": 0.3682, "step": 2068 }, { "epoch": 0.29667335818755375, "grad_norm": 0.5136205554008484, "learning_rate": 9.885332059245104e-06, "loss": 0.3643, "step": 2069 }, { "epoch": 0.2968167479208489, "grad_norm": 0.4495609402656555, "learning_rate": 9.890109890109892e-06, "loss": 0.3831, "step": 2070 }, { "epoch": 0.296960137654144, "grad_norm": 0.4239486753940582, "learning_rate": 9.894887720974679e-06, "loss": 0.3852, "step": 2071 }, { "epoch": 0.29710352738743906, "grad_norm": 0.4433147609233856, "learning_rate": 9.899665551839465e-06, "loss": 0.3569, "step": 2072 }, { "epoch": 0.29724691712073414, "grad_norm": 0.4538070857524872, "learning_rate": 9.904443382704253e-06, "loss": 0.3836, "step": 2073 }, { "epoch": 0.29739030685402923, "grad_norm": 0.4895041584968567, "learning_rate": 9.90922121356904e-06, "loss": 0.3698, "step": 2074 }, { "epoch": 0.29753369658732437, "grad_norm": 0.4156481623649597, "learning_rate": 9.913999044433828e-06, "loss": 0.3845, "step": 2075 }, { "epoch": 0.29767708632061946, "grad_norm": 0.4602239429950714, "learning_rate": 9.918776875298616e-06, "loss": 0.34, "step": 2076 }, { "epoch": 0.29782047605391454, "grad_norm": 0.4607965052127838, "learning_rate": 9.923554706163403e-06, "loss": 0.3573, "step": 2077 }, { "epoch": 0.2979638657872096, "grad_norm": 0.4478813111782074, "learning_rate": 9.928332537028191e-06, "loss": 0.3678, "step": 2078 }, { "epoch": 0.2981072555205047, "grad_norm": 0.4032374620437622, "learning_rate": 9.933110367892978e-06, "loss": 0.3534, "step": 2079 }, { "epoch": 0.29825064525379985, "grad_norm": 0.41100096702575684, "learning_rate": 9.937888198757764e-06, "loss": 0.3415, "step": 2080 }, { "epoch": 0.29839403498709494, "grad_norm": 0.44716960191726685, "learning_rate": 9.942666029622552e-06, "loss": 0.3632, "step": 2081 }, { "epoch": 0.29853742472039, "grad_norm": 0.43565037846565247, "learning_rate": 9.947443860487339e-06, "loss": 0.3742, "step": 2082 }, { "epoch": 0.2986808144536851, "grad_norm": 0.44986528158187866, "learning_rate": 9.952221691352127e-06, "loss": 0.349, "step": 2083 }, { "epoch": 0.2988242041869802, "grad_norm": 0.44518083333969116, "learning_rate": 9.956999522216915e-06, "loss": 0.356, "step": 2084 }, { "epoch": 0.29896759392027533, "grad_norm": 0.462034672498703, "learning_rate": 9.961777353081702e-06, "loss": 0.3769, "step": 2085 }, { "epoch": 0.2991109836535704, "grad_norm": 0.4801115095615387, "learning_rate": 9.966555183946488e-06, "loss": 0.3821, "step": 2086 }, { "epoch": 0.2992543733868655, "grad_norm": 0.37178778648376465, "learning_rate": 9.971333014811277e-06, "loss": 0.361, "step": 2087 }, { "epoch": 0.2993977631201606, "grad_norm": 0.46763116121292114, "learning_rate": 9.976110845676063e-06, "loss": 0.3567, "step": 2088 }, { "epoch": 0.29954115285345567, "grad_norm": 0.46441951394081116, "learning_rate": 9.980888676540851e-06, "loss": 0.3717, "step": 2089 }, { "epoch": 0.2996845425867508, "grad_norm": 0.45240265130996704, "learning_rate": 9.98566650740564e-06, "loss": 0.3701, "step": 2090 }, { "epoch": 0.2998279323200459, "grad_norm": 0.4002957344055176, "learning_rate": 9.990444338270426e-06, "loss": 0.3644, "step": 2091 }, { "epoch": 0.299971322053341, "grad_norm": 0.4394091069698334, "learning_rate": 9.995222169135214e-06, "loss": 0.3574, "step": 2092 }, { "epoch": 0.30011471178663607, "grad_norm": 0.43499240279197693, "learning_rate": 1e-05, "loss": 0.3741, "step": 2093 }, { "epoch": 0.30025810151993115, "grad_norm": 0.4010029137134552, "learning_rate": 9.999999930403847e-06, "loss": 0.3597, "step": 2094 }, { "epoch": 0.3004014912532263, "grad_norm": 0.44982674717903137, "learning_rate": 9.999999721615386e-06, "loss": 0.3645, "step": 2095 }, { "epoch": 0.3005448809865214, "grad_norm": 0.49170324206352234, "learning_rate": 9.999999373634623e-06, "loss": 0.3444, "step": 2096 }, { "epoch": 0.30068827071981646, "grad_norm": 0.3970239758491516, "learning_rate": 9.99999888646157e-06, "loss": 0.3774, "step": 2097 }, { "epoch": 0.30083166045311155, "grad_norm": 0.4646018147468567, "learning_rate": 9.99999826009624e-06, "loss": 0.3821, "step": 2098 }, { "epoch": 0.30097505018640663, "grad_norm": 0.4388396441936493, "learning_rate": 9.99999749453865e-06, "loss": 0.3697, "step": 2099 }, { "epoch": 0.3011184399197018, "grad_norm": 0.36101311445236206, "learning_rate": 9.99999658978882e-06, "loss": 0.366, "step": 2100 }, { "epoch": 0.30126182965299686, "grad_norm": 0.43508368730545044, "learning_rate": 9.999995545846778e-06, "loss": 0.3698, "step": 2101 }, { "epoch": 0.30140521938629194, "grad_norm": 0.4199353754520416, "learning_rate": 9.99999436271255e-06, "loss": 0.3653, "step": 2102 }, { "epoch": 0.30154860911958703, "grad_norm": 0.4417155981063843, "learning_rate": 9.999993040386172e-06, "loss": 0.3704, "step": 2103 }, { "epoch": 0.3016919988528821, "grad_norm": 0.43660110235214233, "learning_rate": 9.999991578867677e-06, "loss": 0.3443, "step": 2104 }, { "epoch": 0.30183538858617726, "grad_norm": 0.4290003776550293, "learning_rate": 9.99998997815711e-06, "loss": 0.3678, "step": 2105 }, { "epoch": 0.30197877831947234, "grad_norm": 0.46841371059417725, "learning_rate": 9.999988238254513e-06, "loss": 0.3714, "step": 2106 }, { "epoch": 0.3021221680527674, "grad_norm": 0.5068476796150208, "learning_rate": 9.999986359159933e-06, "loss": 0.3567, "step": 2107 }, { "epoch": 0.3022655577860625, "grad_norm": 0.48073822259902954, "learning_rate": 9.999984340873426e-06, "loss": 0.3587, "step": 2108 }, { "epoch": 0.3024089475193576, "grad_norm": 0.48790931701660156, "learning_rate": 9.999982183395045e-06, "loss": 0.3493, "step": 2109 }, { "epoch": 0.30255233725265274, "grad_norm": 0.49404582381248474, "learning_rate": 9.999979886724852e-06, "loss": 0.371, "step": 2110 }, { "epoch": 0.3026957269859478, "grad_norm": 0.4990573823451996, "learning_rate": 9.999977450862912e-06, "loss": 0.387, "step": 2111 }, { "epoch": 0.3028391167192429, "grad_norm": 0.4496656060218811, "learning_rate": 9.999974875809289e-06, "loss": 0.3686, "step": 2112 }, { "epoch": 0.302982506452538, "grad_norm": 0.47828954458236694, "learning_rate": 9.999972161564058e-06, "loss": 0.3612, "step": 2113 }, { "epoch": 0.3031258961858331, "grad_norm": 0.571133017539978, "learning_rate": 9.999969308127292e-06, "loss": 0.3899, "step": 2114 }, { "epoch": 0.3032692859191282, "grad_norm": 0.44769811630249023, "learning_rate": 9.999966315499073e-06, "loss": 0.3726, "step": 2115 }, { "epoch": 0.3034126756524233, "grad_norm": 0.5377645492553711, "learning_rate": 9.999963183679484e-06, "loss": 0.3862, "step": 2116 }, { "epoch": 0.3035560653857184, "grad_norm": 0.5182932019233704, "learning_rate": 9.999959912668609e-06, "loss": 0.367, "step": 2117 }, { "epoch": 0.3036994551190135, "grad_norm": 0.5073755383491516, "learning_rate": 9.999956502466543e-06, "loss": 0.3482, "step": 2118 }, { "epoch": 0.30384284485230856, "grad_norm": 0.4204258620738983, "learning_rate": 9.99995295307338e-06, "loss": 0.3721, "step": 2119 }, { "epoch": 0.3039862345856037, "grad_norm": 0.4977889657020569, "learning_rate": 9.999949264489217e-06, "loss": 0.3736, "step": 2120 }, { "epoch": 0.3041296243188988, "grad_norm": 0.4725348651409149, "learning_rate": 9.999945436714159e-06, "loss": 0.3602, "step": 2121 }, { "epoch": 0.30427301405219387, "grad_norm": 0.4365196228027344, "learning_rate": 9.999941469748309e-06, "loss": 0.3655, "step": 2122 }, { "epoch": 0.30441640378548895, "grad_norm": 0.4149129390716553, "learning_rate": 9.999937363591781e-06, "loss": 0.3503, "step": 2123 }, { "epoch": 0.30455979351878404, "grad_norm": 0.4176013171672821, "learning_rate": 9.999933118244689e-06, "loss": 0.3775, "step": 2124 }, { "epoch": 0.3047031832520791, "grad_norm": 0.4086458086967468, "learning_rate": 9.999928733707151e-06, "loss": 0.3702, "step": 2125 }, { "epoch": 0.30484657298537426, "grad_norm": 0.39024123549461365, "learning_rate": 9.999924209979286e-06, "loss": 0.3557, "step": 2126 }, { "epoch": 0.30498996271866935, "grad_norm": 0.4007832109928131, "learning_rate": 9.999919547061224e-06, "loss": 0.3673, "step": 2127 }, { "epoch": 0.30513335245196443, "grad_norm": 0.4229172170162201, "learning_rate": 9.999914744953092e-06, "loss": 0.3927, "step": 2128 }, { "epoch": 0.3052767421852595, "grad_norm": 0.42897143959999084, "learning_rate": 9.999909803655027e-06, "loss": 0.3646, "step": 2129 }, { "epoch": 0.3054201319185546, "grad_norm": 0.4407944083213806, "learning_rate": 9.999904723167162e-06, "loss": 0.3627, "step": 2130 }, { "epoch": 0.30556352165184975, "grad_norm": 0.48591917753219604, "learning_rate": 9.999899503489641e-06, "loss": 0.3662, "step": 2131 }, { "epoch": 0.30570691138514483, "grad_norm": 0.43760573863983154, "learning_rate": 9.999894144622611e-06, "loss": 0.3625, "step": 2132 }, { "epoch": 0.3058503011184399, "grad_norm": 0.43549644947052, "learning_rate": 9.999888646566218e-06, "loss": 0.3724, "step": 2133 }, { "epoch": 0.305993690851735, "grad_norm": 0.4324412941932678, "learning_rate": 9.999883009320617e-06, "loss": 0.3517, "step": 2134 }, { "epoch": 0.3061370805850301, "grad_norm": 0.46654507517814636, "learning_rate": 9.999877232885964e-06, "loss": 0.3557, "step": 2135 }, { "epoch": 0.3062804703183252, "grad_norm": 0.4041859805583954, "learning_rate": 9.999871317262419e-06, "loss": 0.3634, "step": 2136 }, { "epoch": 0.3064238600516203, "grad_norm": 0.5477749109268188, "learning_rate": 9.999865262450148e-06, "loss": 0.3916, "step": 2137 }, { "epoch": 0.3065672497849154, "grad_norm": 0.42294320464134216, "learning_rate": 9.99985906844932e-06, "loss": 0.3655, "step": 2138 }, { "epoch": 0.3067106395182105, "grad_norm": 0.49534526467323303, "learning_rate": 9.999852735260107e-06, "loss": 0.3771, "step": 2139 }, { "epoch": 0.30685402925150557, "grad_norm": 0.46375060081481934, "learning_rate": 9.999846262882684e-06, "loss": 0.3561, "step": 2140 }, { "epoch": 0.3069974189848007, "grad_norm": 0.39850881695747375, "learning_rate": 9.999839651317234e-06, "loss": 0.381, "step": 2141 }, { "epoch": 0.3071408087180958, "grad_norm": 0.44188809394836426, "learning_rate": 9.999832900563937e-06, "loss": 0.3727, "step": 2142 }, { "epoch": 0.3072841984513909, "grad_norm": 0.4345448315143585, "learning_rate": 9.999826010622986e-06, "loss": 0.3624, "step": 2143 }, { "epoch": 0.30742758818468596, "grad_norm": 0.4636017084121704, "learning_rate": 9.999818981494567e-06, "loss": 0.3722, "step": 2144 }, { "epoch": 0.30757097791798105, "grad_norm": 0.38209104537963867, "learning_rate": 9.99981181317888e-06, "loss": 0.3791, "step": 2145 }, { "epoch": 0.3077143676512762, "grad_norm": 0.4185943305492401, "learning_rate": 9.999804505676125e-06, "loss": 0.3693, "step": 2146 }, { "epoch": 0.3078577573845713, "grad_norm": 0.4416112005710602, "learning_rate": 9.9997970589865e-06, "loss": 0.3866, "step": 2147 }, { "epoch": 0.30800114711786636, "grad_norm": 0.43748947978019714, "learning_rate": 9.999789473110219e-06, "loss": 0.3744, "step": 2148 }, { "epoch": 0.30814453685116144, "grad_norm": 0.4207359552383423, "learning_rate": 9.99978174804749e-06, "loss": 0.3581, "step": 2149 }, { "epoch": 0.30828792658445653, "grad_norm": 0.42667049169540405, "learning_rate": 9.999773883798527e-06, "loss": 0.3891, "step": 2150 }, { "epoch": 0.30843131631775167, "grad_norm": 0.47797292470932007, "learning_rate": 9.999765880363553e-06, "loss": 0.3936, "step": 2151 }, { "epoch": 0.30857470605104675, "grad_norm": 0.37972521781921387, "learning_rate": 9.999757737742784e-06, "loss": 0.368, "step": 2152 }, { "epoch": 0.30871809578434184, "grad_norm": 0.4151119887828827, "learning_rate": 9.999749455936452e-06, "loss": 0.3557, "step": 2153 }, { "epoch": 0.3088614855176369, "grad_norm": 0.4273431599140167, "learning_rate": 9.999741034944787e-06, "loss": 0.3924, "step": 2154 }, { "epoch": 0.309004875250932, "grad_norm": 0.44512003660202026, "learning_rate": 9.999732474768023e-06, "loss": 0.3732, "step": 2155 }, { "epoch": 0.30914826498422715, "grad_norm": 0.38569176197052, "learning_rate": 9.999723775406398e-06, "loss": 0.3726, "step": 2156 }, { "epoch": 0.30929165471752224, "grad_norm": 0.36929041147232056, "learning_rate": 9.999714936860153e-06, "loss": 0.3516, "step": 2157 }, { "epoch": 0.3094350444508173, "grad_norm": 0.4036484360694885, "learning_rate": 9.999705959129535e-06, "loss": 0.3597, "step": 2158 }, { "epoch": 0.3095784341841124, "grad_norm": 0.38243934512138367, "learning_rate": 9.999696842214794e-06, "loss": 0.3585, "step": 2159 }, { "epoch": 0.3097218239174075, "grad_norm": 0.3967842757701874, "learning_rate": 9.999687586116184e-06, "loss": 0.3554, "step": 2160 }, { "epoch": 0.30986521365070263, "grad_norm": 0.3565588593482971, "learning_rate": 9.999678190833963e-06, "loss": 0.347, "step": 2161 }, { "epoch": 0.3100086033839977, "grad_norm": 0.3903677463531494, "learning_rate": 9.999668656368393e-06, "loss": 0.3773, "step": 2162 }, { "epoch": 0.3101519931172928, "grad_norm": 0.3772165775299072, "learning_rate": 9.999658982719738e-06, "loss": 0.3682, "step": 2163 }, { "epoch": 0.3102953828505879, "grad_norm": 0.4085756540298462, "learning_rate": 9.999649169888267e-06, "loss": 0.3331, "step": 2164 }, { "epoch": 0.31043877258388297, "grad_norm": 0.4021753668785095, "learning_rate": 9.999639217874255e-06, "loss": 0.3482, "step": 2165 }, { "epoch": 0.3105821623171781, "grad_norm": 0.4220201373100281, "learning_rate": 9.999629126677976e-06, "loss": 0.3601, "step": 2166 }, { "epoch": 0.3107255520504732, "grad_norm": 0.4038960635662079, "learning_rate": 9.999618896299717e-06, "loss": 0.3596, "step": 2167 }, { "epoch": 0.3108689417837683, "grad_norm": 0.42963847517967224, "learning_rate": 9.999608526739754e-06, "loss": 0.3633, "step": 2168 }, { "epoch": 0.31101233151706337, "grad_norm": 0.41541245579719543, "learning_rate": 9.999598017998384e-06, "loss": 0.3509, "step": 2169 }, { "epoch": 0.31115572125035845, "grad_norm": 0.4104004204273224, "learning_rate": 9.999587370075895e-06, "loss": 0.3584, "step": 2170 }, { "epoch": 0.3112991109836536, "grad_norm": 0.43908455967903137, "learning_rate": 9.999576582972583e-06, "loss": 0.3623, "step": 2171 }, { "epoch": 0.3114425007169487, "grad_norm": 0.41681694984436035, "learning_rate": 9.999565656688753e-06, "loss": 0.3795, "step": 2172 }, { "epoch": 0.31158589045024376, "grad_norm": 0.43950584530830383, "learning_rate": 9.999554591224704e-06, "loss": 0.3566, "step": 2173 }, { "epoch": 0.31172928018353885, "grad_norm": 0.42471927404403687, "learning_rate": 9.999543386580744e-06, "loss": 0.3563, "step": 2174 }, { "epoch": 0.31187266991683393, "grad_norm": 0.41892990469932556, "learning_rate": 9.99953204275719e-06, "loss": 0.3611, "step": 2175 }, { "epoch": 0.3120160596501291, "grad_norm": 0.4436235725879669, "learning_rate": 9.999520559754352e-06, "loss": 0.374, "step": 2176 }, { "epoch": 0.31215944938342416, "grad_norm": 0.4087196886539459, "learning_rate": 9.999508937572555e-06, "loss": 0.3481, "step": 2177 }, { "epoch": 0.31230283911671924, "grad_norm": 0.40417107939720154, "learning_rate": 9.999497176212118e-06, "loss": 0.3655, "step": 2178 }, { "epoch": 0.31244622885001433, "grad_norm": 0.3972143828868866, "learning_rate": 9.999485275673371e-06, "loss": 0.3513, "step": 2179 }, { "epoch": 0.3125896185833094, "grad_norm": 0.4404175877571106, "learning_rate": 9.999473235956644e-06, "loss": 0.3792, "step": 2180 }, { "epoch": 0.31273300831660455, "grad_norm": 0.43218857049942017, "learning_rate": 9.999461057062272e-06, "loss": 0.3671, "step": 2181 }, { "epoch": 0.31287639804989964, "grad_norm": 0.39767932891845703, "learning_rate": 9.999448738990596e-06, "loss": 0.3528, "step": 2182 }, { "epoch": 0.3130197877831947, "grad_norm": 0.4446158707141876, "learning_rate": 9.999436281741959e-06, "loss": 0.3569, "step": 2183 }, { "epoch": 0.3131631775164898, "grad_norm": 0.40249931812286377, "learning_rate": 9.999423685316705e-06, "loss": 0.3407, "step": 2184 }, { "epoch": 0.3133065672497849, "grad_norm": 0.4027266800403595, "learning_rate": 9.999410949715184e-06, "loss": 0.3581, "step": 2185 }, { "epoch": 0.31344995698308004, "grad_norm": 0.4822736382484436, "learning_rate": 9.999398074937756e-06, "loss": 0.3625, "step": 2186 }, { "epoch": 0.3135933467163751, "grad_norm": 0.41243791580200195, "learning_rate": 9.999385060984775e-06, "loss": 0.3817, "step": 2187 }, { "epoch": 0.3137367364496702, "grad_norm": 0.4302840828895569, "learning_rate": 9.999371907856605e-06, "loss": 0.3425, "step": 2188 }, { "epoch": 0.3138801261829653, "grad_norm": 0.43612706661224365, "learning_rate": 9.999358615553611e-06, "loss": 0.3829, "step": 2189 }, { "epoch": 0.3140235159162604, "grad_norm": 0.4288233816623688, "learning_rate": 9.999345184076164e-06, "loss": 0.3692, "step": 2190 }, { "epoch": 0.3141669056495555, "grad_norm": 0.46303191781044006, "learning_rate": 9.999331613424636e-06, "loss": 0.3815, "step": 2191 }, { "epoch": 0.3143102953828506, "grad_norm": 0.4097793698310852, "learning_rate": 9.999317903599407e-06, "loss": 0.3593, "step": 2192 }, { "epoch": 0.3144536851161457, "grad_norm": 0.44302982091903687, "learning_rate": 9.999304054600859e-06, "loss": 0.343, "step": 2193 }, { "epoch": 0.31459707484944077, "grad_norm": 0.4116382896900177, "learning_rate": 9.999290066429376e-06, "loss": 0.3779, "step": 2194 }, { "epoch": 0.31474046458273586, "grad_norm": 0.4334692060947418, "learning_rate": 9.999275939085346e-06, "loss": 0.3583, "step": 2195 }, { "epoch": 0.314883854316031, "grad_norm": 0.45251786708831787, "learning_rate": 9.999261672569167e-06, "loss": 0.3955, "step": 2196 }, { "epoch": 0.3150272440493261, "grad_norm": 0.4792901873588562, "learning_rate": 9.999247266881233e-06, "loss": 0.3775, "step": 2197 }, { "epoch": 0.31517063378262117, "grad_norm": 0.4637601375579834, "learning_rate": 9.999232722021943e-06, "loss": 0.3824, "step": 2198 }, { "epoch": 0.31531402351591625, "grad_norm": 0.43394917249679565, "learning_rate": 9.999218037991706e-06, "loss": 0.3777, "step": 2199 }, { "epoch": 0.31545741324921134, "grad_norm": 0.41366302967071533, "learning_rate": 9.999203214790929e-06, "loss": 0.356, "step": 2200 }, { "epoch": 0.3156008029825065, "grad_norm": 0.3786259889602661, "learning_rate": 9.999188252420026e-06, "loss": 0.3718, "step": 2201 }, { "epoch": 0.31574419271580156, "grad_norm": 0.4551234841346741, "learning_rate": 9.999173150879411e-06, "loss": 0.3719, "step": 2202 }, { "epoch": 0.31588758244909665, "grad_norm": 0.42066800594329834, "learning_rate": 9.999157910169504e-06, "loss": 0.3473, "step": 2203 }, { "epoch": 0.31603097218239173, "grad_norm": 0.4798874855041504, "learning_rate": 9.999142530290733e-06, "loss": 0.3724, "step": 2204 }, { "epoch": 0.3161743619156868, "grad_norm": 0.37194010615348816, "learning_rate": 9.999127011243523e-06, "loss": 0.3769, "step": 2205 }, { "epoch": 0.31631775164898196, "grad_norm": 0.4180563688278198, "learning_rate": 9.999111353028308e-06, "loss": 0.3617, "step": 2206 }, { "epoch": 0.31646114138227704, "grad_norm": 0.45570626854896545, "learning_rate": 9.999095555645524e-06, "loss": 0.3514, "step": 2207 }, { "epoch": 0.31660453111557213, "grad_norm": 0.3853025436401367, "learning_rate": 9.999079619095608e-06, "loss": 0.3467, "step": 2208 }, { "epoch": 0.3167479208488672, "grad_norm": 0.4317592680454254, "learning_rate": 9.999063543379006e-06, "loss": 0.3713, "step": 2209 }, { "epoch": 0.3168913105821623, "grad_norm": 0.39607495069503784, "learning_rate": 9.999047328496164e-06, "loss": 0.3573, "step": 2210 }, { "epoch": 0.31703470031545744, "grad_norm": 0.4427928626537323, "learning_rate": 9.999030974447535e-06, "loss": 0.3614, "step": 2211 }, { "epoch": 0.3171780900487525, "grad_norm": 0.38567402958869934, "learning_rate": 9.999014481233575e-06, "loss": 0.3577, "step": 2212 }, { "epoch": 0.3173214797820476, "grad_norm": 0.4198855757713318, "learning_rate": 9.99899784885474e-06, "loss": 0.3636, "step": 2213 }, { "epoch": 0.3174648695153427, "grad_norm": 0.45080044865608215, "learning_rate": 9.998981077311496e-06, "loss": 0.357, "step": 2214 }, { "epoch": 0.3176082592486378, "grad_norm": 0.3929312229156494, "learning_rate": 9.998964166604308e-06, "loss": 0.336, "step": 2215 }, { "epoch": 0.31775164898193287, "grad_norm": 0.41810542345046997, "learning_rate": 9.998947116733648e-06, "loss": 0.3983, "step": 2216 }, { "epoch": 0.317895038715228, "grad_norm": 0.4327666461467743, "learning_rate": 9.998929927699989e-06, "loss": 0.3756, "step": 2217 }, { "epoch": 0.3180384284485231, "grad_norm": 0.3650498390197754, "learning_rate": 9.99891259950381e-06, "loss": 0.3677, "step": 2218 }, { "epoch": 0.3181818181818182, "grad_norm": 0.4228907525539398, "learning_rate": 9.998895132145596e-06, "loss": 0.36, "step": 2219 }, { "epoch": 0.31832520791511326, "grad_norm": 0.4274291694164276, "learning_rate": 9.99887752562583e-06, "loss": 0.3524, "step": 2220 }, { "epoch": 0.31846859764840835, "grad_norm": 0.3916853666305542, "learning_rate": 9.998859779945005e-06, "loss": 0.3648, "step": 2221 }, { "epoch": 0.3186119873817035, "grad_norm": 0.4174906313419342, "learning_rate": 9.99884189510361e-06, "loss": 0.3907, "step": 2222 }, { "epoch": 0.3187553771149986, "grad_norm": 0.46875327825546265, "learning_rate": 9.99882387110215e-06, "loss": 0.3661, "step": 2223 }, { "epoch": 0.31889876684829366, "grad_norm": 0.42952603101730347, "learning_rate": 9.998805707941122e-06, "loss": 0.356, "step": 2224 }, { "epoch": 0.31904215658158874, "grad_norm": 0.44190752506256104, "learning_rate": 9.998787405621032e-06, "loss": 0.3539, "step": 2225 }, { "epoch": 0.31918554631488383, "grad_norm": 0.4532366394996643, "learning_rate": 9.998768964142392e-06, "loss": 0.3556, "step": 2226 }, { "epoch": 0.31932893604817897, "grad_norm": 0.41239097714424133, "learning_rate": 9.998750383505712e-06, "loss": 0.3583, "step": 2227 }, { "epoch": 0.31947232578147405, "grad_norm": 0.45211538672447205, "learning_rate": 9.99873166371151e-06, "loss": 0.3588, "step": 2228 }, { "epoch": 0.31961571551476914, "grad_norm": 0.47616398334503174, "learning_rate": 9.998712804760309e-06, "loss": 0.36, "step": 2229 }, { "epoch": 0.3197591052480642, "grad_norm": 0.4007568955421448, "learning_rate": 9.998693806652632e-06, "loss": 0.3748, "step": 2230 }, { "epoch": 0.3199024949813593, "grad_norm": 0.4083974361419678, "learning_rate": 9.99867466938901e-06, "loss": 0.3493, "step": 2231 }, { "epoch": 0.32004588471465445, "grad_norm": 0.42137283086776733, "learning_rate": 9.998655392969975e-06, "loss": 0.3753, "step": 2232 }, { "epoch": 0.32018927444794953, "grad_norm": 0.4390164315700531, "learning_rate": 9.998635977396063e-06, "loss": 0.3627, "step": 2233 }, { "epoch": 0.3203326641812446, "grad_norm": 0.3882579505443573, "learning_rate": 9.998616422667813e-06, "loss": 0.3481, "step": 2234 }, { "epoch": 0.3204760539145397, "grad_norm": 0.4203140139579773, "learning_rate": 9.998596728785772e-06, "loss": 0.3589, "step": 2235 }, { "epoch": 0.3206194436478348, "grad_norm": 0.4505907893180847, "learning_rate": 9.99857689575049e-06, "loss": 0.3779, "step": 2236 }, { "epoch": 0.32076283338112993, "grad_norm": 0.4258905351161957, "learning_rate": 9.998556923562512e-06, "loss": 0.3941, "step": 2237 }, { "epoch": 0.320906223114425, "grad_norm": 0.44969210028648376, "learning_rate": 9.998536812222401e-06, "loss": 0.3395, "step": 2238 }, { "epoch": 0.3210496128477201, "grad_norm": 0.3640768229961395, "learning_rate": 9.998516561730715e-06, "loss": 0.3559, "step": 2239 }, { "epoch": 0.3211930025810152, "grad_norm": 0.4742951989173889, "learning_rate": 9.998496172088016e-06, "loss": 0.3538, "step": 2240 }, { "epoch": 0.32133639231431027, "grad_norm": 0.3975312411785126, "learning_rate": 9.998475643294873e-06, "loss": 0.3543, "step": 2241 }, { "epoch": 0.3214797820476054, "grad_norm": 0.41644325852394104, "learning_rate": 9.998454975351857e-06, "loss": 0.3567, "step": 2242 }, { "epoch": 0.3216231717809005, "grad_norm": 0.401447594165802, "learning_rate": 9.998434168259543e-06, "loss": 0.3645, "step": 2243 }, { "epoch": 0.3217665615141956, "grad_norm": 0.4010425806045532, "learning_rate": 9.99841322201851e-06, "loss": 0.3628, "step": 2244 }, { "epoch": 0.32190995124749067, "grad_norm": 0.45024821162223816, "learning_rate": 9.998392136629344e-06, "loss": 0.3745, "step": 2245 }, { "epoch": 0.32205334098078575, "grad_norm": 0.3887880742549896, "learning_rate": 9.99837091209263e-06, "loss": 0.3629, "step": 2246 }, { "epoch": 0.3221967307140809, "grad_norm": 0.46887144446372986, "learning_rate": 9.998349548408959e-06, "loss": 0.3886, "step": 2247 }, { "epoch": 0.322340120447376, "grad_norm": 0.4060555100440979, "learning_rate": 9.998328045578923e-06, "loss": 0.3997, "step": 2248 }, { "epoch": 0.32248351018067106, "grad_norm": 0.4143604338169098, "learning_rate": 9.998306403603126e-06, "loss": 0.3833, "step": 2249 }, { "epoch": 0.32262689991396615, "grad_norm": 0.398350328207016, "learning_rate": 9.998284622482165e-06, "loss": 0.3611, "step": 2250 }, { "epoch": 0.32277028964726123, "grad_norm": 0.4250696897506714, "learning_rate": 9.99826270221665e-06, "loss": 0.3638, "step": 2251 }, { "epoch": 0.3229136793805564, "grad_norm": 0.42599913477897644, "learning_rate": 9.99824064280719e-06, "loss": 0.3718, "step": 2252 }, { "epoch": 0.32305706911385146, "grad_norm": 0.37193357944488525, "learning_rate": 9.9982184442544e-06, "loss": 0.3479, "step": 2253 }, { "epoch": 0.32320045884714654, "grad_norm": 0.47157466411590576, "learning_rate": 9.998196106558896e-06, "loss": 0.3663, "step": 2254 }, { "epoch": 0.32334384858044163, "grad_norm": 0.4764041304588318, "learning_rate": 9.998173629721301e-06, "loss": 0.3588, "step": 2255 }, { "epoch": 0.3234872383137367, "grad_norm": 0.4230290353298187, "learning_rate": 9.998151013742242e-06, "loss": 0.3773, "step": 2256 }, { "epoch": 0.32363062804703185, "grad_norm": 0.48977017402648926, "learning_rate": 9.998128258622345e-06, "loss": 0.3677, "step": 2257 }, { "epoch": 0.32377401778032694, "grad_norm": 0.44534626603126526, "learning_rate": 9.99810536436225e-06, "loss": 0.3589, "step": 2258 }, { "epoch": 0.323917407513622, "grad_norm": 0.41534924507141113, "learning_rate": 9.998082330962587e-06, "loss": 0.3513, "step": 2259 }, { "epoch": 0.3240607972469171, "grad_norm": 0.39225029945373535, "learning_rate": 9.998059158424e-06, "loss": 0.3793, "step": 2260 }, { "epoch": 0.3242041869802122, "grad_norm": 0.3963901996612549, "learning_rate": 9.998035846747137e-06, "loss": 0.357, "step": 2261 }, { "epoch": 0.32434757671350734, "grad_norm": 0.40494439005851746, "learning_rate": 9.998012395932642e-06, "loss": 0.3522, "step": 2262 }, { "epoch": 0.3244909664468024, "grad_norm": 0.38777706027030945, "learning_rate": 9.99798880598117e-06, "loss": 0.3562, "step": 2263 }, { "epoch": 0.3246343561800975, "grad_norm": 0.3753910958766937, "learning_rate": 9.99796507689338e-06, "loss": 0.3611, "step": 2264 }, { "epoch": 0.3247777459133926, "grad_norm": 0.38259604573249817, "learning_rate": 9.997941208669931e-06, "loss": 0.3733, "step": 2265 }, { "epoch": 0.3249211356466877, "grad_norm": 0.40218082070350647, "learning_rate": 9.997917201311488e-06, "loss": 0.3364, "step": 2266 }, { "epoch": 0.3250645253799828, "grad_norm": 0.45981365442276, "learning_rate": 9.997893054818714e-06, "loss": 0.3746, "step": 2267 }, { "epoch": 0.3252079151132779, "grad_norm": 0.4019639790058136, "learning_rate": 9.997868769192288e-06, "loss": 0.3788, "step": 2268 }, { "epoch": 0.325351304846573, "grad_norm": 0.4143150746822357, "learning_rate": 9.997844344432883e-06, "loss": 0.3625, "step": 2269 }, { "epoch": 0.32549469457986807, "grad_norm": 0.40799012780189514, "learning_rate": 9.99781978054118e-06, "loss": 0.3574, "step": 2270 }, { "epoch": 0.32563808431316316, "grad_norm": 0.4457662105560303, "learning_rate": 9.997795077517864e-06, "loss": 0.3734, "step": 2271 }, { "epoch": 0.3257814740464583, "grad_norm": 0.4259621500968933, "learning_rate": 9.99777023536362e-06, "loss": 0.354, "step": 2272 }, { "epoch": 0.3259248637797534, "grad_norm": 0.4349175691604614, "learning_rate": 9.99774525407914e-06, "loss": 0.3595, "step": 2273 }, { "epoch": 0.32606825351304847, "grad_norm": 0.39196208119392395, "learning_rate": 9.99772013366512e-06, "loss": 0.3553, "step": 2274 }, { "epoch": 0.32621164324634355, "grad_norm": 0.4095393717288971, "learning_rate": 9.99769487412226e-06, "loss": 0.3581, "step": 2275 }, { "epoch": 0.32635503297963864, "grad_norm": 0.38377776741981506, "learning_rate": 9.997669475451261e-06, "loss": 0.3695, "step": 2276 }, { "epoch": 0.3264984227129338, "grad_norm": 0.4353821277618408, "learning_rate": 9.997643937652833e-06, "loss": 0.3952, "step": 2277 }, { "epoch": 0.32664181244622886, "grad_norm": 0.4407728314399719, "learning_rate": 9.997618260727686e-06, "loss": 0.3753, "step": 2278 }, { "epoch": 0.32678520217952395, "grad_norm": 0.36725491285324097, "learning_rate": 9.997592444676534e-06, "loss": 0.3608, "step": 2279 }, { "epoch": 0.32692859191281903, "grad_norm": 0.4161078929901123, "learning_rate": 9.997566489500097e-06, "loss": 0.374, "step": 2280 }, { "epoch": 0.3270719816461141, "grad_norm": 0.4521099030971527, "learning_rate": 9.997540395199095e-06, "loss": 0.3458, "step": 2281 }, { "epoch": 0.32721537137940926, "grad_norm": 0.4140458106994629, "learning_rate": 9.997514161774257e-06, "loss": 0.3691, "step": 2282 }, { "epoch": 0.32735876111270434, "grad_norm": 0.4506133496761322, "learning_rate": 9.997487789226313e-06, "loss": 0.3718, "step": 2283 }, { "epoch": 0.32750215084599943, "grad_norm": 0.4299091696739197, "learning_rate": 9.997461277555994e-06, "loss": 0.3753, "step": 2284 }, { "epoch": 0.3276455405792945, "grad_norm": 0.4243447482585907, "learning_rate": 9.997434626764042e-06, "loss": 0.3286, "step": 2285 }, { "epoch": 0.3277889303125896, "grad_norm": 0.44536346197128296, "learning_rate": 9.997407836851199e-06, "loss": 0.3879, "step": 2286 }, { "epoch": 0.32793232004588474, "grad_norm": 0.4230124056339264, "learning_rate": 9.997380907818207e-06, "loss": 0.3524, "step": 2287 }, { "epoch": 0.3280757097791798, "grad_norm": 0.41999584436416626, "learning_rate": 9.99735383966582e-06, "loss": 0.3972, "step": 2288 }, { "epoch": 0.3282190995124749, "grad_norm": 0.3939589858055115, "learning_rate": 9.997326632394788e-06, "loss": 0.3836, "step": 2289 }, { "epoch": 0.32836248924577, "grad_norm": 0.36398738622665405, "learning_rate": 9.997299286005871e-06, "loss": 0.3482, "step": 2290 }, { "epoch": 0.3285058789790651, "grad_norm": 0.417034387588501, "learning_rate": 9.997271800499828e-06, "loss": 0.3545, "step": 2291 }, { "epoch": 0.3286492687123602, "grad_norm": 0.44866394996643066, "learning_rate": 9.997244175877425e-06, "loss": 0.3835, "step": 2292 }, { "epoch": 0.3287926584456553, "grad_norm": 0.37571749091148376, "learning_rate": 9.997216412139432e-06, "loss": 0.3852, "step": 2293 }, { "epoch": 0.3289360481789504, "grad_norm": 0.3959265351295471, "learning_rate": 9.997188509286622e-06, "loss": 0.3649, "step": 2294 }, { "epoch": 0.3290794379122455, "grad_norm": 0.3959076404571533, "learning_rate": 9.99716046731977e-06, "loss": 0.3841, "step": 2295 }, { "epoch": 0.32922282764554056, "grad_norm": 0.38178685307502747, "learning_rate": 9.997132286239657e-06, "loss": 0.3551, "step": 2296 }, { "epoch": 0.3293662173788357, "grad_norm": 0.4035073220729828, "learning_rate": 9.997103966047071e-06, "loss": 0.3659, "step": 2297 }, { "epoch": 0.3295096071121308, "grad_norm": 0.38504743576049805, "learning_rate": 9.997075506742795e-06, "loss": 0.3481, "step": 2298 }, { "epoch": 0.32965299684542587, "grad_norm": 0.38347113132476807, "learning_rate": 9.997046908327623e-06, "loss": 0.3727, "step": 2299 }, { "epoch": 0.32979638657872096, "grad_norm": 0.38828936219215393, "learning_rate": 9.997018170802354e-06, "loss": 0.3485, "step": 2300 }, { "epoch": 0.32993977631201604, "grad_norm": 0.3885740339756012, "learning_rate": 9.996989294167786e-06, "loss": 0.3705, "step": 2301 }, { "epoch": 0.3300831660453112, "grad_norm": 0.4089291989803314, "learning_rate": 9.996960278424724e-06, "loss": 0.36, "step": 2302 }, { "epoch": 0.33022655577860627, "grad_norm": 0.40176668763160706, "learning_rate": 9.996931123573973e-06, "loss": 0.3709, "step": 2303 }, { "epoch": 0.33036994551190135, "grad_norm": 0.3864276111125946, "learning_rate": 9.996901829616346e-06, "loss": 0.3782, "step": 2304 }, { "epoch": 0.33051333524519644, "grad_norm": 0.42801594734191895, "learning_rate": 9.996872396552658e-06, "loss": 0.3648, "step": 2305 }, { "epoch": 0.3306567249784915, "grad_norm": 0.3720390498638153, "learning_rate": 9.99684282438373e-06, "loss": 0.3606, "step": 2306 }, { "epoch": 0.33080011471178666, "grad_norm": 0.42254650592803955, "learning_rate": 9.996813113110384e-06, "loss": 0.3714, "step": 2307 }, { "epoch": 0.33094350444508175, "grad_norm": 0.37948861718177795, "learning_rate": 9.996783262733448e-06, "loss": 0.3581, "step": 2308 }, { "epoch": 0.33108689417837683, "grad_norm": 0.40497758984565735, "learning_rate": 9.996753273253752e-06, "loss": 0.3821, "step": 2309 }, { "epoch": 0.3312302839116719, "grad_norm": 0.3997814953327179, "learning_rate": 9.996723144672133e-06, "loss": 0.3682, "step": 2310 }, { "epoch": 0.331373673644967, "grad_norm": 0.4209980368614197, "learning_rate": 9.996692876989426e-06, "loss": 0.3728, "step": 2311 }, { "epoch": 0.3315170633782621, "grad_norm": 0.38874194025993347, "learning_rate": 9.996662470206478e-06, "loss": 0.3564, "step": 2312 }, { "epoch": 0.33166045311155723, "grad_norm": 0.4614187777042389, "learning_rate": 9.99663192432413e-06, "loss": 0.3602, "step": 2313 }, { "epoch": 0.3318038428448523, "grad_norm": 0.36420708894729614, "learning_rate": 9.996601239343238e-06, "loss": 0.3403, "step": 2314 }, { "epoch": 0.3319472325781474, "grad_norm": 0.43920716643333435, "learning_rate": 9.996570415264653e-06, "loss": 0.3646, "step": 2315 }, { "epoch": 0.3320906223114425, "grad_norm": 0.4919252395629883, "learning_rate": 9.996539452089233e-06, "loss": 0.3908, "step": 2316 }, { "epoch": 0.33223401204473757, "grad_norm": 0.41397491097450256, "learning_rate": 9.996508349817841e-06, "loss": 0.3584, "step": 2317 }, { "epoch": 0.3323774017780327, "grad_norm": 0.42312127351760864, "learning_rate": 9.996477108451343e-06, "loss": 0.375, "step": 2318 }, { "epoch": 0.3325207915113278, "grad_norm": 0.429688423871994, "learning_rate": 9.996445727990608e-06, "loss": 0.3551, "step": 2319 }, { "epoch": 0.3326641812446229, "grad_norm": 0.47813743352890015, "learning_rate": 9.99641420843651e-06, "loss": 0.3493, "step": 2320 }, { "epoch": 0.33280757097791797, "grad_norm": 0.4576146900653839, "learning_rate": 9.996382549789927e-06, "loss": 0.3637, "step": 2321 }, { "epoch": 0.33295096071121305, "grad_norm": 0.4088260531425476, "learning_rate": 9.996350752051739e-06, "loss": 0.3493, "step": 2322 }, { "epoch": 0.3330943504445082, "grad_norm": 0.45948073267936707, "learning_rate": 9.996318815222832e-06, "loss": 0.3433, "step": 2323 }, { "epoch": 0.3332377401778033, "grad_norm": 0.4425239861011505, "learning_rate": 9.996286739304095e-06, "loss": 0.356, "step": 2324 }, { "epoch": 0.33338112991109836, "grad_norm": 0.4370100796222687, "learning_rate": 9.99625452429642e-06, "loss": 0.382, "step": 2325 }, { "epoch": 0.33352451964439345, "grad_norm": 0.4037180542945862, "learning_rate": 9.996222170200706e-06, "loss": 0.3701, "step": 2326 }, { "epoch": 0.33366790937768853, "grad_norm": 0.3410704731941223, "learning_rate": 9.996189677017854e-06, "loss": 0.354, "step": 2327 }, { "epoch": 0.3338112991109837, "grad_norm": 0.3887995779514313, "learning_rate": 9.996157044748764e-06, "loss": 0.3548, "step": 2328 }, { "epoch": 0.33395468884427876, "grad_norm": 0.389787495136261, "learning_rate": 9.996124273394349e-06, "loss": 0.3691, "step": 2329 }, { "epoch": 0.33409807857757384, "grad_norm": 0.46247366070747375, "learning_rate": 9.99609136295552e-06, "loss": 0.3545, "step": 2330 }, { "epoch": 0.3342414683108689, "grad_norm": 0.43353137373924255, "learning_rate": 9.99605831343319e-06, "loss": 0.3492, "step": 2331 }, { "epoch": 0.334384858044164, "grad_norm": 0.4440949261188507, "learning_rate": 9.996025124828284e-06, "loss": 0.3632, "step": 2332 }, { "epoch": 0.33452824777745915, "grad_norm": 0.40191957354545593, "learning_rate": 9.995991797141725e-06, "loss": 0.3593, "step": 2333 }, { "epoch": 0.33467163751075424, "grad_norm": 0.4058208763599396, "learning_rate": 9.995958330374438e-06, "loss": 0.3709, "step": 2334 }, { "epoch": 0.3348150272440493, "grad_norm": 0.37891077995300293, "learning_rate": 9.995924724527357e-06, "loss": 0.372, "step": 2335 }, { "epoch": 0.3349584169773444, "grad_norm": 0.40710386633872986, "learning_rate": 9.995890979601416e-06, "loss": 0.3571, "step": 2336 }, { "epoch": 0.3351018067106395, "grad_norm": 0.41987141966819763, "learning_rate": 9.995857095597556e-06, "loss": 0.3707, "step": 2337 }, { "epoch": 0.33524519644393463, "grad_norm": 0.3829411566257477, "learning_rate": 9.99582307251672e-06, "loss": 0.3851, "step": 2338 }, { "epoch": 0.3353885861772297, "grad_norm": 0.4335362911224365, "learning_rate": 9.995788910359853e-06, "loss": 0.3682, "step": 2339 }, { "epoch": 0.3355319759105248, "grad_norm": 0.4171055853366852, "learning_rate": 9.99575460912791e-06, "loss": 0.3652, "step": 2340 }, { "epoch": 0.3356753656438199, "grad_norm": 0.45081931352615356, "learning_rate": 9.995720168821843e-06, "loss": 0.3556, "step": 2341 }, { "epoch": 0.335818755377115, "grad_norm": 0.41413843631744385, "learning_rate": 9.99568558944261e-06, "loss": 0.373, "step": 2342 }, { "epoch": 0.3359621451104101, "grad_norm": 0.3852144181728363, "learning_rate": 9.995650870991176e-06, "loss": 0.3688, "step": 2343 }, { "epoch": 0.3361055348437052, "grad_norm": 0.43798285722732544, "learning_rate": 9.995616013468505e-06, "loss": 0.3699, "step": 2344 }, { "epoch": 0.3362489245770003, "grad_norm": 0.45333996415138245, "learning_rate": 9.99558101687557e-06, "loss": 0.3648, "step": 2345 }, { "epoch": 0.33639231431029537, "grad_norm": 0.38771864771842957, "learning_rate": 9.995545881213345e-06, "loss": 0.3629, "step": 2346 }, { "epoch": 0.33653570404359046, "grad_norm": 0.4105449318885803, "learning_rate": 9.995510606482807e-06, "loss": 0.3841, "step": 2347 }, { "epoch": 0.3366790937768856, "grad_norm": 0.3907724916934967, "learning_rate": 9.995475192684938e-06, "loss": 0.3473, "step": 2348 }, { "epoch": 0.3368224835101807, "grad_norm": 0.38136130571365356, "learning_rate": 9.995439639820724e-06, "loss": 0.3657, "step": 2349 }, { "epoch": 0.33696587324347577, "grad_norm": 0.37086883187294006, "learning_rate": 9.995403947891155e-06, "loss": 0.3706, "step": 2350 }, { "epoch": 0.33710926297677085, "grad_norm": 0.3836742341518402, "learning_rate": 9.995368116897223e-06, "loss": 0.363, "step": 2351 }, { "epoch": 0.33725265271006594, "grad_norm": 0.3997032642364502, "learning_rate": 9.995332146839929e-06, "loss": 0.3646, "step": 2352 }, { "epoch": 0.3373960424433611, "grad_norm": 0.3972349762916565, "learning_rate": 9.99529603772027e-06, "loss": 0.3598, "step": 2353 }, { "epoch": 0.33753943217665616, "grad_norm": 0.397206574678421, "learning_rate": 9.995259789539256e-06, "loss": 0.3482, "step": 2354 }, { "epoch": 0.33768282190995125, "grad_norm": 0.3813822567462921, "learning_rate": 9.995223402297893e-06, "loss": 0.3696, "step": 2355 }, { "epoch": 0.33782621164324633, "grad_norm": 0.40786001086235046, "learning_rate": 9.995186875997194e-06, "loss": 0.3743, "step": 2356 }, { "epoch": 0.3379696013765414, "grad_norm": 0.42544934153556824, "learning_rate": 9.995150210638177e-06, "loss": 0.3491, "step": 2357 }, { "epoch": 0.33811299110983656, "grad_norm": 0.37694400548934937, "learning_rate": 9.99511340622186e-06, "loss": 0.3552, "step": 2358 }, { "epoch": 0.33825638084313164, "grad_norm": 0.417901873588562, "learning_rate": 9.995076462749272e-06, "loss": 0.3746, "step": 2359 }, { "epoch": 0.33839977057642673, "grad_norm": 0.45788222551345825, "learning_rate": 9.995039380221438e-06, "loss": 0.3638, "step": 2360 }, { "epoch": 0.3385431603097218, "grad_norm": 0.38315150141716003, "learning_rate": 9.995002158639393e-06, "loss": 0.3574, "step": 2361 }, { "epoch": 0.3386865500430169, "grad_norm": 0.39424628019332886, "learning_rate": 9.994964798004171e-06, "loss": 0.3788, "step": 2362 }, { "epoch": 0.33882993977631204, "grad_norm": 0.3935348689556122, "learning_rate": 9.994927298316812e-06, "loss": 0.3809, "step": 2363 }, { "epoch": 0.3389733295096071, "grad_norm": 0.4558994472026825, "learning_rate": 9.994889659578362e-06, "loss": 0.3612, "step": 2364 }, { "epoch": 0.3391167192429022, "grad_norm": 0.41590312123298645, "learning_rate": 9.994851881789866e-06, "loss": 0.3734, "step": 2365 }, { "epoch": 0.3392601089761973, "grad_norm": 0.3883981704711914, "learning_rate": 9.994813964952377e-06, "loss": 0.3734, "step": 2366 }, { "epoch": 0.3394034987094924, "grad_norm": 0.44678935408592224, "learning_rate": 9.994775909066951e-06, "loss": 0.3738, "step": 2367 }, { "epoch": 0.3395468884427875, "grad_norm": 0.41585397720336914, "learning_rate": 9.994737714134647e-06, "loss": 0.3608, "step": 2368 }, { "epoch": 0.3396902781760826, "grad_norm": 0.36834216117858887, "learning_rate": 9.99469938015653e-06, "loss": 0.3337, "step": 2369 }, { "epoch": 0.3398336679093777, "grad_norm": 0.39116719365119934, "learning_rate": 9.994660907133663e-06, "loss": 0.3433, "step": 2370 }, { "epoch": 0.3399770576426728, "grad_norm": 0.44866812229156494, "learning_rate": 9.994622295067122e-06, "loss": 0.3456, "step": 2371 }, { "epoch": 0.34012044737596786, "grad_norm": 0.42403343319892883, "learning_rate": 9.994583543957978e-06, "loss": 0.3476, "step": 2372 }, { "epoch": 0.340263837109263, "grad_norm": 0.3960195481777191, "learning_rate": 9.994544653807313e-06, "loss": 0.3674, "step": 2373 }, { "epoch": 0.3404072268425581, "grad_norm": 0.44215142726898193, "learning_rate": 9.994505624616206e-06, "loss": 0.3389, "step": 2374 }, { "epoch": 0.34055061657585317, "grad_norm": 0.45002153515815735, "learning_rate": 9.994466456385746e-06, "loss": 0.3684, "step": 2375 }, { "epoch": 0.34069400630914826, "grad_norm": 0.41484636068344116, "learning_rate": 9.99442714911702e-06, "loss": 0.3744, "step": 2376 }, { "epoch": 0.34083739604244334, "grad_norm": 0.38452351093292236, "learning_rate": 9.994387702811129e-06, "loss": 0.3536, "step": 2377 }, { "epoch": 0.3409807857757385, "grad_norm": 0.3938508629798889, "learning_rate": 9.994348117469167e-06, "loss": 0.3798, "step": 2378 }, { "epoch": 0.34112417550903357, "grad_norm": 0.4279032051563263, "learning_rate": 9.994308393092233e-06, "loss": 0.3504, "step": 2379 }, { "epoch": 0.34126756524232865, "grad_norm": 0.4086548388004303, "learning_rate": 9.99426852968144e-06, "loss": 0.3787, "step": 2380 }, { "epoch": 0.34141095497562374, "grad_norm": 0.36660876870155334, "learning_rate": 9.99422852723789e-06, "loss": 0.3724, "step": 2381 }, { "epoch": 0.3415543447089188, "grad_norm": 0.39206814765930176, "learning_rate": 9.994188385762703e-06, "loss": 0.3664, "step": 2382 }, { "epoch": 0.34169773444221396, "grad_norm": 0.37761908769607544, "learning_rate": 9.994148105256994e-06, "loss": 0.3734, "step": 2383 }, { "epoch": 0.34184112417550905, "grad_norm": 0.38734808564186096, "learning_rate": 9.994107685721883e-06, "loss": 0.3457, "step": 2384 }, { "epoch": 0.34198451390880413, "grad_norm": 0.3959967792034149, "learning_rate": 9.994067127158495e-06, "loss": 0.3516, "step": 2385 }, { "epoch": 0.3421279036420992, "grad_norm": 0.4017956554889679, "learning_rate": 9.994026429567964e-06, "loss": 0.3681, "step": 2386 }, { "epoch": 0.3422712933753943, "grad_norm": 0.39361828565597534, "learning_rate": 9.993985592951417e-06, "loss": 0.3636, "step": 2387 }, { "epoch": 0.34241468310868944, "grad_norm": 0.44865527749061584, "learning_rate": 9.993944617309993e-06, "loss": 0.361, "step": 2388 }, { "epoch": 0.34255807284198453, "grad_norm": 0.383803129196167, "learning_rate": 9.993903502644836e-06, "loss": 0.3423, "step": 2389 }, { "epoch": 0.3427014625752796, "grad_norm": 0.3970804810523987, "learning_rate": 9.993862248957083e-06, "loss": 0.3428, "step": 2390 }, { "epoch": 0.3428448523085747, "grad_norm": 0.4196261763572693, "learning_rate": 9.99382085624789e-06, "loss": 0.3605, "step": 2391 }, { "epoch": 0.3429882420418698, "grad_norm": 0.40038228034973145, "learning_rate": 9.993779324518405e-06, "loss": 0.3494, "step": 2392 }, { "epoch": 0.3431316317751649, "grad_norm": 0.41412660479545593, "learning_rate": 9.993737653769787e-06, "loss": 0.3387, "step": 2393 }, { "epoch": 0.34327502150846, "grad_norm": 0.384267657995224, "learning_rate": 9.993695844003194e-06, "loss": 0.3618, "step": 2394 }, { "epoch": 0.3434184112417551, "grad_norm": 0.42850613594055176, "learning_rate": 9.99365389521979e-06, "loss": 0.3472, "step": 2395 }, { "epoch": 0.3435618009750502, "grad_norm": 0.4147367775440216, "learning_rate": 9.993611807420742e-06, "loss": 0.3782, "step": 2396 }, { "epoch": 0.34370519070834527, "grad_norm": 0.431022971868515, "learning_rate": 9.993569580607225e-06, "loss": 0.3726, "step": 2397 }, { "epoch": 0.3438485804416404, "grad_norm": 0.39340153336524963, "learning_rate": 9.993527214780412e-06, "loss": 0.354, "step": 2398 }, { "epoch": 0.3439919701749355, "grad_norm": 0.428225576877594, "learning_rate": 9.993484709941483e-06, "loss": 0.3321, "step": 2399 }, { "epoch": 0.3441353599082306, "grad_norm": 0.4274637699127197, "learning_rate": 9.993442066091619e-06, "loss": 0.354, "step": 2400 }, { "epoch": 0.34427874964152566, "grad_norm": 0.4330657720565796, "learning_rate": 9.99339928323201e-06, "loss": 0.3702, "step": 2401 }, { "epoch": 0.34442213937482075, "grad_norm": 0.3914915919303894, "learning_rate": 9.993356361363848e-06, "loss": 0.3516, "step": 2402 }, { "epoch": 0.3445655291081159, "grad_norm": 0.41771993041038513, "learning_rate": 9.993313300488325e-06, "loss": 0.3604, "step": 2403 }, { "epoch": 0.34470891884141097, "grad_norm": 0.3573532700538635, "learning_rate": 9.993270100606643e-06, "loss": 0.3371, "step": 2404 }, { "epoch": 0.34485230857470606, "grad_norm": 0.3689262568950653, "learning_rate": 9.99322676172e-06, "loss": 0.3966, "step": 2405 }, { "epoch": 0.34499569830800114, "grad_norm": 0.3909642696380615, "learning_rate": 9.993183283829605e-06, "loss": 0.3675, "step": 2406 }, { "epoch": 0.3451390880412962, "grad_norm": 0.37662649154663086, "learning_rate": 9.993139666936669e-06, "loss": 0.3665, "step": 2407 }, { "epoch": 0.3452824777745913, "grad_norm": 0.44739001989364624, "learning_rate": 9.993095911042406e-06, "loss": 0.3613, "step": 2408 }, { "epoch": 0.34542586750788645, "grad_norm": 0.36593708395957947, "learning_rate": 9.993052016148032e-06, "loss": 0.3637, "step": 2409 }, { "epoch": 0.34556925724118154, "grad_norm": 0.39779552817344666, "learning_rate": 9.993007982254772e-06, "loss": 0.3689, "step": 2410 }, { "epoch": 0.3457126469744766, "grad_norm": 0.40056532621383667, "learning_rate": 9.99296380936385e-06, "loss": 0.3578, "step": 2411 }, { "epoch": 0.3458560367077717, "grad_norm": 0.3958064615726471, "learning_rate": 9.992919497476496e-06, "loss": 0.3638, "step": 2412 }, { "epoch": 0.3459994264410668, "grad_norm": 0.41784146428108215, "learning_rate": 9.992875046593945e-06, "loss": 0.3867, "step": 2413 }, { "epoch": 0.34614281617436193, "grad_norm": 0.3668009638786316, "learning_rate": 9.992830456717432e-06, "loss": 0.3628, "step": 2414 }, { "epoch": 0.346286205907657, "grad_norm": 0.3913682997226715, "learning_rate": 9.9927857278482e-06, "loss": 0.3695, "step": 2415 }, { "epoch": 0.3464295956409521, "grad_norm": 0.4058169722557068, "learning_rate": 9.992740859987492e-06, "loss": 0.3667, "step": 2416 }, { "epoch": 0.3465729853742472, "grad_norm": 0.4244973659515381, "learning_rate": 9.99269585313656e-06, "loss": 0.3939, "step": 2417 }, { "epoch": 0.3467163751075423, "grad_norm": 0.36008375883102417, "learning_rate": 9.992650707296656e-06, "loss": 0.3696, "step": 2418 }, { "epoch": 0.3468597648408374, "grad_norm": 0.39166614413261414, "learning_rate": 9.992605422469038e-06, "loss": 0.3664, "step": 2419 }, { "epoch": 0.3470031545741325, "grad_norm": 0.38773322105407715, "learning_rate": 9.992559998654963e-06, "loss": 0.3629, "step": 2420 }, { "epoch": 0.3471465443074276, "grad_norm": 0.42262208461761475, "learning_rate": 9.992514435855699e-06, "loss": 0.363, "step": 2421 }, { "epoch": 0.34728993404072267, "grad_norm": 0.458135187625885, "learning_rate": 9.99246873407251e-06, "loss": 0.3628, "step": 2422 }, { "epoch": 0.34743332377401775, "grad_norm": 0.3604334592819214, "learning_rate": 9.992422893306674e-06, "loss": 0.3369, "step": 2423 }, { "epoch": 0.3475767135073129, "grad_norm": 0.41815444827079773, "learning_rate": 9.992376913559463e-06, "loss": 0.3714, "step": 2424 }, { "epoch": 0.347720103240608, "grad_norm": 0.4169529974460602, "learning_rate": 9.99233079483216e-06, "loss": 0.3438, "step": 2425 }, { "epoch": 0.34786349297390307, "grad_norm": 0.38260194659233093, "learning_rate": 9.992284537126047e-06, "loss": 0.359, "step": 2426 }, { "epoch": 0.34800688270719815, "grad_norm": 0.4273105561733246, "learning_rate": 9.992238140442411e-06, "loss": 0.3865, "step": 2427 }, { "epoch": 0.34815027244049324, "grad_norm": 0.37981995940208435, "learning_rate": 9.992191604782545e-06, "loss": 0.3823, "step": 2428 }, { "epoch": 0.3482936621737884, "grad_norm": 0.3540564477443695, "learning_rate": 9.992144930147744e-06, "loss": 0.3563, "step": 2429 }, { "epoch": 0.34843705190708346, "grad_norm": 0.4343821406364441, "learning_rate": 9.992098116539308e-06, "loss": 0.3357, "step": 2430 }, { "epoch": 0.34858044164037855, "grad_norm": 0.4486740231513977, "learning_rate": 9.992051163958541e-06, "loss": 0.3706, "step": 2431 }, { "epoch": 0.34872383137367363, "grad_norm": 0.37639421224594116, "learning_rate": 9.992004072406748e-06, "loss": 0.3388, "step": 2432 }, { "epoch": 0.3488672211069687, "grad_norm": 0.4317862093448639, "learning_rate": 9.991956841885238e-06, "loss": 0.361, "step": 2433 }, { "epoch": 0.34901061084026386, "grad_norm": 0.40655550360679626, "learning_rate": 9.991909472395331e-06, "loss": 0.3673, "step": 2434 }, { "epoch": 0.34915400057355894, "grad_norm": 0.4404723048210144, "learning_rate": 9.991861963938342e-06, "loss": 0.3635, "step": 2435 }, { "epoch": 0.349297390306854, "grad_norm": 0.3722633719444275, "learning_rate": 9.991814316515597e-06, "loss": 0.3409, "step": 2436 }, { "epoch": 0.3494407800401491, "grad_norm": 0.3789752423763275, "learning_rate": 9.99176653012842e-06, "loss": 0.3477, "step": 2437 }, { "epoch": 0.3495841697734442, "grad_norm": 0.4401470720767975, "learning_rate": 9.99171860477814e-06, "loss": 0.355, "step": 2438 }, { "epoch": 0.34972755950673934, "grad_norm": 0.4359319806098938, "learning_rate": 9.991670540466093e-06, "loss": 0.3509, "step": 2439 }, { "epoch": 0.3498709492400344, "grad_norm": 0.36365023255348206, "learning_rate": 9.991622337193618e-06, "loss": 0.3486, "step": 2440 }, { "epoch": 0.3500143389733295, "grad_norm": 0.4272460341453552, "learning_rate": 9.991573994962055e-06, "loss": 0.3573, "step": 2441 }, { "epoch": 0.3501577287066246, "grad_norm": 0.5033078789710999, "learning_rate": 9.991525513772751e-06, "loss": 0.3372, "step": 2442 }, { "epoch": 0.3503011184399197, "grad_norm": 0.3633081018924713, "learning_rate": 9.991476893627054e-06, "loss": 0.3523, "step": 2443 }, { "epoch": 0.3504445081732148, "grad_norm": 0.4274482727050781, "learning_rate": 9.99142813452632e-06, "loss": 0.3366, "step": 2444 }, { "epoch": 0.3505878979065099, "grad_norm": 0.517918586730957, "learning_rate": 9.991379236471903e-06, "loss": 0.3699, "step": 2445 }, { "epoch": 0.350731287639805, "grad_norm": 0.4853438436985016, "learning_rate": 9.991330199465167e-06, "loss": 0.362, "step": 2446 }, { "epoch": 0.3508746773731001, "grad_norm": 0.3396105170249939, "learning_rate": 9.991281023507477e-06, "loss": 0.3606, "step": 2447 }, { "epoch": 0.35101806710639516, "grad_norm": 0.44767066836357117, "learning_rate": 9.991231708600201e-06, "loss": 0.3652, "step": 2448 }, { "epoch": 0.3511614568396903, "grad_norm": 0.5113663077354431, "learning_rate": 9.991182254744713e-06, "loss": 0.3621, "step": 2449 }, { "epoch": 0.3513048465729854, "grad_norm": 0.38277503848075867, "learning_rate": 9.991132661942388e-06, "loss": 0.38, "step": 2450 }, { "epoch": 0.35144823630628047, "grad_norm": 0.3994405269622803, "learning_rate": 9.991082930194607e-06, "loss": 0.3423, "step": 2451 }, { "epoch": 0.35159162603957556, "grad_norm": 0.4623209834098816, "learning_rate": 9.991033059502757e-06, "loss": 0.3348, "step": 2452 }, { "epoch": 0.35173501577287064, "grad_norm": 0.3899080455303192, "learning_rate": 9.990983049868221e-06, "loss": 0.35, "step": 2453 }, { "epoch": 0.3518784055061658, "grad_norm": 0.429127961397171, "learning_rate": 9.990932901292398e-06, "loss": 0.3677, "step": 2454 }, { "epoch": 0.35202179523946087, "grad_norm": 0.43627479672431946, "learning_rate": 9.990882613776677e-06, "loss": 0.3892, "step": 2455 }, { "epoch": 0.35216518497275595, "grad_norm": 0.4148899018764496, "learning_rate": 9.990832187322463e-06, "loss": 0.3598, "step": 2456 }, { "epoch": 0.35230857470605104, "grad_norm": 0.42561307549476624, "learning_rate": 9.99078162193116e-06, "loss": 0.3648, "step": 2457 }, { "epoch": 0.3524519644393461, "grad_norm": 0.4118078052997589, "learning_rate": 9.990730917604173e-06, "loss": 0.3453, "step": 2458 }, { "epoch": 0.35259535417264126, "grad_norm": 0.48962411284446716, "learning_rate": 9.990680074342914e-06, "loss": 0.3603, "step": 2459 }, { "epoch": 0.35273874390593635, "grad_norm": 0.36194443702697754, "learning_rate": 9.990629092148799e-06, "loss": 0.334, "step": 2460 }, { "epoch": 0.35288213363923143, "grad_norm": 0.4367932975292206, "learning_rate": 9.990577971023245e-06, "loss": 0.3622, "step": 2461 }, { "epoch": 0.3530255233725265, "grad_norm": 0.46121883392333984, "learning_rate": 9.99052671096768e-06, "loss": 0.3561, "step": 2462 }, { "epoch": 0.3531689131058216, "grad_norm": 0.3592073619365692, "learning_rate": 9.990475311983527e-06, "loss": 0.3498, "step": 2463 }, { "epoch": 0.35331230283911674, "grad_norm": 0.48306190967559814, "learning_rate": 9.990423774072217e-06, "loss": 0.3668, "step": 2464 }, { "epoch": 0.35345569257241183, "grad_norm": 0.42582371830940247, "learning_rate": 9.990372097235188e-06, "loss": 0.3561, "step": 2465 }, { "epoch": 0.3535990823057069, "grad_norm": 0.4441707730293274, "learning_rate": 9.990320281473876e-06, "loss": 0.3946, "step": 2466 }, { "epoch": 0.353742472039002, "grad_norm": 0.4687042832374573, "learning_rate": 9.990268326789723e-06, "loss": 0.3538, "step": 2467 }, { "epoch": 0.3538858617722971, "grad_norm": 0.4397154748439789, "learning_rate": 9.990216233184178e-06, "loss": 0.3819, "step": 2468 }, { "epoch": 0.3540292515055922, "grad_norm": 0.3892819881439209, "learning_rate": 9.990164000658688e-06, "loss": 0.3698, "step": 2469 }, { "epoch": 0.3541726412388873, "grad_norm": 0.4385451674461365, "learning_rate": 9.99011162921471e-06, "loss": 0.3672, "step": 2470 }, { "epoch": 0.3543160309721824, "grad_norm": 0.4121139943599701, "learning_rate": 9.990059118853699e-06, "loss": 0.3754, "step": 2471 }, { "epoch": 0.3544594207054775, "grad_norm": 0.44448718428611755, "learning_rate": 9.990006469577118e-06, "loss": 0.379, "step": 2472 }, { "epoch": 0.35460281043877256, "grad_norm": 0.3870014548301697, "learning_rate": 9.989953681386433e-06, "loss": 0.3595, "step": 2473 }, { "epoch": 0.3547462001720677, "grad_norm": 0.39537084102630615, "learning_rate": 9.989900754283115e-06, "loss": 0.3636, "step": 2474 }, { "epoch": 0.3548895899053628, "grad_norm": 0.3925938308238983, "learning_rate": 9.989847688268635e-06, "loss": 0.3482, "step": 2475 }, { "epoch": 0.3550329796386579, "grad_norm": 0.3940475881099701, "learning_rate": 9.98979448334447e-06, "loss": 0.3692, "step": 2476 }, { "epoch": 0.35517636937195296, "grad_norm": 0.3648652136325836, "learning_rate": 9.989741139512104e-06, "loss": 0.3678, "step": 2477 }, { "epoch": 0.35531975910524805, "grad_norm": 0.38207265734672546, "learning_rate": 9.98968765677302e-06, "loss": 0.3545, "step": 2478 }, { "epoch": 0.3554631488385432, "grad_norm": 0.41246241331100464, "learning_rate": 9.989634035128706e-06, "loss": 0.3708, "step": 2479 }, { "epoch": 0.35560653857183827, "grad_norm": 0.40617361664772034, "learning_rate": 9.989580274580657e-06, "loss": 0.3681, "step": 2480 }, { "epoch": 0.35574992830513336, "grad_norm": 0.3789891302585602, "learning_rate": 9.98952637513037e-06, "loss": 0.3665, "step": 2481 }, { "epoch": 0.35589331803842844, "grad_norm": 0.40523648262023926, "learning_rate": 9.989472336779343e-06, "loss": 0.3508, "step": 2482 }, { "epoch": 0.3560367077717235, "grad_norm": 0.3847601115703583, "learning_rate": 9.98941815952908e-06, "loss": 0.3591, "step": 2483 }, { "epoch": 0.35618009750501867, "grad_norm": 0.3759872615337372, "learning_rate": 9.989363843381092e-06, "loss": 0.3526, "step": 2484 }, { "epoch": 0.35632348723831375, "grad_norm": 0.3595691919326782, "learning_rate": 9.989309388336889e-06, "loss": 0.3437, "step": 2485 }, { "epoch": 0.35646687697160884, "grad_norm": 0.38753998279571533, "learning_rate": 9.989254794397986e-06, "loss": 0.3498, "step": 2486 }, { "epoch": 0.3566102667049039, "grad_norm": 0.3871791660785675, "learning_rate": 9.989200061565906e-06, "loss": 0.3441, "step": 2487 }, { "epoch": 0.356753656438199, "grad_norm": 0.41039666533470154, "learning_rate": 9.989145189842173e-06, "loss": 0.366, "step": 2488 }, { "epoch": 0.35689704617149415, "grad_norm": 0.37038445472717285, "learning_rate": 9.98909017922831e-06, "loss": 0.3575, "step": 2489 }, { "epoch": 0.35704043590478923, "grad_norm": 0.3905631899833679, "learning_rate": 9.989035029725853e-06, "loss": 0.3501, "step": 2490 }, { "epoch": 0.3571838256380843, "grad_norm": 0.3612236976623535, "learning_rate": 9.988979741336335e-06, "loss": 0.3379, "step": 2491 }, { "epoch": 0.3573272153713794, "grad_norm": 0.3829599618911743, "learning_rate": 9.988924314061295e-06, "loss": 0.3727, "step": 2492 }, { "epoch": 0.3574706051046745, "grad_norm": 0.39448636770248413, "learning_rate": 9.988868747902277e-06, "loss": 0.3642, "step": 2493 }, { "epoch": 0.35761399483796963, "grad_norm": 0.527259886264801, "learning_rate": 9.988813042860825e-06, "loss": 0.3518, "step": 2494 }, { "epoch": 0.3577573845712647, "grad_norm": 0.41947710514068604, "learning_rate": 9.988757198938494e-06, "loss": 0.3642, "step": 2495 }, { "epoch": 0.3579007743045598, "grad_norm": 0.41589176654815674, "learning_rate": 9.988701216136837e-06, "loss": 0.3464, "step": 2496 }, { "epoch": 0.3580441640378549, "grad_norm": 0.4812590181827545, "learning_rate": 9.988645094457412e-06, "loss": 0.3501, "step": 2497 }, { "epoch": 0.35818755377114997, "grad_norm": 0.37720468640327454, "learning_rate": 9.988588833901782e-06, "loss": 0.3501, "step": 2498 }, { "epoch": 0.35833094350444505, "grad_norm": 0.4885975420475006, "learning_rate": 9.988532434471512e-06, "loss": 0.3357, "step": 2499 }, { "epoch": 0.3584743332377402, "grad_norm": 0.40819844603538513, "learning_rate": 9.988475896168172e-06, "loss": 0.3676, "step": 2500 }, { "epoch": 0.3586177229710353, "grad_norm": 0.4099322557449341, "learning_rate": 9.98841921899334e-06, "loss": 0.3609, "step": 2501 }, { "epoch": 0.35876111270433036, "grad_norm": 0.4384569227695465, "learning_rate": 9.988362402948588e-06, "loss": 0.3363, "step": 2502 }, { "epoch": 0.35890450243762545, "grad_norm": 0.430990993976593, "learning_rate": 9.9883054480355e-06, "loss": 0.3592, "step": 2503 }, { "epoch": 0.35904789217092054, "grad_norm": 0.5330665111541748, "learning_rate": 9.988248354255664e-06, "loss": 0.363, "step": 2504 }, { "epoch": 0.3591912819042157, "grad_norm": 0.43890663981437683, "learning_rate": 9.988191121610664e-06, "loss": 0.3488, "step": 2505 }, { "epoch": 0.35933467163751076, "grad_norm": 0.43101489543914795, "learning_rate": 9.988133750102099e-06, "loss": 0.3521, "step": 2506 }, { "epoch": 0.35947806137080585, "grad_norm": 0.4487873613834381, "learning_rate": 9.98807623973156e-06, "loss": 0.3668, "step": 2507 }, { "epoch": 0.35962145110410093, "grad_norm": 0.38770997524261475, "learning_rate": 9.988018590500656e-06, "loss": 0.3467, "step": 2508 }, { "epoch": 0.359764840837396, "grad_norm": 0.43781065940856934, "learning_rate": 9.987960802410986e-06, "loss": 0.3561, "step": 2509 }, { "epoch": 0.35990823057069116, "grad_norm": 0.3772152364253998, "learning_rate": 9.987902875464159e-06, "loss": 0.3437, "step": 2510 }, { "epoch": 0.36005162030398624, "grad_norm": 0.4089358448982239, "learning_rate": 9.98784480966179e-06, "loss": 0.356, "step": 2511 }, { "epoch": 0.3601950100372813, "grad_norm": 0.48268499970436096, "learning_rate": 9.987786605005495e-06, "loss": 0.3721, "step": 2512 }, { "epoch": 0.3603383997705764, "grad_norm": 0.4238277077674866, "learning_rate": 9.987728261496892e-06, "loss": 0.3468, "step": 2513 }, { "epoch": 0.3604817895038715, "grad_norm": 0.407701700925827, "learning_rate": 9.987669779137606e-06, "loss": 0.3548, "step": 2514 }, { "epoch": 0.36062517923716664, "grad_norm": 0.3995974659919739, "learning_rate": 9.987611157929268e-06, "loss": 0.3441, "step": 2515 }, { "epoch": 0.3607685689704617, "grad_norm": 0.4479675590991974, "learning_rate": 9.987552397873506e-06, "loss": 0.3622, "step": 2516 }, { "epoch": 0.3609119587037568, "grad_norm": 0.39165928959846497, "learning_rate": 9.987493498971959e-06, "loss": 0.3717, "step": 2517 }, { "epoch": 0.3610553484370519, "grad_norm": 0.4011289179325104, "learning_rate": 9.987434461226264e-06, "loss": 0.3755, "step": 2518 }, { "epoch": 0.361198738170347, "grad_norm": 0.4816136658191681, "learning_rate": 9.987375284638065e-06, "loss": 0.3602, "step": 2519 }, { "epoch": 0.3613421279036421, "grad_norm": 0.4052434265613556, "learning_rate": 9.987315969209013e-06, "loss": 0.3392, "step": 2520 }, { "epoch": 0.3614855176369372, "grad_norm": 0.40861114859580994, "learning_rate": 9.987256514940754e-06, "loss": 0.3642, "step": 2521 }, { "epoch": 0.3616289073702323, "grad_norm": 0.47180891036987305, "learning_rate": 9.987196921834946e-06, "loss": 0.3725, "step": 2522 }, { "epoch": 0.3617722971035274, "grad_norm": 0.3756566643714905, "learning_rate": 9.987137189893248e-06, "loss": 0.3613, "step": 2523 }, { "epoch": 0.36191568683682246, "grad_norm": 0.3912111222743988, "learning_rate": 9.987077319117322e-06, "loss": 0.3759, "step": 2524 }, { "epoch": 0.3620590765701176, "grad_norm": 0.41753003001213074, "learning_rate": 9.987017309508835e-06, "loss": 0.3565, "step": 2525 }, { "epoch": 0.3622024663034127, "grad_norm": 0.37798917293548584, "learning_rate": 9.986957161069458e-06, "loss": 0.337, "step": 2526 }, { "epoch": 0.36234585603670777, "grad_norm": 0.4214720129966736, "learning_rate": 9.986896873800864e-06, "loss": 0.3566, "step": 2527 }, { "epoch": 0.36248924577000285, "grad_norm": 0.37758100032806396, "learning_rate": 9.986836447704732e-06, "loss": 0.3593, "step": 2528 }, { "epoch": 0.36263263550329794, "grad_norm": 0.394862562417984, "learning_rate": 9.986775882782746e-06, "loss": 0.3545, "step": 2529 }, { "epoch": 0.3627760252365931, "grad_norm": 0.45306286215782166, "learning_rate": 9.98671517903659e-06, "loss": 0.3604, "step": 2530 }, { "epoch": 0.36291941496988817, "grad_norm": 0.43231481313705444, "learning_rate": 9.986654336467954e-06, "loss": 0.3657, "step": 2531 }, { "epoch": 0.36306280470318325, "grad_norm": 0.335886687040329, "learning_rate": 9.986593355078532e-06, "loss": 0.3463, "step": 2532 }, { "epoch": 0.36320619443647834, "grad_norm": 0.4158840477466583, "learning_rate": 9.986532234870021e-06, "loss": 0.3594, "step": 2533 }, { "epoch": 0.3633495841697734, "grad_norm": 0.4227856993675232, "learning_rate": 9.986470975844125e-06, "loss": 0.3641, "step": 2534 }, { "epoch": 0.36349297390306856, "grad_norm": 0.3964722752571106, "learning_rate": 9.986409578002547e-06, "loss": 0.3721, "step": 2535 }, { "epoch": 0.36363636363636365, "grad_norm": 0.38859623670578003, "learning_rate": 9.986348041346998e-06, "loss": 0.3291, "step": 2536 }, { "epoch": 0.36377975336965873, "grad_norm": 0.3885888159275055, "learning_rate": 9.986286365879188e-06, "loss": 0.3549, "step": 2537 }, { "epoch": 0.3639231431029538, "grad_norm": 0.3925637900829315, "learning_rate": 9.986224551600837e-06, "loss": 0.3433, "step": 2538 }, { "epoch": 0.3640665328362489, "grad_norm": 0.33048802614212036, "learning_rate": 9.986162598513664e-06, "loss": 0.3521, "step": 2539 }, { "epoch": 0.36420992256954404, "grad_norm": 0.39059239625930786, "learning_rate": 9.986100506619395e-06, "loss": 0.3613, "step": 2540 }, { "epoch": 0.3643533123028391, "grad_norm": 0.3833913207054138, "learning_rate": 9.986038275919759e-06, "loss": 0.3595, "step": 2541 }, { "epoch": 0.3644967020361342, "grad_norm": 0.36123162508010864, "learning_rate": 9.985975906416486e-06, "loss": 0.3555, "step": 2542 }, { "epoch": 0.3646400917694293, "grad_norm": 0.38844922184944153, "learning_rate": 9.985913398111314e-06, "loss": 0.3839, "step": 2543 }, { "epoch": 0.3647834815027244, "grad_norm": 0.3871084451675415, "learning_rate": 9.985850751005983e-06, "loss": 0.3538, "step": 2544 }, { "epoch": 0.3649268712360195, "grad_norm": 0.3631642162799835, "learning_rate": 9.985787965102236e-06, "loss": 0.3595, "step": 2545 }, { "epoch": 0.3650702609693146, "grad_norm": 0.3754211366176605, "learning_rate": 9.985725040401823e-06, "loss": 0.3506, "step": 2546 }, { "epoch": 0.3652136507026097, "grad_norm": 0.37495148181915283, "learning_rate": 9.985661976906493e-06, "loss": 0.343, "step": 2547 }, { "epoch": 0.3653570404359048, "grad_norm": 0.3964189291000366, "learning_rate": 9.985598774618004e-06, "loss": 0.3434, "step": 2548 }, { "epoch": 0.36550043016919986, "grad_norm": 0.4000341296195984, "learning_rate": 9.985535433538114e-06, "loss": 0.385, "step": 2549 }, { "epoch": 0.365643819902495, "grad_norm": 0.4609147906303406, "learning_rate": 9.985471953668587e-06, "loss": 0.3575, "step": 2550 }, { "epoch": 0.3657872096357901, "grad_norm": 0.3490194082260132, "learning_rate": 9.985408335011188e-06, "loss": 0.3519, "step": 2551 }, { "epoch": 0.3659305993690852, "grad_norm": 0.4336046874523163, "learning_rate": 9.985344577567694e-06, "loss": 0.3701, "step": 2552 }, { "epoch": 0.36607398910238026, "grad_norm": 0.4016762673854828, "learning_rate": 9.98528068133987e-06, "loss": 0.3521, "step": 2553 }, { "epoch": 0.36621737883567534, "grad_norm": 0.44295287132263184, "learning_rate": 9.985216646329504e-06, "loss": 0.361, "step": 2554 }, { "epoch": 0.3663607685689705, "grad_norm": 0.3719962239265442, "learning_rate": 9.985152472538376e-06, "loss": 0.3648, "step": 2555 }, { "epoch": 0.36650415830226557, "grad_norm": 0.45697706937789917, "learning_rate": 9.985088159968272e-06, "loss": 0.3598, "step": 2556 }, { "epoch": 0.36664754803556066, "grad_norm": 0.3636808693408966, "learning_rate": 9.98502370862098e-06, "loss": 0.3296, "step": 2557 }, { "epoch": 0.36679093776885574, "grad_norm": 0.41232210397720337, "learning_rate": 9.984959118498298e-06, "loss": 0.3613, "step": 2558 }, { "epoch": 0.3669343275021508, "grad_norm": 0.41835761070251465, "learning_rate": 9.984894389602022e-06, "loss": 0.3797, "step": 2559 }, { "epoch": 0.36707771723544597, "grad_norm": 0.4104922413825989, "learning_rate": 9.984829521933954e-06, "loss": 0.3463, "step": 2560 }, { "epoch": 0.36722110696874105, "grad_norm": 0.3653721511363983, "learning_rate": 9.984764515495901e-06, "loss": 0.333, "step": 2561 }, { "epoch": 0.36736449670203614, "grad_norm": 0.42999714612960815, "learning_rate": 9.98469937028967e-06, "loss": 0.3619, "step": 2562 }, { "epoch": 0.3675078864353312, "grad_norm": 0.4224699139595032, "learning_rate": 9.98463408631708e-06, "loss": 0.369, "step": 2563 }, { "epoch": 0.3676512761686263, "grad_norm": 0.3846357762813568, "learning_rate": 9.984568663579941e-06, "loss": 0.3398, "step": 2564 }, { "epoch": 0.36779466590192145, "grad_norm": 0.4442278742790222, "learning_rate": 9.984503102080081e-06, "loss": 0.3739, "step": 2565 }, { "epoch": 0.36793805563521653, "grad_norm": 0.3822794258594513, "learning_rate": 9.98443740181932e-06, "loss": 0.3831, "step": 2566 }, { "epoch": 0.3680814453685116, "grad_norm": 0.38335731625556946, "learning_rate": 9.984371562799491e-06, "loss": 0.3572, "step": 2567 }, { "epoch": 0.3682248351018067, "grad_norm": 0.35249119997024536, "learning_rate": 9.984305585022425e-06, "loss": 0.3435, "step": 2568 }, { "epoch": 0.3683682248351018, "grad_norm": 0.4217718243598938, "learning_rate": 9.984239468489957e-06, "loss": 0.3395, "step": 2569 }, { "epoch": 0.36851161456839693, "grad_norm": 0.3935995101928711, "learning_rate": 9.984173213203932e-06, "loss": 0.3502, "step": 2570 }, { "epoch": 0.368655004301692, "grad_norm": 0.3794103264808655, "learning_rate": 9.98410681916619e-06, "loss": 0.3614, "step": 2571 }, { "epoch": 0.3687983940349871, "grad_norm": 0.392208456993103, "learning_rate": 9.98404028637858e-06, "loss": 0.3529, "step": 2572 }, { "epoch": 0.3689417837682822, "grad_norm": 0.3759075999259949, "learning_rate": 9.983973614842959e-06, "loss": 0.3417, "step": 2573 }, { "epoch": 0.36908517350157727, "grad_norm": 0.35408586263656616, "learning_rate": 9.983906804561176e-06, "loss": 0.3644, "step": 2574 }, { "epoch": 0.3692285632348724, "grad_norm": 0.4058893620967865, "learning_rate": 9.983839855535096e-06, "loss": 0.361, "step": 2575 }, { "epoch": 0.3693719529681675, "grad_norm": 0.3805726170539856, "learning_rate": 9.98377276776658e-06, "loss": 0.3547, "step": 2576 }, { "epoch": 0.3695153427014626, "grad_norm": 0.3838680386543274, "learning_rate": 9.983705541257497e-06, "loss": 0.3459, "step": 2577 }, { "epoch": 0.36965873243475766, "grad_norm": 0.3766072392463684, "learning_rate": 9.983638176009717e-06, "loss": 0.3652, "step": 2578 }, { "epoch": 0.36980212216805275, "grad_norm": 0.3751448690891266, "learning_rate": 9.983570672025119e-06, "loss": 0.3592, "step": 2579 }, { "epoch": 0.3699455119013479, "grad_norm": 0.4232463836669922, "learning_rate": 9.983503029305578e-06, "loss": 0.3841, "step": 2580 }, { "epoch": 0.370088901634643, "grad_norm": 0.4037887454032898, "learning_rate": 9.98343524785298e-06, "loss": 0.353, "step": 2581 }, { "epoch": 0.37023229136793806, "grad_norm": 0.41569027304649353, "learning_rate": 9.983367327669208e-06, "loss": 0.3427, "step": 2582 }, { "epoch": 0.37037568110123315, "grad_norm": 0.3910026252269745, "learning_rate": 9.983299268756158e-06, "loss": 0.3631, "step": 2583 }, { "epoch": 0.37051907083452823, "grad_norm": 0.39485159516334534, "learning_rate": 9.983231071115722e-06, "loss": 0.3577, "step": 2584 }, { "epoch": 0.37066246056782337, "grad_norm": 0.41388991475105286, "learning_rate": 9.983162734749798e-06, "loss": 0.3584, "step": 2585 }, { "epoch": 0.37080585030111846, "grad_norm": 0.3729456961154938, "learning_rate": 9.983094259660289e-06, "loss": 0.342, "step": 2586 }, { "epoch": 0.37094924003441354, "grad_norm": 0.3926927149295807, "learning_rate": 9.983025645849101e-06, "loss": 0.3676, "step": 2587 }, { "epoch": 0.3710926297677086, "grad_norm": 0.41055771708488464, "learning_rate": 9.982956893318143e-06, "loss": 0.3685, "step": 2588 }, { "epoch": 0.3712360195010037, "grad_norm": 0.373344361782074, "learning_rate": 9.982888002069333e-06, "loss": 0.3582, "step": 2589 }, { "epoch": 0.37137940923429885, "grad_norm": 0.4337364733219147, "learning_rate": 9.982818972104585e-06, "loss": 0.3653, "step": 2590 }, { "epoch": 0.37152279896759394, "grad_norm": 0.39883312582969666, "learning_rate": 9.982749803425822e-06, "loss": 0.3513, "step": 2591 }, { "epoch": 0.371666188700889, "grad_norm": 0.3611084520816803, "learning_rate": 9.98268049603497e-06, "loss": 0.3697, "step": 2592 }, { "epoch": 0.3718095784341841, "grad_norm": 0.39875462651252747, "learning_rate": 9.982611049933954e-06, "loss": 0.3477, "step": 2593 }, { "epoch": 0.3719529681674792, "grad_norm": 0.40670284628868103, "learning_rate": 9.982541465124716e-06, "loss": 0.3717, "step": 2594 }, { "epoch": 0.3720963579007743, "grad_norm": 0.3721980154514313, "learning_rate": 9.982471741609186e-06, "loss": 0.385, "step": 2595 }, { "epoch": 0.3722397476340694, "grad_norm": 0.44411730766296387, "learning_rate": 9.982401879389307e-06, "loss": 0.375, "step": 2596 }, { "epoch": 0.3723831373673645, "grad_norm": 0.4252803325653076, "learning_rate": 9.982331878467023e-06, "loss": 0.3489, "step": 2597 }, { "epoch": 0.3725265271006596, "grad_norm": 0.43289926648139954, "learning_rate": 9.982261738844286e-06, "loss": 0.3392, "step": 2598 }, { "epoch": 0.3726699168339547, "grad_norm": 0.4708189070224762, "learning_rate": 9.982191460523044e-06, "loss": 0.349, "step": 2599 }, { "epoch": 0.37281330656724976, "grad_norm": 0.472864031791687, "learning_rate": 9.98212104350526e-06, "loss": 0.3573, "step": 2600 }, { "epoch": 0.3729566963005449, "grad_norm": 0.4083557724952698, "learning_rate": 9.982050487792887e-06, "loss": 0.3564, "step": 2601 }, { "epoch": 0.37310008603384, "grad_norm": 0.43572506308555603, "learning_rate": 9.981979793387892e-06, "loss": 0.3513, "step": 2602 }, { "epoch": 0.37324347576713507, "grad_norm": 0.37796735763549805, "learning_rate": 9.981908960292244e-06, "loss": 0.3388, "step": 2603 }, { "epoch": 0.37338686550043015, "grad_norm": 0.4528954327106476, "learning_rate": 9.981837988507915e-06, "loss": 0.3426, "step": 2604 }, { "epoch": 0.37353025523372524, "grad_norm": 0.3964880704879761, "learning_rate": 9.981766878036877e-06, "loss": 0.3647, "step": 2605 }, { "epoch": 0.3736736449670204, "grad_norm": 0.3996122181415558, "learning_rate": 9.981695628881117e-06, "loss": 0.3351, "step": 2606 }, { "epoch": 0.37381703470031546, "grad_norm": 0.4956398606300354, "learning_rate": 9.98162424104261e-06, "loss": 0.3517, "step": 2607 }, { "epoch": 0.37396042443361055, "grad_norm": 0.33843564987182617, "learning_rate": 9.98155271452335e-06, "loss": 0.3425, "step": 2608 }, { "epoch": 0.37410381416690563, "grad_norm": 0.4154260456562042, "learning_rate": 9.981481049325323e-06, "loss": 0.3634, "step": 2609 }, { "epoch": 0.3742472039002007, "grad_norm": 0.41476970911026, "learning_rate": 9.981409245450528e-06, "loss": 0.3657, "step": 2610 }, { "epoch": 0.37439059363349586, "grad_norm": 0.3598114252090454, "learning_rate": 9.981337302900962e-06, "loss": 0.3633, "step": 2611 }, { "epoch": 0.37453398336679095, "grad_norm": 0.43956732749938965, "learning_rate": 9.981265221678629e-06, "loss": 0.3774, "step": 2612 }, { "epoch": 0.37467737310008603, "grad_norm": 0.34683987498283386, "learning_rate": 9.981193001785535e-06, "loss": 0.3682, "step": 2613 }, { "epoch": 0.3748207628333811, "grad_norm": 0.3749493658542633, "learning_rate": 9.98112064322369e-06, "loss": 0.3366, "step": 2614 }, { "epoch": 0.3749641525666762, "grad_norm": 0.37943026423454285, "learning_rate": 9.981048145995106e-06, "loss": 0.337, "step": 2615 }, { "epoch": 0.37510754229997134, "grad_norm": 0.3733767867088318, "learning_rate": 9.980975510101808e-06, "loss": 0.356, "step": 2616 }, { "epoch": 0.3752509320332664, "grad_norm": 0.3959715664386749, "learning_rate": 9.980902735545812e-06, "loss": 0.3667, "step": 2617 }, { "epoch": 0.3753943217665615, "grad_norm": 0.3699108064174652, "learning_rate": 9.980829822329145e-06, "loss": 0.3635, "step": 2618 }, { "epoch": 0.3755377114998566, "grad_norm": 0.37996697425842285, "learning_rate": 9.98075677045384e-06, "loss": 0.3528, "step": 2619 }, { "epoch": 0.3756811012331517, "grad_norm": 0.3888495862483978, "learning_rate": 9.980683579921927e-06, "loss": 0.367, "step": 2620 }, { "epoch": 0.3758244909664468, "grad_norm": 0.3583466112613678, "learning_rate": 9.980610250735445e-06, "loss": 0.3331, "step": 2621 }, { "epoch": 0.3759678806997419, "grad_norm": 0.3808724284172058, "learning_rate": 9.980536782896435e-06, "loss": 0.3846, "step": 2622 }, { "epoch": 0.376111270433037, "grad_norm": 0.3943977653980255, "learning_rate": 9.980463176406943e-06, "loss": 0.3773, "step": 2623 }, { "epoch": 0.3762546601663321, "grad_norm": 0.4002994894981384, "learning_rate": 9.980389431269017e-06, "loss": 0.3898, "step": 2624 }, { "epoch": 0.37639804989962716, "grad_norm": 0.3850072920322418, "learning_rate": 9.980315547484712e-06, "loss": 0.3436, "step": 2625 }, { "epoch": 0.3765414396329223, "grad_norm": 0.37194496393203735, "learning_rate": 9.980241525056083e-06, "loss": 0.3539, "step": 2626 }, { "epoch": 0.3766848293662174, "grad_norm": 0.4095064401626587, "learning_rate": 9.98016736398519e-06, "loss": 0.3813, "step": 2627 }, { "epoch": 0.3768282190995125, "grad_norm": 0.39405557513237, "learning_rate": 9.980093064274097e-06, "loss": 0.3769, "step": 2628 }, { "epoch": 0.37697160883280756, "grad_norm": 0.43740764260292053, "learning_rate": 9.980018625924876e-06, "loss": 0.3537, "step": 2629 }, { "epoch": 0.37711499856610264, "grad_norm": 0.37263941764831543, "learning_rate": 9.979944048939596e-06, "loss": 0.3464, "step": 2630 }, { "epoch": 0.3772583882993978, "grad_norm": 0.45467042922973633, "learning_rate": 9.979869333320336e-06, "loss": 0.3614, "step": 2631 }, { "epoch": 0.37740177803269287, "grad_norm": 0.38273003697395325, "learning_rate": 9.979794479069174e-06, "loss": 0.3758, "step": 2632 }, { "epoch": 0.37754516776598795, "grad_norm": 0.4358363151550293, "learning_rate": 9.979719486188193e-06, "loss": 0.3485, "step": 2633 }, { "epoch": 0.37768855749928304, "grad_norm": 0.3982354402542114, "learning_rate": 9.97964435467948e-06, "loss": 0.3404, "step": 2634 }, { "epoch": 0.3778319472325781, "grad_norm": 0.3665718138217926, "learning_rate": 9.979569084545132e-06, "loss": 0.3653, "step": 2635 }, { "epoch": 0.37797533696587327, "grad_norm": 0.40946048498153687, "learning_rate": 9.97949367578724e-06, "loss": 0.3563, "step": 2636 }, { "epoch": 0.37811872669916835, "grad_norm": 0.392570823431015, "learning_rate": 9.979418128407902e-06, "loss": 0.36, "step": 2637 }, { "epoch": 0.37826211643246344, "grad_norm": 0.4099308252334595, "learning_rate": 9.979342442409223e-06, "loss": 0.3559, "step": 2638 }, { "epoch": 0.3784055061657585, "grad_norm": 0.38815420866012573, "learning_rate": 9.979266617793312e-06, "loss": 0.3559, "step": 2639 }, { "epoch": 0.3785488958990536, "grad_norm": 0.3828076124191284, "learning_rate": 9.979190654562278e-06, "loss": 0.3541, "step": 2640 }, { "epoch": 0.37869228563234875, "grad_norm": 0.38519927859306335, "learning_rate": 9.979114552718235e-06, "loss": 0.3686, "step": 2641 }, { "epoch": 0.37883567536564383, "grad_norm": 0.39198070764541626, "learning_rate": 9.9790383122633e-06, "loss": 0.3485, "step": 2642 }, { "epoch": 0.3789790650989389, "grad_norm": 0.3746139705181122, "learning_rate": 9.9789619331996e-06, "loss": 0.364, "step": 2643 }, { "epoch": 0.379122454832234, "grad_norm": 0.3731688857078552, "learning_rate": 9.978885415529259e-06, "loss": 0.3504, "step": 2644 }, { "epoch": 0.3792658445655291, "grad_norm": 0.43244653940200806, "learning_rate": 9.978808759254407e-06, "loss": 0.3822, "step": 2645 }, { "epoch": 0.3794092342988242, "grad_norm": 0.36530524492263794, "learning_rate": 9.978731964377175e-06, "loss": 0.3707, "step": 2646 }, { "epoch": 0.3795526240321193, "grad_norm": 0.35883626341819763, "learning_rate": 9.978655030899706e-06, "loss": 0.3449, "step": 2647 }, { "epoch": 0.3796960137654144, "grad_norm": 0.3728139400482178, "learning_rate": 9.97857795882414e-06, "loss": 0.3555, "step": 2648 }, { "epoch": 0.3798394034987095, "grad_norm": 0.3817203938961029, "learning_rate": 9.978500748152623e-06, "loss": 0.3423, "step": 2649 }, { "epoch": 0.37998279323200457, "grad_norm": 0.3754515051841736, "learning_rate": 9.978423398887302e-06, "loss": 0.3683, "step": 2650 }, { "epoch": 0.3801261829652997, "grad_norm": 0.4087936580181122, "learning_rate": 9.978345911030332e-06, "loss": 0.3889, "step": 2651 }, { "epoch": 0.3802695726985948, "grad_norm": 0.4062393307685852, "learning_rate": 9.97826828458387e-06, "loss": 0.3497, "step": 2652 }, { "epoch": 0.3804129624318899, "grad_norm": 0.37506482005119324, "learning_rate": 9.978190519550077e-06, "loss": 0.3773, "step": 2653 }, { "epoch": 0.38055635216518496, "grad_norm": 0.4138149321079254, "learning_rate": 9.978112615931117e-06, "loss": 0.362, "step": 2654 }, { "epoch": 0.38069974189848005, "grad_norm": 0.42813029885292053, "learning_rate": 9.97803457372916e-06, "loss": 0.3393, "step": 2655 }, { "epoch": 0.3808431316317752, "grad_norm": 0.39391064643859863, "learning_rate": 9.97795639294638e-06, "loss": 0.3665, "step": 2656 }, { "epoch": 0.3809865213650703, "grad_norm": 0.4435546100139618, "learning_rate": 9.97787807358495e-06, "loss": 0.3794, "step": 2657 }, { "epoch": 0.38112991109836536, "grad_norm": 0.3465818464756012, "learning_rate": 9.977799615647053e-06, "loss": 0.3741, "step": 2658 }, { "epoch": 0.38127330083166044, "grad_norm": 0.39749372005462646, "learning_rate": 9.977721019134869e-06, "loss": 0.357, "step": 2659 }, { "epoch": 0.38141669056495553, "grad_norm": 0.3983592391014099, "learning_rate": 9.977642284050592e-06, "loss": 0.3798, "step": 2660 }, { "epoch": 0.38156008029825067, "grad_norm": 0.3596346378326416, "learning_rate": 9.977563410396409e-06, "loss": 0.3715, "step": 2661 }, { "epoch": 0.38170347003154576, "grad_norm": 0.40684494376182556, "learning_rate": 9.97748439817452e-06, "loss": 0.3685, "step": 2662 }, { "epoch": 0.38184685976484084, "grad_norm": 0.45657193660736084, "learning_rate": 9.97740524738712e-06, "loss": 0.3628, "step": 2663 }, { "epoch": 0.3819902494981359, "grad_norm": 0.40263545513153076, "learning_rate": 9.977325958036415e-06, "loss": 0.3701, "step": 2664 }, { "epoch": 0.382133639231431, "grad_norm": 0.3936421871185303, "learning_rate": 9.977246530124612e-06, "loss": 0.3534, "step": 2665 }, { "epoch": 0.38227702896472615, "grad_norm": 0.3606882095336914, "learning_rate": 9.977166963653921e-06, "loss": 0.3551, "step": 2666 }, { "epoch": 0.38242041869802124, "grad_norm": 0.3697725236415863, "learning_rate": 9.97708725862656e-06, "loss": 0.329, "step": 2667 }, { "epoch": 0.3825638084313163, "grad_norm": 0.38260096311569214, "learning_rate": 9.977007415044745e-06, "loss": 0.3226, "step": 2668 }, { "epoch": 0.3827071981646114, "grad_norm": 0.4015514552593231, "learning_rate": 9.976927432910699e-06, "loss": 0.3603, "step": 2669 }, { "epoch": 0.3828505878979065, "grad_norm": 0.3804076910018921, "learning_rate": 9.97684731222665e-06, "loss": 0.3623, "step": 2670 }, { "epoch": 0.38299397763120163, "grad_norm": 0.3614754378795624, "learning_rate": 9.976767052994829e-06, "loss": 0.3444, "step": 2671 }, { "epoch": 0.3831373673644967, "grad_norm": 0.3950757682323456, "learning_rate": 9.976686655217466e-06, "loss": 0.3564, "step": 2672 }, { "epoch": 0.3832807570977918, "grad_norm": 0.35367825627326965, "learning_rate": 9.976606118896804e-06, "loss": 0.3557, "step": 2673 }, { "epoch": 0.3834241468310869, "grad_norm": 0.37114188075065613, "learning_rate": 9.976525444035083e-06, "loss": 0.3512, "step": 2674 }, { "epoch": 0.383567536564382, "grad_norm": 0.4271337687969208, "learning_rate": 9.976444630634547e-06, "loss": 0.3529, "step": 2675 }, { "epoch": 0.3837109262976771, "grad_norm": 0.33951708674430847, "learning_rate": 9.97636367869745e-06, "loss": 0.3777, "step": 2676 }, { "epoch": 0.3838543160309722, "grad_norm": 0.39475879073143005, "learning_rate": 9.976282588226042e-06, "loss": 0.3559, "step": 2677 }, { "epoch": 0.3839977057642673, "grad_norm": 0.4961298704147339, "learning_rate": 9.976201359222584e-06, "loss": 0.3734, "step": 2678 }, { "epoch": 0.38414109549756237, "grad_norm": 0.3733457624912262, "learning_rate": 9.976119991689332e-06, "loss": 0.3552, "step": 2679 }, { "epoch": 0.38428448523085745, "grad_norm": 0.390243798494339, "learning_rate": 9.976038485628556e-06, "loss": 0.3391, "step": 2680 }, { "epoch": 0.3844278749641526, "grad_norm": 0.37498724460601807, "learning_rate": 9.975956841042522e-06, "loss": 0.3243, "step": 2681 }, { "epoch": 0.3845712646974477, "grad_norm": 0.3965429961681366, "learning_rate": 9.975875057933507e-06, "loss": 0.3572, "step": 2682 }, { "epoch": 0.38471465443074276, "grad_norm": 0.3634571433067322, "learning_rate": 9.97579313630378e-06, "loss": 0.3402, "step": 2683 }, { "epoch": 0.38485804416403785, "grad_norm": 0.40628311038017273, "learning_rate": 9.97571107615563e-06, "loss": 0.3362, "step": 2684 }, { "epoch": 0.38500143389733293, "grad_norm": 0.3996988832950592, "learning_rate": 9.975628877491335e-06, "loss": 0.3634, "step": 2685 }, { "epoch": 0.385144823630628, "grad_norm": 0.42599645256996155, "learning_rate": 9.975546540313188e-06, "loss": 0.3746, "step": 2686 }, { "epoch": 0.38528821336392316, "grad_norm": 0.3747369050979614, "learning_rate": 9.975464064623479e-06, "loss": 0.3528, "step": 2687 }, { "epoch": 0.38543160309721824, "grad_norm": 0.39039137959480286, "learning_rate": 9.975381450424502e-06, "loss": 0.3589, "step": 2688 }, { "epoch": 0.38557499283051333, "grad_norm": 0.42286789417266846, "learning_rate": 9.975298697718562e-06, "loss": 0.3466, "step": 2689 }, { "epoch": 0.3857183825638084, "grad_norm": 0.3988664150238037, "learning_rate": 9.975215806507959e-06, "loss": 0.3499, "step": 2690 }, { "epoch": 0.3858617722971035, "grad_norm": 0.3933200240135193, "learning_rate": 9.975132776795e-06, "loss": 0.3636, "step": 2691 }, { "epoch": 0.38600516203039864, "grad_norm": 0.4081760048866272, "learning_rate": 9.975049608581998e-06, "loss": 0.3689, "step": 2692 }, { "epoch": 0.3861485517636937, "grad_norm": 0.3807412385940552, "learning_rate": 9.974966301871267e-06, "loss": 0.3528, "step": 2693 }, { "epoch": 0.3862919414969888, "grad_norm": 0.401373028755188, "learning_rate": 9.974882856665129e-06, "loss": 0.3477, "step": 2694 }, { "epoch": 0.3864353312302839, "grad_norm": 0.40352678298950195, "learning_rate": 9.974799272965905e-06, "loss": 0.3419, "step": 2695 }, { "epoch": 0.386578720963579, "grad_norm": 0.430914044380188, "learning_rate": 9.97471555077592e-06, "loss": 0.3711, "step": 2696 }, { "epoch": 0.3867221106968741, "grad_norm": 0.3858698904514313, "learning_rate": 9.974631690097508e-06, "loss": 0.3602, "step": 2697 }, { "epoch": 0.3868655004301692, "grad_norm": 0.3892885148525238, "learning_rate": 9.974547690933003e-06, "loss": 0.3373, "step": 2698 }, { "epoch": 0.3870088901634643, "grad_norm": 0.3679846525192261, "learning_rate": 9.974463553284741e-06, "loss": 0.3564, "step": 2699 }, { "epoch": 0.3871522798967594, "grad_norm": 0.3919743597507477, "learning_rate": 9.974379277155067e-06, "loss": 0.3637, "step": 2700 }, { "epoch": 0.38729566963005446, "grad_norm": 0.39953574538230896, "learning_rate": 9.974294862546326e-06, "loss": 0.3621, "step": 2701 }, { "epoch": 0.3874390593633496, "grad_norm": 0.38328441977500916, "learning_rate": 9.974210309460866e-06, "loss": 0.3422, "step": 2702 }, { "epoch": 0.3875824490966447, "grad_norm": 0.38936370611190796, "learning_rate": 9.974125617901045e-06, "loss": 0.3632, "step": 2703 }, { "epoch": 0.3877258388299398, "grad_norm": 0.4013746380805969, "learning_rate": 9.974040787869216e-06, "loss": 0.3571, "step": 2704 }, { "epoch": 0.38786922856323486, "grad_norm": 0.40519750118255615, "learning_rate": 9.973955819367745e-06, "loss": 0.3453, "step": 2705 }, { "epoch": 0.38801261829652994, "grad_norm": 0.3458235263824463, "learning_rate": 9.973870712398996e-06, "loss": 0.3481, "step": 2706 }, { "epoch": 0.3881560080298251, "grad_norm": 0.41028308868408203, "learning_rate": 9.973785466965335e-06, "loss": 0.3732, "step": 2707 }, { "epoch": 0.38829939776312017, "grad_norm": 0.36197638511657715, "learning_rate": 9.97370008306914e-06, "loss": 0.3403, "step": 2708 }, { "epoch": 0.38844278749641525, "grad_norm": 0.3714072108268738, "learning_rate": 9.973614560712786e-06, "loss": 0.3657, "step": 2709 }, { "epoch": 0.38858617722971034, "grad_norm": 0.3522297143936157, "learning_rate": 9.97352889989865e-06, "loss": 0.3454, "step": 2710 }, { "epoch": 0.3887295669630054, "grad_norm": 0.39627745747566223, "learning_rate": 9.973443100629123e-06, "loss": 0.3445, "step": 2711 }, { "epoch": 0.38887295669630056, "grad_norm": 0.4230489134788513, "learning_rate": 9.97335716290659e-06, "loss": 0.3756, "step": 2712 }, { "epoch": 0.38901634642959565, "grad_norm": 0.37854284048080444, "learning_rate": 9.973271086733445e-06, "loss": 0.3972, "step": 2713 }, { "epoch": 0.38915973616289073, "grad_norm": 0.41541823744773865, "learning_rate": 9.973184872112083e-06, "loss": 0.3797, "step": 2714 }, { "epoch": 0.3893031258961858, "grad_norm": 0.3552483022212982, "learning_rate": 9.973098519044904e-06, "loss": 0.3639, "step": 2715 }, { "epoch": 0.3894465156294809, "grad_norm": 0.39896321296691895, "learning_rate": 9.97301202753431e-06, "loss": 0.3661, "step": 2716 }, { "epoch": 0.38958990536277605, "grad_norm": 0.42743968963623047, "learning_rate": 9.972925397582714e-06, "loss": 0.353, "step": 2717 }, { "epoch": 0.38973329509607113, "grad_norm": 0.4512389004230499, "learning_rate": 9.972838629192525e-06, "loss": 0.3701, "step": 2718 }, { "epoch": 0.3898766848293662, "grad_norm": 0.3558696508407593, "learning_rate": 9.972751722366157e-06, "loss": 0.3516, "step": 2719 }, { "epoch": 0.3900200745626613, "grad_norm": 0.4336758553981781, "learning_rate": 9.97266467710603e-06, "loss": 0.3622, "step": 2720 }, { "epoch": 0.3901634642959564, "grad_norm": 0.3859666883945465, "learning_rate": 9.972577493414568e-06, "loss": 0.3382, "step": 2721 }, { "epoch": 0.3903068540292515, "grad_norm": 0.384998083114624, "learning_rate": 9.9724901712942e-06, "loss": 0.3609, "step": 2722 }, { "epoch": 0.3904502437625466, "grad_norm": 0.39156633615493774, "learning_rate": 9.972402710747352e-06, "loss": 0.3128, "step": 2723 }, { "epoch": 0.3905936334958417, "grad_norm": 0.393087774515152, "learning_rate": 9.972315111776462e-06, "loss": 0.3646, "step": 2724 }, { "epoch": 0.3907370232291368, "grad_norm": 0.4525422751903534, "learning_rate": 9.97222737438397e-06, "loss": 0.3787, "step": 2725 }, { "epoch": 0.39088041296243187, "grad_norm": 0.37039682269096375, "learning_rate": 9.972139498572314e-06, "loss": 0.3607, "step": 2726 }, { "epoch": 0.391023802695727, "grad_norm": 0.45724713802337646, "learning_rate": 9.972051484343946e-06, "loss": 0.3506, "step": 2727 }, { "epoch": 0.3911671924290221, "grad_norm": 0.4176632761955261, "learning_rate": 9.971963331701312e-06, "loss": 0.3626, "step": 2728 }, { "epoch": 0.3913105821623172, "grad_norm": 0.36883383989334106, "learning_rate": 9.971875040646865e-06, "loss": 0.3614, "step": 2729 }, { "epoch": 0.39145397189561226, "grad_norm": 0.4108242094516754, "learning_rate": 9.971786611183067e-06, "loss": 0.3679, "step": 2730 }, { "epoch": 0.39159736162890735, "grad_norm": 0.43186691403388977, "learning_rate": 9.971698043312377e-06, "loss": 0.3607, "step": 2731 }, { "epoch": 0.3917407513622025, "grad_norm": 0.4471975862979889, "learning_rate": 9.971609337037262e-06, "loss": 0.3563, "step": 2732 }, { "epoch": 0.3918841410954976, "grad_norm": 0.3613412082195282, "learning_rate": 9.971520492360192e-06, "loss": 0.3632, "step": 2733 }, { "epoch": 0.39202753082879266, "grad_norm": 0.3877631723880768, "learning_rate": 9.971431509283637e-06, "loss": 0.3695, "step": 2734 }, { "epoch": 0.39217092056208774, "grad_norm": 0.4422767162322998, "learning_rate": 9.971342387810076e-06, "loss": 0.3659, "step": 2735 }, { "epoch": 0.39231431029538283, "grad_norm": 0.3678324520587921, "learning_rate": 9.971253127941993e-06, "loss": 0.3514, "step": 2736 }, { "epoch": 0.39245770002867797, "grad_norm": 0.4212219715118408, "learning_rate": 9.971163729681868e-06, "loss": 0.3283, "step": 2737 }, { "epoch": 0.39260108976197305, "grad_norm": 0.34832462668418884, "learning_rate": 9.971074193032192e-06, "loss": 0.3408, "step": 2738 }, { "epoch": 0.39274447949526814, "grad_norm": 0.4089105725288391, "learning_rate": 9.970984517995456e-06, "loss": 0.3322, "step": 2739 }, { "epoch": 0.3928878692285632, "grad_norm": 0.3708936870098114, "learning_rate": 9.97089470457416e-06, "loss": 0.3721, "step": 2740 }, { "epoch": 0.3930312589618583, "grad_norm": 0.38181352615356445, "learning_rate": 9.970804752770801e-06, "loss": 0.3631, "step": 2741 }, { "epoch": 0.39317464869515345, "grad_norm": 0.4141293466091156, "learning_rate": 9.970714662587887e-06, "loss": 0.35, "step": 2742 }, { "epoch": 0.39331803842844854, "grad_norm": 0.42080792784690857, "learning_rate": 9.97062443402792e-06, "loss": 0.3487, "step": 2743 }, { "epoch": 0.3934614281617436, "grad_norm": 0.3677613139152527, "learning_rate": 9.970534067093416e-06, "loss": 0.3437, "step": 2744 }, { "epoch": 0.3936048178950387, "grad_norm": 0.3950393497943878, "learning_rate": 9.97044356178689e-06, "loss": 0.3785, "step": 2745 }, { "epoch": 0.3937482076283338, "grad_norm": 0.4066928029060364, "learning_rate": 9.970352918110862e-06, "loss": 0.3391, "step": 2746 }, { "epoch": 0.39389159736162893, "grad_norm": 0.38704290986061096, "learning_rate": 9.970262136067852e-06, "loss": 0.359, "step": 2747 }, { "epoch": 0.394034987094924, "grad_norm": 0.39841046929359436, "learning_rate": 9.970171215660391e-06, "loss": 0.3513, "step": 2748 }, { "epoch": 0.3941783768282191, "grad_norm": 0.4048493504524231, "learning_rate": 9.970080156891012e-06, "loss": 0.3497, "step": 2749 }, { "epoch": 0.3943217665615142, "grad_norm": 0.4048549234867096, "learning_rate": 9.969988959762243e-06, "loss": 0.3556, "step": 2750 }, { "epoch": 0.39446515629480927, "grad_norm": 0.36967888474464417, "learning_rate": 9.969897624276627e-06, "loss": 0.3503, "step": 2751 }, { "epoch": 0.3946085460281044, "grad_norm": 0.42192092537879944, "learning_rate": 9.969806150436708e-06, "loss": 0.3525, "step": 2752 }, { "epoch": 0.3947519357613995, "grad_norm": 0.39661240577697754, "learning_rate": 9.969714538245028e-06, "loss": 0.3715, "step": 2753 }, { "epoch": 0.3948953254946946, "grad_norm": 0.422558069229126, "learning_rate": 9.969622787704143e-06, "loss": 0.386, "step": 2754 }, { "epoch": 0.39503871522798967, "grad_norm": 0.42762428522109985, "learning_rate": 9.969530898816603e-06, "loss": 0.3666, "step": 2755 }, { "epoch": 0.39518210496128475, "grad_norm": 0.35609403252601624, "learning_rate": 9.969438871584968e-06, "loss": 0.362, "step": 2756 }, { "epoch": 0.3953254946945799, "grad_norm": 0.379073828458786, "learning_rate": 9.969346706011798e-06, "loss": 0.3425, "step": 2757 }, { "epoch": 0.395468884427875, "grad_norm": 0.40854084491729736, "learning_rate": 9.969254402099661e-06, "loss": 0.3473, "step": 2758 }, { "epoch": 0.39561227416117006, "grad_norm": 0.4278686046600342, "learning_rate": 9.969161959851127e-06, "loss": 0.3267, "step": 2759 }, { "epoch": 0.39575566389446515, "grad_norm": 0.3963683843612671, "learning_rate": 9.969069379268766e-06, "loss": 0.3365, "step": 2760 }, { "epoch": 0.39589905362776023, "grad_norm": 0.3843773305416107, "learning_rate": 9.968976660355156e-06, "loss": 0.3537, "step": 2761 }, { "epoch": 0.3960424433610554, "grad_norm": 0.37771075963974, "learning_rate": 9.968883803112882e-06, "loss": 0.3411, "step": 2762 }, { "epoch": 0.39618583309435046, "grad_norm": 0.4287661910057068, "learning_rate": 9.968790807544527e-06, "loss": 0.3502, "step": 2763 }, { "epoch": 0.39632922282764554, "grad_norm": 0.37188124656677246, "learning_rate": 9.968697673652676e-06, "loss": 0.3506, "step": 2764 }, { "epoch": 0.39647261256094063, "grad_norm": 0.40937304496765137, "learning_rate": 9.968604401439928e-06, "loss": 0.369, "step": 2765 }, { "epoch": 0.3966160022942357, "grad_norm": 0.4181586802005768, "learning_rate": 9.968510990908876e-06, "loss": 0.3648, "step": 2766 }, { "epoch": 0.39675939202753085, "grad_norm": 0.39448630809783936, "learning_rate": 9.96841744206212e-06, "loss": 0.372, "step": 2767 }, { "epoch": 0.39690278176082594, "grad_norm": 0.42185986042022705, "learning_rate": 9.968323754902267e-06, "loss": 0.3401, "step": 2768 }, { "epoch": 0.397046171494121, "grad_norm": 0.3721427619457245, "learning_rate": 9.968229929431923e-06, "loss": 0.3671, "step": 2769 }, { "epoch": 0.3971895612274161, "grad_norm": 0.37554222345352173, "learning_rate": 9.968135965653699e-06, "loss": 0.3677, "step": 2770 }, { "epoch": 0.3973329509607112, "grad_norm": 0.38374119997024536, "learning_rate": 9.968041863570212e-06, "loss": 0.3772, "step": 2771 }, { "epoch": 0.39747634069400634, "grad_norm": 0.4168526828289032, "learning_rate": 9.967947623184084e-06, "loss": 0.3567, "step": 2772 }, { "epoch": 0.3976197304273014, "grad_norm": 0.37314367294311523, "learning_rate": 9.967853244497935e-06, "loss": 0.329, "step": 2773 }, { "epoch": 0.3977631201605965, "grad_norm": 0.3588894307613373, "learning_rate": 9.967758727514394e-06, "loss": 0.3376, "step": 2774 }, { "epoch": 0.3979065098938916, "grad_norm": 0.38021120429039, "learning_rate": 9.96766407223609e-06, "loss": 0.361, "step": 2775 }, { "epoch": 0.3980498996271867, "grad_norm": 0.38185790181159973, "learning_rate": 9.967569278665664e-06, "loss": 0.36, "step": 2776 }, { "epoch": 0.3981932893604818, "grad_norm": 0.39472949504852295, "learning_rate": 9.967474346805747e-06, "loss": 0.3466, "step": 2777 }, { "epoch": 0.3983366790937769, "grad_norm": 0.4056449830532074, "learning_rate": 9.967379276658987e-06, "loss": 0.3873, "step": 2778 }, { "epoch": 0.398480068827072, "grad_norm": 0.3709649443626404, "learning_rate": 9.96728406822803e-06, "loss": 0.3503, "step": 2779 }, { "epoch": 0.39862345856036707, "grad_norm": 0.4322245717048645, "learning_rate": 9.967188721515525e-06, "loss": 0.3697, "step": 2780 }, { "epoch": 0.39876684829366216, "grad_norm": 0.39548659324645996, "learning_rate": 9.967093236524126e-06, "loss": 0.3657, "step": 2781 }, { "epoch": 0.39891023802695724, "grad_norm": 0.39807453751564026, "learning_rate": 9.966997613256492e-06, "loss": 0.3476, "step": 2782 }, { "epoch": 0.3990536277602524, "grad_norm": 0.37445464730262756, "learning_rate": 9.966901851715287e-06, "loss": 0.3474, "step": 2783 }, { "epoch": 0.39919701749354747, "grad_norm": 0.35390955209732056, "learning_rate": 9.966805951903173e-06, "loss": 0.3553, "step": 2784 }, { "epoch": 0.39934040722684255, "grad_norm": 0.40678203105926514, "learning_rate": 9.966709913822824e-06, "loss": 0.3372, "step": 2785 }, { "epoch": 0.39948379696013764, "grad_norm": 0.3741723299026489, "learning_rate": 9.966613737476909e-06, "loss": 0.3578, "step": 2786 }, { "epoch": 0.3996271866934327, "grad_norm": 0.4043755531311035, "learning_rate": 9.966517422868107e-06, "loss": 0.3624, "step": 2787 }, { "epoch": 0.39977057642672786, "grad_norm": 0.40266454219818115, "learning_rate": 9.966420969999102e-06, "loss": 0.3788, "step": 2788 }, { "epoch": 0.39991396616002295, "grad_norm": 0.4336671829223633, "learning_rate": 9.966324378872576e-06, "loss": 0.3725, "step": 2789 }, { "epoch": 0.40005735589331803, "grad_norm": 0.3850388824939728, "learning_rate": 9.96622764949122e-06, "loss": 0.3803, "step": 2790 }, { "epoch": 0.4002007456266131, "grad_norm": 0.4129627048969269, "learning_rate": 9.966130781857726e-06, "loss": 0.3435, "step": 2791 }, { "epoch": 0.4003441353599082, "grad_norm": 0.4343578517436981, "learning_rate": 9.966033775974789e-06, "loss": 0.3624, "step": 2792 }, { "epoch": 0.40048752509320334, "grad_norm": 0.3709627091884613, "learning_rate": 9.96593663184511e-06, "loss": 0.3745, "step": 2793 }, { "epoch": 0.40063091482649843, "grad_norm": 0.3729788362979889, "learning_rate": 9.965839349471395e-06, "loss": 0.3167, "step": 2794 }, { "epoch": 0.4007743045597935, "grad_norm": 0.38879892230033875, "learning_rate": 9.965741928856353e-06, "loss": 0.3353, "step": 2795 }, { "epoch": 0.4009176942930886, "grad_norm": 0.3942684531211853, "learning_rate": 9.965644370002694e-06, "loss": 0.3649, "step": 2796 }, { "epoch": 0.4010610840263837, "grad_norm": 0.3930836617946625, "learning_rate": 9.965546672913133e-06, "loss": 0.3665, "step": 2797 }, { "epoch": 0.4012044737596788, "grad_norm": 0.4256313741207123, "learning_rate": 9.96544883759039e-06, "loss": 0.3703, "step": 2798 }, { "epoch": 0.4013478634929739, "grad_norm": 0.40411779284477234, "learning_rate": 9.965350864037194e-06, "loss": 0.3268, "step": 2799 }, { "epoch": 0.401491253226269, "grad_norm": 0.35438793897628784, "learning_rate": 9.965252752256265e-06, "loss": 0.3571, "step": 2800 }, { "epoch": 0.4016346429595641, "grad_norm": 0.4185468256473541, "learning_rate": 9.965154502250337e-06, "loss": 0.3595, "step": 2801 }, { "epoch": 0.40177803269285917, "grad_norm": 0.4205653965473175, "learning_rate": 9.965056114022148e-06, "loss": 0.3223, "step": 2802 }, { "epoch": 0.4019214224261543, "grad_norm": 0.3950823247432709, "learning_rate": 9.96495758757443e-06, "loss": 0.3588, "step": 2803 }, { "epoch": 0.4020648121594494, "grad_norm": 0.3698267638683319, "learning_rate": 9.964858922909935e-06, "loss": 0.3621, "step": 2804 }, { "epoch": 0.4022082018927445, "grad_norm": 0.3775898516178131, "learning_rate": 9.964760120031403e-06, "loss": 0.3475, "step": 2805 }, { "epoch": 0.40235159162603956, "grad_norm": 0.34398001432418823, "learning_rate": 9.964661178941586e-06, "loss": 0.3607, "step": 2806 }, { "epoch": 0.40249498135933465, "grad_norm": 0.3661707639694214, "learning_rate": 9.964562099643239e-06, "loss": 0.3292, "step": 2807 }, { "epoch": 0.4026383710926298, "grad_norm": 0.38763898611068726, "learning_rate": 9.96446288213912e-06, "loss": 0.3627, "step": 2808 }, { "epoch": 0.4027817608259249, "grad_norm": 0.38267648220062256, "learning_rate": 9.964363526431993e-06, "loss": 0.3679, "step": 2809 }, { "epoch": 0.40292515055921996, "grad_norm": 0.40394139289855957, "learning_rate": 9.96426403252462e-06, "loss": 0.3513, "step": 2810 }, { "epoch": 0.40306854029251504, "grad_norm": 0.3695804178714752, "learning_rate": 9.964164400419774e-06, "loss": 0.3599, "step": 2811 }, { "epoch": 0.40321193002581013, "grad_norm": 0.3907787501811981, "learning_rate": 9.964064630120227e-06, "loss": 0.3503, "step": 2812 }, { "epoch": 0.40335531975910527, "grad_norm": 0.4271308183670044, "learning_rate": 9.963964721628757e-06, "loss": 0.3754, "step": 2813 }, { "epoch": 0.40349870949240035, "grad_norm": 0.3915444016456604, "learning_rate": 9.963864674948144e-06, "loss": 0.347, "step": 2814 }, { "epoch": 0.40364209922569544, "grad_norm": 0.3922383487224579, "learning_rate": 9.963764490081175e-06, "loss": 0.3323, "step": 2815 }, { "epoch": 0.4037854889589905, "grad_norm": 0.41609808802604675, "learning_rate": 9.963664167030639e-06, "loss": 0.3557, "step": 2816 }, { "epoch": 0.4039288786922856, "grad_norm": 0.39246809482574463, "learning_rate": 9.963563705799327e-06, "loss": 0.3561, "step": 2817 }, { "epoch": 0.40407226842558075, "grad_norm": 0.4037172198295593, "learning_rate": 9.963463106390038e-06, "loss": 0.3765, "step": 2818 }, { "epoch": 0.40421565815887583, "grad_norm": 0.3913714289665222, "learning_rate": 9.96336236880557e-06, "loss": 0.3669, "step": 2819 }, { "epoch": 0.4043590478921709, "grad_norm": 0.4126971662044525, "learning_rate": 9.96326149304873e-06, "loss": 0.3352, "step": 2820 }, { "epoch": 0.404502437625466, "grad_norm": 0.41179925203323364, "learning_rate": 9.963160479122325e-06, "loss": 0.3358, "step": 2821 }, { "epoch": 0.4046458273587611, "grad_norm": 0.3851291537284851, "learning_rate": 9.963059327029166e-06, "loss": 0.3738, "step": 2822 }, { "epoch": 0.40478921709205623, "grad_norm": 0.44302695989608765, "learning_rate": 9.96295803677207e-06, "loss": 0.3534, "step": 2823 }, { "epoch": 0.4049326068253513, "grad_norm": 0.4008670151233673, "learning_rate": 9.962856608353857e-06, "loss": 0.3547, "step": 2824 }, { "epoch": 0.4050759965586464, "grad_norm": 0.43087247014045715, "learning_rate": 9.962755041777351e-06, "loss": 0.369, "step": 2825 }, { "epoch": 0.4052193862919415, "grad_norm": 0.40722140669822693, "learning_rate": 9.962653337045379e-06, "loss": 0.3522, "step": 2826 }, { "epoch": 0.40536277602523657, "grad_norm": 0.4133113920688629, "learning_rate": 9.96255149416077e-06, "loss": 0.3553, "step": 2827 }, { "epoch": 0.4055061657585317, "grad_norm": 0.4326139986515045, "learning_rate": 9.962449513126365e-06, "loss": 0.3836, "step": 2828 }, { "epoch": 0.4056495554918268, "grad_norm": 0.4237931966781616, "learning_rate": 9.962347393944996e-06, "loss": 0.3643, "step": 2829 }, { "epoch": 0.4057929452251219, "grad_norm": 0.4069666266441345, "learning_rate": 9.96224513661951e-06, "loss": 0.3617, "step": 2830 }, { "epoch": 0.40593633495841697, "grad_norm": 0.39937952160835266, "learning_rate": 9.962142741152752e-06, "loss": 0.3425, "step": 2831 }, { "epoch": 0.40607972469171205, "grad_norm": 0.4931240975856781, "learning_rate": 9.962040207547575e-06, "loss": 0.359, "step": 2832 }, { "epoch": 0.4062231144250072, "grad_norm": 0.44481927156448364, "learning_rate": 9.96193753580683e-06, "loss": 0.3427, "step": 2833 }, { "epoch": 0.4063665041583023, "grad_norm": 0.42318400740623474, "learning_rate": 9.961834725933377e-06, "loss": 0.3354, "step": 2834 }, { "epoch": 0.40650989389159736, "grad_norm": 0.4550785422325134, "learning_rate": 9.96173177793008e-06, "loss": 0.3354, "step": 2835 }, { "epoch": 0.40665328362489245, "grad_norm": 0.4415294826030731, "learning_rate": 9.9616286917998e-06, "loss": 0.363, "step": 2836 }, { "epoch": 0.40679667335818753, "grad_norm": 0.36519283056259155, "learning_rate": 9.961525467545412e-06, "loss": 0.3453, "step": 2837 }, { "epoch": 0.4069400630914827, "grad_norm": 0.36552169919013977, "learning_rate": 9.961422105169785e-06, "loss": 0.3413, "step": 2838 }, { "epoch": 0.40708345282477776, "grad_norm": 0.38284850120544434, "learning_rate": 9.961318604675801e-06, "loss": 0.3755, "step": 2839 }, { "epoch": 0.40722684255807284, "grad_norm": 0.3846365809440613, "learning_rate": 9.961214966066337e-06, "loss": 0.358, "step": 2840 }, { "epoch": 0.40737023229136793, "grad_norm": 0.368487685918808, "learning_rate": 9.961111189344282e-06, "loss": 0.3471, "step": 2841 }, { "epoch": 0.407513622024663, "grad_norm": 0.4248829782009125, "learning_rate": 9.961007274512522e-06, "loss": 0.3675, "step": 2842 }, { "epoch": 0.40765701175795815, "grad_norm": 0.36853331327438354, "learning_rate": 9.96090322157395e-06, "loss": 0.3636, "step": 2843 }, { "epoch": 0.40780040149125324, "grad_norm": 0.46622341871261597, "learning_rate": 9.960799030531465e-06, "loss": 0.3512, "step": 2844 }, { "epoch": 0.4079437912245483, "grad_norm": 0.4197642207145691, "learning_rate": 9.960694701387966e-06, "loss": 0.3562, "step": 2845 }, { "epoch": 0.4080871809578434, "grad_norm": 0.40089675784111023, "learning_rate": 9.960590234146358e-06, "loss": 0.3551, "step": 2846 }, { "epoch": 0.4082305706911385, "grad_norm": 0.37612760066986084, "learning_rate": 9.960485628809548e-06, "loss": 0.3443, "step": 2847 }, { "epoch": 0.40837396042443364, "grad_norm": 0.4190428555011749, "learning_rate": 9.960380885380449e-06, "loss": 0.3504, "step": 2848 }, { "epoch": 0.4085173501577287, "grad_norm": 0.36797547340393066, "learning_rate": 9.960276003861977e-06, "loss": 0.3638, "step": 2849 }, { "epoch": 0.4086607398910238, "grad_norm": 0.3924897611141205, "learning_rate": 9.96017098425705e-06, "loss": 0.3478, "step": 2850 }, { "epoch": 0.4088041296243189, "grad_norm": 0.42044851183891296, "learning_rate": 9.960065826568593e-06, "loss": 0.3725, "step": 2851 }, { "epoch": 0.408947519357614, "grad_norm": 0.380056768655777, "learning_rate": 9.959960530799536e-06, "loss": 0.3391, "step": 2852 }, { "epoch": 0.4090909090909091, "grad_norm": 0.39626583456993103, "learning_rate": 9.959855096952804e-06, "loss": 0.3504, "step": 2853 }, { "epoch": 0.4092342988242042, "grad_norm": 0.4163469672203064, "learning_rate": 9.959749525031337e-06, "loss": 0.3484, "step": 2854 }, { "epoch": 0.4093776885574993, "grad_norm": 0.3809990882873535, "learning_rate": 9.959643815038075e-06, "loss": 0.3417, "step": 2855 }, { "epoch": 0.40952107829079437, "grad_norm": 0.3923295736312866, "learning_rate": 9.959537966975956e-06, "loss": 0.3598, "step": 2856 }, { "epoch": 0.40966446802408946, "grad_norm": 0.3789995312690735, "learning_rate": 9.959431980847932e-06, "loss": 0.3453, "step": 2857 }, { "epoch": 0.4098078577573846, "grad_norm": 0.40105125308036804, "learning_rate": 9.959325856656947e-06, "loss": 0.3465, "step": 2858 }, { "epoch": 0.4099512474906797, "grad_norm": 0.3940887153148651, "learning_rate": 9.959219594405962e-06, "loss": 0.3458, "step": 2859 }, { "epoch": 0.41009463722397477, "grad_norm": 0.3837389349937439, "learning_rate": 9.959113194097932e-06, "loss": 0.3528, "step": 2860 }, { "epoch": 0.41023802695726985, "grad_norm": 0.39899301528930664, "learning_rate": 9.959006655735817e-06, "loss": 0.3676, "step": 2861 }, { "epoch": 0.41038141669056494, "grad_norm": 0.397100567817688, "learning_rate": 9.958899979322586e-06, "loss": 0.354, "step": 2862 }, { "epoch": 0.4105248064238601, "grad_norm": 0.3587320148944855, "learning_rate": 9.958793164861208e-06, "loss": 0.3646, "step": 2863 }, { "epoch": 0.41066819615715516, "grad_norm": 0.4102379083633423, "learning_rate": 9.958686212354657e-06, "loss": 0.3427, "step": 2864 }, { "epoch": 0.41081158589045025, "grad_norm": 0.42574262619018555, "learning_rate": 9.958579121805908e-06, "loss": 0.368, "step": 2865 }, { "epoch": 0.41095497562374533, "grad_norm": 0.365388959646225, "learning_rate": 9.958471893217948e-06, "loss": 0.3576, "step": 2866 }, { "epoch": 0.4110983653570404, "grad_norm": 0.3674774467945099, "learning_rate": 9.958364526593754e-06, "loss": 0.3748, "step": 2867 }, { "epoch": 0.41124175509033556, "grad_norm": 0.40178173780441284, "learning_rate": 9.95825702193632e-06, "loss": 0.3509, "step": 2868 }, { "epoch": 0.41138514482363064, "grad_norm": 0.3714757263660431, "learning_rate": 9.958149379248639e-06, "loss": 0.3758, "step": 2869 }, { "epoch": 0.41152853455692573, "grad_norm": 0.37537845969200134, "learning_rate": 9.958041598533705e-06, "loss": 0.3738, "step": 2870 }, { "epoch": 0.4116719242902208, "grad_norm": 0.36627739667892456, "learning_rate": 9.957933679794518e-06, "loss": 0.3649, "step": 2871 }, { "epoch": 0.4118153140235159, "grad_norm": 0.38759854435920715, "learning_rate": 9.957825623034088e-06, "loss": 0.36, "step": 2872 }, { "epoch": 0.41195870375681104, "grad_norm": 0.3943957984447479, "learning_rate": 9.957717428255417e-06, "loss": 0.3821, "step": 2873 }, { "epoch": 0.4121020934901061, "grad_norm": 0.3657185137271881, "learning_rate": 9.95760909546152e-06, "loss": 0.3561, "step": 2874 }, { "epoch": 0.4122454832234012, "grad_norm": 0.34055039286613464, "learning_rate": 9.957500624655412e-06, "loss": 0.367, "step": 2875 }, { "epoch": 0.4123888729566963, "grad_norm": 0.32751718163490295, "learning_rate": 9.957392015840111e-06, "loss": 0.3394, "step": 2876 }, { "epoch": 0.4125322626899914, "grad_norm": 0.32102835178375244, "learning_rate": 9.957283269018644e-06, "loss": 0.3691, "step": 2877 }, { "epoch": 0.41267565242328647, "grad_norm": 0.3614010512828827, "learning_rate": 9.957174384194035e-06, "loss": 0.3587, "step": 2878 }, { "epoch": 0.4128190421565816, "grad_norm": 0.35446399450302124, "learning_rate": 9.957065361369318e-06, "loss": 0.3574, "step": 2879 }, { "epoch": 0.4129624318898767, "grad_norm": 0.33859989047050476, "learning_rate": 9.956956200547528e-06, "loss": 0.3559, "step": 2880 }, { "epoch": 0.4131058216231718, "grad_norm": 0.3484876751899719, "learning_rate": 9.9568469017317e-06, "loss": 0.3545, "step": 2881 }, { "epoch": 0.41324921135646686, "grad_norm": 0.3587142527103424, "learning_rate": 9.95673746492488e-06, "loss": 0.3418, "step": 2882 }, { "epoch": 0.41339260108976195, "grad_norm": 0.33828407526016235, "learning_rate": 9.956627890130115e-06, "loss": 0.3369, "step": 2883 }, { "epoch": 0.4135359908230571, "grad_norm": 0.41199102997779846, "learning_rate": 9.956518177350454e-06, "loss": 0.3526, "step": 2884 }, { "epoch": 0.41367938055635217, "grad_norm": 0.371143102645874, "learning_rate": 9.95640832658895e-06, "loss": 0.3612, "step": 2885 }, { "epoch": 0.41382277028964726, "grad_norm": 0.3635749816894531, "learning_rate": 9.956298337848664e-06, "loss": 0.3597, "step": 2886 }, { "epoch": 0.41396616002294234, "grad_norm": 0.3839196562767029, "learning_rate": 9.956188211132656e-06, "loss": 0.3637, "step": 2887 }, { "epoch": 0.4141095497562374, "grad_norm": 0.3505268692970276, "learning_rate": 9.956077946443991e-06, "loss": 0.3319, "step": 2888 }, { "epoch": 0.41425293948953257, "grad_norm": 0.40797528624534607, "learning_rate": 9.955967543785742e-06, "loss": 0.377, "step": 2889 }, { "epoch": 0.41439632922282765, "grad_norm": 0.39011111855506897, "learning_rate": 9.955857003160981e-06, "loss": 0.3614, "step": 2890 }, { "epoch": 0.41453971895612274, "grad_norm": 0.4095868766307831, "learning_rate": 9.95574632457278e-06, "loss": 0.379, "step": 2891 }, { "epoch": 0.4146831086894178, "grad_norm": 0.3707793354988098, "learning_rate": 9.95563550802423e-06, "loss": 0.358, "step": 2892 }, { "epoch": 0.4148264984227129, "grad_norm": 0.38530802726745605, "learning_rate": 9.955524553518408e-06, "loss": 0.3676, "step": 2893 }, { "epoch": 0.41496988815600805, "grad_norm": 0.38012635707855225, "learning_rate": 9.955413461058405e-06, "loss": 0.3484, "step": 2894 }, { "epoch": 0.41511327788930313, "grad_norm": 0.3646348714828491, "learning_rate": 9.955302230647315e-06, "loss": 0.3422, "step": 2895 }, { "epoch": 0.4152566676225982, "grad_norm": 0.3656926453113556, "learning_rate": 9.955190862288234e-06, "loss": 0.3421, "step": 2896 }, { "epoch": 0.4154000573558933, "grad_norm": 0.36678531765937805, "learning_rate": 9.955079355984261e-06, "loss": 0.3343, "step": 2897 }, { "epoch": 0.4155434470891884, "grad_norm": 0.40248945355415344, "learning_rate": 9.9549677117385e-06, "loss": 0.3585, "step": 2898 }, { "epoch": 0.41568683682248353, "grad_norm": 0.3583963215351105, "learning_rate": 9.954855929554064e-06, "loss": 0.3393, "step": 2899 }, { "epoch": 0.4158302265557786, "grad_norm": 0.40074217319488525, "learning_rate": 9.954744009434058e-06, "loss": 0.35, "step": 2900 }, { "epoch": 0.4159736162890737, "grad_norm": 0.38661208748817444, "learning_rate": 9.954631951381602e-06, "loss": 0.3302, "step": 2901 }, { "epoch": 0.4161170060223688, "grad_norm": 0.3515360951423645, "learning_rate": 9.954519755399811e-06, "loss": 0.3575, "step": 2902 }, { "epoch": 0.41626039575566387, "grad_norm": 0.3754349946975708, "learning_rate": 9.954407421491814e-06, "loss": 0.3374, "step": 2903 }, { "epoch": 0.416403785488959, "grad_norm": 0.41187408566474915, "learning_rate": 9.954294949660737e-06, "loss": 0.3662, "step": 2904 }, { "epoch": 0.4165471752222541, "grad_norm": 0.3849106729030609, "learning_rate": 9.954182339909708e-06, "loss": 0.3486, "step": 2905 }, { "epoch": 0.4166905649555492, "grad_norm": 0.3731865882873535, "learning_rate": 9.954069592241863e-06, "loss": 0.3537, "step": 2906 }, { "epoch": 0.41683395468884427, "grad_norm": 0.36166825890541077, "learning_rate": 9.953956706660341e-06, "loss": 0.355, "step": 2907 }, { "epoch": 0.41697734442213935, "grad_norm": 0.36274537444114685, "learning_rate": 9.953843683168286e-06, "loss": 0.3627, "step": 2908 }, { "epoch": 0.4171207341554345, "grad_norm": 0.38406699895858765, "learning_rate": 9.953730521768844e-06, "loss": 0.3814, "step": 2909 }, { "epoch": 0.4172641238887296, "grad_norm": 0.36082983016967773, "learning_rate": 9.953617222465164e-06, "loss": 0.36, "step": 2910 }, { "epoch": 0.41740751362202466, "grad_norm": 0.38956189155578613, "learning_rate": 9.953503785260402e-06, "loss": 0.338, "step": 2911 }, { "epoch": 0.41755090335531975, "grad_norm": 0.41058412194252014, "learning_rate": 9.953390210157712e-06, "loss": 0.3542, "step": 2912 }, { "epoch": 0.41769429308861483, "grad_norm": 0.39828556776046753, "learning_rate": 9.953276497160258e-06, "loss": 0.3782, "step": 2913 }, { "epoch": 0.41783768282191, "grad_norm": 0.44208696484565735, "learning_rate": 9.953162646271208e-06, "loss": 0.3555, "step": 2914 }, { "epoch": 0.41798107255520506, "grad_norm": 0.39358365535736084, "learning_rate": 9.953048657493729e-06, "loss": 0.3344, "step": 2915 }, { "epoch": 0.41812446228850014, "grad_norm": 0.3769459128379822, "learning_rate": 9.952934530830993e-06, "loss": 0.364, "step": 2916 }, { "epoch": 0.41826785202179523, "grad_norm": 0.34390464425086975, "learning_rate": 9.952820266286181e-06, "loss": 0.3333, "step": 2917 }, { "epoch": 0.4184112417550903, "grad_norm": 0.3552175760269165, "learning_rate": 9.95270586386247e-06, "loss": 0.3408, "step": 2918 }, { "epoch": 0.41855463148838545, "grad_norm": 0.3945982754230499, "learning_rate": 9.952591323563046e-06, "loss": 0.3925, "step": 2919 }, { "epoch": 0.41869802122168054, "grad_norm": 0.359622985124588, "learning_rate": 9.952476645391099e-06, "loss": 0.3526, "step": 2920 }, { "epoch": 0.4188414109549756, "grad_norm": 0.3650292456150055, "learning_rate": 9.952361829349821e-06, "loss": 0.3677, "step": 2921 }, { "epoch": 0.4189848006882707, "grad_norm": 0.36369264125823975, "learning_rate": 9.952246875442405e-06, "loss": 0.339, "step": 2922 }, { "epoch": 0.4191281904215658, "grad_norm": 0.42902085185050964, "learning_rate": 9.95213178367206e-06, "loss": 0.3656, "step": 2923 }, { "epoch": 0.41927158015486093, "grad_norm": 0.3401362895965576, "learning_rate": 9.952016554041978e-06, "loss": 0.3626, "step": 2924 }, { "epoch": 0.419414969888156, "grad_norm": 0.36889952421188354, "learning_rate": 9.951901186555374e-06, "loss": 0.3587, "step": 2925 }, { "epoch": 0.4195583596214511, "grad_norm": 0.3595168888568878, "learning_rate": 9.95178568121546e-06, "loss": 0.3566, "step": 2926 }, { "epoch": 0.4197017493547462, "grad_norm": 0.3809199631214142, "learning_rate": 9.95167003802545e-06, "loss": 0.3467, "step": 2927 }, { "epoch": 0.4198451390880413, "grad_norm": 0.37730374932289124, "learning_rate": 9.951554256988562e-06, "loss": 0.3662, "step": 2928 }, { "epoch": 0.4199885288213364, "grad_norm": 0.3937790095806122, "learning_rate": 9.951438338108022e-06, "loss": 0.3463, "step": 2929 }, { "epoch": 0.4201319185546315, "grad_norm": 0.3862994313240051, "learning_rate": 9.951322281387053e-06, "loss": 0.3513, "step": 2930 }, { "epoch": 0.4202753082879266, "grad_norm": 0.3358701169490814, "learning_rate": 9.95120608682889e-06, "loss": 0.3554, "step": 2931 }, { "epoch": 0.42041869802122167, "grad_norm": 0.3465093970298767, "learning_rate": 9.951089754436766e-06, "loss": 0.3599, "step": 2932 }, { "epoch": 0.42056208775451676, "grad_norm": 0.3729744553565979, "learning_rate": 9.95097328421392e-06, "loss": 0.366, "step": 2933 }, { "epoch": 0.4207054774878119, "grad_norm": 0.33848679065704346, "learning_rate": 9.950856676163592e-06, "loss": 0.3551, "step": 2934 }, { "epoch": 0.420848867221107, "grad_norm": 0.36986997723579407, "learning_rate": 9.950739930289031e-06, "loss": 0.3476, "step": 2935 }, { "epoch": 0.42099225695440207, "grad_norm": 0.3717848062515259, "learning_rate": 9.950623046593486e-06, "loss": 0.3525, "step": 2936 }, { "epoch": 0.42113564668769715, "grad_norm": 0.3648192286491394, "learning_rate": 9.950506025080209e-06, "loss": 0.3615, "step": 2937 }, { "epoch": 0.42127903642099224, "grad_norm": 0.4215434491634369, "learning_rate": 9.950388865752461e-06, "loss": 0.3682, "step": 2938 }, { "epoch": 0.4214224261542874, "grad_norm": 0.36858993768692017, "learning_rate": 9.950271568613503e-06, "loss": 0.3535, "step": 2939 }, { "epoch": 0.42156581588758246, "grad_norm": 0.3296928107738495, "learning_rate": 9.9501541336666e-06, "loss": 0.3341, "step": 2940 }, { "epoch": 0.42170920562087755, "grad_norm": 0.437237024307251, "learning_rate": 9.950036560915018e-06, "loss": 0.3547, "step": 2941 }, { "epoch": 0.42185259535417263, "grad_norm": 0.37146085500717163, "learning_rate": 9.949918850362034e-06, "loss": 0.3741, "step": 2942 }, { "epoch": 0.4219959850874677, "grad_norm": 0.3871707320213318, "learning_rate": 9.949801002010924e-06, "loss": 0.3751, "step": 2943 }, { "epoch": 0.42213937482076286, "grad_norm": 0.41209515929222107, "learning_rate": 9.949683015864969e-06, "loss": 0.3721, "step": 2944 }, { "epoch": 0.42228276455405794, "grad_norm": 0.36889714002609253, "learning_rate": 9.94956489192745e-06, "loss": 0.3408, "step": 2945 }, { "epoch": 0.42242615428735303, "grad_norm": 0.40011027455329895, "learning_rate": 9.94944663020166e-06, "loss": 0.3725, "step": 2946 }, { "epoch": 0.4225695440206481, "grad_norm": 0.378462553024292, "learning_rate": 9.949328230690892e-06, "loss": 0.3285, "step": 2947 }, { "epoch": 0.4227129337539432, "grad_norm": 0.36438265442848206, "learning_rate": 9.949209693398436e-06, "loss": 0.3462, "step": 2948 }, { "epoch": 0.42285632348723834, "grad_norm": 0.37204304337501526, "learning_rate": 9.949091018327597e-06, "loss": 0.3513, "step": 2949 }, { "epoch": 0.4229997132205334, "grad_norm": 0.3894745409488678, "learning_rate": 9.948972205481676e-06, "loss": 0.3796, "step": 2950 }, { "epoch": 0.4231431029538285, "grad_norm": 0.39184027910232544, "learning_rate": 9.948853254863984e-06, "loss": 0.3508, "step": 2951 }, { "epoch": 0.4232864926871236, "grad_norm": 0.45290812849998474, "learning_rate": 9.948734166477827e-06, "loss": 0.3836, "step": 2952 }, { "epoch": 0.4234298824204187, "grad_norm": 0.3905196487903595, "learning_rate": 9.948614940326526e-06, "loss": 0.3407, "step": 2953 }, { "epoch": 0.4235732721537138, "grad_norm": 0.370288610458374, "learning_rate": 9.948495576413398e-06, "loss": 0.3725, "step": 2954 }, { "epoch": 0.4237166618870089, "grad_norm": 0.39428138732910156, "learning_rate": 9.948376074741763e-06, "loss": 0.3835, "step": 2955 }, { "epoch": 0.423860051620304, "grad_norm": 0.39450502395629883, "learning_rate": 9.948256435314952e-06, "loss": 0.346, "step": 2956 }, { "epoch": 0.4240034413535991, "grad_norm": 0.37958016991615295, "learning_rate": 9.948136658136291e-06, "loss": 0.3336, "step": 2957 }, { "epoch": 0.42414683108689416, "grad_norm": 0.38607344031333923, "learning_rate": 9.94801674320912e-06, "loss": 0.3521, "step": 2958 }, { "epoch": 0.4242902208201893, "grad_norm": 0.35886335372924805, "learning_rate": 9.947896690536772e-06, "loss": 0.3578, "step": 2959 }, { "epoch": 0.4244336105534844, "grad_norm": 0.36598506569862366, "learning_rate": 9.947776500122592e-06, "loss": 0.3392, "step": 2960 }, { "epoch": 0.42457700028677947, "grad_norm": 0.4026179313659668, "learning_rate": 9.947656171969927e-06, "loss": 0.3623, "step": 2961 }, { "epoch": 0.42472039002007456, "grad_norm": 0.4176468253135681, "learning_rate": 9.947535706082125e-06, "loss": 0.3627, "step": 2962 }, { "epoch": 0.42486377975336964, "grad_norm": 0.3937869668006897, "learning_rate": 9.947415102462538e-06, "loss": 0.3433, "step": 2963 }, { "epoch": 0.4250071694866648, "grad_norm": 0.3878801167011261, "learning_rate": 9.947294361114525e-06, "loss": 0.3528, "step": 2964 }, { "epoch": 0.42515055921995987, "grad_norm": 0.4085713326931, "learning_rate": 9.947173482041449e-06, "loss": 0.3751, "step": 2965 }, { "epoch": 0.42529394895325495, "grad_norm": 0.36983993649482727, "learning_rate": 9.947052465246672e-06, "loss": 0.34, "step": 2966 }, { "epoch": 0.42543733868655004, "grad_norm": 0.4082154333591461, "learning_rate": 9.946931310733565e-06, "loss": 0.3709, "step": 2967 }, { "epoch": 0.4255807284198451, "grad_norm": 0.4253208637237549, "learning_rate": 9.9468100185055e-06, "loss": 0.3553, "step": 2968 }, { "epoch": 0.4257241181531402, "grad_norm": 0.3493613004684448, "learning_rate": 9.946688588565854e-06, "loss": 0.3603, "step": 2969 }, { "epoch": 0.42586750788643535, "grad_norm": 0.39989984035491943, "learning_rate": 9.946567020918008e-06, "loss": 0.3499, "step": 2970 }, { "epoch": 0.42601089761973043, "grad_norm": 0.4061070382595062, "learning_rate": 9.946445315565343e-06, "loss": 0.3492, "step": 2971 }, { "epoch": 0.4261542873530255, "grad_norm": 0.41492921113967896, "learning_rate": 9.946323472511253e-06, "loss": 0.3465, "step": 2972 }, { "epoch": 0.4262976770863206, "grad_norm": 0.35145288705825806, "learning_rate": 9.946201491759122e-06, "loss": 0.3447, "step": 2973 }, { "epoch": 0.4264410668196157, "grad_norm": 0.38397371768951416, "learning_rate": 9.946079373312354e-06, "loss": 0.3578, "step": 2974 }, { "epoch": 0.42658445655291083, "grad_norm": 0.3696938753128052, "learning_rate": 9.945957117174343e-06, "loss": 0.3393, "step": 2975 }, { "epoch": 0.4267278462862059, "grad_norm": 0.3463876247406006, "learning_rate": 9.945834723348493e-06, "loss": 0.346, "step": 2976 }, { "epoch": 0.426871236019501, "grad_norm": 0.4039175510406494, "learning_rate": 9.945712191838216e-06, "loss": 0.3529, "step": 2977 }, { "epoch": 0.4270146257527961, "grad_norm": 0.34653425216674805, "learning_rate": 9.945589522646916e-06, "loss": 0.3473, "step": 2978 }, { "epoch": 0.42715801548609117, "grad_norm": 0.37959739565849304, "learning_rate": 9.945466715778011e-06, "loss": 0.3465, "step": 2979 }, { "epoch": 0.4273014052193863, "grad_norm": 0.3686051368713379, "learning_rate": 9.945343771234924e-06, "loss": 0.3295, "step": 2980 }, { "epoch": 0.4274447949526814, "grad_norm": 0.41340577602386475, "learning_rate": 9.945220689021071e-06, "loss": 0.3611, "step": 2981 }, { "epoch": 0.4275881846859765, "grad_norm": 0.4484997093677521, "learning_rate": 9.945097469139881e-06, "loss": 0.3667, "step": 2982 }, { "epoch": 0.42773157441927157, "grad_norm": 0.3650946021080017, "learning_rate": 9.944974111594784e-06, "loss": 0.3534, "step": 2983 }, { "epoch": 0.42787496415256665, "grad_norm": 0.4114135801792145, "learning_rate": 9.944850616389217e-06, "loss": 0.3611, "step": 2984 }, { "epoch": 0.4280183538858618, "grad_norm": 0.3948696553707123, "learning_rate": 9.944726983526614e-06, "loss": 0.3784, "step": 2985 }, { "epoch": 0.4281617436191569, "grad_norm": 0.4157170057296753, "learning_rate": 9.944603213010419e-06, "loss": 0.356, "step": 2986 }, { "epoch": 0.42830513335245196, "grad_norm": 0.41064590215682983, "learning_rate": 9.944479304844077e-06, "loss": 0.3682, "step": 2987 }, { "epoch": 0.42844852308574705, "grad_norm": 0.4145870804786682, "learning_rate": 9.944355259031035e-06, "loss": 0.3495, "step": 2988 }, { "epoch": 0.42859191281904213, "grad_norm": 0.3678159713745117, "learning_rate": 9.944231075574751e-06, "loss": 0.3485, "step": 2989 }, { "epoch": 0.42873530255233727, "grad_norm": 0.47711801528930664, "learning_rate": 9.944106754478677e-06, "loss": 0.3402, "step": 2990 }, { "epoch": 0.42887869228563236, "grad_norm": 0.48389536142349243, "learning_rate": 9.943982295746277e-06, "loss": 0.3515, "step": 2991 }, { "epoch": 0.42902208201892744, "grad_norm": 0.43861377239227295, "learning_rate": 9.943857699381017e-06, "loss": 0.3588, "step": 2992 }, { "epoch": 0.4291654717522225, "grad_norm": 0.4294329583644867, "learning_rate": 9.943732965386362e-06, "loss": 0.3554, "step": 2993 }, { "epoch": 0.4293088614855176, "grad_norm": 0.4008924067020416, "learning_rate": 9.943608093765788e-06, "loss": 0.3471, "step": 2994 }, { "epoch": 0.42945225121881275, "grad_norm": 0.4644307494163513, "learning_rate": 9.943483084522767e-06, "loss": 0.3582, "step": 2995 }, { "epoch": 0.42959564095210784, "grad_norm": 0.3804514408111572, "learning_rate": 9.943357937660783e-06, "loss": 0.3368, "step": 2996 }, { "epoch": 0.4297390306854029, "grad_norm": 0.4164603650569916, "learning_rate": 9.943232653183317e-06, "loss": 0.3707, "step": 2997 }, { "epoch": 0.429882420418698, "grad_norm": 0.4105282425880432, "learning_rate": 9.94310723109386e-06, "loss": 0.3601, "step": 2998 }, { "epoch": 0.4300258101519931, "grad_norm": 0.36669862270355225, "learning_rate": 9.9429816713959e-06, "loss": 0.3617, "step": 2999 }, { "epoch": 0.43016919988528823, "grad_norm": 0.3399026095867157, "learning_rate": 9.942855974092935e-06, "loss": 0.3561, "step": 3000 }, { "epoch": 0.4303125896185833, "grad_norm": 0.3795095682144165, "learning_rate": 9.942730139188463e-06, "loss": 0.3425, "step": 3001 }, { "epoch": 0.4304559793518784, "grad_norm": 0.3892856538295746, "learning_rate": 9.942604166685987e-06, "loss": 0.3647, "step": 3002 }, { "epoch": 0.4305993690851735, "grad_norm": 0.380682110786438, "learning_rate": 9.942478056589014e-06, "loss": 0.3534, "step": 3003 }, { "epoch": 0.4307427588184686, "grad_norm": 0.3677002489566803, "learning_rate": 9.942351808901055e-06, "loss": 0.382, "step": 3004 }, { "epoch": 0.4308861485517637, "grad_norm": 0.42076918482780457, "learning_rate": 9.942225423625625e-06, "loss": 0.3433, "step": 3005 }, { "epoch": 0.4310295382850588, "grad_norm": 0.37278780341148376, "learning_rate": 9.94209890076624e-06, "loss": 0.3596, "step": 3006 }, { "epoch": 0.4311729280183539, "grad_norm": 0.3871888518333435, "learning_rate": 9.941972240326424e-06, "loss": 0.3449, "step": 3007 }, { "epoch": 0.43131631775164897, "grad_norm": 0.43226730823516846, "learning_rate": 9.941845442309705e-06, "loss": 0.351, "step": 3008 }, { "epoch": 0.43145970748494406, "grad_norm": 0.3709476888179779, "learning_rate": 9.941718506719611e-06, "loss": 0.3507, "step": 3009 }, { "epoch": 0.4316030972182392, "grad_norm": 0.46870332956314087, "learning_rate": 9.941591433559674e-06, "loss": 0.3585, "step": 3010 }, { "epoch": 0.4317464869515343, "grad_norm": 0.4008190333843231, "learning_rate": 9.941464222833434e-06, "loss": 0.363, "step": 3011 }, { "epoch": 0.43188987668482937, "grad_norm": 0.37937131524086, "learning_rate": 9.941336874544431e-06, "loss": 0.3681, "step": 3012 }, { "epoch": 0.43203326641812445, "grad_norm": 0.4274957478046417, "learning_rate": 9.941209388696211e-06, "loss": 0.3469, "step": 3013 }, { "epoch": 0.43217665615141954, "grad_norm": 0.36696702241897583, "learning_rate": 9.941081765292321e-06, "loss": 0.3385, "step": 3014 }, { "epoch": 0.4323200458847147, "grad_norm": 0.3669831454753876, "learning_rate": 9.940954004336318e-06, "loss": 0.3546, "step": 3015 }, { "epoch": 0.43246343561800976, "grad_norm": 0.36699244379997253, "learning_rate": 9.940826105831755e-06, "loss": 0.3436, "step": 3016 }, { "epoch": 0.43260682535130485, "grad_norm": 0.34978023171424866, "learning_rate": 9.940698069782193e-06, "loss": 0.3501, "step": 3017 }, { "epoch": 0.43275021508459993, "grad_norm": 0.38581666350364685, "learning_rate": 9.940569896191198e-06, "loss": 0.355, "step": 3018 }, { "epoch": 0.432893604817895, "grad_norm": 0.3704851567745209, "learning_rate": 9.940441585062338e-06, "loss": 0.3513, "step": 3019 }, { "epoch": 0.43303699455119016, "grad_norm": 0.37513354420661926, "learning_rate": 9.940313136399182e-06, "loss": 0.3583, "step": 3020 }, { "epoch": 0.43318038428448524, "grad_norm": 0.3453024923801422, "learning_rate": 9.940184550205307e-06, "loss": 0.3548, "step": 3021 }, { "epoch": 0.4333237740177803, "grad_norm": 0.35018354654312134, "learning_rate": 9.940055826484296e-06, "loss": 0.349, "step": 3022 }, { "epoch": 0.4334671637510754, "grad_norm": 0.34894922375679016, "learning_rate": 9.93992696523973e-06, "loss": 0.3492, "step": 3023 }, { "epoch": 0.4336105534843705, "grad_norm": 0.3767106235027313, "learning_rate": 9.939797966475195e-06, "loss": 0.3243, "step": 3024 }, { "epoch": 0.43375394321766564, "grad_norm": 0.4008391797542572, "learning_rate": 9.939668830194283e-06, "loss": 0.3961, "step": 3025 }, { "epoch": 0.4338973329509607, "grad_norm": 0.35213422775268555, "learning_rate": 9.93953955640059e-06, "loss": 0.3468, "step": 3026 }, { "epoch": 0.4340407226842558, "grad_norm": 0.34743309020996094, "learning_rate": 9.939410145097715e-06, "loss": 0.3452, "step": 3027 }, { "epoch": 0.4341841124175509, "grad_norm": 0.35107237100601196, "learning_rate": 9.93928059628926e-06, "loss": 0.3277, "step": 3028 }, { "epoch": 0.434327502150846, "grad_norm": 0.3587780296802521, "learning_rate": 9.93915090997883e-06, "loss": 0.3406, "step": 3029 }, { "epoch": 0.4344708918841411, "grad_norm": 0.36109089851379395, "learning_rate": 9.939021086170037e-06, "loss": 0.3592, "step": 3030 }, { "epoch": 0.4346142816174362, "grad_norm": 0.3452739417552948, "learning_rate": 9.938891124866495e-06, "loss": 0.3552, "step": 3031 }, { "epoch": 0.4347576713507313, "grad_norm": 0.3698110580444336, "learning_rate": 9.938761026071822e-06, "loss": 0.3513, "step": 3032 }, { "epoch": 0.4349010610840264, "grad_norm": 0.32984423637390137, "learning_rate": 9.938630789789638e-06, "loss": 0.3262, "step": 3033 }, { "epoch": 0.43504445081732146, "grad_norm": 0.43071529269218445, "learning_rate": 9.93850041602357e-06, "loss": 0.3692, "step": 3034 }, { "epoch": 0.4351878405506166, "grad_norm": 0.37899717688560486, "learning_rate": 9.938369904777248e-06, "loss": 0.3716, "step": 3035 }, { "epoch": 0.4353312302839117, "grad_norm": 0.3775688707828522, "learning_rate": 9.938239256054303e-06, "loss": 0.3439, "step": 3036 }, { "epoch": 0.43547462001720677, "grad_norm": 0.42589235305786133, "learning_rate": 9.938108469858375e-06, "loss": 0.3611, "step": 3037 }, { "epoch": 0.43561800975050186, "grad_norm": 0.3607954978942871, "learning_rate": 9.937977546193103e-06, "loss": 0.3616, "step": 3038 }, { "epoch": 0.43576139948379694, "grad_norm": 0.43386781215667725, "learning_rate": 9.937846485062132e-06, "loss": 0.3752, "step": 3039 }, { "epoch": 0.4359047892170921, "grad_norm": 0.38314884901046753, "learning_rate": 9.937715286469112e-06, "loss": 0.3657, "step": 3040 }, { "epoch": 0.43604817895038717, "grad_norm": 0.4222504496574402, "learning_rate": 9.937583950417692e-06, "loss": 0.3354, "step": 3041 }, { "epoch": 0.43619156868368225, "grad_norm": 0.421411395072937, "learning_rate": 9.937452476911531e-06, "loss": 0.3844, "step": 3042 }, { "epoch": 0.43633495841697734, "grad_norm": 0.3707369863986969, "learning_rate": 9.93732086595429e-06, "loss": 0.3564, "step": 3043 }, { "epoch": 0.4364783481502724, "grad_norm": 0.43110936880111694, "learning_rate": 9.937189117549628e-06, "loss": 0.3503, "step": 3044 }, { "epoch": 0.43662173788356756, "grad_norm": 0.38553544878959656, "learning_rate": 9.937057231701218e-06, "loss": 0.3481, "step": 3045 }, { "epoch": 0.43676512761686265, "grad_norm": 0.38469618558883667, "learning_rate": 9.936925208412729e-06, "loss": 0.357, "step": 3046 }, { "epoch": 0.43690851735015773, "grad_norm": 0.39531177282333374, "learning_rate": 9.936793047687837e-06, "loss": 0.3453, "step": 3047 }, { "epoch": 0.4370519070834528, "grad_norm": 0.3438965976238251, "learning_rate": 9.936660749530219e-06, "loss": 0.3707, "step": 3048 }, { "epoch": 0.4371952968167479, "grad_norm": 0.3614966869354248, "learning_rate": 9.93652831394356e-06, "loss": 0.3805, "step": 3049 }, { "epoch": 0.43733868655004304, "grad_norm": 0.3994775414466858, "learning_rate": 9.936395740931549e-06, "loss": 0.3706, "step": 3050 }, { "epoch": 0.43748207628333813, "grad_norm": 0.38211050629615784, "learning_rate": 9.936263030497872e-06, "loss": 0.3492, "step": 3051 }, { "epoch": 0.4376254660166332, "grad_norm": 0.36502575874328613, "learning_rate": 9.936130182646226e-06, "loss": 0.3462, "step": 3052 }, { "epoch": 0.4377688557499283, "grad_norm": 0.369245320558548, "learning_rate": 9.93599719738031e-06, "loss": 0.3637, "step": 3053 }, { "epoch": 0.4379122454832234, "grad_norm": 0.3780440390110016, "learning_rate": 9.935864074703822e-06, "loss": 0.3349, "step": 3054 }, { "epoch": 0.4380556352165185, "grad_norm": 0.3572370707988739, "learning_rate": 9.935730814620475e-06, "loss": 0.3371, "step": 3055 }, { "epoch": 0.4381990249498136, "grad_norm": 0.3481077253818512, "learning_rate": 9.93559741713397e-06, "loss": 0.3765, "step": 3056 }, { "epoch": 0.4383424146831087, "grad_norm": 0.3468896150588989, "learning_rate": 9.935463882248029e-06, "loss": 0.3445, "step": 3057 }, { "epoch": 0.4384858044164038, "grad_norm": 0.37402161955833435, "learning_rate": 9.935330209966366e-06, "loss": 0.3328, "step": 3058 }, { "epoch": 0.43862919414969886, "grad_norm": 0.3833564817905426, "learning_rate": 9.935196400292699e-06, "loss": 0.3629, "step": 3059 }, { "epoch": 0.438772583882994, "grad_norm": 0.4122837781906128, "learning_rate": 9.935062453230759e-06, "loss": 0.3515, "step": 3060 }, { "epoch": 0.4389159736162891, "grad_norm": 0.3615701496601105, "learning_rate": 9.93492836878427e-06, "loss": 0.3438, "step": 3061 }, { "epoch": 0.4390593633495842, "grad_norm": 0.38395583629608154, "learning_rate": 9.934794146956966e-06, "loss": 0.3429, "step": 3062 }, { "epoch": 0.43920275308287926, "grad_norm": 0.38241884112358093, "learning_rate": 9.934659787752585e-06, "loss": 0.3515, "step": 3063 }, { "epoch": 0.43934614281617435, "grad_norm": 0.35514989495277405, "learning_rate": 9.934525291174866e-06, "loss": 0.346, "step": 3064 }, { "epoch": 0.43948953254946943, "grad_norm": 0.3316960334777832, "learning_rate": 9.934390657227555e-06, "loss": 0.3577, "step": 3065 }, { "epoch": 0.43963292228276457, "grad_norm": 0.3681429326534271, "learning_rate": 9.934255885914398e-06, "loss": 0.3421, "step": 3066 }, { "epoch": 0.43977631201605966, "grad_norm": 0.3812657296657562, "learning_rate": 9.934120977239147e-06, "loss": 0.3652, "step": 3067 }, { "epoch": 0.43991970174935474, "grad_norm": 0.3811974823474884, "learning_rate": 9.933985931205558e-06, "loss": 0.3606, "step": 3068 }, { "epoch": 0.4400630914826498, "grad_norm": 0.3527679741382599, "learning_rate": 9.933850747817391e-06, "loss": 0.354, "step": 3069 }, { "epoch": 0.4402064812159449, "grad_norm": 0.3689691126346588, "learning_rate": 9.933715427078409e-06, "loss": 0.3521, "step": 3070 }, { "epoch": 0.44034987094924005, "grad_norm": 0.35760506987571716, "learning_rate": 9.933579968992378e-06, "loss": 0.3377, "step": 3071 }, { "epoch": 0.44049326068253514, "grad_norm": 0.36649253964424133, "learning_rate": 9.93344437356307e-06, "loss": 0.3596, "step": 3072 }, { "epoch": 0.4406366504158302, "grad_norm": 0.34920644760131836, "learning_rate": 9.933308640794262e-06, "loss": 0.3527, "step": 3073 }, { "epoch": 0.4407800401491253, "grad_norm": 0.34509435296058655, "learning_rate": 9.933172770689727e-06, "loss": 0.3404, "step": 3074 }, { "epoch": 0.4409234298824204, "grad_norm": 0.39380672574043274, "learning_rate": 9.933036763253252e-06, "loss": 0.3718, "step": 3075 }, { "epoch": 0.44106681961571553, "grad_norm": 0.3812171518802643, "learning_rate": 9.932900618488624e-06, "loss": 0.3635, "step": 3076 }, { "epoch": 0.4412102093490106, "grad_norm": 0.36899369955062866, "learning_rate": 9.932764336399629e-06, "loss": 0.3624, "step": 3077 }, { "epoch": 0.4413535990823057, "grad_norm": 0.3302898406982422, "learning_rate": 9.932627916990063e-06, "loss": 0.3561, "step": 3078 }, { "epoch": 0.4414969888156008, "grad_norm": 0.3811708092689514, "learning_rate": 9.932491360263723e-06, "loss": 0.3862, "step": 3079 }, { "epoch": 0.4416403785488959, "grad_norm": 0.35698553919792175, "learning_rate": 9.932354666224413e-06, "loss": 0.3406, "step": 3080 }, { "epoch": 0.441783768282191, "grad_norm": 0.3778499364852905, "learning_rate": 9.932217834875935e-06, "loss": 0.3626, "step": 3081 }, { "epoch": 0.4419271580154861, "grad_norm": 0.3558368682861328, "learning_rate": 9.932080866222099e-06, "loss": 0.3654, "step": 3082 }, { "epoch": 0.4420705477487812, "grad_norm": 0.3691546618938446, "learning_rate": 9.93194376026672e-06, "loss": 0.3642, "step": 3083 }, { "epoch": 0.44221393748207627, "grad_norm": 0.4202536940574646, "learning_rate": 9.931806517013612e-06, "loss": 0.3547, "step": 3084 }, { "epoch": 0.44235732721537135, "grad_norm": 0.34372881054878235, "learning_rate": 9.9316691364666e-06, "loss": 0.3634, "step": 3085 }, { "epoch": 0.4425007169486665, "grad_norm": 0.393647164106369, "learning_rate": 9.931531618629501e-06, "loss": 0.3555, "step": 3086 }, { "epoch": 0.4426441066819616, "grad_norm": 0.45287176966667175, "learning_rate": 9.931393963506153e-06, "loss": 0.3499, "step": 3087 }, { "epoch": 0.44278749641525667, "grad_norm": 0.4000053107738495, "learning_rate": 9.93125617110038e-06, "loss": 0.3565, "step": 3088 }, { "epoch": 0.44293088614855175, "grad_norm": 0.3889431655406952, "learning_rate": 9.931118241416021e-06, "loss": 0.3434, "step": 3089 }, { "epoch": 0.44307427588184684, "grad_norm": 0.406677782535553, "learning_rate": 9.930980174456917e-06, "loss": 0.3607, "step": 3090 }, { "epoch": 0.443217665615142, "grad_norm": 0.4114449620246887, "learning_rate": 9.930841970226909e-06, "loss": 0.3681, "step": 3091 }, { "epoch": 0.44336105534843706, "grad_norm": 0.3814548850059509, "learning_rate": 9.930703628729846e-06, "loss": 0.365, "step": 3092 }, { "epoch": 0.44350444508173215, "grad_norm": 0.4129347503185272, "learning_rate": 9.930565149969577e-06, "loss": 0.3568, "step": 3093 }, { "epoch": 0.44364783481502723, "grad_norm": 0.39744579792022705, "learning_rate": 9.930426533949961e-06, "loss": 0.3578, "step": 3094 }, { "epoch": 0.4437912245483223, "grad_norm": 0.38688766956329346, "learning_rate": 9.930287780674855e-06, "loss": 0.355, "step": 3095 }, { "epoch": 0.44393461428161746, "grad_norm": 0.38113319873809814, "learning_rate": 9.930148890148122e-06, "loss": 0.3533, "step": 3096 }, { "epoch": 0.44407800401491254, "grad_norm": 0.37972211837768555, "learning_rate": 9.930009862373625e-06, "loss": 0.3528, "step": 3097 }, { "epoch": 0.4442213937482076, "grad_norm": 0.35602566599845886, "learning_rate": 9.92987069735524e-06, "loss": 0.3391, "step": 3098 }, { "epoch": 0.4443647834815027, "grad_norm": 0.3874330520629883, "learning_rate": 9.929731395096835e-06, "loss": 0.357, "step": 3099 }, { "epoch": 0.4445081732147978, "grad_norm": 0.4062826335430145, "learning_rate": 9.929591955602294e-06, "loss": 0.3519, "step": 3100 }, { "epoch": 0.44465156294809294, "grad_norm": 0.40071961283683777, "learning_rate": 9.929452378875495e-06, "loss": 0.3404, "step": 3101 }, { "epoch": 0.444794952681388, "grad_norm": 0.38011816143989563, "learning_rate": 9.929312664920326e-06, "loss": 0.3724, "step": 3102 }, { "epoch": 0.4449383424146831, "grad_norm": 0.3732149302959442, "learning_rate": 9.929172813740675e-06, "loss": 0.3604, "step": 3103 }, { "epoch": 0.4450817321479782, "grad_norm": 0.46643638610839844, "learning_rate": 9.929032825340434e-06, "loss": 0.3698, "step": 3104 }, { "epoch": 0.4452251218812733, "grad_norm": 0.45263975858688354, "learning_rate": 9.928892699723503e-06, "loss": 0.3844, "step": 3105 }, { "epoch": 0.4453685116145684, "grad_norm": 0.3914066255092621, "learning_rate": 9.928752436893781e-06, "loss": 0.3574, "step": 3106 }, { "epoch": 0.4455119013478635, "grad_norm": 0.4014909565448761, "learning_rate": 9.928612036855172e-06, "loss": 0.3579, "step": 3107 }, { "epoch": 0.4456552910811586, "grad_norm": 0.45483148097991943, "learning_rate": 9.928471499611585e-06, "loss": 0.3606, "step": 3108 }, { "epoch": 0.4457986808144537, "grad_norm": 0.38576385378837585, "learning_rate": 9.928330825166934e-06, "loss": 0.3515, "step": 3109 }, { "epoch": 0.44594207054774876, "grad_norm": 0.37669119238853455, "learning_rate": 9.928190013525134e-06, "loss": 0.3521, "step": 3110 }, { "epoch": 0.4460854602810439, "grad_norm": 0.3679998815059662, "learning_rate": 9.928049064690105e-06, "loss": 0.358, "step": 3111 }, { "epoch": 0.446228850014339, "grad_norm": 0.36906763911247253, "learning_rate": 9.92790797866577e-06, "loss": 0.3569, "step": 3112 }, { "epoch": 0.44637223974763407, "grad_norm": 0.34419021010398865, "learning_rate": 9.927766755456058e-06, "loss": 0.3456, "step": 3113 }, { "epoch": 0.44651562948092915, "grad_norm": 0.37075600028038025, "learning_rate": 9.9276253950649e-06, "loss": 0.3595, "step": 3114 }, { "epoch": 0.44665901921422424, "grad_norm": 0.36267560720443726, "learning_rate": 9.92748389749623e-06, "loss": 0.3279, "step": 3115 }, { "epoch": 0.4468024089475194, "grad_norm": 0.36748841404914856, "learning_rate": 9.92734226275399e-06, "loss": 0.3426, "step": 3116 }, { "epoch": 0.44694579868081447, "grad_norm": 0.40260010957717896, "learning_rate": 9.927200490842119e-06, "loss": 0.352, "step": 3117 }, { "epoch": 0.44708918841410955, "grad_norm": 0.41309112310409546, "learning_rate": 9.927058581764568e-06, "loss": 0.3516, "step": 3118 }, { "epoch": 0.44723257814740464, "grad_norm": 0.39142677187919617, "learning_rate": 9.926916535525283e-06, "loss": 0.3321, "step": 3119 }, { "epoch": 0.4473759678806997, "grad_norm": 0.3802240788936615, "learning_rate": 9.926774352128224e-06, "loss": 0.3423, "step": 3120 }, { "epoch": 0.44751935761399486, "grad_norm": 0.3524548411369324, "learning_rate": 9.926632031577344e-06, "loss": 0.3531, "step": 3121 }, { "epoch": 0.44766274734728995, "grad_norm": 0.37064129114151, "learning_rate": 9.926489573876606e-06, "loss": 0.3451, "step": 3122 }, { "epoch": 0.44780613708058503, "grad_norm": 0.3814503252506256, "learning_rate": 9.926346979029977e-06, "loss": 0.3554, "step": 3123 }, { "epoch": 0.4479495268138801, "grad_norm": 0.36412349343299866, "learning_rate": 9.926204247041427e-06, "loss": 0.345, "step": 3124 }, { "epoch": 0.4480929165471752, "grad_norm": 0.34943756461143494, "learning_rate": 9.926061377914928e-06, "loss": 0.35, "step": 3125 }, { "epoch": 0.44823630628047034, "grad_norm": 0.3669827878475189, "learning_rate": 9.925918371654457e-06, "loss": 0.3565, "step": 3126 }, { "epoch": 0.4483796960137654, "grad_norm": 0.39739376306533813, "learning_rate": 9.925775228264e-06, "loss": 0.3303, "step": 3127 }, { "epoch": 0.4485230857470605, "grad_norm": 0.39450928568840027, "learning_rate": 9.925631947747533e-06, "loss": 0.3343, "step": 3128 }, { "epoch": 0.4486664754803556, "grad_norm": 0.4599456489086151, "learning_rate": 9.925488530109052e-06, "loss": 0.3789, "step": 3129 }, { "epoch": 0.4488098652136507, "grad_norm": 0.4506070613861084, "learning_rate": 9.925344975352547e-06, "loss": 0.3507, "step": 3130 }, { "epoch": 0.4489532549469458, "grad_norm": 0.42716866731643677, "learning_rate": 9.925201283482014e-06, "loss": 0.3665, "step": 3131 }, { "epoch": 0.4490966446802409, "grad_norm": 0.4278530180454254, "learning_rate": 9.925057454501454e-06, "loss": 0.3642, "step": 3132 }, { "epoch": 0.449240034413536, "grad_norm": 0.42307525873184204, "learning_rate": 9.924913488414872e-06, "loss": 0.3584, "step": 3133 }, { "epoch": 0.4493834241468311, "grad_norm": 0.40157389640808105, "learning_rate": 9.924769385226272e-06, "loss": 0.3606, "step": 3134 }, { "epoch": 0.44952681388012616, "grad_norm": 0.38942232728004456, "learning_rate": 9.92462514493967e-06, "loss": 0.3492, "step": 3135 }, { "epoch": 0.4496702036134213, "grad_norm": 0.37995538115501404, "learning_rate": 9.924480767559076e-06, "loss": 0.3443, "step": 3136 }, { "epoch": 0.4498135933467164, "grad_norm": 0.3986561596393585, "learning_rate": 9.924336253088518e-06, "loss": 0.3524, "step": 3137 }, { "epoch": 0.4499569830800115, "grad_norm": 0.4268074333667755, "learning_rate": 9.924191601532009e-06, "loss": 0.3421, "step": 3138 }, { "epoch": 0.45010037281330656, "grad_norm": 0.39757460355758667, "learning_rate": 9.924046812893584e-06, "loss": 0.354, "step": 3139 }, { "epoch": 0.45024376254660164, "grad_norm": 0.34421268105506897, "learning_rate": 9.923901887177267e-06, "loss": 0.3486, "step": 3140 }, { "epoch": 0.4503871522798968, "grad_norm": 0.38799503445625305, "learning_rate": 9.923756824387099e-06, "loss": 0.3436, "step": 3141 }, { "epoch": 0.45053054201319187, "grad_norm": 0.3870716094970703, "learning_rate": 9.923611624527114e-06, "loss": 0.3277, "step": 3142 }, { "epoch": 0.45067393174648696, "grad_norm": 0.37998420000076294, "learning_rate": 9.923466287601356e-06, "loss": 0.3815, "step": 3143 }, { "epoch": 0.45081732147978204, "grad_norm": 0.37845367193222046, "learning_rate": 9.923320813613869e-06, "loss": 0.3612, "step": 3144 }, { "epoch": 0.4509607112130771, "grad_norm": 0.40412434935569763, "learning_rate": 9.923175202568705e-06, "loss": 0.3529, "step": 3145 }, { "epoch": 0.45110410094637227, "grad_norm": 0.37952253222465515, "learning_rate": 9.923029454469916e-06, "loss": 0.3612, "step": 3146 }, { "epoch": 0.45124749067966735, "grad_norm": 0.3956694006919861, "learning_rate": 9.92288356932156e-06, "loss": 0.361, "step": 3147 }, { "epoch": 0.45139088041296244, "grad_norm": 0.3464081585407257, "learning_rate": 9.922737547127699e-06, "loss": 0.3558, "step": 3148 }, { "epoch": 0.4515342701462575, "grad_norm": 0.33867621421813965, "learning_rate": 9.922591387892395e-06, "loss": 0.3374, "step": 3149 }, { "epoch": 0.4516776598795526, "grad_norm": 0.3346458077430725, "learning_rate": 9.922445091619722e-06, "loss": 0.3461, "step": 3150 }, { "epoch": 0.45182104961284775, "grad_norm": 0.3318144977092743, "learning_rate": 9.922298658313748e-06, "loss": 0.3445, "step": 3151 }, { "epoch": 0.45196443934614283, "grad_norm": 0.44265109300613403, "learning_rate": 9.922152087978551e-06, "loss": 0.3553, "step": 3152 }, { "epoch": 0.4521078290794379, "grad_norm": 0.3420930504798889, "learning_rate": 9.922005380618213e-06, "loss": 0.3698, "step": 3153 }, { "epoch": 0.452251218812733, "grad_norm": 0.34623199701309204, "learning_rate": 9.921858536236817e-06, "loss": 0.3349, "step": 3154 }, { "epoch": 0.4523946085460281, "grad_norm": 0.45285797119140625, "learning_rate": 9.92171155483845e-06, "loss": 0.3472, "step": 3155 }, { "epoch": 0.45253799827932323, "grad_norm": 0.39655452966690063, "learning_rate": 9.921564436427204e-06, "loss": 0.3646, "step": 3156 }, { "epoch": 0.4526813880126183, "grad_norm": 0.3862672448158264, "learning_rate": 9.921417181007174e-06, "loss": 0.3457, "step": 3157 }, { "epoch": 0.4528247777459134, "grad_norm": 0.3894844055175781, "learning_rate": 9.92126978858246e-06, "loss": 0.3586, "step": 3158 }, { "epoch": 0.4529681674792085, "grad_norm": 0.39435505867004395, "learning_rate": 9.921122259157168e-06, "loss": 0.3634, "step": 3159 }, { "epoch": 0.45311155721250357, "grad_norm": 0.34576481580734253, "learning_rate": 9.920974592735402e-06, "loss": 0.3486, "step": 3160 }, { "epoch": 0.45325494694579865, "grad_norm": 0.3263517916202545, "learning_rate": 9.920826789321272e-06, "loss": 0.3314, "step": 3161 }, { "epoch": 0.4533983366790938, "grad_norm": 0.3591746687889099, "learning_rate": 9.920678848918894e-06, "loss": 0.334, "step": 3162 }, { "epoch": 0.4535417264123889, "grad_norm": 0.36083632707595825, "learning_rate": 9.920530771532386e-06, "loss": 0.3568, "step": 3163 }, { "epoch": 0.45368511614568396, "grad_norm": 0.3307103216648102, "learning_rate": 9.92038255716587e-06, "loss": 0.3463, "step": 3164 }, { "epoch": 0.45382850587897905, "grad_norm": 0.36503660678863525, "learning_rate": 9.920234205823473e-06, "loss": 0.3465, "step": 3165 }, { "epoch": 0.45397189561227413, "grad_norm": 0.3600810170173645, "learning_rate": 9.920085717509327e-06, "loss": 0.353, "step": 3166 }, { "epoch": 0.4541152853455693, "grad_norm": 0.3583345413208008, "learning_rate": 9.91993709222756e-06, "loss": 0.3542, "step": 3167 }, { "epoch": 0.45425867507886436, "grad_norm": 0.3814550042152405, "learning_rate": 9.919788329982313e-06, "loss": 0.3629, "step": 3168 }, { "epoch": 0.45440206481215945, "grad_norm": 0.35860082507133484, "learning_rate": 9.919639430777727e-06, "loss": 0.3597, "step": 3169 }, { "epoch": 0.45454545454545453, "grad_norm": 0.4281991720199585, "learning_rate": 9.919490394617949e-06, "loss": 0.3619, "step": 3170 }, { "epoch": 0.4546888442787496, "grad_norm": 0.33941006660461426, "learning_rate": 9.919341221507124e-06, "loss": 0.3622, "step": 3171 }, { "epoch": 0.45483223401204476, "grad_norm": 0.3675822913646698, "learning_rate": 9.919191911449408e-06, "loss": 0.3443, "step": 3172 }, { "epoch": 0.45497562374533984, "grad_norm": 0.3898494243621826, "learning_rate": 9.919042464448956e-06, "loss": 0.3493, "step": 3173 }, { "epoch": 0.4551190134786349, "grad_norm": 0.35544729232788086, "learning_rate": 9.918892880509928e-06, "loss": 0.3595, "step": 3174 }, { "epoch": 0.45526240321193, "grad_norm": 0.3878392279148102, "learning_rate": 9.91874315963649e-06, "loss": 0.3816, "step": 3175 }, { "epoch": 0.4554057929452251, "grad_norm": 0.3724495768547058, "learning_rate": 9.918593301832807e-06, "loss": 0.3387, "step": 3176 }, { "epoch": 0.45554918267852024, "grad_norm": 0.34288665652275085, "learning_rate": 9.918443307103055e-06, "loss": 0.3609, "step": 3177 }, { "epoch": 0.4556925724118153, "grad_norm": 0.37002214789390564, "learning_rate": 9.918293175451405e-06, "loss": 0.3609, "step": 3178 }, { "epoch": 0.4558359621451104, "grad_norm": 0.34877675771713257, "learning_rate": 9.91814290688204e-06, "loss": 0.3638, "step": 3179 }, { "epoch": 0.4559793518784055, "grad_norm": 0.35730457305908203, "learning_rate": 9.917992501399143e-06, "loss": 0.3355, "step": 3180 }, { "epoch": 0.4561227416117006, "grad_norm": 0.40628698468208313, "learning_rate": 9.917841959006899e-06, "loss": 0.3695, "step": 3181 }, { "epoch": 0.4562661313449957, "grad_norm": 0.34156984090805054, "learning_rate": 9.917691279709499e-06, "loss": 0.3666, "step": 3182 }, { "epoch": 0.4564095210782908, "grad_norm": 0.3904248774051666, "learning_rate": 9.917540463511139e-06, "loss": 0.3395, "step": 3183 }, { "epoch": 0.4565529108115859, "grad_norm": 0.3895878195762634, "learning_rate": 9.917389510416017e-06, "loss": 0.3572, "step": 3184 }, { "epoch": 0.456696300544881, "grad_norm": 0.3426133990287781, "learning_rate": 9.917238420428337e-06, "loss": 0.3723, "step": 3185 }, { "epoch": 0.45683969027817606, "grad_norm": 0.39256036281585693, "learning_rate": 9.917087193552304e-06, "loss": 0.3684, "step": 3186 }, { "epoch": 0.4569830800114712, "grad_norm": 0.36614692211151123, "learning_rate": 9.916935829792124e-06, "loss": 0.3481, "step": 3187 }, { "epoch": 0.4571264697447663, "grad_norm": 0.3825885057449341, "learning_rate": 9.916784329152017e-06, "loss": 0.3929, "step": 3188 }, { "epoch": 0.45726985947806137, "grad_norm": 0.3470548391342163, "learning_rate": 9.916632691636196e-06, "loss": 0.3423, "step": 3189 }, { "epoch": 0.45741324921135645, "grad_norm": 0.39099621772766113, "learning_rate": 9.916480917248885e-06, "loss": 0.3685, "step": 3190 }, { "epoch": 0.45755663894465154, "grad_norm": 0.34406960010528564, "learning_rate": 9.916329005994308e-06, "loss": 0.3435, "step": 3191 }, { "epoch": 0.4577000286779467, "grad_norm": 0.36387452483177185, "learning_rate": 9.916176957876695e-06, "loss": 0.325, "step": 3192 }, { "epoch": 0.45784341841124176, "grad_norm": 0.3497578799724579, "learning_rate": 9.916024772900277e-06, "loss": 0.3658, "step": 3193 }, { "epoch": 0.45798680814453685, "grad_norm": 0.3703260123729706, "learning_rate": 9.915872451069293e-06, "loss": 0.3595, "step": 3194 }, { "epoch": 0.45813019787783194, "grad_norm": 0.34562137722969055, "learning_rate": 9.91571999238798e-06, "loss": 0.3479, "step": 3195 }, { "epoch": 0.458273587611127, "grad_norm": 0.42580509185791016, "learning_rate": 9.915567396860584e-06, "loss": 0.3395, "step": 3196 }, { "epoch": 0.45841697734442216, "grad_norm": 0.36933785676956177, "learning_rate": 9.915414664491355e-06, "loss": 0.3517, "step": 3197 }, { "epoch": 0.45856036707771725, "grad_norm": 0.3352588415145874, "learning_rate": 9.915261795284543e-06, "loss": 0.3602, "step": 3198 }, { "epoch": 0.45870375681101233, "grad_norm": 0.40453654527664185, "learning_rate": 9.915108789244403e-06, "loss": 0.3456, "step": 3199 }, { "epoch": 0.4588471465443074, "grad_norm": 0.36568576097488403, "learning_rate": 9.914955646375195e-06, "loss": 0.3358, "step": 3200 }, { "epoch": 0.4589905362776025, "grad_norm": 0.35666871070861816, "learning_rate": 9.914802366681182e-06, "loss": 0.348, "step": 3201 }, { "epoch": 0.45913392601089764, "grad_norm": 0.3855409622192383, "learning_rate": 9.914648950166632e-06, "loss": 0.343, "step": 3202 }, { "epoch": 0.4592773157441927, "grad_norm": 0.3630055785179138, "learning_rate": 9.914495396835816e-06, "loss": 0.354, "step": 3203 }, { "epoch": 0.4594207054774878, "grad_norm": 0.33826908469200134, "learning_rate": 9.914341706693006e-06, "loss": 0.3503, "step": 3204 }, { "epoch": 0.4595640952107829, "grad_norm": 0.4044906497001648, "learning_rate": 9.914187879742485e-06, "loss": 0.3411, "step": 3205 }, { "epoch": 0.459707484944078, "grad_norm": 0.3632989823818207, "learning_rate": 9.914033915988533e-06, "loss": 0.3495, "step": 3206 }, { "epoch": 0.4598508746773731, "grad_norm": 0.36247482895851135, "learning_rate": 9.913879815435434e-06, "loss": 0.3624, "step": 3207 }, { "epoch": 0.4599942644106682, "grad_norm": 0.35891252756118774, "learning_rate": 9.91372557808748e-06, "loss": 0.3459, "step": 3208 }, { "epoch": 0.4601376541439633, "grad_norm": 0.3959578573703766, "learning_rate": 9.913571203948968e-06, "loss": 0.3352, "step": 3209 }, { "epoch": 0.4602810438772584, "grad_norm": 0.32175546884536743, "learning_rate": 9.91341669302419e-06, "loss": 0.3262, "step": 3210 }, { "epoch": 0.46042443361055346, "grad_norm": 0.35981568694114685, "learning_rate": 9.913262045317448e-06, "loss": 0.3569, "step": 3211 }, { "epoch": 0.4605678233438486, "grad_norm": 0.3954378664493561, "learning_rate": 9.91310726083305e-06, "loss": 0.3548, "step": 3212 }, { "epoch": 0.4607112130771437, "grad_norm": 0.36180099844932556, "learning_rate": 9.912952339575304e-06, "loss": 0.3631, "step": 3213 }, { "epoch": 0.4608546028104388, "grad_norm": 0.3791588246822357, "learning_rate": 9.912797281548522e-06, "loss": 0.378, "step": 3214 }, { "epoch": 0.46099799254373386, "grad_norm": 0.33852618932724, "learning_rate": 9.91264208675702e-06, "loss": 0.3625, "step": 3215 }, { "epoch": 0.46114138227702894, "grad_norm": 0.36893224716186523, "learning_rate": 9.91248675520512e-06, "loss": 0.3442, "step": 3216 }, { "epoch": 0.4612847720103241, "grad_norm": 0.33524903655052185, "learning_rate": 9.912331286897145e-06, "loss": 0.3811, "step": 3217 }, { "epoch": 0.46142816174361917, "grad_norm": 0.35815054178237915, "learning_rate": 9.912175681837426e-06, "loss": 0.3352, "step": 3218 }, { "epoch": 0.46157155147691425, "grad_norm": 0.3488963842391968, "learning_rate": 9.91201994003029e-06, "loss": 0.3599, "step": 3219 }, { "epoch": 0.46171494121020934, "grad_norm": 0.3513133227825165, "learning_rate": 9.911864061480076e-06, "loss": 0.36, "step": 3220 }, { "epoch": 0.4618583309435044, "grad_norm": 0.3609628975391388, "learning_rate": 9.911708046191121e-06, "loss": 0.3574, "step": 3221 }, { "epoch": 0.46200172067679957, "grad_norm": 0.3536001443862915, "learning_rate": 9.911551894167772e-06, "loss": 0.3399, "step": 3222 }, { "epoch": 0.46214511041009465, "grad_norm": 0.33748379349708557, "learning_rate": 9.911395605414372e-06, "loss": 0.3446, "step": 3223 }, { "epoch": 0.46228850014338974, "grad_norm": 0.3794150650501251, "learning_rate": 9.911239179935274e-06, "loss": 0.3522, "step": 3224 }, { "epoch": 0.4624318898766848, "grad_norm": 0.3771393597126007, "learning_rate": 9.911082617734831e-06, "loss": 0.3395, "step": 3225 }, { "epoch": 0.4625752796099799, "grad_norm": 0.34472888708114624, "learning_rate": 9.910925918817404e-06, "loss": 0.3442, "step": 3226 }, { "epoch": 0.46271866934327505, "grad_norm": 0.4085237681865692, "learning_rate": 9.910769083187355e-06, "loss": 0.3713, "step": 3227 }, { "epoch": 0.46286205907657013, "grad_norm": 0.3879293203353882, "learning_rate": 9.910612110849048e-06, "loss": 0.3642, "step": 3228 }, { "epoch": 0.4630054488098652, "grad_norm": 0.34186312556266785, "learning_rate": 9.910455001806851e-06, "loss": 0.3318, "step": 3229 }, { "epoch": 0.4631488385431603, "grad_norm": 0.37507298588752747, "learning_rate": 9.910297756065143e-06, "loss": 0.3522, "step": 3230 }, { "epoch": 0.4632922282764554, "grad_norm": 0.4019441604614258, "learning_rate": 9.9101403736283e-06, "loss": 0.3645, "step": 3231 }, { "epoch": 0.4634356180097505, "grad_norm": 0.34914281964302063, "learning_rate": 9.909982854500699e-06, "loss": 0.3558, "step": 3232 }, { "epoch": 0.4635790077430456, "grad_norm": 0.3778075575828552, "learning_rate": 9.90982519868673e-06, "loss": 0.368, "step": 3233 }, { "epoch": 0.4637223974763407, "grad_norm": 0.35885968804359436, "learning_rate": 9.90966740619078e-06, "loss": 0.3414, "step": 3234 }, { "epoch": 0.4638657872096358, "grad_norm": 0.3837692141532898, "learning_rate": 9.909509477017243e-06, "loss": 0.3394, "step": 3235 }, { "epoch": 0.46400917694293087, "grad_norm": 0.38372567296028137, "learning_rate": 9.909351411170512e-06, "loss": 0.339, "step": 3236 }, { "epoch": 0.464152566676226, "grad_norm": 0.3838520348072052, "learning_rate": 9.909193208654992e-06, "loss": 0.3417, "step": 3237 }, { "epoch": 0.4642959564095211, "grad_norm": 0.3341241478919983, "learning_rate": 9.909034869475084e-06, "loss": 0.3427, "step": 3238 }, { "epoch": 0.4644393461428162, "grad_norm": 0.4035501480102539, "learning_rate": 9.908876393635196e-06, "loss": 0.3539, "step": 3239 }, { "epoch": 0.46458273587611126, "grad_norm": 0.36389726400375366, "learning_rate": 9.908717781139742e-06, "loss": 0.3465, "step": 3240 }, { "epoch": 0.46472612560940635, "grad_norm": 0.37177619338035583, "learning_rate": 9.908559031993134e-06, "loss": 0.3462, "step": 3241 }, { "epoch": 0.4648695153427015, "grad_norm": 0.4061526954174042, "learning_rate": 9.908400146199794e-06, "loss": 0.3687, "step": 3242 }, { "epoch": 0.4650129050759966, "grad_norm": 0.45268240571022034, "learning_rate": 9.908241123764146e-06, "loss": 0.3579, "step": 3243 }, { "epoch": 0.46515629480929166, "grad_norm": 0.3782220184803009, "learning_rate": 9.908081964690615e-06, "loss": 0.3701, "step": 3244 }, { "epoch": 0.46529968454258674, "grad_norm": 0.39172905683517456, "learning_rate": 9.907922668983632e-06, "loss": 0.3628, "step": 3245 }, { "epoch": 0.46544307427588183, "grad_norm": 0.39120498299598694, "learning_rate": 9.90776323664763e-06, "loss": 0.3619, "step": 3246 }, { "epoch": 0.46558646400917697, "grad_norm": 0.3907541334629059, "learning_rate": 9.907603667687051e-06, "loss": 0.377, "step": 3247 }, { "epoch": 0.46572985374247206, "grad_norm": 0.3504410982131958, "learning_rate": 9.907443962106334e-06, "loss": 0.3309, "step": 3248 }, { "epoch": 0.46587324347576714, "grad_norm": 0.3684929609298706, "learning_rate": 9.907284119909927e-06, "loss": 0.3616, "step": 3249 }, { "epoch": 0.4660166332090622, "grad_norm": 0.33863359689712524, "learning_rate": 9.907124141102278e-06, "loss": 0.3545, "step": 3250 }, { "epoch": 0.4661600229423573, "grad_norm": 0.35158583521842957, "learning_rate": 9.906964025687841e-06, "loss": 0.333, "step": 3251 }, { "epoch": 0.4663034126756524, "grad_norm": 0.37524092197418213, "learning_rate": 9.906803773671076e-06, "loss": 0.3562, "step": 3252 }, { "epoch": 0.46644680240894754, "grad_norm": 0.3813045620918274, "learning_rate": 9.906643385056441e-06, "loss": 0.3784, "step": 3253 }, { "epoch": 0.4665901921422426, "grad_norm": 0.371941477060318, "learning_rate": 9.906482859848401e-06, "loss": 0.3616, "step": 3254 }, { "epoch": 0.4667335818755377, "grad_norm": 0.378469854593277, "learning_rate": 9.906322198051428e-06, "loss": 0.3798, "step": 3255 }, { "epoch": 0.4668769716088328, "grad_norm": 0.38509711623191833, "learning_rate": 9.90616139966999e-06, "loss": 0.3633, "step": 3256 }, { "epoch": 0.4670203613421279, "grad_norm": 0.36824458837509155, "learning_rate": 9.906000464708566e-06, "loss": 0.3523, "step": 3257 }, { "epoch": 0.467163751075423, "grad_norm": 0.35701921582221985, "learning_rate": 9.905839393171638e-06, "loss": 0.3465, "step": 3258 }, { "epoch": 0.4673071408087181, "grad_norm": 0.37963977456092834, "learning_rate": 9.905678185063687e-06, "loss": 0.3485, "step": 3259 }, { "epoch": 0.4674505305420132, "grad_norm": 0.3651321828365326, "learning_rate": 9.905516840389201e-06, "loss": 0.3451, "step": 3260 }, { "epoch": 0.4675939202753083, "grad_norm": 0.3536784052848816, "learning_rate": 9.905355359152673e-06, "loss": 0.3542, "step": 3261 }, { "epoch": 0.46773731000860336, "grad_norm": 0.3579724133014679, "learning_rate": 9.905193741358596e-06, "loss": 0.3503, "step": 3262 }, { "epoch": 0.4678806997418985, "grad_norm": 0.368440181016922, "learning_rate": 9.905031987011473e-06, "loss": 0.3506, "step": 3263 }, { "epoch": 0.4680240894751936, "grad_norm": 0.3812854290008545, "learning_rate": 9.904870096115804e-06, "loss": 0.3621, "step": 3264 }, { "epoch": 0.46816747920848867, "grad_norm": 0.3395519256591797, "learning_rate": 9.904708068676095e-06, "loss": 0.3579, "step": 3265 }, { "epoch": 0.46831086894178375, "grad_norm": 0.3491934835910797, "learning_rate": 9.904545904696861e-06, "loss": 0.3499, "step": 3266 }, { "epoch": 0.46845425867507884, "grad_norm": 0.3834405243396759, "learning_rate": 9.90438360418261e-06, "loss": 0.3397, "step": 3267 }, { "epoch": 0.468597648408374, "grad_norm": 0.4004075825214386, "learning_rate": 9.904221167137868e-06, "loss": 0.3563, "step": 3268 }, { "epoch": 0.46874103814166906, "grad_norm": 0.3571562170982361, "learning_rate": 9.904058593567151e-06, "loss": 0.3452, "step": 3269 }, { "epoch": 0.46888442787496415, "grad_norm": 0.36994895339012146, "learning_rate": 9.903895883474986e-06, "loss": 0.3413, "step": 3270 }, { "epoch": 0.46902781760825923, "grad_norm": 0.3523765802383423, "learning_rate": 9.903733036865903e-06, "loss": 0.3301, "step": 3271 }, { "epoch": 0.4691712073415543, "grad_norm": 0.33741405606269836, "learning_rate": 9.903570053744436e-06, "loss": 0.3659, "step": 3272 }, { "epoch": 0.46931459707484946, "grad_norm": 0.3699011206626892, "learning_rate": 9.903406934115123e-06, "loss": 0.3698, "step": 3273 }, { "epoch": 0.46945798680814455, "grad_norm": 0.38807639479637146, "learning_rate": 9.903243677982504e-06, "loss": 0.3471, "step": 3274 }, { "epoch": 0.46960137654143963, "grad_norm": 0.35869333148002625, "learning_rate": 9.903080285351121e-06, "loss": 0.339, "step": 3275 }, { "epoch": 0.4697447662747347, "grad_norm": 0.36925607919692993, "learning_rate": 9.902916756225527e-06, "loss": 0.3557, "step": 3276 }, { "epoch": 0.4698881560080298, "grad_norm": 0.35080277919769287, "learning_rate": 9.902753090610273e-06, "loss": 0.3561, "step": 3277 }, { "epoch": 0.47003154574132494, "grad_norm": 0.3410987854003906, "learning_rate": 9.902589288509914e-06, "loss": 0.3771, "step": 3278 }, { "epoch": 0.47017493547462, "grad_norm": 0.41337448358535767, "learning_rate": 9.90242534992901e-06, "loss": 0.3598, "step": 3279 }, { "epoch": 0.4703183252079151, "grad_norm": 0.36619555950164795, "learning_rate": 9.902261274872127e-06, "loss": 0.3571, "step": 3280 }, { "epoch": 0.4704617149412102, "grad_norm": 0.33849087357521057, "learning_rate": 9.902097063343832e-06, "loss": 0.3359, "step": 3281 }, { "epoch": 0.4706051046745053, "grad_norm": 0.3745216429233551, "learning_rate": 9.901932715348694e-06, "loss": 0.3435, "step": 3282 }, { "epoch": 0.4707484944078004, "grad_norm": 0.35537731647491455, "learning_rate": 9.90176823089129e-06, "loss": 0.3568, "step": 3283 }, { "epoch": 0.4708918841410955, "grad_norm": 0.35504114627838135, "learning_rate": 9.901603609976198e-06, "loss": 0.3477, "step": 3284 }, { "epoch": 0.4710352738743906, "grad_norm": 0.3715459406375885, "learning_rate": 9.901438852608003e-06, "loss": 0.3611, "step": 3285 }, { "epoch": 0.4711786636076857, "grad_norm": 0.34865522384643555, "learning_rate": 9.90127395879129e-06, "loss": 0.3378, "step": 3286 }, { "epoch": 0.47132205334098076, "grad_norm": 0.345360666513443, "learning_rate": 9.90110892853065e-06, "loss": 0.3347, "step": 3287 }, { "epoch": 0.4714654430742759, "grad_norm": 0.40328967571258545, "learning_rate": 9.900943761830675e-06, "loss": 0.368, "step": 3288 }, { "epoch": 0.471608832807571, "grad_norm": 0.3279677629470825, "learning_rate": 9.900778458695966e-06, "loss": 0.3407, "step": 3289 }, { "epoch": 0.4717522225408661, "grad_norm": 0.38316580653190613, "learning_rate": 9.900613019131122e-06, "loss": 0.3349, "step": 3290 }, { "epoch": 0.47189561227416116, "grad_norm": 0.38755860924720764, "learning_rate": 9.900447443140753e-06, "loss": 0.3561, "step": 3291 }, { "epoch": 0.47203900200745624, "grad_norm": 0.359381765127182, "learning_rate": 9.900281730729463e-06, "loss": 0.3495, "step": 3292 }, { "epoch": 0.4721823917407514, "grad_norm": 0.35317692160606384, "learning_rate": 9.900115881901868e-06, "loss": 0.3475, "step": 3293 }, { "epoch": 0.47232578147404647, "grad_norm": 0.4372512400150299, "learning_rate": 9.899949896662587e-06, "loss": 0.3415, "step": 3294 }, { "epoch": 0.47246917120734155, "grad_norm": 0.3785814344882965, "learning_rate": 9.899783775016236e-06, "loss": 0.3509, "step": 3295 }, { "epoch": 0.47261256094063664, "grad_norm": 0.3793627619743347, "learning_rate": 9.899617516967441e-06, "loss": 0.3651, "step": 3296 }, { "epoch": 0.4727559506739317, "grad_norm": 0.44659486413002014, "learning_rate": 9.899451122520834e-06, "loss": 0.3363, "step": 3297 }, { "epoch": 0.47289934040722686, "grad_norm": 0.3425678312778473, "learning_rate": 9.899284591681043e-06, "loss": 0.3395, "step": 3298 }, { "epoch": 0.47304273014052195, "grad_norm": 0.40026283264160156, "learning_rate": 9.899117924452705e-06, "loss": 0.354, "step": 3299 }, { "epoch": 0.47318611987381703, "grad_norm": 0.41883066296577454, "learning_rate": 9.898951120840462e-06, "loss": 0.3478, "step": 3300 }, { "epoch": 0.4733295096071121, "grad_norm": 0.3632173240184784, "learning_rate": 9.898784180848955e-06, "loss": 0.3405, "step": 3301 }, { "epoch": 0.4734728993404072, "grad_norm": 0.41332289576530457, "learning_rate": 9.898617104482832e-06, "loss": 0.3319, "step": 3302 }, { "epoch": 0.47361628907370235, "grad_norm": 0.346445769071579, "learning_rate": 9.898449891746745e-06, "loss": 0.3521, "step": 3303 }, { "epoch": 0.47375967880699743, "grad_norm": 0.3771354854106903, "learning_rate": 9.898282542645348e-06, "loss": 0.3639, "step": 3304 }, { "epoch": 0.4739030685402925, "grad_norm": 0.36536383628845215, "learning_rate": 9.898115057183298e-06, "loss": 0.3661, "step": 3305 }, { "epoch": 0.4740464582735876, "grad_norm": 0.334268718957901, "learning_rate": 9.897947435365261e-06, "loss": 0.3365, "step": 3306 }, { "epoch": 0.4741898480068827, "grad_norm": 0.3486734628677368, "learning_rate": 9.897779677195902e-06, "loss": 0.341, "step": 3307 }, { "epoch": 0.4743332377401778, "grad_norm": 0.32010418176651, "learning_rate": 9.897611782679892e-06, "loss": 0.3542, "step": 3308 }, { "epoch": 0.4744766274734729, "grad_norm": 0.369080513715744, "learning_rate": 9.897443751821903e-06, "loss": 0.3426, "step": 3309 }, { "epoch": 0.474620017206768, "grad_norm": 0.345958411693573, "learning_rate": 9.897275584626613e-06, "loss": 0.3278, "step": 3310 }, { "epoch": 0.4747634069400631, "grad_norm": 0.34822946786880493, "learning_rate": 9.897107281098705e-06, "loss": 0.3433, "step": 3311 }, { "epoch": 0.47490679667335817, "grad_norm": 0.3676009476184845, "learning_rate": 9.896938841242863e-06, "loss": 0.3776, "step": 3312 }, { "epoch": 0.4750501864066533, "grad_norm": 0.395389586687088, "learning_rate": 9.896770265063776e-06, "loss": 0.3478, "step": 3313 }, { "epoch": 0.4751935761399484, "grad_norm": 0.3636237382888794, "learning_rate": 9.896601552566138e-06, "loss": 0.3526, "step": 3314 }, { "epoch": 0.4753369658732435, "grad_norm": 0.347168505191803, "learning_rate": 9.896432703754646e-06, "loss": 0.3593, "step": 3315 }, { "epoch": 0.47548035560653856, "grad_norm": 0.3536044657230377, "learning_rate": 9.896263718633999e-06, "loss": 0.3433, "step": 3316 }, { "epoch": 0.47562374533983365, "grad_norm": 0.35895729064941406, "learning_rate": 9.896094597208901e-06, "loss": 0.3534, "step": 3317 }, { "epoch": 0.4757671350731288, "grad_norm": 0.37358763813972473, "learning_rate": 9.895925339484062e-06, "loss": 0.354, "step": 3318 }, { "epoch": 0.4759105248064239, "grad_norm": 0.3177860677242279, "learning_rate": 9.895755945464194e-06, "loss": 0.3422, "step": 3319 }, { "epoch": 0.47605391453971896, "grad_norm": 0.3469354808330536, "learning_rate": 9.89558641515401e-06, "loss": 0.35, "step": 3320 }, { "epoch": 0.47619730427301404, "grad_norm": 0.37125152349472046, "learning_rate": 9.89541674855823e-06, "loss": 0.3575, "step": 3321 }, { "epoch": 0.47634069400630913, "grad_norm": 0.33382448554039, "learning_rate": 9.89524694568158e-06, "loss": 0.337, "step": 3322 }, { "epoch": 0.47648408373960427, "grad_norm": 0.353227823972702, "learning_rate": 9.895077006528786e-06, "loss": 0.3516, "step": 3323 }, { "epoch": 0.47662747347289935, "grad_norm": 0.41931676864624023, "learning_rate": 9.894906931104576e-06, "loss": 0.3483, "step": 3324 }, { "epoch": 0.47677086320619444, "grad_norm": 0.3360443711280823, "learning_rate": 9.894736719413689e-06, "loss": 0.3699, "step": 3325 }, { "epoch": 0.4769142529394895, "grad_norm": 0.35063499212265015, "learning_rate": 9.89456637146086e-06, "loss": 0.3665, "step": 3326 }, { "epoch": 0.4770576426727846, "grad_norm": 0.3636578619480133, "learning_rate": 9.894395887250832e-06, "loss": 0.3532, "step": 3327 }, { "epoch": 0.47720103240607975, "grad_norm": 0.38398051261901855, "learning_rate": 9.894225266788354e-06, "loss": 0.3493, "step": 3328 }, { "epoch": 0.47734442213937484, "grad_norm": 0.3429447412490845, "learning_rate": 9.89405451007817e-06, "loss": 0.339, "step": 3329 }, { "epoch": 0.4774878118726699, "grad_norm": 0.38114872574806213, "learning_rate": 9.893883617125038e-06, "loss": 0.355, "step": 3330 }, { "epoch": 0.477631201605965, "grad_norm": 0.3524768352508545, "learning_rate": 9.893712587933715e-06, "loss": 0.328, "step": 3331 }, { "epoch": 0.4777745913392601, "grad_norm": 0.3723195195198059, "learning_rate": 9.893541422508962e-06, "loss": 0.3476, "step": 3332 }, { "epoch": 0.47791798107255523, "grad_norm": 0.3363938331604004, "learning_rate": 9.89337012085554e-06, "loss": 0.3648, "step": 3333 }, { "epoch": 0.4780613708058503, "grad_norm": 0.3858087658882141, "learning_rate": 9.893198682978225e-06, "loss": 0.3458, "step": 3334 }, { "epoch": 0.4782047605391454, "grad_norm": 0.35128647089004517, "learning_rate": 9.893027108881785e-06, "loss": 0.3595, "step": 3335 }, { "epoch": 0.4783481502724405, "grad_norm": 0.3509899973869324, "learning_rate": 9.892855398570998e-06, "loss": 0.3526, "step": 3336 }, { "epoch": 0.47849154000573557, "grad_norm": 0.34926435351371765, "learning_rate": 9.892683552050641e-06, "loss": 0.3455, "step": 3337 }, { "epoch": 0.4786349297390307, "grad_norm": 0.3529929220676422, "learning_rate": 9.892511569325502e-06, "loss": 0.3697, "step": 3338 }, { "epoch": 0.4787783194723258, "grad_norm": 0.33192208409309387, "learning_rate": 9.892339450400366e-06, "loss": 0.3449, "step": 3339 }, { "epoch": 0.4789217092056209, "grad_norm": 0.34019026160240173, "learning_rate": 9.892167195280027e-06, "loss": 0.3586, "step": 3340 }, { "epoch": 0.47906509893891597, "grad_norm": 0.34593069553375244, "learning_rate": 9.891994803969279e-06, "loss": 0.3456, "step": 3341 }, { "epoch": 0.47920848867221105, "grad_norm": 0.35298100113868713, "learning_rate": 9.891822276472917e-06, "loss": 0.3271, "step": 3342 }, { "epoch": 0.4793518784055062, "grad_norm": 0.32510727643966675, "learning_rate": 9.891649612795751e-06, "loss": 0.3435, "step": 3343 }, { "epoch": 0.4794952681388013, "grad_norm": 0.3555040955543518, "learning_rate": 9.891476812942586e-06, "loss": 0.3437, "step": 3344 }, { "epoch": 0.47963865787209636, "grad_norm": 0.3613305687904358, "learning_rate": 9.891303876918227e-06, "loss": 0.3641, "step": 3345 }, { "epoch": 0.47978204760539145, "grad_norm": 0.35010552406311035, "learning_rate": 9.891130804727495e-06, "loss": 0.3562, "step": 3346 }, { "epoch": 0.47992543733868653, "grad_norm": 0.379666805267334, "learning_rate": 9.890957596375207e-06, "loss": 0.3662, "step": 3347 }, { "epoch": 0.4800688270719816, "grad_norm": 0.3689643442630768, "learning_rate": 9.89078425186618e-06, "loss": 0.3644, "step": 3348 }, { "epoch": 0.48021221680527676, "grad_norm": 0.37493717670440674, "learning_rate": 9.890610771205245e-06, "loss": 0.3347, "step": 3349 }, { "epoch": 0.48035560653857184, "grad_norm": 0.36276742815971375, "learning_rate": 9.890437154397227e-06, "loss": 0.3343, "step": 3350 }, { "epoch": 0.48049899627186693, "grad_norm": 0.40911197662353516, "learning_rate": 9.890263401446964e-06, "loss": 0.3334, "step": 3351 }, { "epoch": 0.480642386005162, "grad_norm": 0.37959587574005127, "learning_rate": 9.890089512359288e-06, "loss": 0.3441, "step": 3352 }, { "epoch": 0.4807857757384571, "grad_norm": 0.3864096999168396, "learning_rate": 9.889915487139044e-06, "loss": 0.347, "step": 3353 }, { "epoch": 0.48092916547175224, "grad_norm": 0.369221568107605, "learning_rate": 9.889741325791076e-06, "loss": 0.3625, "step": 3354 }, { "epoch": 0.4810725552050473, "grad_norm": 0.3912690579891205, "learning_rate": 9.889567028320229e-06, "loss": 0.3452, "step": 3355 }, { "epoch": 0.4812159449383424, "grad_norm": 0.36243677139282227, "learning_rate": 9.889392594731358e-06, "loss": 0.3422, "step": 3356 }, { "epoch": 0.4813593346716375, "grad_norm": 0.38497859239578247, "learning_rate": 9.88921802502932e-06, "loss": 0.3528, "step": 3357 }, { "epoch": 0.4815027244049326, "grad_norm": 0.3982517421245575, "learning_rate": 9.889043319218971e-06, "loss": 0.3725, "step": 3358 }, { "epoch": 0.4816461141382277, "grad_norm": 0.3682329058647156, "learning_rate": 9.888868477305179e-06, "loss": 0.3572, "step": 3359 }, { "epoch": 0.4817895038715228, "grad_norm": 0.3941921591758728, "learning_rate": 9.888693499292807e-06, "loss": 0.3451, "step": 3360 }, { "epoch": 0.4819328936048179, "grad_norm": 0.3621257245540619, "learning_rate": 9.88851838518673e-06, "loss": 0.3607, "step": 3361 }, { "epoch": 0.482076283338113, "grad_norm": 0.39426952600479126, "learning_rate": 9.88834313499182e-06, "loss": 0.3641, "step": 3362 }, { "epoch": 0.48221967307140806, "grad_norm": 0.3768260180950165, "learning_rate": 9.888167748712957e-06, "loss": 0.341, "step": 3363 }, { "epoch": 0.4823630628047032, "grad_norm": 0.35544154047966003, "learning_rate": 9.887992226355022e-06, "loss": 0.3632, "step": 3364 }, { "epoch": 0.4825064525379983, "grad_norm": 0.3640080988407135, "learning_rate": 9.887816567922905e-06, "loss": 0.3509, "step": 3365 }, { "epoch": 0.48264984227129337, "grad_norm": 0.3540503978729248, "learning_rate": 9.887640773421492e-06, "loss": 0.344, "step": 3366 }, { "epoch": 0.48279323200458846, "grad_norm": 0.36452916264533997, "learning_rate": 9.88746484285568e-06, "loss": 0.3652, "step": 3367 }, { "epoch": 0.48293662173788354, "grad_norm": 0.35935699939727783, "learning_rate": 9.887288776230364e-06, "loss": 0.3444, "step": 3368 }, { "epoch": 0.4830800114711787, "grad_norm": 0.3605632185935974, "learning_rate": 9.887112573550446e-06, "loss": 0.3694, "step": 3369 }, { "epoch": 0.48322340120447377, "grad_norm": 0.36194315552711487, "learning_rate": 9.886936234820832e-06, "loss": 0.3665, "step": 3370 }, { "epoch": 0.48336679093776885, "grad_norm": 0.33111414313316345, "learning_rate": 9.886759760046432e-06, "loss": 0.3545, "step": 3371 }, { "epoch": 0.48351018067106394, "grad_norm": 0.35272353887557983, "learning_rate": 9.886583149232157e-06, "loss": 0.3511, "step": 3372 }, { "epoch": 0.483653570404359, "grad_norm": 0.38363078236579895, "learning_rate": 9.886406402382922e-06, "loss": 0.3598, "step": 3373 }, { "epoch": 0.48379696013765416, "grad_norm": 0.35066714882850647, "learning_rate": 9.886229519503651e-06, "loss": 0.3366, "step": 3374 }, { "epoch": 0.48394034987094925, "grad_norm": 0.37325283885002136, "learning_rate": 9.886052500599268e-06, "loss": 0.3389, "step": 3375 }, { "epoch": 0.48408373960424433, "grad_norm": 0.3695336580276489, "learning_rate": 9.885875345674698e-06, "loss": 0.3559, "step": 3376 }, { "epoch": 0.4842271293375394, "grad_norm": 0.3382168114185333, "learning_rate": 9.885698054734876e-06, "loss": 0.3705, "step": 3377 }, { "epoch": 0.4843705190708345, "grad_norm": 0.3695746064186096, "learning_rate": 9.885520627784734e-06, "loss": 0.3478, "step": 3378 }, { "epoch": 0.48451390880412964, "grad_norm": 0.3474038541316986, "learning_rate": 9.885343064829214e-06, "loss": 0.3793, "step": 3379 }, { "epoch": 0.48465729853742473, "grad_norm": 0.3646996319293976, "learning_rate": 9.885165365873257e-06, "loss": 0.3521, "step": 3380 }, { "epoch": 0.4848006882707198, "grad_norm": 0.3959987759590149, "learning_rate": 9.884987530921812e-06, "loss": 0.3677, "step": 3381 }, { "epoch": 0.4849440780040149, "grad_norm": 0.35936659574508667, "learning_rate": 9.884809559979826e-06, "loss": 0.348, "step": 3382 }, { "epoch": 0.48508746773731, "grad_norm": 0.39555203914642334, "learning_rate": 9.88463145305226e-06, "loss": 0.3547, "step": 3383 }, { "epoch": 0.4852308574706051, "grad_norm": 0.34578436613082886, "learning_rate": 9.884453210144068e-06, "loss": 0.339, "step": 3384 }, { "epoch": 0.4853742472039002, "grad_norm": 0.3719114363193512, "learning_rate": 9.88427483126021e-06, "loss": 0.3386, "step": 3385 }, { "epoch": 0.4855176369371953, "grad_norm": 0.3990978002548218, "learning_rate": 9.884096316405654e-06, "loss": 0.3393, "step": 3386 }, { "epoch": 0.4856610266704904, "grad_norm": 0.3644360601902008, "learning_rate": 9.883917665585371e-06, "loss": 0.3558, "step": 3387 }, { "epoch": 0.48580441640378547, "grad_norm": 0.32109513878822327, "learning_rate": 9.883738878804334e-06, "loss": 0.3497, "step": 3388 }, { "epoch": 0.4859478061370806, "grad_norm": 0.35956981778144836, "learning_rate": 9.883559956067517e-06, "loss": 0.3409, "step": 3389 }, { "epoch": 0.4860911958703757, "grad_norm": 0.355663001537323, "learning_rate": 9.883380897379904e-06, "loss": 0.3593, "step": 3390 }, { "epoch": 0.4862345856036708, "grad_norm": 0.3812342584133148, "learning_rate": 9.883201702746478e-06, "loss": 0.3537, "step": 3391 }, { "epoch": 0.48637797533696586, "grad_norm": 0.3419836461544037, "learning_rate": 9.88302237217223e-06, "loss": 0.3479, "step": 3392 }, { "epoch": 0.48652136507026095, "grad_norm": 0.34848105907440186, "learning_rate": 9.882842905662152e-06, "loss": 0.3378, "step": 3393 }, { "epoch": 0.4866647548035561, "grad_norm": 0.41223597526550293, "learning_rate": 9.882663303221236e-06, "loss": 0.3395, "step": 3394 }, { "epoch": 0.4868081445368512, "grad_norm": 0.3689386248588562, "learning_rate": 9.882483564854486e-06, "loss": 0.3274, "step": 3395 }, { "epoch": 0.48695153427014626, "grad_norm": 0.3306465148925781, "learning_rate": 9.882303690566904e-06, "loss": 0.3619, "step": 3396 }, { "epoch": 0.48709492400344134, "grad_norm": 0.3846573531627655, "learning_rate": 9.882123680363495e-06, "loss": 0.3183, "step": 3397 }, { "epoch": 0.48723831373673643, "grad_norm": 0.3797530233860016, "learning_rate": 9.881943534249276e-06, "loss": 0.3576, "step": 3398 }, { "epoch": 0.48738170347003157, "grad_norm": 0.34797385334968567, "learning_rate": 9.881763252229259e-06, "loss": 0.3418, "step": 3399 }, { "epoch": 0.48752509320332665, "grad_norm": 0.3682803809642792, "learning_rate": 9.88158283430846e-06, "loss": 0.3165, "step": 3400 }, { "epoch": 0.48766848293662174, "grad_norm": 0.3610822260379791, "learning_rate": 9.881402280491908e-06, "loss": 0.365, "step": 3401 }, { "epoch": 0.4878118726699168, "grad_norm": 0.3503032922744751, "learning_rate": 9.881221590784622e-06, "loss": 0.3489, "step": 3402 }, { "epoch": 0.4879552624032119, "grad_norm": 0.38543763756752014, "learning_rate": 9.881040765191638e-06, "loss": 0.3348, "step": 3403 }, { "epoch": 0.48809865213650705, "grad_norm": 0.33153387904167175, "learning_rate": 9.880859803717986e-06, "loss": 0.3505, "step": 3404 }, { "epoch": 0.48824204186980213, "grad_norm": 0.3350125849246979, "learning_rate": 9.880678706368705e-06, "loss": 0.3451, "step": 3405 }, { "epoch": 0.4883854316030972, "grad_norm": 0.35706827044487, "learning_rate": 9.880497473148838e-06, "loss": 0.3491, "step": 3406 }, { "epoch": 0.4885288213363923, "grad_norm": 0.32198330760002136, "learning_rate": 9.880316104063428e-06, "loss": 0.3436, "step": 3407 }, { "epoch": 0.4886722110696874, "grad_norm": 0.3307148814201355, "learning_rate": 9.880134599117525e-06, "loss": 0.3495, "step": 3408 }, { "epoch": 0.48881560080298253, "grad_norm": 0.34489962458610535, "learning_rate": 9.879952958316182e-06, "loss": 0.3332, "step": 3409 }, { "epoch": 0.4889589905362776, "grad_norm": 0.3441861867904663, "learning_rate": 9.879771181664458e-06, "loss": 0.3476, "step": 3410 }, { "epoch": 0.4891023802695727, "grad_norm": 0.3272917568683624, "learning_rate": 9.879589269167407e-06, "loss": 0.3194, "step": 3411 }, { "epoch": 0.4892457700028678, "grad_norm": 0.32657623291015625, "learning_rate": 9.879407220830097e-06, "loss": 0.3693, "step": 3412 }, { "epoch": 0.48938915973616287, "grad_norm": 0.36081767082214355, "learning_rate": 9.879225036657597e-06, "loss": 0.3598, "step": 3413 }, { "epoch": 0.489532549469458, "grad_norm": 0.34549498558044434, "learning_rate": 9.879042716654978e-06, "loss": 0.3575, "step": 3414 }, { "epoch": 0.4896759392027531, "grad_norm": 0.37785154581069946, "learning_rate": 9.878860260827316e-06, "loss": 0.3513, "step": 3415 }, { "epoch": 0.4898193289360482, "grad_norm": 0.3779292106628418, "learning_rate": 9.878677669179689e-06, "loss": 0.3553, "step": 3416 }, { "epoch": 0.48996271866934327, "grad_norm": 0.3901439309120178, "learning_rate": 9.878494941717179e-06, "loss": 0.377, "step": 3417 }, { "epoch": 0.49010610840263835, "grad_norm": 0.43910980224609375, "learning_rate": 9.878312078444876e-06, "loss": 0.3416, "step": 3418 }, { "epoch": 0.4902494981359335, "grad_norm": 0.3735576272010803, "learning_rate": 9.878129079367869e-06, "loss": 0.3422, "step": 3419 }, { "epoch": 0.4903928878692286, "grad_norm": 0.37521788477897644, "learning_rate": 9.877945944491253e-06, "loss": 0.342, "step": 3420 }, { "epoch": 0.49053627760252366, "grad_norm": 0.39978519082069397, "learning_rate": 9.877762673820122e-06, "loss": 0.3263, "step": 3421 }, { "epoch": 0.49067966733581875, "grad_norm": 0.398127943277359, "learning_rate": 9.877579267359584e-06, "loss": 0.3472, "step": 3422 }, { "epoch": 0.49082305706911383, "grad_norm": 0.3762064576148987, "learning_rate": 9.877395725114743e-06, "loss": 0.3534, "step": 3423 }, { "epoch": 0.490966446802409, "grad_norm": 0.3822661340236664, "learning_rate": 9.877212047090706e-06, "loss": 0.3482, "step": 3424 }, { "epoch": 0.49110983653570406, "grad_norm": 0.3697286546230316, "learning_rate": 9.87702823329259e-06, "loss": 0.3321, "step": 3425 }, { "epoch": 0.49125322626899914, "grad_norm": 0.344666987657547, "learning_rate": 9.876844283725509e-06, "loss": 0.3517, "step": 3426 }, { "epoch": 0.49139661600229423, "grad_norm": 0.4210207462310791, "learning_rate": 9.876660198394585e-06, "loss": 0.358, "step": 3427 }, { "epoch": 0.4915400057355893, "grad_norm": 0.37717777490615845, "learning_rate": 9.876475977304945e-06, "loss": 0.3545, "step": 3428 }, { "epoch": 0.49168339546888445, "grad_norm": 0.35547947883605957, "learning_rate": 9.876291620461713e-06, "loss": 0.3431, "step": 3429 }, { "epoch": 0.49182678520217954, "grad_norm": 0.36924466490745544, "learning_rate": 9.876107127870024e-06, "loss": 0.3245, "step": 3430 }, { "epoch": 0.4919701749354746, "grad_norm": 0.3626033663749695, "learning_rate": 9.875922499535014e-06, "loss": 0.3309, "step": 3431 }, { "epoch": 0.4921135646687697, "grad_norm": 0.3992258310317993, "learning_rate": 9.87573773546182e-06, "loss": 0.3364, "step": 3432 }, { "epoch": 0.4922569544020648, "grad_norm": 0.36549514532089233, "learning_rate": 9.87555283565559e-06, "loss": 0.3434, "step": 3433 }, { "epoch": 0.49240034413535994, "grad_norm": 0.35323357582092285, "learning_rate": 9.87536780012147e-06, "loss": 0.339, "step": 3434 }, { "epoch": 0.492543733868655, "grad_norm": 0.37720146775245667, "learning_rate": 9.875182628864608e-06, "loss": 0.3533, "step": 3435 }, { "epoch": 0.4926871236019501, "grad_norm": 0.3844808042049408, "learning_rate": 9.87499732189016e-06, "loss": 0.3732, "step": 3436 }, { "epoch": 0.4928305133352452, "grad_norm": 0.3740902841091156, "learning_rate": 9.874811879203288e-06, "loss": 0.3462, "step": 3437 }, { "epoch": 0.4929739030685403, "grad_norm": 0.3230512738227844, "learning_rate": 9.874626300809151e-06, "loss": 0.3662, "step": 3438 }, { "epoch": 0.4931172928018354, "grad_norm": 0.4149497151374817, "learning_rate": 9.874440586712918e-06, "loss": 0.3647, "step": 3439 }, { "epoch": 0.4932606825351305, "grad_norm": 0.3449086844921112, "learning_rate": 9.874254736919756e-06, "loss": 0.3353, "step": 3440 }, { "epoch": 0.4934040722684256, "grad_norm": 0.35230737924575806, "learning_rate": 9.87406875143484e-06, "loss": 0.3482, "step": 3441 }, { "epoch": 0.49354746200172067, "grad_norm": 0.3622065484523773, "learning_rate": 9.873882630263348e-06, "loss": 0.3269, "step": 3442 }, { "epoch": 0.49369085173501576, "grad_norm": 0.35281866788864136, "learning_rate": 9.873696373410462e-06, "loss": 0.3491, "step": 3443 }, { "epoch": 0.49383424146831084, "grad_norm": 0.35318124294281006, "learning_rate": 9.873509980881364e-06, "loss": 0.3625, "step": 3444 }, { "epoch": 0.493977631201606, "grad_norm": 0.37552589178085327, "learning_rate": 9.873323452681247e-06, "loss": 0.3532, "step": 3445 }, { "epoch": 0.49412102093490107, "grad_norm": 0.3696820139884949, "learning_rate": 9.873136788815301e-06, "loss": 0.3437, "step": 3446 }, { "epoch": 0.49426441066819615, "grad_norm": 0.36662113666534424, "learning_rate": 9.872949989288723e-06, "loss": 0.3596, "step": 3447 }, { "epoch": 0.49440780040149124, "grad_norm": 0.36454668641090393, "learning_rate": 9.872763054106713e-06, "loss": 0.3527, "step": 3448 }, { "epoch": 0.4945511901347863, "grad_norm": 0.3612547814846039, "learning_rate": 9.872575983274474e-06, "loss": 0.3423, "step": 3449 }, { "epoch": 0.49469457986808146, "grad_norm": 0.36753153800964355, "learning_rate": 9.872388776797217e-06, "loss": 0.3617, "step": 3450 }, { "epoch": 0.49483796960137655, "grad_norm": 0.38535866141319275, "learning_rate": 9.872201434680152e-06, "loss": 0.3446, "step": 3451 }, { "epoch": 0.49498135933467163, "grad_norm": 0.3602127432823181, "learning_rate": 9.872013956928493e-06, "loss": 0.3337, "step": 3452 }, { "epoch": 0.4951247490679667, "grad_norm": 0.3566381335258484, "learning_rate": 9.87182634354746e-06, "loss": 0.3666, "step": 3453 }, { "epoch": 0.4952681388012618, "grad_norm": 0.4501229226589203, "learning_rate": 9.871638594542276e-06, "loss": 0.3577, "step": 3454 }, { "epoch": 0.49541152853455694, "grad_norm": 0.3476310670375824, "learning_rate": 9.871450709918166e-06, "loss": 0.3323, "step": 3455 }, { "epoch": 0.49555491826785203, "grad_norm": 0.39611926674842834, "learning_rate": 9.871262689680366e-06, "loss": 0.3453, "step": 3456 }, { "epoch": 0.4956983080011471, "grad_norm": 0.39851129055023193, "learning_rate": 9.871074533834102e-06, "loss": 0.3914, "step": 3457 }, { "epoch": 0.4958416977344422, "grad_norm": 0.3544714152812958, "learning_rate": 9.870886242384618e-06, "loss": 0.3614, "step": 3458 }, { "epoch": 0.4959850874677373, "grad_norm": 0.3943442702293396, "learning_rate": 9.870697815337154e-06, "loss": 0.3262, "step": 3459 }, { "epoch": 0.4961284772010324, "grad_norm": 0.42310118675231934, "learning_rate": 9.870509252696953e-06, "loss": 0.3461, "step": 3460 }, { "epoch": 0.4962718669343275, "grad_norm": 0.36289528012275696, "learning_rate": 9.870320554469269e-06, "loss": 0.3571, "step": 3461 }, { "epoch": 0.4964152566676226, "grad_norm": 0.3531302213668823, "learning_rate": 9.870131720659352e-06, "loss": 0.3434, "step": 3462 }, { "epoch": 0.4965586464009177, "grad_norm": 0.3899306356906891, "learning_rate": 9.86994275127246e-06, "loss": 0.3397, "step": 3463 }, { "epoch": 0.49670203613421277, "grad_norm": 0.35489198565483093, "learning_rate": 9.869753646313853e-06, "loss": 0.35, "step": 3464 }, { "epoch": 0.4968454258675079, "grad_norm": 0.35201478004455566, "learning_rate": 9.869564405788797e-06, "loss": 0.3495, "step": 3465 }, { "epoch": 0.496988815600803, "grad_norm": 0.35681799054145813, "learning_rate": 9.869375029702556e-06, "loss": 0.3383, "step": 3466 }, { "epoch": 0.4971322053340981, "grad_norm": 0.37277358770370483, "learning_rate": 9.869185518060405e-06, "loss": 0.3359, "step": 3467 }, { "epoch": 0.49727559506739316, "grad_norm": 0.4523216187953949, "learning_rate": 9.868995870867622e-06, "loss": 0.364, "step": 3468 }, { "epoch": 0.49741898480068825, "grad_norm": 0.3656332492828369, "learning_rate": 9.868806088129481e-06, "loss": 0.3246, "step": 3469 }, { "epoch": 0.4975623745339834, "grad_norm": 0.3697415292263031, "learning_rate": 9.86861616985127e-06, "loss": 0.3321, "step": 3470 }, { "epoch": 0.49770576426727847, "grad_norm": 0.4306221604347229, "learning_rate": 9.868426116038274e-06, "loss": 0.3524, "step": 3471 }, { "epoch": 0.49784915400057356, "grad_norm": 0.4137745797634125, "learning_rate": 9.868235926695783e-06, "loss": 0.3616, "step": 3472 }, { "epoch": 0.49799254373386864, "grad_norm": 0.40247100591659546, "learning_rate": 9.868045601829092e-06, "loss": 0.3496, "step": 3473 }, { "epoch": 0.4981359334671637, "grad_norm": 0.4280024468898773, "learning_rate": 9.8678551414435e-06, "loss": 0.3439, "step": 3474 }, { "epoch": 0.49827932320045887, "grad_norm": 0.369617760181427, "learning_rate": 9.867664545544311e-06, "loss": 0.3371, "step": 3475 }, { "epoch": 0.49842271293375395, "grad_norm": 0.34404969215393066, "learning_rate": 9.867473814136828e-06, "loss": 0.3421, "step": 3476 }, { "epoch": 0.49856610266704904, "grad_norm": 0.3875814378261566, "learning_rate": 9.867282947226362e-06, "loss": 0.3424, "step": 3477 }, { "epoch": 0.4987094924003441, "grad_norm": 0.4098771810531616, "learning_rate": 9.867091944818225e-06, "loss": 0.3442, "step": 3478 }, { "epoch": 0.4988528821336392, "grad_norm": 0.3878532350063324, "learning_rate": 9.866900806917737e-06, "loss": 0.358, "step": 3479 }, { "epoch": 0.49899627186693435, "grad_norm": 0.3578612208366394, "learning_rate": 9.866709533530213e-06, "loss": 0.3591, "step": 3480 }, { "epoch": 0.49913966160022943, "grad_norm": 0.4064209461212158, "learning_rate": 9.866518124660987e-06, "loss": 0.3591, "step": 3481 }, { "epoch": 0.4992830513335245, "grad_norm": 0.4252238869667053, "learning_rate": 9.86632658031538e-06, "loss": 0.3547, "step": 3482 }, { "epoch": 0.4994264410668196, "grad_norm": 0.3815094828605652, "learning_rate": 9.866134900498727e-06, "loss": 0.357, "step": 3483 }, { "epoch": 0.4995698308001147, "grad_norm": 0.35886698961257935, "learning_rate": 9.865943085216365e-06, "loss": 0.3463, "step": 3484 }, { "epoch": 0.49971322053340983, "grad_norm": 0.3468286991119385, "learning_rate": 9.865751134473632e-06, "loss": 0.3374, "step": 3485 }, { "epoch": 0.4998566102667049, "grad_norm": 0.34973689913749695, "learning_rate": 9.865559048275872e-06, "loss": 0.3457, "step": 3486 }, { "epoch": 0.5, "grad_norm": 0.372466504573822, "learning_rate": 9.865366826628433e-06, "loss": 0.3419, "step": 3487 }, { "epoch": 0.5001433897332951, "grad_norm": 0.32956522703170776, "learning_rate": 9.865174469536665e-06, "loss": 0.337, "step": 3488 }, { "epoch": 0.5002867794665902, "grad_norm": 0.3923748731613159, "learning_rate": 9.864981977005926e-06, "loss": 0.3539, "step": 3489 }, { "epoch": 0.5004301691998853, "grad_norm": 0.361371785402298, "learning_rate": 9.86478934904157e-06, "loss": 0.3307, "step": 3490 }, { "epoch": 0.5005735589331803, "grad_norm": 0.3664003610610962, "learning_rate": 9.864596585648964e-06, "loss": 0.3603, "step": 3491 }, { "epoch": 0.5007169486664754, "grad_norm": 0.36894840002059937, "learning_rate": 9.86440368683347e-06, "loss": 0.3356, "step": 3492 }, { "epoch": 0.5008603383997706, "grad_norm": 0.3475186824798584, "learning_rate": 9.864210652600463e-06, "loss": 0.3516, "step": 3493 }, { "epoch": 0.5010037281330657, "grad_norm": 0.3905405104160309, "learning_rate": 9.864017482955312e-06, "loss": 0.3535, "step": 3494 }, { "epoch": 0.5011471178663608, "grad_norm": 0.3540593385696411, "learning_rate": 9.863824177903396e-06, "loss": 0.3915, "step": 3495 }, { "epoch": 0.5012905075996559, "grad_norm": 0.398068904876709, "learning_rate": 9.863630737450098e-06, "loss": 0.3395, "step": 3496 }, { "epoch": 0.501433897332951, "grad_norm": 0.3979916274547577, "learning_rate": 9.863437161600802e-06, "loss": 0.3588, "step": 3497 }, { "epoch": 0.501577287066246, "grad_norm": 0.4034597873687744, "learning_rate": 9.863243450360898e-06, "loss": 0.3648, "step": 3498 }, { "epoch": 0.5017206767995411, "grad_norm": 0.3758794963359833, "learning_rate": 9.863049603735775e-06, "loss": 0.3526, "step": 3499 }, { "epoch": 0.5018640665328362, "grad_norm": 0.38847002387046814, "learning_rate": 9.862855621730833e-06, "loss": 0.3431, "step": 3500 }, { "epoch": 0.5020074562661313, "grad_norm": 0.3657136559486389, "learning_rate": 9.86266150435147e-06, "loss": 0.3522, "step": 3501 }, { "epoch": 0.5021508459994264, "grad_norm": 0.37545374035835266, "learning_rate": 9.862467251603092e-06, "loss": 0.3593, "step": 3502 }, { "epoch": 0.5022942357327216, "grad_norm": 0.41316288709640503, "learning_rate": 9.862272863491106e-06, "loss": 0.3514, "step": 3503 }, { "epoch": 0.5024376254660167, "grad_norm": 0.36391255259513855, "learning_rate": 9.862078340020922e-06, "loss": 0.3462, "step": 3504 }, { "epoch": 0.5025810151993118, "grad_norm": 0.36016467213630676, "learning_rate": 9.861883681197957e-06, "loss": 0.3584, "step": 3505 }, { "epoch": 0.5027244049326068, "grad_norm": 0.38677117228507996, "learning_rate": 9.86168888702763e-06, "loss": 0.3587, "step": 3506 }, { "epoch": 0.5028677946659019, "grad_norm": 0.38841044902801514, "learning_rate": 9.86149395751536e-06, "loss": 0.3621, "step": 3507 }, { "epoch": 0.503011184399197, "grad_norm": 0.36378100514411926, "learning_rate": 9.861298892666579e-06, "loss": 0.3444, "step": 3508 }, { "epoch": 0.5031545741324921, "grad_norm": 0.3777617812156677, "learning_rate": 9.861103692486715e-06, "loss": 0.357, "step": 3509 }, { "epoch": 0.5032979638657872, "grad_norm": 0.48939424753189087, "learning_rate": 9.860908356981201e-06, "loss": 0.3749, "step": 3510 }, { "epoch": 0.5034413535990823, "grad_norm": 0.35048970580101013, "learning_rate": 9.860712886155476e-06, "loss": 0.3451, "step": 3511 }, { "epoch": 0.5035847433323773, "grad_norm": 0.38271862268447876, "learning_rate": 9.860517280014982e-06, "loss": 0.3485, "step": 3512 }, { "epoch": 0.5037281330656725, "grad_norm": 0.40055376291275024, "learning_rate": 9.860321538565164e-06, "loss": 0.3488, "step": 3513 }, { "epoch": 0.5038715227989676, "grad_norm": 0.3655298054218292, "learning_rate": 9.86012566181147e-06, "loss": 0.3206, "step": 3514 }, { "epoch": 0.5040149125322627, "grad_norm": 0.3646738827228546, "learning_rate": 9.859929649759355e-06, "loss": 0.3212, "step": 3515 }, { "epoch": 0.5041583022655578, "grad_norm": 0.3779745101928711, "learning_rate": 9.859733502414274e-06, "loss": 0.353, "step": 3516 }, { "epoch": 0.5043016919988529, "grad_norm": 0.47055357694625854, "learning_rate": 9.859537219781688e-06, "loss": 0.3801, "step": 3517 }, { "epoch": 0.504445081732148, "grad_norm": 0.3433860242366791, "learning_rate": 9.859340801867062e-06, "loss": 0.3543, "step": 3518 }, { "epoch": 0.5045884714654431, "grad_norm": 0.3602169454097748, "learning_rate": 9.85914424867586e-06, "loss": 0.3383, "step": 3519 }, { "epoch": 0.5047318611987381, "grad_norm": 0.41660964488983154, "learning_rate": 9.85894756021356e-06, "loss": 0.3229, "step": 3520 }, { "epoch": 0.5048752509320332, "grad_norm": 0.3593636155128479, "learning_rate": 9.858750736485633e-06, "loss": 0.362, "step": 3521 }, { "epoch": 0.5050186406653283, "grad_norm": 0.3997578024864197, "learning_rate": 9.85855377749756e-06, "loss": 0.3531, "step": 3522 }, { "epoch": 0.5051620303986235, "grad_norm": 0.3464927077293396, "learning_rate": 9.858356683254824e-06, "loss": 0.3524, "step": 3523 }, { "epoch": 0.5053054201319186, "grad_norm": 0.35422560572624207, "learning_rate": 9.858159453762912e-06, "loss": 0.3715, "step": 3524 }, { "epoch": 0.5054488098652137, "grad_norm": 0.39726442098617554, "learning_rate": 9.857962089027314e-06, "loss": 0.3468, "step": 3525 }, { "epoch": 0.5055921995985088, "grad_norm": 0.3367362320423126, "learning_rate": 9.857764589053522e-06, "loss": 0.3521, "step": 3526 }, { "epoch": 0.5057355893318038, "grad_norm": 0.3544955849647522, "learning_rate": 9.857566953847038e-06, "loss": 0.3725, "step": 3527 }, { "epoch": 0.5058789790650989, "grad_norm": 0.3950585722923279, "learning_rate": 9.857369183413363e-06, "loss": 0.3418, "step": 3528 }, { "epoch": 0.506022368798394, "grad_norm": 0.3320585787296295, "learning_rate": 9.857171277758002e-06, "loss": 0.3535, "step": 3529 }, { "epoch": 0.5061657585316891, "grad_norm": 0.35541632771492004, "learning_rate": 9.856973236886462e-06, "loss": 0.3601, "step": 3530 }, { "epoch": 0.5063091482649842, "grad_norm": 0.3724015951156616, "learning_rate": 9.856775060804262e-06, "loss": 0.341, "step": 3531 }, { "epoch": 0.5064525379982793, "grad_norm": 0.3450840413570404, "learning_rate": 9.856576749516913e-06, "loss": 0.342, "step": 3532 }, { "epoch": 0.5065959277315745, "grad_norm": 0.344765305519104, "learning_rate": 9.85637830302994e-06, "loss": 0.3389, "step": 3533 }, { "epoch": 0.5067393174648696, "grad_norm": 0.3214212656021118, "learning_rate": 9.856179721348863e-06, "loss": 0.3531, "step": 3534 }, { "epoch": 0.5068827071981646, "grad_norm": 0.3548281490802765, "learning_rate": 9.855981004479215e-06, "loss": 0.352, "step": 3535 }, { "epoch": 0.5070260969314597, "grad_norm": 0.37033963203430176, "learning_rate": 9.855782152426525e-06, "loss": 0.3586, "step": 3536 }, { "epoch": 0.5071694866647548, "grad_norm": 0.3631625175476074, "learning_rate": 9.85558316519633e-06, "loss": 0.3454, "step": 3537 }, { "epoch": 0.5073128763980499, "grad_norm": 0.3434281349182129, "learning_rate": 9.855384042794168e-06, "loss": 0.3618, "step": 3538 }, { "epoch": 0.507456266131345, "grad_norm": 0.35657086968421936, "learning_rate": 9.855184785225583e-06, "loss": 0.3583, "step": 3539 }, { "epoch": 0.5075996558646401, "grad_norm": 0.3628326952457428, "learning_rate": 9.854985392496123e-06, "loss": 0.3346, "step": 3540 }, { "epoch": 0.5077430455979351, "grad_norm": 0.3625187277793884, "learning_rate": 9.854785864611339e-06, "loss": 0.3816, "step": 3541 }, { "epoch": 0.5078864353312302, "grad_norm": 0.37056654691696167, "learning_rate": 9.854586201576783e-06, "loss": 0.3497, "step": 3542 }, { "epoch": 0.5080298250645254, "grad_norm": 0.3619905114173889, "learning_rate": 9.854386403398015e-06, "loss": 0.3519, "step": 3543 }, { "epoch": 0.5081732147978205, "grad_norm": 0.366966187953949, "learning_rate": 9.854186470080598e-06, "loss": 0.3635, "step": 3544 }, { "epoch": 0.5083166045311156, "grad_norm": 0.35143497586250305, "learning_rate": 9.853986401630095e-06, "loss": 0.3432, "step": 3545 }, { "epoch": 0.5084599942644107, "grad_norm": 0.36916229128837585, "learning_rate": 9.853786198052079e-06, "loss": 0.327, "step": 3546 }, { "epoch": 0.5086033839977058, "grad_norm": 0.33904510736465454, "learning_rate": 9.853585859352122e-06, "loss": 0.3646, "step": 3547 }, { "epoch": 0.5087467737310009, "grad_norm": 0.3330213129520416, "learning_rate": 9.8533853855358e-06, "loss": 0.3418, "step": 3548 }, { "epoch": 0.5088901634642959, "grad_norm": 0.3437788784503937, "learning_rate": 9.853184776608695e-06, "loss": 0.3322, "step": 3549 }, { "epoch": 0.509033553197591, "grad_norm": 0.3901200592517853, "learning_rate": 9.85298403257639e-06, "loss": 0.348, "step": 3550 }, { "epoch": 0.5091769429308861, "grad_norm": 0.3557794392108917, "learning_rate": 9.852783153444476e-06, "loss": 0.3479, "step": 3551 }, { "epoch": 0.5093203326641812, "grad_norm": 0.34621870517730713, "learning_rate": 9.852582139218546e-06, "loss": 0.3368, "step": 3552 }, { "epoch": 0.5094637223974764, "grad_norm": 0.35138189792633057, "learning_rate": 9.852380989904191e-06, "loss": 0.3489, "step": 3553 }, { "epoch": 0.5096071121307715, "grad_norm": 0.3756104111671448, "learning_rate": 9.852179705507014e-06, "loss": 0.3467, "step": 3554 }, { "epoch": 0.5097505018640666, "grad_norm": 0.3343070447444916, "learning_rate": 9.85197828603262e-06, "loss": 0.335, "step": 3555 }, { "epoch": 0.5098938915973616, "grad_norm": 0.33677053451538086, "learning_rate": 9.851776731486613e-06, "loss": 0.3457, "step": 3556 }, { "epoch": 0.5100372813306567, "grad_norm": 0.3634888231754303, "learning_rate": 9.851575041874606e-06, "loss": 0.3501, "step": 3557 }, { "epoch": 0.5101806710639518, "grad_norm": 0.3903045952320099, "learning_rate": 9.851373217202212e-06, "loss": 0.346, "step": 3558 }, { "epoch": 0.5103240607972469, "grad_norm": 0.43077707290649414, "learning_rate": 9.85117125747505e-06, "loss": 0.335, "step": 3559 }, { "epoch": 0.510467450530542, "grad_norm": 0.3263390362262726, "learning_rate": 9.850969162698744e-06, "loss": 0.3396, "step": 3560 }, { "epoch": 0.5106108402638371, "grad_norm": 0.376193642616272, "learning_rate": 9.850766932878917e-06, "loss": 0.3524, "step": 3561 }, { "epoch": 0.5107542299971322, "grad_norm": 0.36223524808883667, "learning_rate": 9.850564568021202e-06, "loss": 0.3521, "step": 3562 }, { "epoch": 0.5108976197304274, "grad_norm": 0.3180772364139557, "learning_rate": 9.850362068131231e-06, "loss": 0.3439, "step": 3563 }, { "epoch": 0.5110410094637224, "grad_norm": 0.38980647921562195, "learning_rate": 9.85015943321464e-06, "loss": 0.3576, "step": 3564 }, { "epoch": 0.5111843991970175, "grad_norm": 0.34941965341567993, "learning_rate": 9.849956663277073e-06, "loss": 0.3311, "step": 3565 }, { "epoch": 0.5113277889303126, "grad_norm": 0.34276771545410156, "learning_rate": 9.849753758324173e-06, "loss": 0.3397, "step": 3566 }, { "epoch": 0.5114711786636077, "grad_norm": 0.37887895107269287, "learning_rate": 9.849550718361586e-06, "loss": 0.3678, "step": 3567 }, { "epoch": 0.5116145683969028, "grad_norm": 0.37244799733161926, "learning_rate": 9.849347543394969e-06, "loss": 0.3277, "step": 3568 }, { "epoch": 0.5117579581301979, "grad_norm": 0.34811994433403015, "learning_rate": 9.849144233429977e-06, "loss": 0.3412, "step": 3569 }, { "epoch": 0.511901347863493, "grad_norm": 0.3469882011413574, "learning_rate": 9.848940788472268e-06, "loss": 0.3407, "step": 3570 }, { "epoch": 0.512044737596788, "grad_norm": 0.39273834228515625, "learning_rate": 9.848737208527506e-06, "loss": 0.3683, "step": 3571 }, { "epoch": 0.5121881273300831, "grad_norm": 0.34880170226097107, "learning_rate": 9.84853349360136e-06, "loss": 0.3338, "step": 3572 }, { "epoch": 0.5123315170633783, "grad_norm": 0.3583586513996124, "learning_rate": 9.8483296436995e-06, "loss": 0.3489, "step": 3573 }, { "epoch": 0.5124749067966734, "grad_norm": 0.41049814224243164, "learning_rate": 9.848125658827598e-06, "loss": 0.3271, "step": 3574 }, { "epoch": 0.5126182965299685, "grad_norm": 0.4233921468257904, "learning_rate": 9.847921538991339e-06, "loss": 0.3466, "step": 3575 }, { "epoch": 0.5127616862632636, "grad_norm": 0.397855669260025, "learning_rate": 9.8477172841964e-06, "loss": 0.3371, "step": 3576 }, { "epoch": 0.5129050759965587, "grad_norm": 0.39448291063308716, "learning_rate": 9.84751289444847e-06, "loss": 0.3533, "step": 3577 }, { "epoch": 0.5130484657298537, "grad_norm": 0.4161033034324646, "learning_rate": 9.847308369753238e-06, "loss": 0.353, "step": 3578 }, { "epoch": 0.5131918554631488, "grad_norm": 0.4127802848815918, "learning_rate": 9.847103710116396e-06, "loss": 0.3384, "step": 3579 }, { "epoch": 0.5133352451964439, "grad_norm": 0.38672637939453125, "learning_rate": 9.846898915543644e-06, "loss": 0.3451, "step": 3580 }, { "epoch": 0.513478634929739, "grad_norm": 0.35406073927879333, "learning_rate": 9.846693986040682e-06, "loss": 0.3346, "step": 3581 }, { "epoch": 0.5136220246630341, "grad_norm": 0.3539005219936371, "learning_rate": 9.846488921613216e-06, "loss": 0.327, "step": 3582 }, { "epoch": 0.5137654143963292, "grad_norm": 0.38241046667099, "learning_rate": 9.846283722266952e-06, "loss": 0.3619, "step": 3583 }, { "epoch": 0.5139088041296244, "grad_norm": 0.3513106405735016, "learning_rate": 9.846078388007604e-06, "loss": 0.3395, "step": 3584 }, { "epoch": 0.5140521938629194, "grad_norm": 0.3827586770057678, "learning_rate": 9.84587291884089e-06, "loss": 0.3411, "step": 3585 }, { "epoch": 0.5141955835962145, "grad_norm": 0.3743971884250641, "learning_rate": 9.845667314772528e-06, "loss": 0.3774, "step": 3586 }, { "epoch": 0.5143389733295096, "grad_norm": 0.3728847801685333, "learning_rate": 9.845461575808241e-06, "loss": 0.3374, "step": 3587 }, { "epoch": 0.5144823630628047, "grad_norm": 0.37794405221939087, "learning_rate": 9.845255701953758e-06, "loss": 0.3295, "step": 3588 }, { "epoch": 0.5146257527960998, "grad_norm": 0.360684335231781, "learning_rate": 9.84504969321481e-06, "loss": 0.3834, "step": 3589 }, { "epoch": 0.5147691425293949, "grad_norm": 0.4192664921283722, "learning_rate": 9.844843549597132e-06, "loss": 0.3615, "step": 3590 }, { "epoch": 0.51491253226269, "grad_norm": 0.34456223249435425, "learning_rate": 9.844637271106461e-06, "loss": 0.3422, "step": 3591 }, { "epoch": 0.515055921995985, "grad_norm": 0.3246738612651825, "learning_rate": 9.844430857748541e-06, "loss": 0.3247, "step": 3592 }, { "epoch": 0.5151993117292801, "grad_norm": 0.4052858054637909, "learning_rate": 9.844224309529119e-06, "loss": 0.3434, "step": 3593 }, { "epoch": 0.5153427014625753, "grad_norm": 0.4039464592933655, "learning_rate": 9.844017626453944e-06, "loss": 0.3517, "step": 3594 }, { "epoch": 0.5154860911958704, "grad_norm": 0.3642721176147461, "learning_rate": 9.843810808528769e-06, "loss": 0.355, "step": 3595 }, { "epoch": 0.5156294809291655, "grad_norm": 0.360688716173172, "learning_rate": 9.843603855759352e-06, "loss": 0.3488, "step": 3596 }, { "epoch": 0.5157728706624606, "grad_norm": 0.336684912443161, "learning_rate": 9.843396768151456e-06, "loss": 0.3308, "step": 3597 }, { "epoch": 0.5159162603957557, "grad_norm": 0.38602668046951294, "learning_rate": 9.843189545710843e-06, "loss": 0.3507, "step": 3598 }, { "epoch": 0.5160596501290508, "grad_norm": 0.36329859495162964, "learning_rate": 9.842982188443283e-06, "loss": 0.3518, "step": 3599 }, { "epoch": 0.5162030398623458, "grad_norm": 0.3472753167152405, "learning_rate": 9.842774696354552e-06, "loss": 0.3383, "step": 3600 }, { "epoch": 0.5163464295956409, "grad_norm": 0.34352126717567444, "learning_rate": 9.84256706945042e-06, "loss": 0.3611, "step": 3601 }, { "epoch": 0.516489819328936, "grad_norm": 0.37445253133773804, "learning_rate": 9.84235930773667e-06, "loss": 0.3596, "step": 3602 }, { "epoch": 0.5166332090622311, "grad_norm": 0.33925822377204895, "learning_rate": 9.842151411219086e-06, "loss": 0.3285, "step": 3603 }, { "epoch": 0.5167765987955263, "grad_norm": 0.3285270035266876, "learning_rate": 9.841943379903456e-06, "loss": 0.3406, "step": 3604 }, { "epoch": 0.5169199885288214, "grad_norm": 0.34875473380088806, "learning_rate": 9.841735213795571e-06, "loss": 0.3434, "step": 3605 }, { "epoch": 0.5170633782621165, "grad_norm": 0.3630627989768982, "learning_rate": 9.841526912901224e-06, "loss": 0.376, "step": 3606 }, { "epoch": 0.5172067679954115, "grad_norm": 0.3667210340499878, "learning_rate": 9.841318477226219e-06, "loss": 0.3507, "step": 3607 }, { "epoch": 0.5173501577287066, "grad_norm": 0.3276135325431824, "learning_rate": 9.841109906776352e-06, "loss": 0.334, "step": 3608 }, { "epoch": 0.5174935474620017, "grad_norm": 0.3603757917881012, "learning_rate": 9.840901201557431e-06, "loss": 0.3264, "step": 3609 }, { "epoch": 0.5176369371952968, "grad_norm": 0.3690629005432129, "learning_rate": 9.84069236157527e-06, "loss": 0.3418, "step": 3610 }, { "epoch": 0.5177803269285919, "grad_norm": 0.33375099301338196, "learning_rate": 9.840483386835678e-06, "loss": 0.3371, "step": 3611 }, { "epoch": 0.517923716661887, "grad_norm": 0.3560677468776703, "learning_rate": 9.840274277344476e-06, "loss": 0.329, "step": 3612 }, { "epoch": 0.518067106395182, "grad_norm": 0.339468389749527, "learning_rate": 9.840065033107484e-06, "loss": 0.3423, "step": 3613 }, { "epoch": 0.5182104961284772, "grad_norm": 0.35111236572265625, "learning_rate": 9.839855654130525e-06, "loss": 0.3637, "step": 3614 }, { "epoch": 0.5183538858617723, "grad_norm": 0.3491220772266388, "learning_rate": 9.839646140419432e-06, "loss": 0.3615, "step": 3615 }, { "epoch": 0.5184972755950674, "grad_norm": 0.3252251148223877, "learning_rate": 9.839436491980034e-06, "loss": 0.3561, "step": 3616 }, { "epoch": 0.5186406653283625, "grad_norm": 0.35744866728782654, "learning_rate": 9.839226708818168e-06, "loss": 0.3428, "step": 3617 }, { "epoch": 0.5187840550616576, "grad_norm": 0.3119225800037384, "learning_rate": 9.839016790939677e-06, "loss": 0.3469, "step": 3618 }, { "epoch": 0.5189274447949527, "grad_norm": 0.36003297567367554, "learning_rate": 9.8388067383504e-06, "loss": 0.3214, "step": 3619 }, { "epoch": 0.5190708345282478, "grad_norm": 0.3310968577861786, "learning_rate": 9.838596551056188e-06, "loss": 0.3109, "step": 3620 }, { "epoch": 0.5192142242615428, "grad_norm": 0.3424806594848633, "learning_rate": 9.83838622906289e-06, "loss": 0.3514, "step": 3621 }, { "epoch": 0.5193576139948379, "grad_norm": 0.36871403455734253, "learning_rate": 9.838175772376363e-06, "loss": 0.3819, "step": 3622 }, { "epoch": 0.519501003728133, "grad_norm": 0.3452534079551697, "learning_rate": 9.837965181002464e-06, "loss": 0.3407, "step": 3623 }, { "epoch": 0.5196443934614282, "grad_norm": 0.4509350657463074, "learning_rate": 9.837754454947058e-06, "loss": 0.3541, "step": 3624 }, { "epoch": 0.5197877831947233, "grad_norm": 0.36011239886283875, "learning_rate": 9.83754359421601e-06, "loss": 0.3477, "step": 3625 }, { "epoch": 0.5199311729280184, "grad_norm": 0.3403690457344055, "learning_rate": 9.837332598815188e-06, "loss": 0.3283, "step": 3626 }, { "epoch": 0.5200745626613135, "grad_norm": 0.36690598726272583, "learning_rate": 9.83712146875047e-06, "loss": 0.3438, "step": 3627 }, { "epoch": 0.5202179523946086, "grad_norm": 0.32994869351387024, "learning_rate": 9.83691020402773e-06, "loss": 0.3471, "step": 3628 }, { "epoch": 0.5203613421279036, "grad_norm": 0.33389824628829956, "learning_rate": 9.83669880465285e-06, "loss": 0.3275, "step": 3629 }, { "epoch": 0.5205047318611987, "grad_norm": 0.33906883001327515, "learning_rate": 9.836487270631717e-06, "loss": 0.3539, "step": 3630 }, { "epoch": 0.5206481215944938, "grad_norm": 0.3138020634651184, "learning_rate": 9.836275601970216e-06, "loss": 0.3435, "step": 3631 }, { "epoch": 0.5207915113277889, "grad_norm": 0.3297393321990967, "learning_rate": 9.836063798674243e-06, "loss": 0.3401, "step": 3632 }, { "epoch": 0.520934901061084, "grad_norm": 0.4036850333213806, "learning_rate": 9.835851860749693e-06, "loss": 0.3418, "step": 3633 }, { "epoch": 0.5210782907943792, "grad_norm": 0.32273221015930176, "learning_rate": 9.835639788202467e-06, "loss": 0.3439, "step": 3634 }, { "epoch": 0.5212216805276743, "grad_norm": 0.34597834944725037, "learning_rate": 9.835427581038467e-06, "loss": 0.3494, "step": 3635 }, { "epoch": 0.5213650702609693, "grad_norm": 0.35334569215774536, "learning_rate": 9.835215239263602e-06, "loss": 0.3475, "step": 3636 }, { "epoch": 0.5215084599942644, "grad_norm": 0.35325896739959717, "learning_rate": 9.835002762883782e-06, "loss": 0.3406, "step": 3637 }, { "epoch": 0.5216518497275595, "grad_norm": 0.34796980023384094, "learning_rate": 9.834790151904922e-06, "loss": 0.3529, "step": 3638 }, { "epoch": 0.5217952394608546, "grad_norm": 0.35386309027671814, "learning_rate": 9.834577406332944e-06, "loss": 0.337, "step": 3639 }, { "epoch": 0.5219386291941497, "grad_norm": 0.3660098612308502, "learning_rate": 9.834364526173765e-06, "loss": 0.3615, "step": 3640 }, { "epoch": 0.5220820189274448, "grad_norm": 0.37884101271629333, "learning_rate": 9.834151511433316e-06, "loss": 0.3327, "step": 3641 }, { "epoch": 0.5222254086607399, "grad_norm": 0.34819501638412476, "learning_rate": 9.833938362117525e-06, "loss": 0.352, "step": 3642 }, { "epoch": 0.5223687983940349, "grad_norm": 0.3250502049922943, "learning_rate": 9.833725078232326e-06, "loss": 0.3489, "step": 3643 }, { "epoch": 0.5225121881273301, "grad_norm": 0.356260746717453, "learning_rate": 9.833511659783656e-06, "loss": 0.3539, "step": 3644 }, { "epoch": 0.5226555778606252, "grad_norm": 0.34889471530914307, "learning_rate": 9.833298106777459e-06, "loss": 0.3556, "step": 3645 }, { "epoch": 0.5227989675939203, "grad_norm": 0.40645432472229004, "learning_rate": 9.833084419219676e-06, "loss": 0.3619, "step": 3646 }, { "epoch": 0.5229423573272154, "grad_norm": 0.35299262404441833, "learning_rate": 9.832870597116257e-06, "loss": 0.3395, "step": 3647 }, { "epoch": 0.5230857470605105, "grad_norm": 0.35016247630119324, "learning_rate": 9.832656640473154e-06, "loss": 0.3414, "step": 3648 }, { "epoch": 0.5232291367938056, "grad_norm": 0.3518370985984802, "learning_rate": 9.832442549296325e-06, "loss": 0.3294, "step": 3649 }, { "epoch": 0.5233725265271006, "grad_norm": 0.35259851813316345, "learning_rate": 9.832228323591731e-06, "loss": 0.3148, "step": 3650 }, { "epoch": 0.5235159162603957, "grad_norm": 0.43219780921936035, "learning_rate": 9.832013963365332e-06, "loss": 0.3547, "step": 3651 }, { "epoch": 0.5236593059936908, "grad_norm": 0.38354840874671936, "learning_rate": 9.831799468623098e-06, "loss": 0.3534, "step": 3652 }, { "epoch": 0.5238026957269859, "grad_norm": 0.35545676946640015, "learning_rate": 9.831584839371e-06, "loss": 0.3329, "step": 3653 }, { "epoch": 0.5239460854602811, "grad_norm": 0.3672197163105011, "learning_rate": 9.83137007561501e-06, "loss": 0.3654, "step": 3654 }, { "epoch": 0.5240894751935762, "grad_norm": 0.35653162002563477, "learning_rate": 9.831155177361112e-06, "loss": 0.3514, "step": 3655 }, { "epoch": 0.5242328649268713, "grad_norm": 0.34410586953163147, "learning_rate": 9.830940144615284e-06, "loss": 0.3545, "step": 3656 }, { "epoch": 0.5243762546601664, "grad_norm": 0.34150269627571106, "learning_rate": 9.830724977383515e-06, "loss": 0.3189, "step": 3657 }, { "epoch": 0.5245196443934614, "grad_norm": 0.37861186265945435, "learning_rate": 9.830509675671792e-06, "loss": 0.3559, "step": 3658 }, { "epoch": 0.5246630341267565, "grad_norm": 0.4327262341976166, "learning_rate": 9.830294239486111e-06, "loss": 0.342, "step": 3659 }, { "epoch": 0.5248064238600516, "grad_norm": 0.3509977459907532, "learning_rate": 9.83007866883247e-06, "loss": 0.343, "step": 3660 }, { "epoch": 0.5249498135933467, "grad_norm": 0.4198108911514282, "learning_rate": 9.829862963716867e-06, "loss": 0.3528, "step": 3661 }, { "epoch": 0.5250932033266418, "grad_norm": 0.4369404911994934, "learning_rate": 9.82964712414531e-06, "loss": 0.3581, "step": 3662 }, { "epoch": 0.5252365930599369, "grad_norm": 0.33605706691741943, "learning_rate": 9.829431150123806e-06, "loss": 0.3309, "step": 3663 }, { "epoch": 0.5253799827932321, "grad_norm": 0.3932434320449829, "learning_rate": 9.829215041658368e-06, "loss": 0.3354, "step": 3664 }, { "epoch": 0.5255233725265271, "grad_norm": 0.3660789132118225, "learning_rate": 9.828998798755012e-06, "loss": 0.3467, "step": 3665 }, { "epoch": 0.5256667622598222, "grad_norm": 0.3558273911476135, "learning_rate": 9.828782421419758e-06, "loss": 0.3341, "step": 3666 }, { "epoch": 0.5258101519931173, "grad_norm": 0.34755241870880127, "learning_rate": 9.828565909658628e-06, "loss": 0.3558, "step": 3667 }, { "epoch": 0.5259535417264124, "grad_norm": 0.3364209830760956, "learning_rate": 9.828349263477653e-06, "loss": 0.3385, "step": 3668 }, { "epoch": 0.5260969314597075, "grad_norm": 0.37857580184936523, "learning_rate": 9.828132482882859e-06, "loss": 0.3719, "step": 3669 }, { "epoch": 0.5262403211930026, "grad_norm": 0.3495214581489563, "learning_rate": 9.827915567880286e-06, "loss": 0.3414, "step": 3670 }, { "epoch": 0.5263837109262977, "grad_norm": 0.3581622838973999, "learning_rate": 9.82769851847597e-06, "loss": 0.3355, "step": 3671 }, { "epoch": 0.5265271006595927, "grad_norm": 0.33828839659690857, "learning_rate": 9.827481334675955e-06, "loss": 0.3511, "step": 3672 }, { "epoch": 0.5266704903928878, "grad_norm": 0.370075523853302, "learning_rate": 9.827264016486283e-06, "loss": 0.3629, "step": 3673 }, { "epoch": 0.526813880126183, "grad_norm": 0.46311715245246887, "learning_rate": 9.827046563913008e-06, "loss": 0.3447, "step": 3674 }, { "epoch": 0.5269572698594781, "grad_norm": 0.40766486525535583, "learning_rate": 9.826828976962181e-06, "loss": 0.3572, "step": 3675 }, { "epoch": 0.5271006595927732, "grad_norm": 0.38234028220176697, "learning_rate": 9.826611255639861e-06, "loss": 0.3307, "step": 3676 }, { "epoch": 0.5272440493260683, "grad_norm": 0.4430690407752991, "learning_rate": 9.826393399952109e-06, "loss": 0.3468, "step": 3677 }, { "epoch": 0.5273874390593634, "grad_norm": 0.40231412649154663, "learning_rate": 9.826175409904989e-06, "loss": 0.3351, "step": 3678 }, { "epoch": 0.5275308287926584, "grad_norm": 0.40834084153175354, "learning_rate": 9.825957285504569e-06, "loss": 0.3332, "step": 3679 }, { "epoch": 0.5276742185259535, "grad_norm": 0.36175253987312317, "learning_rate": 9.825739026756922e-06, "loss": 0.3445, "step": 3680 }, { "epoch": 0.5278176082592486, "grad_norm": 0.38445231318473816, "learning_rate": 9.825520633668123e-06, "loss": 0.3491, "step": 3681 }, { "epoch": 0.5279609979925437, "grad_norm": 0.40938371419906616, "learning_rate": 9.825302106244255e-06, "loss": 0.354, "step": 3682 }, { "epoch": 0.5281043877258388, "grad_norm": 0.35177916288375854, "learning_rate": 9.825083444491397e-06, "loss": 0.3652, "step": 3683 }, { "epoch": 0.5282477774591339, "grad_norm": 0.34650230407714844, "learning_rate": 9.82486464841564e-06, "loss": 0.3375, "step": 3684 }, { "epoch": 0.5283911671924291, "grad_norm": 0.35158249735832214, "learning_rate": 9.82464571802307e-06, "loss": 0.3393, "step": 3685 }, { "epoch": 0.5285345569257242, "grad_norm": 0.41355186700820923, "learning_rate": 9.824426653319788e-06, "loss": 0.3457, "step": 3686 }, { "epoch": 0.5286779466590192, "grad_norm": 0.3969486355781555, "learning_rate": 9.824207454311889e-06, "loss": 0.3519, "step": 3687 }, { "epoch": 0.5288213363923143, "grad_norm": 0.34534624218940735, "learning_rate": 9.823988121005475e-06, "loss": 0.3458, "step": 3688 }, { "epoch": 0.5289647261256094, "grad_norm": 0.3919183909893036, "learning_rate": 9.823768653406652e-06, "loss": 0.3574, "step": 3689 }, { "epoch": 0.5291081158589045, "grad_norm": 0.3768875300884247, "learning_rate": 9.82354905152153e-06, "loss": 0.3514, "step": 3690 }, { "epoch": 0.5292515055921996, "grad_norm": 0.3464787006378174, "learning_rate": 9.823329315356223e-06, "loss": 0.3457, "step": 3691 }, { "epoch": 0.5293948953254947, "grad_norm": 0.34448495507240295, "learning_rate": 9.823109444916848e-06, "loss": 0.3607, "step": 3692 }, { "epoch": 0.5295382850587897, "grad_norm": 0.34182897210121155, "learning_rate": 9.822889440209525e-06, "loss": 0.3571, "step": 3693 }, { "epoch": 0.5296816747920848, "grad_norm": 0.3494643270969391, "learning_rate": 9.822669301240378e-06, "loss": 0.3656, "step": 3694 }, { "epoch": 0.52982506452538, "grad_norm": 0.3711739778518677, "learning_rate": 9.822449028015537e-06, "loss": 0.3482, "step": 3695 }, { "epoch": 0.5299684542586751, "grad_norm": 0.3200431168079376, "learning_rate": 9.822228620541132e-06, "loss": 0.3278, "step": 3696 }, { "epoch": 0.5301118439919702, "grad_norm": 0.34390443563461304, "learning_rate": 9.822008078823302e-06, "loss": 0.3364, "step": 3697 }, { "epoch": 0.5302552337252653, "grad_norm": 0.37505432963371277, "learning_rate": 9.821787402868185e-06, "loss": 0.3531, "step": 3698 }, { "epoch": 0.5303986234585604, "grad_norm": 0.34464919567108154, "learning_rate": 9.821566592681923e-06, "loss": 0.3456, "step": 3699 }, { "epoch": 0.5305420131918555, "grad_norm": 0.3259536623954773, "learning_rate": 9.821345648270664e-06, "loss": 0.3407, "step": 3700 }, { "epoch": 0.5306854029251505, "grad_norm": 0.34205031394958496, "learning_rate": 9.821124569640559e-06, "loss": 0.335, "step": 3701 }, { "epoch": 0.5308287926584456, "grad_norm": 0.3228526711463928, "learning_rate": 9.820903356797761e-06, "loss": 0.3268, "step": 3702 }, { "epoch": 0.5309721823917407, "grad_norm": 0.3436049222946167, "learning_rate": 9.820682009748433e-06, "loss": 0.3438, "step": 3703 }, { "epoch": 0.5311155721250358, "grad_norm": 0.33880043029785156, "learning_rate": 9.820460528498729e-06, "loss": 0.3415, "step": 3704 }, { "epoch": 0.531258961858331, "grad_norm": 0.3222217857837677, "learning_rate": 9.820238913054822e-06, "loss": 0.3191, "step": 3705 }, { "epoch": 0.5314023515916261, "grad_norm": 0.3275488018989563, "learning_rate": 9.820017163422878e-06, "loss": 0.333, "step": 3706 }, { "epoch": 0.5315457413249212, "grad_norm": 0.34501034021377563, "learning_rate": 9.81979527960907e-06, "loss": 0.3158, "step": 3707 }, { "epoch": 0.5316891310582162, "grad_norm": 0.34555256366729736, "learning_rate": 9.819573261619578e-06, "loss": 0.3561, "step": 3708 }, { "epoch": 0.5318325207915113, "grad_norm": 0.3321095407009125, "learning_rate": 9.819351109460578e-06, "loss": 0.3668, "step": 3709 }, { "epoch": 0.5319759105248064, "grad_norm": 0.3280576765537262, "learning_rate": 9.819128823138257e-06, "loss": 0.323, "step": 3710 }, { "epoch": 0.5321193002581015, "grad_norm": 0.36306262016296387, "learning_rate": 9.818906402658804e-06, "loss": 0.3657, "step": 3711 }, { "epoch": 0.5322626899913966, "grad_norm": 0.37857598066329956, "learning_rate": 9.818683848028409e-06, "loss": 0.3335, "step": 3712 }, { "epoch": 0.5324060797246917, "grad_norm": 0.3890175223350525, "learning_rate": 9.818461159253267e-06, "loss": 0.3537, "step": 3713 }, { "epoch": 0.5325494694579868, "grad_norm": 0.35585376620292664, "learning_rate": 9.81823833633958e-06, "loss": 0.3248, "step": 3714 }, { "epoch": 0.532692859191282, "grad_norm": 0.35341161489486694, "learning_rate": 9.818015379293548e-06, "loss": 0.3248, "step": 3715 }, { "epoch": 0.532836248924577, "grad_norm": 0.3703019320964813, "learning_rate": 9.81779228812138e-06, "loss": 0.3561, "step": 3716 }, { "epoch": 0.5329796386578721, "grad_norm": 0.35191407799720764, "learning_rate": 9.817569062829286e-06, "loss": 0.3434, "step": 3717 }, { "epoch": 0.5331230283911672, "grad_norm": 0.35135239362716675, "learning_rate": 9.817345703423481e-06, "loss": 0.3716, "step": 3718 }, { "epoch": 0.5332664181244623, "grad_norm": 0.3807965815067291, "learning_rate": 9.817122209910182e-06, "loss": 0.3257, "step": 3719 }, { "epoch": 0.5334098078577574, "grad_norm": 0.42562949657440186, "learning_rate": 9.816898582295609e-06, "loss": 0.3611, "step": 3720 }, { "epoch": 0.5335531975910525, "grad_norm": 0.42501088976860046, "learning_rate": 9.816674820585991e-06, "loss": 0.3825, "step": 3721 }, { "epoch": 0.5336965873243475, "grad_norm": 0.33717671036720276, "learning_rate": 9.816450924787555e-06, "loss": 0.3485, "step": 3722 }, { "epoch": 0.5338399770576426, "grad_norm": 0.3620028495788574, "learning_rate": 9.816226894906533e-06, "loss": 0.3338, "step": 3723 }, { "epoch": 0.5339833667909377, "grad_norm": 0.3967439830303192, "learning_rate": 9.816002730949165e-06, "loss": 0.3321, "step": 3724 }, { "epoch": 0.5341267565242329, "grad_norm": 0.35094302892684937, "learning_rate": 9.815778432921687e-06, "loss": 0.3263, "step": 3725 }, { "epoch": 0.534270146257528, "grad_norm": 0.38153842091560364, "learning_rate": 9.815554000830349e-06, "loss": 0.3612, "step": 3726 }, { "epoch": 0.5344135359908231, "grad_norm": 0.38499715924263, "learning_rate": 9.815329434681393e-06, "loss": 0.3554, "step": 3727 }, { "epoch": 0.5345569257241182, "grad_norm": 0.3459027409553528, "learning_rate": 9.815104734481074e-06, "loss": 0.3455, "step": 3728 }, { "epoch": 0.5347003154574133, "grad_norm": 0.35054266452789307, "learning_rate": 9.814879900235645e-06, "loss": 0.331, "step": 3729 }, { "epoch": 0.5348437051907083, "grad_norm": 0.37404483556747437, "learning_rate": 9.814654931951367e-06, "loss": 0.3602, "step": 3730 }, { "epoch": 0.5349870949240034, "grad_norm": 0.3706057667732239, "learning_rate": 9.814429829634499e-06, "loss": 0.3376, "step": 3731 }, { "epoch": 0.5351304846572985, "grad_norm": 0.3287467658519745, "learning_rate": 9.814204593291315e-06, "loss": 0.3387, "step": 3732 }, { "epoch": 0.5352738743905936, "grad_norm": 0.33504626154899597, "learning_rate": 9.813979222928078e-06, "loss": 0.3503, "step": 3733 }, { "epoch": 0.5354172641238887, "grad_norm": 0.3445277214050293, "learning_rate": 9.813753718551067e-06, "loss": 0.3544, "step": 3734 }, { "epoch": 0.5355606538571839, "grad_norm": 0.3352513313293457, "learning_rate": 9.813528080166556e-06, "loss": 0.3441, "step": 3735 }, { "epoch": 0.535704043590479, "grad_norm": 0.3894737660884857, "learning_rate": 9.813302307780828e-06, "loss": 0.3174, "step": 3736 }, { "epoch": 0.535847433323774, "grad_norm": 0.36199358105659485, "learning_rate": 9.813076401400168e-06, "loss": 0.354, "step": 3737 }, { "epoch": 0.5359908230570691, "grad_norm": 0.34082481265068054, "learning_rate": 9.812850361030864e-06, "loss": 0.3466, "step": 3738 }, { "epoch": 0.5361342127903642, "grad_norm": 0.3628863990306854, "learning_rate": 9.812624186679211e-06, "loss": 0.3579, "step": 3739 }, { "epoch": 0.5362776025236593, "grad_norm": 0.38280120491981506, "learning_rate": 9.812397878351502e-06, "loss": 0.3411, "step": 3740 }, { "epoch": 0.5364209922569544, "grad_norm": 0.3163264989852905, "learning_rate": 9.812171436054042e-06, "loss": 0.3515, "step": 3741 }, { "epoch": 0.5365643819902495, "grad_norm": 0.3420977294445038, "learning_rate": 9.811944859793128e-06, "loss": 0.3447, "step": 3742 }, { "epoch": 0.5367077717235446, "grad_norm": 0.36082425713539124, "learning_rate": 9.811718149575074e-06, "loss": 0.3594, "step": 3743 }, { "epoch": 0.5368511614568396, "grad_norm": 0.33458954095840454, "learning_rate": 9.811491305406187e-06, "loss": 0.3284, "step": 3744 }, { "epoch": 0.5369945511901348, "grad_norm": 0.3592126667499542, "learning_rate": 9.811264327292784e-06, "loss": 0.3541, "step": 3745 }, { "epoch": 0.5371379409234299, "grad_norm": 0.36345481872558594, "learning_rate": 9.811037215241183e-06, "loss": 0.3233, "step": 3746 }, { "epoch": 0.537281330656725, "grad_norm": 0.38271281123161316, "learning_rate": 9.810809969257708e-06, "loss": 0.3767, "step": 3747 }, { "epoch": 0.5374247203900201, "grad_norm": 0.39518505334854126, "learning_rate": 9.810582589348684e-06, "loss": 0.3446, "step": 3748 }, { "epoch": 0.5375681101233152, "grad_norm": 0.35062482953071594, "learning_rate": 9.81035507552044e-06, "loss": 0.3418, "step": 3749 }, { "epoch": 0.5377114998566103, "grad_norm": 0.4047660231590271, "learning_rate": 9.810127427779308e-06, "loss": 0.3557, "step": 3750 }, { "epoch": 0.5378548895899053, "grad_norm": 0.35385653376579285, "learning_rate": 9.809899646131631e-06, "loss": 0.3723, "step": 3751 }, { "epoch": 0.5379982793232004, "grad_norm": 0.39329981803894043, "learning_rate": 9.809671730583745e-06, "loss": 0.347, "step": 3752 }, { "epoch": 0.5381416690564955, "grad_norm": 0.3690130412578583, "learning_rate": 9.809443681141997e-06, "loss": 0.3349, "step": 3753 }, { "epoch": 0.5382850587897906, "grad_norm": 0.3308526277542114, "learning_rate": 9.809215497812736e-06, "loss": 0.3602, "step": 3754 }, { "epoch": 0.5384284485230858, "grad_norm": 0.46771273016929626, "learning_rate": 9.808987180602313e-06, "loss": 0.3651, "step": 3755 }, { "epoch": 0.5385718382563809, "grad_norm": 0.37574121356010437, "learning_rate": 9.808758729517085e-06, "loss": 0.3379, "step": 3756 }, { "epoch": 0.538715227989676, "grad_norm": 0.38026192784309387, "learning_rate": 9.808530144563411e-06, "loss": 0.3502, "step": 3757 }, { "epoch": 0.5388586177229711, "grad_norm": 0.32950127124786377, "learning_rate": 9.808301425747655e-06, "loss": 0.3392, "step": 3758 }, { "epoch": 0.5390020074562661, "grad_norm": 0.34213942289352417, "learning_rate": 9.808072573076184e-06, "loss": 0.3828, "step": 3759 }, { "epoch": 0.5391453971895612, "grad_norm": 0.33455318212509155, "learning_rate": 9.807843586555368e-06, "loss": 0.3382, "step": 3760 }, { "epoch": 0.5392887869228563, "grad_norm": 0.33399727940559387, "learning_rate": 9.807614466191582e-06, "loss": 0.3394, "step": 3761 }, { "epoch": 0.5394321766561514, "grad_norm": 0.3538355827331543, "learning_rate": 9.807385211991206e-06, "loss": 0.3535, "step": 3762 }, { "epoch": 0.5395755663894465, "grad_norm": 0.34516793489456177, "learning_rate": 9.807155823960619e-06, "loss": 0.3158, "step": 3763 }, { "epoch": 0.5397189561227416, "grad_norm": 0.4177759289741516, "learning_rate": 9.80692630210621e-06, "loss": 0.3563, "step": 3764 }, { "epoch": 0.5398623458560368, "grad_norm": 0.3734297752380371, "learning_rate": 9.806696646434367e-06, "loss": 0.3684, "step": 3765 }, { "epoch": 0.5400057355893318, "grad_norm": 0.37645694613456726, "learning_rate": 9.806466856951485e-06, "loss": 0.3247, "step": 3766 }, { "epoch": 0.5401491253226269, "grad_norm": 0.34420496225357056, "learning_rate": 9.806236933663958e-06, "loss": 0.3468, "step": 3767 }, { "epoch": 0.540292515055922, "grad_norm": 0.40506383776664734, "learning_rate": 9.806006876578187e-06, "loss": 0.3515, "step": 3768 }, { "epoch": 0.5404359047892171, "grad_norm": 0.3763051927089691, "learning_rate": 9.805776685700579e-06, "loss": 0.3436, "step": 3769 }, { "epoch": 0.5405792945225122, "grad_norm": 0.3483552038669586, "learning_rate": 9.805546361037538e-06, "loss": 0.3374, "step": 3770 }, { "epoch": 0.5407226842558073, "grad_norm": 0.3864010274410248, "learning_rate": 9.805315902595483e-06, "loss": 0.367, "step": 3771 }, { "epoch": 0.5408660739891024, "grad_norm": 0.3964545428752899, "learning_rate": 9.805085310380823e-06, "loss": 0.3371, "step": 3772 }, { "epoch": 0.5410094637223974, "grad_norm": 0.3157690167427063, "learning_rate": 9.80485458439998e-06, "loss": 0.347, "step": 3773 }, { "epoch": 0.5411528534556925, "grad_norm": 0.36136072874069214, "learning_rate": 9.804623724659377e-06, "loss": 0.3458, "step": 3774 }, { "epoch": 0.5412962431889876, "grad_norm": 0.407014936208725, "learning_rate": 9.80439273116544e-06, "loss": 0.3422, "step": 3775 }, { "epoch": 0.5414396329222828, "grad_norm": 0.3742411136627197, "learning_rate": 9.8041616039246e-06, "loss": 0.3349, "step": 3776 }, { "epoch": 0.5415830226555779, "grad_norm": 0.3175226151943207, "learning_rate": 9.80393034294329e-06, "loss": 0.3361, "step": 3777 }, { "epoch": 0.541726412388873, "grad_norm": 0.3755585849285126, "learning_rate": 9.80369894822795e-06, "loss": 0.3231, "step": 3778 }, { "epoch": 0.5418698021221681, "grad_norm": 0.3945448100566864, "learning_rate": 9.80346741978502e-06, "loss": 0.3612, "step": 3779 }, { "epoch": 0.5420131918554632, "grad_norm": 0.3903330862522125, "learning_rate": 9.803235757620948e-06, "loss": 0.3466, "step": 3780 }, { "epoch": 0.5421565815887582, "grad_norm": 0.38304850459098816, "learning_rate": 9.80300396174218e-06, "loss": 0.3493, "step": 3781 }, { "epoch": 0.5422999713220533, "grad_norm": 0.4451850354671478, "learning_rate": 9.80277203215517e-06, "loss": 0.3587, "step": 3782 }, { "epoch": 0.5424433610553484, "grad_norm": 0.35527074337005615, "learning_rate": 9.802539968866373e-06, "loss": 0.3557, "step": 3783 }, { "epoch": 0.5425867507886435, "grad_norm": 0.40375861525535583, "learning_rate": 9.802307771882253e-06, "loss": 0.3525, "step": 3784 }, { "epoch": 0.5427301405219386, "grad_norm": 0.39117908477783203, "learning_rate": 9.802075441209273e-06, "loss": 0.3389, "step": 3785 }, { "epoch": 0.5428735302552338, "grad_norm": 0.35524293780326843, "learning_rate": 9.801842976853899e-06, "loss": 0.3389, "step": 3786 }, { "epoch": 0.5430169199885289, "grad_norm": 0.3787282407283783, "learning_rate": 9.801610378822602e-06, "loss": 0.3556, "step": 3787 }, { "epoch": 0.5431603097218239, "grad_norm": 0.3780493438243866, "learning_rate": 9.80137764712186e-06, "loss": 0.3632, "step": 3788 }, { "epoch": 0.543303699455119, "grad_norm": 0.3513912856578827, "learning_rate": 9.801144781758148e-06, "loss": 0.3526, "step": 3789 }, { "epoch": 0.5434470891884141, "grad_norm": 0.3606875240802765, "learning_rate": 9.800911782737952e-06, "loss": 0.3444, "step": 3790 }, { "epoch": 0.5435904789217092, "grad_norm": 0.37187132239341736, "learning_rate": 9.800678650067757e-06, "loss": 0.348, "step": 3791 }, { "epoch": 0.5437338686550043, "grad_norm": 0.3446318507194519, "learning_rate": 9.800445383754053e-06, "loss": 0.3595, "step": 3792 }, { "epoch": 0.5438772583882994, "grad_norm": 0.34952598810195923, "learning_rate": 9.800211983803337e-06, "loss": 0.3374, "step": 3793 }, { "epoch": 0.5440206481215945, "grad_norm": 0.3832535743713379, "learning_rate": 9.799978450222099e-06, "loss": 0.3391, "step": 3794 }, { "epoch": 0.5441640378548895, "grad_norm": 0.3602314293384552, "learning_rate": 9.799744783016847e-06, "loss": 0.3538, "step": 3795 }, { "epoch": 0.5443074275881847, "grad_norm": 0.37273451685905457, "learning_rate": 9.799510982194083e-06, "loss": 0.3367, "step": 3796 }, { "epoch": 0.5444508173214798, "grad_norm": 0.4085446298122406, "learning_rate": 9.799277047760317e-06, "loss": 0.357, "step": 3797 }, { "epoch": 0.5445942070547749, "grad_norm": 0.33939629793167114, "learning_rate": 9.79904297972206e-06, "loss": 0.3269, "step": 3798 }, { "epoch": 0.54473759678807, "grad_norm": 0.3535926043987274, "learning_rate": 9.79880877808583e-06, "loss": 0.3496, "step": 3799 }, { "epoch": 0.5448809865213651, "grad_norm": 0.3857211172580719, "learning_rate": 9.798574442858143e-06, "loss": 0.3476, "step": 3800 }, { "epoch": 0.5450243762546602, "grad_norm": 0.33995574712753296, "learning_rate": 9.798339974045527e-06, "loss": 0.3412, "step": 3801 }, { "epoch": 0.5451677659879552, "grad_norm": 0.3955793082714081, "learning_rate": 9.798105371654505e-06, "loss": 0.3647, "step": 3802 }, { "epoch": 0.5453111557212503, "grad_norm": 0.35518181324005127, "learning_rate": 9.797870635691613e-06, "loss": 0.3332, "step": 3803 }, { "epoch": 0.5454545454545454, "grad_norm": 0.34164634346961975, "learning_rate": 9.797635766163383e-06, "loss": 0.3271, "step": 3804 }, { "epoch": 0.5455979351878405, "grad_norm": 0.33813363313674927, "learning_rate": 9.797400763076353e-06, "loss": 0.3551, "step": 3805 }, { "epoch": 0.5457413249211357, "grad_norm": 0.35389870405197144, "learning_rate": 9.797165626437064e-06, "loss": 0.3667, "step": 3806 }, { "epoch": 0.5458847146544308, "grad_norm": 0.35537853837013245, "learning_rate": 9.796930356252066e-06, "loss": 0.3575, "step": 3807 }, { "epoch": 0.5460281043877259, "grad_norm": 0.342031866312027, "learning_rate": 9.796694952527903e-06, "loss": 0.3537, "step": 3808 }, { "epoch": 0.546171494121021, "grad_norm": 0.33011266589164734, "learning_rate": 9.796459415271133e-06, "loss": 0.3321, "step": 3809 }, { "epoch": 0.546314883854316, "grad_norm": 0.3494466543197632, "learning_rate": 9.796223744488312e-06, "loss": 0.3535, "step": 3810 }, { "epoch": 0.5464582735876111, "grad_norm": 0.3366827368736267, "learning_rate": 9.795987940185999e-06, "loss": 0.3454, "step": 3811 }, { "epoch": 0.5466016633209062, "grad_norm": 0.341215580701828, "learning_rate": 9.795752002370758e-06, "loss": 0.343, "step": 3812 }, { "epoch": 0.5467450530542013, "grad_norm": 0.3363867700099945, "learning_rate": 9.79551593104916e-06, "loss": 0.3239, "step": 3813 }, { "epoch": 0.5468884427874964, "grad_norm": 0.3307889401912689, "learning_rate": 9.795279726227775e-06, "loss": 0.3388, "step": 3814 }, { "epoch": 0.5470318325207915, "grad_norm": 0.33560076355934143, "learning_rate": 9.79504338791318e-06, "loss": 0.3544, "step": 3815 }, { "epoch": 0.5471752222540867, "grad_norm": 0.3569437861442566, "learning_rate": 9.794806916111951e-06, "loss": 0.3202, "step": 3816 }, { "epoch": 0.5473186119873817, "grad_norm": 0.34530654549598694, "learning_rate": 9.794570310830675e-06, "loss": 0.3424, "step": 3817 }, { "epoch": 0.5474620017206768, "grad_norm": 0.37642717361450195, "learning_rate": 9.794333572075936e-06, "loss": 0.3462, "step": 3818 }, { "epoch": 0.5476053914539719, "grad_norm": 0.3784891664981842, "learning_rate": 9.794096699854326e-06, "loss": 0.3298, "step": 3819 }, { "epoch": 0.547748781187267, "grad_norm": 0.41272062063217163, "learning_rate": 9.793859694172439e-06, "loss": 0.3425, "step": 3820 }, { "epoch": 0.5478921709205621, "grad_norm": 0.3362604081630707, "learning_rate": 9.793622555036873e-06, "loss": 0.3537, "step": 3821 }, { "epoch": 0.5480355606538572, "grad_norm": 0.42794308066368103, "learning_rate": 9.793385282454226e-06, "loss": 0.3507, "step": 3822 }, { "epoch": 0.5481789503871523, "grad_norm": 0.3724355399608612, "learning_rate": 9.79314787643111e-06, "loss": 0.3511, "step": 3823 }, { "epoch": 0.5483223401204473, "grad_norm": 0.34829550981521606, "learning_rate": 9.792910336974128e-06, "loss": 0.336, "step": 3824 }, { "epoch": 0.5484657298537424, "grad_norm": 0.3903694450855255, "learning_rate": 9.792672664089896e-06, "loss": 0.3704, "step": 3825 }, { "epoch": 0.5486091195870376, "grad_norm": 0.3437413275241852, "learning_rate": 9.792434857785029e-06, "loss": 0.3425, "step": 3826 }, { "epoch": 0.5487525093203327, "grad_norm": 0.37462499737739563, "learning_rate": 9.792196918066148e-06, "loss": 0.3463, "step": 3827 }, { "epoch": 0.5488958990536278, "grad_norm": 0.4077604413032532, "learning_rate": 9.791958844939877e-06, "loss": 0.3281, "step": 3828 }, { "epoch": 0.5490392887869229, "grad_norm": 0.36761265993118286, "learning_rate": 9.791720638412843e-06, "loss": 0.3385, "step": 3829 }, { "epoch": 0.549182678520218, "grad_norm": 0.44702890515327454, "learning_rate": 9.791482298491677e-06, "loss": 0.3453, "step": 3830 }, { "epoch": 0.549326068253513, "grad_norm": 0.4570419490337372, "learning_rate": 9.791243825183014e-06, "loss": 0.357, "step": 3831 }, { "epoch": 0.5494694579868081, "grad_norm": 0.3593046963214874, "learning_rate": 9.791005218493494e-06, "loss": 0.3627, "step": 3832 }, { "epoch": 0.5496128477201032, "grad_norm": 0.4197724759578705, "learning_rate": 9.79076647842976e-06, "loss": 0.3562, "step": 3833 }, { "epoch": 0.5497562374533983, "grad_norm": 0.373060941696167, "learning_rate": 9.790527604998454e-06, "loss": 0.3631, "step": 3834 }, { "epoch": 0.5498996271866934, "grad_norm": 0.3527699112892151, "learning_rate": 9.790288598206232e-06, "loss": 0.3502, "step": 3835 }, { "epoch": 0.5500430169199886, "grad_norm": 0.3904514014720917, "learning_rate": 9.790049458059741e-06, "loss": 0.3448, "step": 3836 }, { "epoch": 0.5501864066532837, "grad_norm": 0.3359229564666748, "learning_rate": 9.789810184565643e-06, "loss": 0.334, "step": 3837 }, { "epoch": 0.5503297963865788, "grad_norm": 0.38227835297584534, "learning_rate": 9.789570777730599e-06, "loss": 0.3579, "step": 3838 }, { "epoch": 0.5504731861198738, "grad_norm": 0.37122008204460144, "learning_rate": 9.78933123756127e-06, "loss": 0.345, "step": 3839 }, { "epoch": 0.5506165758531689, "grad_norm": 0.37163034081459045, "learning_rate": 9.789091564064328e-06, "loss": 0.3342, "step": 3840 }, { "epoch": 0.550759965586464, "grad_norm": 0.3848218321800232, "learning_rate": 9.788851757246443e-06, "loss": 0.3553, "step": 3841 }, { "epoch": 0.5509033553197591, "grad_norm": 0.363982230424881, "learning_rate": 9.788611817114292e-06, "loss": 0.347, "step": 3842 }, { "epoch": 0.5510467450530542, "grad_norm": 0.36608001589775085, "learning_rate": 9.788371743674553e-06, "loss": 0.3458, "step": 3843 }, { "epoch": 0.5511901347863493, "grad_norm": 0.3555026948451996, "learning_rate": 9.788131536933914e-06, "loss": 0.3434, "step": 3844 }, { "epoch": 0.5513335245196443, "grad_norm": 0.36975592374801636, "learning_rate": 9.787891196899055e-06, "loss": 0.354, "step": 3845 }, { "epoch": 0.5514769142529395, "grad_norm": 0.32040852308273315, "learning_rate": 9.787650723576673e-06, "loss": 0.3655, "step": 3846 }, { "epoch": 0.5516203039862346, "grad_norm": 0.3317064940929413, "learning_rate": 9.787410116973459e-06, "loss": 0.3441, "step": 3847 }, { "epoch": 0.5517636937195297, "grad_norm": 0.37303411960601807, "learning_rate": 9.78716937709611e-06, "loss": 0.3509, "step": 3848 }, { "epoch": 0.5519070834528248, "grad_norm": 0.3421681821346283, "learning_rate": 9.786928503951332e-06, "loss": 0.3467, "step": 3849 }, { "epoch": 0.5520504731861199, "grad_norm": 0.3367961347103119, "learning_rate": 9.786687497545825e-06, "loss": 0.3253, "step": 3850 }, { "epoch": 0.552193862919415, "grad_norm": 0.3634362816810608, "learning_rate": 9.786446357886305e-06, "loss": 0.3561, "step": 3851 }, { "epoch": 0.55233725265271, "grad_norm": 0.34080857038497925, "learning_rate": 9.786205084979478e-06, "loss": 0.3338, "step": 3852 }, { "epoch": 0.5524806423860051, "grad_norm": 0.3811698853969574, "learning_rate": 9.785963678832066e-06, "loss": 0.3592, "step": 3853 }, { "epoch": 0.5526240321193002, "grad_norm": 0.34341904520988464, "learning_rate": 9.785722139450787e-06, "loss": 0.3597, "step": 3854 }, { "epoch": 0.5527674218525953, "grad_norm": 0.3431067168712616, "learning_rate": 9.785480466842365e-06, "loss": 0.3323, "step": 3855 }, { "epoch": 0.5529108115858905, "grad_norm": 0.378322571516037, "learning_rate": 9.785238661013529e-06, "loss": 0.3185, "step": 3856 }, { "epoch": 0.5530542013191856, "grad_norm": 0.35836291313171387, "learning_rate": 9.78499672197101e-06, "loss": 0.3522, "step": 3857 }, { "epoch": 0.5531975910524807, "grad_norm": 0.3552044630050659, "learning_rate": 9.784754649721543e-06, "loss": 0.3555, "step": 3858 }, { "epoch": 0.5533409807857758, "grad_norm": 0.4250253736972809, "learning_rate": 9.784512444271866e-06, "loss": 0.366, "step": 3859 }, { "epoch": 0.5534843705190708, "grad_norm": 0.4327094256877899, "learning_rate": 9.784270105628722e-06, "loss": 0.3573, "step": 3860 }, { "epoch": 0.5536277602523659, "grad_norm": 0.39931970834732056, "learning_rate": 9.784027633798858e-06, "loss": 0.3352, "step": 3861 }, { "epoch": 0.553771149985661, "grad_norm": 0.36207181215286255, "learning_rate": 9.783785028789025e-06, "loss": 0.3252, "step": 3862 }, { "epoch": 0.5539145397189561, "grad_norm": 0.4064028859138489, "learning_rate": 9.783542290605974e-06, "loss": 0.3555, "step": 3863 }, { "epoch": 0.5540579294522512, "grad_norm": 0.3781375586986542, "learning_rate": 9.783299419256466e-06, "loss": 0.3513, "step": 3864 }, { "epoch": 0.5542013191855463, "grad_norm": 0.3046143352985382, "learning_rate": 9.783056414747261e-06, "loss": 0.3597, "step": 3865 }, { "epoch": 0.5543447089188414, "grad_norm": 0.3683261573314667, "learning_rate": 9.782813277085122e-06, "loss": 0.3503, "step": 3866 }, { "epoch": 0.5544880986521366, "grad_norm": 0.395308256149292, "learning_rate": 9.782570006276817e-06, "loss": 0.3463, "step": 3867 }, { "epoch": 0.5546314883854316, "grad_norm": 0.32585546374320984, "learning_rate": 9.782326602329122e-06, "loss": 0.3569, "step": 3868 }, { "epoch": 0.5547748781187267, "grad_norm": 0.3445412814617157, "learning_rate": 9.782083065248809e-06, "loss": 0.3393, "step": 3869 }, { "epoch": 0.5549182678520218, "grad_norm": 0.39259764552116394, "learning_rate": 9.781839395042662e-06, "loss": 0.3434, "step": 3870 }, { "epoch": 0.5550616575853169, "grad_norm": 0.3799654543399811, "learning_rate": 9.781595591717462e-06, "loss": 0.3399, "step": 3871 }, { "epoch": 0.555205047318612, "grad_norm": 0.3557201623916626, "learning_rate": 9.781351655279994e-06, "loss": 0.3433, "step": 3872 }, { "epoch": 0.5553484370519071, "grad_norm": 0.35762837529182434, "learning_rate": 9.781107585737053e-06, "loss": 0.3405, "step": 3873 }, { "epoch": 0.5554918267852021, "grad_norm": 0.3550649881362915, "learning_rate": 9.78086338309543e-06, "loss": 0.346, "step": 3874 }, { "epoch": 0.5556352165184972, "grad_norm": 0.339249849319458, "learning_rate": 9.780619047361926e-06, "loss": 0.3425, "step": 3875 }, { "epoch": 0.5557786062517923, "grad_norm": 0.35526376962661743, "learning_rate": 9.780374578543341e-06, "loss": 0.3625, "step": 3876 }, { "epoch": 0.5559219959850875, "grad_norm": 0.3244812488555908, "learning_rate": 9.780129976646482e-06, "loss": 0.329, "step": 3877 }, { "epoch": 0.5560653857183826, "grad_norm": 0.3490516245365143, "learning_rate": 9.779885241678158e-06, "loss": 0.3278, "step": 3878 }, { "epoch": 0.5562087754516777, "grad_norm": 0.3481973111629486, "learning_rate": 9.779640373645181e-06, "loss": 0.3716, "step": 3879 }, { "epoch": 0.5563521651849728, "grad_norm": 0.32009223103523254, "learning_rate": 9.779395372554368e-06, "loss": 0.3172, "step": 3880 }, { "epoch": 0.5564955549182679, "grad_norm": 0.2983565032482147, "learning_rate": 9.77915023841254e-06, "loss": 0.3261, "step": 3881 }, { "epoch": 0.5566389446515629, "grad_norm": 0.3718138337135315, "learning_rate": 9.778904971226522e-06, "loss": 0.3643, "step": 3882 }, { "epoch": 0.556782334384858, "grad_norm": 0.37506502866744995, "learning_rate": 9.77865957100314e-06, "loss": 0.3347, "step": 3883 }, { "epoch": 0.5569257241181531, "grad_norm": 0.3699721097946167, "learning_rate": 9.778414037749227e-06, "loss": 0.3453, "step": 3884 }, { "epoch": 0.5570691138514482, "grad_norm": 0.32532423734664917, "learning_rate": 9.778168371471616e-06, "loss": 0.3429, "step": 3885 }, { "epoch": 0.5572125035847433, "grad_norm": 0.36232084035873413, "learning_rate": 9.777922572177151e-06, "loss": 0.3567, "step": 3886 }, { "epoch": 0.5573558933180385, "grad_norm": 0.369322806596756, "learning_rate": 9.77767663987267e-06, "loss": 0.3256, "step": 3887 }, { "epoch": 0.5574992830513336, "grad_norm": 0.3518822193145752, "learning_rate": 9.777430574565021e-06, "loss": 0.3487, "step": 3888 }, { "epoch": 0.5576426727846286, "grad_norm": 0.4185302257537842, "learning_rate": 9.777184376261052e-06, "loss": 0.3522, "step": 3889 }, { "epoch": 0.5577860625179237, "grad_norm": 0.35959550738334656, "learning_rate": 9.77693804496762e-06, "loss": 0.3453, "step": 3890 }, { "epoch": 0.5579294522512188, "grad_norm": 0.5285588502883911, "learning_rate": 9.776691580691582e-06, "loss": 0.3696, "step": 3891 }, { "epoch": 0.5580728419845139, "grad_norm": 0.32535994052886963, "learning_rate": 9.776444983439798e-06, "loss": 0.3324, "step": 3892 }, { "epoch": 0.558216231717809, "grad_norm": 0.3817208707332611, "learning_rate": 9.776198253219133e-06, "loss": 0.3318, "step": 3893 }, { "epoch": 0.5583596214511041, "grad_norm": 0.3472558856010437, "learning_rate": 9.775951390036455e-06, "loss": 0.3401, "step": 3894 }, { "epoch": 0.5585030111843992, "grad_norm": 0.3473169803619385, "learning_rate": 9.775704393898638e-06, "loss": 0.353, "step": 3895 }, { "epoch": 0.5586464009176942, "grad_norm": 0.3515196144580841, "learning_rate": 9.775457264812556e-06, "loss": 0.36, "step": 3896 }, { "epoch": 0.5587897906509894, "grad_norm": 0.3506513833999634, "learning_rate": 9.775210002785092e-06, "loss": 0.3651, "step": 3897 }, { "epoch": 0.5589331803842845, "grad_norm": 0.30883949995040894, "learning_rate": 9.774962607823125e-06, "loss": 0.3455, "step": 3898 }, { "epoch": 0.5590765701175796, "grad_norm": 0.3398672938346863, "learning_rate": 9.774715079933546e-06, "loss": 0.3329, "step": 3899 }, { "epoch": 0.5592199598508747, "grad_norm": 0.33664068579673767, "learning_rate": 9.774467419123241e-06, "loss": 0.3621, "step": 3900 }, { "epoch": 0.5593633495841698, "grad_norm": 0.3354542553424835, "learning_rate": 9.77421962539911e-06, "loss": 0.3491, "step": 3901 }, { "epoch": 0.5595067393174649, "grad_norm": 0.3241541385650635, "learning_rate": 9.773971698768048e-06, "loss": 0.3825, "step": 3902 }, { "epoch": 0.55965012905076, "grad_norm": 0.347939133644104, "learning_rate": 9.773723639236958e-06, "loss": 0.341, "step": 3903 }, { "epoch": 0.559793518784055, "grad_norm": 0.3431301414966583, "learning_rate": 9.773475446812747e-06, "loss": 0.3444, "step": 3904 }, { "epoch": 0.5599369085173501, "grad_norm": 0.31800559163093567, "learning_rate": 9.77322712150232e-06, "loss": 0.3577, "step": 3905 }, { "epoch": 0.5600802982506452, "grad_norm": 0.333205908536911, "learning_rate": 9.772978663312593e-06, "loss": 0.3542, "step": 3906 }, { "epoch": 0.5602236879839404, "grad_norm": 0.31396275758743286, "learning_rate": 9.772730072250483e-06, "loss": 0.3453, "step": 3907 }, { "epoch": 0.5603670777172355, "grad_norm": 0.31050869822502136, "learning_rate": 9.772481348322909e-06, "loss": 0.355, "step": 3908 }, { "epoch": 0.5605104674505306, "grad_norm": 0.3440808951854706, "learning_rate": 9.772232491536796e-06, "loss": 0.3432, "step": 3909 }, { "epoch": 0.5606538571838257, "grad_norm": 0.33109793066978455, "learning_rate": 9.771983501899072e-06, "loss": 0.3601, "step": 3910 }, { "epoch": 0.5607972469171207, "grad_norm": 0.3114040195941925, "learning_rate": 9.771734379416667e-06, "loss": 0.3492, "step": 3911 }, { "epoch": 0.5609406366504158, "grad_norm": 0.35361456871032715, "learning_rate": 9.771485124096519e-06, "loss": 0.3357, "step": 3912 }, { "epoch": 0.5610840263837109, "grad_norm": 0.31928664445877075, "learning_rate": 9.771235735945562e-06, "loss": 0.3276, "step": 3913 }, { "epoch": 0.561227416117006, "grad_norm": 0.36390209197998047, "learning_rate": 9.770986214970743e-06, "loss": 0.3542, "step": 3914 }, { "epoch": 0.5613708058503011, "grad_norm": 0.3559401333332062, "learning_rate": 9.770736561179009e-06, "loss": 0.3494, "step": 3915 }, { "epoch": 0.5615141955835962, "grad_norm": 0.36627092957496643, "learning_rate": 9.770486774577306e-06, "loss": 0.3489, "step": 3916 }, { "epoch": 0.5616575853168914, "grad_norm": 0.4542216360569, "learning_rate": 9.770236855172589e-06, "loss": 0.3257, "step": 3917 }, { "epoch": 0.5618009750501864, "grad_norm": 0.4048253893852234, "learning_rate": 9.769986802971816e-06, "loss": 0.3255, "step": 3918 }, { "epoch": 0.5619443647834815, "grad_norm": 0.3599553108215332, "learning_rate": 9.76973661798195e-06, "loss": 0.3465, "step": 3919 }, { "epoch": 0.5620877545167766, "grad_norm": 0.4248371422290802, "learning_rate": 9.76948630020995e-06, "loss": 0.3397, "step": 3920 }, { "epoch": 0.5622311442500717, "grad_norm": 0.4017695486545563, "learning_rate": 9.769235849662792e-06, "loss": 0.3491, "step": 3921 }, { "epoch": 0.5623745339833668, "grad_norm": 0.3611791729927063, "learning_rate": 9.768985266347443e-06, "loss": 0.34, "step": 3922 }, { "epoch": 0.5625179237166619, "grad_norm": 0.3404203951358795, "learning_rate": 9.768734550270881e-06, "loss": 0.3291, "step": 3923 }, { "epoch": 0.562661313449957, "grad_norm": 0.3527418076992035, "learning_rate": 9.768483701440084e-06, "loss": 0.3712, "step": 3924 }, { "epoch": 0.562804703183252, "grad_norm": 0.38498038053512573, "learning_rate": 9.768232719862036e-06, "loss": 0.3601, "step": 3925 }, { "epoch": 0.5629480929165471, "grad_norm": 0.33817651867866516, "learning_rate": 9.767981605543724e-06, "loss": 0.353, "step": 3926 }, { "epoch": 0.5630914826498423, "grad_norm": 0.3840610086917877, "learning_rate": 9.76773035849214e-06, "loss": 0.3272, "step": 3927 }, { "epoch": 0.5632348723831374, "grad_norm": 0.351112961769104, "learning_rate": 9.767478978714276e-06, "loss": 0.3664, "step": 3928 }, { "epoch": 0.5633782621164325, "grad_norm": 0.3893902599811554, "learning_rate": 9.76722746621713e-06, "loss": 0.3551, "step": 3929 }, { "epoch": 0.5635216518497276, "grad_norm": 0.3605807423591614, "learning_rate": 9.766975821007706e-06, "loss": 0.3396, "step": 3930 }, { "epoch": 0.5636650415830227, "grad_norm": 0.3792082667350769, "learning_rate": 9.766724043093008e-06, "loss": 0.3432, "step": 3931 }, { "epoch": 0.5638084313163177, "grad_norm": 0.36018186807632446, "learning_rate": 9.766472132480045e-06, "loss": 0.3545, "step": 3932 }, { "epoch": 0.5639518210496128, "grad_norm": 0.37103071808815, "learning_rate": 9.766220089175828e-06, "loss": 0.3802, "step": 3933 }, { "epoch": 0.5640952107829079, "grad_norm": 0.336514413356781, "learning_rate": 9.765967913187378e-06, "loss": 0.3507, "step": 3934 }, { "epoch": 0.564238600516203, "grad_norm": 0.35347503423690796, "learning_rate": 9.765715604521712e-06, "loss": 0.3303, "step": 3935 }, { "epoch": 0.5643819902494981, "grad_norm": 0.3995618522167206, "learning_rate": 9.765463163185856e-06, "loss": 0.3556, "step": 3936 }, { "epoch": 0.5645253799827933, "grad_norm": 0.32667285203933716, "learning_rate": 9.765210589186835e-06, "loss": 0.3577, "step": 3937 }, { "epoch": 0.5646687697160884, "grad_norm": 0.35382893681526184, "learning_rate": 9.764957882531681e-06, "loss": 0.3567, "step": 3938 }, { "epoch": 0.5648121594493835, "grad_norm": 0.4146491289138794, "learning_rate": 9.764705043227429e-06, "loss": 0.3597, "step": 3939 }, { "epoch": 0.5649555491826785, "grad_norm": 0.33447590470314026, "learning_rate": 9.764452071281119e-06, "loss": 0.3423, "step": 3940 }, { "epoch": 0.5650989389159736, "grad_norm": 0.33579137921333313, "learning_rate": 9.764198966699794e-06, "loss": 0.3538, "step": 3941 }, { "epoch": 0.5652423286492687, "grad_norm": 0.3252735733985901, "learning_rate": 9.763945729490496e-06, "loss": 0.3501, "step": 3942 }, { "epoch": 0.5653857183825638, "grad_norm": 0.35732871294021606, "learning_rate": 9.763692359660278e-06, "loss": 0.3553, "step": 3943 }, { "epoch": 0.5655291081158589, "grad_norm": 0.34759268164634705, "learning_rate": 9.763438857216194e-06, "loss": 0.3643, "step": 3944 }, { "epoch": 0.565672497849154, "grad_norm": 0.35986432433128357, "learning_rate": 9.763185222165298e-06, "loss": 0.368, "step": 3945 }, { "epoch": 0.565815887582449, "grad_norm": 0.3281925916671753, "learning_rate": 9.762931454514653e-06, "loss": 0.3214, "step": 3946 }, { "epoch": 0.5659592773157442, "grad_norm": 0.38810446858406067, "learning_rate": 9.762677554271323e-06, "loss": 0.3408, "step": 3947 }, { "epoch": 0.5661026670490393, "grad_norm": 0.362530380487442, "learning_rate": 9.762423521442377e-06, "loss": 0.3518, "step": 3948 }, { "epoch": 0.5662460567823344, "grad_norm": 0.3354359269142151, "learning_rate": 9.762169356034885e-06, "loss": 0.3456, "step": 3949 }, { "epoch": 0.5663894465156295, "grad_norm": 0.40087270736694336, "learning_rate": 9.761915058055924e-06, "loss": 0.3464, "step": 3950 }, { "epoch": 0.5665328362489246, "grad_norm": 0.3646170198917389, "learning_rate": 9.761660627512572e-06, "loss": 0.3468, "step": 3951 }, { "epoch": 0.5666762259822197, "grad_norm": 0.35888025164604187, "learning_rate": 9.761406064411915e-06, "loss": 0.3434, "step": 3952 }, { "epoch": 0.5668196157155148, "grad_norm": 0.3710381090641022, "learning_rate": 9.761151368761037e-06, "loss": 0.3623, "step": 3953 }, { "epoch": 0.5669630054488098, "grad_norm": 0.35225099325180054, "learning_rate": 9.760896540567029e-06, "loss": 0.3428, "step": 3954 }, { "epoch": 0.5671063951821049, "grad_norm": 0.3436374366283417, "learning_rate": 9.760641579836985e-06, "loss": 0.3341, "step": 3955 }, { "epoch": 0.5672497849154, "grad_norm": 0.31854721903800964, "learning_rate": 9.760386486578003e-06, "loss": 0.3526, "step": 3956 }, { "epoch": 0.5673931746486952, "grad_norm": 0.33927252888679504, "learning_rate": 9.760131260797182e-06, "loss": 0.3451, "step": 3957 }, { "epoch": 0.5675365643819903, "grad_norm": 0.37204816937446594, "learning_rate": 9.759875902501631e-06, "loss": 0.3621, "step": 3958 }, { "epoch": 0.5676799541152854, "grad_norm": 0.338638037443161, "learning_rate": 9.759620411698457e-06, "loss": 0.3572, "step": 3959 }, { "epoch": 0.5678233438485805, "grad_norm": 0.3354640305042267, "learning_rate": 9.759364788394772e-06, "loss": 0.3353, "step": 3960 }, { "epoch": 0.5679667335818755, "grad_norm": 0.3282216489315033, "learning_rate": 9.759109032597691e-06, "loss": 0.3484, "step": 3961 }, { "epoch": 0.5681101233151706, "grad_norm": 0.308264821767807, "learning_rate": 9.758853144314338e-06, "loss": 0.3661, "step": 3962 }, { "epoch": 0.5682535130484657, "grad_norm": 0.32098856568336487, "learning_rate": 9.758597123551833e-06, "loss": 0.3479, "step": 3963 }, { "epoch": 0.5683969027817608, "grad_norm": 0.3445150852203369, "learning_rate": 9.758340970317302e-06, "loss": 0.3508, "step": 3964 }, { "epoch": 0.5685402925150559, "grad_norm": 0.3239324986934662, "learning_rate": 9.758084684617881e-06, "loss": 0.323, "step": 3965 }, { "epoch": 0.568683682248351, "grad_norm": 0.3446011543273926, "learning_rate": 9.7578282664607e-06, "loss": 0.3606, "step": 3966 }, { "epoch": 0.5688270719816461, "grad_norm": 0.34167027473449707, "learning_rate": 9.7575717158529e-06, "loss": 0.3571, "step": 3967 }, { "epoch": 0.5689704617149413, "grad_norm": 0.35720521211624146, "learning_rate": 9.757315032801622e-06, "loss": 0.3736, "step": 3968 }, { "epoch": 0.5691138514482363, "grad_norm": 0.34394463896751404, "learning_rate": 9.757058217314011e-06, "loss": 0.3421, "step": 3969 }, { "epoch": 0.5692572411815314, "grad_norm": 0.3251662850379944, "learning_rate": 9.756801269397218e-06, "loss": 0.3415, "step": 3970 }, { "epoch": 0.5694006309148265, "grad_norm": 0.35420364141464233, "learning_rate": 9.756544189058395e-06, "loss": 0.343, "step": 3971 }, { "epoch": 0.5695440206481216, "grad_norm": 0.35533037781715393, "learning_rate": 9.756286976304697e-06, "loss": 0.3499, "step": 3972 }, { "epoch": 0.5696874103814167, "grad_norm": 0.32963740825653076, "learning_rate": 9.756029631143287e-06, "loss": 0.3615, "step": 3973 }, { "epoch": 0.5698308001147118, "grad_norm": 0.36529698967933655, "learning_rate": 9.755772153581328e-06, "loss": 0.3379, "step": 3974 }, { "epoch": 0.5699741898480069, "grad_norm": 0.3660565912723541, "learning_rate": 9.755514543625988e-06, "loss": 0.3294, "step": 3975 }, { "epoch": 0.5701175795813019, "grad_norm": 0.32806897163391113, "learning_rate": 9.755256801284438e-06, "loss": 0.3629, "step": 3976 }, { "epoch": 0.570260969314597, "grad_norm": 0.3325846791267395, "learning_rate": 9.754998926563854e-06, "loss": 0.346, "step": 3977 }, { "epoch": 0.5704043590478922, "grad_norm": 0.353543221950531, "learning_rate": 9.754740919471416e-06, "loss": 0.3562, "step": 3978 }, { "epoch": 0.5705477487811873, "grad_norm": 0.3536864221096039, "learning_rate": 9.754482780014303e-06, "loss": 0.3445, "step": 3979 }, { "epoch": 0.5706911385144824, "grad_norm": 0.328363835811615, "learning_rate": 9.754224508199705e-06, "loss": 0.3348, "step": 3980 }, { "epoch": 0.5708345282477775, "grad_norm": 0.37026622891426086, "learning_rate": 9.75396610403481e-06, "loss": 0.3543, "step": 3981 }, { "epoch": 0.5709779179810726, "grad_norm": 0.3490540385246277, "learning_rate": 9.753707567526808e-06, "loss": 0.3376, "step": 3982 }, { "epoch": 0.5711213077143676, "grad_norm": 0.33738285303115845, "learning_rate": 9.753448898682904e-06, "loss": 0.3375, "step": 3983 }, { "epoch": 0.5712646974476627, "grad_norm": 0.3637659251689911, "learning_rate": 9.753190097510292e-06, "loss": 0.3484, "step": 3984 }, { "epoch": 0.5714080871809578, "grad_norm": 0.3380964696407318, "learning_rate": 9.752931164016181e-06, "loss": 0.3351, "step": 3985 }, { "epoch": 0.5715514769142529, "grad_norm": 0.3571431338787079, "learning_rate": 9.752672098207779e-06, "loss": 0.334, "step": 3986 }, { "epoch": 0.571694866647548, "grad_norm": 0.36676275730133057, "learning_rate": 9.752412900092293e-06, "loss": 0.3514, "step": 3987 }, { "epoch": 0.5718382563808432, "grad_norm": 0.3398522138595581, "learning_rate": 9.752153569676946e-06, "loss": 0.3426, "step": 3988 }, { "epoch": 0.5719816461141383, "grad_norm": 0.3840644359588623, "learning_rate": 9.751894106968953e-06, "loss": 0.331, "step": 3989 }, { "epoch": 0.5721250358474333, "grad_norm": 0.3809833526611328, "learning_rate": 9.751634511975536e-06, "loss": 0.3399, "step": 3990 }, { "epoch": 0.5722684255807284, "grad_norm": 0.3547172248363495, "learning_rate": 9.751374784703926e-06, "loss": 0.3474, "step": 3991 }, { "epoch": 0.5724118153140235, "grad_norm": 0.36234867572784424, "learning_rate": 9.75111492516135e-06, "loss": 0.3467, "step": 3992 }, { "epoch": 0.5725552050473186, "grad_norm": 0.3904486894607544, "learning_rate": 9.750854933355043e-06, "loss": 0.3648, "step": 3993 }, { "epoch": 0.5726985947806137, "grad_norm": 0.3235693573951721, "learning_rate": 9.750594809292242e-06, "loss": 0.3288, "step": 3994 }, { "epoch": 0.5728419845139088, "grad_norm": 0.3316763937473297, "learning_rate": 9.750334552980191e-06, "loss": 0.3455, "step": 3995 }, { "epoch": 0.5729853742472039, "grad_norm": 0.34850725531578064, "learning_rate": 9.750074164426133e-06, "loss": 0.3424, "step": 3996 }, { "epoch": 0.573128763980499, "grad_norm": 0.33410295844078064, "learning_rate": 9.749813643637316e-06, "loss": 0.34, "step": 3997 }, { "epoch": 0.5732721537137941, "grad_norm": 0.33203497529029846, "learning_rate": 9.749552990620994e-06, "loss": 0.3328, "step": 3998 }, { "epoch": 0.5734155434470892, "grad_norm": 0.39075276255607605, "learning_rate": 9.749292205384424e-06, "loss": 0.3518, "step": 3999 }, { "epoch": 0.5735589331803843, "grad_norm": 0.30989447236061096, "learning_rate": 9.749031287934865e-06, "loss": 0.316, "step": 4000 }, { "epoch": 0.5737023229136794, "grad_norm": 0.3373735547065735, "learning_rate": 9.748770238279579e-06, "loss": 0.3349, "step": 4001 }, { "epoch": 0.5738457126469745, "grad_norm": 0.34965842962265015, "learning_rate": 9.748509056425835e-06, "loss": 0.3361, "step": 4002 }, { "epoch": 0.5739891023802696, "grad_norm": 0.3183128237724304, "learning_rate": 9.748247742380905e-06, "loss": 0.3526, "step": 4003 }, { "epoch": 0.5741324921135647, "grad_norm": 0.39008647203445435, "learning_rate": 9.747986296152058e-06, "loss": 0.3435, "step": 4004 }, { "epoch": 0.5742758818468597, "grad_norm": 0.32701370120048523, "learning_rate": 9.74772471774658e-06, "loss": 0.3212, "step": 4005 }, { "epoch": 0.5744192715801548, "grad_norm": 0.34209540486335754, "learning_rate": 9.74746300717175e-06, "loss": 0.3256, "step": 4006 }, { "epoch": 0.5745626613134499, "grad_norm": 0.3585960268974304, "learning_rate": 9.74720116443485e-06, "loss": 0.3414, "step": 4007 }, { "epoch": 0.5747060510467451, "grad_norm": 0.3715198040008545, "learning_rate": 9.746939189543171e-06, "loss": 0.3286, "step": 4008 }, { "epoch": 0.5748494407800402, "grad_norm": 0.3948216140270233, "learning_rate": 9.74667708250401e-06, "loss": 0.3696, "step": 4009 }, { "epoch": 0.5749928305133353, "grad_norm": 0.3572084307670593, "learning_rate": 9.746414843324661e-06, "loss": 0.3335, "step": 4010 }, { "epoch": 0.5751362202466304, "grad_norm": 0.3569199740886688, "learning_rate": 9.746152472012422e-06, "loss": 0.3271, "step": 4011 }, { "epoch": 0.5752796099799254, "grad_norm": 0.4287930130958557, "learning_rate": 9.745889968574602e-06, "loss": 0.3329, "step": 4012 }, { "epoch": 0.5754229997132205, "grad_norm": 0.3585428297519684, "learning_rate": 9.745627333018504e-06, "loss": 0.3214, "step": 4013 }, { "epoch": 0.5755663894465156, "grad_norm": 0.37411656975746155, "learning_rate": 9.74536456535144e-06, "loss": 0.3557, "step": 4014 }, { "epoch": 0.5757097791798107, "grad_norm": 0.35519540309906006, "learning_rate": 9.745101665580728e-06, "loss": 0.3432, "step": 4015 }, { "epoch": 0.5758531689131058, "grad_norm": 0.3906291425228119, "learning_rate": 9.744838633713684e-06, "loss": 0.3534, "step": 4016 }, { "epoch": 0.5759965586464009, "grad_norm": 0.3430328369140625, "learning_rate": 9.744575469757632e-06, "loss": 0.3682, "step": 4017 }, { "epoch": 0.5761399483796961, "grad_norm": 0.3509780466556549, "learning_rate": 9.744312173719897e-06, "loss": 0.3526, "step": 4018 }, { "epoch": 0.5762833381129912, "grad_norm": 0.3771575093269348, "learning_rate": 9.74404874560781e-06, "loss": 0.3269, "step": 4019 }, { "epoch": 0.5764267278462862, "grad_norm": 0.34598904848098755, "learning_rate": 9.743785185428703e-06, "loss": 0.3327, "step": 4020 }, { "epoch": 0.5765701175795813, "grad_norm": 0.3278886675834656, "learning_rate": 9.743521493189914e-06, "loss": 0.3531, "step": 4021 }, { "epoch": 0.5767135073128764, "grad_norm": 0.3800676465034485, "learning_rate": 9.743257668898784e-06, "loss": 0.3517, "step": 4022 }, { "epoch": 0.5768568970461715, "grad_norm": 0.3497944176197052, "learning_rate": 9.742993712562656e-06, "loss": 0.3481, "step": 4023 }, { "epoch": 0.5770002867794666, "grad_norm": 0.32886290550231934, "learning_rate": 9.74272962418888e-06, "loss": 0.3615, "step": 4024 }, { "epoch": 0.5771436765127617, "grad_norm": 0.3364174962043762, "learning_rate": 9.742465403784807e-06, "loss": 0.3515, "step": 4025 }, { "epoch": 0.5772870662460567, "grad_norm": 0.3427848815917969, "learning_rate": 9.742201051357792e-06, "loss": 0.3545, "step": 4026 }, { "epoch": 0.5774304559793518, "grad_norm": 0.37147384881973267, "learning_rate": 9.741936566915193e-06, "loss": 0.3624, "step": 4027 }, { "epoch": 0.577573845712647, "grad_norm": 0.378888875246048, "learning_rate": 9.741671950464377e-06, "loss": 0.344, "step": 4028 }, { "epoch": 0.5777172354459421, "grad_norm": 0.36815565824508667, "learning_rate": 9.741407202012706e-06, "loss": 0.3689, "step": 4029 }, { "epoch": 0.5778606251792372, "grad_norm": 0.3402126133441925, "learning_rate": 9.741142321567553e-06, "loss": 0.3277, "step": 4030 }, { "epoch": 0.5780040149125323, "grad_norm": 0.40050631761550903, "learning_rate": 9.740877309136293e-06, "loss": 0.3583, "step": 4031 }, { "epoch": 0.5781474046458274, "grad_norm": 0.3050706684589386, "learning_rate": 9.740612164726298e-06, "loss": 0.3336, "step": 4032 }, { "epoch": 0.5782907943791225, "grad_norm": 0.35739055275917053, "learning_rate": 9.740346888344953e-06, "loss": 0.3486, "step": 4033 }, { "epoch": 0.5784341841124175, "grad_norm": 0.34587499499320984, "learning_rate": 9.740081479999646e-06, "loss": 0.3729, "step": 4034 }, { "epoch": 0.5785775738457126, "grad_norm": 0.3284282386302948, "learning_rate": 9.73981593969776e-06, "loss": 0.3475, "step": 4035 }, { "epoch": 0.5787209635790077, "grad_norm": 0.3649701774120331, "learning_rate": 9.73955026744669e-06, "loss": 0.3356, "step": 4036 }, { "epoch": 0.5788643533123028, "grad_norm": 0.43018561601638794, "learning_rate": 9.739284463253831e-06, "loss": 0.3579, "step": 4037 }, { "epoch": 0.579007743045598, "grad_norm": 0.35946813225746155, "learning_rate": 9.739018527126583e-06, "loss": 0.3372, "step": 4038 }, { "epoch": 0.5791511327788931, "grad_norm": 0.32548633217811584, "learning_rate": 9.73875245907235e-06, "loss": 0.3477, "step": 4039 }, { "epoch": 0.5792945225121882, "grad_norm": 0.3754650056362152, "learning_rate": 9.738486259098536e-06, "loss": 0.3594, "step": 4040 }, { "epoch": 0.5794379122454832, "grad_norm": 0.4153366982936859, "learning_rate": 9.738219927212555e-06, "loss": 0.356, "step": 4041 }, { "epoch": 0.5795813019787783, "grad_norm": 0.3197060525417328, "learning_rate": 9.737953463421819e-06, "loss": 0.3395, "step": 4042 }, { "epoch": 0.5797246917120734, "grad_norm": 0.3352653980255127, "learning_rate": 9.737686867733748e-06, "loss": 0.3397, "step": 4043 }, { "epoch": 0.5798680814453685, "grad_norm": 0.34105610847473145, "learning_rate": 9.737420140155763e-06, "loss": 0.3591, "step": 4044 }, { "epoch": 0.5800114711786636, "grad_norm": 0.33064883947372437, "learning_rate": 9.73715328069529e-06, "loss": 0.3601, "step": 4045 }, { "epoch": 0.5801548609119587, "grad_norm": 0.3686186373233795, "learning_rate": 9.736886289359753e-06, "loss": 0.3264, "step": 4046 }, { "epoch": 0.5802982506452538, "grad_norm": 0.37019723653793335, "learning_rate": 9.736619166156591e-06, "loss": 0.3429, "step": 4047 }, { "epoch": 0.580441640378549, "grad_norm": 0.3327651619911194, "learning_rate": 9.736351911093236e-06, "loss": 0.3511, "step": 4048 }, { "epoch": 0.580585030111844, "grad_norm": 0.3465811610221863, "learning_rate": 9.736084524177132e-06, "loss": 0.3156, "step": 4049 }, { "epoch": 0.5807284198451391, "grad_norm": 0.3842528462409973, "learning_rate": 9.73581700541572e-06, "loss": 0.3319, "step": 4050 }, { "epoch": 0.5808718095784342, "grad_norm": 0.33268263936042786, "learning_rate": 9.735549354816444e-06, "loss": 0.3524, "step": 4051 }, { "epoch": 0.5810151993117293, "grad_norm": 0.34148454666137695, "learning_rate": 9.735281572386762e-06, "loss": 0.3202, "step": 4052 }, { "epoch": 0.5811585890450244, "grad_norm": 0.34762024879455566, "learning_rate": 9.735013658134125e-06, "loss": 0.3261, "step": 4053 }, { "epoch": 0.5813019787783195, "grad_norm": 0.3714047372341156, "learning_rate": 9.73474561206599e-06, "loss": 0.3445, "step": 4054 }, { "epoch": 0.5814453685116145, "grad_norm": 0.3257596790790558, "learning_rate": 9.734477434189822e-06, "loss": 0.3181, "step": 4055 }, { "epoch": 0.5815887582449096, "grad_norm": 0.3482559025287628, "learning_rate": 9.734209124513085e-06, "loss": 0.3424, "step": 4056 }, { "epoch": 0.5817321479782047, "grad_norm": 0.3580262064933777, "learning_rate": 9.73394068304325e-06, "loss": 0.3464, "step": 4057 }, { "epoch": 0.5818755377114998, "grad_norm": 0.3211272358894348, "learning_rate": 9.733672109787786e-06, "loss": 0.3342, "step": 4058 }, { "epoch": 0.582018927444795, "grad_norm": 0.33643853664398193, "learning_rate": 9.733403404754174e-06, "loss": 0.3377, "step": 4059 }, { "epoch": 0.5821623171780901, "grad_norm": 0.36025041341781616, "learning_rate": 9.73313456794989e-06, "loss": 0.3316, "step": 4060 }, { "epoch": 0.5823057069113852, "grad_norm": 0.3536798357963562, "learning_rate": 9.732865599382423e-06, "loss": 0.3453, "step": 4061 }, { "epoch": 0.5824490966446803, "grad_norm": 0.3368890583515167, "learning_rate": 9.732596499059257e-06, "loss": 0.3633, "step": 4062 }, { "epoch": 0.5825924863779753, "grad_norm": 0.3155968487262726, "learning_rate": 9.732327266987887e-06, "loss": 0.3445, "step": 4063 }, { "epoch": 0.5827358761112704, "grad_norm": 0.36600393056869507, "learning_rate": 9.732057903175802e-06, "loss": 0.3554, "step": 4064 }, { "epoch": 0.5828792658445655, "grad_norm": 0.3331996202468872, "learning_rate": 9.731788407630507e-06, "loss": 0.3338, "step": 4065 }, { "epoch": 0.5830226555778606, "grad_norm": 0.32459521293640137, "learning_rate": 9.7315187803595e-06, "loss": 0.3342, "step": 4066 }, { "epoch": 0.5831660453111557, "grad_norm": 0.33520305156707764, "learning_rate": 9.731249021370289e-06, "loss": 0.3572, "step": 4067 }, { "epoch": 0.5833094350444508, "grad_norm": 0.3463248908519745, "learning_rate": 9.730979130670383e-06, "loss": 0.341, "step": 4068 }, { "epoch": 0.583452824777746, "grad_norm": 0.3470325469970703, "learning_rate": 9.730709108267297e-06, "loss": 0.3499, "step": 4069 }, { "epoch": 0.583596214511041, "grad_norm": 0.3113751709461212, "learning_rate": 9.730438954168546e-06, "loss": 0.3382, "step": 4070 }, { "epoch": 0.5837396042443361, "grad_norm": 0.3351184129714966, "learning_rate": 9.73016866838165e-06, "loss": 0.3466, "step": 4071 }, { "epoch": 0.5838829939776312, "grad_norm": 0.32613274455070496, "learning_rate": 9.729898250914137e-06, "loss": 0.3572, "step": 4072 }, { "epoch": 0.5840263837109263, "grad_norm": 0.30885034799575806, "learning_rate": 9.72962770177353e-06, "loss": 0.3569, "step": 4073 }, { "epoch": 0.5841697734442214, "grad_norm": 0.33725669980049133, "learning_rate": 9.729357020967365e-06, "loss": 0.3242, "step": 4074 }, { "epoch": 0.5843131631775165, "grad_norm": 0.3677501976490021, "learning_rate": 9.729086208503174e-06, "loss": 0.3435, "step": 4075 }, { "epoch": 0.5844565529108116, "grad_norm": 0.3166502118110657, "learning_rate": 9.728815264388499e-06, "loss": 0.353, "step": 4076 }, { "epoch": 0.5845999426441066, "grad_norm": 0.341450572013855, "learning_rate": 9.728544188630882e-06, "loss": 0.3454, "step": 4077 }, { "epoch": 0.5847433323774017, "grad_norm": 0.32445675134658813, "learning_rate": 9.728272981237867e-06, "loss": 0.3321, "step": 4078 }, { "epoch": 0.5848867221106969, "grad_norm": 0.34034207463264465, "learning_rate": 9.728001642217006e-06, "loss": 0.3563, "step": 4079 }, { "epoch": 0.585030111843992, "grad_norm": 0.32160845398902893, "learning_rate": 9.727730171575853e-06, "loss": 0.3223, "step": 4080 }, { "epoch": 0.5851735015772871, "grad_norm": 0.3400014042854309, "learning_rate": 9.727458569321964e-06, "loss": 0.3449, "step": 4081 }, { "epoch": 0.5853168913105822, "grad_norm": 0.3256453573703766, "learning_rate": 9.727186835462901e-06, "loss": 0.3599, "step": 4082 }, { "epoch": 0.5854602810438773, "grad_norm": 0.3069766163825989, "learning_rate": 9.726914970006229e-06, "loss": 0.3515, "step": 4083 }, { "epoch": 0.5856036707771723, "grad_norm": 0.3218035399913788, "learning_rate": 9.726642972959515e-06, "loss": 0.3522, "step": 4084 }, { "epoch": 0.5857470605104674, "grad_norm": 0.32103586196899414, "learning_rate": 9.726370844330331e-06, "loss": 0.3479, "step": 4085 }, { "epoch": 0.5858904502437625, "grad_norm": 0.3384869396686554, "learning_rate": 9.726098584126253e-06, "loss": 0.3783, "step": 4086 }, { "epoch": 0.5860338399770576, "grad_norm": 0.3329903185367584, "learning_rate": 9.725826192354862e-06, "loss": 0.3307, "step": 4087 }, { "epoch": 0.5861772297103527, "grad_norm": 0.33782684803009033, "learning_rate": 9.725553669023739e-06, "loss": 0.3419, "step": 4088 }, { "epoch": 0.5863206194436479, "grad_norm": 0.3553965389728546, "learning_rate": 9.725281014140471e-06, "loss": 0.3575, "step": 4089 }, { "epoch": 0.586464009176943, "grad_norm": 0.35284698009490967, "learning_rate": 9.725008227712647e-06, "loss": 0.3311, "step": 4090 }, { "epoch": 0.586607398910238, "grad_norm": 0.32527658343315125, "learning_rate": 9.724735309747864e-06, "loss": 0.3417, "step": 4091 }, { "epoch": 0.5867507886435331, "grad_norm": 0.3668079972267151, "learning_rate": 9.724462260253718e-06, "loss": 0.3509, "step": 4092 }, { "epoch": 0.5868941783768282, "grad_norm": 0.39911991357803345, "learning_rate": 9.724189079237809e-06, "loss": 0.3521, "step": 4093 }, { "epoch": 0.5870375681101233, "grad_norm": 0.3264872431755066, "learning_rate": 9.723915766707743e-06, "loss": 0.3428, "step": 4094 }, { "epoch": 0.5871809578434184, "grad_norm": 0.3864060640335083, "learning_rate": 9.72364232267113e-06, "loss": 0.3337, "step": 4095 }, { "epoch": 0.5873243475767135, "grad_norm": 0.32248103618621826, "learning_rate": 9.723368747135582e-06, "loss": 0.336, "step": 4096 }, { "epoch": 0.5874677373100086, "grad_norm": 0.36626484990119934, "learning_rate": 9.723095040108712e-06, "loss": 0.3319, "step": 4097 }, { "epoch": 0.5876111270433036, "grad_norm": 0.3443489670753479, "learning_rate": 9.722821201598143e-06, "loss": 0.3395, "step": 4098 }, { "epoch": 0.5877545167765988, "grad_norm": 0.3446875810623169, "learning_rate": 9.722547231611497e-06, "loss": 0.3497, "step": 4099 }, { "epoch": 0.5878979065098939, "grad_norm": 0.34835848212242126, "learning_rate": 9.722273130156399e-06, "loss": 0.3303, "step": 4100 }, { "epoch": 0.588041296243189, "grad_norm": 0.3410651385784149, "learning_rate": 9.721998897240482e-06, "loss": 0.3573, "step": 4101 }, { "epoch": 0.5881846859764841, "grad_norm": 0.35260263085365295, "learning_rate": 9.72172453287138e-06, "loss": 0.3478, "step": 4102 }, { "epoch": 0.5883280757097792, "grad_norm": 0.33498120307922363, "learning_rate": 9.721450037056729e-06, "loss": 0.3261, "step": 4103 }, { "epoch": 0.5884714654430743, "grad_norm": 0.336581826210022, "learning_rate": 9.721175409804173e-06, "loss": 0.3404, "step": 4104 }, { "epoch": 0.5886148551763694, "grad_norm": 0.3349739909172058, "learning_rate": 9.720900651121357e-06, "loss": 0.3307, "step": 4105 }, { "epoch": 0.5887582449096644, "grad_norm": 0.341248095035553, "learning_rate": 9.720625761015928e-06, "loss": 0.3247, "step": 4106 }, { "epoch": 0.5889016346429595, "grad_norm": 0.34198853373527527, "learning_rate": 9.720350739495539e-06, "loss": 0.3289, "step": 4107 }, { "epoch": 0.5890450243762546, "grad_norm": 0.28881365060806274, "learning_rate": 9.720075586567848e-06, "loss": 0.3476, "step": 4108 }, { "epoch": 0.5891884141095498, "grad_norm": 0.33478811383247375, "learning_rate": 9.719800302240512e-06, "loss": 0.3392, "step": 4109 }, { "epoch": 0.5893318038428449, "grad_norm": 0.33759090304374695, "learning_rate": 9.719524886521196e-06, "loss": 0.3712, "step": 4110 }, { "epoch": 0.58947519357614, "grad_norm": 0.33633479475975037, "learning_rate": 9.71924933941757e-06, "loss": 0.3694, "step": 4111 }, { "epoch": 0.5896185833094351, "grad_norm": 0.368802547454834, "learning_rate": 9.718973660937299e-06, "loss": 0.3827, "step": 4112 }, { "epoch": 0.5897619730427301, "grad_norm": 0.324535608291626, "learning_rate": 9.71869785108806e-06, "loss": 0.3336, "step": 4113 }, { "epoch": 0.5899053627760252, "grad_norm": 0.3656655251979828, "learning_rate": 9.718421909877533e-06, "loss": 0.3586, "step": 4114 }, { "epoch": 0.5900487525093203, "grad_norm": 0.3742040991783142, "learning_rate": 9.718145837313399e-06, "loss": 0.3274, "step": 4115 }, { "epoch": 0.5901921422426154, "grad_norm": 0.3194662630558014, "learning_rate": 9.717869633403341e-06, "loss": 0.3489, "step": 4116 }, { "epoch": 0.5903355319759105, "grad_norm": 0.3634888529777527, "learning_rate": 9.717593298155053e-06, "loss": 0.3293, "step": 4117 }, { "epoch": 0.5904789217092056, "grad_norm": 0.3398366868495941, "learning_rate": 9.717316831576222e-06, "loss": 0.3412, "step": 4118 }, { "epoch": 0.5906223114425008, "grad_norm": 0.3349958062171936, "learning_rate": 9.717040233674547e-06, "loss": 0.342, "step": 4119 }, { "epoch": 0.5907657011757959, "grad_norm": 0.33793210983276367, "learning_rate": 9.716763504457728e-06, "loss": 0.3525, "step": 4120 }, { "epoch": 0.5909090909090909, "grad_norm": 0.33849066495895386, "learning_rate": 9.71648664393347e-06, "loss": 0.3337, "step": 4121 }, { "epoch": 0.591052480642386, "grad_norm": 0.34148791432380676, "learning_rate": 9.716209652109477e-06, "loss": 0.3406, "step": 4122 }, { "epoch": 0.5911958703756811, "grad_norm": 0.3387943506240845, "learning_rate": 9.715932528993464e-06, "loss": 0.3222, "step": 4123 }, { "epoch": 0.5913392601089762, "grad_norm": 0.35613900423049927, "learning_rate": 9.715655274593143e-06, "loss": 0.3321, "step": 4124 }, { "epoch": 0.5914826498422713, "grad_norm": 0.3024784028530121, "learning_rate": 9.715377888916233e-06, "loss": 0.3229, "step": 4125 }, { "epoch": 0.5916260395755664, "grad_norm": 0.3237224817276001, "learning_rate": 9.715100371970458e-06, "loss": 0.3199, "step": 4126 }, { "epoch": 0.5917694293088614, "grad_norm": 0.34479662775993347, "learning_rate": 9.71482272376354e-06, "loss": 0.3595, "step": 4127 }, { "epoch": 0.5919128190421565, "grad_norm": 0.340932697057724, "learning_rate": 9.714544944303211e-06, "loss": 0.3471, "step": 4128 }, { "epoch": 0.5920562087754517, "grad_norm": 0.343178391456604, "learning_rate": 9.714267033597202e-06, "loss": 0.3532, "step": 4129 }, { "epoch": 0.5921995985087468, "grad_norm": 0.3416711688041687, "learning_rate": 9.713988991653251e-06, "loss": 0.35, "step": 4130 }, { "epoch": 0.5923429882420419, "grad_norm": 0.34947434067726135, "learning_rate": 9.7137108184791e-06, "loss": 0.3313, "step": 4131 }, { "epoch": 0.592486377975337, "grad_norm": 0.3592835068702698, "learning_rate": 9.713432514082488e-06, "loss": 0.3435, "step": 4132 }, { "epoch": 0.5926297677086321, "grad_norm": 0.36289557814598083, "learning_rate": 9.713154078471168e-06, "loss": 0.35, "step": 4133 }, { "epoch": 0.5927731574419272, "grad_norm": 0.3343643248081207, "learning_rate": 9.712875511652886e-06, "loss": 0.343, "step": 4134 }, { "epoch": 0.5929165471752222, "grad_norm": 0.3401416838169098, "learning_rate": 9.712596813635402e-06, "loss": 0.3264, "step": 4135 }, { "epoch": 0.5930599369085173, "grad_norm": 0.30745890736579895, "learning_rate": 9.71231798442647e-06, "loss": 0.3301, "step": 4136 }, { "epoch": 0.5932033266418124, "grad_norm": 0.34708672761917114, "learning_rate": 9.712039024033856e-06, "loss": 0.3323, "step": 4137 }, { "epoch": 0.5933467163751075, "grad_norm": 0.3460533618927002, "learning_rate": 9.711759932465324e-06, "loss": 0.3438, "step": 4138 }, { "epoch": 0.5934901061084027, "grad_norm": 0.3515074551105499, "learning_rate": 9.711480709728641e-06, "loss": 0.3427, "step": 4139 }, { "epoch": 0.5936334958416978, "grad_norm": 0.3175188899040222, "learning_rate": 9.711201355831586e-06, "loss": 0.3143, "step": 4140 }, { "epoch": 0.5937768855749929, "grad_norm": 0.3605867624282837, "learning_rate": 9.71092187078193e-06, "loss": 0.331, "step": 4141 }, { "epoch": 0.593920275308288, "grad_norm": 0.4326440393924713, "learning_rate": 9.710642254587456e-06, "loss": 0.3567, "step": 4142 }, { "epoch": 0.594063665041583, "grad_norm": 0.33879610896110535, "learning_rate": 9.710362507255949e-06, "loss": 0.3486, "step": 4143 }, { "epoch": 0.5942070547748781, "grad_norm": 0.36755669116973877, "learning_rate": 9.710082628795194e-06, "loss": 0.3683, "step": 4144 }, { "epoch": 0.5943504445081732, "grad_norm": 0.3689135015010834, "learning_rate": 9.709802619212987e-06, "loss": 0.3721, "step": 4145 }, { "epoch": 0.5944938342414683, "grad_norm": 0.35058048367500305, "learning_rate": 9.709522478517119e-06, "loss": 0.3508, "step": 4146 }, { "epoch": 0.5946372239747634, "grad_norm": 0.37933605909347534, "learning_rate": 9.709242206715387e-06, "loss": 0.3198, "step": 4147 }, { "epoch": 0.5947806137080585, "grad_norm": 0.33029988408088684, "learning_rate": 9.7089618038156e-06, "loss": 0.3399, "step": 4148 }, { "epoch": 0.5949240034413535, "grad_norm": 0.3367856442928314, "learning_rate": 9.708681269825558e-06, "loss": 0.3431, "step": 4149 }, { "epoch": 0.5950673931746487, "grad_norm": 0.3528933525085449, "learning_rate": 9.708400604753074e-06, "loss": 0.3478, "step": 4150 }, { "epoch": 0.5952107829079438, "grad_norm": 0.32253608107566833, "learning_rate": 9.708119808605959e-06, "loss": 0.3401, "step": 4151 }, { "epoch": 0.5953541726412389, "grad_norm": 0.3319278955459595, "learning_rate": 9.707838881392032e-06, "loss": 0.3413, "step": 4152 }, { "epoch": 0.595497562374534, "grad_norm": 0.35450440645217896, "learning_rate": 9.70755782311911e-06, "loss": 0.3482, "step": 4153 }, { "epoch": 0.5956409521078291, "grad_norm": 0.3409042954444885, "learning_rate": 9.707276633795022e-06, "loss": 0.3637, "step": 4154 }, { "epoch": 0.5957843418411242, "grad_norm": 0.3739034831523895, "learning_rate": 9.706995313427595e-06, "loss": 0.3399, "step": 4155 }, { "epoch": 0.5959277315744193, "grad_norm": 0.32860133051872253, "learning_rate": 9.706713862024657e-06, "loss": 0.3485, "step": 4156 }, { "epoch": 0.5960711213077143, "grad_norm": 0.3512510657310486, "learning_rate": 9.706432279594046e-06, "loss": 0.344, "step": 4157 }, { "epoch": 0.5962145110410094, "grad_norm": 0.34068503975868225, "learning_rate": 9.7061505661436e-06, "loss": 0.3301, "step": 4158 }, { "epoch": 0.5963579007743045, "grad_norm": 0.3457764685153961, "learning_rate": 9.705868721681162e-06, "loss": 0.3436, "step": 4159 }, { "epoch": 0.5965012905075997, "grad_norm": 0.32727983593940735, "learning_rate": 9.705586746214577e-06, "loss": 0.3365, "step": 4160 }, { "epoch": 0.5966446802408948, "grad_norm": 0.3596722185611725, "learning_rate": 9.705304639751696e-06, "loss": 0.3411, "step": 4161 }, { "epoch": 0.5967880699741899, "grad_norm": 0.3738853931427002, "learning_rate": 9.705022402300373e-06, "loss": 0.3572, "step": 4162 }, { "epoch": 0.596931459707485, "grad_norm": 0.34055155515670776, "learning_rate": 9.704740033868461e-06, "loss": 0.3251, "step": 4163 }, { "epoch": 0.59707484944078, "grad_norm": 0.3311058282852173, "learning_rate": 9.704457534463826e-06, "loss": 0.3337, "step": 4164 }, { "epoch": 0.5972182391740751, "grad_norm": 0.34464311599731445, "learning_rate": 9.70417490409433e-06, "loss": 0.335, "step": 4165 }, { "epoch": 0.5973616289073702, "grad_norm": 0.3471783995628357, "learning_rate": 9.70389214276784e-06, "loss": 0.3586, "step": 4166 }, { "epoch": 0.5975050186406653, "grad_norm": 0.3360903263092041, "learning_rate": 9.703609250492231e-06, "loss": 0.338, "step": 4167 }, { "epoch": 0.5976484083739604, "grad_norm": 0.32690662145614624, "learning_rate": 9.703326227275374e-06, "loss": 0.3236, "step": 4168 }, { "epoch": 0.5977917981072555, "grad_norm": 0.33744537830352783, "learning_rate": 9.70304307312515e-06, "loss": 0.3531, "step": 4169 }, { "epoch": 0.5979351878405507, "grad_norm": 0.33771947026252747, "learning_rate": 9.702759788049443e-06, "loss": 0.3239, "step": 4170 }, { "epoch": 0.5980785775738457, "grad_norm": 0.35339462757110596, "learning_rate": 9.702476372056136e-06, "loss": 0.3322, "step": 4171 }, { "epoch": 0.5982219673071408, "grad_norm": 0.33420562744140625, "learning_rate": 9.70219282515312e-06, "loss": 0.3277, "step": 4172 }, { "epoch": 0.5983653570404359, "grad_norm": 0.37112095952033997, "learning_rate": 9.70190914734829e-06, "loss": 0.3183, "step": 4173 }, { "epoch": 0.598508746773731, "grad_norm": 0.3570139408111572, "learning_rate": 9.701625338649543e-06, "loss": 0.3341, "step": 4174 }, { "epoch": 0.5986521365070261, "grad_norm": 0.40846002101898193, "learning_rate": 9.701341399064779e-06, "loss": 0.3624, "step": 4175 }, { "epoch": 0.5987955262403212, "grad_norm": 0.3731841444969177, "learning_rate": 9.701057328601902e-06, "loss": 0.3379, "step": 4176 }, { "epoch": 0.5989389159736163, "grad_norm": 0.3219356834888458, "learning_rate": 9.70077312726882e-06, "loss": 0.3254, "step": 4177 }, { "epoch": 0.5990823057069113, "grad_norm": 0.44913703203201294, "learning_rate": 9.700488795073446e-06, "loss": 0.3685, "step": 4178 }, { "epoch": 0.5992256954402064, "grad_norm": 0.40106040239334106, "learning_rate": 9.700204332023695e-06, "loss": 0.345, "step": 4179 }, { "epoch": 0.5993690851735016, "grad_norm": 0.4160066843032837, "learning_rate": 9.699919738127484e-06, "loss": 0.3501, "step": 4180 }, { "epoch": 0.5995124749067967, "grad_norm": 0.4568743109703064, "learning_rate": 9.699635013392738e-06, "loss": 0.3176, "step": 4181 }, { "epoch": 0.5996558646400918, "grad_norm": 0.37213611602783203, "learning_rate": 9.699350157827384e-06, "loss": 0.3462, "step": 4182 }, { "epoch": 0.5997992543733869, "grad_norm": 0.39506295323371887, "learning_rate": 9.69906517143935e-06, "loss": 0.3324, "step": 4183 }, { "epoch": 0.599942644106682, "grad_norm": 0.39625418186187744, "learning_rate": 9.69878005423657e-06, "loss": 0.3455, "step": 4184 }, { "epoch": 0.600086033839977, "grad_norm": 0.3270394504070282, "learning_rate": 9.69849480622698e-06, "loss": 0.3381, "step": 4185 }, { "epoch": 0.6002294235732721, "grad_norm": 0.4494045078754425, "learning_rate": 9.698209427418523e-06, "loss": 0.3442, "step": 4186 }, { "epoch": 0.6003728133065672, "grad_norm": 0.35026177763938904, "learning_rate": 9.697923917819144e-06, "loss": 0.3228, "step": 4187 }, { "epoch": 0.6005162030398623, "grad_norm": 0.3412916362285614, "learning_rate": 9.697638277436788e-06, "loss": 0.3469, "step": 4188 }, { "epoch": 0.6006595927731574, "grad_norm": 0.45621612668037415, "learning_rate": 9.697352506279409e-06, "loss": 0.3452, "step": 4189 }, { "epoch": 0.6008029825064526, "grad_norm": 0.3492678701877594, "learning_rate": 9.697066604354963e-06, "loss": 0.3389, "step": 4190 }, { "epoch": 0.6009463722397477, "grad_norm": 0.3786824345588684, "learning_rate": 9.696780571671408e-06, "loss": 0.3481, "step": 4191 }, { "epoch": 0.6010897619730428, "grad_norm": 0.41517046093940735, "learning_rate": 9.696494408236707e-06, "loss": 0.3634, "step": 4192 }, { "epoch": 0.6012331517063378, "grad_norm": 0.34507492184638977, "learning_rate": 9.696208114058826e-06, "loss": 0.3421, "step": 4193 }, { "epoch": 0.6013765414396329, "grad_norm": 0.35566991567611694, "learning_rate": 9.695921689145735e-06, "loss": 0.3681, "step": 4194 }, { "epoch": 0.601519931172928, "grad_norm": 0.36100366711616516, "learning_rate": 9.695635133505408e-06, "loss": 0.361, "step": 4195 }, { "epoch": 0.6016633209062231, "grad_norm": 0.3773626685142517, "learning_rate": 9.695348447145823e-06, "loss": 0.3271, "step": 4196 }, { "epoch": 0.6018067106395182, "grad_norm": 0.374933660030365, "learning_rate": 9.69506163007496e-06, "loss": 0.3327, "step": 4197 }, { "epoch": 0.6019501003728133, "grad_norm": 0.35190117359161377, "learning_rate": 9.694774682300802e-06, "loss": 0.3292, "step": 4198 }, { "epoch": 0.6020934901061084, "grad_norm": 0.34231093525886536, "learning_rate": 9.69448760383134e-06, "loss": 0.339, "step": 4199 }, { "epoch": 0.6022368798394035, "grad_norm": 0.3496117889881134, "learning_rate": 9.694200394674564e-06, "loss": 0.353, "step": 4200 }, { "epoch": 0.6023802695726986, "grad_norm": 0.32024094462394714, "learning_rate": 9.69391305483847e-06, "loss": 0.3347, "step": 4201 }, { "epoch": 0.6025236593059937, "grad_norm": 0.33153244853019714, "learning_rate": 9.693625584331057e-06, "loss": 0.3522, "step": 4202 }, { "epoch": 0.6026670490392888, "grad_norm": 0.335997611284256, "learning_rate": 9.693337983160328e-06, "loss": 0.3564, "step": 4203 }, { "epoch": 0.6028104387725839, "grad_norm": 0.3180698752403259, "learning_rate": 9.693050251334291e-06, "loss": 0.3461, "step": 4204 }, { "epoch": 0.602953828505879, "grad_norm": 0.3278844952583313, "learning_rate": 9.692762388860952e-06, "loss": 0.3422, "step": 4205 }, { "epoch": 0.6030972182391741, "grad_norm": 0.37053632736206055, "learning_rate": 9.692474395748328e-06, "loss": 0.3572, "step": 4206 }, { "epoch": 0.6032406079724691, "grad_norm": 0.2989088296890259, "learning_rate": 9.692186272004433e-06, "loss": 0.3435, "step": 4207 }, { "epoch": 0.6033839977057642, "grad_norm": 0.3200797140598297, "learning_rate": 9.691898017637293e-06, "loss": 0.3394, "step": 4208 }, { "epoch": 0.6035273874390593, "grad_norm": 0.3517177402973175, "learning_rate": 9.691609632654927e-06, "loss": 0.3315, "step": 4209 }, { "epoch": 0.6036707771723545, "grad_norm": 0.3572150468826294, "learning_rate": 9.691321117065368e-06, "loss": 0.3282, "step": 4210 }, { "epoch": 0.6038141669056496, "grad_norm": 0.3266397714614868, "learning_rate": 9.691032470876647e-06, "loss": 0.3366, "step": 4211 }, { "epoch": 0.6039575566389447, "grad_norm": 0.35131824016571045, "learning_rate": 9.690743694096796e-06, "loss": 0.3269, "step": 4212 }, { "epoch": 0.6041009463722398, "grad_norm": 0.33391693234443665, "learning_rate": 9.690454786733857e-06, "loss": 0.3365, "step": 4213 }, { "epoch": 0.6042443361055349, "grad_norm": 0.37380731105804443, "learning_rate": 9.69016574879587e-06, "loss": 0.3519, "step": 4214 }, { "epoch": 0.6043877258388299, "grad_norm": 0.3214418590068817, "learning_rate": 9.689876580290888e-06, "loss": 0.3504, "step": 4215 }, { "epoch": 0.604531115572125, "grad_norm": 0.32095178961753845, "learning_rate": 9.689587281226952e-06, "loss": 0.341, "step": 4216 }, { "epoch": 0.6046745053054201, "grad_norm": 0.35074084997177124, "learning_rate": 9.689297851612122e-06, "loss": 0.3332, "step": 4217 }, { "epoch": 0.6048178950387152, "grad_norm": 0.3256160020828247, "learning_rate": 9.689008291454452e-06, "loss": 0.3703, "step": 4218 }, { "epoch": 0.6049612847720103, "grad_norm": 0.30772027373313904, "learning_rate": 9.688718600762006e-06, "loss": 0.3268, "step": 4219 }, { "epoch": 0.6051046745053055, "grad_norm": 0.35745006799697876, "learning_rate": 9.688428779542845e-06, "loss": 0.3551, "step": 4220 }, { "epoch": 0.6052480642386006, "grad_norm": 0.37994152307510376, "learning_rate": 9.68813882780504e-06, "loss": 0.3631, "step": 4221 }, { "epoch": 0.6053914539718956, "grad_norm": 0.33041703701019287, "learning_rate": 9.68784874555666e-06, "loss": 0.3483, "step": 4222 }, { "epoch": 0.6055348437051907, "grad_norm": 0.34992098808288574, "learning_rate": 9.687558532805786e-06, "loss": 0.3395, "step": 4223 }, { "epoch": 0.6056782334384858, "grad_norm": 0.37639427185058594, "learning_rate": 9.68726818956049e-06, "loss": 0.3489, "step": 4224 }, { "epoch": 0.6058216231717809, "grad_norm": 0.33812710642814636, "learning_rate": 9.686977715828858e-06, "loss": 0.3667, "step": 4225 }, { "epoch": 0.605965012905076, "grad_norm": 0.34339389204978943, "learning_rate": 9.686687111618977e-06, "loss": 0.3562, "step": 4226 }, { "epoch": 0.6061084026383711, "grad_norm": 0.3371250629425049, "learning_rate": 9.686396376938938e-06, "loss": 0.3407, "step": 4227 }, { "epoch": 0.6062517923716662, "grad_norm": 0.37638652324676514, "learning_rate": 9.68610551179683e-06, "loss": 0.3578, "step": 4228 }, { "epoch": 0.6063951821049612, "grad_norm": 0.3302375376224518, "learning_rate": 9.685814516200756e-06, "loss": 0.3314, "step": 4229 }, { "epoch": 0.6065385718382564, "grad_norm": 0.3547658920288086, "learning_rate": 9.685523390158812e-06, "loss": 0.3634, "step": 4230 }, { "epoch": 0.6066819615715515, "grad_norm": 0.36600080132484436, "learning_rate": 9.685232133679105e-06, "loss": 0.35, "step": 4231 }, { "epoch": 0.6068253513048466, "grad_norm": 0.3420559763908386, "learning_rate": 9.684940746769742e-06, "loss": 0.3209, "step": 4232 }, { "epoch": 0.6069687410381417, "grad_norm": 0.347917765378952, "learning_rate": 9.684649229438836e-06, "loss": 0.357, "step": 4233 }, { "epoch": 0.6071121307714368, "grad_norm": 0.3327649235725403, "learning_rate": 9.684357581694502e-06, "loss": 0.3622, "step": 4234 }, { "epoch": 0.6072555205047319, "grad_norm": 0.3160584568977356, "learning_rate": 9.68406580354486e-06, "loss": 0.3344, "step": 4235 }, { "epoch": 0.607398910238027, "grad_norm": 0.32870346307754517, "learning_rate": 9.68377389499803e-06, "loss": 0.3385, "step": 4236 }, { "epoch": 0.607542299971322, "grad_norm": 0.3196484446525574, "learning_rate": 9.683481856062138e-06, "loss": 0.3192, "step": 4237 }, { "epoch": 0.6076856897046171, "grad_norm": 0.3927472233772278, "learning_rate": 9.683189686745319e-06, "loss": 0.3385, "step": 4238 }, { "epoch": 0.6078290794379122, "grad_norm": 0.321275532245636, "learning_rate": 9.6828973870557e-06, "loss": 0.3635, "step": 4239 }, { "epoch": 0.6079724691712074, "grad_norm": 0.33689454197883606, "learning_rate": 9.682604957001424e-06, "loss": 0.3388, "step": 4240 }, { "epoch": 0.6081158589045025, "grad_norm": 0.34812429547309875, "learning_rate": 9.68231239659063e-06, "loss": 0.3612, "step": 4241 }, { "epoch": 0.6082592486377976, "grad_norm": 0.31758174300193787, "learning_rate": 9.682019705831459e-06, "loss": 0.34, "step": 4242 }, { "epoch": 0.6084026383710927, "grad_norm": 0.3217751383781433, "learning_rate": 9.681726884732063e-06, "loss": 0.3409, "step": 4243 }, { "epoch": 0.6085460281043877, "grad_norm": 0.3188716769218445, "learning_rate": 9.681433933300592e-06, "loss": 0.3423, "step": 4244 }, { "epoch": 0.6086894178376828, "grad_norm": 0.33543726801872253, "learning_rate": 9.6811408515452e-06, "loss": 0.3479, "step": 4245 }, { "epoch": 0.6088328075709779, "grad_norm": 0.3586466610431671, "learning_rate": 9.680847639474052e-06, "loss": 0.3651, "step": 4246 }, { "epoch": 0.608976197304273, "grad_norm": 0.32992318272590637, "learning_rate": 9.680554297095303e-06, "loss": 0.349, "step": 4247 }, { "epoch": 0.6091195870375681, "grad_norm": 0.3815869092941284, "learning_rate": 9.680260824417122e-06, "loss": 0.3379, "step": 4248 }, { "epoch": 0.6092629767708632, "grad_norm": 0.29971814155578613, "learning_rate": 9.67996722144768e-06, "loss": 0.3392, "step": 4249 }, { "epoch": 0.6094063665041582, "grad_norm": 0.335680216550827, "learning_rate": 9.67967348819515e-06, "loss": 0.3389, "step": 4250 }, { "epoch": 0.6095497562374534, "grad_norm": 0.3491373360157013, "learning_rate": 9.679379624667708e-06, "loss": 0.333, "step": 4251 }, { "epoch": 0.6096931459707485, "grad_norm": 0.31709152460098267, "learning_rate": 9.679085630873537e-06, "loss": 0.3472, "step": 4252 }, { "epoch": 0.6098365357040436, "grad_norm": 0.37566184997558594, "learning_rate": 9.678791506820819e-06, "loss": 0.3401, "step": 4253 }, { "epoch": 0.6099799254373387, "grad_norm": 0.32503390312194824, "learning_rate": 9.678497252517744e-06, "loss": 0.3288, "step": 4254 }, { "epoch": 0.6101233151706338, "grad_norm": 0.30822300910949707, "learning_rate": 9.6782028679725e-06, "loss": 0.3462, "step": 4255 }, { "epoch": 0.6102667049039289, "grad_norm": 0.3389871418476105, "learning_rate": 9.677908353193286e-06, "loss": 0.3351, "step": 4256 }, { "epoch": 0.610410094637224, "grad_norm": 0.3252227306365967, "learning_rate": 9.677613708188298e-06, "loss": 0.3647, "step": 4257 }, { "epoch": 0.610553484370519, "grad_norm": 0.3329218626022339, "learning_rate": 9.677318932965743e-06, "loss": 0.3456, "step": 4258 }, { "epoch": 0.6106968741038141, "grad_norm": 0.3587663471698761, "learning_rate": 9.67702402753382e-06, "loss": 0.3385, "step": 4259 }, { "epoch": 0.6108402638371092, "grad_norm": 0.3414668142795563, "learning_rate": 9.676728991900745e-06, "loss": 0.342, "step": 4260 }, { "epoch": 0.6109836535704044, "grad_norm": 0.35634493827819824, "learning_rate": 9.67643382607473e-06, "loss": 0.3464, "step": 4261 }, { "epoch": 0.6111270433036995, "grad_norm": 0.3229708969593048, "learning_rate": 9.67613853006399e-06, "loss": 0.3604, "step": 4262 }, { "epoch": 0.6112704330369946, "grad_norm": 0.4105183780193329, "learning_rate": 9.675843103876745e-06, "loss": 0.3448, "step": 4263 }, { "epoch": 0.6114138227702897, "grad_norm": 0.3422705829143524, "learning_rate": 9.675547547521222e-06, "loss": 0.3307, "step": 4264 }, { "epoch": 0.6115572125035847, "grad_norm": 0.3378664255142212, "learning_rate": 9.675251861005648e-06, "loss": 0.3339, "step": 4265 }, { "epoch": 0.6117006022368798, "grad_norm": 0.3428305685520172, "learning_rate": 9.674956044338254e-06, "loss": 0.3551, "step": 4266 }, { "epoch": 0.6118439919701749, "grad_norm": 0.33681347966194153, "learning_rate": 9.674660097527275e-06, "loss": 0.3571, "step": 4267 }, { "epoch": 0.61198738170347, "grad_norm": 0.32021188735961914, "learning_rate": 9.674364020580949e-06, "loss": 0.3282, "step": 4268 }, { "epoch": 0.6121307714367651, "grad_norm": 0.3400915563106537, "learning_rate": 9.674067813507521e-06, "loss": 0.3451, "step": 4269 }, { "epoch": 0.6122741611700602, "grad_norm": 0.3392179310321808, "learning_rate": 9.673771476315235e-06, "loss": 0.364, "step": 4270 }, { "epoch": 0.6124175509033554, "grad_norm": 0.3413202166557312, "learning_rate": 9.673475009012339e-06, "loss": 0.3237, "step": 4271 }, { "epoch": 0.6125609406366505, "grad_norm": 0.31210052967071533, "learning_rate": 9.673178411607088e-06, "loss": 0.3549, "step": 4272 }, { "epoch": 0.6127043303699455, "grad_norm": 0.33230510354042053, "learning_rate": 9.67288168410774e-06, "loss": 0.3423, "step": 4273 }, { "epoch": 0.6128477201032406, "grad_norm": 0.3426753282546997, "learning_rate": 9.672584826522553e-06, "loss": 0.3599, "step": 4274 }, { "epoch": 0.6129911098365357, "grad_norm": 0.32181262969970703, "learning_rate": 9.672287838859792e-06, "loss": 0.3503, "step": 4275 }, { "epoch": 0.6131344995698308, "grad_norm": 0.334940642118454, "learning_rate": 9.671990721127726e-06, "loss": 0.3539, "step": 4276 }, { "epoch": 0.6132778893031259, "grad_norm": 0.32869985699653625, "learning_rate": 9.671693473334625e-06, "loss": 0.3501, "step": 4277 }, { "epoch": 0.613421279036421, "grad_norm": 0.3549930453300476, "learning_rate": 9.671396095488766e-06, "loss": 0.3496, "step": 4278 }, { "epoch": 0.613564668769716, "grad_norm": 0.3417908251285553, "learning_rate": 9.671098587598422e-06, "loss": 0.3199, "step": 4279 }, { "epoch": 0.6137080585030111, "grad_norm": 0.33791953325271606, "learning_rate": 9.67080094967188e-06, "loss": 0.3394, "step": 4280 }, { "epoch": 0.6138514482363063, "grad_norm": 0.3572651743888855, "learning_rate": 9.670503181717425e-06, "loss": 0.3437, "step": 4281 }, { "epoch": 0.6139948379696014, "grad_norm": 0.3676668703556061, "learning_rate": 9.670205283743347e-06, "loss": 0.3418, "step": 4282 }, { "epoch": 0.6141382277028965, "grad_norm": 0.3298411965370178, "learning_rate": 9.669907255757937e-06, "loss": 0.3325, "step": 4283 }, { "epoch": 0.6142816174361916, "grad_norm": 0.34005358815193176, "learning_rate": 9.669609097769492e-06, "loss": 0.3465, "step": 4284 }, { "epoch": 0.6144250071694867, "grad_norm": 0.4093707203865051, "learning_rate": 9.669310809786315e-06, "loss": 0.3411, "step": 4285 }, { "epoch": 0.6145683969027818, "grad_norm": 0.34513720870018005, "learning_rate": 9.669012391816706e-06, "loss": 0.3221, "step": 4286 }, { "epoch": 0.6147117866360768, "grad_norm": 0.3906248211860657, "learning_rate": 9.668713843868974e-06, "loss": 0.3361, "step": 4287 }, { "epoch": 0.6148551763693719, "grad_norm": 0.3428555428981781, "learning_rate": 9.668415165951432e-06, "loss": 0.3406, "step": 4288 }, { "epoch": 0.614998566102667, "grad_norm": 0.36033251881599426, "learning_rate": 9.668116358072393e-06, "loss": 0.352, "step": 4289 }, { "epoch": 0.6151419558359621, "grad_norm": 0.35447958111763, "learning_rate": 9.667817420240173e-06, "loss": 0.3463, "step": 4290 }, { "epoch": 0.6152853455692573, "grad_norm": 0.31878161430358887, "learning_rate": 9.667518352463099e-06, "loss": 0.335, "step": 4291 }, { "epoch": 0.6154287353025524, "grad_norm": 0.3371012210845947, "learning_rate": 9.667219154749494e-06, "loss": 0.3336, "step": 4292 }, { "epoch": 0.6155721250358475, "grad_norm": 0.35933640599250793, "learning_rate": 9.666919827107686e-06, "loss": 0.3507, "step": 4293 }, { "epoch": 0.6157155147691425, "grad_norm": 0.33755239844322205, "learning_rate": 9.666620369546009e-06, "loss": 0.3483, "step": 4294 }, { "epoch": 0.6158589045024376, "grad_norm": 0.36478471755981445, "learning_rate": 9.666320782072802e-06, "loss": 0.3433, "step": 4295 }, { "epoch": 0.6160022942357327, "grad_norm": 0.3383336663246155, "learning_rate": 9.6660210646964e-06, "loss": 0.3479, "step": 4296 }, { "epoch": 0.6161456839690278, "grad_norm": 0.3622386157512665, "learning_rate": 9.66572121742515e-06, "loss": 0.3474, "step": 4297 }, { "epoch": 0.6162890737023229, "grad_norm": 0.49916744232177734, "learning_rate": 9.665421240267399e-06, "loss": 0.3468, "step": 4298 }, { "epoch": 0.616432463435618, "grad_norm": 0.3316778540611267, "learning_rate": 9.665121133231497e-06, "loss": 0.3437, "step": 4299 }, { "epoch": 0.6165758531689131, "grad_norm": 0.34380924701690674, "learning_rate": 9.664820896325798e-06, "loss": 0.3521, "step": 4300 }, { "epoch": 0.6167192429022083, "grad_norm": 0.3651982545852661, "learning_rate": 9.664520529558661e-06, "loss": 0.3332, "step": 4301 }, { "epoch": 0.6168626326355033, "grad_norm": 0.31341204047203064, "learning_rate": 9.66422003293845e-06, "loss": 0.344, "step": 4302 }, { "epoch": 0.6170060223687984, "grad_norm": 0.32372140884399414, "learning_rate": 9.663919406473528e-06, "loss": 0.3506, "step": 4303 }, { "epoch": 0.6171494121020935, "grad_norm": 0.4160394072532654, "learning_rate": 9.663618650172263e-06, "loss": 0.3541, "step": 4304 }, { "epoch": 0.6172928018353886, "grad_norm": 0.3328917622566223, "learning_rate": 9.663317764043028e-06, "loss": 0.342, "step": 4305 }, { "epoch": 0.6174361915686837, "grad_norm": 0.32408398389816284, "learning_rate": 9.663016748094201e-06, "loss": 0.34, "step": 4306 }, { "epoch": 0.6175795813019788, "grad_norm": 0.38333678245544434, "learning_rate": 9.66271560233416e-06, "loss": 0.3439, "step": 4307 }, { "epoch": 0.6177229710352738, "grad_norm": 0.3233143985271454, "learning_rate": 9.662414326771292e-06, "loss": 0.3417, "step": 4308 }, { "epoch": 0.6178663607685689, "grad_norm": 0.3882787823677063, "learning_rate": 9.662112921413978e-06, "loss": 0.3603, "step": 4309 }, { "epoch": 0.618009750501864, "grad_norm": 0.36150798201560974, "learning_rate": 9.661811386270613e-06, "loss": 0.3382, "step": 4310 }, { "epoch": 0.6181531402351592, "grad_norm": 0.36436548829078674, "learning_rate": 9.66150972134959e-06, "loss": 0.3249, "step": 4311 }, { "epoch": 0.6182965299684543, "grad_norm": 0.35746484994888306, "learning_rate": 9.661207926659309e-06, "loss": 0.3395, "step": 4312 }, { "epoch": 0.6184399197017494, "grad_norm": 0.3667820990085602, "learning_rate": 9.660906002208168e-06, "loss": 0.3534, "step": 4313 }, { "epoch": 0.6185833094350445, "grad_norm": 0.382565975189209, "learning_rate": 9.660603948004574e-06, "loss": 0.3427, "step": 4314 }, { "epoch": 0.6187266991683396, "grad_norm": 0.33908432722091675, "learning_rate": 9.660301764056935e-06, "loss": 0.3397, "step": 4315 }, { "epoch": 0.6188700889016346, "grad_norm": 0.31337061524391174, "learning_rate": 9.659999450373664e-06, "loss": 0.3411, "step": 4316 }, { "epoch": 0.6190134786349297, "grad_norm": 0.36059021949768066, "learning_rate": 9.659697006963177e-06, "loss": 0.3482, "step": 4317 }, { "epoch": 0.6191568683682248, "grad_norm": 0.3570137023925781, "learning_rate": 9.659394433833894e-06, "loss": 0.3302, "step": 4318 }, { "epoch": 0.6193002581015199, "grad_norm": 0.3346366286277771, "learning_rate": 9.659091730994235e-06, "loss": 0.3502, "step": 4319 }, { "epoch": 0.619443647834815, "grad_norm": 0.3878141939640045, "learning_rate": 9.658788898452631e-06, "loss": 0.3582, "step": 4320 }, { "epoch": 0.6195870375681102, "grad_norm": 0.34845033288002014, "learning_rate": 9.65848593621751e-06, "loss": 0.3468, "step": 4321 }, { "epoch": 0.6197304273014053, "grad_norm": 0.3836890459060669, "learning_rate": 9.658182844297308e-06, "loss": 0.3473, "step": 4322 }, { "epoch": 0.6198738170347003, "grad_norm": 0.3368264436721802, "learning_rate": 9.65787962270046e-06, "loss": 0.3342, "step": 4323 }, { "epoch": 0.6200172067679954, "grad_norm": 0.35879242420196533, "learning_rate": 9.65757627143541e-06, "loss": 0.3614, "step": 4324 }, { "epoch": 0.6201605965012905, "grad_norm": 0.32383766770362854, "learning_rate": 9.6572727905106e-06, "loss": 0.3503, "step": 4325 }, { "epoch": 0.6203039862345856, "grad_norm": 0.36596062779426575, "learning_rate": 9.656969179934478e-06, "loss": 0.3351, "step": 4326 }, { "epoch": 0.6204473759678807, "grad_norm": 0.31624874472618103, "learning_rate": 9.6566654397155e-06, "loss": 0.3258, "step": 4327 }, { "epoch": 0.6205907657011758, "grad_norm": 0.3422241806983948, "learning_rate": 9.656361569862118e-06, "loss": 0.3308, "step": 4328 }, { "epoch": 0.6207341554344709, "grad_norm": 0.3203805685043335, "learning_rate": 9.656057570382796e-06, "loss": 0.3441, "step": 4329 }, { "epoch": 0.6208775451677659, "grad_norm": 0.3201138377189636, "learning_rate": 9.65575344128599e-06, "loss": 0.3155, "step": 4330 }, { "epoch": 0.6210209349010611, "grad_norm": 0.31826120615005493, "learning_rate": 9.655449182580172e-06, "loss": 0.3335, "step": 4331 }, { "epoch": 0.6211643246343562, "grad_norm": 0.3368387520313263, "learning_rate": 9.65514479427381e-06, "loss": 0.3268, "step": 4332 }, { "epoch": 0.6213077143676513, "grad_norm": 0.3344636857509613, "learning_rate": 9.654840276375377e-06, "loss": 0.3512, "step": 4333 }, { "epoch": 0.6214511041009464, "grad_norm": 0.3176398277282715, "learning_rate": 9.654535628893352e-06, "loss": 0.3474, "step": 4334 }, { "epoch": 0.6215944938342415, "grad_norm": 0.48212236166000366, "learning_rate": 9.654230851836216e-06, "loss": 0.3528, "step": 4335 }, { "epoch": 0.6217378835675366, "grad_norm": 0.33698415756225586, "learning_rate": 9.65392594521245e-06, "loss": 0.3382, "step": 4336 }, { "epoch": 0.6218812733008316, "grad_norm": 0.3436312973499298, "learning_rate": 9.653620909030546e-06, "loss": 0.3686, "step": 4337 }, { "epoch": 0.6220246630341267, "grad_norm": 0.30945050716400146, "learning_rate": 9.653315743298995e-06, "loss": 0.3468, "step": 4338 }, { "epoch": 0.6221680527674218, "grad_norm": 0.33063217997550964, "learning_rate": 9.653010448026291e-06, "loss": 0.3281, "step": 4339 }, { "epoch": 0.6223114425007169, "grad_norm": 0.3427734076976776, "learning_rate": 9.652705023220934e-06, "loss": 0.3539, "step": 4340 }, { "epoch": 0.622454832234012, "grad_norm": 0.3367437422275543, "learning_rate": 9.652399468891427e-06, "loss": 0.343, "step": 4341 }, { "epoch": 0.6225982219673072, "grad_norm": 0.3335232734680176, "learning_rate": 9.652093785046274e-06, "loss": 0.3528, "step": 4342 }, { "epoch": 0.6227416117006023, "grad_norm": 0.34091344475746155, "learning_rate": 9.651787971693989e-06, "loss": 0.3591, "step": 4343 }, { "epoch": 0.6228850014338974, "grad_norm": 0.3332798480987549, "learning_rate": 9.651482028843081e-06, "loss": 0.3444, "step": 4344 }, { "epoch": 0.6230283911671924, "grad_norm": 0.3327278792858124, "learning_rate": 9.651175956502067e-06, "loss": 0.3539, "step": 4345 }, { "epoch": 0.6231717809004875, "grad_norm": 0.3333001732826233, "learning_rate": 9.650869754679472e-06, "loss": 0.3312, "step": 4346 }, { "epoch": 0.6233151706337826, "grad_norm": 0.3187487721443176, "learning_rate": 9.650563423383816e-06, "loss": 0.3396, "step": 4347 }, { "epoch": 0.6234585603670777, "grad_norm": 0.33771488070487976, "learning_rate": 9.65025696262363e-06, "loss": 0.3177, "step": 4348 }, { "epoch": 0.6236019501003728, "grad_norm": 0.31922006607055664, "learning_rate": 9.649950372407441e-06, "loss": 0.3377, "step": 4349 }, { "epoch": 0.6237453398336679, "grad_norm": 0.3627775311470032, "learning_rate": 9.649643652743788e-06, "loss": 0.3426, "step": 4350 }, { "epoch": 0.623888729566963, "grad_norm": 0.33032068610191345, "learning_rate": 9.649336803641209e-06, "loss": 0.3462, "step": 4351 }, { "epoch": 0.6240321193002581, "grad_norm": 0.3203510046005249, "learning_rate": 9.649029825108244e-06, "loss": 0.3482, "step": 4352 }, { "epoch": 0.6241755090335532, "grad_norm": 0.33655810356140137, "learning_rate": 9.64872271715344e-06, "loss": 0.3459, "step": 4353 }, { "epoch": 0.6243188987668483, "grad_norm": 0.31134799122810364, "learning_rate": 9.648415479785348e-06, "loss": 0.3302, "step": 4354 }, { "epoch": 0.6244622885001434, "grad_norm": 0.3239598572254181, "learning_rate": 9.64810811301252e-06, "loss": 0.3452, "step": 4355 }, { "epoch": 0.6246056782334385, "grad_norm": 0.3204440772533417, "learning_rate": 9.64780061684351e-06, "loss": 0.3556, "step": 4356 }, { "epoch": 0.6247490679667336, "grad_norm": 0.3078983426094055, "learning_rate": 9.647492991286882e-06, "loss": 0.3423, "step": 4357 }, { "epoch": 0.6248924577000287, "grad_norm": 0.33368349075317383, "learning_rate": 9.647185236351197e-06, "loss": 0.3536, "step": 4358 }, { "epoch": 0.6250358474333237, "grad_norm": 0.352049857378006, "learning_rate": 9.646877352045026e-06, "loss": 0.3455, "step": 4359 }, { "epoch": 0.6251792371666188, "grad_norm": 0.3223104178905487, "learning_rate": 9.646569338376937e-06, "loss": 0.3461, "step": 4360 }, { "epoch": 0.6253226268999139, "grad_norm": 0.3557451069355011, "learning_rate": 9.646261195355504e-06, "loss": 0.3423, "step": 4361 }, { "epoch": 0.6254660166332091, "grad_norm": 0.339875727891922, "learning_rate": 9.645952922989308e-06, "loss": 0.3395, "step": 4362 }, { "epoch": 0.6256094063665042, "grad_norm": 0.2945094108581543, "learning_rate": 9.645644521286927e-06, "loss": 0.3423, "step": 4363 }, { "epoch": 0.6257527960997993, "grad_norm": 0.3471589982509613, "learning_rate": 9.645335990256952e-06, "loss": 0.3373, "step": 4364 }, { "epoch": 0.6258961858330944, "grad_norm": 0.32012367248535156, "learning_rate": 9.645027329907968e-06, "loss": 0.3352, "step": 4365 }, { "epoch": 0.6260395755663895, "grad_norm": 0.30538779497146606, "learning_rate": 9.644718540248568e-06, "loss": 0.37, "step": 4366 }, { "epoch": 0.6261829652996845, "grad_norm": 0.3235299289226532, "learning_rate": 9.644409621287349e-06, "loss": 0.3341, "step": 4367 }, { "epoch": 0.6263263550329796, "grad_norm": 0.33942848443984985, "learning_rate": 9.644100573032911e-06, "loss": 0.3334, "step": 4368 }, { "epoch": 0.6264697447662747, "grad_norm": 0.3219550848007202, "learning_rate": 9.643791395493855e-06, "loss": 0.3685, "step": 4369 }, { "epoch": 0.6266131344995698, "grad_norm": 0.3163015842437744, "learning_rate": 9.643482088678792e-06, "loss": 0.3426, "step": 4370 }, { "epoch": 0.6267565242328649, "grad_norm": 0.33370789885520935, "learning_rate": 9.64317265259633e-06, "loss": 0.3355, "step": 4371 }, { "epoch": 0.6268999139661601, "grad_norm": 0.37448593974113464, "learning_rate": 9.642863087255086e-06, "loss": 0.368, "step": 4372 }, { "epoch": 0.6270433036994552, "grad_norm": 0.3477407991886139, "learning_rate": 9.642553392663672e-06, "loss": 0.3295, "step": 4373 }, { "epoch": 0.6271866934327502, "grad_norm": 0.3082531690597534, "learning_rate": 9.642243568830715e-06, "loss": 0.3427, "step": 4374 }, { "epoch": 0.6273300831660453, "grad_norm": 0.3437632918357849, "learning_rate": 9.641933615764838e-06, "loss": 0.3394, "step": 4375 }, { "epoch": 0.6274734728993404, "grad_norm": 0.33743539452552795, "learning_rate": 9.641623533474671e-06, "loss": 0.3636, "step": 4376 }, { "epoch": 0.6276168626326355, "grad_norm": 0.30617737770080566, "learning_rate": 9.641313321968844e-06, "loss": 0.3421, "step": 4377 }, { "epoch": 0.6277602523659306, "grad_norm": 0.366947740316391, "learning_rate": 9.641002981255994e-06, "loss": 0.3549, "step": 4378 }, { "epoch": 0.6279036420992257, "grad_norm": 0.30424389243125916, "learning_rate": 9.64069251134476e-06, "loss": 0.3425, "step": 4379 }, { "epoch": 0.6280470318325208, "grad_norm": 0.31963664293289185, "learning_rate": 9.640381912243784e-06, "loss": 0.3363, "step": 4380 }, { "epoch": 0.6281904215658158, "grad_norm": 0.32714903354644775, "learning_rate": 9.640071183961714e-06, "loss": 0.336, "step": 4381 }, { "epoch": 0.628333811299111, "grad_norm": 0.32926684617996216, "learning_rate": 9.6397603265072e-06, "loss": 0.3548, "step": 4382 }, { "epoch": 0.6284772010324061, "grad_norm": 0.31335529685020447, "learning_rate": 9.6394493398889e-06, "loss": 0.3304, "step": 4383 }, { "epoch": 0.6286205907657012, "grad_norm": 0.32629629969596863, "learning_rate": 9.639138224115463e-06, "loss": 0.3411, "step": 4384 }, { "epoch": 0.6287639804989963, "grad_norm": 0.2941933870315552, "learning_rate": 9.638826979195555e-06, "loss": 0.3402, "step": 4385 }, { "epoch": 0.6289073702322914, "grad_norm": 0.35793036222457886, "learning_rate": 9.63851560513784e-06, "loss": 0.3643, "step": 4386 }, { "epoch": 0.6290507599655865, "grad_norm": 0.3750351667404175, "learning_rate": 9.638204101950986e-06, "loss": 0.3619, "step": 4387 }, { "epoch": 0.6291941496988815, "grad_norm": 0.32771915197372437, "learning_rate": 9.637892469643663e-06, "loss": 0.3682, "step": 4388 }, { "epoch": 0.6293375394321766, "grad_norm": 0.3415127396583557, "learning_rate": 9.63758070822455e-06, "loss": 0.3643, "step": 4389 }, { "epoch": 0.6294809291654717, "grad_norm": 0.3899378776550293, "learning_rate": 9.637268817702323e-06, "loss": 0.3503, "step": 4390 }, { "epoch": 0.6296243188987668, "grad_norm": 0.32026293873786926, "learning_rate": 9.636956798085667e-06, "loss": 0.3701, "step": 4391 }, { "epoch": 0.629767708632062, "grad_norm": 0.32749494910240173, "learning_rate": 9.636644649383266e-06, "loss": 0.3404, "step": 4392 }, { "epoch": 0.6299110983653571, "grad_norm": 0.31670814752578735, "learning_rate": 9.636332371603808e-06, "loss": 0.3332, "step": 4393 }, { "epoch": 0.6300544880986522, "grad_norm": 0.3370712399482727, "learning_rate": 9.636019964755993e-06, "loss": 0.3447, "step": 4394 }, { "epoch": 0.6301978778319473, "grad_norm": 0.3217771053314209, "learning_rate": 9.635707428848511e-06, "loss": 0.3137, "step": 4395 }, { "epoch": 0.6303412675652423, "grad_norm": 0.33172082901000977, "learning_rate": 9.635394763890066e-06, "loss": 0.3287, "step": 4396 }, { "epoch": 0.6304846572985374, "grad_norm": 0.33521395921707153, "learning_rate": 9.635081969889362e-06, "loss": 0.3499, "step": 4397 }, { "epoch": 0.6306280470318325, "grad_norm": 0.31507906317710876, "learning_rate": 9.634769046855106e-06, "loss": 0.3318, "step": 4398 }, { "epoch": 0.6307714367651276, "grad_norm": 0.3436123728752136, "learning_rate": 9.634455994796008e-06, "loss": 0.3426, "step": 4399 }, { "epoch": 0.6309148264984227, "grad_norm": 0.3662380874156952, "learning_rate": 9.634142813720784e-06, "loss": 0.3377, "step": 4400 }, { "epoch": 0.6310582162317178, "grad_norm": 0.31893080472946167, "learning_rate": 9.633829503638155e-06, "loss": 0.3389, "step": 4401 }, { "epoch": 0.631201605965013, "grad_norm": 0.3132559359073639, "learning_rate": 9.633516064556839e-06, "loss": 0.3424, "step": 4402 }, { "epoch": 0.631344995698308, "grad_norm": 0.33821555972099304, "learning_rate": 9.633202496485563e-06, "loss": 0.3434, "step": 4403 }, { "epoch": 0.6314883854316031, "grad_norm": 0.33163368701934814, "learning_rate": 9.63288879943306e-06, "loss": 0.3364, "step": 4404 }, { "epoch": 0.6316317751648982, "grad_norm": 0.3196810781955719, "learning_rate": 9.632574973408056e-06, "loss": 0.3209, "step": 4405 }, { "epoch": 0.6317751648981933, "grad_norm": 0.3447357714176178, "learning_rate": 9.632261018419291e-06, "loss": 0.3412, "step": 4406 }, { "epoch": 0.6319185546314884, "grad_norm": 0.3331955671310425, "learning_rate": 9.631946934475507e-06, "loss": 0.3604, "step": 4407 }, { "epoch": 0.6320619443647835, "grad_norm": 0.36815205216407776, "learning_rate": 9.631632721585444e-06, "loss": 0.3464, "step": 4408 }, { "epoch": 0.6322053340980786, "grad_norm": 0.33382201194763184, "learning_rate": 9.631318379757852e-06, "loss": 0.3561, "step": 4409 }, { "epoch": 0.6323487238313736, "grad_norm": 0.36201760172843933, "learning_rate": 9.631003909001481e-06, "loss": 0.3646, "step": 4410 }, { "epoch": 0.6324921135646687, "grad_norm": 0.39800846576690674, "learning_rate": 9.630689309325083e-06, "loss": 0.3431, "step": 4411 }, { "epoch": 0.6326355032979639, "grad_norm": 0.34119299054145813, "learning_rate": 9.63037458073742e-06, "loss": 0.32, "step": 4412 }, { "epoch": 0.632778893031259, "grad_norm": 0.3479185700416565, "learning_rate": 9.63005972324725e-06, "loss": 0.3388, "step": 4413 }, { "epoch": 0.6329222827645541, "grad_norm": 0.36187469959259033, "learning_rate": 9.62974473686334e-06, "loss": 0.371, "step": 4414 }, { "epoch": 0.6330656724978492, "grad_norm": 0.3582991659641266, "learning_rate": 9.62942962159446e-06, "loss": 0.3697, "step": 4415 }, { "epoch": 0.6332090622311443, "grad_norm": 0.35685431957244873, "learning_rate": 9.62911437744938e-06, "loss": 0.3238, "step": 4416 }, { "epoch": 0.6333524519644393, "grad_norm": 0.367367684841156, "learning_rate": 9.628799004436874e-06, "loss": 0.3563, "step": 4417 }, { "epoch": 0.6334958416977344, "grad_norm": 0.3153360188007355, "learning_rate": 9.628483502565728e-06, "loss": 0.3478, "step": 4418 }, { "epoch": 0.6336392314310295, "grad_norm": 0.34971198439598083, "learning_rate": 9.628167871844718e-06, "loss": 0.3492, "step": 4419 }, { "epoch": 0.6337826211643246, "grad_norm": 0.339580237865448, "learning_rate": 9.627852112282637e-06, "loss": 0.3478, "step": 4420 }, { "epoch": 0.6339260108976197, "grad_norm": 0.33308833837509155, "learning_rate": 9.62753622388827e-06, "loss": 0.3453, "step": 4421 }, { "epoch": 0.6340694006309149, "grad_norm": 0.34641650319099426, "learning_rate": 9.627220206670413e-06, "loss": 0.3222, "step": 4422 }, { "epoch": 0.63421279036421, "grad_norm": 0.3146747648715973, "learning_rate": 9.626904060637866e-06, "loss": 0.3529, "step": 4423 }, { "epoch": 0.634356180097505, "grad_norm": 0.34278494119644165, "learning_rate": 9.626587785799424e-06, "loss": 0.3466, "step": 4424 }, { "epoch": 0.6344995698308001, "grad_norm": 0.34481367468833923, "learning_rate": 9.626271382163897e-06, "loss": 0.3503, "step": 4425 }, { "epoch": 0.6346429595640952, "grad_norm": 0.33229631185531616, "learning_rate": 9.62595484974009e-06, "loss": 0.35, "step": 4426 }, { "epoch": 0.6347863492973903, "grad_norm": 0.351970374584198, "learning_rate": 9.625638188536817e-06, "loss": 0.3615, "step": 4427 }, { "epoch": 0.6349297390306854, "grad_norm": 0.32759982347488403, "learning_rate": 9.625321398562893e-06, "loss": 0.3344, "step": 4428 }, { "epoch": 0.6350731287639805, "grad_norm": 0.33890289068222046, "learning_rate": 9.625004479827135e-06, "loss": 0.3283, "step": 4429 }, { "epoch": 0.6352165184972756, "grad_norm": 0.30199792981147766, "learning_rate": 9.624687432338366e-06, "loss": 0.3558, "step": 4430 }, { "epoch": 0.6353599082305706, "grad_norm": 0.3589778542518616, "learning_rate": 9.624370256105416e-06, "loss": 0.3059, "step": 4431 }, { "epoch": 0.6355032979638657, "grad_norm": 0.3437421917915344, "learning_rate": 9.62405295113711e-06, "loss": 0.3373, "step": 4432 }, { "epoch": 0.6356466876971609, "grad_norm": 0.35106146335601807, "learning_rate": 9.623735517442283e-06, "loss": 0.3215, "step": 4433 }, { "epoch": 0.635790077430456, "grad_norm": 0.3427734971046448, "learning_rate": 9.623417955029772e-06, "loss": 0.3437, "step": 4434 }, { "epoch": 0.6359334671637511, "grad_norm": 0.37479373812675476, "learning_rate": 9.623100263908417e-06, "loss": 0.3419, "step": 4435 }, { "epoch": 0.6360768568970462, "grad_norm": 0.39062175154685974, "learning_rate": 9.622782444087061e-06, "loss": 0.3502, "step": 4436 }, { "epoch": 0.6362202466303413, "grad_norm": 0.3572677671909332, "learning_rate": 9.622464495574555e-06, "loss": 0.3774, "step": 4437 }, { "epoch": 0.6363636363636364, "grad_norm": 0.3654273450374603, "learning_rate": 9.622146418379749e-06, "loss": 0.3424, "step": 4438 }, { "epoch": 0.6365070260969314, "grad_norm": 0.3659156560897827, "learning_rate": 9.621828212511495e-06, "loss": 0.3408, "step": 4439 }, { "epoch": 0.6366504158302265, "grad_norm": 0.31871166825294495, "learning_rate": 9.621509877978655e-06, "loss": 0.318, "step": 4440 }, { "epoch": 0.6367938055635216, "grad_norm": 0.3601683974266052, "learning_rate": 9.621191414790087e-06, "loss": 0.3344, "step": 4441 }, { "epoch": 0.6369371952968167, "grad_norm": 0.39503955841064453, "learning_rate": 9.620872822954658e-06, "loss": 0.3663, "step": 4442 }, { "epoch": 0.6370805850301119, "grad_norm": 0.3138166069984436, "learning_rate": 9.62055410248124e-06, "loss": 0.342, "step": 4443 }, { "epoch": 0.637223974763407, "grad_norm": 0.3491644859313965, "learning_rate": 9.620235253378704e-06, "loss": 0.3636, "step": 4444 }, { "epoch": 0.6373673644967021, "grad_norm": 0.34997496008872986, "learning_rate": 9.619916275655925e-06, "loss": 0.3758, "step": 4445 }, { "epoch": 0.6375107542299971, "grad_norm": 0.32916972041130066, "learning_rate": 9.619597169321783e-06, "loss": 0.3283, "step": 4446 }, { "epoch": 0.6376541439632922, "grad_norm": 0.32575762271881104, "learning_rate": 9.619277934385163e-06, "loss": 0.3507, "step": 4447 }, { "epoch": 0.6377975336965873, "grad_norm": 0.37490588426589966, "learning_rate": 9.61895857085495e-06, "loss": 0.345, "step": 4448 }, { "epoch": 0.6379409234298824, "grad_norm": 0.3222721517086029, "learning_rate": 9.618639078740037e-06, "loss": 0.3419, "step": 4449 }, { "epoch": 0.6380843131631775, "grad_norm": 0.32756999135017395, "learning_rate": 9.618319458049317e-06, "loss": 0.3482, "step": 4450 }, { "epoch": 0.6382277028964726, "grad_norm": 0.34207722544670105, "learning_rate": 9.617999708791686e-06, "loss": 0.3445, "step": 4451 }, { "epoch": 0.6383710926297677, "grad_norm": 0.3473362624645233, "learning_rate": 9.617679830976047e-06, "loss": 0.3449, "step": 4452 }, { "epoch": 0.6385144823630629, "grad_norm": 0.3298107981681824, "learning_rate": 9.617359824611306e-06, "loss": 0.3264, "step": 4453 }, { "epoch": 0.6386578720963579, "grad_norm": 0.34102755784988403, "learning_rate": 9.61703968970637e-06, "loss": 0.3442, "step": 4454 }, { "epoch": 0.638801261829653, "grad_norm": 0.33010730147361755, "learning_rate": 9.616719426270152e-06, "loss": 0.3477, "step": 4455 }, { "epoch": 0.6389446515629481, "grad_norm": 0.33417046070098877, "learning_rate": 9.616399034311565e-06, "loss": 0.3705, "step": 4456 }, { "epoch": 0.6390880412962432, "grad_norm": 0.38003382086753845, "learning_rate": 9.616078513839533e-06, "loss": 0.3434, "step": 4457 }, { "epoch": 0.6392314310295383, "grad_norm": 0.3474103510379791, "learning_rate": 9.615757864862973e-06, "loss": 0.3454, "step": 4458 }, { "epoch": 0.6393748207628334, "grad_norm": 0.3389139771461487, "learning_rate": 9.615437087390816e-06, "loss": 0.3462, "step": 4459 }, { "epoch": 0.6395182104961284, "grad_norm": 0.3718365430831909, "learning_rate": 9.615116181431992e-06, "loss": 0.3564, "step": 4460 }, { "epoch": 0.6396616002294235, "grad_norm": 0.31306320428848267, "learning_rate": 9.61479514699543e-06, "loss": 0.3383, "step": 4461 }, { "epoch": 0.6398049899627186, "grad_norm": 0.367482990026474, "learning_rate": 9.614473984090071e-06, "loss": 0.3641, "step": 4462 }, { "epoch": 0.6399483796960138, "grad_norm": 0.34778934717178345, "learning_rate": 9.614152692724854e-06, "loss": 0.369, "step": 4463 }, { "epoch": 0.6400917694293089, "grad_norm": 0.3219737708568573, "learning_rate": 9.613831272908726e-06, "loss": 0.3366, "step": 4464 }, { "epoch": 0.640235159162604, "grad_norm": 0.37321949005126953, "learning_rate": 9.613509724650632e-06, "loss": 0.3433, "step": 4465 }, { "epoch": 0.6403785488958991, "grad_norm": 0.3189009726047516, "learning_rate": 9.613188047959522e-06, "loss": 0.3418, "step": 4466 }, { "epoch": 0.6405219386291942, "grad_norm": 0.33165401220321655, "learning_rate": 9.612866242844357e-06, "loss": 0.3332, "step": 4467 }, { "epoch": 0.6406653283624892, "grad_norm": 0.33128491044044495, "learning_rate": 9.61254430931409e-06, "loss": 0.3351, "step": 4468 }, { "epoch": 0.6408087180957843, "grad_norm": 0.3245438039302826, "learning_rate": 9.612222247377683e-06, "loss": 0.3234, "step": 4469 }, { "epoch": 0.6409521078290794, "grad_norm": 0.3405540883541107, "learning_rate": 9.611900057044105e-06, "loss": 0.3322, "step": 4470 }, { "epoch": 0.6410954975623745, "grad_norm": 0.34747958183288574, "learning_rate": 9.611577738322325e-06, "loss": 0.3495, "step": 4471 }, { "epoch": 0.6412388872956696, "grad_norm": 0.3087257146835327, "learning_rate": 9.611255291221312e-06, "loss": 0.3256, "step": 4472 }, { "epoch": 0.6413822770289648, "grad_norm": 0.349147766828537, "learning_rate": 9.610932715750048e-06, "loss": 0.3379, "step": 4473 }, { "epoch": 0.6415256667622599, "grad_norm": 0.320499449968338, "learning_rate": 9.610610011917508e-06, "loss": 0.3595, "step": 4474 }, { "epoch": 0.641669056495555, "grad_norm": 0.349936842918396, "learning_rate": 9.610287179732678e-06, "loss": 0.3352, "step": 4475 }, { "epoch": 0.64181244622885, "grad_norm": 0.3446026146411896, "learning_rate": 9.609964219204547e-06, "loss": 0.3437, "step": 4476 }, { "epoch": 0.6419558359621451, "grad_norm": 0.3469204604625702, "learning_rate": 9.6096411303421e-06, "loss": 0.3617, "step": 4477 }, { "epoch": 0.6420992256954402, "grad_norm": 0.35035818815231323, "learning_rate": 9.609317913154338e-06, "loss": 0.3444, "step": 4478 }, { "epoch": 0.6422426154287353, "grad_norm": 0.320620596408844, "learning_rate": 9.608994567650254e-06, "loss": 0.3361, "step": 4479 }, { "epoch": 0.6423860051620304, "grad_norm": 0.32280832529067993, "learning_rate": 9.608671093838852e-06, "loss": 0.3457, "step": 4480 }, { "epoch": 0.6425293948953255, "grad_norm": 0.3461480736732483, "learning_rate": 9.608347491729136e-06, "loss": 0.3601, "step": 4481 }, { "epoch": 0.6426727846286205, "grad_norm": 0.35104113817214966, "learning_rate": 9.608023761330114e-06, "loss": 0.3334, "step": 4482 }, { "epoch": 0.6428161743619157, "grad_norm": 0.38532447814941406, "learning_rate": 9.607699902650801e-06, "loss": 0.368, "step": 4483 }, { "epoch": 0.6429595640952108, "grad_norm": 0.31680208444595337, "learning_rate": 9.60737591570021e-06, "loss": 0.3305, "step": 4484 }, { "epoch": 0.6431029538285059, "grad_norm": 0.37201884388923645, "learning_rate": 9.60705180048736e-06, "loss": 0.3634, "step": 4485 }, { "epoch": 0.643246343561801, "grad_norm": 0.3258952498435974, "learning_rate": 9.606727557021276e-06, "loss": 0.3368, "step": 4486 }, { "epoch": 0.6433897332950961, "grad_norm": 0.34208613634109497, "learning_rate": 9.606403185310981e-06, "loss": 0.3254, "step": 4487 }, { "epoch": 0.6435331230283912, "grad_norm": 0.3614274263381958, "learning_rate": 9.606078685365509e-06, "loss": 0.3432, "step": 4488 }, { "epoch": 0.6436765127616862, "grad_norm": 0.33083733916282654, "learning_rate": 9.605754057193892e-06, "loss": 0.3563, "step": 4489 }, { "epoch": 0.6438199024949813, "grad_norm": 0.3746029734611511, "learning_rate": 9.605429300805166e-06, "loss": 0.3449, "step": 4490 }, { "epoch": 0.6439632922282764, "grad_norm": 0.3980678915977478, "learning_rate": 9.605104416208375e-06, "loss": 0.3605, "step": 4491 }, { "epoch": 0.6441066819615715, "grad_norm": 0.30632856488227844, "learning_rate": 9.604779403412559e-06, "loss": 0.3193, "step": 4492 }, { "epoch": 0.6442500716948667, "grad_norm": 0.3461783528327942, "learning_rate": 9.604454262426769e-06, "loss": 0.3373, "step": 4493 }, { "epoch": 0.6443934614281618, "grad_norm": 0.3537108302116394, "learning_rate": 9.604128993260055e-06, "loss": 0.335, "step": 4494 }, { "epoch": 0.6445368511614569, "grad_norm": 0.3237949013710022, "learning_rate": 9.603803595921471e-06, "loss": 0.3241, "step": 4495 }, { "epoch": 0.644680240894752, "grad_norm": 0.3172100782394409, "learning_rate": 9.603478070420078e-06, "loss": 0.3456, "step": 4496 }, { "epoch": 0.644823630628047, "grad_norm": 0.33673179149627686, "learning_rate": 9.603152416764939e-06, "loss": 0.345, "step": 4497 }, { "epoch": 0.6449670203613421, "grad_norm": 0.3398747146129608, "learning_rate": 9.602826634965115e-06, "loss": 0.3206, "step": 4498 }, { "epoch": 0.6451104100946372, "grad_norm": 0.3161279559135437, "learning_rate": 9.602500725029677e-06, "loss": 0.3307, "step": 4499 }, { "epoch": 0.6452537998279323, "grad_norm": 0.338354229927063, "learning_rate": 9.602174686967701e-06, "loss": 0.349, "step": 4500 }, { "epoch": 0.6453971895612274, "grad_norm": 0.34348198771476746, "learning_rate": 9.60184852078826e-06, "loss": 0.33, "step": 4501 }, { "epoch": 0.6455405792945225, "grad_norm": 0.3915676176548004, "learning_rate": 9.601522226500437e-06, "loss": 0.3387, "step": 4502 }, { "epoch": 0.6456839690278177, "grad_norm": 0.3494124710559845, "learning_rate": 9.601195804113312e-06, "loss": 0.3678, "step": 4503 }, { "epoch": 0.6458273587611127, "grad_norm": 0.312519907951355, "learning_rate": 9.600869253635974e-06, "loss": 0.3407, "step": 4504 }, { "epoch": 0.6459707484944078, "grad_norm": 0.3573874235153198, "learning_rate": 9.600542575077514e-06, "loss": 0.3488, "step": 4505 }, { "epoch": 0.6461141382277029, "grad_norm": 0.3443455994129181, "learning_rate": 9.600215768447025e-06, "loss": 0.3258, "step": 4506 }, { "epoch": 0.646257527960998, "grad_norm": 0.36206620931625366, "learning_rate": 9.599888833753605e-06, "loss": 0.3175, "step": 4507 }, { "epoch": 0.6464009176942931, "grad_norm": 0.31369009613990784, "learning_rate": 9.599561771006357e-06, "loss": 0.3327, "step": 4508 }, { "epoch": 0.6465443074275882, "grad_norm": 0.3626684248447418, "learning_rate": 9.599234580214385e-06, "loss": 0.3513, "step": 4509 }, { "epoch": 0.6466876971608833, "grad_norm": 0.37607407569885254, "learning_rate": 9.598907261386796e-06, "loss": 0.3319, "step": 4510 }, { "epoch": 0.6468310868941783, "grad_norm": 0.3232974112033844, "learning_rate": 9.598579814532706e-06, "loss": 0.3315, "step": 4511 }, { "epoch": 0.6469744766274734, "grad_norm": 0.35437285900115967, "learning_rate": 9.598252239661223e-06, "loss": 0.3442, "step": 4512 }, { "epoch": 0.6471178663607686, "grad_norm": 0.3322053849697113, "learning_rate": 9.597924536781474e-06, "loss": 0.3403, "step": 4513 }, { "epoch": 0.6472612560940637, "grad_norm": 0.35003405809402466, "learning_rate": 9.59759670590258e-06, "loss": 0.3515, "step": 4514 }, { "epoch": 0.6474046458273588, "grad_norm": 0.3262450695037842, "learning_rate": 9.597268747033663e-06, "loss": 0.3421, "step": 4515 }, { "epoch": 0.6475480355606539, "grad_norm": 0.33310988545417786, "learning_rate": 9.59694066018386e-06, "loss": 0.3491, "step": 4516 }, { "epoch": 0.647691425293949, "grad_norm": 0.3507440388202667, "learning_rate": 9.596612445362297e-06, "loss": 0.327, "step": 4517 }, { "epoch": 0.647834815027244, "grad_norm": 0.36172211170196533, "learning_rate": 9.596284102578115e-06, "loss": 0.3206, "step": 4518 }, { "epoch": 0.6479782047605391, "grad_norm": 0.3138893246650696, "learning_rate": 9.595955631840454e-06, "loss": 0.3363, "step": 4519 }, { "epoch": 0.6481215944938342, "grad_norm": 0.341119647026062, "learning_rate": 9.59562703315846e-06, "loss": 0.3405, "step": 4520 }, { "epoch": 0.6482649842271293, "grad_norm": 0.3557675778865814, "learning_rate": 9.595298306541277e-06, "loss": 0.3388, "step": 4521 }, { "epoch": 0.6484083739604244, "grad_norm": 0.315584659576416, "learning_rate": 9.594969451998059e-06, "loss": 0.3214, "step": 4522 }, { "epoch": 0.6485517636937196, "grad_norm": 0.36153537034988403, "learning_rate": 9.59464046953796e-06, "loss": 0.3714, "step": 4523 }, { "epoch": 0.6486951534270147, "grad_norm": 0.4096900224685669, "learning_rate": 9.594311359170138e-06, "loss": 0.3446, "step": 4524 }, { "epoch": 0.6488385431603098, "grad_norm": 0.3281312882900238, "learning_rate": 9.593982120903754e-06, "loss": 0.3358, "step": 4525 }, { "epoch": 0.6489819328936048, "grad_norm": 0.3335077166557312, "learning_rate": 9.593652754747975e-06, "loss": 0.3216, "step": 4526 }, { "epoch": 0.6491253226268999, "grad_norm": 0.39260005950927734, "learning_rate": 9.59332326071197e-06, "loss": 0.385, "step": 4527 }, { "epoch": 0.649268712360195, "grad_norm": 0.34186989068984985, "learning_rate": 9.592993638804912e-06, "loss": 0.3223, "step": 4528 }, { "epoch": 0.6494121020934901, "grad_norm": 0.36629828810691833, "learning_rate": 9.592663889035975e-06, "loss": 0.3336, "step": 4529 }, { "epoch": 0.6495554918267852, "grad_norm": 0.3623661994934082, "learning_rate": 9.592334011414341e-06, "loss": 0.3212, "step": 4530 }, { "epoch": 0.6496988815600803, "grad_norm": 0.3528212308883667, "learning_rate": 9.592004005949193e-06, "loss": 0.348, "step": 4531 }, { "epoch": 0.6498422712933754, "grad_norm": 0.34919625520706177, "learning_rate": 9.591673872649717e-06, "loss": 0.3507, "step": 4532 }, { "epoch": 0.6499856610266704, "grad_norm": 0.32658055424690247, "learning_rate": 9.591343611525103e-06, "loss": 0.3521, "step": 4533 }, { "epoch": 0.6501290507599656, "grad_norm": 0.366763174533844, "learning_rate": 9.591013222584547e-06, "loss": 0.3118, "step": 4534 }, { "epoch": 0.6502724404932607, "grad_norm": 0.34018954634666443, "learning_rate": 9.590682705837244e-06, "loss": 0.331, "step": 4535 }, { "epoch": 0.6504158302265558, "grad_norm": 0.3388351798057556, "learning_rate": 9.590352061292396e-06, "loss": 0.3337, "step": 4536 }, { "epoch": 0.6505592199598509, "grad_norm": 0.3543311655521393, "learning_rate": 9.590021288959209e-06, "loss": 0.35, "step": 4537 }, { "epoch": 0.650702609693146, "grad_norm": 0.3442973792552948, "learning_rate": 9.58969038884689e-06, "loss": 0.3261, "step": 4538 }, { "epoch": 0.6508459994264411, "grad_norm": 0.372959703207016, "learning_rate": 9.58935936096465e-06, "loss": 0.345, "step": 4539 }, { "epoch": 0.6509893891597361, "grad_norm": 0.30880481004714966, "learning_rate": 9.589028205321705e-06, "loss": 0.3458, "step": 4540 }, { "epoch": 0.6511327788930312, "grad_norm": 0.370486855506897, "learning_rate": 9.588696921927274e-06, "loss": 0.3452, "step": 4541 }, { "epoch": 0.6512761686263263, "grad_norm": 0.3455713093280792, "learning_rate": 9.588365510790582e-06, "loss": 0.3379, "step": 4542 }, { "epoch": 0.6514195583596214, "grad_norm": 0.327099084854126, "learning_rate": 9.58803397192085e-06, "loss": 0.3439, "step": 4543 }, { "epoch": 0.6515629480929166, "grad_norm": 0.32535022497177124, "learning_rate": 9.58770230532731e-06, "loss": 0.3462, "step": 4544 }, { "epoch": 0.6517063378262117, "grad_norm": 0.3619349002838135, "learning_rate": 9.587370511019196e-06, "loss": 0.3369, "step": 4545 }, { "epoch": 0.6518497275595068, "grad_norm": 0.3440040349960327, "learning_rate": 9.587038589005741e-06, "loss": 0.3455, "step": 4546 }, { "epoch": 0.6519931172928018, "grad_norm": 0.3629455268383026, "learning_rate": 9.586706539296191e-06, "loss": 0.3266, "step": 4547 }, { "epoch": 0.6521365070260969, "grad_norm": 0.34152379631996155, "learning_rate": 9.586374361899786e-06, "loss": 0.3358, "step": 4548 }, { "epoch": 0.652279896759392, "grad_norm": 0.4052877426147461, "learning_rate": 9.586042056825773e-06, "loss": 0.3526, "step": 4549 }, { "epoch": 0.6524232864926871, "grad_norm": 0.36487436294555664, "learning_rate": 9.585709624083405e-06, "loss": 0.3407, "step": 4550 }, { "epoch": 0.6525666762259822, "grad_norm": 0.4250762462615967, "learning_rate": 9.585377063681933e-06, "loss": 0.3534, "step": 4551 }, { "epoch": 0.6527100659592773, "grad_norm": 0.38380733132362366, "learning_rate": 9.585044375630619e-06, "loss": 0.3343, "step": 4552 }, { "epoch": 0.6528534556925724, "grad_norm": 0.3984414041042328, "learning_rate": 9.584711559938723e-06, "loss": 0.3377, "step": 4553 }, { "epoch": 0.6529968454258676, "grad_norm": 0.30595582723617554, "learning_rate": 9.584378616615509e-06, "loss": 0.3428, "step": 4554 }, { "epoch": 0.6531402351591626, "grad_norm": 0.32911697030067444, "learning_rate": 9.584045545670246e-06, "loss": 0.3177, "step": 4555 }, { "epoch": 0.6532836248924577, "grad_norm": 0.3232525587081909, "learning_rate": 9.583712347112208e-06, "loss": 0.3586, "step": 4556 }, { "epoch": 0.6534270146257528, "grad_norm": 0.36489924788475037, "learning_rate": 9.583379020950668e-06, "loss": 0.3499, "step": 4557 }, { "epoch": 0.6535704043590479, "grad_norm": 0.31934335827827454, "learning_rate": 9.583045567194908e-06, "loss": 0.3304, "step": 4558 }, { "epoch": 0.653713794092343, "grad_norm": 0.358833372592926, "learning_rate": 9.582711985854208e-06, "loss": 0.3371, "step": 4559 }, { "epoch": 0.6538571838256381, "grad_norm": 0.36287111043930054, "learning_rate": 9.582378276937856e-06, "loss": 0.3568, "step": 4560 }, { "epoch": 0.6540005735589332, "grad_norm": 0.319770485162735, "learning_rate": 9.582044440455143e-06, "loss": 0.3374, "step": 4561 }, { "epoch": 0.6541439632922282, "grad_norm": 0.3465580642223358, "learning_rate": 9.58171047641536e-06, "loss": 0.337, "step": 4562 }, { "epoch": 0.6542873530255233, "grad_norm": 0.3497934341430664, "learning_rate": 9.581376384827805e-06, "loss": 0.3496, "step": 4563 }, { "epoch": 0.6544307427588185, "grad_norm": 0.31152525544166565, "learning_rate": 9.581042165701781e-06, "loss": 0.3367, "step": 4564 }, { "epoch": 0.6545741324921136, "grad_norm": 0.31645530462265015, "learning_rate": 9.580707819046588e-06, "loss": 0.3355, "step": 4565 }, { "epoch": 0.6547175222254087, "grad_norm": 0.3365459740161896, "learning_rate": 9.580373344871536e-06, "loss": 0.3283, "step": 4566 }, { "epoch": 0.6548609119587038, "grad_norm": 0.32901230454444885, "learning_rate": 9.580038743185938e-06, "loss": 0.3369, "step": 4567 }, { "epoch": 0.6550043016919989, "grad_norm": 0.3394016623497009, "learning_rate": 9.579704013999105e-06, "loss": 0.3677, "step": 4568 }, { "epoch": 0.6551476914252939, "grad_norm": 0.32891449332237244, "learning_rate": 9.579369157320358e-06, "loss": 0.3442, "step": 4569 }, { "epoch": 0.655291081158589, "grad_norm": 0.33891424536705017, "learning_rate": 9.579034173159017e-06, "loss": 0.338, "step": 4570 }, { "epoch": 0.6554344708918841, "grad_norm": 0.31001871824264526, "learning_rate": 9.578699061524408e-06, "loss": 0.3347, "step": 4571 }, { "epoch": 0.6555778606251792, "grad_norm": 0.3308751881122589, "learning_rate": 9.578363822425863e-06, "loss": 0.3288, "step": 4572 }, { "epoch": 0.6557212503584743, "grad_norm": 0.35994911193847656, "learning_rate": 9.578028455872709e-06, "loss": 0.3541, "step": 4573 }, { "epoch": 0.6558646400917695, "grad_norm": 0.3213261067867279, "learning_rate": 9.577692961874287e-06, "loss": 0.3476, "step": 4574 }, { "epoch": 0.6560080298250646, "grad_norm": 0.31404125690460205, "learning_rate": 9.577357340439934e-06, "loss": 0.3522, "step": 4575 }, { "epoch": 0.6561514195583596, "grad_norm": 0.3453184962272644, "learning_rate": 9.577021591578995e-06, "loss": 0.3368, "step": 4576 }, { "epoch": 0.6562948092916547, "grad_norm": 0.3180348873138428, "learning_rate": 9.576685715300815e-06, "loss": 0.348, "step": 4577 }, { "epoch": 0.6564381990249498, "grad_norm": 0.31761670112609863, "learning_rate": 9.576349711614745e-06, "loss": 0.3236, "step": 4578 }, { "epoch": 0.6565815887582449, "grad_norm": 0.3353715240955353, "learning_rate": 9.57601358053014e-06, "loss": 0.3301, "step": 4579 }, { "epoch": 0.65672497849154, "grad_norm": 0.3470332622528076, "learning_rate": 9.575677322056354e-06, "loss": 0.3444, "step": 4580 }, { "epoch": 0.6568683682248351, "grad_norm": 0.32043129205703735, "learning_rate": 9.57534093620275e-06, "loss": 0.3231, "step": 4581 }, { "epoch": 0.6570117579581302, "grad_norm": 0.34303292632102966, "learning_rate": 9.575004422978694e-06, "loss": 0.3583, "step": 4582 }, { "epoch": 0.6571551476914252, "grad_norm": 0.3347238302230835, "learning_rate": 9.574667782393552e-06, "loss": 0.3384, "step": 4583 }, { "epoch": 0.6572985374247204, "grad_norm": 0.3317907452583313, "learning_rate": 9.574331014456696e-06, "loss": 0.3314, "step": 4584 }, { "epoch": 0.6574419271580155, "grad_norm": 0.3356834948062897, "learning_rate": 9.5739941191775e-06, "loss": 0.3512, "step": 4585 }, { "epoch": 0.6575853168913106, "grad_norm": 0.340043306350708, "learning_rate": 9.573657096565348e-06, "loss": 0.3422, "step": 4586 }, { "epoch": 0.6577287066246057, "grad_norm": 0.32289832830429077, "learning_rate": 9.573319946629614e-06, "loss": 0.3442, "step": 4587 }, { "epoch": 0.6578720963579008, "grad_norm": 0.3633677661418915, "learning_rate": 9.57298266937969e-06, "loss": 0.3618, "step": 4588 }, { "epoch": 0.6580154860911959, "grad_norm": 0.3647274374961853, "learning_rate": 9.572645264824961e-06, "loss": 0.328, "step": 4589 }, { "epoch": 0.658158875824491, "grad_norm": 0.31038954854011536, "learning_rate": 9.572307732974823e-06, "loss": 0.3395, "step": 4590 }, { "epoch": 0.658302265557786, "grad_norm": 0.3281446099281311, "learning_rate": 9.571970073838671e-06, "loss": 0.3499, "step": 4591 }, { "epoch": 0.6584456552910811, "grad_norm": 0.3158714175224304, "learning_rate": 9.571632287425905e-06, "loss": 0.328, "step": 4592 }, { "epoch": 0.6585890450243762, "grad_norm": 0.308998167514801, "learning_rate": 9.57129437374593e-06, "loss": 0.3331, "step": 4593 }, { "epoch": 0.6587324347576714, "grad_norm": 0.3212130069732666, "learning_rate": 9.57095633280815e-06, "loss": 0.3366, "step": 4594 }, { "epoch": 0.6588758244909665, "grad_norm": 0.3401208221912384, "learning_rate": 9.570618164621977e-06, "loss": 0.3402, "step": 4595 }, { "epoch": 0.6590192142242616, "grad_norm": 0.31622645258903503, "learning_rate": 9.570279869196826e-06, "loss": 0.3438, "step": 4596 }, { "epoch": 0.6591626039575567, "grad_norm": 0.31651514768600464, "learning_rate": 9.569941446542114e-06, "loss": 0.3517, "step": 4597 }, { "epoch": 0.6593059936908517, "grad_norm": 0.3534564673900604, "learning_rate": 9.569602896667263e-06, "loss": 0.3316, "step": 4598 }, { "epoch": 0.6594493834241468, "grad_norm": 0.32265540957450867, "learning_rate": 9.569264219581696e-06, "loss": 0.3232, "step": 4599 }, { "epoch": 0.6595927731574419, "grad_norm": 0.35210007429122925, "learning_rate": 9.56892541529484e-06, "loss": 0.3385, "step": 4600 }, { "epoch": 0.659736162890737, "grad_norm": 0.2999691367149353, "learning_rate": 9.568586483816131e-06, "loss": 0.3436, "step": 4601 }, { "epoch": 0.6598795526240321, "grad_norm": 0.33381086587905884, "learning_rate": 9.568247425155e-06, "loss": 0.3459, "step": 4602 }, { "epoch": 0.6600229423573272, "grad_norm": 0.32578200101852417, "learning_rate": 9.56790823932089e-06, "loss": 0.3347, "step": 4603 }, { "epoch": 0.6601663320906224, "grad_norm": 0.339430570602417, "learning_rate": 9.567568926323241e-06, "loss": 0.318, "step": 4604 }, { "epoch": 0.6603097218239175, "grad_norm": 0.3531012535095215, "learning_rate": 9.567229486171499e-06, "loss": 0.3326, "step": 4605 }, { "epoch": 0.6604531115572125, "grad_norm": 0.3198702037334442, "learning_rate": 9.566889918875114e-06, "loss": 0.3577, "step": 4606 }, { "epoch": 0.6605965012905076, "grad_norm": 0.3307073712348938, "learning_rate": 9.566550224443539e-06, "loss": 0.3252, "step": 4607 }, { "epoch": 0.6607398910238027, "grad_norm": 0.3919183313846588, "learning_rate": 9.56621040288623e-06, "loss": 0.3523, "step": 4608 }, { "epoch": 0.6608832807570978, "grad_norm": 0.3346807062625885, "learning_rate": 9.565870454212647e-06, "loss": 0.3574, "step": 4609 }, { "epoch": 0.6610266704903929, "grad_norm": 0.34495145082473755, "learning_rate": 9.565530378432254e-06, "loss": 0.3456, "step": 4610 }, { "epoch": 0.661170060223688, "grad_norm": 0.3382564187049866, "learning_rate": 9.56519017555452e-06, "loss": 0.3579, "step": 4611 }, { "epoch": 0.661313449956983, "grad_norm": 0.35096660256385803, "learning_rate": 9.564849845588913e-06, "loss": 0.3476, "step": 4612 }, { "epoch": 0.6614568396902781, "grad_norm": 0.3619763255119324, "learning_rate": 9.564509388544908e-06, "loss": 0.3434, "step": 4613 }, { "epoch": 0.6616002294235733, "grad_norm": 0.34537938237190247, "learning_rate": 9.564168804431984e-06, "loss": 0.3138, "step": 4614 }, { "epoch": 0.6617436191568684, "grad_norm": 0.3506176769733429, "learning_rate": 9.563828093259621e-06, "loss": 0.3449, "step": 4615 }, { "epoch": 0.6618870088901635, "grad_norm": 0.36646297574043274, "learning_rate": 9.563487255037304e-06, "loss": 0.3277, "step": 4616 }, { "epoch": 0.6620303986234586, "grad_norm": 0.3475499153137207, "learning_rate": 9.563146289774522e-06, "loss": 0.3274, "step": 4617 }, { "epoch": 0.6621737883567537, "grad_norm": 0.31626462936401367, "learning_rate": 9.56280519748077e-06, "loss": 0.344, "step": 4618 }, { "epoch": 0.6623171780900488, "grad_norm": 0.38369956612586975, "learning_rate": 9.562463978165536e-06, "loss": 0.3436, "step": 4619 }, { "epoch": 0.6624605678233438, "grad_norm": 0.3466903269290924, "learning_rate": 9.562122631838325e-06, "loss": 0.3483, "step": 4620 }, { "epoch": 0.6626039575566389, "grad_norm": 0.31690216064453125, "learning_rate": 9.561781158508637e-06, "loss": 0.3362, "step": 4621 }, { "epoch": 0.662747347289934, "grad_norm": 0.32240960001945496, "learning_rate": 9.561439558185979e-06, "loss": 0.3403, "step": 4622 }, { "epoch": 0.6628907370232291, "grad_norm": 0.34082189202308655, "learning_rate": 9.56109783087986e-06, "loss": 0.3458, "step": 4623 }, { "epoch": 0.6630341267565242, "grad_norm": 0.3389882445335388, "learning_rate": 9.560755976599795e-06, "loss": 0.339, "step": 4624 }, { "epoch": 0.6631775164898194, "grad_norm": 0.3075895607471466, "learning_rate": 9.560413995355299e-06, "loss": 0.3483, "step": 4625 }, { "epoch": 0.6633209062231145, "grad_norm": 0.33506011962890625, "learning_rate": 9.560071887155893e-06, "loss": 0.343, "step": 4626 }, { "epoch": 0.6634642959564095, "grad_norm": 0.34422919154167175, "learning_rate": 9.559729652011098e-06, "loss": 0.3705, "step": 4627 }, { "epoch": 0.6636076856897046, "grad_norm": 0.34217801690101624, "learning_rate": 9.559387289930446e-06, "loss": 0.3425, "step": 4628 }, { "epoch": 0.6637510754229997, "grad_norm": 0.3567781448364258, "learning_rate": 9.559044800923465e-06, "loss": 0.3532, "step": 4629 }, { "epoch": 0.6638944651562948, "grad_norm": 0.35000333189964294, "learning_rate": 9.55870218499969e-06, "loss": 0.3382, "step": 4630 }, { "epoch": 0.6640378548895899, "grad_norm": 0.36215081810951233, "learning_rate": 9.558359442168659e-06, "loss": 0.3328, "step": 4631 }, { "epoch": 0.664181244622885, "grad_norm": 0.4131838381290436, "learning_rate": 9.558016572439912e-06, "loss": 0.3482, "step": 4632 }, { "epoch": 0.66432463435618, "grad_norm": 0.3153582215309143, "learning_rate": 9.557673575822997e-06, "loss": 0.3421, "step": 4633 }, { "epoch": 0.6644680240894751, "grad_norm": 0.3238140046596527, "learning_rate": 9.557330452327459e-06, "loss": 0.3394, "step": 4634 }, { "epoch": 0.6646114138227703, "grad_norm": 0.3672997057437897, "learning_rate": 9.556987201962853e-06, "loss": 0.3751, "step": 4635 }, { "epoch": 0.6647548035560654, "grad_norm": 0.3261988162994385, "learning_rate": 9.556643824738732e-06, "loss": 0.351, "step": 4636 }, { "epoch": 0.6648981932893605, "grad_norm": 0.3563147187232971, "learning_rate": 9.556300320664655e-06, "loss": 0.3269, "step": 4637 }, { "epoch": 0.6650415830226556, "grad_norm": 0.3375175893306732, "learning_rate": 9.55595668975019e-06, "loss": 0.3226, "step": 4638 }, { "epoch": 0.6651849727559507, "grad_norm": 0.33219537138938904, "learning_rate": 9.555612932004897e-06, "loss": 0.3482, "step": 4639 }, { "epoch": 0.6653283624892458, "grad_norm": 0.3739750385284424, "learning_rate": 9.555269047438346e-06, "loss": 0.348, "step": 4640 }, { "epoch": 0.6654717522225408, "grad_norm": 0.3167709708213806, "learning_rate": 9.554925036060114e-06, "loss": 0.3233, "step": 4641 }, { "epoch": 0.6656151419558359, "grad_norm": 0.35400089621543884, "learning_rate": 9.554580897879776e-06, "loss": 0.3331, "step": 4642 }, { "epoch": 0.665758531689131, "grad_norm": 0.36666733026504517, "learning_rate": 9.554236632906912e-06, "loss": 0.3461, "step": 4643 }, { "epoch": 0.6659019214224261, "grad_norm": 0.36433807015419006, "learning_rate": 9.553892241151107e-06, "loss": 0.3547, "step": 4644 }, { "epoch": 0.6660453111557213, "grad_norm": 0.35957735776901245, "learning_rate": 9.553547722621943e-06, "loss": 0.3432, "step": 4645 }, { "epoch": 0.6661887008890164, "grad_norm": 0.34545913338661194, "learning_rate": 9.55320307732902e-06, "loss": 0.3424, "step": 4646 }, { "epoch": 0.6663320906223115, "grad_norm": 0.357211172580719, "learning_rate": 9.552858305281924e-06, "loss": 0.3454, "step": 4647 }, { "epoch": 0.6664754803556066, "grad_norm": 0.32701966166496277, "learning_rate": 9.552513406490259e-06, "loss": 0.352, "step": 4648 }, { "epoch": 0.6666188700889016, "grad_norm": 0.35262301564216614, "learning_rate": 9.552168380963623e-06, "loss": 0.3312, "step": 4649 }, { "epoch": 0.6667622598221967, "grad_norm": 0.341509073972702, "learning_rate": 9.55182322871162e-06, "loss": 0.3598, "step": 4650 }, { "epoch": 0.6669056495554918, "grad_norm": 0.34027737379074097, "learning_rate": 9.551477949743861e-06, "loss": 0.3303, "step": 4651 }, { "epoch": 0.6670490392887869, "grad_norm": 0.3426494896411896, "learning_rate": 9.551132544069958e-06, "loss": 0.346, "step": 4652 }, { "epoch": 0.667192429022082, "grad_norm": 0.3536924123764038, "learning_rate": 9.550787011699527e-06, "loss": 0.3836, "step": 4653 }, { "epoch": 0.6673358187553771, "grad_norm": 0.3278310000896454, "learning_rate": 9.550441352642184e-06, "loss": 0.3463, "step": 4654 }, { "epoch": 0.6674792084886723, "grad_norm": 0.30352723598480225, "learning_rate": 9.550095566907556e-06, "loss": 0.3146, "step": 4655 }, { "epoch": 0.6676225982219673, "grad_norm": 0.3471587598323822, "learning_rate": 9.549749654505265e-06, "loss": 0.3304, "step": 4656 }, { "epoch": 0.6677659879552624, "grad_norm": 0.2975069284439087, "learning_rate": 9.549403615444943e-06, "loss": 0.3329, "step": 4657 }, { "epoch": 0.6679093776885575, "grad_norm": 0.3208134174346924, "learning_rate": 9.549057449736221e-06, "loss": 0.3205, "step": 4658 }, { "epoch": 0.6680527674218526, "grad_norm": 0.3379124104976654, "learning_rate": 9.54871115738874e-06, "loss": 0.3643, "step": 4659 }, { "epoch": 0.6681961571551477, "grad_norm": 0.3250288963317871, "learning_rate": 9.548364738412135e-06, "loss": 0.3304, "step": 4660 }, { "epoch": 0.6683395468884428, "grad_norm": 0.34735190868377686, "learning_rate": 9.548018192816056e-06, "loss": 0.3391, "step": 4661 }, { "epoch": 0.6684829366217379, "grad_norm": 0.35755783319473267, "learning_rate": 9.547671520610143e-06, "loss": 0.3401, "step": 4662 }, { "epoch": 0.6686263263550329, "grad_norm": 0.33865872025489807, "learning_rate": 9.547324721804052e-06, "loss": 0.3534, "step": 4663 }, { "epoch": 0.668769716088328, "grad_norm": 0.3362581133842468, "learning_rate": 9.546977796407437e-06, "loss": 0.3435, "step": 4664 }, { "epoch": 0.6689131058216232, "grad_norm": 0.3594667613506317, "learning_rate": 9.546630744429952e-06, "loss": 0.3402, "step": 4665 }, { "epoch": 0.6690564955549183, "grad_norm": 0.33668243885040283, "learning_rate": 9.546283565881264e-06, "loss": 0.3124, "step": 4666 }, { "epoch": 0.6691998852882134, "grad_norm": 0.3523906171321869, "learning_rate": 9.545936260771034e-06, "loss": 0.3571, "step": 4667 }, { "epoch": 0.6693432750215085, "grad_norm": 0.3412260115146637, "learning_rate": 9.54558882910893e-06, "loss": 0.3544, "step": 4668 }, { "epoch": 0.6694866647548036, "grad_norm": 0.3207428455352783, "learning_rate": 9.545241270904627e-06, "loss": 0.3421, "step": 4669 }, { "epoch": 0.6696300544880986, "grad_norm": 0.34073585271835327, "learning_rate": 9.544893586167799e-06, "loss": 0.3084, "step": 4670 }, { "epoch": 0.6697734442213937, "grad_norm": 0.3291623294353485, "learning_rate": 9.544545774908122e-06, "loss": 0.3454, "step": 4671 }, { "epoch": 0.6699168339546888, "grad_norm": 0.33292460441589355, "learning_rate": 9.544197837135285e-06, "loss": 0.3352, "step": 4672 }, { "epoch": 0.6700602236879839, "grad_norm": 0.35817429423332214, "learning_rate": 9.543849772858969e-06, "loss": 0.329, "step": 4673 }, { "epoch": 0.670203613421279, "grad_norm": 0.3604503273963928, "learning_rate": 9.543501582088865e-06, "loss": 0.3325, "step": 4674 }, { "epoch": 0.6703470031545742, "grad_norm": 0.36263519525527954, "learning_rate": 9.543153264834668e-06, "loss": 0.3409, "step": 4675 }, { "epoch": 0.6704903928878693, "grad_norm": 0.3309197425842285, "learning_rate": 9.54280482110607e-06, "loss": 0.3424, "step": 4676 }, { "epoch": 0.6706337826211644, "grad_norm": 0.39208781719207764, "learning_rate": 9.542456250912777e-06, "loss": 0.3119, "step": 4677 }, { "epoch": 0.6707771723544594, "grad_norm": 0.357726126909256, "learning_rate": 9.542107554264488e-06, "loss": 0.3444, "step": 4678 }, { "epoch": 0.6709205620877545, "grad_norm": 0.3495846390724182, "learning_rate": 9.541758731170911e-06, "loss": 0.3486, "step": 4679 }, { "epoch": 0.6710639518210496, "grad_norm": 0.3920809030532837, "learning_rate": 9.541409781641758e-06, "loss": 0.3374, "step": 4680 }, { "epoch": 0.6712073415543447, "grad_norm": 0.35932815074920654, "learning_rate": 9.541060705686741e-06, "loss": 0.3424, "step": 4681 }, { "epoch": 0.6713507312876398, "grad_norm": 0.3778521418571472, "learning_rate": 9.540711503315583e-06, "loss": 0.3418, "step": 4682 }, { "epoch": 0.6714941210209349, "grad_norm": 0.44473519921302795, "learning_rate": 9.540362174538e-06, "loss": 0.362, "step": 4683 }, { "epoch": 0.67163751075423, "grad_norm": 0.3620142340660095, "learning_rate": 9.540012719363718e-06, "loss": 0.3352, "step": 4684 }, { "epoch": 0.6717809004875251, "grad_norm": 0.40558749437332153, "learning_rate": 9.539663137802467e-06, "loss": 0.3379, "step": 4685 }, { "epoch": 0.6719242902208202, "grad_norm": 0.35241562128067017, "learning_rate": 9.539313429863975e-06, "loss": 0.3297, "step": 4686 }, { "epoch": 0.6720676799541153, "grad_norm": 0.37011367082595825, "learning_rate": 9.538963595557981e-06, "loss": 0.3254, "step": 4687 }, { "epoch": 0.6722110696874104, "grad_norm": 0.37372031807899475, "learning_rate": 9.538613634894224e-06, "loss": 0.3345, "step": 4688 }, { "epoch": 0.6723544594207055, "grad_norm": 0.4097294211387634, "learning_rate": 9.538263547882446e-06, "loss": 0.3508, "step": 4689 }, { "epoch": 0.6724978491540006, "grad_norm": 0.3027028441429138, "learning_rate": 9.537913334532389e-06, "loss": 0.3278, "step": 4690 }, { "epoch": 0.6726412388872957, "grad_norm": 0.39115840196609497, "learning_rate": 9.537562994853806e-06, "loss": 0.346, "step": 4691 }, { "epoch": 0.6727846286205907, "grad_norm": 0.40953245759010315, "learning_rate": 9.537212528856449e-06, "loss": 0.3396, "step": 4692 }, { "epoch": 0.6729280183538858, "grad_norm": 0.3348236083984375, "learning_rate": 9.536861936550078e-06, "loss": 0.3299, "step": 4693 }, { "epoch": 0.6730714080871809, "grad_norm": 0.3662605881690979, "learning_rate": 9.536511217944446e-06, "loss": 0.3127, "step": 4694 }, { "epoch": 0.6732147978204761, "grad_norm": 0.3756447732448578, "learning_rate": 9.536160373049323e-06, "loss": 0.3566, "step": 4695 }, { "epoch": 0.6733581875537712, "grad_norm": 0.32203948497772217, "learning_rate": 9.535809401874473e-06, "loss": 0.3366, "step": 4696 }, { "epoch": 0.6735015772870663, "grad_norm": 0.3663051724433899, "learning_rate": 9.535458304429664e-06, "loss": 0.3376, "step": 4697 }, { "epoch": 0.6736449670203614, "grad_norm": 0.37892603874206543, "learning_rate": 9.535107080724673e-06, "loss": 0.3462, "step": 4698 }, { "epoch": 0.6737883567536564, "grad_norm": 0.364927738904953, "learning_rate": 9.534755730769279e-06, "loss": 0.3302, "step": 4699 }, { "epoch": 0.6739317464869515, "grad_norm": 0.3916381895542145, "learning_rate": 9.53440425457326e-06, "loss": 0.3471, "step": 4700 }, { "epoch": 0.6740751362202466, "grad_norm": 0.4042389392852783, "learning_rate": 9.534052652146402e-06, "loss": 0.3733, "step": 4701 }, { "epoch": 0.6742185259535417, "grad_norm": 0.3661157190799713, "learning_rate": 9.533700923498492e-06, "loss": 0.3364, "step": 4702 }, { "epoch": 0.6743619156868368, "grad_norm": 0.3861170709133148, "learning_rate": 9.533349068639323e-06, "loss": 0.3436, "step": 4703 }, { "epoch": 0.6745053054201319, "grad_norm": 0.31305813789367676, "learning_rate": 9.532997087578692e-06, "loss": 0.3432, "step": 4704 }, { "epoch": 0.6746486951534271, "grad_norm": 0.3709104061126709, "learning_rate": 9.532644980326392e-06, "loss": 0.3312, "step": 4705 }, { "epoch": 0.6747920848867222, "grad_norm": 0.34468281269073486, "learning_rate": 9.53229274689223e-06, "loss": 0.3432, "step": 4706 }, { "epoch": 0.6749354746200172, "grad_norm": 0.3102134168148041, "learning_rate": 9.53194038728601e-06, "loss": 0.3413, "step": 4707 }, { "epoch": 0.6750788643533123, "grad_norm": 0.3375031054019928, "learning_rate": 9.53158790151754e-06, "loss": 0.3333, "step": 4708 }, { "epoch": 0.6752222540866074, "grad_norm": 0.33933350443840027, "learning_rate": 9.531235289596633e-06, "loss": 0.3435, "step": 4709 }, { "epoch": 0.6753656438199025, "grad_norm": 0.3287501931190491, "learning_rate": 9.530882551533108e-06, "loss": 0.3344, "step": 4710 }, { "epoch": 0.6755090335531976, "grad_norm": 0.332792729139328, "learning_rate": 9.53052968733678e-06, "loss": 0.3437, "step": 4711 }, { "epoch": 0.6756524232864927, "grad_norm": 0.32560744881629944, "learning_rate": 9.530176697017476e-06, "loss": 0.3315, "step": 4712 }, { "epoch": 0.6757958130197877, "grad_norm": 0.3352174162864685, "learning_rate": 9.529823580585023e-06, "loss": 0.3287, "step": 4713 }, { "epoch": 0.6759392027530828, "grad_norm": 0.3390190899372101, "learning_rate": 9.529470338049249e-06, "loss": 0.335, "step": 4714 }, { "epoch": 0.6760825924863779, "grad_norm": 0.35113367438316345, "learning_rate": 9.529116969419988e-06, "loss": 0.3661, "step": 4715 }, { "epoch": 0.6762259822196731, "grad_norm": 0.36583396792411804, "learning_rate": 9.528763474707077e-06, "loss": 0.3399, "step": 4716 }, { "epoch": 0.6763693719529682, "grad_norm": 0.3348177671432495, "learning_rate": 9.528409853920357e-06, "loss": 0.3455, "step": 4717 }, { "epoch": 0.6765127616862633, "grad_norm": 0.34758028388023376, "learning_rate": 9.528056107069675e-06, "loss": 0.324, "step": 4718 }, { "epoch": 0.6766561514195584, "grad_norm": 0.3269006013870239, "learning_rate": 9.527702234164873e-06, "loss": 0.3237, "step": 4719 }, { "epoch": 0.6767995411528535, "grad_norm": 0.33726319670677185, "learning_rate": 9.527348235215807e-06, "loss": 0.3383, "step": 4720 }, { "epoch": 0.6769429308861485, "grad_norm": 0.38262563943862915, "learning_rate": 9.526994110232333e-06, "loss": 0.3368, "step": 4721 }, { "epoch": 0.6770863206194436, "grad_norm": 0.36949047446250916, "learning_rate": 9.526639859224306e-06, "loss": 0.3505, "step": 4722 }, { "epoch": 0.6772297103527387, "grad_norm": 0.35136741399765015, "learning_rate": 9.526285482201586e-06, "loss": 0.3527, "step": 4723 }, { "epoch": 0.6773731000860338, "grad_norm": 0.37475648522377014, "learning_rate": 9.525930979174043e-06, "loss": 0.3518, "step": 4724 }, { "epoch": 0.6775164898193289, "grad_norm": 0.35045430064201355, "learning_rate": 9.525576350151542e-06, "loss": 0.3312, "step": 4725 }, { "epoch": 0.6776598795526241, "grad_norm": 0.34210535883903503, "learning_rate": 9.525221595143959e-06, "loss": 0.3453, "step": 4726 }, { "epoch": 0.6778032692859192, "grad_norm": 0.34082987904548645, "learning_rate": 9.524866714161167e-06, "loss": 0.3335, "step": 4727 }, { "epoch": 0.6779466590192142, "grad_norm": 0.34678223729133606, "learning_rate": 9.524511707213046e-06, "loss": 0.3655, "step": 4728 }, { "epoch": 0.6780900487525093, "grad_norm": 0.3411731421947479, "learning_rate": 9.52415657430948e-06, "loss": 0.3361, "step": 4729 }, { "epoch": 0.6782334384858044, "grad_norm": 0.32923176884651184, "learning_rate": 9.523801315460353e-06, "loss": 0.333, "step": 4730 }, { "epoch": 0.6783768282190995, "grad_norm": 0.3318566679954529, "learning_rate": 9.523445930675558e-06, "loss": 0.3572, "step": 4731 }, { "epoch": 0.6785202179523946, "grad_norm": 0.3458823561668396, "learning_rate": 9.523090419964986e-06, "loss": 0.3442, "step": 4732 }, { "epoch": 0.6786636076856897, "grad_norm": 0.3090314269065857, "learning_rate": 9.522734783338534e-06, "loss": 0.3266, "step": 4733 }, { "epoch": 0.6788069974189848, "grad_norm": 0.30421552062034607, "learning_rate": 9.522379020806104e-06, "loss": 0.3415, "step": 4734 }, { "epoch": 0.6789503871522798, "grad_norm": 0.338971346616745, "learning_rate": 9.522023132377598e-06, "loss": 0.3439, "step": 4735 }, { "epoch": 0.679093776885575, "grad_norm": 0.35281866788864136, "learning_rate": 9.521667118062922e-06, "loss": 0.3583, "step": 4736 }, { "epoch": 0.6792371666188701, "grad_norm": 0.3301742374897003, "learning_rate": 9.521310977871993e-06, "loss": 0.351, "step": 4737 }, { "epoch": 0.6793805563521652, "grad_norm": 0.3256245255470276, "learning_rate": 9.52095471181472e-06, "loss": 0.3487, "step": 4738 }, { "epoch": 0.6795239460854603, "grad_norm": 0.3269549608230591, "learning_rate": 9.520598319901021e-06, "loss": 0.347, "step": 4739 }, { "epoch": 0.6796673358187554, "grad_norm": 0.3032067120075226, "learning_rate": 9.520241802140819e-06, "loss": 0.3234, "step": 4740 }, { "epoch": 0.6798107255520505, "grad_norm": 0.3613465428352356, "learning_rate": 9.51988515854404e-06, "loss": 0.3556, "step": 4741 }, { "epoch": 0.6799541152853456, "grad_norm": 0.31832727789878845, "learning_rate": 9.519528389120611e-06, "loss": 0.3477, "step": 4742 }, { "epoch": 0.6800975050186406, "grad_norm": 0.29101064801216125, "learning_rate": 9.519171493880464e-06, "loss": 0.3285, "step": 4743 }, { "epoch": 0.6802408947519357, "grad_norm": 0.301673024892807, "learning_rate": 9.518814472833533e-06, "loss": 0.3263, "step": 4744 }, { "epoch": 0.6803842844852308, "grad_norm": 0.3198550045490265, "learning_rate": 9.51845732598976e-06, "loss": 0.3292, "step": 4745 }, { "epoch": 0.680527674218526, "grad_norm": 0.308933287858963, "learning_rate": 9.518100053359086e-06, "loss": 0.3349, "step": 4746 }, { "epoch": 0.6806710639518211, "grad_norm": 0.33609431982040405, "learning_rate": 9.517742654951454e-06, "loss": 0.341, "step": 4747 }, { "epoch": 0.6808144536851162, "grad_norm": 0.31498831510543823, "learning_rate": 9.517385130776819e-06, "loss": 0.3312, "step": 4748 }, { "epoch": 0.6809578434184113, "grad_norm": 0.30426672101020813, "learning_rate": 9.517027480845128e-06, "loss": 0.3591, "step": 4749 }, { "epoch": 0.6811012331517063, "grad_norm": 0.32879015803337097, "learning_rate": 9.516669705166343e-06, "loss": 0.3699, "step": 4750 }, { "epoch": 0.6812446228850014, "grad_norm": 0.3177947700023651, "learning_rate": 9.51631180375042e-06, "loss": 0.3415, "step": 4751 }, { "epoch": 0.6813880126182965, "grad_norm": 0.3266088664531708, "learning_rate": 9.515953776607324e-06, "loss": 0.3307, "step": 4752 }, { "epoch": 0.6815314023515916, "grad_norm": 0.30581384897232056, "learning_rate": 9.515595623747023e-06, "loss": 0.3365, "step": 4753 }, { "epoch": 0.6816747920848867, "grad_norm": 0.3127673268318176, "learning_rate": 9.515237345179485e-06, "loss": 0.3421, "step": 4754 }, { "epoch": 0.6818181818181818, "grad_norm": 0.3260481655597687, "learning_rate": 9.514878940914685e-06, "loss": 0.3552, "step": 4755 }, { "epoch": 0.681961571551477, "grad_norm": 0.2828691899776459, "learning_rate": 9.5145204109626e-06, "loss": 0.3208, "step": 4756 }, { "epoch": 0.682104961284772, "grad_norm": 0.3459784984588623, "learning_rate": 9.514161755333214e-06, "loss": 0.3519, "step": 4757 }, { "epoch": 0.6822483510180671, "grad_norm": 0.3274277150630951, "learning_rate": 9.513802974036506e-06, "loss": 0.3697, "step": 4758 }, { "epoch": 0.6823917407513622, "grad_norm": 0.2978686988353729, "learning_rate": 9.513444067082466e-06, "loss": 0.3392, "step": 4759 }, { "epoch": 0.6825351304846573, "grad_norm": 0.3387991487979889, "learning_rate": 9.51308503448109e-06, "loss": 0.3323, "step": 4760 }, { "epoch": 0.6826785202179524, "grad_norm": 0.3761577308177948, "learning_rate": 9.512725876242366e-06, "loss": 0.3513, "step": 4761 }, { "epoch": 0.6828219099512475, "grad_norm": 0.33428817987442017, "learning_rate": 9.512366592376296e-06, "loss": 0.3275, "step": 4762 }, { "epoch": 0.6829652996845426, "grad_norm": 0.3119037449359894, "learning_rate": 9.512007182892881e-06, "loss": 0.3625, "step": 4763 }, { "epoch": 0.6831086894178376, "grad_norm": 0.37763166427612305, "learning_rate": 9.511647647802129e-06, "loss": 0.3443, "step": 4764 }, { "epoch": 0.6832520791511327, "grad_norm": 0.33598893880844116, "learning_rate": 9.511287987114044e-06, "loss": 0.3344, "step": 4765 }, { "epoch": 0.6833954688844279, "grad_norm": 0.3197784125804901, "learning_rate": 9.510928200838643e-06, "loss": 0.3446, "step": 4766 }, { "epoch": 0.683538858617723, "grad_norm": 0.353280633687973, "learning_rate": 9.510568288985938e-06, "loss": 0.3269, "step": 4767 }, { "epoch": 0.6836822483510181, "grad_norm": 0.32250791788101196, "learning_rate": 9.510208251565952e-06, "loss": 0.3167, "step": 4768 }, { "epoch": 0.6838256380843132, "grad_norm": 0.3413933217525482, "learning_rate": 9.509848088588706e-06, "loss": 0.3189, "step": 4769 }, { "epoch": 0.6839690278176083, "grad_norm": 0.3612247109413147, "learning_rate": 9.509487800064227e-06, "loss": 0.3346, "step": 4770 }, { "epoch": 0.6841124175509034, "grad_norm": 0.3604884147644043, "learning_rate": 9.509127386002544e-06, "loss": 0.331, "step": 4771 }, { "epoch": 0.6842558072841984, "grad_norm": 0.3770970404148102, "learning_rate": 9.508766846413691e-06, "loss": 0.3335, "step": 4772 }, { "epoch": 0.6843991970174935, "grad_norm": 0.3910280764102936, "learning_rate": 9.508406181307704e-06, "loss": 0.3602, "step": 4773 }, { "epoch": 0.6845425867507886, "grad_norm": 0.3436557352542877, "learning_rate": 9.508045390694625e-06, "loss": 0.355, "step": 4774 }, { "epoch": 0.6846859764840837, "grad_norm": 0.3574991524219513, "learning_rate": 9.507684474584498e-06, "loss": 0.3308, "step": 4775 }, { "epoch": 0.6848293662173789, "grad_norm": 0.39994218945503235, "learning_rate": 9.507323432987368e-06, "loss": 0.3578, "step": 4776 }, { "epoch": 0.684972755950674, "grad_norm": 0.3409821093082428, "learning_rate": 9.506962265913287e-06, "loss": 0.3248, "step": 4777 }, { "epoch": 0.6851161456839691, "grad_norm": 0.34396299719810486, "learning_rate": 9.506600973372309e-06, "loss": 0.3292, "step": 4778 }, { "epoch": 0.6852595354172641, "grad_norm": 0.3433883488178253, "learning_rate": 9.506239555374495e-06, "loss": 0.3506, "step": 4779 }, { "epoch": 0.6854029251505592, "grad_norm": 0.3283320963382721, "learning_rate": 9.505878011929901e-06, "loss": 0.3522, "step": 4780 }, { "epoch": 0.6855463148838543, "grad_norm": 0.327048659324646, "learning_rate": 9.505516343048596e-06, "loss": 0.337, "step": 4781 }, { "epoch": 0.6856897046171494, "grad_norm": 0.34374359250068665, "learning_rate": 9.505154548740646e-06, "loss": 0.3213, "step": 4782 }, { "epoch": 0.6858330943504445, "grad_norm": 0.38607555627822876, "learning_rate": 9.504792629016124e-06, "loss": 0.355, "step": 4783 }, { "epoch": 0.6859764840837396, "grad_norm": 0.3813173770904541, "learning_rate": 9.504430583885105e-06, "loss": 0.3315, "step": 4784 }, { "epoch": 0.6861198738170347, "grad_norm": 0.34379294514656067, "learning_rate": 9.504068413357667e-06, "loss": 0.3368, "step": 4785 }, { "epoch": 0.6862632635503298, "grad_norm": 0.36331111192703247, "learning_rate": 9.503706117443895e-06, "loss": 0.3298, "step": 4786 }, { "epoch": 0.6864066532836249, "grad_norm": 0.33495888113975525, "learning_rate": 9.503343696153869e-06, "loss": 0.3551, "step": 4787 }, { "epoch": 0.68655004301692, "grad_norm": 0.37794092297554016, "learning_rate": 9.502981149497685e-06, "loss": 0.3295, "step": 4788 }, { "epoch": 0.6866934327502151, "grad_norm": 0.3619330823421478, "learning_rate": 9.50261847748543e-06, "loss": 0.342, "step": 4789 }, { "epoch": 0.6868368224835102, "grad_norm": 0.3403645157814026, "learning_rate": 9.502255680127206e-06, "loss": 0.3357, "step": 4790 }, { "epoch": 0.6869802122168053, "grad_norm": 0.36657819151878357, "learning_rate": 9.501892757433107e-06, "loss": 0.3473, "step": 4791 }, { "epoch": 0.6871236019501004, "grad_norm": 0.3600787818431854, "learning_rate": 9.50152970941324e-06, "loss": 0.3566, "step": 4792 }, { "epoch": 0.6872669916833954, "grad_norm": 0.3288843333721161, "learning_rate": 9.501166536077711e-06, "loss": 0.3748, "step": 4793 }, { "epoch": 0.6874103814166905, "grad_norm": 0.36730533838272095, "learning_rate": 9.500803237436629e-06, "loss": 0.3533, "step": 4794 }, { "epoch": 0.6875537711499856, "grad_norm": 0.34617292881011963, "learning_rate": 9.500439813500109e-06, "loss": 0.3395, "step": 4795 }, { "epoch": 0.6876971608832808, "grad_norm": 0.362649142742157, "learning_rate": 9.500076264278267e-06, "loss": 0.3369, "step": 4796 }, { "epoch": 0.6878405506165759, "grad_norm": 0.3685762286186218, "learning_rate": 9.499712589781224e-06, "loss": 0.3326, "step": 4797 }, { "epoch": 0.687983940349871, "grad_norm": 0.37997567653656006, "learning_rate": 9.499348790019106e-06, "loss": 0.3541, "step": 4798 }, { "epoch": 0.6881273300831661, "grad_norm": 0.36270424723625183, "learning_rate": 9.498984865002036e-06, "loss": 0.3353, "step": 4799 }, { "epoch": 0.6882707198164612, "grad_norm": 0.3768434524536133, "learning_rate": 9.498620814740152e-06, "loss": 0.3433, "step": 4800 }, { "epoch": 0.6884141095497562, "grad_norm": 0.36356842517852783, "learning_rate": 9.498256639243581e-06, "loss": 0.3493, "step": 4801 }, { "epoch": 0.6885574992830513, "grad_norm": 0.3747842013835907, "learning_rate": 9.497892338522468e-06, "loss": 0.362, "step": 4802 }, { "epoch": 0.6887008890163464, "grad_norm": 0.3425275981426239, "learning_rate": 9.497527912586949e-06, "loss": 0.332, "step": 4803 }, { "epoch": 0.6888442787496415, "grad_norm": 0.30900225043296814, "learning_rate": 9.497163361447173e-06, "loss": 0.3485, "step": 4804 }, { "epoch": 0.6889876684829366, "grad_norm": 0.3023358881473541, "learning_rate": 9.496798685113285e-06, "loss": 0.3252, "step": 4805 }, { "epoch": 0.6891310582162318, "grad_norm": 0.32799696922302246, "learning_rate": 9.496433883595441e-06, "loss": 0.335, "step": 4806 }, { "epoch": 0.6892744479495269, "grad_norm": 0.37188032269477844, "learning_rate": 9.496068956903793e-06, "loss": 0.3578, "step": 4807 }, { "epoch": 0.6894178376828219, "grad_norm": 0.327902615070343, "learning_rate": 9.495703905048502e-06, "loss": 0.3473, "step": 4808 }, { "epoch": 0.689561227416117, "grad_norm": 0.34762436151504517, "learning_rate": 9.495338728039729e-06, "loss": 0.3611, "step": 4809 }, { "epoch": 0.6897046171494121, "grad_norm": 0.3181697726249695, "learning_rate": 9.494973425887643e-06, "loss": 0.3345, "step": 4810 }, { "epoch": 0.6898480068827072, "grad_norm": 0.3781544268131256, "learning_rate": 9.49460799860241e-06, "loss": 0.3324, "step": 4811 }, { "epoch": 0.6899913966160023, "grad_norm": 0.32787802815437317, "learning_rate": 9.494242446194205e-06, "loss": 0.3495, "step": 4812 }, { "epoch": 0.6901347863492974, "grad_norm": 0.330746054649353, "learning_rate": 9.493876768673203e-06, "loss": 0.3625, "step": 4813 }, { "epoch": 0.6902781760825925, "grad_norm": 0.3753693401813507, "learning_rate": 9.493510966049586e-06, "loss": 0.3176, "step": 4814 }, { "epoch": 0.6904215658158875, "grad_norm": 0.33108603954315186, "learning_rate": 9.493145038333535e-06, "loss": 0.3519, "step": 4815 }, { "epoch": 0.6905649555491826, "grad_norm": 0.3346695005893707, "learning_rate": 9.492778985535238e-06, "loss": 0.3279, "step": 4816 }, { "epoch": 0.6907083452824778, "grad_norm": 0.3724648356437683, "learning_rate": 9.492412807664884e-06, "loss": 0.3653, "step": 4817 }, { "epoch": 0.6908517350157729, "grad_norm": 0.3079169988632202, "learning_rate": 9.49204650473267e-06, "loss": 0.3765, "step": 4818 }, { "epoch": 0.690995124749068, "grad_norm": 0.32777488231658936, "learning_rate": 9.491680076748791e-06, "loss": 0.3376, "step": 4819 }, { "epoch": 0.6911385144823631, "grad_norm": 0.33319440484046936, "learning_rate": 9.491313523723447e-06, "loss": 0.332, "step": 4820 }, { "epoch": 0.6912819042156582, "grad_norm": 0.34148460626602173, "learning_rate": 9.490946845666844e-06, "loss": 0.3406, "step": 4821 }, { "epoch": 0.6914252939489532, "grad_norm": 0.3168657422065735, "learning_rate": 9.49058004258919e-06, "loss": 0.3394, "step": 4822 }, { "epoch": 0.6915686836822483, "grad_norm": 0.38288503885269165, "learning_rate": 9.490213114500694e-06, "loss": 0.334, "step": 4823 }, { "epoch": 0.6917120734155434, "grad_norm": 0.3175756335258484, "learning_rate": 9.489846061411574e-06, "loss": 0.3279, "step": 4824 }, { "epoch": 0.6918554631488385, "grad_norm": 0.3442259132862091, "learning_rate": 9.489478883332045e-06, "loss": 0.3402, "step": 4825 }, { "epoch": 0.6919988528821336, "grad_norm": 0.34913188219070435, "learning_rate": 9.48911158027233e-06, "loss": 0.3512, "step": 4826 }, { "epoch": 0.6921422426154288, "grad_norm": 0.3251129686832428, "learning_rate": 9.488744152242654e-06, "loss": 0.3523, "step": 4827 }, { "epoch": 0.6922856323487239, "grad_norm": 0.35317087173461914, "learning_rate": 9.488376599253247e-06, "loss": 0.3312, "step": 4828 }, { "epoch": 0.692429022082019, "grad_norm": 0.3590247333049774, "learning_rate": 9.488008921314338e-06, "loss": 0.3305, "step": 4829 }, { "epoch": 0.692572411815314, "grad_norm": 0.3157189190387726, "learning_rate": 9.487641118436166e-06, "loss": 0.3444, "step": 4830 }, { "epoch": 0.6927158015486091, "grad_norm": 0.36501675844192505, "learning_rate": 9.487273190628969e-06, "loss": 0.3484, "step": 4831 }, { "epoch": 0.6928591912819042, "grad_norm": 0.3526627719402313, "learning_rate": 9.486905137902989e-06, "loss": 0.3428, "step": 4832 }, { "epoch": 0.6930025810151993, "grad_norm": 0.30149033665657043, "learning_rate": 9.48653696026847e-06, "loss": 0.339, "step": 4833 }, { "epoch": 0.6931459707484944, "grad_norm": 0.36849623918533325, "learning_rate": 9.486168657735666e-06, "loss": 0.3396, "step": 4834 }, { "epoch": 0.6932893604817895, "grad_norm": 0.32630395889282227, "learning_rate": 9.485800230314826e-06, "loss": 0.3222, "step": 4835 }, { "epoch": 0.6934327502150845, "grad_norm": 0.31700843572616577, "learning_rate": 9.485431678016209e-06, "loss": 0.348, "step": 4836 }, { "epoch": 0.6935761399483797, "grad_norm": 0.3950468599796295, "learning_rate": 9.485063000850073e-06, "loss": 0.3716, "step": 4837 }, { "epoch": 0.6937195296816748, "grad_norm": 0.31123220920562744, "learning_rate": 9.484694198826682e-06, "loss": 0.3364, "step": 4838 }, { "epoch": 0.6938629194149699, "grad_norm": 0.3200691044330597, "learning_rate": 9.484325271956304e-06, "loss": 0.3335, "step": 4839 }, { "epoch": 0.694006309148265, "grad_norm": 0.321394681930542, "learning_rate": 9.483956220249209e-06, "loss": 0.3396, "step": 4840 }, { "epoch": 0.6941496988815601, "grad_norm": 0.32333022356033325, "learning_rate": 9.483587043715672e-06, "loss": 0.3505, "step": 4841 }, { "epoch": 0.6942930886148552, "grad_norm": 0.3258863091468811, "learning_rate": 9.483217742365966e-06, "loss": 0.3445, "step": 4842 }, { "epoch": 0.6944364783481503, "grad_norm": 0.3305702805519104, "learning_rate": 9.482848316210375e-06, "loss": 0.323, "step": 4843 }, { "epoch": 0.6945798680814453, "grad_norm": 0.31334853172302246, "learning_rate": 9.482478765259185e-06, "loss": 0.3357, "step": 4844 }, { "epoch": 0.6947232578147404, "grad_norm": 0.3110189735889435, "learning_rate": 9.48210908952268e-06, "loss": 0.343, "step": 4845 }, { "epoch": 0.6948666475480355, "grad_norm": 0.3422201871871948, "learning_rate": 9.481739289011151e-06, "loss": 0.3317, "step": 4846 }, { "epoch": 0.6950100372813307, "grad_norm": 0.37492746114730835, "learning_rate": 9.481369363734896e-06, "loss": 0.3571, "step": 4847 }, { "epoch": 0.6951534270146258, "grad_norm": 0.3017086982727051, "learning_rate": 9.480999313704212e-06, "loss": 0.3242, "step": 4848 }, { "epoch": 0.6952968167479209, "grad_norm": 0.3199295997619629, "learning_rate": 9.4806291389294e-06, "loss": 0.3246, "step": 4849 }, { "epoch": 0.695440206481216, "grad_norm": 0.329694926738739, "learning_rate": 9.480258839420764e-06, "loss": 0.3564, "step": 4850 }, { "epoch": 0.695583596214511, "grad_norm": 0.3148115575313568, "learning_rate": 9.479888415188616e-06, "loss": 0.3197, "step": 4851 }, { "epoch": 0.6957269859478061, "grad_norm": 0.3289775252342224, "learning_rate": 9.479517866243264e-06, "loss": 0.3286, "step": 4852 }, { "epoch": 0.6958703756811012, "grad_norm": 0.3382583260536194, "learning_rate": 9.479147192595027e-06, "loss": 0.3508, "step": 4853 }, { "epoch": 0.6960137654143963, "grad_norm": 0.3312254548072815, "learning_rate": 9.47877639425422e-06, "loss": 0.3577, "step": 4854 }, { "epoch": 0.6961571551476914, "grad_norm": 0.3242424428462982, "learning_rate": 9.47840547123117e-06, "loss": 0.3356, "step": 4855 }, { "epoch": 0.6963005448809865, "grad_norm": 0.3075699210166931, "learning_rate": 9.4780344235362e-06, "loss": 0.3435, "step": 4856 }, { "epoch": 0.6964439346142817, "grad_norm": 0.31361690163612366, "learning_rate": 9.477663251179641e-06, "loss": 0.3678, "step": 4857 }, { "epoch": 0.6965873243475768, "grad_norm": 0.3397098183631897, "learning_rate": 9.477291954171824e-06, "loss": 0.3479, "step": 4858 }, { "epoch": 0.6967307140808718, "grad_norm": 0.30828049778938293, "learning_rate": 9.476920532523085e-06, "loss": 0.3338, "step": 4859 }, { "epoch": 0.6968741038141669, "grad_norm": 0.34459078311920166, "learning_rate": 9.476548986243767e-06, "loss": 0.3488, "step": 4860 }, { "epoch": 0.697017493547462, "grad_norm": 0.3482077717781067, "learning_rate": 9.476177315344213e-06, "loss": 0.3551, "step": 4861 }, { "epoch": 0.6971608832807571, "grad_norm": 0.3444846272468567, "learning_rate": 9.475805519834766e-06, "loss": 0.3485, "step": 4862 }, { "epoch": 0.6973042730140522, "grad_norm": 0.33756622672080994, "learning_rate": 9.475433599725779e-06, "loss": 0.324, "step": 4863 }, { "epoch": 0.6974476627473473, "grad_norm": 0.3452281057834625, "learning_rate": 9.475061555027605e-06, "loss": 0.349, "step": 4864 }, { "epoch": 0.6975910524806423, "grad_norm": 0.3447043001651764, "learning_rate": 9.4746893857506e-06, "loss": 0.3515, "step": 4865 }, { "epoch": 0.6977344422139374, "grad_norm": 0.3268815577030182, "learning_rate": 9.474317091905128e-06, "loss": 0.3321, "step": 4866 }, { "epoch": 0.6978778319472326, "grad_norm": 0.33708080649375916, "learning_rate": 9.47394467350155e-06, "loss": 0.3391, "step": 4867 }, { "epoch": 0.6980212216805277, "grad_norm": 0.3131340444087982, "learning_rate": 9.473572130550234e-06, "loss": 0.3554, "step": 4868 }, { "epoch": 0.6981646114138228, "grad_norm": 0.3049304187297821, "learning_rate": 9.473199463061552e-06, "loss": 0.3396, "step": 4869 }, { "epoch": 0.6983080011471179, "grad_norm": 0.33555033802986145, "learning_rate": 9.472826671045879e-06, "loss": 0.3382, "step": 4870 }, { "epoch": 0.698451390880413, "grad_norm": 0.333477646112442, "learning_rate": 9.47245375451359e-06, "loss": 0.3403, "step": 4871 }, { "epoch": 0.698594780613708, "grad_norm": 0.31968313455581665, "learning_rate": 9.47208071347507e-06, "loss": 0.3204, "step": 4872 }, { "epoch": 0.6987381703470031, "grad_norm": 0.29814860224723816, "learning_rate": 9.471707547940701e-06, "loss": 0.3295, "step": 4873 }, { "epoch": 0.6988815600802982, "grad_norm": 0.2998638451099396, "learning_rate": 9.471334257920872e-06, "loss": 0.3349, "step": 4874 }, { "epoch": 0.6990249498135933, "grad_norm": 0.32101818919181824, "learning_rate": 9.470960843425978e-06, "loss": 0.3227, "step": 4875 }, { "epoch": 0.6991683395468884, "grad_norm": 0.31693920493125916, "learning_rate": 9.47058730446641e-06, "loss": 0.3574, "step": 4876 }, { "epoch": 0.6993117292801836, "grad_norm": 0.310634046792984, "learning_rate": 9.47021364105257e-06, "loss": 0.3256, "step": 4877 }, { "epoch": 0.6994551190134787, "grad_norm": 0.3127625286579132, "learning_rate": 9.469839853194858e-06, "loss": 0.3329, "step": 4878 }, { "epoch": 0.6995985087467738, "grad_norm": 0.34295517206192017, "learning_rate": 9.469465940903679e-06, "loss": 0.3541, "step": 4879 }, { "epoch": 0.6997418984800688, "grad_norm": 0.2925989627838135, "learning_rate": 9.469091904189444e-06, "loss": 0.3399, "step": 4880 }, { "epoch": 0.6998852882133639, "grad_norm": 0.32343530654907227, "learning_rate": 9.468717743062568e-06, "loss": 0.3279, "step": 4881 }, { "epoch": 0.700028677946659, "grad_norm": 0.3452737033367157, "learning_rate": 9.46834345753346e-06, "loss": 0.3283, "step": 4882 }, { "epoch": 0.7001720676799541, "grad_norm": 0.31256142258644104, "learning_rate": 9.467969047612547e-06, "loss": 0.3321, "step": 4883 }, { "epoch": 0.7003154574132492, "grad_norm": 0.354510635137558, "learning_rate": 9.467594513310248e-06, "loss": 0.3405, "step": 4884 }, { "epoch": 0.7004588471465443, "grad_norm": 0.3604164123535156, "learning_rate": 9.467219854636989e-06, "loss": 0.3137, "step": 4885 }, { "epoch": 0.7006022368798394, "grad_norm": 0.32052552700042725, "learning_rate": 9.4668450716032e-06, "loss": 0.3521, "step": 4886 }, { "epoch": 0.7007456266131346, "grad_norm": 0.351471483707428, "learning_rate": 9.466470164219319e-06, "loss": 0.3258, "step": 4887 }, { "epoch": 0.7008890163464296, "grad_norm": 0.3371933698654175, "learning_rate": 9.466095132495777e-06, "loss": 0.322, "step": 4888 }, { "epoch": 0.7010324060797247, "grad_norm": 0.3406411409378052, "learning_rate": 9.465719976443017e-06, "loss": 0.3274, "step": 4889 }, { "epoch": 0.7011757958130198, "grad_norm": 0.3527389168739319, "learning_rate": 9.465344696071484e-06, "loss": 0.3547, "step": 4890 }, { "epoch": 0.7013191855463149, "grad_norm": 0.30918410420417786, "learning_rate": 9.464969291391621e-06, "loss": 0.3398, "step": 4891 }, { "epoch": 0.70146257527961, "grad_norm": 0.31908541917800903, "learning_rate": 9.464593762413883e-06, "loss": 0.3513, "step": 4892 }, { "epoch": 0.7016059650129051, "grad_norm": 0.3265797197818756, "learning_rate": 9.464218109148722e-06, "loss": 0.3346, "step": 4893 }, { "epoch": 0.7017493547462001, "grad_norm": 0.34411728382110596, "learning_rate": 9.463842331606596e-06, "loss": 0.3383, "step": 4894 }, { "epoch": 0.7018927444794952, "grad_norm": 0.3361961841583252, "learning_rate": 9.463466429797967e-06, "loss": 0.3421, "step": 4895 }, { "epoch": 0.7020361342127903, "grad_norm": 0.33043110370635986, "learning_rate": 9.4630904037333e-06, "loss": 0.3094, "step": 4896 }, { "epoch": 0.7021795239460855, "grad_norm": 0.3589743375778198, "learning_rate": 9.462714253423059e-06, "loss": 0.3486, "step": 4897 }, { "epoch": 0.7023229136793806, "grad_norm": 0.36861318349838257, "learning_rate": 9.462337978877719e-06, "loss": 0.3456, "step": 4898 }, { "epoch": 0.7024663034126757, "grad_norm": 0.3424713611602783, "learning_rate": 9.461961580107753e-06, "loss": 0.346, "step": 4899 }, { "epoch": 0.7026096931459708, "grad_norm": 0.3476894497871399, "learning_rate": 9.461585057123641e-06, "loss": 0.3537, "step": 4900 }, { "epoch": 0.7027530828792659, "grad_norm": 0.3668852150440216, "learning_rate": 9.461208409935864e-06, "loss": 0.3333, "step": 4901 }, { "epoch": 0.7028964726125609, "grad_norm": 0.35421425104141235, "learning_rate": 9.460831638554909e-06, "loss": 0.3543, "step": 4902 }, { "epoch": 0.703039862345856, "grad_norm": 0.33842697739601135, "learning_rate": 9.460454742991263e-06, "loss": 0.3654, "step": 4903 }, { "epoch": 0.7031832520791511, "grad_norm": 0.34394371509552, "learning_rate": 9.460077723255418e-06, "loss": 0.3517, "step": 4904 }, { "epoch": 0.7033266418124462, "grad_norm": 0.361855685710907, "learning_rate": 9.45970057935787e-06, "loss": 0.3314, "step": 4905 }, { "epoch": 0.7034700315457413, "grad_norm": 0.32425323128700256, "learning_rate": 9.459323311309117e-06, "loss": 0.3489, "step": 4906 }, { "epoch": 0.7036134212790364, "grad_norm": 0.3528120815753937, "learning_rate": 9.458945919119664e-06, "loss": 0.3492, "step": 4907 }, { "epoch": 0.7037568110123316, "grad_norm": 0.33600205183029175, "learning_rate": 9.458568402800015e-06, "loss": 0.3305, "step": 4908 }, { "epoch": 0.7039002007456266, "grad_norm": 0.30881205201148987, "learning_rate": 9.458190762360682e-06, "loss": 0.326, "step": 4909 }, { "epoch": 0.7040435904789217, "grad_norm": 0.3646817207336426, "learning_rate": 9.457812997812173e-06, "loss": 0.3228, "step": 4910 }, { "epoch": 0.7041869802122168, "grad_norm": 0.33997195959091187, "learning_rate": 9.45743510916501e-06, "loss": 0.323, "step": 4911 }, { "epoch": 0.7043303699455119, "grad_norm": 0.33613303303718567, "learning_rate": 9.457057096429709e-06, "loss": 0.3255, "step": 4912 }, { "epoch": 0.704473759678807, "grad_norm": 0.2991458475589752, "learning_rate": 9.456678959616796e-06, "loss": 0.3283, "step": 4913 }, { "epoch": 0.7046171494121021, "grad_norm": 0.3373594582080841, "learning_rate": 9.456300698736795e-06, "loss": 0.3406, "step": 4914 }, { "epoch": 0.7047605391453972, "grad_norm": 0.2880764603614807, "learning_rate": 9.45592231380024e-06, "loss": 0.3639, "step": 4915 }, { "epoch": 0.7049039288786922, "grad_norm": 0.3196403682231903, "learning_rate": 9.45554380481766e-06, "loss": 0.328, "step": 4916 }, { "epoch": 0.7050473186119873, "grad_norm": 0.3330081105232239, "learning_rate": 9.455165171799596e-06, "loss": 0.3353, "step": 4917 }, { "epoch": 0.7051907083452825, "grad_norm": 0.3090411424636841, "learning_rate": 9.454786414756586e-06, "loss": 0.3332, "step": 4918 }, { "epoch": 0.7053340980785776, "grad_norm": 0.34424275159835815, "learning_rate": 9.454407533699175e-06, "loss": 0.3532, "step": 4919 }, { "epoch": 0.7054774878118727, "grad_norm": 0.33449608087539673, "learning_rate": 9.45402852863791e-06, "loss": 0.3392, "step": 4920 }, { "epoch": 0.7056208775451678, "grad_norm": 0.33161720633506775, "learning_rate": 9.453649399583344e-06, "loss": 0.3352, "step": 4921 }, { "epoch": 0.7057642672784629, "grad_norm": 0.33917954564094543, "learning_rate": 9.453270146546027e-06, "loss": 0.3536, "step": 4922 }, { "epoch": 0.705907657011758, "grad_norm": 0.32440105080604553, "learning_rate": 9.452890769536522e-06, "loss": 0.3434, "step": 4923 }, { "epoch": 0.706051046745053, "grad_norm": 0.35248515009880066, "learning_rate": 9.452511268565387e-06, "loss": 0.3541, "step": 4924 }, { "epoch": 0.7061944364783481, "grad_norm": 0.31970736384391785, "learning_rate": 9.452131643643188e-06, "loss": 0.3326, "step": 4925 }, { "epoch": 0.7063378262116432, "grad_norm": 0.30432188510894775, "learning_rate": 9.45175189478049e-06, "loss": 0.3353, "step": 4926 }, { "epoch": 0.7064812159449383, "grad_norm": 0.3272446393966675, "learning_rate": 9.45137202198787e-06, "loss": 0.3536, "step": 4927 }, { "epoch": 0.7066246056782335, "grad_norm": 0.35874098539352417, "learning_rate": 9.4509920252759e-06, "loss": 0.3649, "step": 4928 }, { "epoch": 0.7067679954115286, "grad_norm": 0.3123079836368561, "learning_rate": 9.45061190465516e-06, "loss": 0.3386, "step": 4929 }, { "epoch": 0.7069113851448237, "grad_norm": 0.34962984919548035, "learning_rate": 9.450231660136228e-06, "loss": 0.3353, "step": 4930 }, { "epoch": 0.7070547748781187, "grad_norm": 0.3062015771865845, "learning_rate": 9.449851291729696e-06, "loss": 0.3265, "step": 4931 }, { "epoch": 0.7071981646114138, "grad_norm": 0.32802003622055054, "learning_rate": 9.449470799446146e-06, "loss": 0.3395, "step": 4932 }, { "epoch": 0.7073415543447089, "grad_norm": 0.3001674711704254, "learning_rate": 9.449090183296175e-06, "loss": 0.3385, "step": 4933 }, { "epoch": 0.707484944078004, "grad_norm": 0.3111719787120819, "learning_rate": 9.448709443290378e-06, "loss": 0.3231, "step": 4934 }, { "epoch": 0.7076283338112991, "grad_norm": 0.36081463098526, "learning_rate": 9.448328579439351e-06, "loss": 0.3519, "step": 4935 }, { "epoch": 0.7077717235445942, "grad_norm": 0.3500661253929138, "learning_rate": 9.447947591753702e-06, "loss": 0.3332, "step": 4936 }, { "epoch": 0.7079151132778893, "grad_norm": 0.34800687432289124, "learning_rate": 9.447566480244031e-06, "loss": 0.3331, "step": 4937 }, { "epoch": 0.7080585030111844, "grad_norm": 0.32863885164260864, "learning_rate": 9.447185244920953e-06, "loss": 0.339, "step": 4938 }, { "epoch": 0.7082018927444795, "grad_norm": 0.3401344418525696, "learning_rate": 9.44680388579508e-06, "loss": 0.3477, "step": 4939 }, { "epoch": 0.7083452824777746, "grad_norm": 0.31322628259658813, "learning_rate": 9.446422402877025e-06, "loss": 0.3273, "step": 4940 }, { "epoch": 0.7084886722110697, "grad_norm": 0.38815218210220337, "learning_rate": 9.446040796177411e-06, "loss": 0.3247, "step": 4941 }, { "epoch": 0.7086320619443648, "grad_norm": 0.37998542189598083, "learning_rate": 9.445659065706861e-06, "loss": 0.355, "step": 4942 }, { "epoch": 0.7087754516776599, "grad_norm": 0.30462273955345154, "learning_rate": 9.445277211476e-06, "loss": 0.3265, "step": 4943 }, { "epoch": 0.708918841410955, "grad_norm": 0.3683180809020996, "learning_rate": 9.444895233495462e-06, "loss": 0.3288, "step": 4944 }, { "epoch": 0.70906223114425, "grad_norm": 0.36350008845329285, "learning_rate": 9.444513131775875e-06, "loss": 0.3593, "step": 4945 }, { "epoch": 0.7092056208775451, "grad_norm": 0.3306763172149658, "learning_rate": 9.444130906327881e-06, "loss": 0.3438, "step": 4946 }, { "epoch": 0.7093490106108402, "grad_norm": 0.3485810458660126, "learning_rate": 9.443748557162118e-06, "loss": 0.3245, "step": 4947 }, { "epoch": 0.7094924003441354, "grad_norm": 0.3681693375110626, "learning_rate": 9.443366084289233e-06, "loss": 0.3398, "step": 4948 }, { "epoch": 0.7096357900774305, "grad_norm": 0.33675068616867065, "learning_rate": 9.44298348771987e-06, "loss": 0.3344, "step": 4949 }, { "epoch": 0.7097791798107256, "grad_norm": 0.34630170464515686, "learning_rate": 9.442600767464683e-06, "loss": 0.3286, "step": 4950 }, { "epoch": 0.7099225695440207, "grad_norm": 0.3412284851074219, "learning_rate": 9.442217923534323e-06, "loss": 0.3406, "step": 4951 }, { "epoch": 0.7100659592773158, "grad_norm": 0.3084086775779724, "learning_rate": 9.441834955939449e-06, "loss": 0.3463, "step": 4952 }, { "epoch": 0.7102093490106108, "grad_norm": 0.36016836762428284, "learning_rate": 9.441451864690723e-06, "loss": 0.3407, "step": 4953 }, { "epoch": 0.7103527387439059, "grad_norm": 0.33369046449661255, "learning_rate": 9.44106864979881e-06, "loss": 0.3317, "step": 4954 }, { "epoch": 0.710496128477201, "grad_norm": 0.3190622627735138, "learning_rate": 9.440685311274376e-06, "loss": 0.3071, "step": 4955 }, { "epoch": 0.7106395182104961, "grad_norm": 0.3866938352584839, "learning_rate": 9.440301849128095e-06, "loss": 0.3475, "step": 4956 }, { "epoch": 0.7107829079437912, "grad_norm": 0.3438397943973541, "learning_rate": 9.439918263370643e-06, "loss": 0.3268, "step": 4957 }, { "epoch": 0.7109262976770864, "grad_norm": 0.3895823359489441, "learning_rate": 9.439534554012693e-06, "loss": 0.3393, "step": 4958 }, { "epoch": 0.7110696874103815, "grad_norm": 0.355823814868927, "learning_rate": 9.43915072106493e-06, "loss": 0.3106, "step": 4959 }, { "epoch": 0.7112130771436765, "grad_norm": 0.3616766929626465, "learning_rate": 9.438766764538042e-06, "loss": 0.3439, "step": 4960 }, { "epoch": 0.7113564668769716, "grad_norm": 0.32822540402412415, "learning_rate": 9.438382684442715e-06, "loss": 0.3478, "step": 4961 }, { "epoch": 0.7114998566102667, "grad_norm": 0.34591272473335266, "learning_rate": 9.437998480789641e-06, "loss": 0.317, "step": 4962 }, { "epoch": 0.7116432463435618, "grad_norm": 0.33795663714408875, "learning_rate": 9.437614153589517e-06, "loss": 0.3425, "step": 4963 }, { "epoch": 0.7117866360768569, "grad_norm": 0.3694150745868683, "learning_rate": 9.437229702853041e-06, "loss": 0.3348, "step": 4964 }, { "epoch": 0.711930025810152, "grad_norm": 0.3306317925453186, "learning_rate": 9.436845128590915e-06, "loss": 0.3392, "step": 4965 }, { "epoch": 0.712073415543447, "grad_norm": 0.353299617767334, "learning_rate": 9.436460430813848e-06, "loss": 0.3441, "step": 4966 }, { "epoch": 0.7122168052767421, "grad_norm": 0.3625064790248871, "learning_rate": 9.436075609532545e-06, "loss": 0.3424, "step": 4967 }, { "epoch": 0.7123601950100373, "grad_norm": 0.34309330582618713, "learning_rate": 9.435690664757721e-06, "loss": 0.3442, "step": 4968 }, { "epoch": 0.7125035847433324, "grad_norm": 0.32557401061058044, "learning_rate": 9.435305596500093e-06, "loss": 0.3161, "step": 4969 }, { "epoch": 0.7126469744766275, "grad_norm": 0.35303595662117004, "learning_rate": 9.43492040477038e-06, "loss": 0.3459, "step": 4970 }, { "epoch": 0.7127903642099226, "grad_norm": 0.3778327703475952, "learning_rate": 9.434535089579304e-06, "loss": 0.345, "step": 4971 }, { "epoch": 0.7129337539432177, "grad_norm": 0.34907373785972595, "learning_rate": 9.434149650937594e-06, "loss": 0.3554, "step": 4972 }, { "epoch": 0.7130771436765128, "grad_norm": 0.3683696985244751, "learning_rate": 9.43376408885598e-06, "loss": 0.3596, "step": 4973 }, { "epoch": 0.7132205334098078, "grad_norm": 0.3277769386768341, "learning_rate": 9.43337840334519e-06, "loss": 0.3301, "step": 4974 }, { "epoch": 0.7133639231431029, "grad_norm": 0.36894068121910095, "learning_rate": 9.432992594415968e-06, "loss": 0.3412, "step": 4975 }, { "epoch": 0.713507312876398, "grad_norm": 0.32747477293014526, "learning_rate": 9.432606662079051e-06, "loss": 0.3489, "step": 4976 }, { "epoch": 0.7136507026096931, "grad_norm": 0.32746586203575134, "learning_rate": 9.432220606345183e-06, "loss": 0.346, "step": 4977 }, { "epoch": 0.7137940923429883, "grad_norm": 0.38728174567222595, "learning_rate": 9.431834427225112e-06, "loss": 0.3428, "step": 4978 }, { "epoch": 0.7139374820762834, "grad_norm": 0.36668506264686584, "learning_rate": 9.431448124729588e-06, "loss": 0.3297, "step": 4979 }, { "epoch": 0.7140808718095785, "grad_norm": 0.32357364892959595, "learning_rate": 9.431061698869363e-06, "loss": 0.3396, "step": 4980 }, { "epoch": 0.7142242615428736, "grad_norm": 0.34654873609542847, "learning_rate": 9.430675149655198e-06, "loss": 0.35, "step": 4981 }, { "epoch": 0.7143676512761686, "grad_norm": 0.34673011302948, "learning_rate": 9.430288477097852e-06, "loss": 0.3352, "step": 4982 }, { "epoch": 0.7145110410094637, "grad_norm": 0.3340079188346863, "learning_rate": 9.429901681208091e-06, "loss": 0.3627, "step": 4983 }, { "epoch": 0.7146544307427588, "grad_norm": 0.3189244568347931, "learning_rate": 9.42951476199668e-06, "loss": 0.3134, "step": 4984 }, { "epoch": 0.7147978204760539, "grad_norm": 0.3604307472705841, "learning_rate": 9.429127719474393e-06, "loss": 0.3374, "step": 4985 }, { "epoch": 0.714941210209349, "grad_norm": 0.3420432507991791, "learning_rate": 9.428740553652005e-06, "loss": 0.3236, "step": 4986 }, { "epoch": 0.7150845999426441, "grad_norm": 0.3266895115375519, "learning_rate": 9.428353264540291e-06, "loss": 0.3457, "step": 4987 }, { "epoch": 0.7152279896759393, "grad_norm": 0.39773955941200256, "learning_rate": 9.427965852150034e-06, "loss": 0.3417, "step": 4988 }, { "epoch": 0.7153713794092343, "grad_norm": 0.3476099669933319, "learning_rate": 9.42757831649202e-06, "loss": 0.33, "step": 4989 }, { "epoch": 0.7155147691425294, "grad_norm": 0.3467300832271576, "learning_rate": 9.427190657577035e-06, "loss": 0.3409, "step": 4990 }, { "epoch": 0.7156581588758245, "grad_norm": 0.36942586302757263, "learning_rate": 9.426802875415874e-06, "loss": 0.3625, "step": 4991 }, { "epoch": 0.7158015486091196, "grad_norm": 0.3950454592704773, "learning_rate": 9.426414970019331e-06, "loss": 0.3549, "step": 4992 }, { "epoch": 0.7159449383424147, "grad_norm": 0.37905624508857727, "learning_rate": 9.426026941398203e-06, "loss": 0.351, "step": 4993 }, { "epoch": 0.7160883280757098, "grad_norm": 0.38301628828048706, "learning_rate": 9.425638789563294e-06, "loss": 0.354, "step": 4994 }, { "epoch": 0.7162317178090049, "grad_norm": 0.3987453281879425, "learning_rate": 9.425250514525408e-06, "loss": 0.3349, "step": 4995 }, { "epoch": 0.7163751075422999, "grad_norm": 0.30713313817977905, "learning_rate": 9.424862116295357e-06, "loss": 0.3275, "step": 4996 }, { "epoch": 0.716518497275595, "grad_norm": 0.38161325454711914, "learning_rate": 9.42447359488395e-06, "loss": 0.3554, "step": 4997 }, { "epoch": 0.7166618870088901, "grad_norm": 0.34613797068595886, "learning_rate": 9.424084950302004e-06, "loss": 0.3472, "step": 4998 }, { "epoch": 0.7168052767421853, "grad_norm": 0.32308071851730347, "learning_rate": 9.423696182560339e-06, "loss": 0.3308, "step": 4999 }, { "epoch": 0.7169486664754804, "grad_norm": 0.33139047026634216, "learning_rate": 9.423307291669777e-06, "loss": 0.3498, "step": 5000 }, { "epoch": 0.7170920562087755, "grad_norm": 0.3298778235912323, "learning_rate": 9.422918277641144e-06, "loss": 0.3256, "step": 5001 }, { "epoch": 0.7172354459420706, "grad_norm": 0.338354229927063, "learning_rate": 9.422529140485272e-06, "loss": 0.3515, "step": 5002 }, { "epoch": 0.7173788356753656, "grad_norm": 0.3217373788356781, "learning_rate": 9.42213988021299e-06, "loss": 0.3373, "step": 5003 }, { "epoch": 0.7175222254086607, "grad_norm": 0.33899986743927, "learning_rate": 9.421750496835136e-06, "loss": 0.3233, "step": 5004 }, { "epoch": 0.7176656151419558, "grad_norm": 0.35043734312057495, "learning_rate": 9.421360990362552e-06, "loss": 0.3335, "step": 5005 }, { "epoch": 0.7178090048752509, "grad_norm": 0.33571138978004456, "learning_rate": 9.420971360806077e-06, "loss": 0.343, "step": 5006 }, { "epoch": 0.717952394608546, "grad_norm": 0.3537544310092926, "learning_rate": 9.420581608176562e-06, "loss": 0.3389, "step": 5007 }, { "epoch": 0.7180957843418411, "grad_norm": 0.33765488862991333, "learning_rate": 9.420191732484854e-06, "loss": 0.3209, "step": 5008 }, { "epoch": 0.7182391740751363, "grad_norm": 0.3398979902267456, "learning_rate": 9.419801733741806e-06, "loss": 0.3431, "step": 5009 }, { "epoch": 0.7183825638084314, "grad_norm": 0.30967575311660767, "learning_rate": 9.41941161195828e-06, "loss": 0.3261, "step": 5010 }, { "epoch": 0.7185259535417264, "grad_norm": 0.3176971971988678, "learning_rate": 9.419021367145132e-06, "loss": 0.3156, "step": 5011 }, { "epoch": 0.7186693432750215, "grad_norm": 0.3416244387626648, "learning_rate": 9.418630999313225e-06, "loss": 0.339, "step": 5012 }, { "epoch": 0.7188127330083166, "grad_norm": 0.3414463400840759, "learning_rate": 9.418240508473431e-06, "loss": 0.3598, "step": 5013 }, { "epoch": 0.7189561227416117, "grad_norm": 0.3068556785583496, "learning_rate": 9.417849894636615e-06, "loss": 0.3324, "step": 5014 }, { "epoch": 0.7190995124749068, "grad_norm": 0.3204443156719208, "learning_rate": 9.417459157813655e-06, "loss": 0.3504, "step": 5015 }, { "epoch": 0.7192429022082019, "grad_norm": 0.3194998502731323, "learning_rate": 9.417068298015428e-06, "loss": 0.3528, "step": 5016 }, { "epoch": 0.719386291941497, "grad_norm": 0.3303152918815613, "learning_rate": 9.416677315252813e-06, "loss": 0.3242, "step": 5017 }, { "epoch": 0.719529681674792, "grad_norm": 0.37648648023605347, "learning_rate": 9.416286209536698e-06, "loss": 0.325, "step": 5018 }, { "epoch": 0.7196730714080872, "grad_norm": 0.3171222507953644, "learning_rate": 9.415894980877966e-06, "loss": 0.3394, "step": 5019 }, { "epoch": 0.7198164611413823, "grad_norm": 0.3430967628955841, "learning_rate": 9.415503629287512e-06, "loss": 0.337, "step": 5020 }, { "epoch": 0.7199598508746774, "grad_norm": 0.3488004505634308, "learning_rate": 9.41511215477623e-06, "loss": 0.3303, "step": 5021 }, { "epoch": 0.7201032406079725, "grad_norm": 0.3645239770412445, "learning_rate": 9.414720557355014e-06, "loss": 0.3391, "step": 5022 }, { "epoch": 0.7202466303412676, "grad_norm": 0.3327391445636749, "learning_rate": 9.414328837034773e-06, "loss": 0.3205, "step": 5023 }, { "epoch": 0.7203900200745627, "grad_norm": 0.3135761022567749, "learning_rate": 9.413936993826405e-06, "loss": 0.3465, "step": 5024 }, { "epoch": 0.7205334098078577, "grad_norm": 0.31729400157928467, "learning_rate": 9.413545027740822e-06, "loss": 0.3794, "step": 5025 }, { "epoch": 0.7206767995411528, "grad_norm": 0.3114728331565857, "learning_rate": 9.413152938788935e-06, "loss": 0.3463, "step": 5026 }, { "epoch": 0.7208201892744479, "grad_norm": 0.3491017520427704, "learning_rate": 9.412760726981658e-06, "loss": 0.3457, "step": 5027 }, { "epoch": 0.720963579007743, "grad_norm": 0.297788143157959, "learning_rate": 9.412368392329912e-06, "loss": 0.3285, "step": 5028 }, { "epoch": 0.7211069687410382, "grad_norm": 0.31251901388168335, "learning_rate": 9.411975934844615e-06, "loss": 0.3233, "step": 5029 }, { "epoch": 0.7212503584743333, "grad_norm": 0.3300023376941681, "learning_rate": 9.411583354536696e-06, "loss": 0.3725, "step": 5030 }, { "epoch": 0.7213937482076284, "grad_norm": 0.31343260407447815, "learning_rate": 9.411190651417083e-06, "loss": 0.3405, "step": 5031 }, { "epoch": 0.7215371379409234, "grad_norm": 0.3591574728488922, "learning_rate": 9.410797825496708e-06, "loss": 0.3316, "step": 5032 }, { "epoch": 0.7216805276742185, "grad_norm": 0.3241869807243347, "learning_rate": 9.410404876786506e-06, "loss": 0.3299, "step": 5033 }, { "epoch": 0.7218239174075136, "grad_norm": 0.3236161470413208, "learning_rate": 9.410011805297416e-06, "loss": 0.3263, "step": 5034 }, { "epoch": 0.7219673071408087, "grad_norm": 0.35405269265174866, "learning_rate": 9.409618611040383e-06, "loss": 0.3558, "step": 5035 }, { "epoch": 0.7221106968741038, "grad_norm": 0.3593274652957916, "learning_rate": 9.40922529402635e-06, "loss": 0.3362, "step": 5036 }, { "epoch": 0.7222540866073989, "grad_norm": 0.342213898897171, "learning_rate": 9.40883185426627e-06, "loss": 0.3524, "step": 5037 }, { "epoch": 0.722397476340694, "grad_norm": 0.3279518485069275, "learning_rate": 9.408438291771088e-06, "loss": 0.3233, "step": 5038 }, { "epoch": 0.7225408660739892, "grad_norm": 0.3303312659263611, "learning_rate": 9.40804460655177e-06, "loss": 0.3262, "step": 5039 }, { "epoch": 0.7226842558072842, "grad_norm": 0.34511280059814453, "learning_rate": 9.40765079861927e-06, "loss": 0.3509, "step": 5040 }, { "epoch": 0.7228276455405793, "grad_norm": 0.33510807156562805, "learning_rate": 9.407256867984551e-06, "loss": 0.3542, "step": 5041 }, { "epoch": 0.7229710352738744, "grad_norm": 0.34187692403793335, "learning_rate": 9.406862814658581e-06, "loss": 0.3411, "step": 5042 }, { "epoch": 0.7231144250071695, "grad_norm": 0.33110618591308594, "learning_rate": 9.40646863865233e-06, "loss": 0.3312, "step": 5043 }, { "epoch": 0.7232578147404646, "grad_norm": 0.33757948875427246, "learning_rate": 9.40607433997677e-06, "loss": 0.3173, "step": 5044 }, { "epoch": 0.7234012044737597, "grad_norm": 0.3479660451412201, "learning_rate": 9.405679918642877e-06, "loss": 0.3439, "step": 5045 }, { "epoch": 0.7235445942070547, "grad_norm": 0.32359760999679565, "learning_rate": 9.405285374661633e-06, "loss": 0.3242, "step": 5046 }, { "epoch": 0.7236879839403498, "grad_norm": 0.3203551769256592, "learning_rate": 9.40489070804402e-06, "loss": 0.3271, "step": 5047 }, { "epoch": 0.7238313736736449, "grad_norm": 0.31363075971603394, "learning_rate": 9.404495918801028e-06, "loss": 0.3191, "step": 5048 }, { "epoch": 0.7239747634069401, "grad_norm": 0.34181392192840576, "learning_rate": 9.404101006943644e-06, "loss": 0.3332, "step": 5049 }, { "epoch": 0.7241181531402352, "grad_norm": 0.3152591586112976, "learning_rate": 9.403705972482862e-06, "loss": 0.3647, "step": 5050 }, { "epoch": 0.7242615428735303, "grad_norm": 0.30786004662513733, "learning_rate": 9.40331081542968e-06, "loss": 0.3434, "step": 5051 }, { "epoch": 0.7244049326068254, "grad_norm": 0.34975123405456543, "learning_rate": 9.4029155357951e-06, "loss": 0.3443, "step": 5052 }, { "epoch": 0.7245483223401205, "grad_norm": 0.29867804050445557, "learning_rate": 9.402520133590123e-06, "loss": 0.3247, "step": 5053 }, { "epoch": 0.7246917120734155, "grad_norm": 0.31950464844703674, "learning_rate": 9.402124608825758e-06, "loss": 0.3388, "step": 5054 }, { "epoch": 0.7248351018067106, "grad_norm": 0.3073100447654724, "learning_rate": 9.401728961513014e-06, "loss": 0.3209, "step": 5055 }, { "epoch": 0.7249784915400057, "grad_norm": 0.32195138931274414, "learning_rate": 9.40133319166291e-06, "loss": 0.337, "step": 5056 }, { "epoch": 0.7251218812733008, "grad_norm": 0.29510724544525146, "learning_rate": 9.400937299286459e-06, "loss": 0.3303, "step": 5057 }, { "epoch": 0.7252652710065959, "grad_norm": 0.332945317029953, "learning_rate": 9.400541284394683e-06, "loss": 0.3575, "step": 5058 }, { "epoch": 0.7254086607398911, "grad_norm": 0.29755476117134094, "learning_rate": 9.400145146998607e-06, "loss": 0.3228, "step": 5059 }, { "epoch": 0.7255520504731862, "grad_norm": 0.32432788610458374, "learning_rate": 9.39974888710926e-06, "loss": 0.3537, "step": 5060 }, { "epoch": 0.7256954402064812, "grad_norm": 0.31562259793281555, "learning_rate": 9.39935250473767e-06, "loss": 0.3441, "step": 5061 }, { "epoch": 0.7258388299397763, "grad_norm": 0.34560486674308777, "learning_rate": 9.398955999894877e-06, "loss": 0.3679, "step": 5062 }, { "epoch": 0.7259822196730714, "grad_norm": 0.29449576139450073, "learning_rate": 9.398559372591914e-06, "loss": 0.3207, "step": 5063 }, { "epoch": 0.7261256094063665, "grad_norm": 0.3314898610115051, "learning_rate": 9.398162622839823e-06, "loss": 0.3332, "step": 5064 }, { "epoch": 0.7262689991396616, "grad_norm": 0.3454066812992096, "learning_rate": 9.397765750649652e-06, "loss": 0.3306, "step": 5065 }, { "epoch": 0.7264123888729567, "grad_norm": 0.3303155303001404, "learning_rate": 9.397368756032445e-06, "loss": 0.3602, "step": 5066 }, { "epoch": 0.7265557786062518, "grad_norm": 0.3194107711315155, "learning_rate": 9.396971638999259e-06, "loss": 0.3393, "step": 5067 }, { "epoch": 0.7266991683395468, "grad_norm": 0.3144644498825073, "learning_rate": 9.396574399561147e-06, "loss": 0.3308, "step": 5068 }, { "epoch": 0.726842558072842, "grad_norm": 0.30045953392982483, "learning_rate": 9.396177037729164e-06, "loss": 0.3323, "step": 5069 }, { "epoch": 0.7269859478061371, "grad_norm": 0.30040183663368225, "learning_rate": 9.395779553514377e-06, "loss": 0.3374, "step": 5070 }, { "epoch": 0.7271293375394322, "grad_norm": 0.3265213072299957, "learning_rate": 9.395381946927849e-06, "loss": 0.3238, "step": 5071 }, { "epoch": 0.7272727272727273, "grad_norm": 0.2920713722705841, "learning_rate": 9.394984217980648e-06, "loss": 0.3439, "step": 5072 }, { "epoch": 0.7274161170060224, "grad_norm": 0.3354981243610382, "learning_rate": 9.394586366683846e-06, "loss": 0.3486, "step": 5073 }, { "epoch": 0.7275595067393175, "grad_norm": 0.2977798879146576, "learning_rate": 9.394188393048522e-06, "loss": 0.3269, "step": 5074 }, { "epoch": 0.7277028964726125, "grad_norm": 0.32408517599105835, "learning_rate": 9.393790297085752e-06, "loss": 0.3478, "step": 5075 }, { "epoch": 0.7278462862059076, "grad_norm": 0.329085111618042, "learning_rate": 9.393392078806621e-06, "loss": 0.3329, "step": 5076 }, { "epoch": 0.7279896759392027, "grad_norm": 0.29348185658454895, "learning_rate": 9.39299373822221e-06, "loss": 0.3292, "step": 5077 }, { "epoch": 0.7281330656724978, "grad_norm": 0.2909221351146698, "learning_rate": 9.392595275343611e-06, "loss": 0.3328, "step": 5078 }, { "epoch": 0.728276455405793, "grad_norm": 0.34935855865478516, "learning_rate": 9.392196690181917e-06, "loss": 0.3356, "step": 5079 }, { "epoch": 0.7284198451390881, "grad_norm": 0.35524436831474304, "learning_rate": 9.391797982748226e-06, "loss": 0.3391, "step": 5080 }, { "epoch": 0.7285632348723832, "grad_norm": 0.306780606508255, "learning_rate": 9.391399153053633e-06, "loss": 0.3463, "step": 5081 }, { "epoch": 0.7287066246056783, "grad_norm": 0.34221330285072327, "learning_rate": 9.391000201109242e-06, "loss": 0.338, "step": 5082 }, { "epoch": 0.7288500143389733, "grad_norm": 0.409767210483551, "learning_rate": 9.39060112692616e-06, "loss": 0.357, "step": 5083 }, { "epoch": 0.7289934040722684, "grad_norm": 0.3244655132293701, "learning_rate": 9.390201930515498e-06, "loss": 0.3301, "step": 5084 }, { "epoch": 0.7291367938055635, "grad_norm": 0.3224220871925354, "learning_rate": 9.389802611888368e-06, "loss": 0.3381, "step": 5085 }, { "epoch": 0.7292801835388586, "grad_norm": 0.33362647891044617, "learning_rate": 9.389403171055885e-06, "loss": 0.3162, "step": 5086 }, { "epoch": 0.7294235732721537, "grad_norm": 0.36691299080848694, "learning_rate": 9.38900360802917e-06, "loss": 0.337, "step": 5087 }, { "epoch": 0.7295669630054488, "grad_norm": 0.3245421051979065, "learning_rate": 9.388603922819347e-06, "loss": 0.3577, "step": 5088 }, { "epoch": 0.729710352738744, "grad_norm": 0.3269575536251068, "learning_rate": 9.38820411543754e-06, "loss": 0.3322, "step": 5089 }, { "epoch": 0.729853742472039, "grad_norm": 0.33841681480407715, "learning_rate": 9.387804185894881e-06, "loss": 0.3357, "step": 5090 }, { "epoch": 0.7299971322053341, "grad_norm": 0.3392511308193207, "learning_rate": 9.387404134202504e-06, "loss": 0.3287, "step": 5091 }, { "epoch": 0.7301405219386292, "grad_norm": 0.4252391755580902, "learning_rate": 9.387003960371546e-06, "loss": 0.3488, "step": 5092 }, { "epoch": 0.7302839116719243, "grad_norm": 0.33841150999069214, "learning_rate": 9.386603664413146e-06, "loss": 0.3309, "step": 5093 }, { "epoch": 0.7304273014052194, "grad_norm": 0.365295946598053, "learning_rate": 9.386203246338445e-06, "loss": 0.3172, "step": 5094 }, { "epoch": 0.7305706911385145, "grad_norm": 0.3492867350578308, "learning_rate": 9.385802706158596e-06, "loss": 0.3241, "step": 5095 }, { "epoch": 0.7307140808718096, "grad_norm": 0.3071993291378021, "learning_rate": 9.385402043884745e-06, "loss": 0.3112, "step": 5096 }, { "epoch": 0.7308574706051046, "grad_norm": 0.37071943283081055, "learning_rate": 9.385001259528047e-06, "loss": 0.3224, "step": 5097 }, { "epoch": 0.7310008603383997, "grad_norm": 0.3445199131965637, "learning_rate": 9.38460035309966e-06, "loss": 0.3544, "step": 5098 }, { "epoch": 0.7311442500716948, "grad_norm": 0.3305775821208954, "learning_rate": 9.384199324610743e-06, "loss": 0.3501, "step": 5099 }, { "epoch": 0.73128763980499, "grad_norm": 0.4071780741214752, "learning_rate": 9.383798174072461e-06, "loss": 0.3574, "step": 5100 }, { "epoch": 0.7314310295382851, "grad_norm": 0.3301633894443512, "learning_rate": 9.383396901495983e-06, "loss": 0.333, "step": 5101 }, { "epoch": 0.7315744192715802, "grad_norm": 0.3266198933124542, "learning_rate": 9.382995506892476e-06, "loss": 0.3319, "step": 5102 }, { "epoch": 0.7317178090048753, "grad_norm": 0.3632924556732178, "learning_rate": 9.382593990273118e-06, "loss": 0.3559, "step": 5103 }, { "epoch": 0.7318611987381703, "grad_norm": 0.3519163727760315, "learning_rate": 9.382192351649083e-06, "loss": 0.3226, "step": 5104 }, { "epoch": 0.7320045884714654, "grad_norm": 0.31759554147720337, "learning_rate": 9.381790591031557e-06, "loss": 0.3287, "step": 5105 }, { "epoch": 0.7321479782047605, "grad_norm": 0.3139306902885437, "learning_rate": 9.38138870843172e-06, "loss": 0.3348, "step": 5106 }, { "epoch": 0.7322913679380556, "grad_norm": 0.3540799915790558, "learning_rate": 9.380986703860761e-06, "loss": 0.3165, "step": 5107 }, { "epoch": 0.7324347576713507, "grad_norm": 0.32188868522644043, "learning_rate": 9.380584577329872e-06, "loss": 0.329, "step": 5108 }, { "epoch": 0.7325781474046458, "grad_norm": 0.3345193564891815, "learning_rate": 9.380182328850246e-06, "loss": 0.3292, "step": 5109 }, { "epoch": 0.732721537137941, "grad_norm": 0.3324742913246155, "learning_rate": 9.379779958433081e-06, "loss": 0.352, "step": 5110 }, { "epoch": 0.732864926871236, "grad_norm": 0.33626511693000793, "learning_rate": 9.379377466089582e-06, "loss": 0.3365, "step": 5111 }, { "epoch": 0.7330083166045311, "grad_norm": 0.36383143067359924, "learning_rate": 9.37897485183095e-06, "loss": 0.3247, "step": 5112 }, { "epoch": 0.7331517063378262, "grad_norm": 0.31404897570610046, "learning_rate": 9.378572115668394e-06, "loss": 0.3265, "step": 5113 }, { "epoch": 0.7332950960711213, "grad_norm": 0.39668673276901245, "learning_rate": 9.378169257613126e-06, "loss": 0.3474, "step": 5114 }, { "epoch": 0.7334384858044164, "grad_norm": 0.33124473690986633, "learning_rate": 9.37776627767636e-06, "loss": 0.3467, "step": 5115 }, { "epoch": 0.7335818755377115, "grad_norm": 0.3594803214073181, "learning_rate": 9.377363175869317e-06, "loss": 0.3527, "step": 5116 }, { "epoch": 0.7337252652710066, "grad_norm": 0.32358425855636597, "learning_rate": 9.376959952203214e-06, "loss": 0.3252, "step": 5117 }, { "epoch": 0.7338686550043017, "grad_norm": 0.4008440375328064, "learning_rate": 9.37655660668928e-06, "loss": 0.3414, "step": 5118 }, { "epoch": 0.7340120447375967, "grad_norm": 0.35641127824783325, "learning_rate": 9.376153139338744e-06, "loss": 0.3222, "step": 5119 }, { "epoch": 0.7341554344708919, "grad_norm": 0.341458261013031, "learning_rate": 9.375749550162836e-06, "loss": 0.341, "step": 5120 }, { "epoch": 0.734298824204187, "grad_norm": 0.40481704473495483, "learning_rate": 9.37534583917279e-06, "loss": 0.3369, "step": 5121 }, { "epoch": 0.7344422139374821, "grad_norm": 0.35134604573249817, "learning_rate": 9.374942006379848e-06, "loss": 0.325, "step": 5122 }, { "epoch": 0.7345856036707772, "grad_norm": 0.3166784346103668, "learning_rate": 9.37453805179525e-06, "loss": 0.335, "step": 5123 }, { "epoch": 0.7347289934040723, "grad_norm": 0.3164478540420532, "learning_rate": 9.374133975430243e-06, "loss": 0.3209, "step": 5124 }, { "epoch": 0.7348723831373674, "grad_norm": 0.33993732929229736, "learning_rate": 9.373729777296072e-06, "loss": 0.3484, "step": 5125 }, { "epoch": 0.7350157728706624, "grad_norm": 0.37278521060943604, "learning_rate": 9.373325457403994e-06, "loss": 0.3323, "step": 5126 }, { "epoch": 0.7351591626039575, "grad_norm": 0.30534833669662476, "learning_rate": 9.372921015765262e-06, "loss": 0.3323, "step": 5127 }, { "epoch": 0.7353025523372526, "grad_norm": 0.3412076532840729, "learning_rate": 9.372516452391137e-06, "loss": 0.3514, "step": 5128 }, { "epoch": 0.7354459420705477, "grad_norm": 0.3501673638820648, "learning_rate": 9.372111767292877e-06, "loss": 0.3231, "step": 5129 }, { "epoch": 0.7355893318038429, "grad_norm": 0.3680213689804077, "learning_rate": 9.371706960481755e-06, "loss": 0.354, "step": 5130 }, { "epoch": 0.735732721537138, "grad_norm": 0.35070300102233887, "learning_rate": 9.371302031969034e-06, "loss": 0.3284, "step": 5131 }, { "epoch": 0.7358761112704331, "grad_norm": 0.34975162148475647, "learning_rate": 9.370896981765988e-06, "loss": 0.3168, "step": 5132 }, { "epoch": 0.7360195010037281, "grad_norm": 0.3604351580142975, "learning_rate": 9.370491809883895e-06, "loss": 0.3603, "step": 5133 }, { "epoch": 0.7361628907370232, "grad_norm": 0.31862741708755493, "learning_rate": 9.370086516334034e-06, "loss": 0.3455, "step": 5134 }, { "epoch": 0.7363062804703183, "grad_norm": 0.3044281601905823, "learning_rate": 9.369681101127686e-06, "loss": 0.3546, "step": 5135 }, { "epoch": 0.7364496702036134, "grad_norm": 0.34244632720947266, "learning_rate": 9.369275564276136e-06, "loss": 0.3399, "step": 5136 }, { "epoch": 0.7365930599369085, "grad_norm": 0.34207454323768616, "learning_rate": 9.368869905790679e-06, "loss": 0.3245, "step": 5137 }, { "epoch": 0.7367364496702036, "grad_norm": 0.3027189075946808, "learning_rate": 9.368464125682601e-06, "loss": 0.3483, "step": 5138 }, { "epoch": 0.7368798394034987, "grad_norm": 0.3142508864402771, "learning_rate": 9.368058223963205e-06, "loss": 0.3313, "step": 5139 }, { "epoch": 0.7370232291367939, "grad_norm": 0.34937506914138794, "learning_rate": 9.367652200643786e-06, "loss": 0.3571, "step": 5140 }, { "epoch": 0.7371666188700889, "grad_norm": 0.34310075640678406, "learning_rate": 9.367246055735649e-06, "loss": 0.3366, "step": 5141 }, { "epoch": 0.737310008603384, "grad_norm": 0.364205002784729, "learning_rate": 9.366839789250098e-06, "loss": 0.3392, "step": 5142 }, { "epoch": 0.7374533983366791, "grad_norm": 0.31127581000328064, "learning_rate": 9.366433401198446e-06, "loss": 0.3329, "step": 5143 }, { "epoch": 0.7375967880699742, "grad_norm": 0.3561396598815918, "learning_rate": 9.366026891592004e-06, "loss": 0.3537, "step": 5144 }, { "epoch": 0.7377401778032693, "grad_norm": 0.36825382709503174, "learning_rate": 9.36562026044209e-06, "loss": 0.3543, "step": 5145 }, { "epoch": 0.7378835675365644, "grad_norm": 0.32182973623275757, "learning_rate": 9.365213507760025e-06, "loss": 0.3514, "step": 5146 }, { "epoch": 0.7380269572698595, "grad_norm": 0.32710009813308716, "learning_rate": 9.364806633557128e-06, "loss": 0.3469, "step": 5147 }, { "epoch": 0.7381703470031545, "grad_norm": 0.3230937719345093, "learning_rate": 9.36439963784473e-06, "loss": 0.3536, "step": 5148 }, { "epoch": 0.7383137367364496, "grad_norm": 0.34285953640937805, "learning_rate": 9.363992520634161e-06, "loss": 0.3294, "step": 5149 }, { "epoch": 0.7384571264697448, "grad_norm": 0.36181676387786865, "learning_rate": 9.363585281936753e-06, "loss": 0.3573, "step": 5150 }, { "epoch": 0.7386005162030399, "grad_norm": 0.30384212732315063, "learning_rate": 9.363177921763843e-06, "loss": 0.3188, "step": 5151 }, { "epoch": 0.738743905936335, "grad_norm": 0.3729681372642517, "learning_rate": 9.36277044012677e-06, "loss": 0.3292, "step": 5152 }, { "epoch": 0.7388872956696301, "grad_norm": 0.33605721592903137, "learning_rate": 9.36236283703688e-06, "loss": 0.3493, "step": 5153 }, { "epoch": 0.7390306854029252, "grad_norm": 0.3449658751487732, "learning_rate": 9.361955112505521e-06, "loss": 0.3437, "step": 5154 }, { "epoch": 0.7391740751362202, "grad_norm": 0.3239240050315857, "learning_rate": 9.36154726654404e-06, "loss": 0.3441, "step": 5155 }, { "epoch": 0.7393174648695153, "grad_norm": 0.3537641167640686, "learning_rate": 9.361139299163793e-06, "loss": 0.3502, "step": 5156 }, { "epoch": 0.7394608546028104, "grad_norm": 0.38332289457321167, "learning_rate": 9.360731210376135e-06, "loss": 0.3207, "step": 5157 }, { "epoch": 0.7396042443361055, "grad_norm": 0.3643508553504944, "learning_rate": 9.360323000192431e-06, "loss": 0.3145, "step": 5158 }, { "epoch": 0.7397476340694006, "grad_norm": 0.3420413136482239, "learning_rate": 9.359914668624039e-06, "loss": 0.3519, "step": 5159 }, { "epoch": 0.7398910238026958, "grad_norm": 0.3637307286262512, "learning_rate": 9.359506215682331e-06, "loss": 0.3418, "step": 5160 }, { "epoch": 0.7400344135359909, "grad_norm": 0.331298291683197, "learning_rate": 9.359097641378676e-06, "loss": 0.3484, "step": 5161 }, { "epoch": 0.740177803269286, "grad_norm": 0.31905215978622437, "learning_rate": 9.358688945724449e-06, "loss": 0.3295, "step": 5162 }, { "epoch": 0.740321193002581, "grad_norm": 0.34807977080345154, "learning_rate": 9.358280128731026e-06, "loss": 0.3307, "step": 5163 }, { "epoch": 0.7404645827358761, "grad_norm": 0.3413431644439697, "learning_rate": 9.357871190409786e-06, "loss": 0.3316, "step": 5164 }, { "epoch": 0.7406079724691712, "grad_norm": 0.33184075355529785, "learning_rate": 9.35746213077212e-06, "loss": 0.3408, "step": 5165 }, { "epoch": 0.7407513622024663, "grad_norm": 0.32197123765945435, "learning_rate": 9.357052949829409e-06, "loss": 0.3519, "step": 5166 }, { "epoch": 0.7408947519357614, "grad_norm": 0.34516236186027527, "learning_rate": 9.356643647593047e-06, "loss": 0.3512, "step": 5167 }, { "epoch": 0.7410381416690565, "grad_norm": 0.3073960244655609, "learning_rate": 9.356234224074426e-06, "loss": 0.3208, "step": 5168 }, { "epoch": 0.7411815314023515, "grad_norm": 0.3085488975048065, "learning_rate": 9.355824679284947e-06, "loss": 0.3427, "step": 5169 }, { "epoch": 0.7413249211356467, "grad_norm": 0.29845690727233887, "learning_rate": 9.35541501323601e-06, "loss": 0.3341, "step": 5170 }, { "epoch": 0.7414683108689418, "grad_norm": 0.3523816764354706, "learning_rate": 9.355005225939018e-06, "loss": 0.349, "step": 5171 }, { "epoch": 0.7416117006022369, "grad_norm": 0.3137089014053345, "learning_rate": 9.35459531740538e-06, "loss": 0.3356, "step": 5172 }, { "epoch": 0.741755090335532, "grad_norm": 0.31132298707962036, "learning_rate": 9.354185287646505e-06, "loss": 0.3461, "step": 5173 }, { "epoch": 0.7418984800688271, "grad_norm": 0.32930445671081543, "learning_rate": 9.353775136673813e-06, "loss": 0.3532, "step": 5174 }, { "epoch": 0.7420418698021222, "grad_norm": 0.322672575712204, "learning_rate": 9.353364864498715e-06, "loss": 0.3737, "step": 5175 }, { "epoch": 0.7421852595354173, "grad_norm": 0.3244020938873291, "learning_rate": 9.352954471132638e-06, "loss": 0.3337, "step": 5176 }, { "epoch": 0.7423286492687123, "grad_norm": 0.33341625332832336, "learning_rate": 9.352543956587006e-06, "loss": 0.3431, "step": 5177 }, { "epoch": 0.7424720390020074, "grad_norm": 0.30102699995040894, "learning_rate": 9.352133320873243e-06, "loss": 0.3409, "step": 5178 }, { "epoch": 0.7426154287353025, "grad_norm": 0.3173570930957794, "learning_rate": 9.351722564002784e-06, "loss": 0.345, "step": 5179 }, { "epoch": 0.7427588184685977, "grad_norm": 0.3296526372432709, "learning_rate": 9.351311685987064e-06, "loss": 0.3257, "step": 5180 }, { "epoch": 0.7429022082018928, "grad_norm": 0.30910468101501465, "learning_rate": 9.35090068683752e-06, "loss": 0.3382, "step": 5181 }, { "epoch": 0.7430455979351879, "grad_norm": 0.3247867822647095, "learning_rate": 9.350489566565593e-06, "loss": 0.3243, "step": 5182 }, { "epoch": 0.743188987668483, "grad_norm": 0.30293864011764526, "learning_rate": 9.350078325182728e-06, "loss": 0.3337, "step": 5183 }, { "epoch": 0.743332377401778, "grad_norm": 0.3074156939983368, "learning_rate": 9.349666962700377e-06, "loss": 0.3241, "step": 5184 }, { "epoch": 0.7434757671350731, "grad_norm": 0.3129982054233551, "learning_rate": 9.349255479129986e-06, "loss": 0.3335, "step": 5185 }, { "epoch": 0.7436191568683682, "grad_norm": 0.33433759212493896, "learning_rate": 9.348843874483014e-06, "loss": 0.3325, "step": 5186 }, { "epoch": 0.7437625466016633, "grad_norm": 0.33038613200187683, "learning_rate": 9.348432148770918e-06, "loss": 0.3393, "step": 5187 }, { "epoch": 0.7439059363349584, "grad_norm": 0.30355921387672424, "learning_rate": 9.348020302005161e-06, "loss": 0.3327, "step": 5188 }, { "epoch": 0.7440493260682535, "grad_norm": 0.3276514410972595, "learning_rate": 9.347608334197207e-06, "loss": 0.3349, "step": 5189 }, { "epoch": 0.7441927158015486, "grad_norm": 0.32322368025779724, "learning_rate": 9.347196245358524e-06, "loss": 0.3801, "step": 5190 }, { "epoch": 0.7443361055348438, "grad_norm": 0.31272900104522705, "learning_rate": 9.346784035500585e-06, "loss": 0.3137, "step": 5191 }, { "epoch": 0.7444794952681388, "grad_norm": 0.3171924948692322, "learning_rate": 9.346371704634865e-06, "loss": 0.3308, "step": 5192 }, { "epoch": 0.7446228850014339, "grad_norm": 0.34841805696487427, "learning_rate": 9.345959252772844e-06, "loss": 0.3556, "step": 5193 }, { "epoch": 0.744766274734729, "grad_norm": 0.2867702841758728, "learning_rate": 9.345546679926001e-06, "loss": 0.3378, "step": 5194 }, { "epoch": 0.7449096644680241, "grad_norm": 0.31704220175743103, "learning_rate": 9.345133986105825e-06, "loss": 0.3317, "step": 5195 }, { "epoch": 0.7450530542013192, "grad_norm": 0.322242796421051, "learning_rate": 9.3447211713238e-06, "loss": 0.3558, "step": 5196 }, { "epoch": 0.7451964439346143, "grad_norm": 0.3156258165836334, "learning_rate": 9.344308235591425e-06, "loss": 0.3499, "step": 5197 }, { "epoch": 0.7453398336679093, "grad_norm": 0.2877963185310364, "learning_rate": 9.343895178920187e-06, "loss": 0.3238, "step": 5198 }, { "epoch": 0.7454832234012044, "grad_norm": 0.3108045756816864, "learning_rate": 9.343482001321594e-06, "loss": 0.3248, "step": 5199 }, { "epoch": 0.7456266131344995, "grad_norm": 0.36500445008277893, "learning_rate": 9.34306870280714e-06, "loss": 0.3527, "step": 5200 }, { "epoch": 0.7457700028677947, "grad_norm": 0.31198960542678833, "learning_rate": 9.342655283388335e-06, "loss": 0.3278, "step": 5201 }, { "epoch": 0.7459133926010898, "grad_norm": 0.3269873857498169, "learning_rate": 9.342241743076689e-06, "loss": 0.3429, "step": 5202 }, { "epoch": 0.7460567823343849, "grad_norm": 0.3224961459636688, "learning_rate": 9.341828081883711e-06, "loss": 0.3193, "step": 5203 }, { "epoch": 0.74620017206768, "grad_norm": 0.31593406200408936, "learning_rate": 9.341414299820918e-06, "loss": 0.3313, "step": 5204 }, { "epoch": 0.746343561800975, "grad_norm": 0.28591394424438477, "learning_rate": 9.34100039689983e-06, "loss": 0.3403, "step": 5205 }, { "epoch": 0.7464869515342701, "grad_norm": 0.3271217942237854, "learning_rate": 9.340586373131967e-06, "loss": 0.3305, "step": 5206 }, { "epoch": 0.7466303412675652, "grad_norm": 0.31725457310676575, "learning_rate": 9.340172228528858e-06, "loss": 0.3435, "step": 5207 }, { "epoch": 0.7467737310008603, "grad_norm": 0.3035881221294403, "learning_rate": 9.339757963102032e-06, "loss": 0.3471, "step": 5208 }, { "epoch": 0.7469171207341554, "grad_norm": 0.32433316111564636, "learning_rate": 9.339343576863018e-06, "loss": 0.3598, "step": 5209 }, { "epoch": 0.7470605104674505, "grad_norm": 0.3347424864768982, "learning_rate": 9.338929069823354e-06, "loss": 0.349, "step": 5210 }, { "epoch": 0.7472039002007457, "grad_norm": 0.29550305008888245, "learning_rate": 9.338514441994579e-06, "loss": 0.3397, "step": 5211 }, { "epoch": 0.7473472899340408, "grad_norm": 0.35774192214012146, "learning_rate": 9.338099693388237e-06, "loss": 0.3616, "step": 5212 }, { "epoch": 0.7474906796673358, "grad_norm": 0.34493589401245117, "learning_rate": 9.337684824015874e-06, "loss": 0.3313, "step": 5213 }, { "epoch": 0.7476340694006309, "grad_norm": 0.2996898889541626, "learning_rate": 9.337269833889035e-06, "loss": 0.3308, "step": 5214 }, { "epoch": 0.747777459133926, "grad_norm": 0.30227652192115784, "learning_rate": 9.336854723019278e-06, "loss": 0.3244, "step": 5215 }, { "epoch": 0.7479208488672211, "grad_norm": 0.3271496593952179, "learning_rate": 9.336439491418154e-06, "loss": 0.3461, "step": 5216 }, { "epoch": 0.7480642386005162, "grad_norm": 0.33684101700782776, "learning_rate": 9.336024139097229e-06, "loss": 0.3389, "step": 5217 }, { "epoch": 0.7482076283338113, "grad_norm": 0.32322224974632263, "learning_rate": 9.335608666068059e-06, "loss": 0.3136, "step": 5218 }, { "epoch": 0.7483510180671064, "grad_norm": 0.3388175070285797, "learning_rate": 9.335193072342215e-06, "loss": 0.342, "step": 5219 }, { "epoch": 0.7484944078004014, "grad_norm": 0.30815792083740234, "learning_rate": 9.334777357931264e-06, "loss": 0.3234, "step": 5220 }, { "epoch": 0.7486377975336966, "grad_norm": 0.29564040899276733, "learning_rate": 9.33436152284678e-06, "loss": 0.3234, "step": 5221 }, { "epoch": 0.7487811872669917, "grad_norm": 0.2969842851161957, "learning_rate": 9.333945567100337e-06, "loss": 0.327, "step": 5222 }, { "epoch": 0.7489245770002868, "grad_norm": 0.3164084851741791, "learning_rate": 9.333529490703519e-06, "loss": 0.3557, "step": 5223 }, { "epoch": 0.7490679667335819, "grad_norm": 0.34173518419265747, "learning_rate": 9.333113293667904e-06, "loss": 0.347, "step": 5224 }, { "epoch": 0.749211356466877, "grad_norm": 0.2957831025123596, "learning_rate": 9.332696976005081e-06, "loss": 0.334, "step": 5225 }, { "epoch": 0.7493547462001721, "grad_norm": 0.33888718485832214, "learning_rate": 9.33228053772664e-06, "loss": 0.3462, "step": 5226 }, { "epoch": 0.7494981359334671, "grad_norm": 0.32006484270095825, "learning_rate": 9.331863978844172e-06, "loss": 0.3507, "step": 5227 }, { "epoch": 0.7496415256667622, "grad_norm": 0.2968797981739044, "learning_rate": 9.331447299369276e-06, "loss": 0.3331, "step": 5228 }, { "epoch": 0.7497849154000573, "grad_norm": 0.3098127841949463, "learning_rate": 9.331030499313549e-06, "loss": 0.3316, "step": 5229 }, { "epoch": 0.7499283051333524, "grad_norm": 0.33941811323165894, "learning_rate": 9.330613578688594e-06, "loss": 0.3536, "step": 5230 }, { "epoch": 0.7500716948666476, "grad_norm": 0.3263980746269226, "learning_rate": 9.33019653750602e-06, "loss": 0.344, "step": 5231 }, { "epoch": 0.7502150845999427, "grad_norm": 0.31331855058670044, "learning_rate": 9.329779375777437e-06, "loss": 0.3332, "step": 5232 }, { "epoch": 0.7503584743332378, "grad_norm": 0.3221499025821686, "learning_rate": 9.329362093514455e-06, "loss": 0.3294, "step": 5233 }, { "epoch": 0.7505018640665329, "grad_norm": 0.32770663499832153, "learning_rate": 9.328944690728692e-06, "loss": 0.3325, "step": 5234 }, { "epoch": 0.7506452537998279, "grad_norm": 0.3193347454071045, "learning_rate": 9.328527167431769e-06, "loss": 0.32, "step": 5235 }, { "epoch": 0.750788643533123, "grad_norm": 0.33463236689567566, "learning_rate": 9.328109523635306e-06, "loss": 0.3329, "step": 5236 }, { "epoch": 0.7509320332664181, "grad_norm": 0.2999224364757538, "learning_rate": 9.327691759350935e-06, "loss": 0.3666, "step": 5237 }, { "epoch": 0.7510754229997132, "grad_norm": 0.3157896101474762, "learning_rate": 9.32727387459028e-06, "loss": 0.3338, "step": 5238 }, { "epoch": 0.7512188127330083, "grad_norm": 0.34849071502685547, "learning_rate": 9.326855869364978e-06, "loss": 0.3479, "step": 5239 }, { "epoch": 0.7513622024663034, "grad_norm": 0.3047585189342499, "learning_rate": 9.326437743686665e-06, "loss": 0.3152, "step": 5240 }, { "epoch": 0.7515055921995986, "grad_norm": 0.2888111174106598, "learning_rate": 9.32601949756698e-06, "loss": 0.3358, "step": 5241 }, { "epoch": 0.7516489819328936, "grad_norm": 0.33771488070487976, "learning_rate": 9.325601131017566e-06, "loss": 0.3384, "step": 5242 }, { "epoch": 0.7517923716661887, "grad_norm": 0.35772326588630676, "learning_rate": 9.325182644050071e-06, "loss": 0.3443, "step": 5243 }, { "epoch": 0.7519357613994838, "grad_norm": 0.33818140625953674, "learning_rate": 9.324764036676146e-06, "loss": 0.3523, "step": 5244 }, { "epoch": 0.7520791511327789, "grad_norm": 0.3465818464756012, "learning_rate": 9.32434530890744e-06, "loss": 0.3489, "step": 5245 }, { "epoch": 0.752222540866074, "grad_norm": 0.33117055892944336, "learning_rate": 9.323926460755615e-06, "loss": 0.3414, "step": 5246 }, { "epoch": 0.7523659305993691, "grad_norm": 0.3563920259475708, "learning_rate": 9.323507492232329e-06, "loss": 0.3612, "step": 5247 }, { "epoch": 0.7525093203326642, "grad_norm": 0.3104873597621918, "learning_rate": 9.323088403349244e-06, "loss": 0.3486, "step": 5248 }, { "epoch": 0.7526527100659592, "grad_norm": 0.3233811855316162, "learning_rate": 9.322669194118029e-06, "loss": 0.3192, "step": 5249 }, { "epoch": 0.7527960997992543, "grad_norm": 0.31205177307128906, "learning_rate": 9.322249864550352e-06, "loss": 0.3329, "step": 5250 }, { "epoch": 0.7529394895325495, "grad_norm": 0.31855571269989014, "learning_rate": 9.321830414657888e-06, "loss": 0.3313, "step": 5251 }, { "epoch": 0.7530828792658446, "grad_norm": 0.31748566031455994, "learning_rate": 9.321410844452315e-06, "loss": 0.3579, "step": 5252 }, { "epoch": 0.7532262689991397, "grad_norm": 0.3429695963859558, "learning_rate": 9.32099115394531e-06, "loss": 0.3556, "step": 5253 }, { "epoch": 0.7533696587324348, "grad_norm": 0.33950158953666687, "learning_rate": 9.320571343148559e-06, "loss": 0.3378, "step": 5254 }, { "epoch": 0.7535130484657299, "grad_norm": 0.337862104177475, "learning_rate": 9.320151412073748e-06, "loss": 0.3491, "step": 5255 }, { "epoch": 0.753656438199025, "grad_norm": 0.3172016143798828, "learning_rate": 9.319731360732567e-06, "loss": 0.337, "step": 5256 }, { "epoch": 0.75379982793232, "grad_norm": 0.33314570784568787, "learning_rate": 9.31931118913671e-06, "loss": 0.3415, "step": 5257 }, { "epoch": 0.7539432176656151, "grad_norm": 0.29896900057792664, "learning_rate": 9.318890897297874e-06, "loss": 0.3069, "step": 5258 }, { "epoch": 0.7540866073989102, "grad_norm": 0.30674734711647034, "learning_rate": 9.318470485227757e-06, "loss": 0.341, "step": 5259 }, { "epoch": 0.7542299971322053, "grad_norm": 0.3551415205001831, "learning_rate": 9.318049952938068e-06, "loss": 0.3507, "step": 5260 }, { "epoch": 0.7543733868655005, "grad_norm": 0.3173074722290039, "learning_rate": 9.31762930044051e-06, "loss": 0.3449, "step": 5261 }, { "epoch": 0.7545167765987956, "grad_norm": 0.339044988155365, "learning_rate": 9.317208527746793e-06, "loss": 0.3472, "step": 5262 }, { "epoch": 0.7546601663320907, "grad_norm": 0.3362966775894165, "learning_rate": 9.31678763486863e-06, "loss": 0.3515, "step": 5263 }, { "epoch": 0.7548035560653857, "grad_norm": 0.30881643295288086, "learning_rate": 9.316366621817744e-06, "loss": 0.3301, "step": 5264 }, { "epoch": 0.7549469457986808, "grad_norm": 0.31511905789375305, "learning_rate": 9.31594548860585e-06, "loss": 0.3475, "step": 5265 }, { "epoch": 0.7550903355319759, "grad_norm": 0.3177552819252014, "learning_rate": 9.315524235244671e-06, "loss": 0.338, "step": 5266 }, { "epoch": 0.755233725265271, "grad_norm": 0.3376806676387787, "learning_rate": 9.315102861745935e-06, "loss": 0.3488, "step": 5267 }, { "epoch": 0.7553771149985661, "grad_norm": 0.32732921838760376, "learning_rate": 9.314681368121375e-06, "loss": 0.3371, "step": 5268 }, { "epoch": 0.7555205047318612, "grad_norm": 0.29396939277648926, "learning_rate": 9.314259754382723e-06, "loss": 0.3337, "step": 5269 }, { "epoch": 0.7556638944651562, "grad_norm": 0.3338756859302521, "learning_rate": 9.313838020541716e-06, "loss": 0.347, "step": 5270 }, { "epoch": 0.7558072841984514, "grad_norm": 0.329155832529068, "learning_rate": 9.313416166610095e-06, "loss": 0.337, "step": 5271 }, { "epoch": 0.7559506739317465, "grad_norm": 0.3148277699947357, "learning_rate": 9.3129941925996e-06, "loss": 0.3414, "step": 5272 }, { "epoch": 0.7560940636650416, "grad_norm": 0.3227492868900299, "learning_rate": 9.312572098521983e-06, "loss": 0.3311, "step": 5273 }, { "epoch": 0.7562374533983367, "grad_norm": 0.30640870332717896, "learning_rate": 9.312149884388995e-06, "loss": 0.3235, "step": 5274 }, { "epoch": 0.7563808431316318, "grad_norm": 0.3618970215320587, "learning_rate": 9.311727550212385e-06, "loss": 0.3374, "step": 5275 }, { "epoch": 0.7565242328649269, "grad_norm": 0.32564613223075867, "learning_rate": 9.311305096003914e-06, "loss": 0.3336, "step": 5276 }, { "epoch": 0.756667622598222, "grad_norm": 0.2911010682582855, "learning_rate": 9.310882521775341e-06, "loss": 0.3403, "step": 5277 }, { "epoch": 0.756811012331517, "grad_norm": 0.2897418737411499, "learning_rate": 9.31045982753843e-06, "loss": 0.3419, "step": 5278 }, { "epoch": 0.7569544020648121, "grad_norm": 0.32344740629196167, "learning_rate": 9.310037013304946e-06, "loss": 0.3419, "step": 5279 }, { "epoch": 0.7570977917981072, "grad_norm": 0.35140225291252136, "learning_rate": 9.309614079086665e-06, "loss": 0.3101, "step": 5280 }, { "epoch": 0.7572411815314023, "grad_norm": 0.30963605642318726, "learning_rate": 9.309191024895356e-06, "loss": 0.3206, "step": 5281 }, { "epoch": 0.7573845712646975, "grad_norm": 0.3223007619380951, "learning_rate": 9.308767850742797e-06, "loss": 0.3553, "step": 5282 }, { "epoch": 0.7575279609979926, "grad_norm": 0.3257090449333191, "learning_rate": 9.30834455664077e-06, "loss": 0.3277, "step": 5283 }, { "epoch": 0.7576713507312877, "grad_norm": 0.30481061339378357, "learning_rate": 9.307921142601058e-06, "loss": 0.3262, "step": 5284 }, { "epoch": 0.7578147404645827, "grad_norm": 0.31839755177497864, "learning_rate": 9.307497608635447e-06, "loss": 0.3327, "step": 5285 }, { "epoch": 0.7579581301978778, "grad_norm": 0.2934972941875458, "learning_rate": 9.30707395475573e-06, "loss": 0.3199, "step": 5286 }, { "epoch": 0.7581015199311729, "grad_norm": 0.3440327048301697, "learning_rate": 9.3066501809737e-06, "loss": 0.3324, "step": 5287 }, { "epoch": 0.758244909664468, "grad_norm": 0.3429175913333893, "learning_rate": 9.306226287301152e-06, "loss": 0.3393, "step": 5288 }, { "epoch": 0.7583882993977631, "grad_norm": 0.32083916664123535, "learning_rate": 9.305802273749892e-06, "loss": 0.3455, "step": 5289 }, { "epoch": 0.7585316891310582, "grad_norm": 0.3010948896408081, "learning_rate": 9.305378140331715e-06, "loss": 0.3362, "step": 5290 }, { "epoch": 0.7586750788643533, "grad_norm": 0.32784149050712585, "learning_rate": 9.304953887058437e-06, "loss": 0.3424, "step": 5291 }, { "epoch": 0.7588184685976485, "grad_norm": 0.3417082726955414, "learning_rate": 9.304529513941865e-06, "loss": 0.3302, "step": 5292 }, { "epoch": 0.7589618583309435, "grad_norm": 0.27431154251098633, "learning_rate": 9.304105020993811e-06, "loss": 0.3287, "step": 5293 }, { "epoch": 0.7591052480642386, "grad_norm": 0.31862524151802063, "learning_rate": 9.303680408226096e-06, "loss": 0.3501, "step": 5294 }, { "epoch": 0.7592486377975337, "grad_norm": 0.34553906321525574, "learning_rate": 9.303255675650538e-06, "loss": 0.3252, "step": 5295 }, { "epoch": 0.7593920275308288, "grad_norm": 0.35449567437171936, "learning_rate": 9.302830823278962e-06, "loss": 0.3457, "step": 5296 }, { "epoch": 0.7595354172641239, "grad_norm": 0.29638954997062683, "learning_rate": 9.302405851123194e-06, "loss": 0.343, "step": 5297 }, { "epoch": 0.759678806997419, "grad_norm": 0.30489009618759155, "learning_rate": 9.301980759195067e-06, "loss": 0.3453, "step": 5298 }, { "epoch": 0.759822196730714, "grad_norm": 0.3419972360134125, "learning_rate": 9.301555547506413e-06, "loss": 0.3488, "step": 5299 }, { "epoch": 0.7599655864640091, "grad_norm": 0.3154049515724182, "learning_rate": 9.301130216069069e-06, "loss": 0.3434, "step": 5300 }, { "epoch": 0.7601089761973042, "grad_norm": 0.31279364228248596, "learning_rate": 9.300704764894876e-06, "loss": 0.3335, "step": 5301 }, { "epoch": 0.7602523659305994, "grad_norm": 0.37114372849464417, "learning_rate": 9.300279193995679e-06, "loss": 0.3395, "step": 5302 }, { "epoch": 0.7603957556638945, "grad_norm": 0.317727655172348, "learning_rate": 9.299853503383322e-06, "loss": 0.329, "step": 5303 }, { "epoch": 0.7605391453971896, "grad_norm": 0.3070703148841858, "learning_rate": 9.29942769306966e-06, "loss": 0.3318, "step": 5304 }, { "epoch": 0.7606825351304847, "grad_norm": 0.2916989326477051, "learning_rate": 9.299001763066544e-06, "loss": 0.3231, "step": 5305 }, { "epoch": 0.7608259248637798, "grad_norm": 0.3361065089702606, "learning_rate": 9.298575713385833e-06, "loss": 0.3503, "step": 5306 }, { "epoch": 0.7609693145970748, "grad_norm": 0.33008524775505066, "learning_rate": 9.298149544039385e-06, "loss": 0.336, "step": 5307 }, { "epoch": 0.7611127043303699, "grad_norm": 0.29978686571121216, "learning_rate": 9.297723255039068e-06, "loss": 0.3321, "step": 5308 }, { "epoch": 0.761256094063665, "grad_norm": 0.3095149099826813, "learning_rate": 9.297296846396743e-06, "loss": 0.3181, "step": 5309 }, { "epoch": 0.7613994837969601, "grad_norm": 0.3086113929748535, "learning_rate": 9.296870318124288e-06, "loss": 0.3276, "step": 5310 }, { "epoch": 0.7615428735302552, "grad_norm": 0.3145888149738312, "learning_rate": 9.29644367023357e-06, "loss": 0.328, "step": 5311 }, { "epoch": 0.7616862632635504, "grad_norm": 0.3163544833660126, "learning_rate": 9.296016902736472e-06, "loss": 0.3367, "step": 5312 }, { "epoch": 0.7618296529968455, "grad_norm": 0.2939142882823944, "learning_rate": 9.29559001564487e-06, "loss": 0.3257, "step": 5313 }, { "epoch": 0.7619730427301405, "grad_norm": 0.2880237400531769, "learning_rate": 9.29516300897065e-06, "loss": 0.3307, "step": 5314 }, { "epoch": 0.7621164324634356, "grad_norm": 0.3125467598438263, "learning_rate": 9.294735882725701e-06, "loss": 0.3303, "step": 5315 }, { "epoch": 0.7622598221967307, "grad_norm": 0.31130051612854004, "learning_rate": 9.29430863692191e-06, "loss": 0.3367, "step": 5316 }, { "epoch": 0.7624032119300258, "grad_norm": 0.31234288215637207, "learning_rate": 9.293881271571174e-06, "loss": 0.3135, "step": 5317 }, { "epoch": 0.7625466016633209, "grad_norm": 0.30337613821029663, "learning_rate": 9.293453786685387e-06, "loss": 0.3425, "step": 5318 }, { "epoch": 0.762689991396616, "grad_norm": 0.3083209693431854, "learning_rate": 9.29302618227645e-06, "loss": 0.3319, "step": 5319 }, { "epoch": 0.7628333811299111, "grad_norm": 0.3337664008140564, "learning_rate": 9.29259845835627e-06, "loss": 0.3424, "step": 5320 }, { "epoch": 0.7629767708632061, "grad_norm": 0.29641422629356384, "learning_rate": 9.292170614936753e-06, "loss": 0.3289, "step": 5321 }, { "epoch": 0.7631201605965013, "grad_norm": 0.32074442505836487, "learning_rate": 9.291742652029808e-06, "loss": 0.3542, "step": 5322 }, { "epoch": 0.7632635503297964, "grad_norm": 0.3140612840652466, "learning_rate": 9.291314569647347e-06, "loss": 0.3242, "step": 5323 }, { "epoch": 0.7634069400630915, "grad_norm": 0.2896919250488281, "learning_rate": 9.290886367801292e-06, "loss": 0.3522, "step": 5324 }, { "epoch": 0.7635503297963866, "grad_norm": 0.3134767413139343, "learning_rate": 9.290458046503561e-06, "loss": 0.3282, "step": 5325 }, { "epoch": 0.7636937195296817, "grad_norm": 0.31974557042121887, "learning_rate": 9.290029605766078e-06, "loss": 0.3421, "step": 5326 }, { "epoch": 0.7638371092629768, "grad_norm": 0.31680816411972046, "learning_rate": 9.289601045600769e-06, "loss": 0.321, "step": 5327 }, { "epoch": 0.7639804989962719, "grad_norm": 0.30665379762649536, "learning_rate": 9.289172366019565e-06, "loss": 0.3363, "step": 5328 }, { "epoch": 0.7641238887295669, "grad_norm": 0.33341988921165466, "learning_rate": 9.288743567034401e-06, "loss": 0.3294, "step": 5329 }, { "epoch": 0.764267278462862, "grad_norm": 0.2998583912849426, "learning_rate": 9.288314648657215e-06, "loss": 0.3224, "step": 5330 }, { "epoch": 0.7644106681961571, "grad_norm": 0.3066882789134979, "learning_rate": 9.287885610899945e-06, "loss": 0.367, "step": 5331 }, { "epoch": 0.7645540579294523, "grad_norm": 0.31107693910598755, "learning_rate": 9.287456453774535e-06, "loss": 0.342, "step": 5332 }, { "epoch": 0.7646974476627474, "grad_norm": 0.32188817858695984, "learning_rate": 9.287027177292934e-06, "loss": 0.3421, "step": 5333 }, { "epoch": 0.7648408373960425, "grad_norm": 0.3123921751976013, "learning_rate": 9.28659778146709e-06, "loss": 0.3421, "step": 5334 }, { "epoch": 0.7649842271293376, "grad_norm": 0.30967918038368225, "learning_rate": 9.286168266308956e-06, "loss": 0.3462, "step": 5335 }, { "epoch": 0.7651276168626326, "grad_norm": 0.329750657081604, "learning_rate": 9.285738631830494e-06, "loss": 0.3379, "step": 5336 }, { "epoch": 0.7652710065959277, "grad_norm": 0.3050084114074707, "learning_rate": 9.28530887804366e-06, "loss": 0.3351, "step": 5337 }, { "epoch": 0.7654143963292228, "grad_norm": 0.31829243898391724, "learning_rate": 9.284879004960419e-06, "loss": 0.3396, "step": 5338 }, { "epoch": 0.7655577860625179, "grad_norm": 0.32682517170906067, "learning_rate": 9.284449012592738e-06, "loss": 0.3335, "step": 5339 }, { "epoch": 0.765701175795813, "grad_norm": 0.3356066048145294, "learning_rate": 9.284018900952587e-06, "loss": 0.3728, "step": 5340 }, { "epoch": 0.7658445655291081, "grad_norm": 0.288760244846344, "learning_rate": 9.28358867005194e-06, "loss": 0.3207, "step": 5341 }, { "epoch": 0.7659879552624033, "grad_norm": 0.3553425669670105, "learning_rate": 9.283158319902772e-06, "loss": 0.314, "step": 5342 }, { "epoch": 0.7661313449956983, "grad_norm": 0.38194921612739563, "learning_rate": 9.282727850517067e-06, "loss": 0.3461, "step": 5343 }, { "epoch": 0.7662747347289934, "grad_norm": 0.3174656927585602, "learning_rate": 9.282297261906808e-06, "loss": 0.3366, "step": 5344 }, { "epoch": 0.7664181244622885, "grad_norm": 0.33010900020599365, "learning_rate": 9.281866554083979e-06, "loss": 0.3397, "step": 5345 }, { "epoch": 0.7665615141955836, "grad_norm": 0.31389889121055603, "learning_rate": 9.28143572706057e-06, "loss": 0.3386, "step": 5346 }, { "epoch": 0.7667049039288787, "grad_norm": 0.3133602440357208, "learning_rate": 9.281004780848578e-06, "loss": 0.3311, "step": 5347 }, { "epoch": 0.7668482936621738, "grad_norm": 0.3193123936653137, "learning_rate": 9.28057371546e-06, "loss": 0.3363, "step": 5348 }, { "epoch": 0.7669916833954689, "grad_norm": 0.3368578851222992, "learning_rate": 9.280142530906832e-06, "loss": 0.3317, "step": 5349 }, { "epoch": 0.767135073128764, "grad_norm": 0.32317832112312317, "learning_rate": 9.279711227201083e-06, "loss": 0.3378, "step": 5350 }, { "epoch": 0.767278462862059, "grad_norm": 0.3063780665397644, "learning_rate": 9.279279804354754e-06, "loss": 0.3286, "step": 5351 }, { "epoch": 0.7674218525953542, "grad_norm": 0.31067386269569397, "learning_rate": 9.278848262379858e-06, "loss": 0.3141, "step": 5352 }, { "epoch": 0.7675652423286493, "grad_norm": 0.3463047444820404, "learning_rate": 9.278416601288411e-06, "loss": 0.3421, "step": 5353 }, { "epoch": 0.7677086320619444, "grad_norm": 0.3216584324836731, "learning_rate": 9.277984821092426e-06, "loss": 0.3381, "step": 5354 }, { "epoch": 0.7678520217952395, "grad_norm": 0.3331303894519806, "learning_rate": 9.277552921803923e-06, "loss": 0.3315, "step": 5355 }, { "epoch": 0.7679954115285346, "grad_norm": 0.3045932948589325, "learning_rate": 9.27712090343493e-06, "loss": 0.33, "step": 5356 }, { "epoch": 0.7681388012618297, "grad_norm": 0.30482473969459534, "learning_rate": 9.276688765997465e-06, "loss": 0.3263, "step": 5357 }, { "epoch": 0.7682821909951247, "grad_norm": 0.31220975518226624, "learning_rate": 9.276256509503568e-06, "loss": 0.3412, "step": 5358 }, { "epoch": 0.7684255807284198, "grad_norm": 0.30407610535621643, "learning_rate": 9.275824133965268e-06, "loss": 0.3623, "step": 5359 }, { "epoch": 0.7685689704617149, "grad_norm": 0.32339853048324585, "learning_rate": 9.2753916393946e-06, "loss": 0.3355, "step": 5360 }, { "epoch": 0.76871236019501, "grad_norm": 0.33956247568130493, "learning_rate": 9.274959025803605e-06, "loss": 0.3355, "step": 5361 }, { "epoch": 0.7688557499283052, "grad_norm": 0.31287121772766113, "learning_rate": 9.27452629320433e-06, "loss": 0.3298, "step": 5362 }, { "epoch": 0.7689991396616003, "grad_norm": 0.3283417522907257, "learning_rate": 9.274093441608815e-06, "loss": 0.3516, "step": 5363 }, { "epoch": 0.7691425293948954, "grad_norm": 0.32947951555252075, "learning_rate": 9.273660471029114e-06, "loss": 0.3452, "step": 5364 }, { "epoch": 0.7692859191281904, "grad_norm": 0.30988383293151855, "learning_rate": 9.273227381477282e-06, "loss": 0.3426, "step": 5365 }, { "epoch": 0.7694293088614855, "grad_norm": 0.3362114727497101, "learning_rate": 9.272794172965369e-06, "loss": 0.3619, "step": 5366 }, { "epoch": 0.7695726985947806, "grad_norm": 0.39105892181396484, "learning_rate": 9.272360845505442e-06, "loss": 0.3465, "step": 5367 }, { "epoch": 0.7697160883280757, "grad_norm": 0.3442426919937134, "learning_rate": 9.271927399109562e-06, "loss": 0.3317, "step": 5368 }, { "epoch": 0.7698594780613708, "grad_norm": 0.322269469499588, "learning_rate": 9.271493833789792e-06, "loss": 0.327, "step": 5369 }, { "epoch": 0.7700028677946659, "grad_norm": 0.31326377391815186, "learning_rate": 9.271060149558206e-06, "loss": 0.3289, "step": 5370 }, { "epoch": 0.770146257527961, "grad_norm": 0.33120954036712646, "learning_rate": 9.270626346426875e-06, "loss": 0.321, "step": 5371 }, { "epoch": 0.770289647261256, "grad_norm": 0.3435710072517395, "learning_rate": 9.270192424407875e-06, "loss": 0.3208, "step": 5372 }, { "epoch": 0.7704330369945512, "grad_norm": 0.32463863492012024, "learning_rate": 9.26975838351329e-06, "loss": 0.3557, "step": 5373 }, { "epoch": 0.7705764267278463, "grad_norm": 0.3369614779949188, "learning_rate": 9.269324223755197e-06, "loss": 0.322, "step": 5374 }, { "epoch": 0.7707198164611414, "grad_norm": 0.3351140320301056, "learning_rate": 9.268889945145685e-06, "loss": 0.324, "step": 5375 }, { "epoch": 0.7708632061944365, "grad_norm": 0.31419435143470764, "learning_rate": 9.268455547696845e-06, "loss": 0.3326, "step": 5376 }, { "epoch": 0.7710065959277316, "grad_norm": 0.3136206865310669, "learning_rate": 9.26802103142077e-06, "loss": 0.3217, "step": 5377 }, { "epoch": 0.7711499856610267, "grad_norm": 0.3217877447605133, "learning_rate": 9.267586396329551e-06, "loss": 0.3102, "step": 5378 }, { "epoch": 0.7712933753943217, "grad_norm": 0.32131052017211914, "learning_rate": 9.267151642435292e-06, "loss": 0.3593, "step": 5379 }, { "epoch": 0.7714367651276168, "grad_norm": 0.31720346212387085, "learning_rate": 9.266716769750098e-06, "loss": 0.3346, "step": 5380 }, { "epoch": 0.7715801548609119, "grad_norm": 0.3220806121826172, "learning_rate": 9.266281778286072e-06, "loss": 0.3379, "step": 5381 }, { "epoch": 0.771723544594207, "grad_norm": 0.34857678413391113, "learning_rate": 9.265846668055322e-06, "loss": 0.3483, "step": 5382 }, { "epoch": 0.7718669343275022, "grad_norm": 0.3262333869934082, "learning_rate": 9.265411439069965e-06, "loss": 0.3404, "step": 5383 }, { "epoch": 0.7720103240607973, "grad_norm": 0.3098479211330414, "learning_rate": 9.264976091342115e-06, "loss": 0.3436, "step": 5384 }, { "epoch": 0.7721537137940924, "grad_norm": 0.31089431047439575, "learning_rate": 9.264540624883892e-06, "loss": 0.303, "step": 5385 }, { "epoch": 0.7722971035273875, "grad_norm": 0.3402668535709381, "learning_rate": 9.264105039707416e-06, "loss": 0.3355, "step": 5386 }, { "epoch": 0.7724404932606825, "grad_norm": 0.33710551261901855, "learning_rate": 9.263669335824817e-06, "loss": 0.3462, "step": 5387 }, { "epoch": 0.7725838829939776, "grad_norm": 0.30888479948043823, "learning_rate": 9.26323351324822e-06, "loss": 0.3586, "step": 5388 }, { "epoch": 0.7727272727272727, "grad_norm": 0.31001028418540955, "learning_rate": 9.26279757198976e-06, "loss": 0.3337, "step": 5389 }, { "epoch": 0.7728706624605678, "grad_norm": 0.3234165906906128, "learning_rate": 9.262361512061576e-06, "loss": 0.3304, "step": 5390 }, { "epoch": 0.7730140521938629, "grad_norm": 0.33438679575920105, "learning_rate": 9.261925333475803e-06, "loss": 0.3342, "step": 5391 }, { "epoch": 0.773157441927158, "grad_norm": 0.31326428055763245, "learning_rate": 9.261489036244585e-06, "loss": 0.3534, "step": 5392 }, { "epoch": 0.7733008316604532, "grad_norm": 0.3464123606681824, "learning_rate": 9.261052620380067e-06, "loss": 0.3537, "step": 5393 }, { "epoch": 0.7734442213937482, "grad_norm": 0.30235302448272705, "learning_rate": 9.260616085894399e-06, "loss": 0.3251, "step": 5394 }, { "epoch": 0.7735876111270433, "grad_norm": 0.29758358001708984, "learning_rate": 9.260179432799732e-06, "loss": 0.3296, "step": 5395 }, { "epoch": 0.7737310008603384, "grad_norm": 0.34819358587265015, "learning_rate": 9.259742661108225e-06, "loss": 0.3691, "step": 5396 }, { "epoch": 0.7738743905936335, "grad_norm": 0.31652987003326416, "learning_rate": 9.259305770832034e-06, "loss": 0.3579, "step": 5397 }, { "epoch": 0.7740177803269286, "grad_norm": 0.302124947309494, "learning_rate": 9.258868761983322e-06, "loss": 0.3395, "step": 5398 }, { "epoch": 0.7741611700602237, "grad_norm": 0.3443399667739868, "learning_rate": 9.258431634574256e-06, "loss": 0.3286, "step": 5399 }, { "epoch": 0.7743045597935188, "grad_norm": 0.379470556974411, "learning_rate": 9.257994388617004e-06, "loss": 0.3681, "step": 5400 }, { "epoch": 0.7744479495268138, "grad_norm": 0.32541823387145996, "learning_rate": 9.257557024123737e-06, "loss": 0.3448, "step": 5401 }, { "epoch": 0.7745913392601089, "grad_norm": 0.30543631315231323, "learning_rate": 9.257119541106632e-06, "loss": 0.3452, "step": 5402 }, { "epoch": 0.7747347289934041, "grad_norm": 0.29423150420188904, "learning_rate": 9.25668193957787e-06, "loss": 0.3135, "step": 5403 }, { "epoch": 0.7748781187266992, "grad_norm": 0.3456009328365326, "learning_rate": 9.256244219549629e-06, "loss": 0.3346, "step": 5404 }, { "epoch": 0.7750215084599943, "grad_norm": 0.31411951780319214, "learning_rate": 9.255806381034095e-06, "loss": 0.3201, "step": 5405 }, { "epoch": 0.7751648981932894, "grad_norm": 0.30338701605796814, "learning_rate": 9.255368424043459e-06, "loss": 0.3394, "step": 5406 }, { "epoch": 0.7753082879265845, "grad_norm": 0.31636759638786316, "learning_rate": 9.254930348589912e-06, "loss": 0.3499, "step": 5407 }, { "epoch": 0.7754516776598795, "grad_norm": 0.3443688452243805, "learning_rate": 9.25449215468565e-06, "loss": 0.3426, "step": 5408 }, { "epoch": 0.7755950673931746, "grad_norm": 0.3084009885787964, "learning_rate": 9.254053842342871e-06, "loss": 0.3337, "step": 5409 }, { "epoch": 0.7757384571264697, "grad_norm": 0.34168779850006104, "learning_rate": 9.253615411573778e-06, "loss": 0.3329, "step": 5410 }, { "epoch": 0.7758818468597648, "grad_norm": 0.3350257873535156, "learning_rate": 9.253176862390574e-06, "loss": 0.3431, "step": 5411 }, { "epoch": 0.7760252365930599, "grad_norm": 0.31807729601860046, "learning_rate": 9.252738194805468e-06, "loss": 0.3476, "step": 5412 }, { "epoch": 0.7761686263263551, "grad_norm": 0.31040242314338684, "learning_rate": 9.252299408830674e-06, "loss": 0.3235, "step": 5413 }, { "epoch": 0.7763120160596502, "grad_norm": 0.3346070647239685, "learning_rate": 9.251860504478404e-06, "loss": 0.3527, "step": 5414 }, { "epoch": 0.7764554057929453, "grad_norm": 0.3152467906475067, "learning_rate": 9.25142148176088e-06, "loss": 0.3437, "step": 5415 }, { "epoch": 0.7765987955262403, "grad_norm": 0.3221234381198883, "learning_rate": 9.250982340690322e-06, "loss": 0.3376, "step": 5416 }, { "epoch": 0.7767421852595354, "grad_norm": 0.32713526487350464, "learning_rate": 9.250543081278954e-06, "loss": 0.3325, "step": 5417 }, { "epoch": 0.7768855749928305, "grad_norm": 0.28771790862083435, "learning_rate": 9.250103703539004e-06, "loss": 0.3388, "step": 5418 }, { "epoch": 0.7770289647261256, "grad_norm": 0.2866358160972595, "learning_rate": 9.249664207482706e-06, "loss": 0.3124, "step": 5419 }, { "epoch": 0.7771723544594207, "grad_norm": 0.3249143958091736, "learning_rate": 9.249224593122294e-06, "loss": 0.3437, "step": 5420 }, { "epoch": 0.7773157441927158, "grad_norm": 0.32043537497520447, "learning_rate": 9.248784860470007e-06, "loss": 0.3445, "step": 5421 }, { "epoch": 0.7774591339260108, "grad_norm": 0.29788482189178467, "learning_rate": 9.248345009538083e-06, "loss": 0.3366, "step": 5422 }, { "epoch": 0.777602523659306, "grad_norm": 0.31576380133628845, "learning_rate": 9.247905040338769e-06, "loss": 0.3454, "step": 5423 }, { "epoch": 0.7777459133926011, "grad_norm": 0.32485780119895935, "learning_rate": 9.247464952884314e-06, "loss": 0.3385, "step": 5424 }, { "epoch": 0.7778893031258962, "grad_norm": 0.34310707449913025, "learning_rate": 9.247024747186969e-06, "loss": 0.3499, "step": 5425 }, { "epoch": 0.7780326928591913, "grad_norm": 0.2937067449092865, "learning_rate": 9.246584423258988e-06, "loss": 0.346, "step": 5426 }, { "epoch": 0.7781760825924864, "grad_norm": 0.3387051522731781, "learning_rate": 9.246143981112629e-06, "loss": 0.3585, "step": 5427 }, { "epoch": 0.7783194723257815, "grad_norm": 0.32475340366363525, "learning_rate": 9.245703420760152e-06, "loss": 0.3427, "step": 5428 }, { "epoch": 0.7784628620590766, "grad_norm": 0.2948870360851288, "learning_rate": 9.245262742213825e-06, "loss": 0.3304, "step": 5429 }, { "epoch": 0.7786062517923716, "grad_norm": 0.29291582107543945, "learning_rate": 9.244821945485913e-06, "loss": 0.3116, "step": 5430 }, { "epoch": 0.7787496415256667, "grad_norm": 0.3347969651222229, "learning_rate": 9.244381030588688e-06, "loss": 0.3054, "step": 5431 }, { "epoch": 0.7788930312589618, "grad_norm": 0.3261106014251709, "learning_rate": 9.243939997534423e-06, "loss": 0.3359, "step": 5432 }, { "epoch": 0.779036420992257, "grad_norm": 0.3341514468193054, "learning_rate": 9.243498846335399e-06, "loss": 0.3296, "step": 5433 }, { "epoch": 0.7791798107255521, "grad_norm": 0.32170987129211426, "learning_rate": 9.243057577003894e-06, "loss": 0.3433, "step": 5434 }, { "epoch": 0.7793232004588472, "grad_norm": 0.3557758331298828, "learning_rate": 9.242616189552191e-06, "loss": 0.3597, "step": 5435 }, { "epoch": 0.7794665901921423, "grad_norm": 0.3241986334323883, "learning_rate": 9.242174683992581e-06, "loss": 0.3242, "step": 5436 }, { "epoch": 0.7796099799254373, "grad_norm": 0.36427444219589233, "learning_rate": 9.241733060337354e-06, "loss": 0.34, "step": 5437 }, { "epoch": 0.7797533696587324, "grad_norm": 0.31133392453193665, "learning_rate": 9.241291318598805e-06, "loss": 0.3393, "step": 5438 }, { "epoch": 0.7798967593920275, "grad_norm": 0.3355083763599396, "learning_rate": 9.240849458789226e-06, "loss": 0.3213, "step": 5439 }, { "epoch": 0.7800401491253226, "grad_norm": 0.38472750782966614, "learning_rate": 9.240407480920926e-06, "loss": 0.3499, "step": 5440 }, { "epoch": 0.7801835388586177, "grad_norm": 0.35876306891441345, "learning_rate": 9.239965385006203e-06, "loss": 0.3291, "step": 5441 }, { "epoch": 0.7803269285919128, "grad_norm": 0.33826690912246704, "learning_rate": 9.239523171057365e-06, "loss": 0.3357, "step": 5442 }, { "epoch": 0.780470318325208, "grad_norm": 0.3335408866405487, "learning_rate": 9.239080839086724e-06, "loss": 0.3609, "step": 5443 }, { "epoch": 0.780613708058503, "grad_norm": 0.33518335223197937, "learning_rate": 9.238638389106594e-06, "loss": 0.337, "step": 5444 }, { "epoch": 0.7807570977917981, "grad_norm": 0.3097827434539795, "learning_rate": 9.238195821129293e-06, "loss": 0.3139, "step": 5445 }, { "epoch": 0.7809004875250932, "grad_norm": 0.31261730194091797, "learning_rate": 9.237753135167137e-06, "loss": 0.3321, "step": 5446 }, { "epoch": 0.7810438772583883, "grad_norm": 0.31186404824256897, "learning_rate": 9.237310331232454e-06, "loss": 0.3424, "step": 5447 }, { "epoch": 0.7811872669916834, "grad_norm": 0.30653494596481323, "learning_rate": 9.236867409337568e-06, "loss": 0.3399, "step": 5448 }, { "epoch": 0.7813306567249785, "grad_norm": 0.3101717531681061, "learning_rate": 9.236424369494814e-06, "loss": 0.3419, "step": 5449 }, { "epoch": 0.7814740464582736, "grad_norm": 0.2951711416244507, "learning_rate": 9.235981211716521e-06, "loss": 0.3291, "step": 5450 }, { "epoch": 0.7816174361915686, "grad_norm": 0.32270702719688416, "learning_rate": 9.235537936015025e-06, "loss": 0.3209, "step": 5451 }, { "epoch": 0.7817608259248637, "grad_norm": 0.30661436915397644, "learning_rate": 9.23509454240267e-06, "loss": 0.3226, "step": 5452 }, { "epoch": 0.7819042156581589, "grad_norm": 0.31869378685951233, "learning_rate": 9.234651030891799e-06, "loss": 0.3259, "step": 5453 }, { "epoch": 0.782047605391454, "grad_norm": 0.3326178193092346, "learning_rate": 9.234207401494755e-06, "loss": 0.3254, "step": 5454 }, { "epoch": 0.7821909951247491, "grad_norm": 0.30008819699287415, "learning_rate": 9.233763654223893e-06, "loss": 0.3377, "step": 5455 }, { "epoch": 0.7823343848580442, "grad_norm": 0.3324487805366516, "learning_rate": 9.23331978909156e-06, "loss": 0.3344, "step": 5456 }, { "epoch": 0.7824777745913393, "grad_norm": 0.3392122685909271, "learning_rate": 9.23287580611012e-06, "loss": 0.3243, "step": 5457 }, { "epoch": 0.7826211643246344, "grad_norm": 0.28733861446380615, "learning_rate": 9.232431705291927e-06, "loss": 0.3115, "step": 5458 }, { "epoch": 0.7827645540579294, "grad_norm": 0.32432079315185547, "learning_rate": 9.231987486649348e-06, "loss": 0.3193, "step": 5459 }, { "epoch": 0.7829079437912245, "grad_norm": 0.3714366853237152, "learning_rate": 9.231543150194744e-06, "loss": 0.3457, "step": 5460 }, { "epoch": 0.7830513335245196, "grad_norm": 0.36216869950294495, "learning_rate": 9.23109869594049e-06, "loss": 0.332, "step": 5461 }, { "epoch": 0.7831947232578147, "grad_norm": 0.31593164801597595, "learning_rate": 9.230654123898957e-06, "loss": 0.3105, "step": 5462 }, { "epoch": 0.7833381129911099, "grad_norm": 0.3563162088394165, "learning_rate": 9.230209434082522e-06, "loss": 0.3408, "step": 5463 }, { "epoch": 0.783481502724405, "grad_norm": 0.344438761472702, "learning_rate": 9.229764626503561e-06, "loss": 0.3305, "step": 5464 }, { "epoch": 0.7836248924577001, "grad_norm": 0.33222609758377075, "learning_rate": 9.229319701174462e-06, "loss": 0.3272, "step": 5465 }, { "epoch": 0.7837682821909951, "grad_norm": 0.34803614020347595, "learning_rate": 9.228874658107608e-06, "loss": 0.3444, "step": 5466 }, { "epoch": 0.7839116719242902, "grad_norm": 0.36577603220939636, "learning_rate": 9.228429497315387e-06, "loss": 0.3306, "step": 5467 }, { "epoch": 0.7840550616575853, "grad_norm": 0.3666073977947235, "learning_rate": 9.227984218810196e-06, "loss": 0.3545, "step": 5468 }, { "epoch": 0.7841984513908804, "grad_norm": 0.33418843150138855, "learning_rate": 9.227538822604426e-06, "loss": 0.3432, "step": 5469 }, { "epoch": 0.7843418411241755, "grad_norm": 0.35715028643608093, "learning_rate": 9.22709330871048e-06, "loss": 0.2968, "step": 5470 }, { "epoch": 0.7844852308574706, "grad_norm": 0.34488645195961, "learning_rate": 9.226647677140756e-06, "loss": 0.3347, "step": 5471 }, { "epoch": 0.7846286205907657, "grad_norm": 0.3160715699195862, "learning_rate": 9.226201927907664e-06, "loss": 0.3258, "step": 5472 }, { "epoch": 0.7847720103240607, "grad_norm": 0.32984164357185364, "learning_rate": 9.225756061023611e-06, "loss": 0.3224, "step": 5473 }, { "epoch": 0.7849154000573559, "grad_norm": 0.3495952785015106, "learning_rate": 9.225310076501012e-06, "loss": 0.3494, "step": 5474 }, { "epoch": 0.785058789790651, "grad_norm": 0.2926905155181885, "learning_rate": 9.224863974352278e-06, "loss": 0.3628, "step": 5475 }, { "epoch": 0.7852021795239461, "grad_norm": 0.31435003876686096, "learning_rate": 9.22441775458983e-06, "loss": 0.3317, "step": 5476 }, { "epoch": 0.7853455692572412, "grad_norm": 0.3318634629249573, "learning_rate": 9.223971417226093e-06, "loss": 0.3304, "step": 5477 }, { "epoch": 0.7854889589905363, "grad_norm": 0.30209529399871826, "learning_rate": 9.223524962273485e-06, "loss": 0.3442, "step": 5478 }, { "epoch": 0.7856323487238314, "grad_norm": 0.3319195806980133, "learning_rate": 9.223078389744441e-06, "loss": 0.3384, "step": 5479 }, { "epoch": 0.7857757384571264, "grad_norm": 0.36020633578300476, "learning_rate": 9.222631699651392e-06, "loss": 0.3382, "step": 5480 }, { "epoch": 0.7859191281904215, "grad_norm": 0.3413003087043762, "learning_rate": 9.22218489200677e-06, "loss": 0.3321, "step": 5481 }, { "epoch": 0.7860625179237166, "grad_norm": 0.38468998670578003, "learning_rate": 9.22173796682302e-06, "loss": 0.3359, "step": 5482 }, { "epoch": 0.7862059076570117, "grad_norm": 0.33680960536003113, "learning_rate": 9.221290924112573e-06, "loss": 0.3528, "step": 5483 }, { "epoch": 0.7863492973903069, "grad_norm": 0.3029823899269104, "learning_rate": 9.220843763887884e-06, "loss": 0.3342, "step": 5484 }, { "epoch": 0.786492687123602, "grad_norm": 0.3698864281177521, "learning_rate": 9.220396486161397e-06, "loss": 0.3708, "step": 5485 }, { "epoch": 0.7866360768568971, "grad_norm": 0.32472431659698486, "learning_rate": 9.219949090945564e-06, "loss": 0.3366, "step": 5486 }, { "epoch": 0.7867794665901922, "grad_norm": 0.314738929271698, "learning_rate": 9.21950157825284e-06, "loss": 0.3387, "step": 5487 }, { "epoch": 0.7869228563234872, "grad_norm": 0.33758363127708435, "learning_rate": 9.219053948095681e-06, "loss": 0.3322, "step": 5488 }, { "epoch": 0.7870662460567823, "grad_norm": 0.3460638225078583, "learning_rate": 9.218606200486552e-06, "loss": 0.34, "step": 5489 }, { "epoch": 0.7872096357900774, "grad_norm": 0.33549150824546814, "learning_rate": 9.218158335437916e-06, "loss": 0.3478, "step": 5490 }, { "epoch": 0.7873530255233725, "grad_norm": 0.3603934645652771, "learning_rate": 9.21771035296224e-06, "loss": 0.3107, "step": 5491 }, { "epoch": 0.7874964152566676, "grad_norm": 0.33031508326530457, "learning_rate": 9.217262253071996e-06, "loss": 0.3351, "step": 5492 }, { "epoch": 0.7876398049899627, "grad_norm": 0.357635498046875, "learning_rate": 9.216814035779657e-06, "loss": 0.3293, "step": 5493 }, { "epoch": 0.7877831947232579, "grad_norm": 0.4066932499408722, "learning_rate": 9.216365701097705e-06, "loss": 0.3445, "step": 5494 }, { "epoch": 0.787926584456553, "grad_norm": 0.3300754129886627, "learning_rate": 9.215917249038616e-06, "loss": 0.3462, "step": 5495 }, { "epoch": 0.788069974189848, "grad_norm": 0.32051748037338257, "learning_rate": 9.215468679614877e-06, "loss": 0.3307, "step": 5496 }, { "epoch": 0.7882133639231431, "grad_norm": 0.3317885994911194, "learning_rate": 9.215019992838972e-06, "loss": 0.333, "step": 5497 }, { "epoch": 0.7883567536564382, "grad_norm": 0.3595474362373352, "learning_rate": 9.214571188723397e-06, "loss": 0.3399, "step": 5498 }, { "epoch": 0.7885001433897333, "grad_norm": 0.3106094300746918, "learning_rate": 9.21412226728064e-06, "loss": 0.3242, "step": 5499 }, { "epoch": 0.7886435331230284, "grad_norm": 0.35333654284477234, "learning_rate": 9.213673228523205e-06, "loss": 0.3366, "step": 5500 }, { "epoch": 0.7887869228563235, "grad_norm": 0.34004324674606323, "learning_rate": 9.21322407246359e-06, "loss": 0.3648, "step": 5501 }, { "epoch": 0.7889303125896185, "grad_norm": 0.3013900816440582, "learning_rate": 9.212774799114294e-06, "loss": 0.3351, "step": 5502 }, { "epoch": 0.7890737023229136, "grad_norm": 0.3441627621650696, "learning_rate": 9.21232540848783e-06, "loss": 0.3222, "step": 5503 }, { "epoch": 0.7892170920562088, "grad_norm": 0.3475417494773865, "learning_rate": 9.211875900596708e-06, "loss": 0.322, "step": 5504 }, { "epoch": 0.7893604817895039, "grad_norm": 0.3405061662197113, "learning_rate": 9.211426275453438e-06, "loss": 0.3455, "step": 5505 }, { "epoch": 0.789503871522799, "grad_norm": 0.33350300788879395, "learning_rate": 9.210976533070539e-06, "loss": 0.3358, "step": 5506 }, { "epoch": 0.7896472612560941, "grad_norm": 0.3931249976158142, "learning_rate": 9.210526673460532e-06, "loss": 0.3563, "step": 5507 }, { "epoch": 0.7897906509893892, "grad_norm": 0.32363396883010864, "learning_rate": 9.210076696635938e-06, "loss": 0.3562, "step": 5508 }, { "epoch": 0.7899340407226842, "grad_norm": 0.3034461438655853, "learning_rate": 9.209626602609286e-06, "loss": 0.3264, "step": 5509 }, { "epoch": 0.7900774304559793, "grad_norm": 0.33524012565612793, "learning_rate": 9.209176391393106e-06, "loss": 0.3486, "step": 5510 }, { "epoch": 0.7902208201892744, "grad_norm": 0.305494487285614, "learning_rate": 9.208726062999929e-06, "loss": 0.3337, "step": 5511 }, { "epoch": 0.7903642099225695, "grad_norm": 0.31592318415641785, "learning_rate": 9.208275617442293e-06, "loss": 0.348, "step": 5512 }, { "epoch": 0.7905075996558646, "grad_norm": 0.33095893263816833, "learning_rate": 9.207825054732737e-06, "loss": 0.3454, "step": 5513 }, { "epoch": 0.7906509893891598, "grad_norm": 0.30137506127357483, "learning_rate": 9.207374374883804e-06, "loss": 0.3534, "step": 5514 }, { "epoch": 0.7907943791224549, "grad_norm": 0.31642574071884155, "learning_rate": 9.20692357790804e-06, "loss": 0.3483, "step": 5515 }, { "epoch": 0.79093776885575, "grad_norm": 0.3260844647884369, "learning_rate": 9.206472663817995e-06, "loss": 0.33, "step": 5516 }, { "epoch": 0.791081158589045, "grad_norm": 0.32568296790122986, "learning_rate": 9.206021632626223e-06, "loss": 0.3188, "step": 5517 }, { "epoch": 0.7912245483223401, "grad_norm": 0.3508913815021515, "learning_rate": 9.205570484345279e-06, "loss": 0.329, "step": 5518 }, { "epoch": 0.7913679380556352, "grad_norm": 0.30555424094200134, "learning_rate": 9.205119218987722e-06, "loss": 0.325, "step": 5519 }, { "epoch": 0.7915113277889303, "grad_norm": 0.35544878244400024, "learning_rate": 9.204667836566115e-06, "loss": 0.3379, "step": 5520 }, { "epoch": 0.7916547175222254, "grad_norm": 0.32086583971977234, "learning_rate": 9.204216337093022e-06, "loss": 0.357, "step": 5521 }, { "epoch": 0.7917981072555205, "grad_norm": 0.30928489565849304, "learning_rate": 9.203764720581014e-06, "loss": 0.3337, "step": 5522 }, { "epoch": 0.7919414969888156, "grad_norm": 0.3133018910884857, "learning_rate": 9.203312987042664e-06, "loss": 0.3255, "step": 5523 }, { "epoch": 0.7920848867221107, "grad_norm": 0.33021700382232666, "learning_rate": 9.202861136490545e-06, "loss": 0.3227, "step": 5524 }, { "epoch": 0.7922282764554058, "grad_norm": 0.3389973044395447, "learning_rate": 9.202409168937238e-06, "loss": 0.3397, "step": 5525 }, { "epoch": 0.7923716661887009, "grad_norm": 0.308501273393631, "learning_rate": 9.201957084395324e-06, "loss": 0.3371, "step": 5526 }, { "epoch": 0.792515055921996, "grad_norm": 0.3186182975769043, "learning_rate": 9.201504882877389e-06, "loss": 0.321, "step": 5527 }, { "epoch": 0.7926584456552911, "grad_norm": 0.3427749276161194, "learning_rate": 9.20105256439602e-06, "loss": 0.3342, "step": 5528 }, { "epoch": 0.7928018353885862, "grad_norm": 0.31429561972618103, "learning_rate": 9.20060012896381e-06, "loss": 0.3236, "step": 5529 }, { "epoch": 0.7929452251218813, "grad_norm": 0.3255605697631836, "learning_rate": 9.200147576593354e-06, "loss": 0.3658, "step": 5530 }, { "epoch": 0.7930886148551763, "grad_norm": 0.3339911699295044, "learning_rate": 9.199694907297252e-06, "loss": 0.3304, "step": 5531 }, { "epoch": 0.7932320045884714, "grad_norm": 0.34307754039764404, "learning_rate": 9.199242121088103e-06, "loss": 0.3206, "step": 5532 }, { "epoch": 0.7933753943217665, "grad_norm": 0.3271400034427643, "learning_rate": 9.198789217978514e-06, "loss": 0.3505, "step": 5533 }, { "epoch": 0.7935187840550617, "grad_norm": 0.34995168447494507, "learning_rate": 9.198336197981092e-06, "loss": 0.3346, "step": 5534 }, { "epoch": 0.7936621737883568, "grad_norm": 0.30848121643066406, "learning_rate": 9.197883061108447e-06, "loss": 0.3353, "step": 5535 }, { "epoch": 0.7938055635216519, "grad_norm": 0.3474535048007965, "learning_rate": 9.197429807373196e-06, "loss": 0.3384, "step": 5536 }, { "epoch": 0.793948953254947, "grad_norm": 0.33544379472732544, "learning_rate": 9.196976436787956e-06, "loss": 0.3364, "step": 5537 }, { "epoch": 0.794092342988242, "grad_norm": 0.2983795702457428, "learning_rate": 9.19652294936535e-06, "loss": 0.3101, "step": 5538 }, { "epoch": 0.7942357327215371, "grad_norm": 0.33184459805488586, "learning_rate": 9.196069345117998e-06, "loss": 0.339, "step": 5539 }, { "epoch": 0.7943791224548322, "grad_norm": 0.30867403745651245, "learning_rate": 9.195615624058532e-06, "loss": 0.336, "step": 5540 }, { "epoch": 0.7945225121881273, "grad_norm": 0.32219231128692627, "learning_rate": 9.19516178619958e-06, "loss": 0.3445, "step": 5541 }, { "epoch": 0.7946659019214224, "grad_norm": 0.31849434971809387, "learning_rate": 9.194707831553777e-06, "loss": 0.3403, "step": 5542 }, { "epoch": 0.7948092916547175, "grad_norm": 0.30980363488197327, "learning_rate": 9.194253760133761e-06, "loss": 0.3304, "step": 5543 }, { "epoch": 0.7949526813880127, "grad_norm": 0.3019520044326782, "learning_rate": 9.193799571952173e-06, "loss": 0.326, "step": 5544 }, { "epoch": 0.7950960711213078, "grad_norm": 0.3272162675857544, "learning_rate": 9.193345267021655e-06, "loss": 0.3183, "step": 5545 }, { "epoch": 0.7952394608546028, "grad_norm": 0.3337804973125458, "learning_rate": 9.192890845354855e-06, "loss": 0.3419, "step": 5546 }, { "epoch": 0.7953828505878979, "grad_norm": 0.3394699990749359, "learning_rate": 9.192436306964425e-06, "loss": 0.3137, "step": 5547 }, { "epoch": 0.795526240321193, "grad_norm": 0.3662564158439636, "learning_rate": 9.191981651863018e-06, "loss": 0.3372, "step": 5548 }, { "epoch": 0.7956696300544881, "grad_norm": 0.33660903573036194, "learning_rate": 9.191526880063288e-06, "loss": 0.3271, "step": 5549 }, { "epoch": 0.7958130197877832, "grad_norm": 0.3346658945083618, "learning_rate": 9.1910719915779e-06, "loss": 0.3302, "step": 5550 }, { "epoch": 0.7959564095210783, "grad_norm": 0.36228832602500916, "learning_rate": 9.190616986419514e-06, "loss": 0.3332, "step": 5551 }, { "epoch": 0.7960997992543734, "grad_norm": 0.34234586358070374, "learning_rate": 9.190161864600796e-06, "loss": 0.3296, "step": 5552 }, { "epoch": 0.7962431889876684, "grad_norm": 0.34327003359794617, "learning_rate": 9.189706626134418e-06, "loss": 0.3279, "step": 5553 }, { "epoch": 0.7963865787209636, "grad_norm": 0.3480978012084961, "learning_rate": 9.189251271033053e-06, "loss": 0.3421, "step": 5554 }, { "epoch": 0.7965299684542587, "grad_norm": 0.3329004645347595, "learning_rate": 9.188795799309377e-06, "loss": 0.339, "step": 5555 }, { "epoch": 0.7966733581875538, "grad_norm": 0.35772237181663513, "learning_rate": 9.18834021097607e-06, "loss": 0.3407, "step": 5556 }, { "epoch": 0.7968167479208489, "grad_norm": 0.3291645050048828, "learning_rate": 9.187884506045813e-06, "loss": 0.3238, "step": 5557 }, { "epoch": 0.796960137654144, "grad_norm": 0.32943516969680786, "learning_rate": 9.187428684531294e-06, "loss": 0.3389, "step": 5558 }, { "epoch": 0.7971035273874391, "grad_norm": 0.31306204199790955, "learning_rate": 9.186972746445202e-06, "loss": 0.3652, "step": 5559 }, { "epoch": 0.7972469171207341, "grad_norm": 0.37459495663642883, "learning_rate": 9.18651669180023e-06, "loss": 0.3232, "step": 5560 }, { "epoch": 0.7973903068540292, "grad_norm": 0.35697466135025024, "learning_rate": 9.186060520609073e-06, "loss": 0.3489, "step": 5561 }, { "epoch": 0.7975336965873243, "grad_norm": 0.30442118644714355, "learning_rate": 9.185604232884431e-06, "loss": 0.3272, "step": 5562 }, { "epoch": 0.7976770863206194, "grad_norm": 0.3398526608943939, "learning_rate": 9.185147828639005e-06, "loss": 0.3567, "step": 5563 }, { "epoch": 0.7978204760539145, "grad_norm": 0.3832547962665558, "learning_rate": 9.184691307885501e-06, "loss": 0.341, "step": 5564 }, { "epoch": 0.7979638657872097, "grad_norm": 0.31781479716300964, "learning_rate": 9.184234670636629e-06, "loss": 0.3022, "step": 5565 }, { "epoch": 0.7981072555205048, "grad_norm": 0.34905576705932617, "learning_rate": 9.1837779169051e-06, "loss": 0.3561, "step": 5566 }, { "epoch": 0.7982506452537999, "grad_norm": 0.33538660407066345, "learning_rate": 9.183321046703631e-06, "loss": 0.3345, "step": 5567 }, { "epoch": 0.7983940349870949, "grad_norm": 0.34221217036247253, "learning_rate": 9.182864060044939e-06, "loss": 0.336, "step": 5568 }, { "epoch": 0.79853742472039, "grad_norm": 0.3078978359699249, "learning_rate": 9.182406956941744e-06, "loss": 0.3035, "step": 5569 }, { "epoch": 0.7986808144536851, "grad_norm": 0.32299649715423584, "learning_rate": 9.181949737406775e-06, "loss": 0.3477, "step": 5570 }, { "epoch": 0.7988242041869802, "grad_norm": 0.34033483266830444, "learning_rate": 9.181492401452758e-06, "loss": 0.3522, "step": 5571 }, { "epoch": 0.7989675939202753, "grad_norm": 0.30197569727897644, "learning_rate": 9.181034949092423e-06, "loss": 0.3436, "step": 5572 }, { "epoch": 0.7991109836535704, "grad_norm": 0.3388022780418396, "learning_rate": 9.180577380338509e-06, "loss": 0.3427, "step": 5573 }, { "epoch": 0.7992543733868654, "grad_norm": 0.3297984302043915, "learning_rate": 9.18011969520375e-06, "loss": 0.357, "step": 5574 }, { "epoch": 0.7993977631201606, "grad_norm": 0.3191377818584442, "learning_rate": 9.179661893700891e-06, "loss": 0.3398, "step": 5575 }, { "epoch": 0.7995411528534557, "grad_norm": 0.32286983728408813, "learning_rate": 9.179203975842672e-06, "loss": 0.3229, "step": 5576 }, { "epoch": 0.7996845425867508, "grad_norm": 0.3623696267604828, "learning_rate": 9.178745941641844e-06, "loss": 0.3354, "step": 5577 }, { "epoch": 0.7998279323200459, "grad_norm": 0.33670875430107117, "learning_rate": 9.178287791111157e-06, "loss": 0.336, "step": 5578 }, { "epoch": 0.799971322053341, "grad_norm": 0.33014005422592163, "learning_rate": 9.177829524263367e-06, "loss": 0.3459, "step": 5579 }, { "epoch": 0.8001147117866361, "grad_norm": 0.3399995267391205, "learning_rate": 9.177371141111227e-06, "loss": 0.3327, "step": 5580 }, { "epoch": 0.8002581015199312, "grad_norm": 0.3210766017436981, "learning_rate": 9.176912641667503e-06, "loss": 0.3331, "step": 5581 }, { "epoch": 0.8004014912532262, "grad_norm": 0.33530256152153015, "learning_rate": 9.176454025944955e-06, "loss": 0.3327, "step": 5582 }, { "epoch": 0.8005448809865213, "grad_norm": 0.33051207661628723, "learning_rate": 9.175995293956352e-06, "loss": 0.3323, "step": 5583 }, { "epoch": 0.8006882707198164, "grad_norm": 0.36014968156814575, "learning_rate": 9.175536445714463e-06, "loss": 0.327, "step": 5584 }, { "epoch": 0.8008316604531116, "grad_norm": 0.34919485449790955, "learning_rate": 9.175077481232064e-06, "loss": 0.3497, "step": 5585 }, { "epoch": 0.8009750501864067, "grad_norm": 0.30893272161483765, "learning_rate": 9.174618400521929e-06, "loss": 0.3321, "step": 5586 }, { "epoch": 0.8011184399197018, "grad_norm": 0.3391934037208557, "learning_rate": 9.17415920359684e-06, "loss": 0.3485, "step": 5587 }, { "epoch": 0.8012618296529969, "grad_norm": 0.31977906823158264, "learning_rate": 9.17369989046958e-06, "loss": 0.3663, "step": 5588 }, { "epoch": 0.801405219386292, "grad_norm": 0.3411715626716614, "learning_rate": 9.173240461152934e-06, "loss": 0.3618, "step": 5589 }, { "epoch": 0.801548609119587, "grad_norm": 0.3415500521659851, "learning_rate": 9.172780915659696e-06, "loss": 0.335, "step": 5590 }, { "epoch": 0.8016919988528821, "grad_norm": 0.2935057580471039, "learning_rate": 9.172321254002654e-06, "loss": 0.3447, "step": 5591 }, { "epoch": 0.8018353885861772, "grad_norm": 0.31202054023742676, "learning_rate": 9.171861476194607e-06, "loss": 0.3579, "step": 5592 }, { "epoch": 0.8019787783194723, "grad_norm": 0.3207041323184967, "learning_rate": 9.171401582248355e-06, "loss": 0.33, "step": 5593 }, { "epoch": 0.8021221680527674, "grad_norm": 0.3370523452758789, "learning_rate": 9.1709415721767e-06, "loss": 0.3191, "step": 5594 }, { "epoch": 0.8022655577860626, "grad_norm": 0.30335307121276855, "learning_rate": 9.170481445992445e-06, "loss": 0.3352, "step": 5595 }, { "epoch": 0.8024089475193577, "grad_norm": 0.3087974786758423, "learning_rate": 9.170021203708404e-06, "loss": 0.3258, "step": 5596 }, { "epoch": 0.8025523372526527, "grad_norm": 0.328801691532135, "learning_rate": 9.169560845337387e-06, "loss": 0.337, "step": 5597 }, { "epoch": 0.8026957269859478, "grad_norm": 0.29434269666671753, "learning_rate": 9.16910037089221e-06, "loss": 0.3338, "step": 5598 }, { "epoch": 0.8028391167192429, "grad_norm": 0.3126962184906006, "learning_rate": 9.168639780385694e-06, "loss": 0.3482, "step": 5599 }, { "epoch": 0.802982506452538, "grad_norm": 0.2979985475540161, "learning_rate": 9.168179073830657e-06, "loss": 0.3281, "step": 5600 }, { "epoch": 0.8031258961858331, "grad_norm": 0.32309669256210327, "learning_rate": 9.167718251239926e-06, "loss": 0.3436, "step": 5601 }, { "epoch": 0.8032692859191282, "grad_norm": 0.31456446647644043, "learning_rate": 9.16725731262633e-06, "loss": 0.3466, "step": 5602 }, { "epoch": 0.8034126756524232, "grad_norm": 0.30737173557281494, "learning_rate": 9.166796258002701e-06, "loss": 0.3022, "step": 5603 }, { "epoch": 0.8035560653857183, "grad_norm": 0.31421664357185364, "learning_rate": 9.166335087381875e-06, "loss": 0.3148, "step": 5604 }, { "epoch": 0.8036994551190135, "grad_norm": 0.33608752489089966, "learning_rate": 9.165873800776689e-06, "loss": 0.3501, "step": 5605 }, { "epoch": 0.8038428448523086, "grad_norm": 0.3005465269088745, "learning_rate": 9.165412398199983e-06, "loss": 0.3363, "step": 5606 }, { "epoch": 0.8039862345856037, "grad_norm": 0.3246936798095703, "learning_rate": 9.164950879664606e-06, "loss": 0.329, "step": 5607 }, { "epoch": 0.8041296243188988, "grad_norm": 0.32226330041885376, "learning_rate": 9.164489245183402e-06, "loss": 0.3397, "step": 5608 }, { "epoch": 0.8042730140521939, "grad_norm": 0.3723042607307434, "learning_rate": 9.164027494769223e-06, "loss": 0.3541, "step": 5609 }, { "epoch": 0.804416403785489, "grad_norm": 0.3040003776550293, "learning_rate": 9.163565628434927e-06, "loss": 0.3267, "step": 5610 }, { "epoch": 0.804559793518784, "grad_norm": 0.299908310174942, "learning_rate": 9.163103646193365e-06, "loss": 0.335, "step": 5611 }, { "epoch": 0.8047031832520791, "grad_norm": 0.3272193968296051, "learning_rate": 9.162641548057403e-06, "loss": 0.328, "step": 5612 }, { "epoch": 0.8048465729853742, "grad_norm": 0.31490594148635864, "learning_rate": 9.162179334039905e-06, "loss": 0.3182, "step": 5613 }, { "epoch": 0.8049899627186693, "grad_norm": 0.32165127992630005, "learning_rate": 9.161717004153733e-06, "loss": 0.325, "step": 5614 }, { "epoch": 0.8051333524519645, "grad_norm": 0.3212463855743408, "learning_rate": 9.161254558411765e-06, "loss": 0.3437, "step": 5615 }, { "epoch": 0.8052767421852596, "grad_norm": 0.33114001154899597, "learning_rate": 9.160791996826873e-06, "loss": 0.3515, "step": 5616 }, { "epoch": 0.8054201319185547, "grad_norm": 0.32310330867767334, "learning_rate": 9.160329319411929e-06, "loss": 0.3579, "step": 5617 }, { "epoch": 0.8055635216518497, "grad_norm": 0.3200071156024933, "learning_rate": 9.159866526179817e-06, "loss": 0.3349, "step": 5618 }, { "epoch": 0.8057069113851448, "grad_norm": 0.3087252378463745, "learning_rate": 9.15940361714342e-06, "loss": 0.3353, "step": 5619 }, { "epoch": 0.8058503011184399, "grad_norm": 0.3169063329696655, "learning_rate": 9.158940592315624e-06, "loss": 0.3661, "step": 5620 }, { "epoch": 0.805993690851735, "grad_norm": 0.32244226336479187, "learning_rate": 9.15847745170932e-06, "loss": 0.3382, "step": 5621 }, { "epoch": 0.8061370805850301, "grad_norm": 0.30868592858314514, "learning_rate": 9.158014195337403e-06, "loss": 0.3236, "step": 5622 }, { "epoch": 0.8062804703183252, "grad_norm": 0.30758166313171387, "learning_rate": 9.157550823212765e-06, "loss": 0.3307, "step": 5623 }, { "epoch": 0.8064238600516203, "grad_norm": 0.3291621208190918, "learning_rate": 9.157087335348306e-06, "loss": 0.3134, "step": 5624 }, { "epoch": 0.8065672497849155, "grad_norm": 0.32729363441467285, "learning_rate": 9.156623731756933e-06, "loss": 0.3249, "step": 5625 }, { "epoch": 0.8067106395182105, "grad_norm": 0.3131542503833771, "learning_rate": 9.15616001245155e-06, "loss": 0.3411, "step": 5626 }, { "epoch": 0.8068540292515056, "grad_norm": 0.30062928795814514, "learning_rate": 9.155696177445064e-06, "loss": 0.3316, "step": 5627 }, { "epoch": 0.8069974189848007, "grad_norm": 0.2984096109867096, "learning_rate": 9.155232226750387e-06, "loss": 0.3322, "step": 5628 }, { "epoch": 0.8071408087180958, "grad_norm": 0.3177921772003174, "learning_rate": 9.15476816038044e-06, "loss": 0.3342, "step": 5629 }, { "epoch": 0.8072841984513909, "grad_norm": 0.3218352794647217, "learning_rate": 9.154303978348138e-06, "loss": 0.3233, "step": 5630 }, { "epoch": 0.807427588184686, "grad_norm": 0.31271231174468994, "learning_rate": 9.153839680666403e-06, "loss": 0.3349, "step": 5631 }, { "epoch": 0.807570977917981, "grad_norm": 0.3326675593852997, "learning_rate": 9.153375267348163e-06, "loss": 0.3397, "step": 5632 }, { "epoch": 0.8077143676512761, "grad_norm": 0.33984047174453735, "learning_rate": 9.152910738406343e-06, "loss": 0.3421, "step": 5633 }, { "epoch": 0.8078577573845712, "grad_norm": 0.3277115225791931, "learning_rate": 9.152446093853876e-06, "loss": 0.3219, "step": 5634 }, { "epoch": 0.8080011471178664, "grad_norm": 0.30533644556999207, "learning_rate": 9.1519813337037e-06, "loss": 0.3506, "step": 5635 }, { "epoch": 0.8081445368511615, "grad_norm": 0.3239297866821289, "learning_rate": 9.151516457968748e-06, "loss": 0.3471, "step": 5636 }, { "epoch": 0.8082879265844566, "grad_norm": 0.3066592514514923, "learning_rate": 9.151051466661964e-06, "loss": 0.3615, "step": 5637 }, { "epoch": 0.8084313163177517, "grad_norm": 0.3123515546321869, "learning_rate": 9.150586359796293e-06, "loss": 0.3365, "step": 5638 }, { "epoch": 0.8085747060510468, "grad_norm": 0.3253742456436157, "learning_rate": 9.150121137384681e-06, "loss": 0.337, "step": 5639 }, { "epoch": 0.8087180957843418, "grad_norm": 0.298833429813385, "learning_rate": 9.149655799440085e-06, "loss": 0.3546, "step": 5640 }, { "epoch": 0.8088614855176369, "grad_norm": 0.3036731481552124, "learning_rate": 9.149190345975452e-06, "loss": 0.3336, "step": 5641 }, { "epoch": 0.809004875250932, "grad_norm": 0.2937946319580078, "learning_rate": 9.148724777003743e-06, "loss": 0.3194, "step": 5642 }, { "epoch": 0.8091482649842271, "grad_norm": 0.3206828832626343, "learning_rate": 9.148259092537917e-06, "loss": 0.3641, "step": 5643 }, { "epoch": 0.8092916547175222, "grad_norm": 0.340360552072525, "learning_rate": 9.147793292590942e-06, "loss": 0.3541, "step": 5644 }, { "epoch": 0.8094350444508174, "grad_norm": 0.2961440086364746, "learning_rate": 9.147327377175779e-06, "loss": 0.3518, "step": 5645 }, { "epoch": 0.8095784341841125, "grad_norm": 0.30241262912750244, "learning_rate": 9.146861346305403e-06, "loss": 0.3356, "step": 5646 }, { "epoch": 0.8097218239174075, "grad_norm": 0.3275321424007416, "learning_rate": 9.146395199992786e-06, "loss": 0.3508, "step": 5647 }, { "epoch": 0.8098652136507026, "grad_norm": 0.28905993700027466, "learning_rate": 9.145928938250906e-06, "loss": 0.3418, "step": 5648 }, { "epoch": 0.8100086033839977, "grad_norm": 0.3097951412200928, "learning_rate": 9.145462561092739e-06, "loss": 0.3504, "step": 5649 }, { "epoch": 0.8101519931172928, "grad_norm": 0.30163896083831787, "learning_rate": 9.144996068531275e-06, "loss": 0.349, "step": 5650 }, { "epoch": 0.8102953828505879, "grad_norm": 0.3055991232395172, "learning_rate": 9.144529460579494e-06, "loss": 0.3273, "step": 5651 }, { "epoch": 0.810438772583883, "grad_norm": 0.33110666275024414, "learning_rate": 9.14406273725039e-06, "loss": 0.3416, "step": 5652 }, { "epoch": 0.8105821623171781, "grad_norm": 0.2967507839202881, "learning_rate": 9.143595898556953e-06, "loss": 0.3205, "step": 5653 }, { "epoch": 0.8107255520504731, "grad_norm": 0.29831361770629883, "learning_rate": 9.143128944512181e-06, "loss": 0.3405, "step": 5654 }, { "epoch": 0.8108689417837682, "grad_norm": 0.3363315165042877, "learning_rate": 9.142661875129072e-06, "loss": 0.3452, "step": 5655 }, { "epoch": 0.8110123315170634, "grad_norm": 0.3283821940422058, "learning_rate": 9.14219469042063e-06, "loss": 0.361, "step": 5656 }, { "epoch": 0.8111557212503585, "grad_norm": 0.3060528635978699, "learning_rate": 9.141727390399857e-06, "loss": 0.3265, "step": 5657 }, { "epoch": 0.8112991109836536, "grad_norm": 0.3245740830898285, "learning_rate": 9.141259975079768e-06, "loss": 0.3453, "step": 5658 }, { "epoch": 0.8114425007169487, "grad_norm": 0.3035620152950287, "learning_rate": 9.14079244447337e-06, "loss": 0.3241, "step": 5659 }, { "epoch": 0.8115858904502438, "grad_norm": 0.3124668002128601, "learning_rate": 9.14032479859368e-06, "loss": 0.3325, "step": 5660 }, { "epoch": 0.8117292801835388, "grad_norm": 0.32188647985458374, "learning_rate": 9.139857037453716e-06, "loss": 0.3174, "step": 5661 }, { "epoch": 0.8118726699168339, "grad_norm": 0.3248763382434845, "learning_rate": 9.1393891610665e-06, "loss": 0.3539, "step": 5662 }, { "epoch": 0.812016059650129, "grad_norm": 0.32051604986190796, "learning_rate": 9.13892116944506e-06, "loss": 0.3458, "step": 5663 }, { "epoch": 0.8121594493834241, "grad_norm": 0.32670828700065613, "learning_rate": 9.13845306260242e-06, "loss": 0.3453, "step": 5664 }, { "epoch": 0.8123028391167192, "grad_norm": 0.3205423355102539, "learning_rate": 9.137984840551613e-06, "loss": 0.3178, "step": 5665 }, { "epoch": 0.8124462288500144, "grad_norm": 0.30650457739830017, "learning_rate": 9.137516503305673e-06, "loss": 0.316, "step": 5666 }, { "epoch": 0.8125896185833095, "grad_norm": 0.3214004337787628, "learning_rate": 9.13704805087764e-06, "loss": 0.3476, "step": 5667 }, { "epoch": 0.8127330083166046, "grad_norm": 0.300476998090744, "learning_rate": 9.136579483280553e-06, "loss": 0.356, "step": 5668 }, { "epoch": 0.8128763980498996, "grad_norm": 0.30547672510147095, "learning_rate": 9.136110800527453e-06, "loss": 0.3271, "step": 5669 }, { "epoch": 0.8130197877831947, "grad_norm": 0.3383873701095581, "learning_rate": 9.135642002631394e-06, "loss": 0.3435, "step": 5670 }, { "epoch": 0.8131631775164898, "grad_norm": 0.31447237730026245, "learning_rate": 9.135173089605423e-06, "loss": 0.3422, "step": 5671 }, { "epoch": 0.8133065672497849, "grad_norm": 0.3049333393573761, "learning_rate": 9.134704061462595e-06, "loss": 0.3349, "step": 5672 }, { "epoch": 0.81344995698308, "grad_norm": 0.3135030269622803, "learning_rate": 9.134234918215962e-06, "loss": 0.3778, "step": 5673 }, { "epoch": 0.8135933467163751, "grad_norm": 0.3084559738636017, "learning_rate": 9.133765659878593e-06, "loss": 0.3393, "step": 5674 }, { "epoch": 0.8137367364496702, "grad_norm": 0.3297182321548462, "learning_rate": 9.133296286463546e-06, "loss": 0.3176, "step": 5675 }, { "epoch": 0.8138801261829653, "grad_norm": 0.3083338141441345, "learning_rate": 9.132826797983888e-06, "loss": 0.3492, "step": 5676 }, { "epoch": 0.8140235159162604, "grad_norm": 0.3100374639034271, "learning_rate": 9.13235719445269e-06, "loss": 0.33, "step": 5677 }, { "epoch": 0.8141669056495555, "grad_norm": 0.35232874751091003, "learning_rate": 9.131887475883023e-06, "loss": 0.3354, "step": 5678 }, { "epoch": 0.8143102953828506, "grad_norm": 0.3710698187351227, "learning_rate": 9.131417642287967e-06, "loss": 0.3338, "step": 5679 }, { "epoch": 0.8144536851161457, "grad_norm": 0.3403613567352295, "learning_rate": 9.130947693680598e-06, "loss": 0.3472, "step": 5680 }, { "epoch": 0.8145970748494408, "grad_norm": 0.36747562885284424, "learning_rate": 9.130477630074e-06, "loss": 0.3605, "step": 5681 }, { "epoch": 0.8147404645827359, "grad_norm": 0.33119431138038635, "learning_rate": 9.130007451481258e-06, "loss": 0.3352, "step": 5682 }, { "epoch": 0.8148838543160309, "grad_norm": 0.29446303844451904, "learning_rate": 9.129537157915462e-06, "loss": 0.3324, "step": 5683 }, { "epoch": 0.815027244049326, "grad_norm": 0.3403749167919159, "learning_rate": 9.129066749389704e-06, "loss": 0.3444, "step": 5684 }, { "epoch": 0.8151706337826211, "grad_norm": 0.3429318070411682, "learning_rate": 9.128596225917079e-06, "loss": 0.3395, "step": 5685 }, { "epoch": 0.8153140235159163, "grad_norm": 0.2985152304172516, "learning_rate": 9.128125587510689e-06, "loss": 0.3211, "step": 5686 }, { "epoch": 0.8154574132492114, "grad_norm": 0.3377682864665985, "learning_rate": 9.127654834183628e-06, "loss": 0.3399, "step": 5687 }, { "epoch": 0.8156008029825065, "grad_norm": 0.32230910658836365, "learning_rate": 9.127183965949009e-06, "loss": 0.3435, "step": 5688 }, { "epoch": 0.8157441927158016, "grad_norm": 0.34198880195617676, "learning_rate": 9.126712982819936e-06, "loss": 0.3411, "step": 5689 }, { "epoch": 0.8158875824490966, "grad_norm": 0.31795307993888855, "learning_rate": 9.126241884809524e-06, "loss": 0.3244, "step": 5690 }, { "epoch": 0.8160309721823917, "grad_norm": 0.32876691222190857, "learning_rate": 9.125770671930882e-06, "loss": 0.3172, "step": 5691 }, { "epoch": 0.8161743619156868, "grad_norm": 0.3262827694416046, "learning_rate": 9.125299344197133e-06, "loss": 0.316, "step": 5692 }, { "epoch": 0.8163177516489819, "grad_norm": 0.3002883791923523, "learning_rate": 9.124827901621397e-06, "loss": 0.3194, "step": 5693 }, { "epoch": 0.816461141382277, "grad_norm": 0.3327007293701172, "learning_rate": 9.124356344216795e-06, "loss": 0.3395, "step": 5694 }, { "epoch": 0.8166045311155721, "grad_norm": 0.3348570466041565, "learning_rate": 9.123884671996457e-06, "loss": 0.3327, "step": 5695 }, { "epoch": 0.8167479208488673, "grad_norm": 0.32957518100738525, "learning_rate": 9.123412884973516e-06, "loss": 0.3551, "step": 5696 }, { "epoch": 0.8168913105821624, "grad_norm": 0.31392306089401245, "learning_rate": 9.122940983161101e-06, "loss": 0.3338, "step": 5697 }, { "epoch": 0.8170347003154574, "grad_norm": 0.3338613510131836, "learning_rate": 9.122468966572351e-06, "loss": 0.3233, "step": 5698 }, { "epoch": 0.8171780900487525, "grad_norm": 0.32896044850349426, "learning_rate": 9.121996835220408e-06, "loss": 0.3329, "step": 5699 }, { "epoch": 0.8173214797820476, "grad_norm": 0.3245718777179718, "learning_rate": 9.121524589118413e-06, "loss": 0.3474, "step": 5700 }, { "epoch": 0.8174648695153427, "grad_norm": 0.3161848485469818, "learning_rate": 9.121052228279515e-06, "loss": 0.3362, "step": 5701 }, { "epoch": 0.8176082592486378, "grad_norm": 0.3120878338813782, "learning_rate": 9.12057975271686e-06, "loss": 0.3163, "step": 5702 }, { "epoch": 0.8177516489819329, "grad_norm": 0.28700873255729675, "learning_rate": 9.120107162443606e-06, "loss": 0.3363, "step": 5703 }, { "epoch": 0.817895038715228, "grad_norm": 0.3569456934928894, "learning_rate": 9.119634457472904e-06, "loss": 0.3302, "step": 5704 }, { "epoch": 0.818038428448523, "grad_norm": 0.3569466173648834, "learning_rate": 9.119161637817917e-06, "loss": 0.3383, "step": 5705 }, { "epoch": 0.8181818181818182, "grad_norm": 0.29409530758857727, "learning_rate": 9.118688703491807e-06, "loss": 0.3263, "step": 5706 }, { "epoch": 0.8183252079151133, "grad_norm": 0.38247066736221313, "learning_rate": 9.118215654507738e-06, "loss": 0.3324, "step": 5707 }, { "epoch": 0.8184685976484084, "grad_norm": 0.35136958956718445, "learning_rate": 9.117742490878882e-06, "loss": 0.3364, "step": 5708 }, { "epoch": 0.8186119873817035, "grad_norm": 0.34239891171455383, "learning_rate": 9.117269212618407e-06, "loss": 0.3452, "step": 5709 }, { "epoch": 0.8187553771149986, "grad_norm": 0.35940220952033997, "learning_rate": 9.116795819739493e-06, "loss": 0.3287, "step": 5710 }, { "epoch": 0.8188987668482937, "grad_norm": 0.38279953598976135, "learning_rate": 9.116322312255314e-06, "loss": 0.3321, "step": 5711 }, { "epoch": 0.8190421565815887, "grad_norm": 0.32653579115867615, "learning_rate": 9.115848690179055e-06, "loss": 0.3552, "step": 5712 }, { "epoch": 0.8191855463148838, "grad_norm": 0.3850298225879669, "learning_rate": 9.115374953523901e-06, "loss": 0.3329, "step": 5713 }, { "epoch": 0.8193289360481789, "grad_norm": 0.3690354526042938, "learning_rate": 9.114901102303038e-06, "loss": 0.3258, "step": 5714 }, { "epoch": 0.819472325781474, "grad_norm": 0.34162822365760803, "learning_rate": 9.114427136529657e-06, "loss": 0.3535, "step": 5715 }, { "epoch": 0.8196157155147692, "grad_norm": 0.32347556948661804, "learning_rate": 9.113953056216956e-06, "loss": 0.3203, "step": 5716 }, { "epoch": 0.8197591052480643, "grad_norm": 0.3303360044956207, "learning_rate": 9.113478861378128e-06, "loss": 0.3495, "step": 5717 }, { "epoch": 0.8199024949813594, "grad_norm": 0.34092801809310913, "learning_rate": 9.113004552026376e-06, "loss": 0.3183, "step": 5718 }, { "epoch": 0.8200458847146544, "grad_norm": 0.3487235903739929, "learning_rate": 9.112530128174906e-06, "loss": 0.3666, "step": 5719 }, { "epoch": 0.8201892744479495, "grad_norm": 0.3291208744049072, "learning_rate": 9.112055589836923e-06, "loss": 0.3535, "step": 5720 }, { "epoch": 0.8203326641812446, "grad_norm": 0.3476983606815338, "learning_rate": 9.111580937025639e-06, "loss": 0.3456, "step": 5721 }, { "epoch": 0.8204760539145397, "grad_norm": 0.31837543845176697, "learning_rate": 9.111106169754264e-06, "loss": 0.3237, "step": 5722 }, { "epoch": 0.8206194436478348, "grad_norm": 0.31787049770355225, "learning_rate": 9.110631288036019e-06, "loss": 0.3411, "step": 5723 }, { "epoch": 0.8207628333811299, "grad_norm": 0.3422692120075226, "learning_rate": 9.110156291884122e-06, "loss": 0.3345, "step": 5724 }, { "epoch": 0.820906223114425, "grad_norm": 0.31178510189056396, "learning_rate": 9.109681181311797e-06, "loss": 0.3308, "step": 5725 }, { "epoch": 0.8210496128477202, "grad_norm": 0.3221505880355835, "learning_rate": 9.109205956332269e-06, "loss": 0.3312, "step": 5726 }, { "epoch": 0.8211930025810152, "grad_norm": 0.36498865485191345, "learning_rate": 9.108730616958768e-06, "loss": 0.3329, "step": 5727 }, { "epoch": 0.8213363923143103, "grad_norm": 0.27559560537338257, "learning_rate": 9.108255163204529e-06, "loss": 0.3084, "step": 5728 }, { "epoch": 0.8214797820476054, "grad_norm": 0.3258472979068756, "learning_rate": 9.107779595082784e-06, "loss": 0.328, "step": 5729 }, { "epoch": 0.8216231717809005, "grad_norm": 0.31982743740081787, "learning_rate": 9.107303912606774e-06, "loss": 0.3338, "step": 5730 }, { "epoch": 0.8217665615141956, "grad_norm": 0.29959341883659363, "learning_rate": 9.106828115789742e-06, "loss": 0.3201, "step": 5731 }, { "epoch": 0.8219099512474907, "grad_norm": 0.3399393856525421, "learning_rate": 9.106352204644932e-06, "loss": 0.3539, "step": 5732 }, { "epoch": 0.8220533409807858, "grad_norm": 0.3201739490032196, "learning_rate": 9.105876179185595e-06, "loss": 0.3204, "step": 5733 }, { "epoch": 0.8221967307140808, "grad_norm": 0.3049142360687256, "learning_rate": 9.105400039424978e-06, "loss": 0.3432, "step": 5734 }, { "epoch": 0.8223401204473759, "grad_norm": 0.3267689645290375, "learning_rate": 9.104923785376343e-06, "loss": 0.3267, "step": 5735 }, { "epoch": 0.8224835101806711, "grad_norm": 0.28898948431015015, "learning_rate": 9.104447417052942e-06, "loss": 0.323, "step": 5736 }, { "epoch": 0.8226268999139662, "grad_norm": 0.3106519281864166, "learning_rate": 9.103970934468041e-06, "loss": 0.324, "step": 5737 }, { "epoch": 0.8227702896472613, "grad_norm": 0.30945074558258057, "learning_rate": 9.103494337634899e-06, "loss": 0.3308, "step": 5738 }, { "epoch": 0.8229136793805564, "grad_norm": 0.36745941638946533, "learning_rate": 9.103017626566789e-06, "loss": 0.3624, "step": 5739 }, { "epoch": 0.8230570691138515, "grad_norm": 0.32272589206695557, "learning_rate": 9.10254080127698e-06, "loss": 0.3161, "step": 5740 }, { "epoch": 0.8232004588471465, "grad_norm": 0.3244295120239258, "learning_rate": 9.102063861778745e-06, "loss": 0.3481, "step": 5741 }, { "epoch": 0.8233438485804416, "grad_norm": 0.32526659965515137, "learning_rate": 9.101586808085364e-06, "loss": 0.3349, "step": 5742 }, { "epoch": 0.8234872383137367, "grad_norm": 0.3562047779560089, "learning_rate": 9.101109640210114e-06, "loss": 0.334, "step": 5743 }, { "epoch": 0.8236306280470318, "grad_norm": 0.30120837688446045, "learning_rate": 9.10063235816628e-06, "loss": 0.3338, "step": 5744 }, { "epoch": 0.8237740177803269, "grad_norm": 0.31663182377815247, "learning_rate": 9.10015496196715e-06, "loss": 0.3371, "step": 5745 }, { "epoch": 0.8239174075136221, "grad_norm": 0.31908631324768066, "learning_rate": 9.099677451626013e-06, "loss": 0.3396, "step": 5746 }, { "epoch": 0.8240607972469172, "grad_norm": 0.3439236879348755, "learning_rate": 9.099199827156162e-06, "loss": 0.3236, "step": 5747 }, { "epoch": 0.8242041869802123, "grad_norm": 0.30799055099487305, "learning_rate": 9.098722088570895e-06, "loss": 0.3396, "step": 5748 }, { "epoch": 0.8243475767135073, "grad_norm": 0.2872534394264221, "learning_rate": 9.098244235883508e-06, "loss": 0.332, "step": 5749 }, { "epoch": 0.8244909664468024, "grad_norm": 0.3111211657524109, "learning_rate": 9.097766269107307e-06, "loss": 0.3209, "step": 5750 }, { "epoch": 0.8246343561800975, "grad_norm": 0.31618350744247437, "learning_rate": 9.097288188255596e-06, "loss": 0.3216, "step": 5751 }, { "epoch": 0.8247777459133926, "grad_norm": 0.3177067041397095, "learning_rate": 9.096809993341684e-06, "loss": 0.3445, "step": 5752 }, { "epoch": 0.8249211356466877, "grad_norm": 0.32729312777519226, "learning_rate": 9.096331684378885e-06, "loss": 0.3209, "step": 5753 }, { "epoch": 0.8250645253799828, "grad_norm": 0.3115885257720947, "learning_rate": 9.095853261380514e-06, "loss": 0.3274, "step": 5754 }, { "epoch": 0.8252079151132778, "grad_norm": 0.32743340730667114, "learning_rate": 9.095374724359886e-06, "loss": 0.3429, "step": 5755 }, { "epoch": 0.8253513048465729, "grad_norm": 0.33359622955322266, "learning_rate": 9.094896073330328e-06, "loss": 0.3306, "step": 5756 }, { "epoch": 0.8254946945798681, "grad_norm": 0.3422558903694153, "learning_rate": 9.094417308305162e-06, "loss": 0.3431, "step": 5757 }, { "epoch": 0.8256380843131632, "grad_norm": 0.29384732246398926, "learning_rate": 9.093938429297716e-06, "loss": 0.326, "step": 5758 }, { "epoch": 0.8257814740464583, "grad_norm": 0.3375932574272156, "learning_rate": 9.093459436321323e-06, "loss": 0.3534, "step": 5759 }, { "epoch": 0.8259248637797534, "grad_norm": 0.31159016489982605, "learning_rate": 9.092980329389315e-06, "loss": 0.3414, "step": 5760 }, { "epoch": 0.8260682535130485, "grad_norm": 0.29826200008392334, "learning_rate": 9.09250110851503e-06, "loss": 0.3205, "step": 5761 }, { "epoch": 0.8262116432463436, "grad_norm": 0.30541789531707764, "learning_rate": 9.092021773711811e-06, "loss": 0.3388, "step": 5762 }, { "epoch": 0.8263550329796386, "grad_norm": 0.30762550234794617, "learning_rate": 9.091542324993002e-06, "loss": 0.3375, "step": 5763 }, { "epoch": 0.8264984227129337, "grad_norm": 0.34286582469940186, "learning_rate": 9.091062762371946e-06, "loss": 0.3318, "step": 5764 }, { "epoch": 0.8266418124462288, "grad_norm": 0.31292539834976196, "learning_rate": 9.090583085861996e-06, "loss": 0.341, "step": 5765 }, { "epoch": 0.8267852021795239, "grad_norm": 0.3142048120498657, "learning_rate": 9.090103295476506e-06, "loss": 0.3214, "step": 5766 }, { "epoch": 0.8269285919128191, "grad_norm": 0.31835681200027466, "learning_rate": 9.089623391228833e-06, "loss": 0.3112, "step": 5767 }, { "epoch": 0.8270719816461142, "grad_norm": 0.3424305021762848, "learning_rate": 9.089143373132335e-06, "loss": 0.3133, "step": 5768 }, { "epoch": 0.8272153713794093, "grad_norm": 0.37823110818862915, "learning_rate": 9.088663241200376e-06, "loss": 0.3593, "step": 5769 }, { "epoch": 0.8273587611127043, "grad_norm": 0.29849696159362793, "learning_rate": 9.088182995446321e-06, "loss": 0.31, "step": 5770 }, { "epoch": 0.8275021508459994, "grad_norm": 0.3360871374607086, "learning_rate": 9.087702635883542e-06, "loss": 0.3275, "step": 5771 }, { "epoch": 0.8276455405792945, "grad_norm": 0.34503406286239624, "learning_rate": 9.087222162525407e-06, "loss": 0.3237, "step": 5772 }, { "epoch": 0.8277889303125896, "grad_norm": 0.34009602665901184, "learning_rate": 9.086741575385296e-06, "loss": 0.3164, "step": 5773 }, { "epoch": 0.8279323200458847, "grad_norm": 0.3034668266773224, "learning_rate": 9.086260874476586e-06, "loss": 0.3522, "step": 5774 }, { "epoch": 0.8280757097791798, "grad_norm": 0.3523907959461212, "learning_rate": 9.08578005981266e-06, "loss": 0.3299, "step": 5775 }, { "epoch": 0.8282190995124749, "grad_norm": 0.3301972448825836, "learning_rate": 9.0852991314069e-06, "loss": 0.3434, "step": 5776 }, { "epoch": 0.82836248924577, "grad_norm": 0.31755968928337097, "learning_rate": 9.0848180892727e-06, "loss": 0.3328, "step": 5777 }, { "epoch": 0.8285058789790651, "grad_norm": 0.31995850801467896, "learning_rate": 9.084336933423446e-06, "loss": 0.3609, "step": 5778 }, { "epoch": 0.8286492687123602, "grad_norm": 0.33772197365760803, "learning_rate": 9.083855663872535e-06, "loss": 0.3392, "step": 5779 }, { "epoch": 0.8287926584456553, "grad_norm": 0.3546614944934845, "learning_rate": 9.083374280633363e-06, "loss": 0.3491, "step": 5780 }, { "epoch": 0.8289360481789504, "grad_norm": 0.32306957244873047, "learning_rate": 9.082892783719332e-06, "loss": 0.326, "step": 5781 }, { "epoch": 0.8290794379122455, "grad_norm": 0.314344197511673, "learning_rate": 9.08241117314385e-06, "loss": 0.3408, "step": 5782 }, { "epoch": 0.8292228276455406, "grad_norm": 0.36425647139549255, "learning_rate": 9.081929448920318e-06, "loss": 0.3347, "step": 5783 }, { "epoch": 0.8293662173788356, "grad_norm": 0.33774733543395996, "learning_rate": 9.08144761106215e-06, "loss": 0.3556, "step": 5784 }, { "epoch": 0.8295096071121307, "grad_norm": 0.3175759017467499, "learning_rate": 9.08096565958276e-06, "loss": 0.3251, "step": 5785 }, { "epoch": 0.8296529968454258, "grad_norm": 0.3147333562374115, "learning_rate": 9.080483594495562e-06, "loss": 0.3179, "step": 5786 }, { "epoch": 0.829796386578721, "grad_norm": 0.3702310621738434, "learning_rate": 9.08000141581398e-06, "loss": 0.348, "step": 5787 }, { "epoch": 0.8299397763120161, "grad_norm": 0.3318353593349457, "learning_rate": 9.079519123551433e-06, "loss": 0.3433, "step": 5788 }, { "epoch": 0.8300831660453112, "grad_norm": 0.323689728975296, "learning_rate": 9.07903671772135e-06, "loss": 0.3463, "step": 5789 }, { "epoch": 0.8302265557786063, "grad_norm": 0.28460893034935, "learning_rate": 9.078554198337158e-06, "loss": 0.3104, "step": 5790 }, { "epoch": 0.8303699455119014, "grad_norm": 0.32149407267570496, "learning_rate": 9.078071565412291e-06, "loss": 0.3565, "step": 5791 }, { "epoch": 0.8305133352451964, "grad_norm": 0.3047720193862915, "learning_rate": 9.077588818960186e-06, "loss": 0.3434, "step": 5792 }, { "epoch": 0.8306567249784915, "grad_norm": 0.29871633648872375, "learning_rate": 9.07710595899428e-06, "loss": 0.2937, "step": 5793 }, { "epoch": 0.8308001147117866, "grad_norm": 0.3324773609638214, "learning_rate": 9.076622985528019e-06, "loss": 0.3319, "step": 5794 }, { "epoch": 0.8309435044450817, "grad_norm": 0.28330668807029724, "learning_rate": 9.076139898574842e-06, "loss": 0.3451, "step": 5795 }, { "epoch": 0.8310868941783768, "grad_norm": 0.30418291687965393, "learning_rate": 9.0756566981482e-06, "loss": 0.3461, "step": 5796 }, { "epoch": 0.831230283911672, "grad_norm": 0.3287311792373657, "learning_rate": 9.075173384261547e-06, "loss": 0.3284, "step": 5797 }, { "epoch": 0.8313736736449671, "grad_norm": 0.3011622726917267, "learning_rate": 9.074689956928335e-06, "loss": 0.3265, "step": 5798 }, { "epoch": 0.8315170633782621, "grad_norm": 0.328591525554657, "learning_rate": 9.074206416162024e-06, "loss": 0.3354, "step": 5799 }, { "epoch": 0.8316604531115572, "grad_norm": 0.31990477442741394, "learning_rate": 9.073722761976072e-06, "loss": 0.3356, "step": 5800 }, { "epoch": 0.8318038428448523, "grad_norm": 0.3056124150753021, "learning_rate": 9.073238994383944e-06, "loss": 0.3393, "step": 5801 }, { "epoch": 0.8319472325781474, "grad_norm": 0.31107091903686523, "learning_rate": 9.072755113399112e-06, "loss": 0.3299, "step": 5802 }, { "epoch": 0.8320906223114425, "grad_norm": 0.2940087616443634, "learning_rate": 9.07227111903504e-06, "loss": 0.3298, "step": 5803 }, { "epoch": 0.8322340120447376, "grad_norm": 0.3074835240840912, "learning_rate": 9.071787011305204e-06, "loss": 0.355, "step": 5804 }, { "epoch": 0.8323774017780327, "grad_norm": 0.30939653515815735, "learning_rate": 9.071302790223082e-06, "loss": 0.3587, "step": 5805 }, { "epoch": 0.8325207915113277, "grad_norm": 0.29491791129112244, "learning_rate": 9.070818455802153e-06, "loss": 0.3259, "step": 5806 }, { "epoch": 0.8326641812446229, "grad_norm": 0.3361358344554901, "learning_rate": 9.070334008055898e-06, "loss": 0.3488, "step": 5807 }, { "epoch": 0.832807570977918, "grad_norm": 0.3206803500652313, "learning_rate": 9.069849446997809e-06, "loss": 0.3311, "step": 5808 }, { "epoch": 0.8329509607112131, "grad_norm": 0.30403345823287964, "learning_rate": 9.069364772641372e-06, "loss": 0.33, "step": 5809 }, { "epoch": 0.8330943504445082, "grad_norm": 0.3073589503765106, "learning_rate": 9.068879985000077e-06, "loss": 0.3379, "step": 5810 }, { "epoch": 0.8332377401778033, "grad_norm": 0.3164985179901123, "learning_rate": 9.068395084087423e-06, "loss": 0.352, "step": 5811 }, { "epoch": 0.8333811299110984, "grad_norm": 0.3029439449310303, "learning_rate": 9.067910069916909e-06, "loss": 0.3273, "step": 5812 }, { "epoch": 0.8335245196443934, "grad_norm": 0.29774025082588196, "learning_rate": 9.067424942502037e-06, "loss": 0.3365, "step": 5813 }, { "epoch": 0.8336679093776885, "grad_norm": 0.3072105348110199, "learning_rate": 9.06693970185631e-06, "loss": 0.3366, "step": 5814 }, { "epoch": 0.8338112991109836, "grad_norm": 0.3189150094985962, "learning_rate": 9.066454347993238e-06, "loss": 0.3312, "step": 5815 }, { "epoch": 0.8339546888442787, "grad_norm": 0.30246925354003906, "learning_rate": 9.065968880926335e-06, "loss": 0.3419, "step": 5816 }, { "epoch": 0.8340980785775739, "grad_norm": 0.32014089822769165, "learning_rate": 9.06548330066911e-06, "loss": 0.3305, "step": 5817 }, { "epoch": 0.834241468310869, "grad_norm": 0.30481091141700745, "learning_rate": 9.064997607235085e-06, "loss": 0.3533, "step": 5818 }, { "epoch": 0.8343848580441641, "grad_norm": 0.274762898683548, "learning_rate": 9.06451180063778e-06, "loss": 0.3223, "step": 5819 }, { "epoch": 0.8345282477774592, "grad_norm": 0.31289759278297424, "learning_rate": 9.064025880890716e-06, "loss": 0.3295, "step": 5820 }, { "epoch": 0.8346716375107542, "grad_norm": 0.3119806945323944, "learning_rate": 9.063539848007425e-06, "loss": 0.3372, "step": 5821 }, { "epoch": 0.8348150272440493, "grad_norm": 0.29903343319892883, "learning_rate": 9.063053702001438e-06, "loss": 0.3167, "step": 5822 }, { "epoch": 0.8349584169773444, "grad_norm": 0.30920612812042236, "learning_rate": 9.062567442886282e-06, "loss": 0.3236, "step": 5823 }, { "epoch": 0.8351018067106395, "grad_norm": 0.30652669072151184, "learning_rate": 9.062081070675501e-06, "loss": 0.3231, "step": 5824 }, { "epoch": 0.8352451964439346, "grad_norm": 0.31898969411849976, "learning_rate": 9.06159458538263e-06, "loss": 0.3308, "step": 5825 }, { "epoch": 0.8353885861772297, "grad_norm": 0.3143335282802582, "learning_rate": 9.061107987021215e-06, "loss": 0.3267, "step": 5826 }, { "epoch": 0.8355319759105249, "grad_norm": 0.31693848967552185, "learning_rate": 9.060621275604799e-06, "loss": 0.3452, "step": 5827 }, { "epoch": 0.83567536564382, "grad_norm": 0.30919116735458374, "learning_rate": 9.060134451146934e-06, "loss": 0.3293, "step": 5828 }, { "epoch": 0.835818755377115, "grad_norm": 0.3364469110965729, "learning_rate": 9.059647513661171e-06, "loss": 0.3521, "step": 5829 }, { "epoch": 0.8359621451104101, "grad_norm": 0.3230026662349701, "learning_rate": 9.059160463161068e-06, "loss": 0.3249, "step": 5830 }, { "epoch": 0.8361055348437052, "grad_norm": 0.34898582100868225, "learning_rate": 9.058673299660178e-06, "loss": 0.3432, "step": 5831 }, { "epoch": 0.8362489245770003, "grad_norm": 0.326608270406723, "learning_rate": 9.058186023172071e-06, "loss": 0.334, "step": 5832 }, { "epoch": 0.8363923143102954, "grad_norm": 0.30485278367996216, "learning_rate": 9.057698633710306e-06, "loss": 0.3479, "step": 5833 }, { "epoch": 0.8365357040435905, "grad_norm": 0.3552131652832031, "learning_rate": 9.057211131288452e-06, "loss": 0.3358, "step": 5834 }, { "epoch": 0.8366790937768855, "grad_norm": 0.3431549370288849, "learning_rate": 9.056723515920083e-06, "loss": 0.328, "step": 5835 }, { "epoch": 0.8368224835101806, "grad_norm": 0.3250640034675598, "learning_rate": 9.05623578761877e-06, "loss": 0.3403, "step": 5836 }, { "epoch": 0.8369658732434758, "grad_norm": 0.30005353689193726, "learning_rate": 9.055747946398095e-06, "loss": 0.3366, "step": 5837 }, { "epoch": 0.8371092629767709, "grad_norm": 0.32085108757019043, "learning_rate": 9.055259992271633e-06, "loss": 0.3209, "step": 5838 }, { "epoch": 0.837252652710066, "grad_norm": 0.3314115107059479, "learning_rate": 9.054771925252971e-06, "loss": 0.3267, "step": 5839 }, { "epoch": 0.8373960424433611, "grad_norm": 0.3065984845161438, "learning_rate": 9.0542837453557e-06, "loss": 0.3314, "step": 5840 }, { "epoch": 0.8375394321766562, "grad_norm": 0.3208591043949127, "learning_rate": 9.053795452593403e-06, "loss": 0.3423, "step": 5841 }, { "epoch": 0.8376828219099512, "grad_norm": 0.3259347081184387, "learning_rate": 9.053307046979675e-06, "loss": 0.3641, "step": 5842 }, { "epoch": 0.8378262116432463, "grad_norm": 0.3073440194129944, "learning_rate": 9.052818528528117e-06, "loss": 0.326, "step": 5843 }, { "epoch": 0.8379696013765414, "grad_norm": 0.31711405515670776, "learning_rate": 9.052329897252324e-06, "loss": 0.323, "step": 5844 }, { "epoch": 0.8381129911098365, "grad_norm": 0.30493462085723877, "learning_rate": 9.0518411531659e-06, "loss": 0.3434, "step": 5845 }, { "epoch": 0.8382563808431316, "grad_norm": 0.30440855026245117, "learning_rate": 9.051352296282453e-06, "loss": 0.3304, "step": 5846 }, { "epoch": 0.8383997705764267, "grad_norm": 0.3167945444583893, "learning_rate": 9.050863326615588e-06, "loss": 0.3123, "step": 5847 }, { "epoch": 0.8385431603097219, "grad_norm": 0.290488064289093, "learning_rate": 9.050374244178922e-06, "loss": 0.3436, "step": 5848 }, { "epoch": 0.838686550043017, "grad_norm": 0.2852120101451874, "learning_rate": 9.049885048986067e-06, "loss": 0.3093, "step": 5849 }, { "epoch": 0.838829939776312, "grad_norm": 0.29703429341316223, "learning_rate": 9.049395741050641e-06, "loss": 0.3561, "step": 5850 }, { "epoch": 0.8389733295096071, "grad_norm": 0.3213184177875519, "learning_rate": 9.048906320386267e-06, "loss": 0.3555, "step": 5851 }, { "epoch": 0.8391167192429022, "grad_norm": 0.2980136275291443, "learning_rate": 9.04841678700657e-06, "loss": 0.3364, "step": 5852 }, { "epoch": 0.8392601089761973, "grad_norm": 0.3188416361808777, "learning_rate": 9.047927140925177e-06, "loss": 0.3355, "step": 5853 }, { "epoch": 0.8394034987094924, "grad_norm": 0.3375631868839264, "learning_rate": 9.04743738215572e-06, "loss": 0.3461, "step": 5854 }, { "epoch": 0.8395468884427875, "grad_norm": 0.28999608755111694, "learning_rate": 9.046947510711829e-06, "loss": 0.3025, "step": 5855 }, { "epoch": 0.8396902781760825, "grad_norm": 0.3184998035430908, "learning_rate": 9.046457526607149e-06, "loss": 0.3464, "step": 5856 }, { "epoch": 0.8398336679093776, "grad_norm": 0.3134878873825073, "learning_rate": 9.045967429855313e-06, "loss": 0.3499, "step": 5857 }, { "epoch": 0.8399770576426728, "grad_norm": 0.31756120920181274, "learning_rate": 9.04547722046997e-06, "loss": 0.3391, "step": 5858 }, { "epoch": 0.8401204473759679, "grad_norm": 0.3262656629085541, "learning_rate": 9.044986898464763e-06, "loss": 0.3506, "step": 5859 }, { "epoch": 0.840263837109263, "grad_norm": 0.31275156140327454, "learning_rate": 9.044496463853343e-06, "loss": 0.3272, "step": 5860 }, { "epoch": 0.8404072268425581, "grad_norm": 0.304969847202301, "learning_rate": 9.044005916649363e-06, "loss": 0.3342, "step": 5861 }, { "epoch": 0.8405506165758532, "grad_norm": 0.31532013416290283, "learning_rate": 9.04351525686648e-06, "loss": 0.3293, "step": 5862 }, { "epoch": 0.8406940063091483, "grad_norm": 0.29556208848953247, "learning_rate": 9.043024484518352e-06, "loss": 0.3426, "step": 5863 }, { "epoch": 0.8408373960424433, "grad_norm": 0.3023117780685425, "learning_rate": 9.042533599618642e-06, "loss": 0.3412, "step": 5864 }, { "epoch": 0.8409807857757384, "grad_norm": 0.2908909320831299, "learning_rate": 9.042042602181014e-06, "loss": 0.3241, "step": 5865 }, { "epoch": 0.8411241755090335, "grad_norm": 0.32285696268081665, "learning_rate": 9.041551492219139e-06, "loss": 0.345, "step": 5866 }, { "epoch": 0.8412675652423286, "grad_norm": 0.3073480427265167, "learning_rate": 9.041060269746686e-06, "loss": 0.3296, "step": 5867 }, { "epoch": 0.8414109549756238, "grad_norm": 0.3065817058086395, "learning_rate": 9.040568934777333e-06, "loss": 0.3312, "step": 5868 }, { "epoch": 0.8415543447089189, "grad_norm": 0.3124704658985138, "learning_rate": 9.040077487324755e-06, "loss": 0.3639, "step": 5869 }, { "epoch": 0.841697734442214, "grad_norm": 0.3080977201461792, "learning_rate": 9.039585927402635e-06, "loss": 0.3306, "step": 5870 }, { "epoch": 0.841841124175509, "grad_norm": 0.35648342967033386, "learning_rate": 9.03909425502466e-06, "loss": 0.3383, "step": 5871 }, { "epoch": 0.8419845139088041, "grad_norm": 0.338137686252594, "learning_rate": 9.03860247020451e-06, "loss": 0.3266, "step": 5872 }, { "epoch": 0.8421279036420992, "grad_norm": 0.32286518812179565, "learning_rate": 9.03811057295588e-06, "loss": 0.3249, "step": 5873 }, { "epoch": 0.8422712933753943, "grad_norm": 0.35735005140304565, "learning_rate": 9.037618563292465e-06, "loss": 0.3286, "step": 5874 }, { "epoch": 0.8424146831086894, "grad_norm": 0.3260734975337982, "learning_rate": 9.037126441227961e-06, "loss": 0.337, "step": 5875 }, { "epoch": 0.8425580728419845, "grad_norm": 0.29259684681892395, "learning_rate": 9.036634206776064e-06, "loss": 0.3326, "step": 5876 }, { "epoch": 0.8427014625752796, "grad_norm": 0.32476794719696045, "learning_rate": 9.036141859950483e-06, "loss": 0.3626, "step": 5877 }, { "epoch": 0.8428448523085748, "grad_norm": 0.3013966381549835, "learning_rate": 9.03564940076492e-06, "loss": 0.3145, "step": 5878 }, { "epoch": 0.8429882420418698, "grad_norm": 0.34088656306266785, "learning_rate": 9.035156829233088e-06, "loss": 0.3267, "step": 5879 }, { "epoch": 0.8431316317751649, "grad_norm": 0.2982315719127655, "learning_rate": 9.034664145368694e-06, "loss": 0.331, "step": 5880 }, { "epoch": 0.84327502150846, "grad_norm": 0.32539159059524536, "learning_rate": 9.03417134918546e-06, "loss": 0.3307, "step": 5881 }, { "epoch": 0.8434184112417551, "grad_norm": 0.31571757793426514, "learning_rate": 9.033678440697098e-06, "loss": 0.3503, "step": 5882 }, { "epoch": 0.8435618009750502, "grad_norm": 0.3133235573768616, "learning_rate": 9.033185419917335e-06, "loss": 0.3329, "step": 5883 }, { "epoch": 0.8437051907083453, "grad_norm": 0.3197416663169861, "learning_rate": 9.032692286859892e-06, "loss": 0.3472, "step": 5884 }, { "epoch": 0.8438485804416404, "grad_norm": 0.3234028220176697, "learning_rate": 9.032199041538501e-06, "loss": 0.3598, "step": 5885 }, { "epoch": 0.8439919701749354, "grad_norm": 0.3374471962451935, "learning_rate": 9.031705683966891e-06, "loss": 0.3426, "step": 5886 }, { "epoch": 0.8441353599082305, "grad_norm": 0.30684521794319153, "learning_rate": 9.031212214158798e-06, "loss": 0.34, "step": 5887 }, { "epoch": 0.8442787496415257, "grad_norm": 0.2974611520767212, "learning_rate": 9.030718632127957e-06, "loss": 0.3352, "step": 5888 }, { "epoch": 0.8444221393748208, "grad_norm": 0.2953793704509735, "learning_rate": 9.03022493788811e-06, "loss": 0.3301, "step": 5889 }, { "epoch": 0.8445655291081159, "grad_norm": 0.3173935115337372, "learning_rate": 9.029731131452999e-06, "loss": 0.3129, "step": 5890 }, { "epoch": 0.844708918841411, "grad_norm": 0.3015880882740021, "learning_rate": 9.029237212836374e-06, "loss": 0.3323, "step": 5891 }, { "epoch": 0.8448523085747061, "grad_norm": 0.3284250497817993, "learning_rate": 9.028743182051981e-06, "loss": 0.3374, "step": 5892 }, { "epoch": 0.8449956983080011, "grad_norm": 0.28450891375541687, "learning_rate": 9.028249039113576e-06, "loss": 0.3365, "step": 5893 }, { "epoch": 0.8451390880412962, "grad_norm": 0.29786062240600586, "learning_rate": 9.027754784034917e-06, "loss": 0.335, "step": 5894 }, { "epoch": 0.8452824777745913, "grad_norm": 0.323464035987854, "learning_rate": 9.027260416829758e-06, "loss": 0.3555, "step": 5895 }, { "epoch": 0.8454258675078864, "grad_norm": 0.30168232321739197, "learning_rate": 9.026765937511864e-06, "loss": 0.3432, "step": 5896 }, { "epoch": 0.8455692572411815, "grad_norm": 0.3085954487323761, "learning_rate": 9.026271346095002e-06, "loss": 0.3336, "step": 5897 }, { "epoch": 0.8457126469744767, "grad_norm": 0.30338966846466064, "learning_rate": 9.025776642592938e-06, "loss": 0.3353, "step": 5898 }, { "epoch": 0.8458560367077718, "grad_norm": 0.3081737458705902, "learning_rate": 9.025281827019445e-06, "loss": 0.3563, "step": 5899 }, { "epoch": 0.8459994264410668, "grad_norm": 0.3077429234981537, "learning_rate": 9.0247868993883e-06, "loss": 0.3119, "step": 5900 }, { "epoch": 0.8461428161743619, "grad_norm": 0.3564499318599701, "learning_rate": 9.024291859713277e-06, "loss": 0.3644, "step": 5901 }, { "epoch": 0.846286205907657, "grad_norm": 0.3109642565250397, "learning_rate": 9.02379670800816e-06, "loss": 0.3566, "step": 5902 }, { "epoch": 0.8464295956409521, "grad_norm": 0.30867937207221985, "learning_rate": 9.023301444286731e-06, "loss": 0.3262, "step": 5903 }, { "epoch": 0.8465729853742472, "grad_norm": 0.3271936774253845, "learning_rate": 9.022806068562781e-06, "loss": 0.3301, "step": 5904 }, { "epoch": 0.8467163751075423, "grad_norm": 0.31901684403419495, "learning_rate": 9.022310580850098e-06, "loss": 0.3691, "step": 5905 }, { "epoch": 0.8468597648408374, "grad_norm": 0.3002983331680298, "learning_rate": 9.021814981162473e-06, "loss": 0.3213, "step": 5906 }, { "epoch": 0.8470031545741324, "grad_norm": 0.329753577709198, "learning_rate": 9.021319269513708e-06, "loss": 0.342, "step": 5907 }, { "epoch": 0.8471465443074276, "grad_norm": 0.3457046151161194, "learning_rate": 9.020823445917601e-06, "loss": 0.3353, "step": 5908 }, { "epoch": 0.8472899340407227, "grad_norm": 0.3037985563278198, "learning_rate": 9.020327510387954e-06, "loss": 0.3196, "step": 5909 }, { "epoch": 0.8474333237740178, "grad_norm": 0.33412227034568787, "learning_rate": 9.019831462938572e-06, "loss": 0.3327, "step": 5910 }, { "epoch": 0.8475767135073129, "grad_norm": 0.319374680519104, "learning_rate": 9.019335303583267e-06, "loss": 0.3325, "step": 5911 }, { "epoch": 0.847720103240608, "grad_norm": 0.29956427216529846, "learning_rate": 9.018839032335851e-06, "loss": 0.3281, "step": 5912 }, { "epoch": 0.8478634929739031, "grad_norm": 0.294220894575119, "learning_rate": 9.018342649210138e-06, "loss": 0.3399, "step": 5913 }, { "epoch": 0.8480068827071982, "grad_norm": 0.31135293841362, "learning_rate": 9.017846154219947e-06, "loss": 0.3426, "step": 5914 }, { "epoch": 0.8481502724404932, "grad_norm": 0.29454177618026733, "learning_rate": 9.0173495473791e-06, "loss": 0.3355, "step": 5915 }, { "epoch": 0.8482936621737883, "grad_norm": 0.2941873073577881, "learning_rate": 9.016852828701418e-06, "loss": 0.3417, "step": 5916 }, { "epoch": 0.8484370519070834, "grad_norm": 0.30740123987197876, "learning_rate": 9.016355998200736e-06, "loss": 0.3436, "step": 5917 }, { "epoch": 0.8485804416403786, "grad_norm": 0.3024226725101471, "learning_rate": 9.01585905589088e-06, "loss": 0.355, "step": 5918 }, { "epoch": 0.8487238313736737, "grad_norm": 0.29682666063308716, "learning_rate": 9.015362001785686e-06, "loss": 0.3248, "step": 5919 }, { "epoch": 0.8488672211069688, "grad_norm": 0.3201972544193268, "learning_rate": 9.01486483589899e-06, "loss": 0.335, "step": 5920 }, { "epoch": 0.8490106108402639, "grad_norm": 0.295168399810791, "learning_rate": 9.014367558244634e-06, "loss": 0.3261, "step": 5921 }, { "epoch": 0.8491540005735589, "grad_norm": 0.2950354218482971, "learning_rate": 9.01387016883646e-06, "loss": 0.3228, "step": 5922 }, { "epoch": 0.849297390306854, "grad_norm": 0.30902543663978577, "learning_rate": 9.013372667688314e-06, "loss": 0.3193, "step": 5923 }, { "epoch": 0.8494407800401491, "grad_norm": 0.318759948015213, "learning_rate": 9.012875054814048e-06, "loss": 0.3491, "step": 5924 }, { "epoch": 0.8495841697734442, "grad_norm": 0.3158852458000183, "learning_rate": 9.012377330227513e-06, "loss": 0.3401, "step": 5925 }, { "epoch": 0.8497275595067393, "grad_norm": 0.3263358771800995, "learning_rate": 9.011879493942564e-06, "loss": 0.3394, "step": 5926 }, { "epoch": 0.8498709492400344, "grad_norm": 0.30139055848121643, "learning_rate": 9.011381545973063e-06, "loss": 0.3265, "step": 5927 }, { "epoch": 0.8500143389733296, "grad_norm": 0.31434789299964905, "learning_rate": 9.010883486332871e-06, "loss": 0.3228, "step": 5928 }, { "epoch": 0.8501577287066246, "grad_norm": 0.3071043789386749, "learning_rate": 9.01038531503585e-06, "loss": 0.3441, "step": 5929 }, { "epoch": 0.8503011184399197, "grad_norm": 0.3181649148464203, "learning_rate": 9.009887032095873e-06, "loss": 0.329, "step": 5930 }, { "epoch": 0.8504445081732148, "grad_norm": 0.30892837047576904, "learning_rate": 9.00938863752681e-06, "loss": 0.3253, "step": 5931 }, { "epoch": 0.8505878979065099, "grad_norm": 0.32452720403671265, "learning_rate": 9.008890131342533e-06, "loss": 0.3299, "step": 5932 }, { "epoch": 0.850731287639805, "grad_norm": 0.31247076392173767, "learning_rate": 9.008391513556922e-06, "loss": 0.3269, "step": 5933 }, { "epoch": 0.8508746773731001, "grad_norm": 0.3218788802623749, "learning_rate": 9.007892784183859e-06, "loss": 0.3351, "step": 5934 }, { "epoch": 0.8510180671063952, "grad_norm": 0.29430171847343445, "learning_rate": 9.007393943237224e-06, "loss": 0.3121, "step": 5935 }, { "epoch": 0.8511614568396902, "grad_norm": 0.3215702772140503, "learning_rate": 9.006894990730908e-06, "loss": 0.3337, "step": 5936 }, { "epoch": 0.8513048465729853, "grad_norm": 0.3217194080352783, "learning_rate": 9.006395926678797e-06, "loss": 0.3432, "step": 5937 }, { "epoch": 0.8514482363062804, "grad_norm": 0.30557680130004883, "learning_rate": 9.005896751094787e-06, "loss": 0.3605, "step": 5938 }, { "epoch": 0.8515916260395756, "grad_norm": 0.3251630961894989, "learning_rate": 9.005397463992775e-06, "loss": 0.3229, "step": 5939 }, { "epoch": 0.8517350157728707, "grad_norm": 0.3123489022254944, "learning_rate": 9.004898065386658e-06, "loss": 0.3453, "step": 5940 }, { "epoch": 0.8518784055061658, "grad_norm": 0.2989135682582855, "learning_rate": 9.00439855529034e-06, "loss": 0.3325, "step": 5941 }, { "epoch": 0.8520217952394609, "grad_norm": 0.2845367193222046, "learning_rate": 9.003898933717728e-06, "loss": 0.3336, "step": 5942 }, { "epoch": 0.852165184972756, "grad_norm": 0.2886234223842621, "learning_rate": 9.003399200682725e-06, "loss": 0.3194, "step": 5943 }, { "epoch": 0.852308574706051, "grad_norm": 0.3011809289455414, "learning_rate": 9.002899356199249e-06, "loss": 0.3432, "step": 5944 }, { "epoch": 0.8524519644393461, "grad_norm": 0.3182682991027832, "learning_rate": 9.002399400281212e-06, "loss": 0.325, "step": 5945 }, { "epoch": 0.8525953541726412, "grad_norm": 0.3191549479961395, "learning_rate": 9.001899332942532e-06, "loss": 0.337, "step": 5946 }, { "epoch": 0.8527387439059363, "grad_norm": 0.32766756415367126, "learning_rate": 9.001399154197133e-06, "loss": 0.3471, "step": 5947 }, { "epoch": 0.8528821336392314, "grad_norm": 0.29654860496520996, "learning_rate": 9.000898864058933e-06, "loss": 0.3251, "step": 5948 }, { "epoch": 0.8530255233725266, "grad_norm": 0.36222055554389954, "learning_rate": 9.000398462541866e-06, "loss": 0.3312, "step": 5949 }, { "epoch": 0.8531689131058217, "grad_norm": 0.31858932971954346, "learning_rate": 8.999897949659859e-06, "loss": 0.3421, "step": 5950 }, { "epoch": 0.8533123028391167, "grad_norm": 0.3251742124557495, "learning_rate": 8.999397325426847e-06, "loss": 0.334, "step": 5951 }, { "epoch": 0.8534556925724118, "grad_norm": 0.32317429780960083, "learning_rate": 8.998896589856763e-06, "loss": 0.3288, "step": 5952 }, { "epoch": 0.8535990823057069, "grad_norm": 0.3322703242301941, "learning_rate": 8.998395742963553e-06, "loss": 0.3482, "step": 5953 }, { "epoch": 0.853742472039002, "grad_norm": 0.29991471767425537, "learning_rate": 8.997894784761153e-06, "loss": 0.3306, "step": 5954 }, { "epoch": 0.8538858617722971, "grad_norm": 0.3114968240261078, "learning_rate": 8.997393715263513e-06, "loss": 0.3168, "step": 5955 }, { "epoch": 0.8540292515055922, "grad_norm": 0.3319581151008606, "learning_rate": 8.996892534484583e-06, "loss": 0.3443, "step": 5956 }, { "epoch": 0.8541726412388873, "grad_norm": 0.2791350781917572, "learning_rate": 8.99639124243831e-06, "loss": 0.3336, "step": 5957 }, { "epoch": 0.8543160309721823, "grad_norm": 0.32504308223724365, "learning_rate": 8.995889839138654e-06, "loss": 0.3385, "step": 5958 }, { "epoch": 0.8544594207054775, "grad_norm": 0.29041945934295654, "learning_rate": 8.995388324599572e-06, "loss": 0.319, "step": 5959 }, { "epoch": 0.8546028104387726, "grad_norm": 0.30644065141677856, "learning_rate": 8.994886698835025e-06, "loss": 0.3392, "step": 5960 }, { "epoch": 0.8547462001720677, "grad_norm": 0.3263320028781891, "learning_rate": 8.994384961858978e-06, "loss": 0.3225, "step": 5961 }, { "epoch": 0.8548895899053628, "grad_norm": 0.29555070400238037, "learning_rate": 8.993883113685398e-06, "loss": 0.3345, "step": 5962 }, { "epoch": 0.8550329796386579, "grad_norm": 0.3088292181491852, "learning_rate": 8.993381154328255e-06, "loss": 0.337, "step": 5963 }, { "epoch": 0.855176369371953, "grad_norm": 0.30189698934555054, "learning_rate": 8.992879083801524e-06, "loss": 0.3504, "step": 5964 }, { "epoch": 0.855319759105248, "grad_norm": 0.30887800455093384, "learning_rate": 8.992376902119183e-06, "loss": 0.3279, "step": 5965 }, { "epoch": 0.8554631488385431, "grad_norm": 0.32507970929145813, "learning_rate": 8.991874609295209e-06, "loss": 0.3387, "step": 5966 }, { "epoch": 0.8556065385718382, "grad_norm": 0.29298263788223267, "learning_rate": 8.991372205343585e-06, "loss": 0.3227, "step": 5967 }, { "epoch": 0.8557499283051333, "grad_norm": 0.3511030375957489, "learning_rate": 8.990869690278302e-06, "loss": 0.3597, "step": 5968 }, { "epoch": 0.8558933180384285, "grad_norm": 0.3256603181362152, "learning_rate": 8.990367064113343e-06, "loss": 0.3064, "step": 5969 }, { "epoch": 0.8560367077717236, "grad_norm": 0.27802371978759766, "learning_rate": 8.989864326862704e-06, "loss": 0.3455, "step": 5970 }, { "epoch": 0.8561800975050187, "grad_norm": 0.32067200541496277, "learning_rate": 8.98936147854038e-06, "loss": 0.334, "step": 5971 }, { "epoch": 0.8563234872383138, "grad_norm": 0.3022914528846741, "learning_rate": 8.988858519160371e-06, "loss": 0.3383, "step": 5972 }, { "epoch": 0.8564668769716088, "grad_norm": 0.28375518321990967, "learning_rate": 8.988355448736673e-06, "loss": 0.3213, "step": 5973 }, { "epoch": 0.8566102667049039, "grad_norm": 0.3123202621936798, "learning_rate": 8.9878522672833e-06, "loss": 0.3269, "step": 5974 }, { "epoch": 0.856753656438199, "grad_norm": 0.2948312759399414, "learning_rate": 8.98734897481425e-06, "loss": 0.3697, "step": 5975 }, { "epoch": 0.8568970461714941, "grad_norm": 0.2837943434715271, "learning_rate": 8.986845571343538e-06, "loss": 0.3285, "step": 5976 }, { "epoch": 0.8570404359047892, "grad_norm": 0.34674105048179626, "learning_rate": 8.98634205688518e-06, "loss": 0.328, "step": 5977 }, { "epoch": 0.8571838256380843, "grad_norm": 0.3145502805709839, "learning_rate": 8.985838431453191e-06, "loss": 0.3294, "step": 5978 }, { "epoch": 0.8573272153713795, "grad_norm": 0.2919812798500061, "learning_rate": 8.985334695061592e-06, "loss": 0.3389, "step": 5979 }, { "epoch": 0.8574706051046745, "grad_norm": 0.3250230550765991, "learning_rate": 8.984830847724405e-06, "loss": 0.3251, "step": 5980 }, { "epoch": 0.8576139948379696, "grad_norm": 0.31948015093803406, "learning_rate": 8.984326889455656e-06, "loss": 0.3451, "step": 5981 }, { "epoch": 0.8577573845712647, "grad_norm": 0.285593718290329, "learning_rate": 8.983822820269376e-06, "loss": 0.332, "step": 5982 }, { "epoch": 0.8579007743045598, "grad_norm": 0.27692002058029175, "learning_rate": 8.983318640179599e-06, "loss": 0.3073, "step": 5983 }, { "epoch": 0.8580441640378549, "grad_norm": 0.30122941732406616, "learning_rate": 8.982814349200357e-06, "loss": 0.3228, "step": 5984 }, { "epoch": 0.85818755377115, "grad_norm": 0.31393423676490784, "learning_rate": 8.98230994734569e-06, "loss": 0.3278, "step": 5985 }, { "epoch": 0.858330943504445, "grad_norm": 0.28879109025001526, "learning_rate": 8.98180543462964e-06, "loss": 0.3194, "step": 5986 }, { "epoch": 0.8584743332377401, "grad_norm": 0.34088945388793945, "learning_rate": 8.981300811066253e-06, "loss": 0.3277, "step": 5987 }, { "epoch": 0.8586177229710352, "grad_norm": 0.32600104808807373, "learning_rate": 8.980796076669573e-06, "loss": 0.3423, "step": 5988 }, { "epoch": 0.8587611127043304, "grad_norm": 0.3137191832065582, "learning_rate": 8.980291231453657e-06, "loss": 0.3769, "step": 5989 }, { "epoch": 0.8589045024376255, "grad_norm": 0.3019562065601349, "learning_rate": 8.979786275432555e-06, "loss": 0.3341, "step": 5990 }, { "epoch": 0.8590478921709206, "grad_norm": 0.3283775746822357, "learning_rate": 8.979281208620326e-06, "loss": 0.349, "step": 5991 }, { "epoch": 0.8591912819042157, "grad_norm": 0.2837660014629364, "learning_rate": 8.978776031031027e-06, "loss": 0.3378, "step": 5992 }, { "epoch": 0.8593346716375108, "grad_norm": 0.32287776470184326, "learning_rate": 8.978270742678725e-06, "loss": 0.3398, "step": 5993 }, { "epoch": 0.8594780613708058, "grad_norm": 0.2880922853946686, "learning_rate": 8.977765343577485e-06, "loss": 0.3423, "step": 5994 }, { "epoch": 0.8596214511041009, "grad_norm": 0.322721391916275, "learning_rate": 8.977259833741378e-06, "loss": 0.335, "step": 5995 }, { "epoch": 0.859764840837396, "grad_norm": 0.3159165680408478, "learning_rate": 8.976754213184474e-06, "loss": 0.3523, "step": 5996 }, { "epoch": 0.8599082305706911, "grad_norm": 0.3191557824611664, "learning_rate": 8.97624848192085e-06, "loss": 0.3241, "step": 5997 }, { "epoch": 0.8600516203039862, "grad_norm": 0.28800126910209656, "learning_rate": 8.975742639964587e-06, "loss": 0.3286, "step": 5998 }, { "epoch": 0.8601950100372814, "grad_norm": 0.31438228487968445, "learning_rate": 8.975236687329762e-06, "loss": 0.3327, "step": 5999 }, { "epoch": 0.8603383997705765, "grad_norm": 0.3454977869987488, "learning_rate": 8.974730624030463e-06, "loss": 0.3361, "step": 6000 }, { "epoch": 0.8604817895038716, "grad_norm": 0.2806958854198456, "learning_rate": 8.974224450080779e-06, "loss": 0.3298, "step": 6001 }, { "epoch": 0.8606251792371666, "grad_norm": 0.27345430850982666, "learning_rate": 8.973718165494801e-06, "loss": 0.3171, "step": 6002 }, { "epoch": 0.8607685689704617, "grad_norm": 0.3445628881454468, "learning_rate": 8.97321177028662e-06, "loss": 0.3585, "step": 6003 }, { "epoch": 0.8609119587037568, "grad_norm": 0.3248356282711029, "learning_rate": 8.972705264470335e-06, "loss": 0.3217, "step": 6004 }, { "epoch": 0.8610553484370519, "grad_norm": 0.31847038865089417, "learning_rate": 8.972198648060047e-06, "loss": 0.3133, "step": 6005 }, { "epoch": 0.861198738170347, "grad_norm": 0.2970162034034729, "learning_rate": 8.97169192106986e-06, "loss": 0.3452, "step": 6006 }, { "epoch": 0.8613421279036421, "grad_norm": 0.3114047646522522, "learning_rate": 8.971185083513878e-06, "loss": 0.3265, "step": 6007 }, { "epoch": 0.8614855176369371, "grad_norm": 0.30853351950645447, "learning_rate": 8.970678135406213e-06, "loss": 0.3351, "step": 6008 }, { "epoch": 0.8616289073702323, "grad_norm": 0.32263195514678955, "learning_rate": 8.970171076760977e-06, "loss": 0.3505, "step": 6009 }, { "epoch": 0.8617722971035274, "grad_norm": 0.3538094460964203, "learning_rate": 8.969663907592285e-06, "loss": 0.3386, "step": 6010 }, { "epoch": 0.8619156868368225, "grad_norm": 0.3493572771549225, "learning_rate": 8.969156627914257e-06, "loss": 0.3174, "step": 6011 }, { "epoch": 0.8620590765701176, "grad_norm": 0.3577788472175598, "learning_rate": 8.968649237741015e-06, "loss": 0.3332, "step": 6012 }, { "epoch": 0.8622024663034127, "grad_norm": 0.3469359874725342, "learning_rate": 8.96814173708668e-06, "loss": 0.3508, "step": 6013 }, { "epoch": 0.8623458560367078, "grad_norm": 0.298734188079834, "learning_rate": 8.967634125965385e-06, "loss": 0.3302, "step": 6014 }, { "epoch": 0.8624892457700029, "grad_norm": 0.30185502767562866, "learning_rate": 8.96712640439126e-06, "loss": 0.311, "step": 6015 }, { "epoch": 0.8626326355032979, "grad_norm": 0.33111485838890076, "learning_rate": 8.966618572378439e-06, "loss": 0.3417, "step": 6016 }, { "epoch": 0.862776025236593, "grad_norm": 0.29234015941619873, "learning_rate": 8.966110629941057e-06, "loss": 0.2984, "step": 6017 }, { "epoch": 0.8629194149698881, "grad_norm": 0.32762908935546875, "learning_rate": 8.965602577093255e-06, "loss": 0.3332, "step": 6018 }, { "epoch": 0.8630628047031833, "grad_norm": 0.3052383363246918, "learning_rate": 8.96509441384918e-06, "loss": 0.3034, "step": 6019 }, { "epoch": 0.8632061944364784, "grad_norm": 0.30585387349128723, "learning_rate": 8.964586140222974e-06, "loss": 0.3133, "step": 6020 }, { "epoch": 0.8633495841697735, "grad_norm": 0.3054961860179901, "learning_rate": 8.964077756228791e-06, "loss": 0.3286, "step": 6021 }, { "epoch": 0.8634929739030686, "grad_norm": 0.2913442552089691, "learning_rate": 8.96356926188078e-06, "loss": 0.3287, "step": 6022 }, { "epoch": 0.8636363636363636, "grad_norm": 0.3470083177089691, "learning_rate": 8.963060657193097e-06, "loss": 0.355, "step": 6023 }, { "epoch": 0.8637797533696587, "grad_norm": 0.3074108958244324, "learning_rate": 8.962551942179903e-06, "loss": 0.3363, "step": 6024 }, { "epoch": 0.8639231431029538, "grad_norm": 0.30773022770881653, "learning_rate": 8.962043116855357e-06, "loss": 0.3251, "step": 6025 }, { "epoch": 0.8640665328362489, "grad_norm": 0.34377336502075195, "learning_rate": 8.961534181233626e-06, "loss": 0.358, "step": 6026 }, { "epoch": 0.864209922569544, "grad_norm": 0.3058525621891022, "learning_rate": 8.961025135328878e-06, "loss": 0.3398, "step": 6027 }, { "epoch": 0.8643533123028391, "grad_norm": 0.3228095471858978, "learning_rate": 8.960515979155284e-06, "loss": 0.3253, "step": 6028 }, { "epoch": 0.8644967020361343, "grad_norm": 0.318668007850647, "learning_rate": 8.960006712727016e-06, "loss": 0.3082, "step": 6029 }, { "epoch": 0.8646400917694294, "grad_norm": 0.32594454288482666, "learning_rate": 8.959497336058253e-06, "loss": 0.3265, "step": 6030 }, { "epoch": 0.8647834815027244, "grad_norm": 0.2978595197200775, "learning_rate": 8.958987849163174e-06, "loss": 0.323, "step": 6031 }, { "epoch": 0.8649268712360195, "grad_norm": 0.3047559857368469, "learning_rate": 8.958478252055967e-06, "loss": 0.3251, "step": 6032 }, { "epoch": 0.8650702609693146, "grad_norm": 0.3045853078365326, "learning_rate": 8.95796854475081e-06, "loss": 0.3114, "step": 6033 }, { "epoch": 0.8652136507026097, "grad_norm": 0.3480527698993683, "learning_rate": 8.957458727261901e-06, "loss": 0.3507, "step": 6034 }, { "epoch": 0.8653570404359048, "grad_norm": 0.34103670716285706, "learning_rate": 8.956948799603427e-06, "loss": 0.3495, "step": 6035 }, { "epoch": 0.8655004301691999, "grad_norm": 0.3056686520576477, "learning_rate": 8.956438761789586e-06, "loss": 0.3155, "step": 6036 }, { "epoch": 0.865643819902495, "grad_norm": 0.32957157492637634, "learning_rate": 8.955928613834575e-06, "loss": 0.3092, "step": 6037 }, { "epoch": 0.86578720963579, "grad_norm": 0.4061591625213623, "learning_rate": 8.955418355752598e-06, "loss": 0.3382, "step": 6038 }, { "epoch": 0.8659305993690851, "grad_norm": 0.3291299045085907, "learning_rate": 8.954907987557857e-06, "loss": 0.3157, "step": 6039 }, { "epoch": 0.8660739891023803, "grad_norm": 0.32726791501045227, "learning_rate": 8.954397509264563e-06, "loss": 0.335, "step": 6040 }, { "epoch": 0.8662173788356754, "grad_norm": 0.3281887173652649, "learning_rate": 8.953886920886925e-06, "loss": 0.3226, "step": 6041 }, { "epoch": 0.8663607685689705, "grad_norm": 0.3195625841617584, "learning_rate": 8.953376222439157e-06, "loss": 0.3551, "step": 6042 }, { "epoch": 0.8665041583022656, "grad_norm": 0.30020833015441895, "learning_rate": 8.952865413935477e-06, "loss": 0.3225, "step": 6043 }, { "epoch": 0.8666475480355607, "grad_norm": 0.30667850375175476, "learning_rate": 8.952354495390103e-06, "loss": 0.3436, "step": 6044 }, { "epoch": 0.8667909377688557, "grad_norm": 0.341135174036026, "learning_rate": 8.951843466817261e-06, "loss": 0.3462, "step": 6045 }, { "epoch": 0.8669343275021508, "grad_norm": 0.2904326021671295, "learning_rate": 8.951332328231175e-06, "loss": 0.3276, "step": 6046 }, { "epoch": 0.8670777172354459, "grad_norm": 0.3598906397819519, "learning_rate": 8.950821079646078e-06, "loss": 0.3292, "step": 6047 }, { "epoch": 0.867221106968741, "grad_norm": 0.3323948383331299, "learning_rate": 8.950309721076196e-06, "loss": 0.3465, "step": 6048 }, { "epoch": 0.8673644967020361, "grad_norm": 0.3353443145751953, "learning_rate": 8.949798252535771e-06, "loss": 0.3324, "step": 6049 }, { "epoch": 0.8675078864353313, "grad_norm": 0.30057570338249207, "learning_rate": 8.949286674039035e-06, "loss": 0.3326, "step": 6050 }, { "epoch": 0.8676512761686264, "grad_norm": 0.3382098376750946, "learning_rate": 8.948774985600236e-06, "loss": 0.3423, "step": 6051 }, { "epoch": 0.8677946659019214, "grad_norm": 0.30210721492767334, "learning_rate": 8.948263187233615e-06, "loss": 0.3049, "step": 6052 }, { "epoch": 0.8679380556352165, "grad_norm": 0.33121800422668457, "learning_rate": 8.947751278953422e-06, "loss": 0.3432, "step": 6053 }, { "epoch": 0.8680814453685116, "grad_norm": 0.2957547605037689, "learning_rate": 8.947239260773904e-06, "loss": 0.3066, "step": 6054 }, { "epoch": 0.8682248351018067, "grad_norm": 0.31559985876083374, "learning_rate": 8.946727132709318e-06, "loss": 0.3088, "step": 6055 }, { "epoch": 0.8683682248351018, "grad_norm": 0.3192349970340729, "learning_rate": 8.946214894773919e-06, "loss": 0.3181, "step": 6056 }, { "epoch": 0.8685116145683969, "grad_norm": 0.3384675085544586, "learning_rate": 8.94570254698197e-06, "loss": 0.3413, "step": 6057 }, { "epoch": 0.868655004301692, "grad_norm": 0.3297707438468933, "learning_rate": 8.945190089347728e-06, "loss": 0.3357, "step": 6058 }, { "epoch": 0.868798394034987, "grad_norm": 0.334730327129364, "learning_rate": 8.944677521885466e-06, "loss": 0.3445, "step": 6059 }, { "epoch": 0.8689417837682822, "grad_norm": 0.3346651494503021, "learning_rate": 8.944164844609448e-06, "loss": 0.3459, "step": 6060 }, { "epoch": 0.8690851735015773, "grad_norm": 0.32176581025123596, "learning_rate": 8.94365205753395e-06, "loss": 0.3358, "step": 6061 }, { "epoch": 0.8692285632348724, "grad_norm": 0.3546549379825592, "learning_rate": 8.943139160673242e-06, "loss": 0.3565, "step": 6062 }, { "epoch": 0.8693719529681675, "grad_norm": 0.37313008308410645, "learning_rate": 8.942626154041607e-06, "loss": 0.3376, "step": 6063 }, { "epoch": 0.8695153427014626, "grad_norm": 0.34083813428878784, "learning_rate": 8.942113037653326e-06, "loss": 0.3567, "step": 6064 }, { "epoch": 0.8696587324347577, "grad_norm": 0.3203965425491333, "learning_rate": 8.941599811522682e-06, "loss": 0.3448, "step": 6065 }, { "epoch": 0.8698021221680527, "grad_norm": 0.3107001781463623, "learning_rate": 8.94108647566396e-06, "loss": 0.3281, "step": 6066 }, { "epoch": 0.8699455119013478, "grad_norm": 0.370675653219223, "learning_rate": 8.940573030091455e-06, "loss": 0.3268, "step": 6067 }, { "epoch": 0.8700889016346429, "grad_norm": 0.3450535535812378, "learning_rate": 8.940059474819459e-06, "loss": 0.3302, "step": 6068 }, { "epoch": 0.870232291367938, "grad_norm": 0.2822922468185425, "learning_rate": 8.939545809862266e-06, "loss": 0.3146, "step": 6069 }, { "epoch": 0.8703756811012332, "grad_norm": 0.32182034850120544, "learning_rate": 8.939032035234181e-06, "loss": 0.3384, "step": 6070 }, { "epoch": 0.8705190708345283, "grad_norm": 0.32287269830703735, "learning_rate": 8.938518150949501e-06, "loss": 0.3171, "step": 6071 }, { "epoch": 0.8706624605678234, "grad_norm": 0.3183329105377197, "learning_rate": 8.938004157022535e-06, "loss": 0.3264, "step": 6072 }, { "epoch": 0.8708058503011185, "grad_norm": 0.2888850271701813, "learning_rate": 8.937490053467592e-06, "loss": 0.3236, "step": 6073 }, { "epoch": 0.8709492400344135, "grad_norm": 0.3556082546710968, "learning_rate": 8.936975840298982e-06, "loss": 0.3467, "step": 6074 }, { "epoch": 0.8710926297677086, "grad_norm": 0.3348853886127472, "learning_rate": 8.93646151753102e-06, "loss": 0.3349, "step": 6075 }, { "epoch": 0.8712360195010037, "grad_norm": 0.3294583559036255, "learning_rate": 8.935947085178027e-06, "loss": 0.332, "step": 6076 }, { "epoch": 0.8713794092342988, "grad_norm": 0.3249600827693939, "learning_rate": 8.935432543254318e-06, "loss": 0.3279, "step": 6077 }, { "epoch": 0.8715227989675939, "grad_norm": 0.44941282272338867, "learning_rate": 8.934917891774223e-06, "loss": 0.3641, "step": 6078 }, { "epoch": 0.871666188700889, "grad_norm": 0.34352347254753113, "learning_rate": 8.93440313075207e-06, "loss": 0.3301, "step": 6079 }, { "epoch": 0.8718095784341842, "grad_norm": 0.3388163149356842, "learning_rate": 8.933888260202182e-06, "loss": 0.3553, "step": 6080 }, { "epoch": 0.8719529681674792, "grad_norm": 0.3432662785053253, "learning_rate": 8.933373280138898e-06, "loss": 0.3167, "step": 6081 }, { "epoch": 0.8720963579007743, "grad_norm": 0.3172041177749634, "learning_rate": 8.932858190576552e-06, "loss": 0.3338, "step": 6082 }, { "epoch": 0.8722397476340694, "grad_norm": 0.3002021014690399, "learning_rate": 8.932342991529484e-06, "loss": 0.3144, "step": 6083 }, { "epoch": 0.8723831373673645, "grad_norm": 0.3250599801540375, "learning_rate": 8.931827683012036e-06, "loss": 0.3202, "step": 6084 }, { "epoch": 0.8725265271006596, "grad_norm": 0.3614453375339508, "learning_rate": 8.931312265038554e-06, "loss": 0.3423, "step": 6085 }, { "epoch": 0.8726699168339547, "grad_norm": 0.3030347526073456, "learning_rate": 8.930796737623385e-06, "loss": 0.3062, "step": 6086 }, { "epoch": 0.8728133065672498, "grad_norm": 0.3416644036769867, "learning_rate": 8.930281100780883e-06, "loss": 0.3468, "step": 6087 }, { "epoch": 0.8729566963005448, "grad_norm": 0.34827542304992676, "learning_rate": 8.9297653545254e-06, "loss": 0.3746, "step": 6088 }, { "epoch": 0.8731000860338399, "grad_norm": 0.3123217523097992, "learning_rate": 8.929249498871294e-06, "loss": 0.3298, "step": 6089 }, { "epoch": 0.8732434757671351, "grad_norm": 0.3018295168876648, "learning_rate": 8.928733533832927e-06, "loss": 0.3328, "step": 6090 }, { "epoch": 0.8733868655004302, "grad_norm": 0.30187472701072693, "learning_rate": 8.928217459424664e-06, "loss": 0.3152, "step": 6091 }, { "epoch": 0.8735302552337253, "grad_norm": 0.35335248708724976, "learning_rate": 8.927701275660868e-06, "loss": 0.3496, "step": 6092 }, { "epoch": 0.8736736449670204, "grad_norm": 0.3331068456172943, "learning_rate": 8.92718498255591e-06, "loss": 0.342, "step": 6093 }, { "epoch": 0.8738170347003155, "grad_norm": 0.3242614269256592, "learning_rate": 8.926668580124163e-06, "loss": 0.361, "step": 6094 }, { "epoch": 0.8739604244336105, "grad_norm": 0.29801687598228455, "learning_rate": 8.926152068380002e-06, "loss": 0.3352, "step": 6095 }, { "epoch": 0.8741038141669056, "grad_norm": 0.31741365790367126, "learning_rate": 8.925635447337809e-06, "loss": 0.3186, "step": 6096 }, { "epoch": 0.8742472039002007, "grad_norm": 0.3096975088119507, "learning_rate": 8.925118717011965e-06, "loss": 0.3269, "step": 6097 }, { "epoch": 0.8743905936334958, "grad_norm": 0.30868077278137207, "learning_rate": 8.924601877416851e-06, "loss": 0.3343, "step": 6098 }, { "epoch": 0.8745339833667909, "grad_norm": 0.3026891350746155, "learning_rate": 8.92408492856686e-06, "loss": 0.3189, "step": 6099 }, { "epoch": 0.8746773731000861, "grad_norm": 0.2732778489589691, "learning_rate": 8.92356787047638e-06, "loss": 0.3208, "step": 6100 }, { "epoch": 0.8748207628333812, "grad_norm": 0.32901617884635925, "learning_rate": 8.923050703159806e-06, "loss": 0.3268, "step": 6101 }, { "epoch": 0.8749641525666763, "grad_norm": 0.3202122449874878, "learning_rate": 8.922533426631534e-06, "loss": 0.3185, "step": 6102 }, { "epoch": 0.8751075422999713, "grad_norm": 0.2945573329925537, "learning_rate": 8.922016040905968e-06, "loss": 0.314, "step": 6103 }, { "epoch": 0.8752509320332664, "grad_norm": 0.32802218198776245, "learning_rate": 8.921498545997506e-06, "loss": 0.3329, "step": 6104 }, { "epoch": 0.8753943217665615, "grad_norm": 0.3196879029273987, "learning_rate": 8.92098094192056e-06, "loss": 0.3254, "step": 6105 }, { "epoch": 0.8755377114998566, "grad_norm": 0.3123631775379181, "learning_rate": 8.920463228689534e-06, "loss": 0.3536, "step": 6106 }, { "epoch": 0.8756811012331517, "grad_norm": 0.33152711391448975, "learning_rate": 8.919945406318844e-06, "loss": 0.3392, "step": 6107 }, { "epoch": 0.8758244909664468, "grad_norm": 0.32166728377342224, "learning_rate": 8.919427474822901e-06, "loss": 0.3268, "step": 6108 }, { "epoch": 0.8759678806997419, "grad_norm": 0.2962987720966339, "learning_rate": 8.91890943421613e-06, "loss": 0.3508, "step": 6109 }, { "epoch": 0.876111270433037, "grad_norm": 0.3372233510017395, "learning_rate": 8.918391284512946e-06, "loss": 0.3402, "step": 6110 }, { "epoch": 0.8762546601663321, "grad_norm": 0.3022463321685791, "learning_rate": 8.917873025727776e-06, "loss": 0.3115, "step": 6111 }, { "epoch": 0.8763980498996272, "grad_norm": 0.2974976599216461, "learning_rate": 8.91735465787505e-06, "loss": 0.3304, "step": 6112 }, { "epoch": 0.8765414396329223, "grad_norm": 0.6070976257324219, "learning_rate": 8.916836180969197e-06, "loss": 0.3331, "step": 6113 }, { "epoch": 0.8766848293662174, "grad_norm": 0.31101346015930176, "learning_rate": 8.916317595024648e-06, "loss": 0.3089, "step": 6114 }, { "epoch": 0.8768282190995125, "grad_norm": 0.3587980270385742, "learning_rate": 8.91579890005584e-06, "loss": 0.3245, "step": 6115 }, { "epoch": 0.8769716088328076, "grad_norm": 0.4111616909503937, "learning_rate": 8.915280096077217e-06, "loss": 0.346, "step": 6116 }, { "epoch": 0.8771149985661026, "grad_norm": 0.3448854386806488, "learning_rate": 8.914761183103217e-06, "loss": 0.335, "step": 6117 }, { "epoch": 0.8772583882993977, "grad_norm": 0.31957823038101196, "learning_rate": 8.91424216114829e-06, "loss": 0.3521, "step": 6118 }, { "epoch": 0.8774017780326928, "grad_norm": 0.31863853335380554, "learning_rate": 8.913723030226881e-06, "loss": 0.3156, "step": 6119 }, { "epoch": 0.877545167765988, "grad_norm": 0.3062520921230316, "learning_rate": 8.913203790353444e-06, "loss": 0.3182, "step": 6120 }, { "epoch": 0.8776885574992831, "grad_norm": 0.308045893907547, "learning_rate": 8.912684441542432e-06, "loss": 0.3329, "step": 6121 }, { "epoch": 0.8778319472325782, "grad_norm": 0.34682515263557434, "learning_rate": 8.912164983808305e-06, "loss": 0.3317, "step": 6122 }, { "epoch": 0.8779753369658733, "grad_norm": 0.32522162795066833, "learning_rate": 8.911645417165522e-06, "loss": 0.3493, "step": 6123 }, { "epoch": 0.8781187266991684, "grad_norm": 0.30090221762657166, "learning_rate": 8.911125741628549e-06, "loss": 0.3312, "step": 6124 }, { "epoch": 0.8782621164324634, "grad_norm": 0.322593629360199, "learning_rate": 8.910605957211852e-06, "loss": 0.3489, "step": 6125 }, { "epoch": 0.8784055061657585, "grad_norm": 0.30863913893699646, "learning_rate": 8.9100860639299e-06, "loss": 0.3222, "step": 6126 }, { "epoch": 0.8785488958990536, "grad_norm": 0.31542298197746277, "learning_rate": 8.909566061797166e-06, "loss": 0.3449, "step": 6127 }, { "epoch": 0.8786922856323487, "grad_norm": 0.3055543601512909, "learning_rate": 8.909045950828128e-06, "loss": 0.3278, "step": 6128 }, { "epoch": 0.8788356753656438, "grad_norm": 0.29603925347328186, "learning_rate": 8.908525731037263e-06, "loss": 0.3299, "step": 6129 }, { "epoch": 0.8789790650989389, "grad_norm": 0.3174516260623932, "learning_rate": 8.908005402439055e-06, "loss": 0.3374, "step": 6130 }, { "epoch": 0.8791224548322341, "grad_norm": 0.2936476767063141, "learning_rate": 8.907484965047988e-06, "loss": 0.3199, "step": 6131 }, { "epoch": 0.8792658445655291, "grad_norm": 0.3140821158885956, "learning_rate": 8.90696441887855e-06, "loss": 0.3478, "step": 6132 }, { "epoch": 0.8794092342988242, "grad_norm": 0.3324493169784546, "learning_rate": 8.906443763945233e-06, "loss": 0.3349, "step": 6133 }, { "epoch": 0.8795526240321193, "grad_norm": 0.29069027304649353, "learning_rate": 8.905923000262531e-06, "loss": 0.336, "step": 6134 }, { "epoch": 0.8796960137654144, "grad_norm": 0.28837475180625916, "learning_rate": 8.90540212784494e-06, "loss": 0.3426, "step": 6135 }, { "epoch": 0.8798394034987095, "grad_norm": 0.3068004250526428, "learning_rate": 8.904881146706963e-06, "loss": 0.3548, "step": 6136 }, { "epoch": 0.8799827932320046, "grad_norm": 0.28644677996635437, "learning_rate": 8.9043600568631e-06, "loss": 0.3407, "step": 6137 }, { "epoch": 0.8801261829652997, "grad_norm": 0.3097274601459503, "learning_rate": 8.90383885832786e-06, "loss": 0.3196, "step": 6138 }, { "epoch": 0.8802695726985947, "grad_norm": 0.3141053020954132, "learning_rate": 8.903317551115751e-06, "loss": 0.3407, "step": 6139 }, { "epoch": 0.8804129624318898, "grad_norm": 0.3330068290233612, "learning_rate": 8.902796135241288e-06, "loss": 0.3335, "step": 6140 }, { "epoch": 0.880556352165185, "grad_norm": 0.3202831447124481, "learning_rate": 8.902274610718981e-06, "loss": 0.3315, "step": 6141 }, { "epoch": 0.8806997418984801, "grad_norm": 0.3272037208080292, "learning_rate": 8.901752977563354e-06, "loss": 0.3614, "step": 6142 }, { "epoch": 0.8808431316317752, "grad_norm": 0.331341952085495, "learning_rate": 8.901231235788926e-06, "loss": 0.3253, "step": 6143 }, { "epoch": 0.8809865213650703, "grad_norm": 0.3106842041015625, "learning_rate": 8.90070938541022e-06, "loss": 0.3412, "step": 6144 }, { "epoch": 0.8811299110983654, "grad_norm": 0.30817511677742004, "learning_rate": 8.900187426441766e-06, "loss": 0.3515, "step": 6145 }, { "epoch": 0.8812733008316604, "grad_norm": 0.30168434977531433, "learning_rate": 8.899665358898093e-06, "loss": 0.3369, "step": 6146 }, { "epoch": 0.8814166905649555, "grad_norm": 0.29903411865234375, "learning_rate": 8.899143182793737e-06, "loss": 0.3348, "step": 6147 }, { "epoch": 0.8815600802982506, "grad_norm": 0.2921149730682373, "learning_rate": 8.898620898143232e-06, "loss": 0.3083, "step": 6148 }, { "epoch": 0.8817034700315457, "grad_norm": 0.33038419485092163, "learning_rate": 8.898098504961117e-06, "loss": 0.3407, "step": 6149 }, { "epoch": 0.8818468597648408, "grad_norm": 0.30486661195755005, "learning_rate": 8.897576003261936e-06, "loss": 0.3287, "step": 6150 }, { "epoch": 0.881990249498136, "grad_norm": 0.31266525387763977, "learning_rate": 8.897053393060238e-06, "loss": 0.3169, "step": 6151 }, { "epoch": 0.8821336392314311, "grad_norm": 0.31449970602989197, "learning_rate": 8.896530674370565e-06, "loss": 0.3386, "step": 6152 }, { "epoch": 0.8822770289647262, "grad_norm": 0.32343724370002747, "learning_rate": 8.896007847207473e-06, "loss": 0.3289, "step": 6153 }, { "epoch": 0.8824204186980212, "grad_norm": 0.3378874957561493, "learning_rate": 8.895484911585516e-06, "loss": 0.3587, "step": 6154 }, { "epoch": 0.8825638084313163, "grad_norm": 0.30430328845977783, "learning_rate": 8.894961867519251e-06, "loss": 0.3389, "step": 6155 }, { "epoch": 0.8827071981646114, "grad_norm": 0.3272354304790497, "learning_rate": 8.89443871502324e-06, "loss": 0.3247, "step": 6156 }, { "epoch": 0.8828505878979065, "grad_norm": 0.3657483160495758, "learning_rate": 8.893915454112045e-06, "loss": 0.3272, "step": 6157 }, { "epoch": 0.8829939776312016, "grad_norm": 0.31361597776412964, "learning_rate": 8.893392084800234e-06, "loss": 0.3476, "step": 6158 }, { "epoch": 0.8831373673644967, "grad_norm": 0.3324955403804779, "learning_rate": 8.892868607102376e-06, "loss": 0.3374, "step": 6159 }, { "epoch": 0.8832807570977917, "grad_norm": 0.34237977862358093, "learning_rate": 8.892345021033047e-06, "loss": 0.3232, "step": 6160 }, { "epoch": 0.8834241468310869, "grad_norm": 0.31198957562446594, "learning_rate": 8.891821326606817e-06, "loss": 0.3298, "step": 6161 }, { "epoch": 0.883567536564382, "grad_norm": 0.30402544140815735, "learning_rate": 8.891297523838269e-06, "loss": 0.3337, "step": 6162 }, { "epoch": 0.8837109262976771, "grad_norm": 0.2911130487918854, "learning_rate": 8.890773612741983e-06, "loss": 0.3401, "step": 6163 }, { "epoch": 0.8838543160309722, "grad_norm": 0.3073890507221222, "learning_rate": 8.890249593332547e-06, "loss": 0.3086, "step": 6164 }, { "epoch": 0.8839977057642673, "grad_norm": 0.319399893283844, "learning_rate": 8.889725465624545e-06, "loss": 0.3365, "step": 6165 }, { "epoch": 0.8841410954975624, "grad_norm": 0.3249303102493286, "learning_rate": 8.889201229632569e-06, "loss": 0.3412, "step": 6166 }, { "epoch": 0.8842844852308575, "grad_norm": 0.29014772176742554, "learning_rate": 8.888676885371215e-06, "loss": 0.3327, "step": 6167 }, { "epoch": 0.8844278749641525, "grad_norm": 0.3176482021808624, "learning_rate": 8.888152432855079e-06, "loss": 0.3511, "step": 6168 }, { "epoch": 0.8845712646974476, "grad_norm": 0.3071785569190979, "learning_rate": 8.887627872098758e-06, "loss": 0.3214, "step": 6169 }, { "epoch": 0.8847146544307427, "grad_norm": 0.3049132525920868, "learning_rate": 8.887103203116857e-06, "loss": 0.324, "step": 6170 }, { "epoch": 0.8848580441640379, "grad_norm": 0.3092787563800812, "learning_rate": 8.886578425923986e-06, "loss": 0.341, "step": 6171 }, { "epoch": 0.885001433897333, "grad_norm": 0.29938921332359314, "learning_rate": 8.886053540534749e-06, "loss": 0.3457, "step": 6172 }, { "epoch": 0.8851448236306281, "grad_norm": 0.3207995593547821, "learning_rate": 8.885528546963758e-06, "loss": 0.3464, "step": 6173 }, { "epoch": 0.8852882133639232, "grad_norm": 0.32315748929977417, "learning_rate": 8.88500344522563e-06, "loss": 0.3452, "step": 6174 }, { "epoch": 0.8854316030972182, "grad_norm": 0.3148597180843353, "learning_rate": 8.884478235334981e-06, "loss": 0.3456, "step": 6175 }, { "epoch": 0.8855749928305133, "grad_norm": 0.3059360980987549, "learning_rate": 8.883952917306435e-06, "loss": 0.3454, "step": 6176 }, { "epoch": 0.8857183825638084, "grad_norm": 0.3059731423854828, "learning_rate": 8.883427491154613e-06, "loss": 0.3408, "step": 6177 }, { "epoch": 0.8858617722971035, "grad_norm": 0.3326202630996704, "learning_rate": 8.882901956894146e-06, "loss": 0.3386, "step": 6178 }, { "epoch": 0.8860051620303986, "grad_norm": 0.34408485889434814, "learning_rate": 8.882376314539658e-06, "loss": 0.3116, "step": 6179 }, { "epoch": 0.8861485517636937, "grad_norm": 0.3365515172481537, "learning_rate": 8.88185056410579e-06, "loss": 0.3319, "step": 6180 }, { "epoch": 0.8862919414969889, "grad_norm": 0.2909674644470215, "learning_rate": 8.881324705607171e-06, "loss": 0.3322, "step": 6181 }, { "epoch": 0.886435331230284, "grad_norm": 0.3007807433605194, "learning_rate": 8.880798739058443e-06, "loss": 0.3259, "step": 6182 }, { "epoch": 0.886578720963579, "grad_norm": 0.2997846305370331, "learning_rate": 8.880272664474247e-06, "loss": 0.3099, "step": 6183 }, { "epoch": 0.8867221106968741, "grad_norm": 0.2841099798679352, "learning_rate": 8.879746481869229e-06, "loss": 0.3326, "step": 6184 }, { "epoch": 0.8868655004301692, "grad_norm": 0.31212058663368225, "learning_rate": 8.879220191258038e-06, "loss": 0.3352, "step": 6185 }, { "epoch": 0.8870088901634643, "grad_norm": 0.2989645004272461, "learning_rate": 8.878693792655325e-06, "loss": 0.3524, "step": 6186 }, { "epoch": 0.8871522798967594, "grad_norm": 0.30920806527137756, "learning_rate": 8.878167286075743e-06, "loss": 0.3268, "step": 6187 }, { "epoch": 0.8872956696300545, "grad_norm": 0.30094319581985474, "learning_rate": 8.877640671533949e-06, "loss": 0.3293, "step": 6188 }, { "epoch": 0.8874390593633495, "grad_norm": 0.2972429692745209, "learning_rate": 8.877113949044604e-06, "loss": 0.3103, "step": 6189 }, { "epoch": 0.8875824490966446, "grad_norm": 0.33577701449394226, "learning_rate": 8.87658711862237e-06, "loss": 0.3348, "step": 6190 }, { "epoch": 0.8877258388299398, "grad_norm": 0.32010379433631897, "learning_rate": 8.876060180281915e-06, "loss": 0.3263, "step": 6191 }, { "epoch": 0.8878692285632349, "grad_norm": 0.3218129575252533, "learning_rate": 8.875533134037906e-06, "loss": 0.3377, "step": 6192 }, { "epoch": 0.88801261829653, "grad_norm": 0.32931721210479736, "learning_rate": 8.875005979905018e-06, "loss": 0.3524, "step": 6193 }, { "epoch": 0.8881560080298251, "grad_norm": 0.3311004340648651, "learning_rate": 8.874478717897924e-06, "loss": 0.3067, "step": 6194 }, { "epoch": 0.8882993977631202, "grad_norm": 0.3495679497718811, "learning_rate": 8.8739513480313e-06, "loss": 0.329, "step": 6195 }, { "epoch": 0.8884427874964153, "grad_norm": 0.30338242650032043, "learning_rate": 8.873423870319831e-06, "loss": 0.3343, "step": 6196 }, { "epoch": 0.8885861772297103, "grad_norm": 0.3378543257713318, "learning_rate": 8.872896284778201e-06, "loss": 0.3301, "step": 6197 }, { "epoch": 0.8887295669630054, "grad_norm": 0.3566429018974304, "learning_rate": 8.872368591421095e-06, "loss": 0.346, "step": 6198 }, { "epoch": 0.8888729566963005, "grad_norm": 0.3421856462955475, "learning_rate": 8.871840790263205e-06, "loss": 0.3363, "step": 6199 }, { "epoch": 0.8890163464295956, "grad_norm": 0.3385649025440216, "learning_rate": 8.871312881319224e-06, "loss": 0.3435, "step": 6200 }, { "epoch": 0.8891597361628908, "grad_norm": 0.3143427073955536, "learning_rate": 8.870784864603846e-06, "loss": 0.3285, "step": 6201 }, { "epoch": 0.8893031258961859, "grad_norm": 0.3082291781902313, "learning_rate": 8.870256740131772e-06, "loss": 0.309, "step": 6202 }, { "epoch": 0.889446515629481, "grad_norm": 0.32472074031829834, "learning_rate": 8.869728507917704e-06, "loss": 0.3346, "step": 6203 }, { "epoch": 0.889589905362776, "grad_norm": 0.3060588836669922, "learning_rate": 8.869200167976349e-06, "loss": 0.313, "step": 6204 }, { "epoch": 0.8897332950960711, "grad_norm": 0.33809375762939453, "learning_rate": 8.86867172032241e-06, "loss": 0.3273, "step": 6205 }, { "epoch": 0.8898766848293662, "grad_norm": 0.306304395198822, "learning_rate": 8.868143164970603e-06, "loss": 0.328, "step": 6206 }, { "epoch": 0.8900200745626613, "grad_norm": 0.3098841607570648, "learning_rate": 8.867614501935642e-06, "loss": 0.3436, "step": 6207 }, { "epoch": 0.8901634642959564, "grad_norm": 0.3242635428905487, "learning_rate": 8.867085731232241e-06, "loss": 0.3257, "step": 6208 }, { "epoch": 0.8903068540292515, "grad_norm": 0.34976255893707275, "learning_rate": 8.866556852875122e-06, "loss": 0.3371, "step": 6209 }, { "epoch": 0.8904502437625466, "grad_norm": 0.3685716390609741, "learning_rate": 8.866027866879009e-06, "loss": 0.3583, "step": 6210 }, { "epoch": 0.8905936334958418, "grad_norm": 0.35264578461647034, "learning_rate": 8.865498773258627e-06, "loss": 0.3194, "step": 6211 }, { "epoch": 0.8907370232291368, "grad_norm": 0.3282299339771271, "learning_rate": 8.864969572028706e-06, "loss": 0.3459, "step": 6212 }, { "epoch": 0.8908804129624319, "grad_norm": 0.3072383403778076, "learning_rate": 8.864440263203974e-06, "loss": 0.3427, "step": 6213 }, { "epoch": 0.891023802695727, "grad_norm": 0.3138441741466522, "learning_rate": 8.863910846799174e-06, "loss": 0.3223, "step": 6214 }, { "epoch": 0.8911671924290221, "grad_norm": 0.33633893728256226, "learning_rate": 8.863381322829039e-06, "loss": 0.3356, "step": 6215 }, { "epoch": 0.8913105821623172, "grad_norm": 0.3219205141067505, "learning_rate": 8.86285169130831e-06, "loss": 0.3242, "step": 6216 }, { "epoch": 0.8914539718956123, "grad_norm": 0.3069736659526825, "learning_rate": 8.862321952251731e-06, "loss": 0.3277, "step": 6217 }, { "epoch": 0.8915973616289073, "grad_norm": 0.3268660008907318, "learning_rate": 8.861792105674052e-06, "loss": 0.3317, "step": 6218 }, { "epoch": 0.8917407513622024, "grad_norm": 0.31873783469200134, "learning_rate": 8.861262151590023e-06, "loss": 0.3283, "step": 6219 }, { "epoch": 0.8918841410954975, "grad_norm": 0.2827722728252411, "learning_rate": 8.860732090014392e-06, "loss": 0.3223, "step": 6220 }, { "epoch": 0.8920275308287926, "grad_norm": 0.3035792410373688, "learning_rate": 8.86020192096192e-06, "loss": 0.3243, "step": 6221 }, { "epoch": 0.8921709205620878, "grad_norm": 0.3409235179424286, "learning_rate": 8.859671644447364e-06, "loss": 0.3285, "step": 6222 }, { "epoch": 0.8923143102953829, "grad_norm": 0.3780064582824707, "learning_rate": 8.859141260485488e-06, "loss": 0.338, "step": 6223 }, { "epoch": 0.892457700028678, "grad_norm": 0.29864704608917236, "learning_rate": 8.858610769091053e-06, "loss": 0.3392, "step": 6224 }, { "epoch": 0.892601089761973, "grad_norm": 0.30216091871261597, "learning_rate": 8.858080170278833e-06, "loss": 0.3585, "step": 6225 }, { "epoch": 0.8927444794952681, "grad_norm": 0.3218509554862976, "learning_rate": 8.857549464063595e-06, "loss": 0.3099, "step": 6226 }, { "epoch": 0.8928878692285632, "grad_norm": 0.32287731766700745, "learning_rate": 8.857018650460113e-06, "loss": 0.3512, "step": 6227 }, { "epoch": 0.8930312589618583, "grad_norm": 0.30811214447021484, "learning_rate": 8.856487729483165e-06, "loss": 0.314, "step": 6228 }, { "epoch": 0.8931746486951534, "grad_norm": 0.309596449136734, "learning_rate": 8.855956701147532e-06, "loss": 0.3256, "step": 6229 }, { "epoch": 0.8933180384284485, "grad_norm": 0.3102338910102844, "learning_rate": 8.855425565467996e-06, "loss": 0.3279, "step": 6230 }, { "epoch": 0.8934614281617436, "grad_norm": 0.28793975710868835, "learning_rate": 8.854894322459342e-06, "loss": 0.3362, "step": 6231 }, { "epoch": 0.8936048178950388, "grad_norm": 0.3090074360370636, "learning_rate": 8.854362972136362e-06, "loss": 0.3365, "step": 6232 }, { "epoch": 0.8937482076283338, "grad_norm": 0.34330952167510986, "learning_rate": 8.853831514513845e-06, "loss": 0.3212, "step": 6233 }, { "epoch": 0.8938915973616289, "grad_norm": 0.2877333462238312, "learning_rate": 8.853299949606585e-06, "loss": 0.3252, "step": 6234 }, { "epoch": 0.894034987094924, "grad_norm": 0.32452988624572754, "learning_rate": 8.852768277429384e-06, "loss": 0.3456, "step": 6235 }, { "epoch": 0.8941783768282191, "grad_norm": 0.3314153552055359, "learning_rate": 8.85223649799704e-06, "loss": 0.3364, "step": 6236 }, { "epoch": 0.8943217665615142, "grad_norm": 0.2870771884918213, "learning_rate": 8.851704611324359e-06, "loss": 0.3007, "step": 6237 }, { "epoch": 0.8944651562948093, "grad_norm": 0.3101198673248291, "learning_rate": 8.851172617426145e-06, "loss": 0.3324, "step": 6238 }, { "epoch": 0.8946085460281044, "grad_norm": 0.32311752438545227, "learning_rate": 8.850640516317211e-06, "loss": 0.3502, "step": 6239 }, { "epoch": 0.8947519357613994, "grad_norm": 0.3253031373023987, "learning_rate": 8.850108308012366e-06, "loss": 0.3347, "step": 6240 }, { "epoch": 0.8948953254946945, "grad_norm": 0.2909530699253082, "learning_rate": 8.849575992526432e-06, "loss": 0.3305, "step": 6241 }, { "epoch": 0.8950387152279897, "grad_norm": 0.33655232191085815, "learning_rate": 8.849043569874221e-06, "loss": 0.3381, "step": 6242 }, { "epoch": 0.8951821049612848, "grad_norm": 0.32149261236190796, "learning_rate": 8.848511040070559e-06, "loss": 0.3505, "step": 6243 }, { "epoch": 0.8953254946945799, "grad_norm": 0.33916422724723816, "learning_rate": 8.84797840313027e-06, "loss": 0.3464, "step": 6244 }, { "epoch": 0.895468884427875, "grad_norm": 0.33918142318725586, "learning_rate": 8.847445659068182e-06, "loss": 0.3254, "step": 6245 }, { "epoch": 0.8956122741611701, "grad_norm": 0.2966446876525879, "learning_rate": 8.846912807899123e-06, "loss": 0.3289, "step": 6246 }, { "epoch": 0.8957556638944651, "grad_norm": 0.320314884185791, "learning_rate": 8.84637984963793e-06, "loss": 0.3335, "step": 6247 }, { "epoch": 0.8958990536277602, "grad_norm": 0.3641510605812073, "learning_rate": 8.845846784299441e-06, "loss": 0.3159, "step": 6248 }, { "epoch": 0.8960424433610553, "grad_norm": 0.3224550187587738, "learning_rate": 8.845313611898492e-06, "loss": 0.3457, "step": 6249 }, { "epoch": 0.8961858330943504, "grad_norm": 0.2938172519207001, "learning_rate": 8.844780332449928e-06, "loss": 0.3544, "step": 6250 }, { "epoch": 0.8963292228276455, "grad_norm": 0.3415102958679199, "learning_rate": 8.844246945968593e-06, "loss": 0.3267, "step": 6251 }, { "epoch": 0.8964726125609407, "grad_norm": 0.29399028420448303, "learning_rate": 8.843713452469336e-06, "loss": 0.3145, "step": 6252 }, { "epoch": 0.8966160022942358, "grad_norm": 0.33460569381713867, "learning_rate": 8.84317985196701e-06, "loss": 0.345, "step": 6253 }, { "epoch": 0.8967593920275309, "grad_norm": 0.3039875030517578, "learning_rate": 8.84264614447647e-06, "loss": 0.3539, "step": 6254 }, { "epoch": 0.8969027817608259, "grad_norm": 0.3021771013736725, "learning_rate": 8.842112330012572e-06, "loss": 0.3289, "step": 6255 }, { "epoch": 0.897046171494121, "grad_norm": 0.3122865557670593, "learning_rate": 8.841578408590174e-06, "loss": 0.3254, "step": 6256 }, { "epoch": 0.8971895612274161, "grad_norm": 0.3145667612552643, "learning_rate": 8.841044380224146e-06, "loss": 0.3447, "step": 6257 }, { "epoch": 0.8973329509607112, "grad_norm": 0.284281462430954, "learning_rate": 8.84051024492935e-06, "loss": 0.3254, "step": 6258 }, { "epoch": 0.8974763406940063, "grad_norm": 0.34443598985671997, "learning_rate": 8.839976002720656e-06, "loss": 0.3238, "step": 6259 }, { "epoch": 0.8976197304273014, "grad_norm": 0.3059835135936737, "learning_rate": 8.839441653612939e-06, "loss": 0.3126, "step": 6260 }, { "epoch": 0.8977631201605965, "grad_norm": 0.308688223361969, "learning_rate": 8.83890719762107e-06, "loss": 0.3236, "step": 6261 }, { "epoch": 0.8979065098938916, "grad_norm": 0.3486595153808594, "learning_rate": 8.838372634759931e-06, "loss": 0.33, "step": 6262 }, { "epoch": 0.8980498996271867, "grad_norm": 0.2891646921634674, "learning_rate": 8.837837965044403e-06, "loss": 0.3253, "step": 6263 }, { "epoch": 0.8981932893604818, "grad_norm": 0.3201902210712433, "learning_rate": 8.83730318848937e-06, "loss": 0.3566, "step": 6264 }, { "epoch": 0.8983366790937769, "grad_norm": 0.344538152217865, "learning_rate": 8.836768305109718e-06, "loss": 0.3295, "step": 6265 }, { "epoch": 0.898480068827072, "grad_norm": 0.2886003255844116, "learning_rate": 8.83623331492034e-06, "loss": 0.3136, "step": 6266 }, { "epoch": 0.8986234585603671, "grad_norm": 0.2876271903514862, "learning_rate": 8.835698217936126e-06, "loss": 0.3442, "step": 6267 }, { "epoch": 0.8987668482936622, "grad_norm": 0.32025089859962463, "learning_rate": 8.835163014171974e-06, "loss": 0.3197, "step": 6268 }, { "epoch": 0.8989102380269572, "grad_norm": 0.28797560930252075, "learning_rate": 8.834627703642784e-06, "loss": 0.3298, "step": 6269 }, { "epoch": 0.8990536277602523, "grad_norm": 0.31727609038352966, "learning_rate": 8.834092286363458e-06, "loss": 0.3536, "step": 6270 }, { "epoch": 0.8991970174935474, "grad_norm": 0.27768534421920776, "learning_rate": 8.8335567623489e-06, "loss": 0.3489, "step": 6271 }, { "epoch": 0.8993404072268426, "grad_norm": 0.3063548505306244, "learning_rate": 8.83302113161402e-06, "loss": 0.3489, "step": 6272 }, { "epoch": 0.8994837969601377, "grad_norm": 0.3129276931285858, "learning_rate": 8.832485394173727e-06, "loss": 0.3339, "step": 6273 }, { "epoch": 0.8996271866934328, "grad_norm": 0.2917197346687317, "learning_rate": 8.831949550042935e-06, "loss": 0.3324, "step": 6274 }, { "epoch": 0.8997705764267279, "grad_norm": 0.31123435497283936, "learning_rate": 8.831413599236562e-06, "loss": 0.3474, "step": 6275 }, { "epoch": 0.899913966160023, "grad_norm": 0.3043316900730133, "learning_rate": 8.83087754176953e-06, "loss": 0.3035, "step": 6276 }, { "epoch": 0.900057355893318, "grad_norm": 0.29001152515411377, "learning_rate": 8.83034137765676e-06, "loss": 0.3343, "step": 6277 }, { "epoch": 0.9002007456266131, "grad_norm": 0.282565712928772, "learning_rate": 8.829805106913178e-06, "loss": 0.3203, "step": 6278 }, { "epoch": 0.9003441353599082, "grad_norm": 0.32677704095840454, "learning_rate": 8.829268729553712e-06, "loss": 0.317, "step": 6279 }, { "epoch": 0.9004875250932033, "grad_norm": 0.32398733496665955, "learning_rate": 8.828732245593296e-06, "loss": 0.3347, "step": 6280 }, { "epoch": 0.9006309148264984, "grad_norm": 0.30507567524909973, "learning_rate": 8.828195655046864e-06, "loss": 0.3238, "step": 6281 }, { "epoch": 0.9007743045597936, "grad_norm": 0.3240159749984741, "learning_rate": 8.827658957929356e-06, "loss": 0.3435, "step": 6282 }, { "epoch": 0.9009176942930887, "grad_norm": 0.3508358895778656, "learning_rate": 8.827122154255707e-06, "loss": 0.3176, "step": 6283 }, { "epoch": 0.9010610840263837, "grad_norm": 0.35495033860206604, "learning_rate": 8.826585244040867e-06, "loss": 0.3322, "step": 6284 }, { "epoch": 0.9012044737596788, "grad_norm": 0.3008570671081543, "learning_rate": 8.826048227299778e-06, "loss": 0.3341, "step": 6285 }, { "epoch": 0.9013478634929739, "grad_norm": 0.321986585855484, "learning_rate": 8.825511104047393e-06, "loss": 0.3273, "step": 6286 }, { "epoch": 0.901491253226269, "grad_norm": 0.3726103603839874, "learning_rate": 8.824973874298664e-06, "loss": 0.3271, "step": 6287 }, { "epoch": 0.9016346429595641, "grad_norm": 0.3195134401321411, "learning_rate": 8.824436538068545e-06, "loss": 0.3455, "step": 6288 }, { "epoch": 0.9017780326928592, "grad_norm": 0.302937775850296, "learning_rate": 8.823899095371998e-06, "loss": 0.3121, "step": 6289 }, { "epoch": 0.9019214224261543, "grad_norm": 0.3270886540412903, "learning_rate": 8.823361546223981e-06, "loss": 0.3445, "step": 6290 }, { "epoch": 0.9020648121594493, "grad_norm": 0.31445449590682983, "learning_rate": 8.82282389063946e-06, "loss": 0.345, "step": 6291 }, { "epoch": 0.9022082018927445, "grad_norm": 0.3262626528739929, "learning_rate": 8.822286128633403e-06, "loss": 0.3137, "step": 6292 }, { "epoch": 0.9023515916260396, "grad_norm": 0.28844156861305237, "learning_rate": 8.82174826022078e-06, "loss": 0.3383, "step": 6293 }, { "epoch": 0.9024949813593347, "grad_norm": 0.3399581015110016, "learning_rate": 8.821210285416564e-06, "loss": 0.3139, "step": 6294 }, { "epoch": 0.9026383710926298, "grad_norm": 0.30433186888694763, "learning_rate": 8.820672204235732e-06, "loss": 0.3396, "step": 6295 }, { "epoch": 0.9027817608259249, "grad_norm": 0.3150186240673065, "learning_rate": 8.820134016693263e-06, "loss": 0.3427, "step": 6296 }, { "epoch": 0.90292515055922, "grad_norm": 0.31661924719810486, "learning_rate": 8.81959572280414e-06, "loss": 0.3434, "step": 6297 }, { "epoch": 0.903068540292515, "grad_norm": 0.3033598065376282, "learning_rate": 8.819057322583346e-06, "loss": 0.342, "step": 6298 }, { "epoch": 0.9032119300258101, "grad_norm": 0.3220703601837158, "learning_rate": 8.818518816045873e-06, "loss": 0.3391, "step": 6299 }, { "epoch": 0.9033553197591052, "grad_norm": 0.3386363089084625, "learning_rate": 8.817980203206709e-06, "loss": 0.3316, "step": 6300 }, { "epoch": 0.9034987094924003, "grad_norm": 0.3328742980957031, "learning_rate": 8.817441484080849e-06, "loss": 0.3569, "step": 6301 }, { "epoch": 0.9036420992256955, "grad_norm": 0.3116888999938965, "learning_rate": 8.816902658683292e-06, "loss": 0.3285, "step": 6302 }, { "epoch": 0.9037854889589906, "grad_norm": 0.32136040925979614, "learning_rate": 8.816363727029036e-06, "loss": 0.3153, "step": 6303 }, { "epoch": 0.9039288786922857, "grad_norm": 0.3225736916065216, "learning_rate": 8.815824689133084e-06, "loss": 0.3284, "step": 6304 }, { "epoch": 0.9040722684255807, "grad_norm": 0.32718393206596375, "learning_rate": 8.815285545010442e-06, "loss": 0.3354, "step": 6305 }, { "epoch": 0.9042156581588758, "grad_norm": 0.2971222400665283, "learning_rate": 8.81474629467612e-06, "loss": 0.3331, "step": 6306 }, { "epoch": 0.9043590478921709, "grad_norm": 0.3083101809024811, "learning_rate": 8.81420693814513e-06, "loss": 0.3341, "step": 6307 }, { "epoch": 0.904502437625466, "grad_norm": 0.3259681463241577, "learning_rate": 8.813667475432483e-06, "loss": 0.3593, "step": 6308 }, { "epoch": 0.9046458273587611, "grad_norm": 0.3117102086544037, "learning_rate": 8.813127906553203e-06, "loss": 0.3242, "step": 6309 }, { "epoch": 0.9047892170920562, "grad_norm": 0.3053396940231323, "learning_rate": 8.812588231522307e-06, "loss": 0.3193, "step": 6310 }, { "epoch": 0.9049326068253513, "grad_norm": 0.35573893785476685, "learning_rate": 8.81204845035482e-06, "loss": 0.3041, "step": 6311 }, { "epoch": 0.9050759965586465, "grad_norm": 0.30167141556739807, "learning_rate": 8.811508563065769e-06, "loss": 0.3167, "step": 6312 }, { "epoch": 0.9052193862919415, "grad_norm": 0.29989853501319885, "learning_rate": 8.81096856967018e-06, "loss": 0.3423, "step": 6313 }, { "epoch": 0.9053627760252366, "grad_norm": 0.33717915415763855, "learning_rate": 8.810428470183091e-06, "loss": 0.3261, "step": 6314 }, { "epoch": 0.9055061657585317, "grad_norm": 0.3379475772380829, "learning_rate": 8.809888264619534e-06, "loss": 0.3472, "step": 6315 }, { "epoch": 0.9056495554918268, "grad_norm": 0.33476606011390686, "learning_rate": 8.809347952994549e-06, "loss": 0.3501, "step": 6316 }, { "epoch": 0.9057929452251219, "grad_norm": 0.29649388790130615, "learning_rate": 8.808807535323176e-06, "loss": 0.3321, "step": 6317 }, { "epoch": 0.905936334958417, "grad_norm": 0.3057991564273834, "learning_rate": 8.80826701162046e-06, "loss": 0.3358, "step": 6318 }, { "epoch": 0.906079724691712, "grad_norm": 0.2821216583251953, "learning_rate": 8.807726381901448e-06, "loss": 0.3514, "step": 6319 }, { "epoch": 0.9062231144250071, "grad_norm": 0.3235582709312439, "learning_rate": 8.807185646181193e-06, "loss": 0.339, "step": 6320 }, { "epoch": 0.9063665041583022, "grad_norm": 0.3107042908668518, "learning_rate": 8.806644804474746e-06, "loss": 0.334, "step": 6321 }, { "epoch": 0.9065098938915973, "grad_norm": 0.31435626745224, "learning_rate": 8.806103856797162e-06, "loss": 0.3189, "step": 6322 }, { "epoch": 0.9066532836248925, "grad_norm": 0.34129777550697327, "learning_rate": 8.805562803163502e-06, "loss": 0.331, "step": 6323 }, { "epoch": 0.9067966733581876, "grad_norm": 0.3435932695865631, "learning_rate": 8.805021643588827e-06, "loss": 0.3402, "step": 6324 }, { "epoch": 0.9069400630914827, "grad_norm": 0.3153330385684967, "learning_rate": 8.804480378088203e-06, "loss": 0.3258, "step": 6325 }, { "epoch": 0.9070834528247778, "grad_norm": 0.2880105674266815, "learning_rate": 8.803939006676699e-06, "loss": 0.3408, "step": 6326 }, { "epoch": 0.9072268425580728, "grad_norm": 0.3636353313922882, "learning_rate": 8.803397529369384e-06, "loss": 0.3284, "step": 6327 }, { "epoch": 0.9073702322913679, "grad_norm": 0.32077789306640625, "learning_rate": 8.802855946181332e-06, "loss": 0.348, "step": 6328 }, { "epoch": 0.907513622024663, "grad_norm": 0.2951582670211792, "learning_rate": 8.80231425712762e-06, "loss": 0.3466, "step": 6329 }, { "epoch": 0.9076570117579581, "grad_norm": 0.3491760492324829, "learning_rate": 8.801772462223327e-06, "loss": 0.3256, "step": 6330 }, { "epoch": 0.9078004014912532, "grad_norm": 0.32982712984085083, "learning_rate": 8.80123056148354e-06, "loss": 0.3065, "step": 6331 }, { "epoch": 0.9079437912245483, "grad_norm": 0.31613123416900635, "learning_rate": 8.80068855492334e-06, "loss": 0.3237, "step": 6332 }, { "epoch": 0.9080871809578435, "grad_norm": 0.33084461092948914, "learning_rate": 8.800146442557818e-06, "loss": 0.3288, "step": 6333 }, { "epoch": 0.9082305706911386, "grad_norm": 0.3010694682598114, "learning_rate": 8.799604224402062e-06, "loss": 0.3354, "step": 6334 }, { "epoch": 0.9083739604244336, "grad_norm": 0.3015712797641754, "learning_rate": 8.799061900471174e-06, "loss": 0.333, "step": 6335 }, { "epoch": 0.9085173501577287, "grad_norm": 0.3273674547672272, "learning_rate": 8.798519470780242e-06, "loss": 0.3641, "step": 6336 }, { "epoch": 0.9086607398910238, "grad_norm": 0.31209203600883484, "learning_rate": 8.797976935344374e-06, "loss": 0.3382, "step": 6337 }, { "epoch": 0.9088041296243189, "grad_norm": 0.327068567276001, "learning_rate": 8.797434294178669e-06, "loss": 0.3482, "step": 6338 }, { "epoch": 0.908947519357614, "grad_norm": 0.3113449513912201, "learning_rate": 8.796891547298236e-06, "loss": 0.3364, "step": 6339 }, { "epoch": 0.9090909090909091, "grad_norm": 0.31229957938194275, "learning_rate": 8.796348694718182e-06, "loss": 0.3354, "step": 6340 }, { "epoch": 0.9092342988242041, "grad_norm": 0.31514298915863037, "learning_rate": 8.795805736453623e-06, "loss": 0.3226, "step": 6341 }, { "epoch": 0.9093776885574992, "grad_norm": 0.3037608563899994, "learning_rate": 8.795262672519667e-06, "loss": 0.3368, "step": 6342 }, { "epoch": 0.9095210782907944, "grad_norm": 0.29710641503334045, "learning_rate": 8.79471950293144e-06, "loss": 0.322, "step": 6343 }, { "epoch": 0.9096644680240895, "grad_norm": 0.29880520701408386, "learning_rate": 8.794176227704056e-06, "loss": 0.3144, "step": 6344 }, { "epoch": 0.9098078577573846, "grad_norm": 0.32225120067596436, "learning_rate": 8.793632846852644e-06, "loss": 0.339, "step": 6345 }, { "epoch": 0.9099512474906797, "grad_norm": 0.288473904132843, "learning_rate": 8.793089360392331e-06, "loss": 0.338, "step": 6346 }, { "epoch": 0.9100946372239748, "grad_norm": 0.32262158393859863, "learning_rate": 8.792545768338242e-06, "loss": 0.318, "step": 6347 }, { "epoch": 0.9102380269572699, "grad_norm": 0.3285177946090698, "learning_rate": 8.792002070705515e-06, "loss": 0.335, "step": 6348 }, { "epoch": 0.9103814166905649, "grad_norm": 0.3279637396335602, "learning_rate": 8.791458267509284e-06, "loss": 0.3213, "step": 6349 }, { "epoch": 0.91052480642386, "grad_norm": 0.31071993708610535, "learning_rate": 8.790914358764687e-06, "loss": 0.3246, "step": 6350 }, { "epoch": 0.9106681961571551, "grad_norm": 0.3249027729034424, "learning_rate": 8.790370344486862e-06, "loss": 0.3424, "step": 6351 }, { "epoch": 0.9108115858904502, "grad_norm": 0.29951393604278564, "learning_rate": 8.78982622469096e-06, "loss": 0.3182, "step": 6352 }, { "epoch": 0.9109549756237454, "grad_norm": 0.2921200394630432, "learning_rate": 8.789281999392128e-06, "loss": 0.3358, "step": 6353 }, { "epoch": 0.9110983653570405, "grad_norm": 0.32696065306663513, "learning_rate": 8.788737668605512e-06, "loss": 0.3284, "step": 6354 }, { "epoch": 0.9112417550903356, "grad_norm": 0.32536637783050537, "learning_rate": 8.788193232346268e-06, "loss": 0.3409, "step": 6355 }, { "epoch": 0.9113851448236306, "grad_norm": 0.3135469853878021, "learning_rate": 8.78764869062955e-06, "loss": 0.3424, "step": 6356 }, { "epoch": 0.9115285345569257, "grad_norm": 0.29753395915031433, "learning_rate": 8.78710404347052e-06, "loss": 0.3285, "step": 6357 }, { "epoch": 0.9116719242902208, "grad_norm": 0.295335978269577, "learning_rate": 8.78655929088434e-06, "loss": 0.3367, "step": 6358 }, { "epoch": 0.9118153140235159, "grad_norm": 0.3115566074848175, "learning_rate": 8.786014432886173e-06, "loss": 0.3368, "step": 6359 }, { "epoch": 0.911958703756811, "grad_norm": 0.3042113184928894, "learning_rate": 8.785469469491189e-06, "loss": 0.317, "step": 6360 }, { "epoch": 0.9121020934901061, "grad_norm": 0.31603893637657166, "learning_rate": 8.784924400714557e-06, "loss": 0.3409, "step": 6361 }, { "epoch": 0.9122454832234012, "grad_norm": 0.31278836727142334, "learning_rate": 8.784379226571453e-06, "loss": 0.3479, "step": 6362 }, { "epoch": 0.9123888729566964, "grad_norm": 0.2946862280368805, "learning_rate": 8.783833947077053e-06, "loss": 0.3271, "step": 6363 }, { "epoch": 0.9125322626899914, "grad_norm": 0.28493732213974, "learning_rate": 8.783288562246537e-06, "loss": 0.3268, "step": 6364 }, { "epoch": 0.9126756524232865, "grad_norm": 0.29586926102638245, "learning_rate": 8.782743072095086e-06, "loss": 0.3122, "step": 6365 }, { "epoch": 0.9128190421565816, "grad_norm": 0.3235393762588501, "learning_rate": 8.782197476637886e-06, "loss": 0.3323, "step": 6366 }, { "epoch": 0.9129624318898767, "grad_norm": 0.309365838766098, "learning_rate": 8.781651775890127e-06, "loss": 0.3527, "step": 6367 }, { "epoch": 0.9131058216231718, "grad_norm": 0.3028712570667267, "learning_rate": 8.781105969867e-06, "loss": 0.3463, "step": 6368 }, { "epoch": 0.9132492113564669, "grad_norm": 0.30701133608818054, "learning_rate": 8.7805600585837e-06, "loss": 0.3435, "step": 6369 }, { "epoch": 0.913392601089762, "grad_norm": 0.29482606053352356, "learning_rate": 8.780014042055424e-06, "loss": 0.3219, "step": 6370 }, { "epoch": 0.913535990823057, "grad_norm": 0.3152726888656616, "learning_rate": 8.779467920297371e-06, "loss": 0.325, "step": 6371 }, { "epoch": 0.9136793805563521, "grad_norm": 0.2956802248954773, "learning_rate": 8.778921693324744e-06, "loss": 0.3242, "step": 6372 }, { "epoch": 0.9138227702896473, "grad_norm": 0.30582180619239807, "learning_rate": 8.778375361152752e-06, "loss": 0.3149, "step": 6373 }, { "epoch": 0.9139661600229424, "grad_norm": 0.30795761942863464, "learning_rate": 8.777828923796601e-06, "loss": 0.3398, "step": 6374 }, { "epoch": 0.9141095497562375, "grad_norm": 0.2956318259239197, "learning_rate": 8.777282381271504e-06, "loss": 0.3273, "step": 6375 }, { "epoch": 0.9142529394895326, "grad_norm": 0.3010712265968323, "learning_rate": 8.776735733592677e-06, "loss": 0.3403, "step": 6376 }, { "epoch": 0.9143963292228277, "grad_norm": 0.3078595995903015, "learning_rate": 8.776188980775336e-06, "loss": 0.3248, "step": 6377 }, { "epoch": 0.9145397189561227, "grad_norm": 0.31744271516799927, "learning_rate": 8.775642122834703e-06, "loss": 0.321, "step": 6378 }, { "epoch": 0.9146831086894178, "grad_norm": 0.318406343460083, "learning_rate": 8.775095159786002e-06, "loss": 0.3213, "step": 6379 }, { "epoch": 0.9148264984227129, "grad_norm": 0.3071357011795044, "learning_rate": 8.774548091644457e-06, "loss": 0.3287, "step": 6380 }, { "epoch": 0.914969888156008, "grad_norm": 0.3080656826496124, "learning_rate": 8.774000918425302e-06, "loss": 0.3238, "step": 6381 }, { "epoch": 0.9151132778893031, "grad_norm": 0.3262301981449127, "learning_rate": 8.773453640143766e-06, "loss": 0.3504, "step": 6382 }, { "epoch": 0.9152566676225983, "grad_norm": 0.33463844656944275, "learning_rate": 8.772906256815084e-06, "loss": 0.3218, "step": 6383 }, { "epoch": 0.9154000573558934, "grad_norm": 0.3241594135761261, "learning_rate": 8.772358768454496e-06, "loss": 0.3346, "step": 6384 }, { "epoch": 0.9155434470891884, "grad_norm": 0.31452929973602295, "learning_rate": 8.771811175077244e-06, "loss": 0.3259, "step": 6385 }, { "epoch": 0.9156868368224835, "grad_norm": 0.33582639694213867, "learning_rate": 8.771263476698571e-06, "loss": 0.3316, "step": 6386 }, { "epoch": 0.9158302265557786, "grad_norm": 0.3021105229854584, "learning_rate": 8.770715673333724e-06, "loss": 0.3354, "step": 6387 }, { "epoch": 0.9159736162890737, "grad_norm": 0.3342081606388092, "learning_rate": 8.770167764997953e-06, "loss": 0.3591, "step": 6388 }, { "epoch": 0.9161170060223688, "grad_norm": 0.3105032444000244, "learning_rate": 8.769619751706508e-06, "loss": 0.3384, "step": 6389 }, { "epoch": 0.9162603957556639, "grad_norm": 0.3318069577217102, "learning_rate": 8.769071633474651e-06, "loss": 0.3447, "step": 6390 }, { "epoch": 0.916403785488959, "grad_norm": 0.3179744482040405, "learning_rate": 8.768523410317637e-06, "loss": 0.3236, "step": 6391 }, { "epoch": 0.916547175222254, "grad_norm": 0.3551662266254425, "learning_rate": 8.76797508225073e-06, "loss": 0.3553, "step": 6392 }, { "epoch": 0.9166905649555492, "grad_norm": 0.2800678610801697, "learning_rate": 8.767426649289192e-06, "loss": 0.3335, "step": 6393 }, { "epoch": 0.9168339546888443, "grad_norm": 0.33688122034072876, "learning_rate": 8.76687811144829e-06, "loss": 0.3649, "step": 6394 }, { "epoch": 0.9169773444221394, "grad_norm": 0.308859646320343, "learning_rate": 8.766329468743297e-06, "loss": 0.342, "step": 6395 }, { "epoch": 0.9171207341554345, "grad_norm": 0.3360905051231384, "learning_rate": 8.765780721189484e-06, "loss": 0.3249, "step": 6396 }, { "epoch": 0.9172641238887296, "grad_norm": 0.29320162534713745, "learning_rate": 8.765231868802131e-06, "loss": 0.3264, "step": 6397 }, { "epoch": 0.9174075136220247, "grad_norm": 0.3128910958766937, "learning_rate": 8.764682911596514e-06, "loss": 0.335, "step": 6398 }, { "epoch": 0.9175509033553197, "grad_norm": 0.31728386878967285, "learning_rate": 8.764133849587915e-06, "loss": 0.3245, "step": 6399 }, { "epoch": 0.9176942930886148, "grad_norm": 0.3599990904331207, "learning_rate": 8.76358468279162e-06, "loss": 0.3485, "step": 6400 }, { "epoch": 0.9178376828219099, "grad_norm": 0.3168831169605255, "learning_rate": 8.763035411222918e-06, "loss": 0.353, "step": 6401 }, { "epoch": 0.917981072555205, "grad_norm": 0.3122611939907074, "learning_rate": 8.762486034897098e-06, "loss": 0.3417, "step": 6402 }, { "epoch": 0.9181244622885002, "grad_norm": 0.33869996666908264, "learning_rate": 8.761936553829453e-06, "loss": 0.3319, "step": 6403 }, { "epoch": 0.9182678520217953, "grad_norm": 0.3321410119533539, "learning_rate": 8.761386968035286e-06, "loss": 0.3218, "step": 6404 }, { "epoch": 0.9184112417550904, "grad_norm": 0.3025856018066406, "learning_rate": 8.760837277529888e-06, "loss": 0.331, "step": 6405 }, { "epoch": 0.9185546314883855, "grad_norm": 0.31242209672927856, "learning_rate": 8.760287482328566e-06, "loss": 0.3323, "step": 6406 }, { "epoch": 0.9186980212216805, "grad_norm": 0.2935812473297119, "learning_rate": 8.759737582446624e-06, "loss": 0.3186, "step": 6407 }, { "epoch": 0.9188414109549756, "grad_norm": 0.3371512293815613, "learning_rate": 8.759187577899374e-06, "loss": 0.337, "step": 6408 }, { "epoch": 0.9189848006882707, "grad_norm": 0.3327486515045166, "learning_rate": 8.758637468702123e-06, "loss": 0.3202, "step": 6409 }, { "epoch": 0.9191281904215658, "grad_norm": 0.29815545678138733, "learning_rate": 8.758087254870187e-06, "loss": 0.3371, "step": 6410 }, { "epoch": 0.9192715801548609, "grad_norm": 0.3140108287334442, "learning_rate": 8.757536936418883e-06, "loss": 0.3179, "step": 6411 }, { "epoch": 0.919414969888156, "grad_norm": 0.34325358271598816, "learning_rate": 8.75698651336353e-06, "loss": 0.3436, "step": 6412 }, { "epoch": 0.919558359621451, "grad_norm": 0.31754767894744873, "learning_rate": 8.756435985719451e-06, "loss": 0.3164, "step": 6413 }, { "epoch": 0.9197017493547462, "grad_norm": 0.2922540009021759, "learning_rate": 8.755885353501975e-06, "loss": 0.3093, "step": 6414 }, { "epoch": 0.9198451390880413, "grad_norm": 0.31275665760040283, "learning_rate": 8.755334616726427e-06, "loss": 0.3174, "step": 6415 }, { "epoch": 0.9199885288213364, "grad_norm": 0.30887335538864136, "learning_rate": 8.754783775408139e-06, "loss": 0.3102, "step": 6416 }, { "epoch": 0.9201319185546315, "grad_norm": 0.31708821654319763, "learning_rate": 8.754232829562448e-06, "loss": 0.3526, "step": 6417 }, { "epoch": 0.9202753082879266, "grad_norm": 0.28364548087120056, "learning_rate": 8.75368177920469e-06, "loss": 0.3172, "step": 6418 }, { "epoch": 0.9204186980212217, "grad_norm": 0.29086369276046753, "learning_rate": 8.753130624350206e-06, "loss": 0.3369, "step": 6419 }, { "epoch": 0.9205620877545168, "grad_norm": 0.3152313530445099, "learning_rate": 8.75257936501434e-06, "loss": 0.3068, "step": 6420 }, { "epoch": 0.9207054774878118, "grad_norm": 0.29886943101882935, "learning_rate": 8.752028001212433e-06, "loss": 0.3201, "step": 6421 }, { "epoch": 0.9208488672211069, "grad_norm": 0.28569287061691284, "learning_rate": 8.751476532959842e-06, "loss": 0.3239, "step": 6422 }, { "epoch": 0.920992256954402, "grad_norm": 0.3524116575717926, "learning_rate": 8.750924960271913e-06, "loss": 0.3343, "step": 6423 }, { "epoch": 0.9211356466876972, "grad_norm": 0.29112330079078674, "learning_rate": 8.750373283164005e-06, "loss": 0.3307, "step": 6424 }, { "epoch": 0.9212790364209923, "grad_norm": 0.30259400606155396, "learning_rate": 8.749821501651471e-06, "loss": 0.326, "step": 6425 }, { "epoch": 0.9214224261542874, "grad_norm": 0.30354681611061096, "learning_rate": 8.749269615749677e-06, "loss": 0.3456, "step": 6426 }, { "epoch": 0.9215658158875825, "grad_norm": 0.3289400637149811, "learning_rate": 8.748717625473987e-06, "loss": 0.3348, "step": 6427 }, { "epoch": 0.9217092056208775, "grad_norm": 0.3293907046318054, "learning_rate": 8.748165530839759e-06, "loss": 0.3326, "step": 6428 }, { "epoch": 0.9218525953541726, "grad_norm": 0.32363519072532654, "learning_rate": 8.747613331862372e-06, "loss": 0.3239, "step": 6429 }, { "epoch": 0.9219959850874677, "grad_norm": 0.2780957520008087, "learning_rate": 8.747061028557194e-06, "loss": 0.3185, "step": 6430 }, { "epoch": 0.9221393748207628, "grad_norm": 0.3575756549835205, "learning_rate": 8.746508620939601e-06, "loss": 0.3327, "step": 6431 }, { "epoch": 0.9222827645540579, "grad_norm": 0.36142265796661377, "learning_rate": 8.745956109024972e-06, "loss": 0.3362, "step": 6432 }, { "epoch": 0.922426154287353, "grad_norm": 0.2962161898612976, "learning_rate": 8.745403492828686e-06, "loss": 0.3441, "step": 6433 }, { "epoch": 0.9225695440206482, "grad_norm": 0.2920941710472107, "learning_rate": 8.744850772366129e-06, "loss": 0.32, "step": 6434 }, { "epoch": 0.9227129337539433, "grad_norm": 0.3086889982223511, "learning_rate": 8.744297947652686e-06, "loss": 0.334, "step": 6435 }, { "epoch": 0.9228563234872383, "grad_norm": 0.35038551688194275, "learning_rate": 8.743745018703748e-06, "loss": 0.312, "step": 6436 }, { "epoch": 0.9229997132205334, "grad_norm": 0.3091595470905304, "learning_rate": 8.74319198553471e-06, "loss": 0.3333, "step": 6437 }, { "epoch": 0.9231431029538285, "grad_norm": 0.33453458547592163, "learning_rate": 8.742638848160962e-06, "loss": 0.3301, "step": 6438 }, { "epoch": 0.9232864926871236, "grad_norm": 0.30835461616516113, "learning_rate": 8.742085606597908e-06, "loss": 0.3258, "step": 6439 }, { "epoch": 0.9234298824204187, "grad_norm": 0.3089500069618225, "learning_rate": 8.741532260860947e-06, "loss": 0.3206, "step": 6440 }, { "epoch": 0.9235732721537138, "grad_norm": 0.3077129125595093, "learning_rate": 8.740978810965484e-06, "loss": 0.3311, "step": 6441 }, { "epoch": 0.9237166618870088, "grad_norm": 0.31770026683807373, "learning_rate": 8.740425256926923e-06, "loss": 0.3276, "step": 6442 }, { "epoch": 0.9238600516203039, "grad_norm": 0.31071293354034424, "learning_rate": 8.739871598760678e-06, "loss": 0.3616, "step": 6443 }, { "epoch": 0.9240034413535991, "grad_norm": 0.3336385190486908, "learning_rate": 8.739317836482162e-06, "loss": 0.3536, "step": 6444 }, { "epoch": 0.9241468310868942, "grad_norm": 0.3257736265659332, "learning_rate": 8.738763970106788e-06, "loss": 0.3381, "step": 6445 }, { "epoch": 0.9242902208201893, "grad_norm": 0.30728471279144287, "learning_rate": 8.738209999649978e-06, "loss": 0.3422, "step": 6446 }, { "epoch": 0.9244336105534844, "grad_norm": 0.311646044254303, "learning_rate": 8.73765592512715e-06, "loss": 0.3545, "step": 6447 }, { "epoch": 0.9245770002867795, "grad_norm": 0.3426502048969269, "learning_rate": 8.73710174655373e-06, "loss": 0.341, "step": 6448 }, { "epoch": 0.9247203900200746, "grad_norm": 0.3347228467464447, "learning_rate": 8.736547463945148e-06, "loss": 0.3353, "step": 6449 }, { "epoch": 0.9248637797533696, "grad_norm": 0.31400996446609497, "learning_rate": 8.735993077316834e-06, "loss": 0.3293, "step": 6450 }, { "epoch": 0.9250071694866647, "grad_norm": 0.33036690950393677, "learning_rate": 8.735438586684217e-06, "loss": 0.3087, "step": 6451 }, { "epoch": 0.9251505592199598, "grad_norm": 0.3326511085033417, "learning_rate": 8.734883992062737e-06, "loss": 0.3287, "step": 6452 }, { "epoch": 0.9252939489532549, "grad_norm": 0.325845330953598, "learning_rate": 8.734329293467832e-06, "loss": 0.322, "step": 6453 }, { "epoch": 0.9254373386865501, "grad_norm": 0.3485258221626282, "learning_rate": 8.733774490914944e-06, "loss": 0.3283, "step": 6454 }, { "epoch": 0.9255807284198452, "grad_norm": 0.3557501435279846, "learning_rate": 8.733219584419518e-06, "loss": 0.3323, "step": 6455 }, { "epoch": 0.9257241181531403, "grad_norm": 0.31938233971595764, "learning_rate": 8.732664573997001e-06, "loss": 0.3215, "step": 6456 }, { "epoch": 0.9258675078864353, "grad_norm": 0.3680861294269562, "learning_rate": 8.732109459662844e-06, "loss": 0.3353, "step": 6457 }, { "epoch": 0.9260108976197304, "grad_norm": 0.3390926420688629, "learning_rate": 8.7315542414325e-06, "loss": 0.3451, "step": 6458 }, { "epoch": 0.9261542873530255, "grad_norm": 0.3034258782863617, "learning_rate": 8.730998919321429e-06, "loss": 0.3151, "step": 6459 }, { "epoch": 0.9262976770863206, "grad_norm": 0.34023675322532654, "learning_rate": 8.730443493345085e-06, "loss": 0.3071, "step": 6460 }, { "epoch": 0.9264410668196157, "grad_norm": 0.3254014849662781, "learning_rate": 8.729887963518933e-06, "loss": 0.3522, "step": 6461 }, { "epoch": 0.9265844565529108, "grad_norm": 0.28954941034317017, "learning_rate": 8.729332329858437e-06, "loss": 0.3255, "step": 6462 }, { "epoch": 0.9267278462862059, "grad_norm": 0.2902091443538666, "learning_rate": 8.728776592379068e-06, "loss": 0.3359, "step": 6463 }, { "epoch": 0.926871236019501, "grad_norm": 0.36535191535949707, "learning_rate": 8.728220751096292e-06, "loss": 0.3213, "step": 6464 }, { "epoch": 0.9270146257527961, "grad_norm": 0.33549338579177856, "learning_rate": 8.727664806025588e-06, "loss": 0.344, "step": 6465 }, { "epoch": 0.9271580154860912, "grad_norm": 0.3217381536960602, "learning_rate": 8.727108757182428e-06, "loss": 0.3399, "step": 6466 }, { "epoch": 0.9273014052193863, "grad_norm": 0.3268129825592041, "learning_rate": 8.726552604582295e-06, "loss": 0.3239, "step": 6467 }, { "epoch": 0.9274447949526814, "grad_norm": 0.29294541478157043, "learning_rate": 8.725996348240669e-06, "loss": 0.3385, "step": 6468 }, { "epoch": 0.9275881846859765, "grad_norm": 0.3128977417945862, "learning_rate": 8.725439988173037e-06, "loss": 0.3483, "step": 6469 }, { "epoch": 0.9277315744192716, "grad_norm": 0.2788431644439697, "learning_rate": 8.724883524394884e-06, "loss": 0.3162, "step": 6470 }, { "epoch": 0.9278749641525667, "grad_norm": 0.29087409377098083, "learning_rate": 8.724326956921708e-06, "loss": 0.3505, "step": 6471 }, { "epoch": 0.9280183538858617, "grad_norm": 0.30920204520225525, "learning_rate": 8.723770285768995e-06, "loss": 0.341, "step": 6472 }, { "epoch": 0.9281617436191568, "grad_norm": 0.2963982820510864, "learning_rate": 8.723213510952248e-06, "loss": 0.3255, "step": 6473 }, { "epoch": 0.928305133352452, "grad_norm": 0.2853900194168091, "learning_rate": 8.722656632486964e-06, "loss": 0.336, "step": 6474 }, { "epoch": 0.9284485230857471, "grad_norm": 0.3034787178039551, "learning_rate": 8.722099650388644e-06, "loss": 0.3463, "step": 6475 }, { "epoch": 0.9285919128190422, "grad_norm": 0.3063540756702423, "learning_rate": 8.721542564672796e-06, "loss": 0.3121, "step": 6476 }, { "epoch": 0.9287353025523373, "grad_norm": 0.3221608102321625, "learning_rate": 8.720985375354928e-06, "loss": 0.3206, "step": 6477 }, { "epoch": 0.9288786922856324, "grad_norm": 0.2915537655353546, "learning_rate": 8.720428082450552e-06, "loss": 0.3321, "step": 6478 }, { "epoch": 0.9290220820189274, "grad_norm": 0.32560670375823975, "learning_rate": 8.719870685975181e-06, "loss": 0.3371, "step": 6479 }, { "epoch": 0.9291654717522225, "grad_norm": 0.30165964365005493, "learning_rate": 8.71931318594433e-06, "loss": 0.3208, "step": 6480 }, { "epoch": 0.9293088614855176, "grad_norm": 0.29766929149627686, "learning_rate": 8.718755582373524e-06, "loss": 0.3316, "step": 6481 }, { "epoch": 0.9294522512188127, "grad_norm": 0.3123459815979004, "learning_rate": 8.718197875278281e-06, "loss": 0.3346, "step": 6482 }, { "epoch": 0.9295956409521078, "grad_norm": 0.3094251751899719, "learning_rate": 8.71764006467413e-06, "loss": 0.3134, "step": 6483 }, { "epoch": 0.929739030685403, "grad_norm": 0.2980845272541046, "learning_rate": 8.717082150576599e-06, "loss": 0.3148, "step": 6484 }, { "epoch": 0.9298824204186981, "grad_norm": 0.3224228322505951, "learning_rate": 8.716524133001218e-06, "loss": 0.3202, "step": 6485 }, { "epoch": 0.9300258101519931, "grad_norm": 0.3227289915084839, "learning_rate": 8.715966011963523e-06, "loss": 0.3323, "step": 6486 }, { "epoch": 0.9301691998852882, "grad_norm": 0.28344491124153137, "learning_rate": 8.71540778747905e-06, "loss": 0.3136, "step": 6487 }, { "epoch": 0.9303125896185833, "grad_norm": 0.3277728259563446, "learning_rate": 8.714849459563339e-06, "loss": 0.318, "step": 6488 }, { "epoch": 0.9304559793518784, "grad_norm": 0.29054519534111023, "learning_rate": 8.714291028231934e-06, "loss": 0.3426, "step": 6489 }, { "epoch": 0.9305993690851735, "grad_norm": 0.30388855934143066, "learning_rate": 8.71373249350038e-06, "loss": 0.3462, "step": 6490 }, { "epoch": 0.9307427588184686, "grad_norm": 0.31313401460647583, "learning_rate": 8.713173855384227e-06, "loss": 0.3461, "step": 6491 }, { "epoch": 0.9308861485517637, "grad_norm": 0.2881573736667633, "learning_rate": 8.712615113899028e-06, "loss": 0.3278, "step": 6492 }, { "epoch": 0.9310295382850587, "grad_norm": 0.33943769335746765, "learning_rate": 8.712056269060331e-06, "loss": 0.3147, "step": 6493 }, { "epoch": 0.9311729280183539, "grad_norm": 0.2883751094341278, "learning_rate": 8.7114973208837e-06, "loss": 0.3342, "step": 6494 }, { "epoch": 0.931316317751649, "grad_norm": 0.3046894669532776, "learning_rate": 8.710938269384692e-06, "loss": 0.3355, "step": 6495 }, { "epoch": 0.9314597074849441, "grad_norm": 0.30948010087013245, "learning_rate": 8.710379114578872e-06, "loss": 0.3456, "step": 6496 }, { "epoch": 0.9316030972182392, "grad_norm": 0.3016730844974518, "learning_rate": 8.709819856481804e-06, "loss": 0.3362, "step": 6497 }, { "epoch": 0.9317464869515343, "grad_norm": 0.3115881085395813, "learning_rate": 8.709260495109057e-06, "loss": 0.3071, "step": 6498 }, { "epoch": 0.9318898766848294, "grad_norm": 0.30872464179992676, "learning_rate": 8.708701030476208e-06, "loss": 0.3324, "step": 6499 }, { "epoch": 0.9320332664181245, "grad_norm": 0.3226087689399719, "learning_rate": 8.708141462598824e-06, "loss": 0.3353, "step": 6500 }, { "epoch": 0.9321766561514195, "grad_norm": 0.288790225982666, "learning_rate": 8.707581791492486e-06, "loss": 0.3328, "step": 6501 }, { "epoch": 0.9323200458847146, "grad_norm": 0.3408215641975403, "learning_rate": 8.707022017172775e-06, "loss": 0.3414, "step": 6502 }, { "epoch": 0.9324634356180097, "grad_norm": 0.2962409555912018, "learning_rate": 8.706462139655274e-06, "loss": 0.343, "step": 6503 }, { "epoch": 0.9326068253513048, "grad_norm": 0.31268978118896484, "learning_rate": 8.705902158955566e-06, "loss": 0.3351, "step": 6504 }, { "epoch": 0.9327502150846, "grad_norm": 0.3105863630771637, "learning_rate": 8.705342075089245e-06, "loss": 0.3162, "step": 6505 }, { "epoch": 0.9328936048178951, "grad_norm": 0.3052172362804413, "learning_rate": 8.7047818880719e-06, "loss": 0.3111, "step": 6506 }, { "epoch": 0.9330369945511902, "grad_norm": 0.32174691557884216, "learning_rate": 8.704221597919127e-06, "loss": 0.3399, "step": 6507 }, { "epoch": 0.9331803842844852, "grad_norm": 0.3242630660533905, "learning_rate": 8.703661204646522e-06, "loss": 0.331, "step": 6508 }, { "epoch": 0.9333237740177803, "grad_norm": 0.31829747557640076, "learning_rate": 8.703100708269687e-06, "loss": 0.3257, "step": 6509 }, { "epoch": 0.9334671637510754, "grad_norm": 0.32873743772506714, "learning_rate": 8.702540108804224e-06, "loss": 0.3297, "step": 6510 }, { "epoch": 0.9336105534843705, "grad_norm": 0.3174271881580353, "learning_rate": 8.70197940626574e-06, "loss": 0.3206, "step": 6511 }, { "epoch": 0.9337539432176656, "grad_norm": 0.3035237193107605, "learning_rate": 8.701418600669844e-06, "loss": 0.3483, "step": 6512 }, { "epoch": 0.9338973329509607, "grad_norm": 0.35246822237968445, "learning_rate": 8.70085769203215e-06, "loss": 0.3609, "step": 6513 }, { "epoch": 0.9340407226842558, "grad_norm": 0.30963876843452454, "learning_rate": 8.700296680368267e-06, "loss": 0.324, "step": 6514 }, { "epoch": 0.934184112417551, "grad_norm": 0.31323933601379395, "learning_rate": 8.699735565693818e-06, "loss": 0.3187, "step": 6515 }, { "epoch": 0.934327502150846, "grad_norm": 0.3598560392856598, "learning_rate": 8.699174348024424e-06, "loss": 0.3493, "step": 6516 }, { "epoch": 0.9344708918841411, "grad_norm": 0.28647854924201965, "learning_rate": 8.698613027375706e-06, "loss": 0.3089, "step": 6517 }, { "epoch": 0.9346142816174362, "grad_norm": 0.2869752049446106, "learning_rate": 8.698051603763289e-06, "loss": 0.3653, "step": 6518 }, { "epoch": 0.9347576713507313, "grad_norm": 0.3076764941215515, "learning_rate": 8.697490077202806e-06, "loss": 0.3357, "step": 6519 }, { "epoch": 0.9349010610840264, "grad_norm": 0.3302686810493469, "learning_rate": 8.696928447709886e-06, "loss": 0.342, "step": 6520 }, { "epoch": 0.9350444508173215, "grad_norm": 0.3343254029750824, "learning_rate": 8.696366715300165e-06, "loss": 0.343, "step": 6521 }, { "epoch": 0.9351878405506165, "grad_norm": 0.31132951378822327, "learning_rate": 8.69580487998928e-06, "loss": 0.327, "step": 6522 }, { "epoch": 0.9353312302839116, "grad_norm": 0.31846633553504944, "learning_rate": 8.695242941792874e-06, "loss": 0.3386, "step": 6523 }, { "epoch": 0.9354746200172067, "grad_norm": 0.30237895250320435, "learning_rate": 8.694680900726588e-06, "loss": 0.3435, "step": 6524 }, { "epoch": 0.9356180097505019, "grad_norm": 0.32946643233299255, "learning_rate": 8.694118756806069e-06, "loss": 0.338, "step": 6525 }, { "epoch": 0.935761399483797, "grad_norm": 0.2980404794216156, "learning_rate": 8.693556510046968e-06, "loss": 0.348, "step": 6526 }, { "epoch": 0.9359047892170921, "grad_norm": 0.30062830448150635, "learning_rate": 8.692994160464933e-06, "loss": 0.3336, "step": 6527 }, { "epoch": 0.9360481789503872, "grad_norm": 0.31175535917282104, "learning_rate": 8.692431708075623e-06, "loss": 0.3239, "step": 6528 }, { "epoch": 0.9361915686836823, "grad_norm": 0.30562660098075867, "learning_rate": 8.691869152894694e-06, "loss": 0.33, "step": 6529 }, { "epoch": 0.9363349584169773, "grad_norm": 0.2867465317249298, "learning_rate": 8.691306494937808e-06, "loss": 0.321, "step": 6530 }, { "epoch": 0.9364783481502724, "grad_norm": 0.3180202543735504, "learning_rate": 8.690743734220626e-06, "loss": 0.3629, "step": 6531 }, { "epoch": 0.9366217378835675, "grad_norm": 0.334956556558609, "learning_rate": 8.690180870758815e-06, "loss": 0.3292, "step": 6532 }, { "epoch": 0.9367651276168626, "grad_norm": 0.34815675020217896, "learning_rate": 8.689617904568047e-06, "loss": 0.342, "step": 6533 }, { "epoch": 0.9369085173501577, "grad_norm": 0.30863040685653687, "learning_rate": 8.689054835663992e-06, "loss": 0.3322, "step": 6534 }, { "epoch": 0.9370519070834529, "grad_norm": 0.3309469521045685, "learning_rate": 8.688491664062325e-06, "loss": 0.3599, "step": 6535 }, { "epoch": 0.937195296816748, "grad_norm": 0.3575286567211151, "learning_rate": 8.687928389778723e-06, "loss": 0.3506, "step": 6536 }, { "epoch": 0.937338686550043, "grad_norm": 0.3005152642726898, "learning_rate": 8.68736501282887e-06, "loss": 0.3435, "step": 6537 }, { "epoch": 0.9374820762833381, "grad_norm": 0.30124974250793457, "learning_rate": 8.686801533228446e-06, "loss": 0.3154, "step": 6538 }, { "epoch": 0.9376254660166332, "grad_norm": 0.2952772378921509, "learning_rate": 8.686237950993137e-06, "loss": 0.3346, "step": 6539 }, { "epoch": 0.9377688557499283, "grad_norm": 0.31408175826072693, "learning_rate": 8.685674266138636e-06, "loss": 0.3211, "step": 6540 }, { "epoch": 0.9379122454832234, "grad_norm": 0.31434938311576843, "learning_rate": 8.685110478680633e-06, "loss": 0.3435, "step": 6541 }, { "epoch": 0.9380556352165185, "grad_norm": 0.28915393352508545, "learning_rate": 8.684546588634822e-06, "loss": 0.3391, "step": 6542 }, { "epoch": 0.9381990249498136, "grad_norm": 0.3486204743385315, "learning_rate": 8.683982596016903e-06, "loss": 0.3404, "step": 6543 }, { "epoch": 0.9383424146831086, "grad_norm": 0.281453013420105, "learning_rate": 8.683418500842575e-06, "loss": 0.32, "step": 6544 }, { "epoch": 0.9384858044164038, "grad_norm": 0.31690073013305664, "learning_rate": 8.68285430312754e-06, "loss": 0.3254, "step": 6545 }, { "epoch": 0.9386291941496989, "grad_norm": 0.29270943999290466, "learning_rate": 8.68229000288751e-06, "loss": 0.3361, "step": 6546 }, { "epoch": 0.938772583882994, "grad_norm": 0.30484095215797424, "learning_rate": 8.68172560013819e-06, "loss": 0.3426, "step": 6547 }, { "epoch": 0.9389159736162891, "grad_norm": 0.2893460988998413, "learning_rate": 8.681161094895292e-06, "loss": 0.3397, "step": 6548 }, { "epoch": 0.9390593633495842, "grad_norm": 0.3304294943809509, "learning_rate": 8.680596487174532e-06, "loss": 0.3442, "step": 6549 }, { "epoch": 0.9392027530828793, "grad_norm": 0.2979801893234253, "learning_rate": 8.680031776991628e-06, "loss": 0.3214, "step": 6550 }, { "epoch": 0.9393461428161743, "grad_norm": 0.322446346282959, "learning_rate": 8.6794669643623e-06, "loss": 0.3362, "step": 6551 }, { "epoch": 0.9394895325494694, "grad_norm": 0.28592613339424133, "learning_rate": 8.67890204930227e-06, "loss": 0.3295, "step": 6552 }, { "epoch": 0.9396329222827645, "grad_norm": 0.2885359227657318, "learning_rate": 8.678337031827269e-06, "loss": 0.3253, "step": 6553 }, { "epoch": 0.9397763120160596, "grad_norm": 0.2987542450428009, "learning_rate": 8.677771911953023e-06, "loss": 0.3368, "step": 6554 }, { "epoch": 0.9399197017493548, "grad_norm": 0.3077242076396942, "learning_rate": 8.677206689695263e-06, "loss": 0.3262, "step": 6555 }, { "epoch": 0.9400630914826499, "grad_norm": 0.31004205346107483, "learning_rate": 8.676641365069726e-06, "loss": 0.3348, "step": 6556 }, { "epoch": 0.940206481215945, "grad_norm": 0.31437742710113525, "learning_rate": 8.676075938092148e-06, "loss": 0.3412, "step": 6557 }, { "epoch": 0.94034987094924, "grad_norm": 0.36471930146217346, "learning_rate": 8.675510408778274e-06, "loss": 0.3332, "step": 6558 }, { "epoch": 0.9404932606825351, "grad_norm": 0.34503206610679626, "learning_rate": 8.674944777143841e-06, "loss": 0.342, "step": 6559 }, { "epoch": 0.9406366504158302, "grad_norm": 0.3117716610431671, "learning_rate": 8.6743790432046e-06, "loss": 0.3239, "step": 6560 }, { "epoch": 0.9407800401491253, "grad_norm": 0.3008531928062439, "learning_rate": 8.673813206976297e-06, "loss": 0.3147, "step": 6561 }, { "epoch": 0.9409234298824204, "grad_norm": 0.2961648404598236, "learning_rate": 8.673247268474688e-06, "loss": 0.3369, "step": 6562 }, { "epoch": 0.9410668196157155, "grad_norm": 0.3158344626426697, "learning_rate": 8.672681227715525e-06, "loss": 0.321, "step": 6563 }, { "epoch": 0.9412102093490106, "grad_norm": 0.2851572632789612, "learning_rate": 8.672115084714566e-06, "loss": 0.3135, "step": 6564 }, { "epoch": 0.9413535990823058, "grad_norm": 0.35641810297966003, "learning_rate": 8.67154883948757e-06, "loss": 0.3219, "step": 6565 }, { "epoch": 0.9414969888156008, "grad_norm": 0.32260623574256897, "learning_rate": 8.670982492050305e-06, "loss": 0.3314, "step": 6566 }, { "epoch": 0.9416403785488959, "grad_norm": 0.29963985085487366, "learning_rate": 8.670416042418534e-06, "loss": 0.3311, "step": 6567 }, { "epoch": 0.941783768282191, "grad_norm": 0.3671669363975525, "learning_rate": 8.669849490608026e-06, "loss": 0.3446, "step": 6568 }, { "epoch": 0.9419271580154861, "grad_norm": 0.32642826437950134, "learning_rate": 8.669282836634553e-06, "loss": 0.3249, "step": 6569 }, { "epoch": 0.9420705477487812, "grad_norm": 0.32184359431266785, "learning_rate": 8.668716080513891e-06, "loss": 0.3269, "step": 6570 }, { "epoch": 0.9422139374820763, "grad_norm": 0.3136933445930481, "learning_rate": 8.668149222261816e-06, "loss": 0.3382, "step": 6571 }, { "epoch": 0.9423573272153714, "grad_norm": 0.3159245550632477, "learning_rate": 8.667582261894109e-06, "loss": 0.3366, "step": 6572 }, { "epoch": 0.9425007169486664, "grad_norm": 0.3395094871520996, "learning_rate": 8.667015199426555e-06, "loss": 0.3427, "step": 6573 }, { "epoch": 0.9426441066819615, "grad_norm": 0.3082101345062256, "learning_rate": 8.666448034874939e-06, "loss": 0.3262, "step": 6574 }, { "epoch": 0.9427874964152567, "grad_norm": 0.29380810260772705, "learning_rate": 8.665880768255048e-06, "loss": 0.3272, "step": 6575 }, { "epoch": 0.9429308861485518, "grad_norm": 0.3585309386253357, "learning_rate": 8.665313399582677e-06, "loss": 0.331, "step": 6576 }, { "epoch": 0.9430742758818469, "grad_norm": 0.3112315535545349, "learning_rate": 8.66474592887362e-06, "loss": 0.3403, "step": 6577 }, { "epoch": 0.943217665615142, "grad_norm": 0.35909122228622437, "learning_rate": 8.66417835614367e-06, "loss": 0.3452, "step": 6578 }, { "epoch": 0.9433610553484371, "grad_norm": 0.28168341517448425, "learning_rate": 8.663610681408635e-06, "loss": 0.3192, "step": 6579 }, { "epoch": 0.9435044450817321, "grad_norm": 0.334540992975235, "learning_rate": 8.663042904684312e-06, "loss": 0.3345, "step": 6580 }, { "epoch": 0.9436478348150272, "grad_norm": 0.34591144323349, "learning_rate": 8.66247502598651e-06, "loss": 0.3529, "step": 6581 }, { "epoch": 0.9437912245483223, "grad_norm": 0.3021228015422821, "learning_rate": 8.661907045331038e-06, "loss": 0.3446, "step": 6582 }, { "epoch": 0.9439346142816174, "grad_norm": 0.30461353063583374, "learning_rate": 8.661338962733708e-06, "loss": 0.3404, "step": 6583 }, { "epoch": 0.9440780040149125, "grad_norm": 0.3394329845905304, "learning_rate": 8.66077077821033e-06, "loss": 0.3526, "step": 6584 }, { "epoch": 0.9442213937482077, "grad_norm": 0.3748216927051544, "learning_rate": 8.660202491776729e-06, "loss": 0.3237, "step": 6585 }, { "epoch": 0.9443647834815028, "grad_norm": 0.3192686140537262, "learning_rate": 8.659634103448717e-06, "loss": 0.3358, "step": 6586 }, { "epoch": 0.9445081732147979, "grad_norm": 0.30722200870513916, "learning_rate": 8.659065613242124e-06, "loss": 0.3337, "step": 6587 }, { "epoch": 0.9446515629480929, "grad_norm": 0.35531502962112427, "learning_rate": 8.658497021172771e-06, "loss": 0.3501, "step": 6588 }, { "epoch": 0.944794952681388, "grad_norm": 0.3435421586036682, "learning_rate": 8.65792832725649e-06, "loss": 0.3455, "step": 6589 }, { "epoch": 0.9449383424146831, "grad_norm": 0.2850918769836426, "learning_rate": 8.657359531509113e-06, "loss": 0.3213, "step": 6590 }, { "epoch": 0.9450817321479782, "grad_norm": 0.3294321298599243, "learning_rate": 8.65679063394647e-06, "loss": 0.3255, "step": 6591 }, { "epoch": 0.9452251218812733, "grad_norm": 0.3151428997516632, "learning_rate": 8.656221634584402e-06, "loss": 0.3353, "step": 6592 }, { "epoch": 0.9453685116145684, "grad_norm": 0.3207841217517853, "learning_rate": 8.655652533438747e-06, "loss": 0.3117, "step": 6593 }, { "epoch": 0.9455119013478634, "grad_norm": 0.332675576210022, "learning_rate": 8.65508333052535e-06, "loss": 0.3359, "step": 6594 }, { "epoch": 0.9456552910811586, "grad_norm": 0.33371782302856445, "learning_rate": 8.654514025860055e-06, "loss": 0.3421, "step": 6595 }, { "epoch": 0.9457986808144537, "grad_norm": 0.2997817099094391, "learning_rate": 8.653944619458712e-06, "loss": 0.3382, "step": 6596 }, { "epoch": 0.9459420705477488, "grad_norm": 0.3048804998397827, "learning_rate": 8.65337511133717e-06, "loss": 0.3202, "step": 6597 }, { "epoch": 0.9460854602810439, "grad_norm": 0.31565356254577637, "learning_rate": 8.652805501511288e-06, "loss": 0.3341, "step": 6598 }, { "epoch": 0.946228850014339, "grad_norm": 0.3402183949947357, "learning_rate": 8.652235789996917e-06, "loss": 0.3337, "step": 6599 }, { "epoch": 0.9463722397476341, "grad_norm": 0.3159288465976715, "learning_rate": 8.651665976809921e-06, "loss": 0.3386, "step": 6600 }, { "epoch": 0.9465156294809292, "grad_norm": 0.33926331996917725, "learning_rate": 8.65109606196616e-06, "loss": 0.3398, "step": 6601 }, { "epoch": 0.9466590192142242, "grad_norm": 0.3030394911766052, "learning_rate": 8.650526045481504e-06, "loss": 0.326, "step": 6602 }, { "epoch": 0.9468024089475193, "grad_norm": 0.3309212625026703, "learning_rate": 8.649955927371815e-06, "loss": 0.3594, "step": 6603 }, { "epoch": 0.9469457986808144, "grad_norm": 0.3172730505466461, "learning_rate": 8.64938570765297e-06, "loss": 0.3267, "step": 6604 }, { "epoch": 0.9470891884141095, "grad_norm": 0.32406362891197205, "learning_rate": 8.648815386340842e-06, "loss": 0.3258, "step": 6605 }, { "epoch": 0.9472325781474047, "grad_norm": 0.3247632682323456, "learning_rate": 8.648244963451304e-06, "loss": 0.326, "step": 6606 }, { "epoch": 0.9473759678806998, "grad_norm": 0.3212302625179291, "learning_rate": 8.647674439000238e-06, "loss": 0.3353, "step": 6607 }, { "epoch": 0.9475193576139949, "grad_norm": 0.29721468687057495, "learning_rate": 8.64710381300353e-06, "loss": 0.331, "step": 6608 }, { "epoch": 0.94766274734729, "grad_norm": 0.29043155908584595, "learning_rate": 8.646533085477057e-06, "loss": 0.3231, "step": 6609 }, { "epoch": 0.947806137080585, "grad_norm": 0.32647058367729187, "learning_rate": 8.645962256436717e-06, "loss": 0.3226, "step": 6610 }, { "epoch": 0.9479495268138801, "grad_norm": 0.34251198172569275, "learning_rate": 8.645391325898395e-06, "loss": 0.3366, "step": 6611 }, { "epoch": 0.9480929165471752, "grad_norm": 0.3054920732975006, "learning_rate": 8.644820293877985e-06, "loss": 0.3326, "step": 6612 }, { "epoch": 0.9482363062804703, "grad_norm": 0.35644295811653137, "learning_rate": 8.644249160391386e-06, "loss": 0.3162, "step": 6613 }, { "epoch": 0.9483796960137654, "grad_norm": 0.3346516489982605, "learning_rate": 8.643677925454495e-06, "loss": 0.3326, "step": 6614 }, { "epoch": 0.9485230857470605, "grad_norm": 0.30558085441589355, "learning_rate": 8.643106589083217e-06, "loss": 0.3494, "step": 6615 }, { "epoch": 0.9486664754803557, "grad_norm": 0.3108829855918884, "learning_rate": 8.642535151293455e-06, "loss": 0.3448, "step": 6616 }, { "epoch": 0.9488098652136507, "grad_norm": 0.3364658057689667, "learning_rate": 8.641963612101119e-06, "loss": 0.3426, "step": 6617 }, { "epoch": 0.9489532549469458, "grad_norm": 0.2856346070766449, "learning_rate": 8.641391971522117e-06, "loss": 0.3142, "step": 6618 }, { "epoch": 0.9490966446802409, "grad_norm": 0.30587759613990784, "learning_rate": 8.640820229572365e-06, "loss": 0.3313, "step": 6619 }, { "epoch": 0.949240034413536, "grad_norm": 0.3222942650318146, "learning_rate": 8.640248386267775e-06, "loss": 0.3627, "step": 6620 }, { "epoch": 0.9493834241468311, "grad_norm": 0.2786077558994293, "learning_rate": 8.639676441624273e-06, "loss": 0.3264, "step": 6621 }, { "epoch": 0.9495268138801262, "grad_norm": 0.2954791784286499, "learning_rate": 8.639104395657778e-06, "loss": 0.3366, "step": 6622 }, { "epoch": 0.9496702036134212, "grad_norm": 0.28352195024490356, "learning_rate": 8.638532248384214e-06, "loss": 0.3397, "step": 6623 }, { "epoch": 0.9498135933467163, "grad_norm": 0.28857770562171936, "learning_rate": 8.637959999819509e-06, "loss": 0.3135, "step": 6624 }, { "epoch": 0.9499569830800114, "grad_norm": 0.3024536073207855, "learning_rate": 8.637387649979595e-06, "loss": 0.3249, "step": 6625 }, { "epoch": 0.9501003728133066, "grad_norm": 0.286091148853302, "learning_rate": 8.636815198880401e-06, "loss": 0.3099, "step": 6626 }, { "epoch": 0.9502437625466017, "grad_norm": 0.2913680970668793, "learning_rate": 8.636242646537869e-06, "loss": 0.3228, "step": 6627 }, { "epoch": 0.9503871522798968, "grad_norm": 0.299458771944046, "learning_rate": 8.635669992967935e-06, "loss": 0.3327, "step": 6628 }, { "epoch": 0.9505305420131919, "grad_norm": 0.3190654218196869, "learning_rate": 8.63509723818654e-06, "loss": 0.3298, "step": 6629 }, { "epoch": 0.950673931746487, "grad_norm": 0.28497499227523804, "learning_rate": 8.63452438220963e-06, "loss": 0.3333, "step": 6630 }, { "epoch": 0.950817321479782, "grad_norm": 0.2955129146575928, "learning_rate": 8.633951425053152e-06, "loss": 0.3389, "step": 6631 }, { "epoch": 0.9509607112130771, "grad_norm": 0.30143359303474426, "learning_rate": 8.633378366733058e-06, "loss": 0.3224, "step": 6632 }, { "epoch": 0.9511041009463722, "grad_norm": 0.2842985987663269, "learning_rate": 8.6328052072653e-06, "loss": 0.3238, "step": 6633 }, { "epoch": 0.9512474906796673, "grad_norm": 0.30298471450805664, "learning_rate": 8.63223194666583e-06, "loss": 0.3478, "step": 6634 }, { "epoch": 0.9513908804129624, "grad_norm": 0.32663848996162415, "learning_rate": 8.631658584950614e-06, "loss": 0.3061, "step": 6635 }, { "epoch": 0.9515342701462576, "grad_norm": 0.3031354546546936, "learning_rate": 8.631085122135605e-06, "loss": 0.3449, "step": 6636 }, { "epoch": 0.9516776598795527, "grad_norm": 0.32970669865608215, "learning_rate": 8.630511558236774e-06, "loss": 0.3416, "step": 6637 }, { "epoch": 0.9518210496128477, "grad_norm": 0.3232935070991516, "learning_rate": 8.629937893270089e-06, "loss": 0.3519, "step": 6638 }, { "epoch": 0.9519644393461428, "grad_norm": 0.33089694380760193, "learning_rate": 8.629364127251512e-06, "loss": 0.3171, "step": 6639 }, { "epoch": 0.9521078290794379, "grad_norm": 0.29974812269210815, "learning_rate": 8.628790260197023e-06, "loss": 0.3228, "step": 6640 }, { "epoch": 0.952251218812733, "grad_norm": 0.32350578904151917, "learning_rate": 8.628216292122595e-06, "loss": 0.3302, "step": 6641 }, { "epoch": 0.9523946085460281, "grad_norm": 0.33574867248535156, "learning_rate": 8.627642223044207e-06, "loss": 0.332, "step": 6642 }, { "epoch": 0.9525379982793232, "grad_norm": 0.3206256330013275, "learning_rate": 8.62706805297784e-06, "loss": 0.314, "step": 6643 }, { "epoch": 0.9526813880126183, "grad_norm": 0.29774945974349976, "learning_rate": 8.626493781939479e-06, "loss": 0.3247, "step": 6644 }, { "epoch": 0.9528247777459133, "grad_norm": 0.3529854118824005, "learning_rate": 8.625919409945107e-06, "loss": 0.3226, "step": 6645 }, { "epoch": 0.9529681674792085, "grad_norm": 0.3356056809425354, "learning_rate": 8.625344937010718e-06, "loss": 0.3294, "step": 6646 }, { "epoch": 0.9531115572125036, "grad_norm": 0.316139280796051, "learning_rate": 8.624770363152302e-06, "loss": 0.3388, "step": 6647 }, { "epoch": 0.9532549469457987, "grad_norm": 0.2990134060382843, "learning_rate": 8.624195688385854e-06, "loss": 0.3265, "step": 6648 }, { "epoch": 0.9533983366790938, "grad_norm": 0.3170328140258789, "learning_rate": 8.623620912727376e-06, "loss": 0.3247, "step": 6649 }, { "epoch": 0.9535417264123889, "grad_norm": 0.30952590703964233, "learning_rate": 8.623046036192863e-06, "loss": 0.3179, "step": 6650 }, { "epoch": 0.953685116145684, "grad_norm": 0.3077602982521057, "learning_rate": 8.622471058798325e-06, "loss": 0.336, "step": 6651 }, { "epoch": 0.953828505878979, "grad_norm": 0.33773618936538696, "learning_rate": 8.621895980559762e-06, "loss": 0.3227, "step": 6652 }, { "epoch": 0.9539718956122741, "grad_norm": 0.30036014318466187, "learning_rate": 8.621320801493188e-06, "loss": 0.3184, "step": 6653 }, { "epoch": 0.9541152853455692, "grad_norm": 0.28779852390289307, "learning_rate": 8.620745521614614e-06, "loss": 0.3347, "step": 6654 }, { "epoch": 0.9542586750788643, "grad_norm": 0.33095112442970276, "learning_rate": 8.620170140940052e-06, "loss": 0.3376, "step": 6655 }, { "epoch": 0.9544020648121595, "grad_norm": 0.3107682764530182, "learning_rate": 8.619594659485526e-06, "loss": 0.3332, "step": 6656 }, { "epoch": 0.9545454545454546, "grad_norm": 0.2994473874568939, "learning_rate": 8.619019077267049e-06, "loss": 0.342, "step": 6657 }, { "epoch": 0.9546888442787497, "grad_norm": 0.33433863520622253, "learning_rate": 8.61844339430065e-06, "loss": 0.3387, "step": 6658 }, { "epoch": 0.9548322340120448, "grad_norm": 0.3294869363307953, "learning_rate": 8.617867610602352e-06, "loss": 0.3266, "step": 6659 }, { "epoch": 0.9549756237453398, "grad_norm": 0.2842623293399811, "learning_rate": 8.617291726188186e-06, "loss": 0.334, "step": 6660 }, { "epoch": 0.9551190134786349, "grad_norm": 0.29774942994117737, "learning_rate": 8.616715741074182e-06, "loss": 0.3401, "step": 6661 }, { "epoch": 0.95526240321193, "grad_norm": 0.3257710337638855, "learning_rate": 8.616139655276376e-06, "loss": 0.3262, "step": 6662 }, { "epoch": 0.9554057929452251, "grad_norm": 0.30599310994148254, "learning_rate": 8.615563468810803e-06, "loss": 0.349, "step": 6663 }, { "epoch": 0.9555491826785202, "grad_norm": 0.3075193762779236, "learning_rate": 8.614987181693508e-06, "loss": 0.3453, "step": 6664 }, { "epoch": 0.9556925724118153, "grad_norm": 0.37273523211479187, "learning_rate": 8.614410793940528e-06, "loss": 0.3304, "step": 6665 }, { "epoch": 0.9558359621451105, "grad_norm": 0.3341553807258606, "learning_rate": 8.613834305567912e-06, "loss": 0.3329, "step": 6666 }, { "epoch": 0.9559793518784055, "grad_norm": 0.293334424495697, "learning_rate": 8.613257716591708e-06, "loss": 0.3114, "step": 6667 }, { "epoch": 0.9561227416117006, "grad_norm": 0.3042694628238678, "learning_rate": 8.612681027027969e-06, "loss": 0.3102, "step": 6668 }, { "epoch": 0.9562661313449957, "grad_norm": 0.31869441270828247, "learning_rate": 8.612104236892746e-06, "loss": 0.3355, "step": 6669 }, { "epoch": 0.9564095210782908, "grad_norm": 0.29968151450157166, "learning_rate": 8.611527346202099e-06, "loss": 0.3412, "step": 6670 }, { "epoch": 0.9565529108115859, "grad_norm": 0.31753334403038025, "learning_rate": 8.610950354972083e-06, "loss": 0.3301, "step": 6671 }, { "epoch": 0.956696300544881, "grad_norm": 0.3341568410396576, "learning_rate": 8.610373263218767e-06, "loss": 0.3697, "step": 6672 }, { "epoch": 0.9568396902781761, "grad_norm": 0.30780643224716187, "learning_rate": 8.609796070958212e-06, "loss": 0.3214, "step": 6673 }, { "epoch": 0.9569830800114711, "grad_norm": 0.3166501224040985, "learning_rate": 8.609218778206487e-06, "loss": 0.3359, "step": 6674 }, { "epoch": 0.9571264697447662, "grad_norm": 0.29612743854522705, "learning_rate": 8.608641384979664e-06, "loss": 0.3318, "step": 6675 }, { "epoch": 0.9572698594780614, "grad_norm": 0.3154044449329376, "learning_rate": 8.608063891293816e-06, "loss": 0.34, "step": 6676 }, { "epoch": 0.9574132492113565, "grad_norm": 0.31862759590148926, "learning_rate": 8.60748629716502e-06, "loss": 0.3519, "step": 6677 }, { "epoch": 0.9575566389446516, "grad_norm": 0.28586962819099426, "learning_rate": 8.606908602609353e-06, "loss": 0.3342, "step": 6678 }, { "epoch": 0.9577000286779467, "grad_norm": 0.29357481002807617, "learning_rate": 8.6063308076429e-06, "loss": 0.3368, "step": 6679 }, { "epoch": 0.9578434184112418, "grad_norm": 0.30673640966415405, "learning_rate": 8.605752912281744e-06, "loss": 0.3373, "step": 6680 }, { "epoch": 0.9579868081445368, "grad_norm": 0.30318111181259155, "learning_rate": 8.605174916541974e-06, "loss": 0.3353, "step": 6681 }, { "epoch": 0.9581301978778319, "grad_norm": 0.2837163507938385, "learning_rate": 8.604596820439682e-06, "loss": 0.3148, "step": 6682 }, { "epoch": 0.958273587611127, "grad_norm": 0.2968432605266571, "learning_rate": 8.604018623990958e-06, "loss": 0.3292, "step": 6683 }, { "epoch": 0.9584169773444221, "grad_norm": 0.29752084612846375, "learning_rate": 8.6034403272119e-06, "loss": 0.3467, "step": 6684 }, { "epoch": 0.9585603670777172, "grad_norm": 0.29584476351737976, "learning_rate": 8.602861930118604e-06, "loss": 0.3316, "step": 6685 }, { "epoch": 0.9587037568110124, "grad_norm": 0.31486329436302185, "learning_rate": 8.602283432727177e-06, "loss": 0.3252, "step": 6686 }, { "epoch": 0.9588471465443075, "grad_norm": 0.29295313358306885, "learning_rate": 8.60170483505372e-06, "loss": 0.3221, "step": 6687 }, { "epoch": 0.9589905362776026, "grad_norm": 0.2966940999031067, "learning_rate": 8.60112613711434e-06, "loss": 0.3222, "step": 6688 }, { "epoch": 0.9591339260108976, "grad_norm": 0.3106014132499695, "learning_rate": 8.600547338925148e-06, "loss": 0.3174, "step": 6689 }, { "epoch": 0.9592773157441927, "grad_norm": 0.3262483775615692, "learning_rate": 8.599968440502257e-06, "loss": 0.3637, "step": 6690 }, { "epoch": 0.9594207054774878, "grad_norm": 0.305194228887558, "learning_rate": 8.599389441861784e-06, "loss": 0.3478, "step": 6691 }, { "epoch": 0.9595640952107829, "grad_norm": 0.316482812166214, "learning_rate": 8.598810343019843e-06, "loss": 0.3257, "step": 6692 }, { "epoch": 0.959707484944078, "grad_norm": 0.31477251648902893, "learning_rate": 8.598231143992558e-06, "loss": 0.3236, "step": 6693 }, { "epoch": 0.9598508746773731, "grad_norm": 0.29649749398231506, "learning_rate": 8.597651844796055e-06, "loss": 0.3357, "step": 6694 }, { "epoch": 0.9599942644106682, "grad_norm": 0.29838940501213074, "learning_rate": 8.59707244544646e-06, "loss": 0.3404, "step": 6695 }, { "epoch": 0.9601376541439632, "grad_norm": 0.328253835439682, "learning_rate": 8.5964929459599e-06, "loss": 0.3459, "step": 6696 }, { "epoch": 0.9602810438772584, "grad_norm": 0.27904993295669556, "learning_rate": 8.59591334635251e-06, "loss": 0.3259, "step": 6697 }, { "epoch": 0.9604244336105535, "grad_norm": 0.2846173942089081, "learning_rate": 8.595333646640421e-06, "loss": 0.3521, "step": 6698 }, { "epoch": 0.9605678233438486, "grad_norm": 0.29938122630119324, "learning_rate": 8.594753846839776e-06, "loss": 0.3406, "step": 6699 }, { "epoch": 0.9607112130771437, "grad_norm": 0.3147987723350525, "learning_rate": 8.594173946966714e-06, "loss": 0.3267, "step": 6700 }, { "epoch": 0.9608546028104388, "grad_norm": 0.290939599275589, "learning_rate": 8.593593947037377e-06, "loss": 0.3141, "step": 6701 }, { "epoch": 0.9609979925437339, "grad_norm": 0.31378987431526184, "learning_rate": 8.593013847067912e-06, "loss": 0.3084, "step": 6702 }, { "epoch": 0.9611413822770289, "grad_norm": 0.28646013140678406, "learning_rate": 8.59243364707447e-06, "loss": 0.3256, "step": 6703 }, { "epoch": 0.961284772010324, "grad_norm": 0.30586865544319153, "learning_rate": 8.5918533470732e-06, "loss": 0.3273, "step": 6704 }, { "epoch": 0.9614281617436191, "grad_norm": 0.2835885286331177, "learning_rate": 8.591272947080261e-06, "loss": 0.3475, "step": 6705 }, { "epoch": 0.9615715514769142, "grad_norm": 0.29802677035331726, "learning_rate": 8.590692447111805e-06, "loss": 0.3296, "step": 6706 }, { "epoch": 0.9617149412102094, "grad_norm": 0.29092496633529663, "learning_rate": 8.590111847183995e-06, "loss": 0.3442, "step": 6707 }, { "epoch": 0.9618583309435045, "grad_norm": 0.284479022026062, "learning_rate": 8.589531147312995e-06, "loss": 0.3341, "step": 6708 }, { "epoch": 0.9620017206767996, "grad_norm": 0.3084118366241455, "learning_rate": 8.588950347514969e-06, "loss": 0.3406, "step": 6709 }, { "epoch": 0.9621451104100947, "grad_norm": 0.27170050144195557, "learning_rate": 8.588369447806084e-06, "loss": 0.3334, "step": 6710 }, { "epoch": 0.9622885001433897, "grad_norm": 0.29506000876426697, "learning_rate": 8.587788448202517e-06, "loss": 0.3301, "step": 6711 }, { "epoch": 0.9624318898766848, "grad_norm": 0.3086395263671875, "learning_rate": 8.587207348720436e-06, "loss": 0.3516, "step": 6712 }, { "epoch": 0.9625752796099799, "grad_norm": 0.29235517978668213, "learning_rate": 8.586626149376023e-06, "loss": 0.3393, "step": 6713 }, { "epoch": 0.962718669343275, "grad_norm": 0.3232138454914093, "learning_rate": 8.586044850185454e-06, "loss": 0.331, "step": 6714 }, { "epoch": 0.9628620590765701, "grad_norm": 0.3100469410419464, "learning_rate": 8.585463451164913e-06, "loss": 0.3385, "step": 6715 }, { "epoch": 0.9630054488098652, "grad_norm": 0.3717057704925537, "learning_rate": 8.584881952330584e-06, "loss": 0.3374, "step": 6716 }, { "epoch": 0.9631488385431604, "grad_norm": 0.2834240198135376, "learning_rate": 8.584300353698657e-06, "loss": 0.3267, "step": 6717 }, { "epoch": 0.9632922282764554, "grad_norm": 0.3122594952583313, "learning_rate": 8.583718655285323e-06, "loss": 0.3059, "step": 6718 }, { "epoch": 0.9634356180097505, "grad_norm": 0.3153820037841797, "learning_rate": 8.583136857106772e-06, "loss": 0.3308, "step": 6719 }, { "epoch": 0.9635790077430456, "grad_norm": 0.3049597442150116, "learning_rate": 8.582554959179204e-06, "loss": 0.3073, "step": 6720 }, { "epoch": 0.9637223974763407, "grad_norm": 0.2945401668548584, "learning_rate": 8.581972961518817e-06, "loss": 0.3279, "step": 6721 }, { "epoch": 0.9638657872096358, "grad_norm": 0.30900776386260986, "learning_rate": 8.581390864141812e-06, "loss": 0.3268, "step": 6722 }, { "epoch": 0.9640091769429309, "grad_norm": 0.3161795139312744, "learning_rate": 8.580808667064395e-06, "loss": 0.3152, "step": 6723 }, { "epoch": 0.964152566676226, "grad_norm": 0.3314037024974823, "learning_rate": 8.580226370302773e-06, "loss": 0.3222, "step": 6724 }, { "epoch": 0.964295956409521, "grad_norm": 0.30357626080513, "learning_rate": 8.579643973873157e-06, "loss": 0.3419, "step": 6725 }, { "epoch": 0.9644393461428161, "grad_norm": 0.3237442970275879, "learning_rate": 8.579061477791759e-06, "loss": 0.3164, "step": 6726 }, { "epoch": 0.9645827358761113, "grad_norm": 0.3418356478214264, "learning_rate": 8.578478882074794e-06, "loss": 0.3311, "step": 6727 }, { "epoch": 0.9647261256094064, "grad_norm": 0.3336818516254425, "learning_rate": 8.577896186738483e-06, "loss": 0.3335, "step": 6728 }, { "epoch": 0.9648695153427015, "grad_norm": 0.29135623574256897, "learning_rate": 8.577313391799046e-06, "loss": 0.3287, "step": 6729 }, { "epoch": 0.9650129050759966, "grad_norm": 0.3133266568183899, "learning_rate": 8.576730497272707e-06, "loss": 0.3324, "step": 6730 }, { "epoch": 0.9651562948092917, "grad_norm": 0.31563547253608704, "learning_rate": 8.576147503175693e-06, "loss": 0.3071, "step": 6731 }, { "epoch": 0.9652996845425867, "grad_norm": 0.33161595463752747, "learning_rate": 8.575564409524233e-06, "loss": 0.3392, "step": 6732 }, { "epoch": 0.9654430742758818, "grad_norm": 0.2851324677467346, "learning_rate": 8.574981216334559e-06, "loss": 0.3339, "step": 6733 }, { "epoch": 0.9655864640091769, "grad_norm": 0.2892874479293823, "learning_rate": 8.57439792362291e-06, "loss": 0.3299, "step": 6734 }, { "epoch": 0.965729853742472, "grad_norm": 0.27748140692710876, "learning_rate": 8.57381453140552e-06, "loss": 0.3223, "step": 6735 }, { "epoch": 0.9658732434757671, "grad_norm": 0.29981011152267456, "learning_rate": 8.573231039698631e-06, "loss": 0.3286, "step": 6736 }, { "epoch": 0.9660166332090623, "grad_norm": 0.3054746091365814, "learning_rate": 8.572647448518486e-06, "loss": 0.3346, "step": 6737 }, { "epoch": 0.9661600229423574, "grad_norm": 0.3136594891548157, "learning_rate": 8.572063757881333e-06, "loss": 0.3194, "step": 6738 }, { "epoch": 0.9663034126756525, "grad_norm": 0.28380388021469116, "learning_rate": 8.571479967803418e-06, "loss": 0.3225, "step": 6739 }, { "epoch": 0.9664468024089475, "grad_norm": 0.31131187081336975, "learning_rate": 8.570896078300996e-06, "loss": 0.3333, "step": 6740 }, { "epoch": 0.9665901921422426, "grad_norm": 0.30222779512405396, "learning_rate": 8.57031208939032e-06, "loss": 0.329, "step": 6741 }, { "epoch": 0.9667335818755377, "grad_norm": 0.2959979176521301, "learning_rate": 8.569728001087646e-06, "loss": 0.3438, "step": 6742 }, { "epoch": 0.9668769716088328, "grad_norm": 0.3083336353302002, "learning_rate": 8.569143813409239e-06, "loss": 0.3175, "step": 6743 }, { "epoch": 0.9670203613421279, "grad_norm": 0.28491875529289246, "learning_rate": 8.568559526371357e-06, "loss": 0.3325, "step": 6744 }, { "epoch": 0.967163751075423, "grad_norm": 0.30034226179122925, "learning_rate": 8.567975139990268e-06, "loss": 0.3393, "step": 6745 }, { "epoch": 0.967307140808718, "grad_norm": 0.30210867524147034, "learning_rate": 8.567390654282238e-06, "loss": 0.3389, "step": 6746 }, { "epoch": 0.9674505305420132, "grad_norm": 0.3380228579044342, "learning_rate": 8.566806069263541e-06, "loss": 0.3414, "step": 6747 }, { "epoch": 0.9675939202753083, "grad_norm": 0.31119102239608765, "learning_rate": 8.566221384950449e-06, "loss": 0.3235, "step": 6748 }, { "epoch": 0.9677373100086034, "grad_norm": 0.3261133134365082, "learning_rate": 8.56563660135924e-06, "loss": 0.3466, "step": 6749 }, { "epoch": 0.9678806997418985, "grad_norm": 0.3412725329399109, "learning_rate": 8.565051718506194e-06, "loss": 0.3337, "step": 6750 }, { "epoch": 0.9680240894751936, "grad_norm": 0.3471300005912781, "learning_rate": 8.564466736407591e-06, "loss": 0.3169, "step": 6751 }, { "epoch": 0.9681674792084887, "grad_norm": 0.2945403754711151, "learning_rate": 8.563881655079717e-06, "loss": 0.3271, "step": 6752 }, { "epoch": 0.9683108689417838, "grad_norm": 0.39973923563957214, "learning_rate": 8.56329647453886e-06, "loss": 0.3453, "step": 6753 }, { "epoch": 0.9684542586750788, "grad_norm": 0.3330855369567871, "learning_rate": 8.562711194801312e-06, "loss": 0.3455, "step": 6754 }, { "epoch": 0.9685976484083739, "grad_norm": 0.31884852051734924, "learning_rate": 8.562125815883364e-06, "loss": 0.3259, "step": 6755 }, { "epoch": 0.968741038141669, "grad_norm": 0.2978701591491699, "learning_rate": 8.561540337801314e-06, "loss": 0.3345, "step": 6756 }, { "epoch": 0.9688844278749642, "grad_norm": 0.34775808453559875, "learning_rate": 8.560954760571457e-06, "loss": 0.3264, "step": 6757 }, { "epoch": 0.9690278176082593, "grad_norm": 0.35386571288108826, "learning_rate": 8.560369084210098e-06, "loss": 0.3612, "step": 6758 }, { "epoch": 0.9691712073415544, "grad_norm": 0.2841746509075165, "learning_rate": 8.559783308733542e-06, "loss": 0.3188, "step": 6759 }, { "epoch": 0.9693145970748495, "grad_norm": 0.29701855778694153, "learning_rate": 8.559197434158094e-06, "loss": 0.3116, "step": 6760 }, { "epoch": 0.9694579868081445, "grad_norm": 0.35255342721939087, "learning_rate": 8.558611460500063e-06, "loss": 0.3438, "step": 6761 }, { "epoch": 0.9696013765414396, "grad_norm": 0.3407386541366577, "learning_rate": 8.558025387775766e-06, "loss": 0.3346, "step": 6762 }, { "epoch": 0.9697447662747347, "grad_norm": 0.2864888310432434, "learning_rate": 8.557439216001513e-06, "loss": 0.3187, "step": 6763 }, { "epoch": 0.9698881560080298, "grad_norm": 0.3469774127006531, "learning_rate": 8.556852945193625e-06, "loss": 0.3279, "step": 6764 }, { "epoch": 0.9700315457413249, "grad_norm": 0.311644047498703, "learning_rate": 8.556266575368424e-06, "loss": 0.3261, "step": 6765 }, { "epoch": 0.97017493547462, "grad_norm": 0.28637203574180603, "learning_rate": 8.555680106542232e-06, "loss": 0.345, "step": 6766 }, { "epoch": 0.9703183252079152, "grad_norm": 0.30815526843070984, "learning_rate": 8.555093538731374e-06, "loss": 0.3139, "step": 6767 }, { "epoch": 0.9704617149412103, "grad_norm": 0.3556819558143616, "learning_rate": 8.554506871952181e-06, "loss": 0.3249, "step": 6768 }, { "epoch": 0.9706051046745053, "grad_norm": 0.3361036479473114, "learning_rate": 8.553920106220985e-06, "loss": 0.3164, "step": 6769 }, { "epoch": 0.9707484944078004, "grad_norm": 0.284481018781662, "learning_rate": 8.55333324155412e-06, "loss": 0.2974, "step": 6770 }, { "epoch": 0.9708918841410955, "grad_norm": 0.3525649309158325, "learning_rate": 8.552746277967922e-06, "loss": 0.3279, "step": 6771 }, { "epoch": 0.9710352738743906, "grad_norm": 0.31759411096572876, "learning_rate": 8.552159215478736e-06, "loss": 0.3104, "step": 6772 }, { "epoch": 0.9711786636076857, "grad_norm": 0.30946844816207886, "learning_rate": 8.5515720541029e-06, "loss": 0.36, "step": 6773 }, { "epoch": 0.9713220533409808, "grad_norm": 0.3354054391384125, "learning_rate": 8.550984793856762e-06, "loss": 0.3406, "step": 6774 }, { "epoch": 0.9714654430742758, "grad_norm": 0.33570870757102966, "learning_rate": 8.550397434756669e-06, "loss": 0.3181, "step": 6775 }, { "epoch": 0.9716088328075709, "grad_norm": 0.3106265962123871, "learning_rate": 8.549809976818973e-06, "loss": 0.3193, "step": 6776 }, { "epoch": 0.9717522225408661, "grad_norm": 0.31444621086120605, "learning_rate": 8.549222420060028e-06, "loss": 0.3123, "step": 6777 }, { "epoch": 0.9718956122741612, "grad_norm": 0.3033117353916168, "learning_rate": 8.548634764496191e-06, "loss": 0.3261, "step": 6778 }, { "epoch": 0.9720390020074563, "grad_norm": 0.35888832807540894, "learning_rate": 8.548047010143821e-06, "loss": 0.3573, "step": 6779 }, { "epoch": 0.9721823917407514, "grad_norm": 0.28853335976600647, "learning_rate": 8.547459157019278e-06, "loss": 0.3299, "step": 6780 }, { "epoch": 0.9723257814740465, "grad_norm": 0.3107786178588867, "learning_rate": 8.546871205138932e-06, "loss": 0.3185, "step": 6781 }, { "epoch": 0.9724691712073416, "grad_norm": 0.2792521119117737, "learning_rate": 8.546283154519147e-06, "loss": 0.3265, "step": 6782 }, { "epoch": 0.9726125609406366, "grad_norm": 0.30033111572265625, "learning_rate": 8.545695005176293e-06, "loss": 0.3374, "step": 6783 }, { "epoch": 0.9727559506739317, "grad_norm": 0.2999013066291809, "learning_rate": 8.545106757126744e-06, "loss": 0.333, "step": 6784 }, { "epoch": 0.9728993404072268, "grad_norm": 0.30236828327178955, "learning_rate": 8.544518410386879e-06, "loss": 0.3304, "step": 6785 }, { "epoch": 0.9730427301405219, "grad_norm": 0.3034970462322235, "learning_rate": 8.543929964973072e-06, "loss": 0.3242, "step": 6786 }, { "epoch": 0.973186119873817, "grad_norm": 0.2979600429534912, "learning_rate": 8.543341420901705e-06, "loss": 0.3315, "step": 6787 }, { "epoch": 0.9733295096071122, "grad_norm": 0.3047897219657898, "learning_rate": 8.542752778189168e-06, "loss": 0.3381, "step": 6788 }, { "epoch": 0.9734728993404073, "grad_norm": 0.29216641187667847, "learning_rate": 8.54216403685184e-06, "loss": 0.3396, "step": 6789 }, { "epoch": 0.9736162890737023, "grad_norm": 0.29252463579177856, "learning_rate": 8.541575196906113e-06, "loss": 0.3258, "step": 6790 }, { "epoch": 0.9737596788069974, "grad_norm": 0.2929069995880127, "learning_rate": 8.540986258368383e-06, "loss": 0.3249, "step": 6791 }, { "epoch": 0.9739030685402925, "grad_norm": 0.32323727011680603, "learning_rate": 8.540397221255042e-06, "loss": 0.3426, "step": 6792 }, { "epoch": 0.9740464582735876, "grad_norm": 0.27476945519447327, "learning_rate": 8.539808085582489e-06, "loss": 0.3328, "step": 6793 }, { "epoch": 0.9741898480068827, "grad_norm": 0.30147138237953186, "learning_rate": 8.539218851367122e-06, "loss": 0.333, "step": 6794 }, { "epoch": 0.9743332377401778, "grad_norm": 0.27897727489471436, "learning_rate": 8.538629518625349e-06, "loss": 0.3358, "step": 6795 }, { "epoch": 0.9744766274734729, "grad_norm": 0.2942458987236023, "learning_rate": 8.538040087373573e-06, "loss": 0.3169, "step": 6796 }, { "epoch": 0.9746200172067679, "grad_norm": 0.287399560213089, "learning_rate": 8.537450557628204e-06, "loss": 0.3154, "step": 6797 }, { "epoch": 0.9747634069400631, "grad_norm": 0.29352498054504395, "learning_rate": 8.53686092940565e-06, "loss": 0.332, "step": 6798 }, { "epoch": 0.9749067966733582, "grad_norm": 0.28277820348739624, "learning_rate": 8.53627120272233e-06, "loss": 0.3122, "step": 6799 }, { "epoch": 0.9750501864066533, "grad_norm": 0.3109792470932007, "learning_rate": 8.535681377594662e-06, "loss": 0.3266, "step": 6800 }, { "epoch": 0.9751935761399484, "grad_norm": 0.2866467237472534, "learning_rate": 8.535091454039061e-06, "loss": 0.3373, "step": 6801 }, { "epoch": 0.9753369658732435, "grad_norm": 0.30141690373420715, "learning_rate": 8.534501432071954e-06, "loss": 0.327, "step": 6802 }, { "epoch": 0.9754803556065386, "grad_norm": 0.28200310468673706, "learning_rate": 8.533911311709761e-06, "loss": 0.315, "step": 6803 }, { "epoch": 0.9756237453398336, "grad_norm": 0.3008173406124115, "learning_rate": 8.533321092968914e-06, "loss": 0.3545, "step": 6804 }, { "epoch": 0.9757671350731287, "grad_norm": 0.28878167271614075, "learning_rate": 8.532730775865845e-06, "loss": 0.3274, "step": 6805 }, { "epoch": 0.9759105248064238, "grad_norm": 0.3203299939632416, "learning_rate": 8.532140360416985e-06, "loss": 0.3205, "step": 6806 }, { "epoch": 0.9760539145397189, "grad_norm": 0.2931588292121887, "learning_rate": 8.531549846638769e-06, "loss": 0.3242, "step": 6807 }, { "epoch": 0.9761973042730141, "grad_norm": 0.2787298858165741, "learning_rate": 8.53095923454764e-06, "loss": 0.3432, "step": 6808 }, { "epoch": 0.9763406940063092, "grad_norm": 0.32513532042503357, "learning_rate": 8.530368524160037e-06, "loss": 0.3489, "step": 6809 }, { "epoch": 0.9764840837396043, "grad_norm": 0.2782233953475952, "learning_rate": 8.529777715492405e-06, "loss": 0.3419, "step": 6810 }, { "epoch": 0.9766274734728994, "grad_norm": 0.29332205653190613, "learning_rate": 8.52918680856119e-06, "loss": 0.3392, "step": 6811 }, { "epoch": 0.9767708632061944, "grad_norm": 0.27619919180870056, "learning_rate": 8.528595803382844e-06, "loss": 0.3123, "step": 6812 }, { "epoch": 0.9769142529394895, "grad_norm": 0.2775379419326782, "learning_rate": 8.528004699973819e-06, "loss": 0.3354, "step": 6813 }, { "epoch": 0.9770576426727846, "grad_norm": 0.27715566754341125, "learning_rate": 8.52741349835057e-06, "loss": 0.3199, "step": 6814 }, { "epoch": 0.9772010324060797, "grad_norm": 0.30491146445274353, "learning_rate": 8.526822198529555e-06, "loss": 0.3337, "step": 6815 }, { "epoch": 0.9773444221393748, "grad_norm": 0.2772219777107239, "learning_rate": 8.526230800527237e-06, "loss": 0.3242, "step": 6816 }, { "epoch": 0.9774878118726699, "grad_norm": 0.30628857016563416, "learning_rate": 8.525639304360077e-06, "loss": 0.331, "step": 6817 }, { "epoch": 0.9776312016059651, "grad_norm": 0.31965503096580505, "learning_rate": 8.52504771004454e-06, "loss": 0.3581, "step": 6818 }, { "epoch": 0.9777745913392601, "grad_norm": 0.30041933059692383, "learning_rate": 8.5244560175971e-06, "loss": 0.3292, "step": 6819 }, { "epoch": 0.9779179810725552, "grad_norm": 0.3267004191875458, "learning_rate": 8.523864227034225e-06, "loss": 0.3166, "step": 6820 }, { "epoch": 0.9780613708058503, "grad_norm": 0.3105599880218506, "learning_rate": 8.523272338372392e-06, "loss": 0.3148, "step": 6821 }, { "epoch": 0.9782047605391454, "grad_norm": 0.30613774061203003, "learning_rate": 8.522680351628075e-06, "loss": 0.3322, "step": 6822 }, { "epoch": 0.9783481502724405, "grad_norm": 0.322421133518219, "learning_rate": 8.522088266817757e-06, "loss": 0.3493, "step": 6823 }, { "epoch": 0.9784915400057356, "grad_norm": 0.310533344745636, "learning_rate": 8.521496083957918e-06, "loss": 0.32, "step": 6824 }, { "epoch": 0.9786349297390307, "grad_norm": 0.2814190685749054, "learning_rate": 8.520903803065047e-06, "loss": 0.3195, "step": 6825 }, { "epoch": 0.9787783194723257, "grad_norm": 0.2973532974720001, "learning_rate": 8.52031142415563e-06, "loss": 0.3361, "step": 6826 }, { "epoch": 0.9789217092056208, "grad_norm": 0.3223320245742798, "learning_rate": 8.519718947246159e-06, "loss": 0.3498, "step": 6827 }, { "epoch": 0.979065098938916, "grad_norm": 0.31624671816825867, "learning_rate": 8.519126372353124e-06, "loss": 0.3333, "step": 6828 }, { "epoch": 0.9792084886722111, "grad_norm": 0.28038856387138367, "learning_rate": 8.518533699493028e-06, "loss": 0.3289, "step": 6829 }, { "epoch": 0.9793518784055062, "grad_norm": 0.2874532639980316, "learning_rate": 8.517940928682363e-06, "loss": 0.3195, "step": 6830 }, { "epoch": 0.9794952681388013, "grad_norm": 0.3400161564350128, "learning_rate": 8.517348059937636e-06, "loss": 0.3277, "step": 6831 }, { "epoch": 0.9796386578720964, "grad_norm": 0.29830819368362427, "learning_rate": 8.51675509327535e-06, "loss": 0.3225, "step": 6832 }, { "epoch": 0.9797820476053914, "grad_norm": 0.3016589879989624, "learning_rate": 8.516162028712009e-06, "loss": 0.343, "step": 6833 }, { "epoch": 0.9799254373386865, "grad_norm": 0.3382387161254883, "learning_rate": 8.515568866264128e-06, "loss": 0.3278, "step": 6834 }, { "epoch": 0.9800688270719816, "grad_norm": 0.29628807306289673, "learning_rate": 8.51497560594822e-06, "loss": 0.3316, "step": 6835 }, { "epoch": 0.9802122168052767, "grad_norm": 0.2898840308189392, "learning_rate": 8.514382247780795e-06, "loss": 0.3259, "step": 6836 }, { "epoch": 0.9803556065385718, "grad_norm": 0.2917821407318115, "learning_rate": 8.513788791778376e-06, "loss": 0.3308, "step": 6837 }, { "epoch": 0.980498996271867, "grad_norm": 0.3183794915676117, "learning_rate": 8.51319523795748e-06, "loss": 0.3563, "step": 6838 }, { "epoch": 0.9806423860051621, "grad_norm": 0.29509568214416504, "learning_rate": 8.512601586334636e-06, "loss": 0.3219, "step": 6839 }, { "epoch": 0.9807857757384572, "grad_norm": 0.2873883843421936, "learning_rate": 8.512007836926365e-06, "loss": 0.326, "step": 6840 }, { "epoch": 0.9809291654717522, "grad_norm": 0.2969858944416046, "learning_rate": 8.5114139897492e-06, "loss": 0.3224, "step": 6841 }, { "epoch": 0.9810725552050473, "grad_norm": 0.308972030878067, "learning_rate": 8.51082004481967e-06, "loss": 0.3399, "step": 6842 }, { "epoch": 0.9812159449383424, "grad_norm": 0.28723520040512085, "learning_rate": 8.510226002154311e-06, "loss": 0.3467, "step": 6843 }, { "epoch": 0.9813593346716375, "grad_norm": 0.279379665851593, "learning_rate": 8.509631861769659e-06, "loss": 0.3257, "step": 6844 }, { "epoch": 0.9815027244049326, "grad_norm": 0.3309381902217865, "learning_rate": 8.509037623682257e-06, "loss": 0.3256, "step": 6845 }, { "epoch": 0.9816461141382277, "grad_norm": 0.2947089970111847, "learning_rate": 8.508443287908642e-06, "loss": 0.3267, "step": 6846 }, { "epoch": 0.9817895038715228, "grad_norm": 0.4094577431678772, "learning_rate": 8.507848854465366e-06, "loss": 0.3364, "step": 6847 }, { "epoch": 0.981932893604818, "grad_norm": 0.29010209441185, "learning_rate": 8.507254323368971e-06, "loss": 0.3119, "step": 6848 }, { "epoch": 0.982076283338113, "grad_norm": 0.3055132031440735, "learning_rate": 8.506659694636013e-06, "loss": 0.3597, "step": 6849 }, { "epoch": 0.9822196730714081, "grad_norm": 0.2878991961479187, "learning_rate": 8.506064968283043e-06, "loss": 0.3301, "step": 6850 }, { "epoch": 0.9823630628047032, "grad_norm": 0.297598659992218, "learning_rate": 8.505470144326618e-06, "loss": 0.3314, "step": 6851 }, { "epoch": 0.9825064525379983, "grad_norm": 0.29328468441963196, "learning_rate": 8.504875222783296e-06, "loss": 0.3285, "step": 6852 }, { "epoch": 0.9826498422712934, "grad_norm": 0.26992300152778625, "learning_rate": 8.50428020366964e-06, "loss": 0.3134, "step": 6853 }, { "epoch": 0.9827932320045885, "grad_norm": 0.28373414278030396, "learning_rate": 8.503685087002214e-06, "loss": 0.3427, "step": 6854 }, { "epoch": 0.9829366217378835, "grad_norm": 0.27322089672088623, "learning_rate": 8.503089872797584e-06, "loss": 0.3264, "step": 6855 }, { "epoch": 0.9830800114711786, "grad_norm": 0.29061561822891235, "learning_rate": 8.50249456107232e-06, "loss": 0.3432, "step": 6856 }, { "epoch": 0.9832234012044737, "grad_norm": 0.29969584941864014, "learning_rate": 8.501899151842997e-06, "loss": 0.3627, "step": 6857 }, { "epoch": 0.9833667909377689, "grad_norm": 0.2984163165092468, "learning_rate": 8.501303645126186e-06, "loss": 0.3314, "step": 6858 }, { "epoch": 0.983510180671064, "grad_norm": 0.30082815885543823, "learning_rate": 8.50070804093847e-06, "loss": 0.3317, "step": 6859 }, { "epoch": 0.9836535704043591, "grad_norm": 0.2900239825248718, "learning_rate": 8.500112339296424e-06, "loss": 0.3336, "step": 6860 }, { "epoch": 0.9837969601376542, "grad_norm": 0.3241153955459595, "learning_rate": 8.499516540216639e-06, "loss": 0.3286, "step": 6861 }, { "epoch": 0.9839403498709492, "grad_norm": 0.3111821413040161, "learning_rate": 8.498920643715694e-06, "loss": 0.3394, "step": 6862 }, { "epoch": 0.9840837396042443, "grad_norm": 0.3123161196708679, "learning_rate": 8.49832464981018e-06, "loss": 0.3154, "step": 6863 }, { "epoch": 0.9842271293375394, "grad_norm": 0.3115169107913971, "learning_rate": 8.497728558516693e-06, "loss": 0.317, "step": 6864 }, { "epoch": 0.9843705190708345, "grad_norm": 0.3077925741672516, "learning_rate": 8.49713236985182e-06, "loss": 0.3278, "step": 6865 }, { "epoch": 0.9845139088041296, "grad_norm": 0.2895929515361786, "learning_rate": 8.496536083832163e-06, "loss": 0.3383, "step": 6866 }, { "epoch": 0.9846572985374247, "grad_norm": 0.30655428767204285, "learning_rate": 8.495939700474319e-06, "loss": 0.3151, "step": 6867 }, { "epoch": 0.9848006882707199, "grad_norm": 0.2781526744365692, "learning_rate": 8.495343219794891e-06, "loss": 0.316, "step": 6868 }, { "epoch": 0.984944078004015, "grad_norm": 0.31574249267578125, "learning_rate": 8.494746641810485e-06, "loss": 0.3073, "step": 6869 }, { "epoch": 0.98508746773731, "grad_norm": 0.31063807010650635, "learning_rate": 8.49414996653771e-06, "loss": 0.3382, "step": 6870 }, { "epoch": 0.9852308574706051, "grad_norm": 0.2945428192615509, "learning_rate": 8.493553193993175e-06, "loss": 0.3363, "step": 6871 }, { "epoch": 0.9853742472039002, "grad_norm": 0.2896021902561188, "learning_rate": 8.492956324193491e-06, "loss": 0.31, "step": 6872 }, { "epoch": 0.9855176369371953, "grad_norm": 0.32999616861343384, "learning_rate": 8.492359357155277e-06, "loss": 0.3263, "step": 6873 }, { "epoch": 0.9856610266704904, "grad_norm": 0.3484703004360199, "learning_rate": 8.491762292895152e-06, "loss": 0.3452, "step": 6874 }, { "epoch": 0.9858044164037855, "grad_norm": 0.2880992591381073, "learning_rate": 8.491165131429735e-06, "loss": 0.312, "step": 6875 }, { "epoch": 0.9859478061370806, "grad_norm": 0.2931400537490845, "learning_rate": 8.490567872775652e-06, "loss": 0.3406, "step": 6876 }, { "epoch": 0.9860911958703756, "grad_norm": 0.3035893738269806, "learning_rate": 8.489970516949527e-06, "loss": 0.3451, "step": 6877 }, { "epoch": 0.9862345856036708, "grad_norm": 0.3136356472969055, "learning_rate": 8.489373063967994e-06, "loss": 0.3505, "step": 6878 }, { "epoch": 0.9863779753369659, "grad_norm": 0.3206198215484619, "learning_rate": 8.48877551384768e-06, "loss": 0.3291, "step": 6879 }, { "epoch": 0.986521365070261, "grad_norm": 0.32271870970726013, "learning_rate": 8.488177866605224e-06, "loss": 0.3309, "step": 6880 }, { "epoch": 0.9866647548035561, "grad_norm": 0.313595175743103, "learning_rate": 8.487580122257261e-06, "loss": 0.3384, "step": 6881 }, { "epoch": 0.9868081445368512, "grad_norm": 0.3033890724182129, "learning_rate": 8.486982280820432e-06, "loss": 0.3331, "step": 6882 }, { "epoch": 0.9869515342701463, "grad_norm": 0.3016406297683716, "learning_rate": 8.486384342311383e-06, "loss": 0.3463, "step": 6883 }, { "epoch": 0.9870949240034413, "grad_norm": 0.33235928416252136, "learning_rate": 8.485786306746755e-06, "loss": 0.3315, "step": 6884 }, { "epoch": 0.9872383137367364, "grad_norm": 0.3173108696937561, "learning_rate": 8.485188174143198e-06, "loss": 0.3212, "step": 6885 }, { "epoch": 0.9873817034700315, "grad_norm": 0.33602333068847656, "learning_rate": 8.484589944517364e-06, "loss": 0.3391, "step": 6886 }, { "epoch": 0.9875250932033266, "grad_norm": 0.3295559883117676, "learning_rate": 8.483991617885906e-06, "loss": 0.3487, "step": 6887 }, { "epoch": 0.9876684829366217, "grad_norm": 0.34092849493026733, "learning_rate": 8.483393194265482e-06, "loss": 0.3507, "step": 6888 }, { "epoch": 0.9878118726699169, "grad_norm": 0.3101394772529602, "learning_rate": 8.482794673672748e-06, "loss": 0.3395, "step": 6889 }, { "epoch": 0.987955262403212, "grad_norm": 0.32519587874412537, "learning_rate": 8.482196056124368e-06, "loss": 0.3288, "step": 6890 }, { "epoch": 0.988098652136507, "grad_norm": 0.3192654550075531, "learning_rate": 8.48159734163701e-06, "loss": 0.3256, "step": 6891 }, { "epoch": 0.9882420418698021, "grad_norm": 0.33235806226730347, "learning_rate": 8.480998530227333e-06, "loss": 0.3502, "step": 6892 }, { "epoch": 0.9883854316030972, "grad_norm": 0.3064768314361572, "learning_rate": 8.480399621912013e-06, "loss": 0.33, "step": 6893 }, { "epoch": 0.9885288213363923, "grad_norm": 0.290475070476532, "learning_rate": 8.479800616707723e-06, "loss": 0.3252, "step": 6894 }, { "epoch": 0.9886722110696874, "grad_norm": 0.3007156550884247, "learning_rate": 8.479201514631135e-06, "loss": 0.3262, "step": 6895 }, { "epoch": 0.9888156008029825, "grad_norm": 0.34763965010643005, "learning_rate": 8.47860231569893e-06, "loss": 0.323, "step": 6896 }, { "epoch": 0.9889589905362776, "grad_norm": 0.30374911427497864, "learning_rate": 8.478003019927787e-06, "loss": 0.3311, "step": 6897 }, { "epoch": 0.9891023802695726, "grad_norm": 0.3098985254764557, "learning_rate": 8.477403627334391e-06, "loss": 0.3293, "step": 6898 }, { "epoch": 0.9892457700028678, "grad_norm": 0.28981879353523254, "learning_rate": 8.476804137935427e-06, "loss": 0.3189, "step": 6899 }, { "epoch": 0.9893891597361629, "grad_norm": 0.31186798214912415, "learning_rate": 8.476204551747581e-06, "loss": 0.333, "step": 6900 }, { "epoch": 0.989532549469458, "grad_norm": 0.305637389421463, "learning_rate": 8.475604868787552e-06, "loss": 0.3365, "step": 6901 }, { "epoch": 0.9896759392027531, "grad_norm": 0.28599220514297485, "learning_rate": 8.475005089072028e-06, "loss": 0.305, "step": 6902 }, { "epoch": 0.9898193289360482, "grad_norm": 0.3142022490501404, "learning_rate": 8.474405212617708e-06, "loss": 0.3323, "step": 6903 }, { "epoch": 0.9899627186693433, "grad_norm": 0.28762251138687134, "learning_rate": 8.473805239441292e-06, "loss": 0.3163, "step": 6904 }, { "epoch": 0.9901061084026384, "grad_norm": 0.2939949035644531, "learning_rate": 8.473205169559481e-06, "loss": 0.3375, "step": 6905 }, { "epoch": 0.9902494981359334, "grad_norm": 0.30735474824905396, "learning_rate": 8.472605002988982e-06, "loss": 0.3397, "step": 6906 }, { "epoch": 0.9903928878692285, "grad_norm": 0.33249330520629883, "learning_rate": 8.472004739746501e-06, "loss": 0.3472, "step": 6907 }, { "epoch": 0.9905362776025236, "grad_norm": 0.27464109659194946, "learning_rate": 8.47140437984875e-06, "loss": 0.3436, "step": 6908 }, { "epoch": 0.9906796673358188, "grad_norm": 0.2843821346759796, "learning_rate": 8.470803923312438e-06, "loss": 0.3378, "step": 6909 }, { "epoch": 0.9908230570691139, "grad_norm": 0.3124108910560608, "learning_rate": 8.470203370154288e-06, "loss": 0.3543, "step": 6910 }, { "epoch": 0.990966446802409, "grad_norm": 0.293811559677124, "learning_rate": 8.469602720391011e-06, "loss": 0.3211, "step": 6911 }, { "epoch": 0.9911098365357041, "grad_norm": 0.2855825126171112, "learning_rate": 8.469001974039333e-06, "loss": 0.3612, "step": 6912 }, { "epoch": 0.9912532262689991, "grad_norm": 0.3102196753025055, "learning_rate": 8.468401131115977e-06, "loss": 0.321, "step": 6913 }, { "epoch": 0.9913966160022942, "grad_norm": 0.3156512379646301, "learning_rate": 8.467800191637668e-06, "loss": 0.3234, "step": 6914 }, { "epoch": 0.9915400057355893, "grad_norm": 0.2942725121974945, "learning_rate": 8.467199155621135e-06, "loss": 0.3269, "step": 6915 }, { "epoch": 0.9916833954688844, "grad_norm": 0.31763672828674316, "learning_rate": 8.466598023083112e-06, "loss": 0.3442, "step": 6916 }, { "epoch": 0.9918267852021795, "grad_norm": 0.31518974900245667, "learning_rate": 8.465996794040334e-06, "loss": 0.337, "step": 6917 }, { "epoch": 0.9919701749354746, "grad_norm": 0.30930861830711365, "learning_rate": 8.465395468509536e-06, "loss": 0.343, "step": 6918 }, { "epoch": 0.9921135646687698, "grad_norm": 0.2924034595489502, "learning_rate": 8.464794046507458e-06, "loss": 0.3282, "step": 6919 }, { "epoch": 0.9922569544020649, "grad_norm": 0.3077686131000519, "learning_rate": 8.464192528050845e-06, "loss": 0.3225, "step": 6920 }, { "epoch": 0.9924003441353599, "grad_norm": 0.28193604946136475, "learning_rate": 8.463590913156441e-06, "loss": 0.3275, "step": 6921 }, { "epoch": 0.992543733868655, "grad_norm": 0.3221375048160553, "learning_rate": 8.462989201840992e-06, "loss": 0.3226, "step": 6922 }, { "epoch": 0.9926871236019501, "grad_norm": 0.30168113112449646, "learning_rate": 8.462387394121252e-06, "loss": 0.3351, "step": 6923 }, { "epoch": 0.9928305133352452, "grad_norm": 0.2739437222480774, "learning_rate": 8.461785490013974e-06, "loss": 0.3115, "step": 6924 }, { "epoch": 0.9929739030685403, "grad_norm": 0.31057503819465637, "learning_rate": 8.461183489535912e-06, "loss": 0.3217, "step": 6925 }, { "epoch": 0.9931172928018354, "grad_norm": 0.3298371434211731, "learning_rate": 8.460581392703825e-06, "loss": 0.3287, "step": 6926 }, { "epoch": 0.9932606825351304, "grad_norm": 0.3233749270439148, "learning_rate": 8.459979199534479e-06, "loss": 0.373, "step": 6927 }, { "epoch": 0.9934040722684255, "grad_norm": 0.30448922514915466, "learning_rate": 8.459376910044631e-06, "loss": 0.329, "step": 6928 }, { "epoch": 0.9935474620017207, "grad_norm": 0.31430888175964355, "learning_rate": 8.458774524251053e-06, "loss": 0.3238, "step": 6929 }, { "epoch": 0.9936908517350158, "grad_norm": 0.3096298575401306, "learning_rate": 8.458172042170511e-06, "loss": 0.3105, "step": 6930 }, { "epoch": 0.9938342414683109, "grad_norm": 0.3044799566268921, "learning_rate": 8.457569463819782e-06, "loss": 0.3252, "step": 6931 }, { "epoch": 0.993977631201606, "grad_norm": 0.31795626878738403, "learning_rate": 8.456966789215636e-06, "loss": 0.3311, "step": 6932 }, { "epoch": 0.9941210209349011, "grad_norm": 0.3298458456993103, "learning_rate": 8.456364018374852e-06, "loss": 0.3236, "step": 6933 }, { "epoch": 0.9942644106681962, "grad_norm": 0.2876961827278137, "learning_rate": 8.455761151314213e-06, "loss": 0.3283, "step": 6934 }, { "epoch": 0.9944078004014912, "grad_norm": 0.2864025831222534, "learning_rate": 8.455158188050496e-06, "loss": 0.3248, "step": 6935 }, { "epoch": 0.9945511901347863, "grad_norm": 0.31538867950439453, "learning_rate": 8.454555128600491e-06, "loss": 0.3438, "step": 6936 }, { "epoch": 0.9946945798680814, "grad_norm": 0.31069204211235046, "learning_rate": 8.453951972980987e-06, "loss": 0.3187, "step": 6937 }, { "epoch": 0.9948379696013765, "grad_norm": 0.2952869236469269, "learning_rate": 8.453348721208773e-06, "loss": 0.3119, "step": 6938 }, { "epoch": 0.9949813593346717, "grad_norm": 0.30581289529800415, "learning_rate": 8.452745373300641e-06, "loss": 0.3341, "step": 6939 }, { "epoch": 0.9951247490679668, "grad_norm": 0.29630839824676514, "learning_rate": 8.452141929273392e-06, "loss": 0.3351, "step": 6940 }, { "epoch": 0.9952681388012619, "grad_norm": 0.29732733964920044, "learning_rate": 8.45153838914382e-06, "loss": 0.3328, "step": 6941 }, { "epoch": 0.9954115285345569, "grad_norm": 0.27948763966560364, "learning_rate": 8.450934752928729e-06, "loss": 0.3303, "step": 6942 }, { "epoch": 0.995554918267852, "grad_norm": 0.3053082823753357, "learning_rate": 8.450331020644925e-06, "loss": 0.342, "step": 6943 }, { "epoch": 0.9956983080011471, "grad_norm": 0.2990955412387848, "learning_rate": 8.449727192309209e-06, "loss": 0.3339, "step": 6944 }, { "epoch": 0.9958416977344422, "grad_norm": 0.2961096167564392, "learning_rate": 8.449123267938396e-06, "loss": 0.3303, "step": 6945 }, { "epoch": 0.9959850874677373, "grad_norm": 0.3006407916545868, "learning_rate": 8.448519247549298e-06, "loss": 0.3203, "step": 6946 }, { "epoch": 0.9961284772010324, "grad_norm": 0.309241384267807, "learning_rate": 8.44791513115873e-06, "loss": 0.3295, "step": 6947 }, { "epoch": 0.9962718669343275, "grad_norm": 0.26518943905830383, "learning_rate": 8.447310918783506e-06, "loss": 0.3457, "step": 6948 }, { "epoch": 0.9964152566676227, "grad_norm": 0.3290643095970154, "learning_rate": 8.446706610440451e-06, "loss": 0.3514, "step": 6949 }, { "epoch": 0.9965586464009177, "grad_norm": 0.34209367632865906, "learning_rate": 8.446102206146385e-06, "loss": 0.3258, "step": 6950 }, { "epoch": 0.9967020361342128, "grad_norm": 0.27283889055252075, "learning_rate": 8.445497705918135e-06, "loss": 0.3418, "step": 6951 }, { "epoch": 0.9968454258675079, "grad_norm": 0.30953672528266907, "learning_rate": 8.44489310977253e-06, "loss": 0.309, "step": 6952 }, { "epoch": 0.996988815600803, "grad_norm": 0.28044572472572327, "learning_rate": 8.444288417726399e-06, "loss": 0.3219, "step": 6953 }, { "epoch": 0.9971322053340981, "grad_norm": 0.3030879497528076, "learning_rate": 8.443683629796578e-06, "loss": 0.3303, "step": 6954 }, { "epoch": 0.9972755950673932, "grad_norm": 0.27788087725639343, "learning_rate": 8.4430787459999e-06, "loss": 0.3404, "step": 6955 }, { "epoch": 0.9974189848006882, "grad_norm": 0.2904547452926636, "learning_rate": 8.442473766353207e-06, "loss": 0.323, "step": 6956 }, { "epoch": 0.9975623745339833, "grad_norm": 0.314202219247818, "learning_rate": 8.44186869087334e-06, "loss": 0.3526, "step": 6957 }, { "epoch": 0.9977057642672784, "grad_norm": 0.2880602180957794, "learning_rate": 8.441263519577142e-06, "loss": 0.2957, "step": 6958 }, { "epoch": 0.9978491540005736, "grad_norm": 0.27826017141342163, "learning_rate": 8.440658252481464e-06, "loss": 0.33, "step": 6959 }, { "epoch": 0.9979925437338687, "grad_norm": 0.25980308651924133, "learning_rate": 8.440052889603152e-06, "loss": 0.3337, "step": 6960 }, { "epoch": 0.9981359334671638, "grad_norm": 0.30200228095054626, "learning_rate": 8.439447430959058e-06, "loss": 0.3337, "step": 6961 }, { "epoch": 0.9982793232004589, "grad_norm": 0.32329878211021423, "learning_rate": 8.43884187656604e-06, "loss": 0.3387, "step": 6962 }, { "epoch": 0.998422712933754, "grad_norm": 0.2891524136066437, "learning_rate": 8.438236226440952e-06, "loss": 0.3216, "step": 6963 }, { "epoch": 0.998566102667049, "grad_norm": 0.3044980764389038, "learning_rate": 8.437630480600659e-06, "loss": 0.3236, "step": 6964 }, { "epoch": 0.9987094924003441, "grad_norm": 0.2902900278568268, "learning_rate": 8.43702463906202e-06, "loss": 0.3171, "step": 6965 }, { "epoch": 0.9988528821336392, "grad_norm": 0.2864428460597992, "learning_rate": 8.436418701841902e-06, "loss": 0.3484, "step": 6966 }, { "epoch": 0.9989962718669343, "grad_norm": 0.29843801259994507, "learning_rate": 8.435812668957175e-06, "loss": 0.3264, "step": 6967 }, { "epoch": 0.9991396616002294, "grad_norm": 0.30936118960380554, "learning_rate": 8.435206540424708e-06, "loss": 0.3377, "step": 6968 }, { "epoch": 0.9992830513335246, "grad_norm": 0.290460467338562, "learning_rate": 8.434600316261374e-06, "loss": 0.3532, "step": 6969 }, { "epoch": 0.9994264410668197, "grad_norm": 0.32292523980140686, "learning_rate": 8.43399399648405e-06, "loss": 0.3515, "step": 6970 }, { "epoch": 0.9995698308001147, "grad_norm": 0.3049003779888153, "learning_rate": 8.433387581109617e-06, "loss": 0.3396, "step": 6971 }, { "epoch": 0.9997132205334098, "grad_norm": 0.28532037138938904, "learning_rate": 8.432781070154956e-06, "loss": 0.3232, "step": 6972 }, { "epoch": 0.9998566102667049, "grad_norm": 0.2952840328216553, "learning_rate": 8.43217446363695e-06, "loss": 0.3521, "step": 6973 }, { "epoch": 1.0, "grad_norm": 0.31043073534965515, "learning_rate": 8.431567761572487e-06, "loss": 0.3149, "step": 6974 }, { "epoch": 1.0001433897332952, "grad_norm": 0.2872288227081299, "learning_rate": 8.430960963978456e-06, "loss": 0.3121, "step": 6975 }, { "epoch": 1.0002867794665902, "grad_norm": 0.3356529772281647, "learning_rate": 8.430354070871749e-06, "loss": 0.3407, "step": 6976 }, { "epoch": 1.0004301691998854, "grad_norm": 0.307657927274704, "learning_rate": 8.429747082269261e-06, "loss": 0.3001, "step": 6977 }, { "epoch": 1.0005735589331803, "grad_norm": 0.31758397817611694, "learning_rate": 8.429139998187891e-06, "loss": 0.3112, "step": 6978 }, { "epoch": 1.0007169486664755, "grad_norm": 0.323661744594574, "learning_rate": 8.42853281864454e-06, "loss": 0.3158, "step": 6979 }, { "epoch": 1.0008603383997705, "grad_norm": 0.31165191531181335, "learning_rate": 8.427925543656108e-06, "loss": 0.3048, "step": 6980 }, { "epoch": 1.0010037281330657, "grad_norm": 0.2802707552909851, "learning_rate": 8.427318173239502e-06, "loss": 0.3092, "step": 6981 }, { "epoch": 1.0011471178663607, "grad_norm": 0.30799272656440735, "learning_rate": 8.42671070741163e-06, "loss": 0.3051, "step": 6982 }, { "epoch": 1.0012905075996559, "grad_norm": 0.29965639114379883, "learning_rate": 8.426103146189403e-06, "loss": 0.2875, "step": 6983 }, { "epoch": 1.0014338973329509, "grad_norm": 0.3047478497028351, "learning_rate": 8.425495489589735e-06, "loss": 0.3251, "step": 6984 }, { "epoch": 1.001577287066246, "grad_norm": 0.27582547068595886, "learning_rate": 8.424887737629542e-06, "loss": 0.299, "step": 6985 }, { "epoch": 1.0017206767995412, "grad_norm": 0.3242773711681366, "learning_rate": 8.424279890325744e-06, "loss": 0.3019, "step": 6986 }, { "epoch": 1.0018640665328362, "grad_norm": 0.27533257007598877, "learning_rate": 8.42367194769526e-06, "loss": 0.3147, "step": 6987 }, { "epoch": 1.0020074562661314, "grad_norm": 0.30044761300086975, "learning_rate": 8.423063909755017e-06, "loss": 0.3333, "step": 6988 }, { "epoch": 1.0021508459994264, "grad_norm": 0.31553715467453003, "learning_rate": 8.422455776521938e-06, "loss": 0.3076, "step": 6989 }, { "epoch": 1.0022942357327216, "grad_norm": 0.3017868101596832, "learning_rate": 8.421847548012956e-06, "loss": 0.3405, "step": 6990 }, { "epoch": 1.0024376254660166, "grad_norm": 0.2933138906955719, "learning_rate": 8.421239224245002e-06, "loss": 0.3233, "step": 6991 }, { "epoch": 1.0025810151993118, "grad_norm": 0.2905856668949127, "learning_rate": 8.420630805235011e-06, "loss": 0.31, "step": 6992 }, { "epoch": 1.0027244049326067, "grad_norm": 0.30247390270233154, "learning_rate": 8.420022290999922e-06, "loss": 0.3041, "step": 6993 }, { "epoch": 1.002867794665902, "grad_norm": 0.28335827589035034, "learning_rate": 8.41941368155667e-06, "loss": 0.2848, "step": 6994 }, { "epoch": 1.0030111843991971, "grad_norm": 0.2948947846889496, "learning_rate": 8.418804976922203e-06, "loss": 0.3079, "step": 6995 }, { "epoch": 1.003154574132492, "grad_norm": 0.3286580741405487, "learning_rate": 8.418196177113465e-06, "loss": 0.2903, "step": 6996 }, { "epoch": 1.0032979638657873, "grad_norm": 0.351820170879364, "learning_rate": 8.417587282147403e-06, "loss": 0.3069, "step": 6997 }, { "epoch": 1.0034413535990823, "grad_norm": 0.29016557335853577, "learning_rate": 8.416978292040969e-06, "loss": 0.3092, "step": 6998 }, { "epoch": 1.0035847433323775, "grad_norm": 0.35746699571609497, "learning_rate": 8.416369206811115e-06, "loss": 0.3051, "step": 6999 }, { "epoch": 1.0037281330656724, "grad_norm": 0.3236907422542572, "learning_rate": 8.415760026474798e-06, "loss": 0.3031, "step": 7000 }, { "epoch": 1.0038715227989676, "grad_norm": 0.30646583437919617, "learning_rate": 8.415150751048975e-06, "loss": 0.2985, "step": 7001 }, { "epoch": 1.0040149125322626, "grad_norm": 0.31642526388168335, "learning_rate": 8.41454138055061e-06, "loss": 0.3345, "step": 7002 }, { "epoch": 1.0041583022655578, "grad_norm": 0.3014141023159027, "learning_rate": 8.413931914996664e-06, "loss": 0.3085, "step": 7003 }, { "epoch": 1.0043016919988528, "grad_norm": 0.2942686378955841, "learning_rate": 8.413322354404106e-06, "loss": 0.3064, "step": 7004 }, { "epoch": 1.004445081732148, "grad_norm": 0.3261772394180298, "learning_rate": 8.412712698789906e-06, "loss": 0.3142, "step": 7005 }, { "epoch": 1.0045884714654432, "grad_norm": 0.30675920844078064, "learning_rate": 8.412102948171033e-06, "loss": 0.3025, "step": 7006 }, { "epoch": 1.0047318611987381, "grad_norm": 0.3099031448364258, "learning_rate": 8.411493102564462e-06, "loss": 0.3184, "step": 7007 }, { "epoch": 1.0048752509320333, "grad_norm": 0.30981647968292236, "learning_rate": 8.410883161987172e-06, "loss": 0.2958, "step": 7008 }, { "epoch": 1.0050186406653283, "grad_norm": 0.31744736433029175, "learning_rate": 8.410273126456138e-06, "loss": 0.3116, "step": 7009 }, { "epoch": 1.0051620303986235, "grad_norm": 0.31566011905670166, "learning_rate": 8.409662995988351e-06, "loss": 0.3138, "step": 7010 }, { "epoch": 1.0053054201319185, "grad_norm": 0.29774564504623413, "learning_rate": 8.40905277060079e-06, "loss": 0.3169, "step": 7011 }, { "epoch": 1.0054488098652137, "grad_norm": 0.29309964179992676, "learning_rate": 8.408442450310442e-06, "loss": 0.3009, "step": 7012 }, { "epoch": 1.0055921995985087, "grad_norm": 0.30213597416877747, "learning_rate": 8.407832035134302e-06, "loss": 0.3074, "step": 7013 }, { "epoch": 1.0057355893318038, "grad_norm": 0.30930566787719727, "learning_rate": 8.407221525089357e-06, "loss": 0.3199, "step": 7014 }, { "epoch": 1.005878979065099, "grad_norm": 0.2849061191082001, "learning_rate": 8.40661092019261e-06, "loss": 0.3007, "step": 7015 }, { "epoch": 1.006022368798394, "grad_norm": 0.29782459139823914, "learning_rate": 8.406000220461052e-06, "loss": 0.3149, "step": 7016 }, { "epoch": 1.0061657585316892, "grad_norm": 0.30223774909973145, "learning_rate": 8.40538942591169e-06, "loss": 0.3265, "step": 7017 }, { "epoch": 1.0063091482649842, "grad_norm": 0.2975270748138428, "learning_rate": 8.404778536561522e-06, "loss": 0.3013, "step": 7018 }, { "epoch": 1.0064525379982794, "grad_norm": 0.30987444519996643, "learning_rate": 8.40416755242756e-06, "loss": 0.3093, "step": 7019 }, { "epoch": 1.0065959277315744, "grad_norm": 0.32007038593292236, "learning_rate": 8.403556473526808e-06, "loss": 0.3199, "step": 7020 }, { "epoch": 1.0067393174648696, "grad_norm": 0.28344374895095825, "learning_rate": 8.402945299876279e-06, "loss": 0.3117, "step": 7021 }, { "epoch": 1.0068827071981645, "grad_norm": 0.3137865662574768, "learning_rate": 8.402334031492989e-06, "loss": 0.3375, "step": 7022 }, { "epoch": 1.0070260969314597, "grad_norm": 0.29165199398994446, "learning_rate": 8.401722668393954e-06, "loss": 0.3111, "step": 7023 }, { "epoch": 1.0071694866647547, "grad_norm": 0.30943647027015686, "learning_rate": 8.40111121059619e-06, "loss": 0.3155, "step": 7024 }, { "epoch": 1.00731287639805, "grad_norm": 0.3464312255382538, "learning_rate": 8.400499658116725e-06, "loss": 0.305, "step": 7025 }, { "epoch": 1.007456266131345, "grad_norm": 0.30966395139694214, "learning_rate": 8.39988801097258e-06, "loss": 0.3138, "step": 7026 }, { "epoch": 1.00759965586464, "grad_norm": 0.28474313020706177, "learning_rate": 8.399276269180781e-06, "loss": 0.3119, "step": 7027 }, { "epoch": 1.0077430455979353, "grad_norm": 0.30283114314079285, "learning_rate": 8.398664432758358e-06, "loss": 0.325, "step": 7028 }, { "epoch": 1.0078864353312302, "grad_norm": 0.3107685148715973, "learning_rate": 8.398052501722349e-06, "loss": 0.2989, "step": 7029 }, { "epoch": 1.0080298250645254, "grad_norm": 0.31559163331985474, "learning_rate": 8.397440476089784e-06, "loss": 0.3184, "step": 7030 }, { "epoch": 1.0081732147978204, "grad_norm": 0.31747767329216003, "learning_rate": 8.396828355877703e-06, "loss": 0.301, "step": 7031 }, { "epoch": 1.0083166045311156, "grad_norm": 0.3140697777271271, "learning_rate": 8.396216141103146e-06, "loss": 0.3062, "step": 7032 }, { "epoch": 1.0084599942644106, "grad_norm": 0.3308229148387909, "learning_rate": 8.395603831783154e-06, "loss": 0.313, "step": 7033 }, { "epoch": 1.0086033839977058, "grad_norm": 0.3184923231601715, "learning_rate": 8.394991427934778e-06, "loss": 0.3146, "step": 7034 }, { "epoch": 1.008746773731001, "grad_norm": 0.3113466203212738, "learning_rate": 8.39437892957506e-06, "loss": 0.3155, "step": 7035 }, { "epoch": 1.008890163464296, "grad_norm": 0.3378489911556244, "learning_rate": 8.393766336721056e-06, "loss": 0.2912, "step": 7036 }, { "epoch": 1.0090335531975911, "grad_norm": 0.33595147728919983, "learning_rate": 8.393153649389818e-06, "loss": 0.3341, "step": 7037 }, { "epoch": 1.0091769429308861, "grad_norm": 0.31142598390579224, "learning_rate": 8.392540867598401e-06, "loss": 0.3103, "step": 7038 }, { "epoch": 1.0093203326641813, "grad_norm": 0.3042057454586029, "learning_rate": 8.391927991363864e-06, "loss": 0.3078, "step": 7039 }, { "epoch": 1.0094637223974763, "grad_norm": 0.3032097816467285, "learning_rate": 8.391315020703272e-06, "loss": 0.323, "step": 7040 }, { "epoch": 1.0096071121307715, "grad_norm": 0.2953544557094574, "learning_rate": 8.390701955633684e-06, "loss": 0.2971, "step": 7041 }, { "epoch": 1.0097505018640665, "grad_norm": 0.2867296636104584, "learning_rate": 8.390088796172173e-06, "loss": 0.3053, "step": 7042 }, { "epoch": 1.0098938915973616, "grad_norm": 0.30180472135543823, "learning_rate": 8.389475542335802e-06, "loss": 0.2842, "step": 7043 }, { "epoch": 1.0100372813306566, "grad_norm": 0.2985505163669586, "learning_rate": 8.388862194141647e-06, "loss": 0.3144, "step": 7044 }, { "epoch": 1.0101806710639518, "grad_norm": 0.30886873602867126, "learning_rate": 8.38824875160678e-06, "loss": 0.3, "step": 7045 }, { "epoch": 1.010324060797247, "grad_norm": 0.291257381439209, "learning_rate": 8.387635214748282e-06, "loss": 0.3175, "step": 7046 }, { "epoch": 1.010467450530542, "grad_norm": 0.30130526423454285, "learning_rate": 8.387021583583227e-06, "loss": 0.3254, "step": 7047 }, { "epoch": 1.0106108402638372, "grad_norm": 0.29522499442100525, "learning_rate": 8.386407858128707e-06, "loss": 0.309, "step": 7048 }, { "epoch": 1.0107542299971322, "grad_norm": 0.2962183356285095, "learning_rate": 8.385794038401798e-06, "loss": 0.2976, "step": 7049 }, { "epoch": 1.0108976197304274, "grad_norm": 0.28937339782714844, "learning_rate": 8.385180124419591e-06, "loss": 0.3005, "step": 7050 }, { "epoch": 1.0110410094637223, "grad_norm": 0.29834359884262085, "learning_rate": 8.38456611619918e-06, "loss": 0.3254, "step": 7051 }, { "epoch": 1.0111843991970175, "grad_norm": 0.32230228185653687, "learning_rate": 8.38395201375765e-06, "loss": 0.3242, "step": 7052 }, { "epoch": 1.0113277889303125, "grad_norm": 0.2952183187007904, "learning_rate": 8.383337817112104e-06, "loss": 0.3215, "step": 7053 }, { "epoch": 1.0114711786636077, "grad_norm": 0.28206363320350647, "learning_rate": 8.382723526279639e-06, "loss": 0.3066, "step": 7054 }, { "epoch": 1.011614568396903, "grad_norm": 0.2739787697792053, "learning_rate": 8.382109141277352e-06, "loss": 0.2956, "step": 7055 }, { "epoch": 1.0117579581301979, "grad_norm": 0.3064977824687958, "learning_rate": 8.381494662122351e-06, "loss": 0.3099, "step": 7056 }, { "epoch": 1.011901347863493, "grad_norm": 0.29841679334640503, "learning_rate": 8.38088008883174e-06, "loss": 0.3226, "step": 7057 }, { "epoch": 1.012044737596788, "grad_norm": 0.2843392789363861, "learning_rate": 8.380265421422626e-06, "loss": 0.3173, "step": 7058 }, { "epoch": 1.0121881273300832, "grad_norm": 0.29587826132774353, "learning_rate": 8.379650659912126e-06, "loss": 0.3173, "step": 7059 }, { "epoch": 1.0123315170633782, "grad_norm": 0.3012142777442932, "learning_rate": 8.379035804317348e-06, "loss": 0.3357, "step": 7060 }, { "epoch": 1.0124749067966734, "grad_norm": 0.3101758062839508, "learning_rate": 8.378420854655413e-06, "loss": 0.3142, "step": 7061 }, { "epoch": 1.0126182965299684, "grad_norm": 0.2725105285644531, "learning_rate": 8.377805810943436e-06, "loss": 0.3114, "step": 7062 }, { "epoch": 1.0127616862632636, "grad_norm": 0.28738757967948914, "learning_rate": 8.377190673198542e-06, "loss": 0.3256, "step": 7063 }, { "epoch": 1.0129050759965585, "grad_norm": 0.3041071593761444, "learning_rate": 8.376575441437855e-06, "loss": 0.3098, "step": 7064 }, { "epoch": 1.0130484657298537, "grad_norm": 0.3147135376930237, "learning_rate": 8.375960115678503e-06, "loss": 0.3141, "step": 7065 }, { "epoch": 1.013191855463149, "grad_norm": 0.3075563907623291, "learning_rate": 8.375344695937612e-06, "loss": 0.3055, "step": 7066 }, { "epoch": 1.013335245196444, "grad_norm": 0.2989511489868164, "learning_rate": 8.374729182232319e-06, "loss": 0.3007, "step": 7067 }, { "epoch": 1.013478634929739, "grad_norm": 0.3206154406070709, "learning_rate": 8.374113574579757e-06, "loss": 0.2896, "step": 7068 }, { "epoch": 1.013622024663034, "grad_norm": 0.34066981077194214, "learning_rate": 8.373497872997064e-06, "loss": 0.3283, "step": 7069 }, { "epoch": 1.0137654143963293, "grad_norm": 0.3080226182937622, "learning_rate": 8.372882077501378e-06, "loss": 0.3146, "step": 7070 }, { "epoch": 1.0139088041296243, "grad_norm": 0.2797584533691406, "learning_rate": 8.372266188109844e-06, "loss": 0.2837, "step": 7071 }, { "epoch": 1.0140521938629194, "grad_norm": 0.3288578987121582, "learning_rate": 8.371650204839607e-06, "loss": 0.3057, "step": 7072 }, { "epoch": 1.0141955835962144, "grad_norm": 0.3273755609989166, "learning_rate": 8.371034127707816e-06, "loss": 0.3004, "step": 7073 }, { "epoch": 1.0143389733295096, "grad_norm": 0.3033176064491272, "learning_rate": 8.370417956731619e-06, "loss": 0.3202, "step": 7074 }, { "epoch": 1.0144823630628048, "grad_norm": 0.33837345242500305, "learning_rate": 8.36980169192817e-06, "loss": 0.2957, "step": 7075 }, { "epoch": 1.0146257527960998, "grad_norm": 0.3212099075317383, "learning_rate": 8.369185333314627e-06, "loss": 0.332, "step": 7076 }, { "epoch": 1.014769142529395, "grad_norm": 0.3167802393436432, "learning_rate": 8.368568880908145e-06, "loss": 0.3216, "step": 7077 }, { "epoch": 1.01491253226269, "grad_norm": 0.29236674308776855, "learning_rate": 8.367952334725891e-06, "loss": 0.3123, "step": 7078 }, { "epoch": 1.0150559219959852, "grad_norm": 0.3360211253166199, "learning_rate": 8.367335694785022e-06, "loss": 0.3097, "step": 7079 }, { "epoch": 1.0151993117292801, "grad_norm": 0.35288774967193604, "learning_rate": 8.366718961102708e-06, "loss": 0.3214, "step": 7080 }, { "epoch": 1.0153427014625753, "grad_norm": 0.30225712060928345, "learning_rate": 8.366102133696117e-06, "loss": 0.3329, "step": 7081 }, { "epoch": 1.0154860911958703, "grad_norm": 0.3081396222114563, "learning_rate": 8.36548521258242e-06, "loss": 0.3237, "step": 7082 }, { "epoch": 1.0156294809291655, "grad_norm": 0.34434571862220764, "learning_rate": 8.364868197778792e-06, "loss": 0.3, "step": 7083 }, { "epoch": 1.0157728706624605, "grad_norm": 0.31303316354751587, "learning_rate": 8.36425108930241e-06, "loss": 0.3198, "step": 7084 }, { "epoch": 1.0159162603957557, "grad_norm": 0.3014647662639618, "learning_rate": 8.363633887170453e-06, "loss": 0.3022, "step": 7085 }, { "epoch": 1.0160596501290509, "grad_norm": 0.30184298753738403, "learning_rate": 8.363016591400104e-06, "loss": 0.294, "step": 7086 }, { "epoch": 1.0162030398623458, "grad_norm": 0.295628160238266, "learning_rate": 8.362399202008544e-06, "loss": 0.3259, "step": 7087 }, { "epoch": 1.016346429595641, "grad_norm": 0.3244040310382843, "learning_rate": 8.361781719012965e-06, "loss": 0.3, "step": 7088 }, { "epoch": 1.016489819328936, "grad_norm": 0.30972960591316223, "learning_rate": 8.361164142430552e-06, "loss": 0.3071, "step": 7089 }, { "epoch": 1.0166332090622312, "grad_norm": 0.30345430970191956, "learning_rate": 8.360546472278502e-06, "loss": 0.3009, "step": 7090 }, { "epoch": 1.0167765987955262, "grad_norm": 0.3071475625038147, "learning_rate": 8.359928708574005e-06, "loss": 0.3093, "step": 7091 }, { "epoch": 1.0169199885288214, "grad_norm": 0.29562851786613464, "learning_rate": 8.359310851334264e-06, "loss": 0.3078, "step": 7092 }, { "epoch": 1.0170633782621163, "grad_norm": 0.2937542796134949, "learning_rate": 8.358692900576473e-06, "loss": 0.2995, "step": 7093 }, { "epoch": 1.0172067679954115, "grad_norm": 0.30846545100212097, "learning_rate": 8.358074856317842e-06, "loss": 0.3041, "step": 7094 }, { "epoch": 1.0173501577287065, "grad_norm": 0.31697916984558105, "learning_rate": 8.35745671857557e-06, "loss": 0.3224, "step": 7095 }, { "epoch": 1.0174935474620017, "grad_norm": 0.3040596842765808, "learning_rate": 8.35683848736687e-06, "loss": 0.3191, "step": 7096 }, { "epoch": 1.017636937195297, "grad_norm": 0.30688828229904175, "learning_rate": 8.356220162708949e-06, "loss": 0.3161, "step": 7097 }, { "epoch": 1.0177803269285919, "grad_norm": 0.3070300221443176, "learning_rate": 8.355601744619021e-06, "loss": 0.3109, "step": 7098 }, { "epoch": 1.017923716661887, "grad_norm": 0.31073564291000366, "learning_rate": 8.354983233114303e-06, "loss": 0.3103, "step": 7099 }, { "epoch": 1.018067106395182, "grad_norm": 0.2898443043231964, "learning_rate": 8.354364628212013e-06, "loss": 0.32, "step": 7100 }, { "epoch": 1.0182104961284772, "grad_norm": 0.31853577494621277, "learning_rate": 8.35374592992937e-06, "loss": 0.3101, "step": 7101 }, { "epoch": 1.0183538858617722, "grad_norm": 0.3135455548763275, "learning_rate": 8.353127138283602e-06, "loss": 0.3031, "step": 7102 }, { "epoch": 1.0184972755950674, "grad_norm": 0.3047392964363098, "learning_rate": 8.352508253291932e-06, "loss": 0.34, "step": 7103 }, { "epoch": 1.0186406653283624, "grad_norm": 0.2907566726207733, "learning_rate": 8.351889274971588e-06, "loss": 0.3031, "step": 7104 }, { "epoch": 1.0187840550616576, "grad_norm": 0.33520832657814026, "learning_rate": 8.351270203339804e-06, "loss": 0.3052, "step": 7105 }, { "epoch": 1.0189274447949528, "grad_norm": 0.3198932409286499, "learning_rate": 8.35065103841381e-06, "loss": 0.3246, "step": 7106 }, { "epoch": 1.0190708345282478, "grad_norm": 0.30012959241867065, "learning_rate": 8.350031780210848e-06, "loss": 0.2982, "step": 7107 }, { "epoch": 1.019214224261543, "grad_norm": 0.3076242208480835, "learning_rate": 8.349412428748154e-06, "loss": 0.3253, "step": 7108 }, { "epoch": 1.019357613994838, "grad_norm": 0.3003140091896057, "learning_rate": 8.348792984042971e-06, "loss": 0.3167, "step": 7109 }, { "epoch": 1.0195010037281331, "grad_norm": 0.32739272713661194, "learning_rate": 8.348173446112541e-06, "loss": 0.3148, "step": 7110 }, { "epoch": 1.019644393461428, "grad_norm": 0.33439919352531433, "learning_rate": 8.347553814974115e-06, "loss": 0.3196, "step": 7111 }, { "epoch": 1.0197877831947233, "grad_norm": 0.28953200578689575, "learning_rate": 8.346934090644939e-06, "loss": 0.3314, "step": 7112 }, { "epoch": 1.0199311729280183, "grad_norm": 0.3284255564212799, "learning_rate": 8.346314273142266e-06, "loss": 0.3097, "step": 7113 }, { "epoch": 1.0200745626613135, "grad_norm": 0.32200947403907776, "learning_rate": 8.345694362483353e-06, "loss": 0.2994, "step": 7114 }, { "epoch": 1.0202179523946084, "grad_norm": 0.3064993917942047, "learning_rate": 8.345074358685454e-06, "loss": 0.3206, "step": 7115 }, { "epoch": 1.0203613421279036, "grad_norm": 0.31796565651893616, "learning_rate": 8.34445426176583e-06, "loss": 0.3431, "step": 7116 }, { "epoch": 1.0205047318611988, "grad_norm": 0.3138822317123413, "learning_rate": 8.343834071741745e-06, "loss": 0.3075, "step": 7117 }, { "epoch": 1.0206481215944938, "grad_norm": 0.3202568590641022, "learning_rate": 8.343213788630465e-06, "loss": 0.3163, "step": 7118 }, { "epoch": 1.020791511327789, "grad_norm": 0.3443463444709778, "learning_rate": 8.342593412449254e-06, "loss": 0.311, "step": 7119 }, { "epoch": 1.020934901061084, "grad_norm": 0.2965494394302368, "learning_rate": 8.341972943215382e-06, "loss": 0.3179, "step": 7120 }, { "epoch": 1.0210782907943792, "grad_norm": 0.29861611127853394, "learning_rate": 8.341352380946129e-06, "loss": 0.3053, "step": 7121 }, { "epoch": 1.0212216805276741, "grad_norm": 0.33318042755126953, "learning_rate": 8.340731725658761e-06, "loss": 0.3043, "step": 7122 }, { "epoch": 1.0213650702609693, "grad_norm": 0.3142348527908325, "learning_rate": 8.340110977370565e-06, "loss": 0.3056, "step": 7123 }, { "epoch": 1.0215084599942643, "grad_norm": 0.3063638210296631, "learning_rate": 8.339490136098816e-06, "loss": 0.285, "step": 7124 }, { "epoch": 1.0216518497275595, "grad_norm": 0.35601070523262024, "learning_rate": 8.338869201860797e-06, "loss": 0.3285, "step": 7125 }, { "epoch": 1.0217952394608547, "grad_norm": 0.3006889522075653, "learning_rate": 8.338248174673798e-06, "loss": 0.3022, "step": 7126 }, { "epoch": 1.0219386291941497, "grad_norm": 0.31884264945983887, "learning_rate": 8.337627054555102e-06, "loss": 0.3044, "step": 7127 }, { "epoch": 1.0220820189274449, "grad_norm": 0.32261040806770325, "learning_rate": 8.337005841522006e-06, "loss": 0.3041, "step": 7128 }, { "epoch": 1.0222254086607399, "grad_norm": 0.2944851219654083, "learning_rate": 8.336384535591799e-06, "loss": 0.3086, "step": 7129 }, { "epoch": 1.022368798394035, "grad_norm": 0.2956451177597046, "learning_rate": 8.33576313678178e-06, "loss": 0.3255, "step": 7130 }, { "epoch": 1.02251218812733, "grad_norm": 0.31675609946250916, "learning_rate": 8.335141645109245e-06, "loss": 0.3065, "step": 7131 }, { "epoch": 1.0226555778606252, "grad_norm": 0.30729684233665466, "learning_rate": 8.334520060591498e-06, "loss": 0.3249, "step": 7132 }, { "epoch": 1.0227989675939202, "grad_norm": 0.30929163098335266, "learning_rate": 8.333898383245842e-06, "loss": 0.3393, "step": 7133 }, { "epoch": 1.0229423573272154, "grad_norm": 0.28532344102859497, "learning_rate": 8.333276613089584e-06, "loss": 0.3055, "step": 7134 }, { "epoch": 1.0230857470605104, "grad_norm": 0.2944731116294861, "learning_rate": 8.332654750140032e-06, "loss": 0.3064, "step": 7135 }, { "epoch": 1.0232291367938056, "grad_norm": 0.31776705384254456, "learning_rate": 8.332032794414497e-06, "loss": 0.3224, "step": 7136 }, { "epoch": 1.0233725265271008, "grad_norm": 0.2801833748817444, "learning_rate": 8.331410745930297e-06, "loss": 0.3172, "step": 7137 }, { "epoch": 1.0235159162603957, "grad_norm": 0.2845689058303833, "learning_rate": 8.330788604704746e-06, "loss": 0.3062, "step": 7138 }, { "epoch": 1.023659305993691, "grad_norm": 0.300529420375824, "learning_rate": 8.330166370755165e-06, "loss": 0.3234, "step": 7139 }, { "epoch": 1.023802695726986, "grad_norm": 0.3147812783718109, "learning_rate": 8.329544044098872e-06, "loss": 0.3297, "step": 7140 }, { "epoch": 1.023946085460281, "grad_norm": 0.28045132756233215, "learning_rate": 8.328921624753197e-06, "loss": 0.3227, "step": 7141 }, { "epoch": 1.024089475193576, "grad_norm": 0.30114418268203735, "learning_rate": 8.328299112735462e-06, "loss": 0.3051, "step": 7142 }, { "epoch": 1.0242328649268713, "grad_norm": 0.30082428455352783, "learning_rate": 8.327676508063002e-06, "loss": 0.338, "step": 7143 }, { "epoch": 1.0243762546601662, "grad_norm": 0.3589619994163513, "learning_rate": 8.327053810753147e-06, "loss": 0.3029, "step": 7144 }, { "epoch": 1.0245196443934614, "grad_norm": 0.30806517601013184, "learning_rate": 8.326431020823231e-06, "loss": 0.3015, "step": 7145 }, { "epoch": 1.0246630341267566, "grad_norm": 0.3055885434150696, "learning_rate": 8.325808138290593e-06, "loss": 0.321, "step": 7146 }, { "epoch": 1.0248064238600516, "grad_norm": 0.34216493368148804, "learning_rate": 8.325185163172573e-06, "loss": 0.3372, "step": 7147 }, { "epoch": 1.0249498135933468, "grad_norm": 0.30192461609840393, "learning_rate": 8.324562095486513e-06, "loss": 0.3132, "step": 7148 }, { "epoch": 1.0250932033266418, "grad_norm": 0.31416067481040955, "learning_rate": 8.323938935249756e-06, "loss": 0.3008, "step": 7149 }, { "epoch": 1.025236593059937, "grad_norm": 0.30406656861305237, "learning_rate": 8.323315682479656e-06, "loss": 0.3078, "step": 7150 }, { "epoch": 1.025379982793232, "grad_norm": 0.31709784269332886, "learning_rate": 8.322692337193556e-06, "loss": 0.3041, "step": 7151 }, { "epoch": 1.0255233725265271, "grad_norm": 0.30550768971443176, "learning_rate": 8.322068899408815e-06, "loss": 0.2952, "step": 7152 }, { "epoch": 1.0256667622598221, "grad_norm": 0.2713908553123474, "learning_rate": 8.321445369142784e-06, "loss": 0.3005, "step": 7153 }, { "epoch": 1.0258101519931173, "grad_norm": 0.28021061420440674, "learning_rate": 8.320821746412827e-06, "loss": 0.323, "step": 7154 }, { "epoch": 1.0259535417264123, "grad_norm": 0.3097245693206787, "learning_rate": 8.320198031236296e-06, "loss": 0.3297, "step": 7155 }, { "epoch": 1.0260969314597075, "grad_norm": 0.2776125371456146, "learning_rate": 8.319574223630564e-06, "loss": 0.2957, "step": 7156 }, { "epoch": 1.0262403211930027, "grad_norm": 0.2997024655342102, "learning_rate": 8.318950323612989e-06, "loss": 0.3114, "step": 7157 }, { "epoch": 1.0263837109262977, "grad_norm": 0.30885472893714905, "learning_rate": 8.318326331200944e-06, "loss": 0.3165, "step": 7158 }, { "epoch": 1.0265271006595929, "grad_norm": 0.30010852217674255, "learning_rate": 8.317702246411797e-06, "loss": 0.3037, "step": 7159 }, { "epoch": 1.0266704903928878, "grad_norm": 0.29696226119995117, "learning_rate": 8.317078069262923e-06, "loss": 0.3285, "step": 7160 }, { "epoch": 1.026813880126183, "grad_norm": 0.2935723662376404, "learning_rate": 8.3164537997717e-06, "loss": 0.3105, "step": 7161 }, { "epoch": 1.026957269859478, "grad_norm": 0.2971692383289337, "learning_rate": 8.315829437955504e-06, "loss": 0.3085, "step": 7162 }, { "epoch": 1.0271006595927732, "grad_norm": 0.2872445285320282, "learning_rate": 8.315204983831716e-06, "loss": 0.3167, "step": 7163 }, { "epoch": 1.0272440493260682, "grad_norm": 0.29527735710144043, "learning_rate": 8.314580437417723e-06, "loss": 0.3122, "step": 7164 }, { "epoch": 1.0273874390593634, "grad_norm": 0.31680959463119507, "learning_rate": 8.313955798730908e-06, "loss": 0.3232, "step": 7165 }, { "epoch": 1.0275308287926586, "grad_norm": 0.32916516065597534, "learning_rate": 8.313331067788662e-06, "loss": 0.3333, "step": 7166 }, { "epoch": 1.0276742185259535, "grad_norm": 0.28964370489120483, "learning_rate": 8.312706244608376e-06, "loss": 0.3036, "step": 7167 }, { "epoch": 1.0278176082592487, "grad_norm": 0.2770007848739624, "learning_rate": 8.312081329207445e-06, "loss": 0.3053, "step": 7168 }, { "epoch": 1.0279609979925437, "grad_norm": 0.29881948232650757, "learning_rate": 8.311456321603263e-06, "loss": 0.3164, "step": 7169 }, { "epoch": 1.028104387725839, "grad_norm": 0.304000586271286, "learning_rate": 8.310831221813231e-06, "loss": 0.3255, "step": 7170 }, { "epoch": 1.0282477774591339, "grad_norm": 0.29561761021614075, "learning_rate": 8.31020602985475e-06, "loss": 0.3014, "step": 7171 }, { "epoch": 1.028391167192429, "grad_norm": 0.2851163446903229, "learning_rate": 8.309580745745226e-06, "loss": 0.2995, "step": 7172 }, { "epoch": 1.028534556925724, "grad_norm": 0.30058568716049194, "learning_rate": 8.308955369502067e-06, "loss": 0.3179, "step": 7173 }, { "epoch": 1.0286779466590192, "grad_norm": 0.28727656602859497, "learning_rate": 8.30832990114268e-06, "loss": 0.309, "step": 7174 }, { "epoch": 1.0288213363923142, "grad_norm": 0.2824253737926483, "learning_rate": 8.307704340684477e-06, "loss": 0.3179, "step": 7175 }, { "epoch": 1.0289647261256094, "grad_norm": 0.2972743809223175, "learning_rate": 8.307078688144874e-06, "loss": 0.3272, "step": 7176 }, { "epoch": 1.0291081158589046, "grad_norm": 0.3187011480331421, "learning_rate": 8.306452943541285e-06, "loss": 0.3356, "step": 7177 }, { "epoch": 1.0292515055921996, "grad_norm": 0.3178796172142029, "learning_rate": 8.305827106891136e-06, "loss": 0.3173, "step": 7178 }, { "epoch": 1.0293948953254948, "grad_norm": 0.2847851514816284, "learning_rate": 8.305201178211843e-06, "loss": 0.3087, "step": 7179 }, { "epoch": 1.0295382850587897, "grad_norm": 0.2773573100566864, "learning_rate": 8.304575157520835e-06, "loss": 0.3237, "step": 7180 }, { "epoch": 1.029681674792085, "grad_norm": 0.3285956382751465, "learning_rate": 8.303949044835537e-06, "loss": 0.3119, "step": 7181 }, { "epoch": 1.02982506452538, "grad_norm": 0.28147396445274353, "learning_rate": 8.30332284017338e-06, "loss": 0.3058, "step": 7182 }, { "epoch": 1.0299684542586751, "grad_norm": 0.28297728300094604, "learning_rate": 8.302696543551798e-06, "loss": 0.3155, "step": 7183 }, { "epoch": 1.03011184399197, "grad_norm": 0.3347502648830414, "learning_rate": 8.302070154988224e-06, "loss": 0.3211, "step": 7184 }, { "epoch": 1.0302552337252653, "grad_norm": 0.2921810746192932, "learning_rate": 8.301443674500096e-06, "loss": 0.3047, "step": 7185 }, { "epoch": 1.0303986234585603, "grad_norm": 0.30551326274871826, "learning_rate": 8.300817102104855e-06, "loss": 0.3104, "step": 7186 }, { "epoch": 1.0305420131918555, "grad_norm": 0.3062467873096466, "learning_rate": 8.300190437819943e-06, "loss": 0.2969, "step": 7187 }, { "epoch": 1.0306854029251507, "grad_norm": 0.2871701121330261, "learning_rate": 8.299563681662807e-06, "loss": 0.3093, "step": 7188 }, { "epoch": 1.0308287926584456, "grad_norm": 0.31739911437034607, "learning_rate": 8.298936833650892e-06, "loss": 0.3041, "step": 7189 }, { "epoch": 1.0309721823917408, "grad_norm": 0.3267134428024292, "learning_rate": 8.298309893801652e-06, "loss": 0.3018, "step": 7190 }, { "epoch": 1.0311155721250358, "grad_norm": 0.2847032845020294, "learning_rate": 8.297682862132537e-06, "loss": 0.2977, "step": 7191 }, { "epoch": 1.031258961858331, "grad_norm": 0.3195345401763916, "learning_rate": 8.297055738661003e-06, "loss": 0.3104, "step": 7192 }, { "epoch": 1.031402351591626, "grad_norm": 0.2991544008255005, "learning_rate": 8.296428523404512e-06, "loss": 0.3136, "step": 7193 }, { "epoch": 1.0315457413249212, "grad_norm": 0.29311802983283997, "learning_rate": 8.295801216380522e-06, "loss": 0.314, "step": 7194 }, { "epoch": 1.0316891310582161, "grad_norm": 0.31204044818878174, "learning_rate": 8.295173817606492e-06, "loss": 0.3275, "step": 7195 }, { "epoch": 1.0318325207915113, "grad_norm": 0.3317483067512512, "learning_rate": 8.294546327099895e-06, "loss": 0.3113, "step": 7196 }, { "epoch": 1.0319759105248065, "grad_norm": 0.3184925317764282, "learning_rate": 8.293918744878195e-06, "loss": 0.3188, "step": 7197 }, { "epoch": 1.0321193002581015, "grad_norm": 0.3060687780380249, "learning_rate": 8.293291070958865e-06, "loss": 0.3157, "step": 7198 }, { "epoch": 1.0322626899913967, "grad_norm": 0.31938788294792175, "learning_rate": 8.292663305359377e-06, "loss": 0.3197, "step": 7199 }, { "epoch": 1.0324060797246917, "grad_norm": 0.3369388282299042, "learning_rate": 8.292035448097209e-06, "loss": 0.3276, "step": 7200 }, { "epoch": 1.0325494694579869, "grad_norm": 0.318646103143692, "learning_rate": 8.291407499189837e-06, "loss": 0.3211, "step": 7201 }, { "epoch": 1.0326928591912818, "grad_norm": 0.2728036344051361, "learning_rate": 8.290779458654742e-06, "loss": 0.3061, "step": 7202 }, { "epoch": 1.032836248924577, "grad_norm": 0.35369622707366943, "learning_rate": 8.290151326509412e-06, "loss": 0.2931, "step": 7203 }, { "epoch": 1.032979638657872, "grad_norm": 0.30768924951553345, "learning_rate": 8.289523102771328e-06, "loss": 0.2971, "step": 7204 }, { "epoch": 1.0331230283911672, "grad_norm": 0.2671354115009308, "learning_rate": 8.288894787457982e-06, "loss": 0.298, "step": 7205 }, { "epoch": 1.0332664181244622, "grad_norm": 0.33524656295776367, "learning_rate": 8.288266380586864e-06, "loss": 0.3394, "step": 7206 }, { "epoch": 1.0334098078577574, "grad_norm": 0.3100920021533966, "learning_rate": 8.287637882175468e-06, "loss": 0.3185, "step": 7207 }, { "epoch": 1.0335531975910526, "grad_norm": 0.31410685181617737, "learning_rate": 8.28700929224129e-06, "loss": 0.3173, "step": 7208 }, { "epoch": 1.0336965873243475, "grad_norm": 0.32122084498405457, "learning_rate": 8.286380610801832e-06, "loss": 0.311, "step": 7209 }, { "epoch": 1.0338399770576427, "grad_norm": 0.2988344132900238, "learning_rate": 8.285751837874592e-06, "loss": 0.3134, "step": 7210 }, { "epoch": 1.0339833667909377, "grad_norm": 0.30694380402565, "learning_rate": 8.285122973477075e-06, "loss": 0.3041, "step": 7211 }, { "epoch": 1.034126756524233, "grad_norm": 0.31708651781082153, "learning_rate": 8.284494017626788e-06, "loss": 0.3222, "step": 7212 }, { "epoch": 1.034270146257528, "grad_norm": 0.34813520312309265, "learning_rate": 8.28386497034124e-06, "loss": 0.3208, "step": 7213 }, { "epoch": 1.034413535990823, "grad_norm": 0.2968690097332001, "learning_rate": 8.283235831637944e-06, "loss": 0.2958, "step": 7214 }, { "epoch": 1.034556925724118, "grad_norm": 0.2946382761001587, "learning_rate": 8.282606601534412e-06, "loss": 0.3176, "step": 7215 }, { "epoch": 1.0347003154574133, "grad_norm": 0.352931410074234, "learning_rate": 8.28197728004816e-06, "loss": 0.3026, "step": 7216 }, { "epoch": 1.0348437051907085, "grad_norm": 0.31318703293800354, "learning_rate": 8.281347867196712e-06, "loss": 0.3218, "step": 7217 }, { "epoch": 1.0349870949240034, "grad_norm": 0.3059413731098175, "learning_rate": 8.280718362997587e-06, "loss": 0.3169, "step": 7218 }, { "epoch": 1.0351304846572986, "grad_norm": 0.3308134377002716, "learning_rate": 8.280088767468308e-06, "loss": 0.3312, "step": 7219 }, { "epoch": 1.0352738743905936, "grad_norm": 0.32658374309539795, "learning_rate": 8.279459080626404e-06, "loss": 0.3259, "step": 7220 }, { "epoch": 1.0354172641238888, "grad_norm": 0.33222639560699463, "learning_rate": 8.278829302489403e-06, "loss": 0.3203, "step": 7221 }, { "epoch": 1.0355606538571838, "grad_norm": 0.30139845609664917, "learning_rate": 8.278199433074838e-06, "loss": 0.3094, "step": 7222 }, { "epoch": 1.035704043590479, "grad_norm": 0.3451177775859833, "learning_rate": 8.277569472400245e-06, "loss": 0.3358, "step": 7223 }, { "epoch": 1.035847433323774, "grad_norm": 0.32332950830459595, "learning_rate": 8.27693942048316e-06, "loss": 0.299, "step": 7224 }, { "epoch": 1.0359908230570691, "grad_norm": 0.36086705327033997, "learning_rate": 8.276309277341122e-06, "loss": 0.3273, "step": 7225 }, { "epoch": 1.036134212790364, "grad_norm": 0.3138415515422821, "learning_rate": 8.275679042991673e-06, "loss": 0.3107, "step": 7226 }, { "epoch": 1.0362776025236593, "grad_norm": 0.3013703525066376, "learning_rate": 8.275048717452357e-06, "loss": 0.3189, "step": 7227 }, { "epoch": 1.0364209922569545, "grad_norm": 0.3383788466453552, "learning_rate": 8.274418300740726e-06, "loss": 0.2926, "step": 7228 }, { "epoch": 1.0365643819902495, "grad_norm": 0.3286268711090088, "learning_rate": 8.273787792874324e-06, "loss": 0.3025, "step": 7229 }, { "epoch": 1.0367077717235447, "grad_norm": 0.29922452569007874, "learning_rate": 8.273157193870707e-06, "loss": 0.3172, "step": 7230 }, { "epoch": 1.0368511614568396, "grad_norm": 0.2997778356075287, "learning_rate": 8.272526503747426e-06, "loss": 0.3026, "step": 7231 }, { "epoch": 1.0369945511901348, "grad_norm": 0.29907724261283875, "learning_rate": 8.271895722522046e-06, "loss": 0.317, "step": 7232 }, { "epoch": 1.0371379409234298, "grad_norm": 0.3012544810771942, "learning_rate": 8.27126485021212e-06, "loss": 0.3096, "step": 7233 }, { "epoch": 1.037281330656725, "grad_norm": 0.3044573962688446, "learning_rate": 8.270633886835212e-06, "loss": 0.3089, "step": 7234 }, { "epoch": 1.03742472039002, "grad_norm": 0.2931358516216278, "learning_rate": 8.270002832408887e-06, "loss": 0.3329, "step": 7235 }, { "epoch": 1.0375681101233152, "grad_norm": 0.3242530822753906, "learning_rate": 8.269371686950717e-06, "loss": 0.3322, "step": 7236 }, { "epoch": 1.0377114998566102, "grad_norm": 0.2942287027835846, "learning_rate": 8.268740450478265e-06, "loss": 0.317, "step": 7237 }, { "epoch": 1.0378548895899053, "grad_norm": 0.3097139894962311, "learning_rate": 8.268109123009107e-06, "loss": 0.3209, "step": 7238 }, { "epoch": 1.0379982793232005, "grad_norm": 0.31397783756256104, "learning_rate": 8.26747770456082e-06, "loss": 0.2984, "step": 7239 }, { "epoch": 1.0381416690564955, "grad_norm": 0.27747759222984314, "learning_rate": 8.26684619515098e-06, "loss": 0.3273, "step": 7240 }, { "epoch": 1.0382850587897907, "grad_norm": 0.3069373667240143, "learning_rate": 8.266214594797166e-06, "loss": 0.3413, "step": 7241 }, { "epoch": 1.0384284485230857, "grad_norm": 0.3245494067668915, "learning_rate": 8.265582903516965e-06, "loss": 0.3006, "step": 7242 }, { "epoch": 1.0385718382563809, "grad_norm": 0.2944367825984955, "learning_rate": 8.264951121327955e-06, "loss": 0.3128, "step": 7243 }, { "epoch": 1.0387152279896759, "grad_norm": 0.28796523809432983, "learning_rate": 8.264319248247732e-06, "loss": 0.3123, "step": 7244 }, { "epoch": 1.038858617722971, "grad_norm": 0.3109380900859833, "learning_rate": 8.26368728429388e-06, "loss": 0.3112, "step": 7245 }, { "epoch": 1.039002007456266, "grad_norm": 0.31057319045066833, "learning_rate": 8.263055229483996e-06, "loss": 0.3019, "step": 7246 }, { "epoch": 1.0391453971895612, "grad_norm": 0.29859986901283264, "learning_rate": 8.262423083835674e-06, "loss": 0.3218, "step": 7247 }, { "epoch": 1.0392887869228564, "grad_norm": 0.3089565634727478, "learning_rate": 8.261790847366511e-06, "loss": 0.3151, "step": 7248 }, { "epoch": 1.0394321766561514, "grad_norm": 0.28546202182769775, "learning_rate": 8.261158520094108e-06, "loss": 0.319, "step": 7249 }, { "epoch": 1.0395755663894466, "grad_norm": 0.31375566124916077, "learning_rate": 8.26052610203607e-06, "loss": 0.3185, "step": 7250 }, { "epoch": 1.0397189561227416, "grad_norm": 0.2942468822002411, "learning_rate": 8.25989359321e-06, "loss": 0.298, "step": 7251 }, { "epoch": 1.0398623458560368, "grad_norm": 0.2995753586292267, "learning_rate": 8.259260993633506e-06, "loss": 0.3186, "step": 7252 }, { "epoch": 1.0400057355893317, "grad_norm": 0.2940175235271454, "learning_rate": 8.258628303324203e-06, "loss": 0.3139, "step": 7253 }, { "epoch": 1.040149125322627, "grad_norm": 0.2870255410671234, "learning_rate": 8.257995522299698e-06, "loss": 0.3228, "step": 7254 }, { "epoch": 1.040292515055922, "grad_norm": 0.30804646015167236, "learning_rate": 8.25736265057761e-06, "loss": 0.3408, "step": 7255 }, { "epoch": 1.040435904789217, "grad_norm": 0.28137779235839844, "learning_rate": 8.256729688175556e-06, "loss": 0.3168, "step": 7256 }, { "epoch": 1.0405792945225123, "grad_norm": 0.29797422885894775, "learning_rate": 8.256096635111158e-06, "loss": 0.3196, "step": 7257 }, { "epoch": 1.0407226842558073, "grad_norm": 0.27837935090065, "learning_rate": 8.255463491402039e-06, "loss": 0.3232, "step": 7258 }, { "epoch": 1.0408660739891025, "grad_norm": 0.32413747906684875, "learning_rate": 8.254830257065822e-06, "loss": 0.3251, "step": 7259 }, { "epoch": 1.0410094637223974, "grad_norm": 0.3191518187522888, "learning_rate": 8.25419693212014e-06, "loss": 0.3113, "step": 7260 }, { "epoch": 1.0411528534556926, "grad_norm": 0.2963589131832123, "learning_rate": 8.253563516582622e-06, "loss": 0.3145, "step": 7261 }, { "epoch": 1.0412962431889876, "grad_norm": 0.2749924063682556, "learning_rate": 8.252930010470898e-06, "loss": 0.2936, "step": 7262 }, { "epoch": 1.0414396329222828, "grad_norm": 0.31389474868774414, "learning_rate": 8.252296413802608e-06, "loss": 0.3094, "step": 7263 }, { "epoch": 1.0415830226555778, "grad_norm": 0.2931077480316162, "learning_rate": 8.251662726595386e-06, "loss": 0.2968, "step": 7264 }, { "epoch": 1.041726412388873, "grad_norm": 0.2913682758808136, "learning_rate": 8.25102894886688e-06, "loss": 0.3331, "step": 7265 }, { "epoch": 1.041869802122168, "grad_norm": 0.292202889919281, "learning_rate": 8.250395080634726e-06, "loss": 0.3035, "step": 7266 }, { "epoch": 1.0420131918554632, "grad_norm": 0.2845071256160736, "learning_rate": 8.249761121916573e-06, "loss": 0.3064, "step": 7267 }, { "epoch": 1.0421565815887583, "grad_norm": 0.29919812083244324, "learning_rate": 8.249127072730072e-06, "loss": 0.3247, "step": 7268 }, { "epoch": 1.0422999713220533, "grad_norm": 0.28098201751708984, "learning_rate": 8.248492933092869e-06, "loss": 0.3151, "step": 7269 }, { "epoch": 1.0424433610553485, "grad_norm": 0.2939635217189789, "learning_rate": 8.247858703022621e-06, "loss": 0.3082, "step": 7270 }, { "epoch": 1.0425867507886435, "grad_norm": 0.30671390891075134, "learning_rate": 8.247224382536983e-06, "loss": 0.309, "step": 7271 }, { "epoch": 1.0427301405219387, "grad_norm": 0.31728386878967285, "learning_rate": 8.246589971653613e-06, "loss": 0.3032, "step": 7272 }, { "epoch": 1.0428735302552337, "grad_norm": 0.30312255024909973, "learning_rate": 8.245955470390171e-06, "loss": 0.3108, "step": 7273 }, { "epoch": 1.0430169199885289, "grad_norm": 0.2856906056404114, "learning_rate": 8.245320878764323e-06, "loss": 0.3035, "step": 7274 }, { "epoch": 1.0431603097218238, "grad_norm": 0.33389168977737427, "learning_rate": 8.244686196793734e-06, "loss": 0.3138, "step": 7275 }, { "epoch": 1.043303699455119, "grad_norm": 0.31550413370132446, "learning_rate": 8.244051424496073e-06, "loss": 0.3054, "step": 7276 }, { "epoch": 1.043447089188414, "grad_norm": 0.2897643744945526, "learning_rate": 8.243416561889011e-06, "loss": 0.3052, "step": 7277 }, { "epoch": 1.0435904789217092, "grad_norm": 0.29877254366874695, "learning_rate": 8.24278160899022e-06, "loss": 0.3165, "step": 7278 }, { "epoch": 1.0437338686550044, "grad_norm": 0.2991936206817627, "learning_rate": 8.242146565817378e-06, "loss": 0.3221, "step": 7279 }, { "epoch": 1.0438772583882994, "grad_norm": 0.28514227271080017, "learning_rate": 8.241511432388164e-06, "loss": 0.2928, "step": 7280 }, { "epoch": 1.0440206481215946, "grad_norm": 0.28164637088775635, "learning_rate": 8.240876208720257e-06, "loss": 0.3003, "step": 7281 }, { "epoch": 1.0441640378548895, "grad_norm": 0.32119834423065186, "learning_rate": 8.240240894831339e-06, "loss": 0.3168, "step": 7282 }, { "epoch": 1.0443074275881847, "grad_norm": 0.3140645921230316, "learning_rate": 8.239605490739104e-06, "loss": 0.2991, "step": 7283 }, { "epoch": 1.0444508173214797, "grad_norm": 0.31261882185935974, "learning_rate": 8.238969996461232e-06, "loss": 0.3236, "step": 7284 }, { "epoch": 1.044594207054775, "grad_norm": 0.3334614634513855, "learning_rate": 8.238334412015418e-06, "loss": 0.3309, "step": 7285 }, { "epoch": 1.0447375967880699, "grad_norm": 0.3445146679878235, "learning_rate": 8.237698737419357e-06, "loss": 0.3293, "step": 7286 }, { "epoch": 1.044880986521365, "grad_norm": 0.34346476197242737, "learning_rate": 8.237062972690743e-06, "loss": 0.2957, "step": 7287 }, { "epoch": 1.0450243762546603, "grad_norm": 0.3271704912185669, "learning_rate": 8.236427117847277e-06, "loss": 0.2903, "step": 7288 }, { "epoch": 1.0451677659879552, "grad_norm": 0.29666218161582947, "learning_rate": 8.235791172906657e-06, "loss": 0.3035, "step": 7289 }, { "epoch": 1.0453111557212504, "grad_norm": 0.3107755184173584, "learning_rate": 8.235155137886588e-06, "loss": 0.3136, "step": 7290 }, { "epoch": 1.0454545454545454, "grad_norm": 0.3433128893375397, "learning_rate": 8.234519012804777e-06, "loss": 0.3192, "step": 7291 }, { "epoch": 1.0455979351878406, "grad_norm": 0.3139314353466034, "learning_rate": 8.233882797678931e-06, "loss": 0.3105, "step": 7292 }, { "epoch": 1.0457413249211356, "grad_norm": 0.2954988479614258, "learning_rate": 8.233246492526765e-06, "loss": 0.3152, "step": 7293 }, { "epoch": 1.0458847146544308, "grad_norm": 0.2984699308872223, "learning_rate": 8.232610097365989e-06, "loss": 0.3132, "step": 7294 }, { "epoch": 1.0460281043877258, "grad_norm": 0.29381728172302246, "learning_rate": 8.231973612214321e-06, "loss": 0.3013, "step": 7295 }, { "epoch": 1.046171494121021, "grad_norm": 0.300020307302475, "learning_rate": 8.23133703708948e-06, "loss": 0.3235, "step": 7296 }, { "epoch": 1.046314883854316, "grad_norm": 0.3170226514339447, "learning_rate": 8.230700372009183e-06, "loss": 0.299, "step": 7297 }, { "epoch": 1.0464582735876111, "grad_norm": 0.30579280853271484, "learning_rate": 8.230063616991162e-06, "loss": 0.2918, "step": 7298 }, { "epoch": 1.0466016633209063, "grad_norm": 0.3103879392147064, "learning_rate": 8.229426772053136e-06, "loss": 0.3315, "step": 7299 }, { "epoch": 1.0467450530542013, "grad_norm": 0.3023138642311096, "learning_rate": 8.228789837212837e-06, "loss": 0.3091, "step": 7300 }, { "epoch": 1.0468884427874965, "grad_norm": 0.2758793234825134, "learning_rate": 8.228152812487997e-06, "loss": 0.3057, "step": 7301 }, { "epoch": 1.0470318325207915, "grad_norm": 0.296820729970932, "learning_rate": 8.227515697896347e-06, "loss": 0.3014, "step": 7302 }, { "epoch": 1.0471752222540867, "grad_norm": 0.3070659935474396, "learning_rate": 8.226878493455626e-06, "loss": 0.3129, "step": 7303 }, { "epoch": 1.0473186119873816, "grad_norm": 0.26919102668762207, "learning_rate": 8.22624119918357e-06, "loss": 0.3035, "step": 7304 }, { "epoch": 1.0474620017206768, "grad_norm": 0.32442858815193176, "learning_rate": 8.225603815097923e-06, "loss": 0.321, "step": 7305 }, { "epoch": 1.0476053914539718, "grad_norm": 0.27928003668785095, "learning_rate": 8.224966341216427e-06, "loss": 0.33, "step": 7306 }, { "epoch": 1.047748781187267, "grad_norm": 0.2818560302257538, "learning_rate": 8.22432877755683e-06, "loss": 0.3143, "step": 7307 }, { "epoch": 1.0478921709205622, "grad_norm": 0.2919258177280426, "learning_rate": 8.223691124136879e-06, "loss": 0.3051, "step": 7308 }, { "epoch": 1.0480355606538572, "grad_norm": 0.30630409717559814, "learning_rate": 8.223053380974326e-06, "loss": 0.3188, "step": 7309 }, { "epoch": 1.0481789503871524, "grad_norm": 0.2768230438232422, "learning_rate": 8.222415548086926e-06, "loss": 0.2942, "step": 7310 }, { "epoch": 1.0483223401204473, "grad_norm": 0.29700982570648193, "learning_rate": 8.221777625492434e-06, "loss": 0.3368, "step": 7311 }, { "epoch": 1.0484657298537425, "grad_norm": 0.3260044455528259, "learning_rate": 8.221139613208606e-06, "loss": 0.3081, "step": 7312 }, { "epoch": 1.0486091195870375, "grad_norm": 0.29179462790489197, "learning_rate": 8.22050151125321e-06, "loss": 0.2878, "step": 7313 }, { "epoch": 1.0487525093203327, "grad_norm": 0.27222901582717896, "learning_rate": 8.219863319644003e-06, "loss": 0.2827, "step": 7314 }, { "epoch": 1.0488958990536277, "grad_norm": 0.29109543561935425, "learning_rate": 8.219225038398757e-06, "loss": 0.3152, "step": 7315 }, { "epoch": 1.0490392887869229, "grad_norm": 0.30595850944519043, "learning_rate": 8.218586667535237e-06, "loss": 0.3342, "step": 7316 }, { "epoch": 1.0491826785202178, "grad_norm": 0.31676122546195984, "learning_rate": 8.217948207071215e-06, "loss": 0.3128, "step": 7317 }, { "epoch": 1.049326068253513, "grad_norm": 0.30608320236206055, "learning_rate": 8.217309657024465e-06, "loss": 0.3196, "step": 7318 }, { "epoch": 1.0494694579868082, "grad_norm": 0.3057711124420166, "learning_rate": 8.216671017412762e-06, "loss": 0.3287, "step": 7319 }, { "epoch": 1.0496128477201032, "grad_norm": 0.3514714539051056, "learning_rate": 8.216032288253887e-06, "loss": 0.306, "step": 7320 }, { "epoch": 1.0497562374533984, "grad_norm": 0.2953621745109558, "learning_rate": 8.215393469565622e-06, "loss": 0.318, "step": 7321 }, { "epoch": 1.0498996271866934, "grad_norm": 0.29887986183166504, "learning_rate": 8.214754561365748e-06, "loss": 0.3151, "step": 7322 }, { "epoch": 1.0500430169199886, "grad_norm": 0.36117514967918396, "learning_rate": 8.214115563672052e-06, "loss": 0.3253, "step": 7323 }, { "epoch": 1.0501864066532836, "grad_norm": 0.3582776188850403, "learning_rate": 8.213476476502323e-06, "loss": 0.3303, "step": 7324 }, { "epoch": 1.0503297963865788, "grad_norm": 0.30795833468437195, "learning_rate": 8.212837299874353e-06, "loss": 0.323, "step": 7325 }, { "epoch": 1.0504731861198737, "grad_norm": 0.30929166078567505, "learning_rate": 8.212198033805932e-06, "loss": 0.3287, "step": 7326 }, { "epoch": 1.050616575853169, "grad_norm": 0.341994047164917, "learning_rate": 8.211558678314861e-06, "loss": 0.3173, "step": 7327 }, { "epoch": 1.050759965586464, "grad_norm": 0.2845431864261627, "learning_rate": 8.210919233418938e-06, "loss": 0.2932, "step": 7328 }, { "epoch": 1.050903355319759, "grad_norm": 0.31213632225990295, "learning_rate": 8.21027969913596e-06, "loss": 0.3189, "step": 7329 }, { "epoch": 1.0510467450530543, "grad_norm": 0.29771846532821655, "learning_rate": 8.209640075483735e-06, "loss": 0.3147, "step": 7330 }, { "epoch": 1.0511901347863493, "grad_norm": 0.32144567370414734, "learning_rate": 8.209000362480068e-06, "loss": 0.3262, "step": 7331 }, { "epoch": 1.0513335245196445, "grad_norm": 0.26971766352653503, "learning_rate": 8.208360560142768e-06, "loss": 0.3007, "step": 7332 }, { "epoch": 1.0514769142529394, "grad_norm": 0.3096480369567871, "learning_rate": 8.207720668489643e-06, "loss": 0.3329, "step": 7333 }, { "epoch": 1.0516203039862346, "grad_norm": 0.29450708627700806, "learning_rate": 8.207080687538512e-06, "loss": 0.291, "step": 7334 }, { "epoch": 1.0517636937195296, "grad_norm": 0.2859841585159302, "learning_rate": 8.206440617307186e-06, "loss": 0.2975, "step": 7335 }, { "epoch": 1.0519070834528248, "grad_norm": 0.31117233633995056, "learning_rate": 8.205800457813483e-06, "loss": 0.3226, "step": 7336 }, { "epoch": 1.0520504731861198, "grad_norm": 0.29077690839767456, "learning_rate": 8.20516020907523e-06, "loss": 0.3085, "step": 7337 }, { "epoch": 1.052193862919415, "grad_norm": 0.2847343683242798, "learning_rate": 8.204519871110248e-06, "loss": 0.3257, "step": 7338 }, { "epoch": 1.0523372526527102, "grad_norm": 0.29750415682792664, "learning_rate": 8.203879443936361e-06, "loss": 0.3221, "step": 7339 }, { "epoch": 1.0524806423860051, "grad_norm": 0.28985080122947693, "learning_rate": 8.2032389275714e-06, "loss": 0.3153, "step": 7340 }, { "epoch": 1.0526240321193003, "grad_norm": 0.29920387268066406, "learning_rate": 8.202598322033193e-06, "loss": 0.3205, "step": 7341 }, { "epoch": 1.0527674218525953, "grad_norm": 0.3091128170490265, "learning_rate": 8.201957627339576e-06, "loss": 0.2996, "step": 7342 }, { "epoch": 1.0529108115858905, "grad_norm": 0.3114207684993744, "learning_rate": 8.201316843508385e-06, "loss": 0.3426, "step": 7343 }, { "epoch": 1.0530542013191855, "grad_norm": 0.2893136143684387, "learning_rate": 8.200675970557457e-06, "loss": 0.3002, "step": 7344 }, { "epoch": 1.0531975910524807, "grad_norm": 0.3009442090988159, "learning_rate": 8.200035008504633e-06, "loss": 0.3091, "step": 7345 }, { "epoch": 1.0533409807857756, "grad_norm": 0.2856327295303345, "learning_rate": 8.199393957367756e-06, "loss": 0.2961, "step": 7346 }, { "epoch": 1.0534843705190708, "grad_norm": 0.2992154359817505, "learning_rate": 8.198752817164674e-06, "loss": 0.3057, "step": 7347 }, { "epoch": 1.053627760252366, "grad_norm": 0.28364214301109314, "learning_rate": 8.198111587913236e-06, "loss": 0.3064, "step": 7348 }, { "epoch": 1.053771149985661, "grad_norm": 0.28208595514297485, "learning_rate": 8.197470269631289e-06, "loss": 0.3176, "step": 7349 }, { "epoch": 1.0539145397189562, "grad_norm": 0.2881166934967041, "learning_rate": 8.196828862336687e-06, "loss": 0.3191, "step": 7350 }, { "epoch": 1.0540579294522512, "grad_norm": 0.30027180910110474, "learning_rate": 8.19618736604729e-06, "loss": 0.2938, "step": 7351 }, { "epoch": 1.0542013191855464, "grad_norm": 0.3029075562953949, "learning_rate": 8.195545780780952e-06, "loss": 0.3123, "step": 7352 }, { "epoch": 1.0543447089188414, "grad_norm": 0.29934167861938477, "learning_rate": 8.194904106555535e-06, "loss": 0.3013, "step": 7353 }, { "epoch": 1.0544880986521366, "grad_norm": 0.3014160692691803, "learning_rate": 8.194262343388902e-06, "loss": 0.3032, "step": 7354 }, { "epoch": 1.0546314883854315, "grad_norm": 0.3108117878437042, "learning_rate": 8.193620491298918e-06, "loss": 0.3157, "step": 7355 }, { "epoch": 1.0547748781187267, "grad_norm": 0.30415424704551697, "learning_rate": 8.192978550303456e-06, "loss": 0.3324, "step": 7356 }, { "epoch": 1.0549182678520217, "grad_norm": 0.31168851256370544, "learning_rate": 8.192336520420381e-06, "loss": 0.3019, "step": 7357 }, { "epoch": 1.055061657585317, "grad_norm": 0.3443652391433716, "learning_rate": 8.191694401667567e-06, "loss": 0.3125, "step": 7358 }, { "epoch": 1.055205047318612, "grad_norm": 0.32300981879234314, "learning_rate": 8.191052194062892e-06, "loss": 0.3269, "step": 7359 }, { "epoch": 1.055348437051907, "grad_norm": 0.2708694636821747, "learning_rate": 8.190409897624231e-06, "loss": 0.3132, "step": 7360 }, { "epoch": 1.0554918267852023, "grad_norm": 0.3163621723651886, "learning_rate": 8.189767512369468e-06, "loss": 0.3198, "step": 7361 }, { "epoch": 1.0556352165184972, "grad_norm": 0.3204878568649292, "learning_rate": 8.189125038316483e-06, "loss": 0.3323, "step": 7362 }, { "epoch": 1.0557786062517924, "grad_norm": 0.32523635029792786, "learning_rate": 8.188482475483163e-06, "loss": 0.302, "step": 7363 }, { "epoch": 1.0559219959850874, "grad_norm": 0.307756632566452, "learning_rate": 8.187839823887397e-06, "loss": 0.3113, "step": 7364 }, { "epoch": 1.0560653857183826, "grad_norm": 0.30104172229766846, "learning_rate": 8.187197083547073e-06, "loss": 0.2996, "step": 7365 }, { "epoch": 1.0562087754516776, "grad_norm": 0.3093399703502655, "learning_rate": 8.186554254480085e-06, "loss": 0.3206, "step": 7366 }, { "epoch": 1.0563521651849728, "grad_norm": 0.28769513964653015, "learning_rate": 8.185911336704329e-06, "loss": 0.3172, "step": 7367 }, { "epoch": 1.0564955549182677, "grad_norm": 0.29319167137145996, "learning_rate": 8.1852683302377e-06, "loss": 0.3297, "step": 7368 }, { "epoch": 1.056638944651563, "grad_norm": 0.2655715048313141, "learning_rate": 8.184625235098104e-06, "loss": 0.3055, "step": 7369 }, { "epoch": 1.0567823343848581, "grad_norm": 0.2982572615146637, "learning_rate": 8.183982051303438e-06, "loss": 0.3462, "step": 7370 }, { "epoch": 1.056925724118153, "grad_norm": 0.3080337643623352, "learning_rate": 8.18333877887161e-06, "loss": 0.3372, "step": 7371 }, { "epoch": 1.0570691138514483, "grad_norm": 0.31081506609916687, "learning_rate": 8.182695417820528e-06, "loss": 0.3166, "step": 7372 }, { "epoch": 1.0572125035847433, "grad_norm": 0.2868684232234955, "learning_rate": 8.1820519681681e-06, "loss": 0.2842, "step": 7373 }, { "epoch": 1.0573558933180385, "grad_norm": 0.30478933453559875, "learning_rate": 8.181408429932241e-06, "loss": 0.3192, "step": 7374 }, { "epoch": 1.0574992830513334, "grad_norm": 0.28833916783332825, "learning_rate": 8.180764803130865e-06, "loss": 0.3108, "step": 7375 }, { "epoch": 1.0576426727846286, "grad_norm": 0.3026372790336609, "learning_rate": 8.18012108778189e-06, "loss": 0.3117, "step": 7376 }, { "epoch": 1.0577860625179236, "grad_norm": 0.3029162585735321, "learning_rate": 8.179477283903237e-06, "loss": 0.301, "step": 7377 }, { "epoch": 1.0579294522512188, "grad_norm": 0.30303487181663513, "learning_rate": 8.178833391512825e-06, "loss": 0.3044, "step": 7378 }, { "epoch": 1.058072841984514, "grad_norm": 0.29461991786956787, "learning_rate": 8.178189410628583e-06, "loss": 0.3111, "step": 7379 }, { "epoch": 1.058216231717809, "grad_norm": 0.30680230259895325, "learning_rate": 8.177545341268437e-06, "loss": 0.3195, "step": 7380 }, { "epoch": 1.0583596214511042, "grad_norm": 0.3007601797580719, "learning_rate": 8.176901183450315e-06, "loss": 0.3188, "step": 7381 }, { "epoch": 1.0585030111843992, "grad_norm": 0.2997916042804718, "learning_rate": 8.176256937192154e-06, "loss": 0.3237, "step": 7382 }, { "epoch": 1.0586464009176944, "grad_norm": 0.29868414998054504, "learning_rate": 8.175612602511884e-06, "loss": 0.3247, "step": 7383 }, { "epoch": 1.0587897906509893, "grad_norm": 0.288354754447937, "learning_rate": 8.174968179427443e-06, "loss": 0.3386, "step": 7384 }, { "epoch": 1.0589331803842845, "grad_norm": 0.3361682593822479, "learning_rate": 8.174323667956774e-06, "loss": 0.3162, "step": 7385 }, { "epoch": 1.0590765701175795, "grad_norm": 0.30108630657196045, "learning_rate": 8.173679068117814e-06, "loss": 0.2914, "step": 7386 }, { "epoch": 1.0592199598508747, "grad_norm": 0.31365856528282166, "learning_rate": 8.173034379928513e-06, "loss": 0.3092, "step": 7387 }, { "epoch": 1.0593633495841697, "grad_norm": 0.3386772572994232, "learning_rate": 8.172389603406817e-06, "loss": 0.3335, "step": 7388 }, { "epoch": 1.0595067393174649, "grad_norm": 0.32759571075439453, "learning_rate": 8.171744738570674e-06, "loss": 0.311, "step": 7389 }, { "epoch": 1.05965012905076, "grad_norm": 0.325664758682251, "learning_rate": 8.171099785438035e-06, "loss": 0.3213, "step": 7390 }, { "epoch": 1.059793518784055, "grad_norm": 0.30743640661239624, "learning_rate": 8.170454744026856e-06, "loss": 0.3226, "step": 7391 }, { "epoch": 1.0599369085173502, "grad_norm": 0.3339540660381317, "learning_rate": 8.169809614355096e-06, "loss": 0.3426, "step": 7392 }, { "epoch": 1.0600802982506452, "grad_norm": 0.3054848611354828, "learning_rate": 8.169164396440711e-06, "loss": 0.3103, "step": 7393 }, { "epoch": 1.0602236879839404, "grad_norm": 0.3065137267112732, "learning_rate": 8.168519090301666e-06, "loss": 0.3016, "step": 7394 }, { "epoch": 1.0603670777172354, "grad_norm": 0.31090518832206726, "learning_rate": 8.167873695955922e-06, "loss": 0.3074, "step": 7395 }, { "epoch": 1.0605104674505306, "grad_norm": 0.3695281744003296, "learning_rate": 8.167228213421447e-06, "loss": 0.3149, "step": 7396 }, { "epoch": 1.0606538571838255, "grad_norm": 0.2818800210952759, "learning_rate": 8.166582642716211e-06, "loss": 0.3138, "step": 7397 }, { "epoch": 1.0607972469171207, "grad_norm": 0.3080591559410095, "learning_rate": 8.165936983858185e-06, "loss": 0.3037, "step": 7398 }, { "epoch": 1.060940636650416, "grad_norm": 0.3152073621749878, "learning_rate": 8.165291236865345e-06, "loss": 0.3273, "step": 7399 }, { "epoch": 1.061084026383711, "grad_norm": 0.32015445828437805, "learning_rate": 8.164645401755665e-06, "loss": 0.3234, "step": 7400 }, { "epoch": 1.061227416117006, "grad_norm": 0.2987867593765259, "learning_rate": 8.163999478547124e-06, "loss": 0.3368, "step": 7401 }, { "epoch": 1.061370805850301, "grad_norm": 0.28849169611930847, "learning_rate": 8.163353467257708e-06, "loss": 0.331, "step": 7402 }, { "epoch": 1.0615141955835963, "grad_norm": 0.29482796788215637, "learning_rate": 8.162707367905394e-06, "loss": 0.3018, "step": 7403 }, { "epoch": 1.0616575853168913, "grad_norm": 0.2945885956287384, "learning_rate": 8.162061180508174e-06, "loss": 0.313, "step": 7404 }, { "epoch": 1.0618009750501864, "grad_norm": 0.30257880687713623, "learning_rate": 8.161414905084035e-06, "loss": 0.3307, "step": 7405 }, { "epoch": 1.0619443647834814, "grad_norm": 0.30004215240478516, "learning_rate": 8.160768541650967e-06, "loss": 0.2982, "step": 7406 }, { "epoch": 1.0620877545167766, "grad_norm": 0.3078629672527313, "learning_rate": 8.160122090226968e-06, "loss": 0.3299, "step": 7407 }, { "epoch": 1.0622311442500716, "grad_norm": 0.2959495186805725, "learning_rate": 8.159475550830027e-06, "loss": 0.3241, "step": 7408 }, { "epoch": 1.0623745339833668, "grad_norm": 0.2827951908111572, "learning_rate": 8.158828923478149e-06, "loss": 0.3244, "step": 7409 }, { "epoch": 1.062517923716662, "grad_norm": 0.2969024181365967, "learning_rate": 8.158182208189333e-06, "loss": 0.3121, "step": 7410 }, { "epoch": 1.062661313449957, "grad_norm": 0.29506438970565796, "learning_rate": 8.157535404981582e-06, "loss": 0.3029, "step": 7411 }, { "epoch": 1.0628047031832522, "grad_norm": 0.31237077713012695, "learning_rate": 8.156888513872902e-06, "loss": 0.3415, "step": 7412 }, { "epoch": 1.0629480929165471, "grad_norm": 0.2815364599227905, "learning_rate": 8.156241534881305e-06, "loss": 0.3038, "step": 7413 }, { "epoch": 1.0630914826498423, "grad_norm": 0.31036219000816345, "learning_rate": 8.155594468024796e-06, "loss": 0.3101, "step": 7414 }, { "epoch": 1.0632348723831373, "grad_norm": 0.2829204201698303, "learning_rate": 8.154947313321393e-06, "loss": 0.3301, "step": 7415 }, { "epoch": 1.0633782621164325, "grad_norm": 0.30848732590675354, "learning_rate": 8.154300070789107e-06, "loss": 0.2993, "step": 7416 }, { "epoch": 1.0635216518497275, "grad_norm": 0.3125418722629547, "learning_rate": 8.153652740445962e-06, "loss": 0.299, "step": 7417 }, { "epoch": 1.0636650415830227, "grad_norm": 0.30690526962280273, "learning_rate": 8.153005322309975e-06, "loss": 0.3052, "step": 7418 }, { "epoch": 1.0638084313163176, "grad_norm": 0.28965330123901367, "learning_rate": 8.15235781639917e-06, "loss": 0.31, "step": 7419 }, { "epoch": 1.0639518210496128, "grad_norm": 0.3212681710720062, "learning_rate": 8.151710222731573e-06, "loss": 0.3278, "step": 7420 }, { "epoch": 1.064095210782908, "grad_norm": 0.2882057726383209, "learning_rate": 8.151062541325213e-06, "loss": 0.3333, "step": 7421 }, { "epoch": 1.064238600516203, "grad_norm": 0.3087922930717468, "learning_rate": 8.150414772198116e-06, "loss": 0.3294, "step": 7422 }, { "epoch": 1.0643819902494982, "grad_norm": 0.284204363822937, "learning_rate": 8.14976691536832e-06, "loss": 0.293, "step": 7423 }, { "epoch": 1.0645253799827932, "grad_norm": 0.3009592890739441, "learning_rate": 8.149118970853857e-06, "loss": 0.3007, "step": 7424 }, { "epoch": 1.0646687697160884, "grad_norm": 0.2903840243816376, "learning_rate": 8.148470938672767e-06, "loss": 0.3054, "step": 7425 }, { "epoch": 1.0648121594493833, "grad_norm": 0.2943129241466522, "learning_rate": 8.14782281884309e-06, "loss": 0.3292, "step": 7426 }, { "epoch": 1.0649555491826785, "grad_norm": 0.3147493302822113, "learning_rate": 8.147174611382867e-06, "loss": 0.3084, "step": 7427 }, { "epoch": 1.0650989389159735, "grad_norm": 0.28923243284225464, "learning_rate": 8.146526316310145e-06, "loss": 0.3284, "step": 7428 }, { "epoch": 1.0652423286492687, "grad_norm": 0.28688111901283264, "learning_rate": 8.145877933642969e-06, "loss": 0.3042, "step": 7429 }, { "epoch": 1.065385718382564, "grad_norm": 0.2977297902107239, "learning_rate": 8.145229463399391e-06, "loss": 0.3092, "step": 7430 }, { "epoch": 1.0655291081158589, "grad_norm": 0.2853100299835205, "learning_rate": 8.144580905597463e-06, "loss": 0.296, "step": 7431 }, { "epoch": 1.065672497849154, "grad_norm": 0.29113948345184326, "learning_rate": 8.143932260255241e-06, "loss": 0.3094, "step": 7432 }, { "epoch": 1.065815887582449, "grad_norm": 0.2880752384662628, "learning_rate": 8.14328352739078e-06, "loss": 0.3205, "step": 7433 }, { "epoch": 1.0659592773157442, "grad_norm": 0.28545188903808594, "learning_rate": 8.142634707022142e-06, "loss": 0.3301, "step": 7434 }, { "epoch": 1.0661026670490392, "grad_norm": 0.3082984387874603, "learning_rate": 8.141985799167386e-06, "loss": 0.3414, "step": 7435 }, { "epoch": 1.0662460567823344, "grad_norm": 0.29118096828460693, "learning_rate": 8.141336803844582e-06, "loss": 0.2919, "step": 7436 }, { "epoch": 1.0663894465156294, "grad_norm": 0.29466235637664795, "learning_rate": 8.140687721071792e-06, "loss": 0.3068, "step": 7437 }, { "epoch": 1.0665328362489246, "grad_norm": 0.36433884501457214, "learning_rate": 8.140038550867087e-06, "loss": 0.3436, "step": 7438 }, { "epoch": 1.0666762259822198, "grad_norm": 0.2966328561306, "learning_rate": 8.13938929324854e-06, "loss": 0.3001, "step": 7439 }, { "epoch": 1.0668196157155148, "grad_norm": 0.29633620381355286, "learning_rate": 8.138739948234222e-06, "loss": 0.3226, "step": 7440 }, { "epoch": 1.06696300544881, "grad_norm": 0.3010687232017517, "learning_rate": 8.138090515842211e-06, "loss": 0.3217, "step": 7441 }, { "epoch": 1.067106395182105, "grad_norm": 0.31112655997276306, "learning_rate": 8.13744099609059e-06, "loss": 0.3189, "step": 7442 }, { "epoch": 1.0672497849154001, "grad_norm": 0.28591132164001465, "learning_rate": 8.136791388997438e-06, "loss": 0.3105, "step": 7443 }, { "epoch": 1.067393174648695, "grad_norm": 0.31041017174720764, "learning_rate": 8.136141694580839e-06, "loss": 0.3358, "step": 7444 }, { "epoch": 1.0675365643819903, "grad_norm": 0.2912956476211548, "learning_rate": 8.13549191285888e-06, "loss": 0.317, "step": 7445 }, { "epoch": 1.0676799541152853, "grad_norm": 0.2924569547176361, "learning_rate": 8.134842043849647e-06, "loss": 0.3147, "step": 7446 }, { "epoch": 1.0678233438485805, "grad_norm": 0.27744176983833313, "learning_rate": 8.134192087571235e-06, "loss": 0.3244, "step": 7447 }, { "epoch": 1.0679667335818754, "grad_norm": 0.3140690326690674, "learning_rate": 8.133542044041738e-06, "loss": 0.332, "step": 7448 }, { "epoch": 1.0681101233151706, "grad_norm": 0.3096996247768402, "learning_rate": 8.13289191327925e-06, "loss": 0.3161, "step": 7449 }, { "epoch": 1.0682535130484658, "grad_norm": 0.297196626663208, "learning_rate": 8.13224169530187e-06, "loss": 0.3388, "step": 7450 }, { "epoch": 1.0683969027817608, "grad_norm": 0.3146407902240753, "learning_rate": 8.1315913901277e-06, "loss": 0.3162, "step": 7451 }, { "epoch": 1.068540292515056, "grad_norm": 0.2815333306789398, "learning_rate": 8.130940997774844e-06, "loss": 0.3129, "step": 7452 }, { "epoch": 1.068683682248351, "grad_norm": 0.29810887575149536, "learning_rate": 8.130290518261405e-06, "loss": 0.3033, "step": 7453 }, { "epoch": 1.0688270719816462, "grad_norm": 0.2969380021095276, "learning_rate": 8.129639951605493e-06, "loss": 0.3005, "step": 7454 }, { "epoch": 1.0689704617149411, "grad_norm": 0.3038579821586609, "learning_rate": 8.12898929782522e-06, "loss": 0.3189, "step": 7455 }, { "epoch": 1.0691138514482363, "grad_norm": 0.29724740982055664, "learning_rate": 8.128338556938698e-06, "loss": 0.3064, "step": 7456 }, { "epoch": 1.0692572411815313, "grad_norm": 0.3006698489189148, "learning_rate": 8.127687728964043e-06, "loss": 0.3217, "step": 7457 }, { "epoch": 1.0694006309148265, "grad_norm": 0.3226529657840729, "learning_rate": 8.127036813919374e-06, "loss": 0.2977, "step": 7458 }, { "epoch": 1.0695440206481215, "grad_norm": 0.30298224091529846, "learning_rate": 8.126385811822807e-06, "loss": 0.3262, "step": 7459 }, { "epoch": 1.0696874103814167, "grad_norm": 0.308040589094162, "learning_rate": 8.125734722692472e-06, "loss": 0.304, "step": 7460 }, { "epoch": 1.0698308001147119, "grad_norm": 0.28050512075424194, "learning_rate": 8.125083546546491e-06, "loss": 0.3121, "step": 7461 }, { "epoch": 1.0699741898480069, "grad_norm": 0.2988615930080414, "learning_rate": 8.124432283402988e-06, "loss": 0.3233, "step": 7462 }, { "epoch": 1.070117579581302, "grad_norm": 0.31098416447639465, "learning_rate": 8.123780933280097e-06, "loss": 0.3107, "step": 7463 }, { "epoch": 1.070260969314597, "grad_norm": 0.2911243736743927, "learning_rate": 8.123129496195952e-06, "loss": 0.2958, "step": 7464 }, { "epoch": 1.0704043590478922, "grad_norm": 0.3072661757469177, "learning_rate": 8.122477972168685e-06, "loss": 0.3093, "step": 7465 }, { "epoch": 1.0705477487811872, "grad_norm": 0.3268946707248688, "learning_rate": 8.121826361216437e-06, "loss": 0.3405, "step": 7466 }, { "epoch": 1.0706911385144824, "grad_norm": 0.29830387234687805, "learning_rate": 8.12117466335734e-06, "loss": 0.3298, "step": 7467 }, { "epoch": 1.0708345282477774, "grad_norm": 0.3037364184856415, "learning_rate": 8.120522878609544e-06, "loss": 0.3328, "step": 7468 }, { "epoch": 1.0709779179810726, "grad_norm": 0.293284147977829, "learning_rate": 8.119871006991194e-06, "loss": 0.2977, "step": 7469 }, { "epoch": 1.0711213077143678, "grad_norm": 0.3324541747570038, "learning_rate": 8.119219048520433e-06, "loss": 0.3253, "step": 7470 }, { "epoch": 1.0712646974476627, "grad_norm": 0.3071695864200592, "learning_rate": 8.11856700321541e-06, "loss": 0.3237, "step": 7471 }, { "epoch": 1.071408087180958, "grad_norm": 0.28148773312568665, "learning_rate": 8.11791487109428e-06, "loss": 0.3203, "step": 7472 }, { "epoch": 1.071551476914253, "grad_norm": 0.3001682460308075, "learning_rate": 8.117262652175197e-06, "loss": 0.3282, "step": 7473 }, { "epoch": 1.071694866647548, "grad_norm": 0.34751948714256287, "learning_rate": 8.116610346476317e-06, "loss": 0.299, "step": 7474 }, { "epoch": 1.071838256380843, "grad_norm": 0.2846800684928894, "learning_rate": 8.1159579540158e-06, "loss": 0.3097, "step": 7475 }, { "epoch": 1.0719816461141383, "grad_norm": 0.34300366044044495, "learning_rate": 8.115305474811805e-06, "loss": 0.3176, "step": 7476 }, { "epoch": 1.0721250358474332, "grad_norm": 0.3243752717971802, "learning_rate": 8.1146529088825e-06, "loss": 0.309, "step": 7477 }, { "epoch": 1.0722684255807284, "grad_norm": 0.290160208940506, "learning_rate": 8.114000256246046e-06, "loss": 0.311, "step": 7478 }, { "epoch": 1.0724118153140236, "grad_norm": 0.32288363575935364, "learning_rate": 8.113347516920615e-06, "loss": 0.3219, "step": 7479 }, { "epoch": 1.0725552050473186, "grad_norm": 0.3305833339691162, "learning_rate": 8.112694690924382e-06, "loss": 0.3103, "step": 7480 }, { "epoch": 1.0726985947806138, "grad_norm": 0.31976318359375, "learning_rate": 8.112041778275514e-06, "loss": 0.3011, "step": 7481 }, { "epoch": 1.0728419845139088, "grad_norm": 0.3107026517391205, "learning_rate": 8.11138877899219e-06, "loss": 0.3128, "step": 7482 }, { "epoch": 1.072985374247204, "grad_norm": 0.29547926783561707, "learning_rate": 8.110735693092592e-06, "loss": 0.3081, "step": 7483 }, { "epoch": 1.073128763980499, "grad_norm": 0.2968624532222748, "learning_rate": 8.110082520594896e-06, "loss": 0.314, "step": 7484 }, { "epoch": 1.0732721537137941, "grad_norm": 0.2647950351238251, "learning_rate": 8.109429261517286e-06, "loss": 0.2899, "step": 7485 }, { "epoch": 1.0734155434470891, "grad_norm": 0.316177636384964, "learning_rate": 8.108775915877948e-06, "loss": 0.3214, "step": 7486 }, { "epoch": 1.0735589331803843, "grad_norm": 0.3141637444496155, "learning_rate": 8.108122483695071e-06, "loss": 0.319, "step": 7487 }, { "epoch": 1.0737023229136793, "grad_norm": 0.307818204164505, "learning_rate": 8.107468964986846e-06, "loss": 0.3128, "step": 7488 }, { "epoch": 1.0738457126469745, "grad_norm": 0.29643675684928894, "learning_rate": 8.106815359771465e-06, "loss": 0.3086, "step": 7489 }, { "epoch": 1.0739891023802697, "grad_norm": 0.3127937912940979, "learning_rate": 8.106161668067122e-06, "loss": 0.3016, "step": 7490 }, { "epoch": 1.0741324921135647, "grad_norm": 0.29506775736808777, "learning_rate": 8.10550788989202e-06, "loss": 0.3064, "step": 7491 }, { "epoch": 1.0742758818468598, "grad_norm": 0.2815675735473633, "learning_rate": 8.104854025264352e-06, "loss": 0.312, "step": 7492 }, { "epoch": 1.0744192715801548, "grad_norm": 0.3010847866535187, "learning_rate": 8.104200074202327e-06, "loss": 0.3213, "step": 7493 }, { "epoch": 1.07456266131345, "grad_norm": 0.29824331402778625, "learning_rate": 8.103546036724146e-06, "loss": 0.3199, "step": 7494 }, { "epoch": 1.074706051046745, "grad_norm": 0.2942947745323181, "learning_rate": 8.102891912848016e-06, "loss": 0.3076, "step": 7495 }, { "epoch": 1.0748494407800402, "grad_norm": 0.311026006937027, "learning_rate": 8.102237702592149e-06, "loss": 0.325, "step": 7496 }, { "epoch": 1.0749928305133352, "grad_norm": 0.2750471830368042, "learning_rate": 8.101583405974757e-06, "loss": 0.2989, "step": 7497 }, { "epoch": 1.0751362202466304, "grad_norm": 0.2946546971797943, "learning_rate": 8.100929023014053e-06, "loss": 0.3192, "step": 7498 }, { "epoch": 1.0752796099799253, "grad_norm": 0.26758044958114624, "learning_rate": 8.100274553728255e-06, "loss": 0.3147, "step": 7499 }, { "epoch": 1.0754229997132205, "grad_norm": 0.28968387842178345, "learning_rate": 8.099619998135585e-06, "loss": 0.3012, "step": 7500 }, { "epoch": 1.0755663894465157, "grad_norm": 0.30703333020210266, "learning_rate": 8.09896535625426e-06, "loss": 0.3232, "step": 7501 }, { "epoch": 1.0757097791798107, "grad_norm": 0.2847241163253784, "learning_rate": 8.098310628102509e-06, "loss": 0.3117, "step": 7502 }, { "epoch": 1.075853168913106, "grad_norm": 0.29728004336357117, "learning_rate": 8.097655813698554e-06, "loss": 0.3172, "step": 7503 }, { "epoch": 1.0759965586464009, "grad_norm": 0.2936612665653229, "learning_rate": 8.097000913060626e-06, "loss": 0.3012, "step": 7504 }, { "epoch": 1.076139948379696, "grad_norm": 0.299515038728714, "learning_rate": 8.096345926206959e-06, "loss": 0.3161, "step": 7505 }, { "epoch": 1.076283338112991, "grad_norm": 0.2994157075881958, "learning_rate": 8.095690853155781e-06, "loss": 0.3009, "step": 7506 }, { "epoch": 1.0764267278462862, "grad_norm": 0.29007020592689514, "learning_rate": 8.095035693925335e-06, "loss": 0.3073, "step": 7507 }, { "epoch": 1.0765701175795812, "grad_norm": 0.2905675172805786, "learning_rate": 8.094380448533856e-06, "loss": 0.3056, "step": 7508 }, { "epoch": 1.0767135073128764, "grad_norm": 0.2913705110549927, "learning_rate": 8.093725116999584e-06, "loss": 0.3147, "step": 7509 }, { "epoch": 1.0768568970461714, "grad_norm": 0.2794024646282196, "learning_rate": 8.093069699340765e-06, "loss": 0.3124, "step": 7510 }, { "epoch": 1.0770002867794666, "grad_norm": 0.282781183719635, "learning_rate": 8.092414195575642e-06, "loss": 0.3167, "step": 7511 }, { "epoch": 1.0771436765127618, "grad_norm": 0.31274908781051636, "learning_rate": 8.091758605722466e-06, "loss": 0.3183, "step": 7512 }, { "epoch": 1.0772870662460567, "grad_norm": 0.30435308814048767, "learning_rate": 8.091102929799486e-06, "loss": 0.3027, "step": 7513 }, { "epoch": 1.077430455979352, "grad_norm": 0.28274503350257874, "learning_rate": 8.090447167824955e-06, "loss": 0.3207, "step": 7514 }, { "epoch": 1.077573845712647, "grad_norm": 0.2984977066516876, "learning_rate": 8.08979131981713e-06, "loss": 0.3062, "step": 7515 }, { "epoch": 1.0777172354459421, "grad_norm": 0.30995574593544006, "learning_rate": 8.089135385794267e-06, "loss": 0.3044, "step": 7516 }, { "epoch": 1.077860625179237, "grad_norm": 0.2965910732746124, "learning_rate": 8.088479365774627e-06, "loss": 0.324, "step": 7517 }, { "epoch": 1.0780040149125323, "grad_norm": 0.31770431995391846, "learning_rate": 8.087823259776472e-06, "loss": 0.3046, "step": 7518 }, { "epoch": 1.0781474046458273, "grad_norm": 0.3017013370990753, "learning_rate": 8.087167067818066e-06, "loss": 0.3092, "step": 7519 }, { "epoch": 1.0782907943791225, "grad_norm": 0.2911812663078308, "learning_rate": 8.08651078991768e-06, "loss": 0.3116, "step": 7520 }, { "epoch": 1.0784341841124176, "grad_norm": 0.2929612994194031, "learning_rate": 8.085854426093582e-06, "loss": 0.2967, "step": 7521 }, { "epoch": 1.0785775738457126, "grad_norm": 0.3196532428264618, "learning_rate": 8.085197976364043e-06, "loss": 0.3393, "step": 7522 }, { "epoch": 1.0787209635790078, "grad_norm": 0.3182004988193512, "learning_rate": 8.084541440747337e-06, "loss": 0.3119, "step": 7523 }, { "epoch": 1.0788643533123028, "grad_norm": 0.29296794533729553, "learning_rate": 8.083884819261744e-06, "loss": 0.3167, "step": 7524 }, { "epoch": 1.079007743045598, "grad_norm": 0.2879837453365326, "learning_rate": 8.08322811192554e-06, "loss": 0.3095, "step": 7525 }, { "epoch": 1.079151132778893, "grad_norm": 0.3480905294418335, "learning_rate": 8.08257131875701e-06, "loss": 0.2923, "step": 7526 }, { "epoch": 1.0792945225121882, "grad_norm": 0.3304230570793152, "learning_rate": 8.081914439774435e-06, "loss": 0.3205, "step": 7527 }, { "epoch": 1.0794379122454831, "grad_norm": 0.31471771001815796, "learning_rate": 8.081257474996103e-06, "loss": 0.2956, "step": 7528 }, { "epoch": 1.0795813019787783, "grad_norm": 0.3311372697353363, "learning_rate": 8.080600424440304e-06, "loss": 0.3168, "step": 7529 }, { "epoch": 1.0797246917120735, "grad_norm": 0.3257831931114197, "learning_rate": 8.079943288125328e-06, "loss": 0.3241, "step": 7530 }, { "epoch": 1.0798680814453685, "grad_norm": 0.28049999475479126, "learning_rate": 8.079286066069467e-06, "loss": 0.3375, "step": 7531 }, { "epoch": 1.0800114711786637, "grad_norm": 0.34763264656066895, "learning_rate": 8.078628758291019e-06, "loss": 0.3232, "step": 7532 }, { "epoch": 1.0801548609119587, "grad_norm": 0.3283713459968567, "learning_rate": 8.077971364808283e-06, "loss": 0.3262, "step": 7533 }, { "epoch": 1.0802982506452539, "grad_norm": 0.2842782139778137, "learning_rate": 8.077313885639559e-06, "loss": 0.2845, "step": 7534 }, { "epoch": 1.0804416403785488, "grad_norm": 0.3088345229625702, "learning_rate": 8.076656320803149e-06, "loss": 0.3085, "step": 7535 }, { "epoch": 1.080585030111844, "grad_norm": 0.3191666007041931, "learning_rate": 8.075998670317361e-06, "loss": 0.2954, "step": 7536 }, { "epoch": 1.080728419845139, "grad_norm": 0.3372752368450165, "learning_rate": 8.075340934200501e-06, "loss": 0.3057, "step": 7537 }, { "epoch": 1.0808718095784342, "grad_norm": 0.3108564019203186, "learning_rate": 8.07468311247088e-06, "loss": 0.2965, "step": 7538 }, { "epoch": 1.0810151993117292, "grad_norm": 0.314549058675766, "learning_rate": 8.074025205146814e-06, "loss": 0.3003, "step": 7539 }, { "epoch": 1.0811585890450244, "grad_norm": 0.31770434975624084, "learning_rate": 8.073367212246612e-06, "loss": 0.332, "step": 7540 }, { "epoch": 1.0813019787783196, "grad_norm": 0.3130876421928406, "learning_rate": 8.072709133788594e-06, "loss": 0.3187, "step": 7541 }, { "epoch": 1.0814453685116145, "grad_norm": 0.29537147283554077, "learning_rate": 8.072050969791083e-06, "loss": 0.3028, "step": 7542 }, { "epoch": 1.0815887582449097, "grad_norm": 0.3395201861858368, "learning_rate": 8.071392720272397e-06, "loss": 0.3322, "step": 7543 }, { "epoch": 1.0817321479782047, "grad_norm": 0.3376358449459076, "learning_rate": 8.070734385250863e-06, "loss": 0.2896, "step": 7544 }, { "epoch": 1.0818755377115, "grad_norm": 0.27788788080215454, "learning_rate": 8.070075964744805e-06, "loss": 0.3057, "step": 7545 }, { "epoch": 1.0820189274447949, "grad_norm": 0.30821454524993896, "learning_rate": 8.069417458772557e-06, "loss": 0.3191, "step": 7546 }, { "epoch": 1.08216231717809, "grad_norm": 0.3288131654262543, "learning_rate": 8.06875886735245e-06, "loss": 0.3174, "step": 7547 }, { "epoch": 1.082305706911385, "grad_norm": 0.2939583659172058, "learning_rate": 8.068100190502814e-06, "loss": 0.2975, "step": 7548 }, { "epoch": 1.0824490966446803, "grad_norm": 0.2967148721218109, "learning_rate": 8.06744142824199e-06, "loss": 0.309, "step": 7549 }, { "epoch": 1.0825924863779752, "grad_norm": 0.30860093235969543, "learning_rate": 8.066782580588315e-06, "loss": 0.3376, "step": 7550 }, { "epoch": 1.0827358761112704, "grad_norm": 0.28842076659202576, "learning_rate": 8.06612364756013e-06, "loss": 0.3137, "step": 7551 }, { "epoch": 1.0828792658445656, "grad_norm": 0.2844552993774414, "learning_rate": 8.06546462917578e-06, "loss": 0.2988, "step": 7552 }, { "epoch": 1.0830226555778606, "grad_norm": 0.2756803333759308, "learning_rate": 8.064805525453609e-06, "loss": 0.3171, "step": 7553 }, { "epoch": 1.0831660453111558, "grad_norm": 0.2704446613788605, "learning_rate": 8.064146336411968e-06, "loss": 0.306, "step": 7554 }, { "epoch": 1.0833094350444508, "grad_norm": 0.28455662727355957, "learning_rate": 8.063487062069207e-06, "loss": 0.3164, "step": 7555 }, { "epoch": 1.083452824777746, "grad_norm": 0.2947496473789215, "learning_rate": 8.062827702443677e-06, "loss": 0.3219, "step": 7556 }, { "epoch": 1.083596214511041, "grad_norm": 0.30237704515457153, "learning_rate": 8.062168257553735e-06, "loss": 0.3033, "step": 7557 }, { "epoch": 1.0837396042443361, "grad_norm": 0.30879896879196167, "learning_rate": 8.061508727417742e-06, "loss": 0.2891, "step": 7558 }, { "epoch": 1.083882993977631, "grad_norm": 0.30222535133361816, "learning_rate": 8.060849112054053e-06, "loss": 0.3012, "step": 7559 }, { "epoch": 1.0840263837109263, "grad_norm": 0.29868659377098083, "learning_rate": 8.060189411481034e-06, "loss": 0.298, "step": 7560 }, { "epoch": 1.0841697734442215, "grad_norm": 0.32274654507637024, "learning_rate": 8.059529625717049e-06, "loss": 0.305, "step": 7561 }, { "epoch": 1.0843131631775165, "grad_norm": 0.2796581983566284, "learning_rate": 8.058869754780467e-06, "loss": 0.3181, "step": 7562 }, { "epoch": 1.0844565529108117, "grad_norm": 0.2908715009689331, "learning_rate": 8.058209798689653e-06, "loss": 0.3145, "step": 7563 }, { "epoch": 1.0845999426441066, "grad_norm": 0.31784725189208984, "learning_rate": 8.057549757462985e-06, "loss": 0.3177, "step": 7564 }, { "epoch": 1.0847433323774018, "grad_norm": 0.299551784992218, "learning_rate": 8.056889631118835e-06, "loss": 0.3251, "step": 7565 }, { "epoch": 1.0848867221106968, "grad_norm": 0.33925947546958923, "learning_rate": 8.056229419675583e-06, "loss": 0.327, "step": 7566 }, { "epoch": 1.085030111843992, "grad_norm": 0.31286847591400146, "learning_rate": 8.0555691231516e-06, "loss": 0.2988, "step": 7567 }, { "epoch": 1.085173501577287, "grad_norm": 0.28876587748527527, "learning_rate": 8.054908741565277e-06, "loss": 0.3241, "step": 7568 }, { "epoch": 1.0853168913105822, "grad_norm": 0.2782917618751526, "learning_rate": 8.054248274934993e-06, "loss": 0.3102, "step": 7569 }, { "epoch": 1.0854602810438774, "grad_norm": 0.3336745500564575, "learning_rate": 8.053587723279135e-06, "loss": 0.312, "step": 7570 }, { "epoch": 1.0856036707771723, "grad_norm": 0.2863195836544037, "learning_rate": 8.052927086616091e-06, "loss": 0.3278, "step": 7571 }, { "epoch": 1.0857470605104675, "grad_norm": 0.3163747787475586, "learning_rate": 8.052266364964253e-06, "loss": 0.3128, "step": 7572 }, { "epoch": 1.0858904502437625, "grad_norm": 0.2977108657360077, "learning_rate": 8.051605558342014e-06, "loss": 0.3168, "step": 7573 }, { "epoch": 1.0860338399770577, "grad_norm": 0.31039902567863464, "learning_rate": 8.050944666767775e-06, "loss": 0.3297, "step": 7574 }, { "epoch": 1.0861772297103527, "grad_norm": 0.30016860365867615, "learning_rate": 8.050283690259925e-06, "loss": 0.3021, "step": 7575 }, { "epoch": 1.0863206194436479, "grad_norm": 0.31223633885383606, "learning_rate": 8.049622628836872e-06, "loss": 0.3199, "step": 7576 }, { "epoch": 1.0864640091769429, "grad_norm": 0.3092620372772217, "learning_rate": 8.048961482517016e-06, "loss": 0.3071, "step": 7577 }, { "epoch": 1.086607398910238, "grad_norm": 0.33522820472717285, "learning_rate": 8.048300251318762e-06, "loss": 0.2999, "step": 7578 }, { "epoch": 1.086750788643533, "grad_norm": 0.30330926179885864, "learning_rate": 8.047638935260517e-06, "loss": 0.3074, "step": 7579 }, { "epoch": 1.0868941783768282, "grad_norm": 0.3111434280872345, "learning_rate": 8.046977534360694e-06, "loss": 0.3037, "step": 7580 }, { "epoch": 1.0870375681101234, "grad_norm": 0.30172330141067505, "learning_rate": 8.046316048637704e-06, "loss": 0.31, "step": 7581 }, { "epoch": 1.0871809578434184, "grad_norm": 0.3326817452907562, "learning_rate": 8.045654478109959e-06, "loss": 0.318, "step": 7582 }, { "epoch": 1.0873243475767136, "grad_norm": 0.30960819125175476, "learning_rate": 8.044992822795879e-06, "loss": 0.3064, "step": 7583 }, { "epoch": 1.0874677373100086, "grad_norm": 0.30339735746383667, "learning_rate": 8.044331082713884e-06, "loss": 0.3081, "step": 7584 }, { "epoch": 1.0876111270433038, "grad_norm": 0.3372074067592621, "learning_rate": 8.043669257882394e-06, "loss": 0.318, "step": 7585 }, { "epoch": 1.0877545167765987, "grad_norm": 0.32525721192359924, "learning_rate": 8.043007348319833e-06, "loss": 0.3295, "step": 7586 }, { "epoch": 1.087897906509894, "grad_norm": 0.2889457643032074, "learning_rate": 8.04234535404463e-06, "loss": 0.2866, "step": 7587 }, { "epoch": 1.088041296243189, "grad_norm": 0.31219804286956787, "learning_rate": 8.041683275075211e-06, "loss": 0.2977, "step": 7588 }, { "epoch": 1.088184685976484, "grad_norm": 0.3142346739768982, "learning_rate": 8.04102111143001e-06, "loss": 0.3212, "step": 7589 }, { "epoch": 1.088328075709779, "grad_norm": 0.31854531168937683, "learning_rate": 8.04035886312746e-06, "loss": 0.3138, "step": 7590 }, { "epoch": 1.0884714654430743, "grad_norm": 0.29048100113868713, "learning_rate": 8.039696530185993e-06, "loss": 0.311, "step": 7591 }, { "epoch": 1.0886148551763695, "grad_norm": 0.27759629487991333, "learning_rate": 8.039034112624054e-06, "loss": 0.3011, "step": 7592 }, { "epoch": 1.0887582449096644, "grad_norm": 0.2970355749130249, "learning_rate": 8.038371610460079e-06, "loss": 0.309, "step": 7593 }, { "epoch": 1.0889016346429596, "grad_norm": 0.28184112906455994, "learning_rate": 8.037709023712512e-06, "loss": 0.3206, "step": 7594 }, { "epoch": 1.0890450243762546, "grad_norm": 0.3168441653251648, "learning_rate": 8.037046352399798e-06, "loss": 0.3072, "step": 7595 }, { "epoch": 1.0891884141095498, "grad_norm": 0.317848265171051, "learning_rate": 8.036383596540385e-06, "loss": 0.3006, "step": 7596 }, { "epoch": 1.0893318038428448, "grad_norm": 0.2980131208896637, "learning_rate": 8.035720756152725e-06, "loss": 0.3099, "step": 7597 }, { "epoch": 1.08947519357614, "grad_norm": 0.29596132040023804, "learning_rate": 8.035057831255269e-06, "loss": 0.2931, "step": 7598 }, { "epoch": 1.089618583309435, "grad_norm": 0.3003843128681183, "learning_rate": 8.034394821866471e-06, "loss": 0.3409, "step": 7599 }, { "epoch": 1.0897619730427301, "grad_norm": 0.3210751712322235, "learning_rate": 8.033731728004788e-06, "loss": 0.3189, "step": 7600 }, { "epoch": 1.0899053627760251, "grad_norm": 0.34049272537231445, "learning_rate": 8.033068549688682e-06, "loss": 0.3285, "step": 7601 }, { "epoch": 1.0900487525093203, "grad_norm": 0.28177040815353394, "learning_rate": 8.032405286936614e-06, "loss": 0.3021, "step": 7602 }, { "epoch": 1.0901921422426155, "grad_norm": 0.29693350195884705, "learning_rate": 8.031741939767046e-06, "loss": 0.2999, "step": 7603 }, { "epoch": 1.0903355319759105, "grad_norm": 0.3104785978794098, "learning_rate": 8.031078508198447e-06, "loss": 0.3125, "step": 7604 }, { "epoch": 1.0904789217092057, "grad_norm": 0.34439367055892944, "learning_rate": 8.030414992249282e-06, "loss": 0.3177, "step": 7605 }, { "epoch": 1.0906223114425007, "grad_norm": 0.299039751291275, "learning_rate": 8.029751391938028e-06, "loss": 0.3173, "step": 7606 }, { "epoch": 1.0907657011757959, "grad_norm": 0.2886817753314972, "learning_rate": 8.029087707283157e-06, "loss": 0.3037, "step": 7607 }, { "epoch": 1.0909090909090908, "grad_norm": 0.3221057057380676, "learning_rate": 8.028423938303143e-06, "loss": 0.3222, "step": 7608 }, { "epoch": 1.091052480642386, "grad_norm": 0.3291594684123993, "learning_rate": 8.027760085016463e-06, "loss": 0.3102, "step": 7609 }, { "epoch": 1.091195870375681, "grad_norm": 0.27983367443084717, "learning_rate": 8.027096147441603e-06, "loss": 0.3018, "step": 7610 }, { "epoch": 1.0913392601089762, "grad_norm": 0.3083780109882355, "learning_rate": 8.02643212559704e-06, "loss": 0.3155, "step": 7611 }, { "epoch": 1.0914826498422714, "grad_norm": 0.31791970133781433, "learning_rate": 8.025768019501264e-06, "loss": 0.3221, "step": 7612 }, { "epoch": 1.0916260395755664, "grad_norm": 0.2937382459640503, "learning_rate": 8.02510382917276e-06, "loss": 0.3259, "step": 7613 }, { "epoch": 1.0917694293088616, "grad_norm": 0.3088129162788391, "learning_rate": 8.024439554630019e-06, "loss": 0.3062, "step": 7614 }, { "epoch": 1.0919128190421565, "grad_norm": 0.32701119780540466, "learning_rate": 8.023775195891534e-06, "loss": 0.3112, "step": 7615 }, { "epoch": 1.0920562087754517, "grad_norm": 0.28579550981521606, "learning_rate": 8.023110752975798e-06, "loss": 0.2933, "step": 7616 }, { "epoch": 1.0921995985087467, "grad_norm": 0.32876071333885193, "learning_rate": 8.022446225901307e-06, "loss": 0.3228, "step": 7617 }, { "epoch": 1.092342988242042, "grad_norm": 0.3104507327079773, "learning_rate": 8.021781614686566e-06, "loss": 0.3064, "step": 7618 }, { "epoch": 1.0924863779753369, "grad_norm": 0.317502498626709, "learning_rate": 8.021116919350073e-06, "loss": 0.2953, "step": 7619 }, { "epoch": 1.092629767708632, "grad_norm": 0.342532753944397, "learning_rate": 8.020452139910332e-06, "loss": 0.3004, "step": 7620 }, { "epoch": 1.0927731574419273, "grad_norm": 0.2921208143234253, "learning_rate": 8.019787276385848e-06, "loss": 0.3308, "step": 7621 }, { "epoch": 1.0929165471752222, "grad_norm": 0.27922356128692627, "learning_rate": 8.019122328795133e-06, "loss": 0.308, "step": 7622 }, { "epoch": 1.0930599369085174, "grad_norm": 0.31897106766700745, "learning_rate": 8.018457297156697e-06, "loss": 0.3338, "step": 7623 }, { "epoch": 1.0932033266418124, "grad_norm": 0.30774641036987305, "learning_rate": 8.017792181489053e-06, "loss": 0.3141, "step": 7624 }, { "epoch": 1.0933467163751076, "grad_norm": 0.29040342569351196, "learning_rate": 8.017126981810715e-06, "loss": 0.3175, "step": 7625 }, { "epoch": 1.0934901061084026, "grad_norm": 0.29910871386528015, "learning_rate": 8.016461698140205e-06, "loss": 0.3327, "step": 7626 }, { "epoch": 1.0936334958416978, "grad_norm": 0.2867583632469177, "learning_rate": 8.01579633049604e-06, "loss": 0.2998, "step": 7627 }, { "epoch": 1.0937768855749928, "grad_norm": 0.2906433045864105, "learning_rate": 8.015130878896745e-06, "loss": 0.2822, "step": 7628 }, { "epoch": 1.093920275308288, "grad_norm": 0.27694904804229736, "learning_rate": 8.014465343360845e-06, "loss": 0.2991, "step": 7629 }, { "epoch": 1.094063665041583, "grad_norm": 0.31291887164115906, "learning_rate": 8.013799723906866e-06, "loss": 0.3081, "step": 7630 }, { "epoch": 1.0942070547748781, "grad_norm": 0.3027890622615814, "learning_rate": 8.01313402055334e-06, "loss": 0.3056, "step": 7631 }, { "epoch": 1.0943504445081733, "grad_norm": 0.2669545114040375, "learning_rate": 8.012468233318797e-06, "loss": 0.3098, "step": 7632 }, { "epoch": 1.0944938342414683, "grad_norm": 0.2851068377494812, "learning_rate": 8.011802362221772e-06, "loss": 0.3192, "step": 7633 }, { "epoch": 1.0946372239747635, "grad_norm": 0.3091042637825012, "learning_rate": 8.011136407280804e-06, "loss": 0.3182, "step": 7634 }, { "epoch": 1.0947806137080585, "grad_norm": 0.34838372468948364, "learning_rate": 8.010470368514428e-06, "loss": 0.3173, "step": 7635 }, { "epoch": 1.0949240034413537, "grad_norm": 0.29238781332969666, "learning_rate": 8.00980424594119e-06, "loss": 0.3124, "step": 7636 }, { "epoch": 1.0950673931746486, "grad_norm": 0.3012780547142029, "learning_rate": 8.009138039579632e-06, "loss": 0.279, "step": 7637 }, { "epoch": 1.0952107829079438, "grad_norm": 0.2883204519748688, "learning_rate": 8.0084717494483e-06, "loss": 0.3164, "step": 7638 }, { "epoch": 1.0953541726412388, "grad_norm": 0.2770512104034424, "learning_rate": 8.00780537556574e-06, "loss": 0.3045, "step": 7639 }, { "epoch": 1.095497562374534, "grad_norm": 0.3133576810359955, "learning_rate": 8.007138917950507e-06, "loss": 0.3046, "step": 7640 }, { "epoch": 1.095640952107829, "grad_norm": 0.2843916714191437, "learning_rate": 8.006472376621152e-06, "loss": 0.2899, "step": 7641 }, { "epoch": 1.0957843418411242, "grad_norm": 0.2900039553642273, "learning_rate": 8.005805751596231e-06, "loss": 0.3103, "step": 7642 }, { "epoch": 1.0959277315744194, "grad_norm": 0.305836945772171, "learning_rate": 8.005139042894301e-06, "loss": 0.3205, "step": 7643 }, { "epoch": 1.0960711213077143, "grad_norm": 0.32303711771965027, "learning_rate": 8.004472250533924e-06, "loss": 0.3297, "step": 7644 }, { "epoch": 1.0962145110410095, "grad_norm": 0.3179998993873596, "learning_rate": 8.00380537453366e-06, "loss": 0.2929, "step": 7645 }, { "epoch": 1.0963579007743045, "grad_norm": 0.3059106171131134, "learning_rate": 8.003138414912074e-06, "loss": 0.3138, "step": 7646 }, { "epoch": 1.0965012905075997, "grad_norm": 0.27868831157684326, "learning_rate": 8.00247137168774e-06, "loss": 0.3151, "step": 7647 }, { "epoch": 1.0966446802408947, "grad_norm": 0.3091704845428467, "learning_rate": 8.001804244879215e-06, "loss": 0.2957, "step": 7648 }, { "epoch": 1.0967880699741899, "grad_norm": 0.3176520764827728, "learning_rate": 8.00113703450508e-06, "loss": 0.3061, "step": 7649 }, { "epoch": 1.0969314597074848, "grad_norm": 0.2946881353855133, "learning_rate": 8.000469740583907e-06, "loss": 0.329, "step": 7650 }, { "epoch": 1.09707484944078, "grad_norm": 0.28693294525146484, "learning_rate": 7.999802363134271e-06, "loss": 0.3206, "step": 7651 }, { "epoch": 1.0972182391740752, "grad_norm": 0.26728007197380066, "learning_rate": 7.999134902174752e-06, "loss": 0.2928, "step": 7652 }, { "epoch": 1.0973616289073702, "grad_norm": 0.3174993693828583, "learning_rate": 7.998467357723933e-06, "loss": 0.3347, "step": 7653 }, { "epoch": 1.0975050186406654, "grad_norm": 0.30996963381767273, "learning_rate": 7.997799729800393e-06, "loss": 0.3015, "step": 7654 }, { "epoch": 1.0976484083739604, "grad_norm": 0.29476994276046753, "learning_rate": 7.99713201842272e-06, "loss": 0.3146, "step": 7655 }, { "epoch": 1.0977917981072556, "grad_norm": 0.27432745695114136, "learning_rate": 7.996464223609504e-06, "loss": 0.3102, "step": 7656 }, { "epoch": 1.0979351878405506, "grad_norm": 0.30405914783477783, "learning_rate": 7.99579634537933e-06, "loss": 0.3084, "step": 7657 }, { "epoch": 1.0980785775738457, "grad_norm": 0.29809388518333435, "learning_rate": 7.995128383750797e-06, "loss": 0.3231, "step": 7658 }, { "epoch": 1.0982219673071407, "grad_norm": 0.30093833804130554, "learning_rate": 7.994460338742496e-06, "loss": 0.3156, "step": 7659 }, { "epoch": 1.098365357040436, "grad_norm": 0.2956687808036804, "learning_rate": 7.993792210373026e-06, "loss": 0.3098, "step": 7660 }, { "epoch": 1.0985087467737311, "grad_norm": 0.2990400195121765, "learning_rate": 7.993123998660987e-06, "loss": 0.2987, "step": 7661 }, { "epoch": 1.098652136507026, "grad_norm": 0.2869865894317627, "learning_rate": 7.992455703624979e-06, "loss": 0.3023, "step": 7662 }, { "epoch": 1.0987955262403213, "grad_norm": 0.3068010210990906, "learning_rate": 7.991787325283606e-06, "loss": 0.314, "step": 7663 }, { "epoch": 1.0989389159736163, "grad_norm": 0.3155202567577362, "learning_rate": 7.991118863655479e-06, "loss": 0.3141, "step": 7664 }, { "epoch": 1.0990823057069115, "grad_norm": 0.34037137031555176, "learning_rate": 7.990450318759202e-06, "loss": 0.3329, "step": 7665 }, { "epoch": 1.0992256954402064, "grad_norm": 0.33720672130584717, "learning_rate": 7.98978169061339e-06, "loss": 0.2807, "step": 7666 }, { "epoch": 1.0993690851735016, "grad_norm": 0.3350808322429657, "learning_rate": 7.989112979236654e-06, "loss": 0.3282, "step": 7667 }, { "epoch": 1.0995124749067966, "grad_norm": 0.30208098888397217, "learning_rate": 7.98844418464761e-06, "loss": 0.3077, "step": 7668 }, { "epoch": 1.0996558646400918, "grad_norm": 0.3126033842563629, "learning_rate": 7.987775306864878e-06, "loss": 0.303, "step": 7669 }, { "epoch": 1.0997992543733868, "grad_norm": 0.32045096158981323, "learning_rate": 7.987106345907078e-06, "loss": 0.2992, "step": 7670 }, { "epoch": 1.099942644106682, "grad_norm": 0.3356887400150299, "learning_rate": 7.986437301792831e-06, "loss": 0.317, "step": 7671 }, { "epoch": 1.1000860338399772, "grad_norm": 0.3546949326992035, "learning_rate": 7.985768174540764e-06, "loss": 0.2955, "step": 7672 }, { "epoch": 1.1002294235732721, "grad_norm": 0.3024248480796814, "learning_rate": 7.985098964169505e-06, "loss": 0.3118, "step": 7673 }, { "epoch": 1.1003728133065673, "grad_norm": 0.28959909081459045, "learning_rate": 7.984429670697683e-06, "loss": 0.3149, "step": 7674 }, { "epoch": 1.1005162030398623, "grad_norm": 0.30001020431518555, "learning_rate": 7.983760294143928e-06, "loss": 0.2937, "step": 7675 }, { "epoch": 1.1006595927731575, "grad_norm": 0.30873769521713257, "learning_rate": 7.983090834526876e-06, "loss": 0.3099, "step": 7676 }, { "epoch": 1.1008029825064525, "grad_norm": 0.2882665991783142, "learning_rate": 7.982421291865166e-06, "loss": 0.3051, "step": 7677 }, { "epoch": 1.1009463722397477, "grad_norm": 0.3045024573802948, "learning_rate": 7.981751666177433e-06, "loss": 0.3245, "step": 7678 }, { "epoch": 1.1010897619730426, "grad_norm": 0.30274051427841187, "learning_rate": 7.981081957482322e-06, "loss": 0.3113, "step": 7679 }, { "epoch": 1.1012331517063378, "grad_norm": 0.30846384167671204, "learning_rate": 7.980412165798475e-06, "loss": 0.2939, "step": 7680 }, { "epoch": 1.1013765414396328, "grad_norm": 0.266401082277298, "learning_rate": 7.979742291144537e-06, "loss": 0.2979, "step": 7681 }, { "epoch": 1.101519931172928, "grad_norm": 0.29877811670303345, "learning_rate": 7.979072333539157e-06, "loss": 0.3048, "step": 7682 }, { "epoch": 1.1016633209062232, "grad_norm": 0.3264997899532318, "learning_rate": 7.978402293000989e-06, "loss": 0.3186, "step": 7683 }, { "epoch": 1.1018067106395182, "grad_norm": 0.27781394124031067, "learning_rate": 7.97773216954868e-06, "loss": 0.3116, "step": 7684 }, { "epoch": 1.1019501003728134, "grad_norm": 0.2881906032562256, "learning_rate": 7.977061963200889e-06, "loss": 0.312, "step": 7685 }, { "epoch": 1.1020934901061084, "grad_norm": 0.2996339797973633, "learning_rate": 7.976391673976272e-06, "loss": 0.3097, "step": 7686 }, { "epoch": 1.1022368798394035, "grad_norm": 0.2849932610988617, "learning_rate": 7.975721301893489e-06, "loss": 0.3241, "step": 7687 }, { "epoch": 1.1023802695726985, "grad_norm": 0.29526597261428833, "learning_rate": 7.975050846971204e-06, "loss": 0.3012, "step": 7688 }, { "epoch": 1.1025236593059937, "grad_norm": 0.29446765780448914, "learning_rate": 7.974380309228078e-06, "loss": 0.3049, "step": 7689 }, { "epoch": 1.1026670490392887, "grad_norm": 0.28159478306770325, "learning_rate": 7.97370968868278e-06, "loss": 0.3259, "step": 7690 }, { "epoch": 1.102810438772584, "grad_norm": 0.3031311333179474, "learning_rate": 7.97303898535398e-06, "loss": 0.2932, "step": 7691 }, { "epoch": 1.1029538285058789, "grad_norm": 0.2814997732639313, "learning_rate": 7.972368199260348e-06, "loss": 0.3053, "step": 7692 }, { "epoch": 1.103097218239174, "grad_norm": 0.2997020483016968, "learning_rate": 7.971697330420557e-06, "loss": 0.2972, "step": 7693 }, { "epoch": 1.1032406079724693, "grad_norm": 0.2962515652179718, "learning_rate": 7.971026378853283e-06, "loss": 0.3055, "step": 7694 }, { "epoch": 1.1033839977057642, "grad_norm": 0.2948472499847412, "learning_rate": 7.970355344577205e-06, "loss": 0.3004, "step": 7695 }, { "epoch": 1.1035273874390594, "grad_norm": 0.2814418375492096, "learning_rate": 7.969684227611006e-06, "loss": 0.3119, "step": 7696 }, { "epoch": 1.1036707771723544, "grad_norm": 0.31876763701438904, "learning_rate": 7.969013027973363e-06, "loss": 0.3031, "step": 7697 }, { "epoch": 1.1038141669056496, "grad_norm": 0.30918413400650024, "learning_rate": 7.968341745682967e-06, "loss": 0.3089, "step": 7698 }, { "epoch": 1.1039575566389446, "grad_norm": 0.2923026978969574, "learning_rate": 7.9676703807585e-06, "loss": 0.3183, "step": 7699 }, { "epoch": 1.1041009463722398, "grad_norm": 0.2974618375301361, "learning_rate": 7.966998933218658e-06, "loss": 0.3161, "step": 7700 }, { "epoch": 1.1042443361055347, "grad_norm": 0.29962071776390076, "learning_rate": 7.966327403082128e-06, "loss": 0.3315, "step": 7701 }, { "epoch": 1.10438772583883, "grad_norm": 0.3004371225833893, "learning_rate": 7.965655790367606e-06, "loss": 0.2983, "step": 7702 }, { "epoch": 1.1045311155721251, "grad_norm": 0.2989434003829956, "learning_rate": 7.96498409509379e-06, "loss": 0.3173, "step": 7703 }, { "epoch": 1.10467450530542, "grad_norm": 0.31644997000694275, "learning_rate": 7.964312317279378e-06, "loss": 0.3173, "step": 7704 }, { "epoch": 1.1048178950387153, "grad_norm": 0.2897411286830902, "learning_rate": 7.96364045694307e-06, "loss": 0.3209, "step": 7705 }, { "epoch": 1.1049612847720103, "grad_norm": 0.27871957421302795, "learning_rate": 7.96296851410357e-06, "loss": 0.3131, "step": 7706 }, { "epoch": 1.1051046745053055, "grad_norm": 0.28212982416152954, "learning_rate": 7.962296488779585e-06, "loss": 0.3024, "step": 7707 }, { "epoch": 1.1052480642386004, "grad_norm": 0.30124208331108093, "learning_rate": 7.961624380989823e-06, "loss": 0.3213, "step": 7708 }, { "epoch": 1.1053914539718956, "grad_norm": 0.29233554005622864, "learning_rate": 7.960952190752995e-06, "loss": 0.3109, "step": 7709 }, { "epoch": 1.1055348437051906, "grad_norm": 0.3190445303916931, "learning_rate": 7.960279918087812e-06, "loss": 0.3039, "step": 7710 }, { "epoch": 1.1056782334384858, "grad_norm": 0.30175668001174927, "learning_rate": 7.959607563012988e-06, "loss": 0.3051, "step": 7711 }, { "epoch": 1.105821623171781, "grad_norm": 0.2876156270503998, "learning_rate": 7.958935125547246e-06, "loss": 0.2949, "step": 7712 }, { "epoch": 1.105965012905076, "grad_norm": 0.29796674847602844, "learning_rate": 7.958262605709299e-06, "loss": 0.3028, "step": 7713 }, { "epoch": 1.1061084026383712, "grad_norm": 0.30619099736213684, "learning_rate": 7.957590003517871e-06, "loss": 0.3092, "step": 7714 }, { "epoch": 1.1062517923716662, "grad_norm": 0.3207838535308838, "learning_rate": 7.95691731899169e-06, "loss": 0.3307, "step": 7715 }, { "epoch": 1.1063951821049613, "grad_norm": 0.2941437363624573, "learning_rate": 7.956244552149478e-06, "loss": 0.3116, "step": 7716 }, { "epoch": 1.1065385718382563, "grad_norm": 0.2917978763580322, "learning_rate": 7.955571703009963e-06, "loss": 0.3383, "step": 7717 }, { "epoch": 1.1066819615715515, "grad_norm": 0.31452614068984985, "learning_rate": 7.954898771591882e-06, "loss": 0.3051, "step": 7718 }, { "epoch": 1.1068253513048465, "grad_norm": 0.3088798522949219, "learning_rate": 7.954225757913962e-06, "loss": 0.3106, "step": 7719 }, { "epoch": 1.1069687410381417, "grad_norm": 0.28457263112068176, "learning_rate": 7.953552661994942e-06, "loss": 0.3211, "step": 7720 }, { "epoch": 1.1071121307714367, "grad_norm": 0.29327017068862915, "learning_rate": 7.95287948385356e-06, "loss": 0.3069, "step": 7721 }, { "epoch": 1.1072555205047319, "grad_norm": 0.2803097665309906, "learning_rate": 7.952206223508554e-06, "loss": 0.3065, "step": 7722 }, { "epoch": 1.107398910238027, "grad_norm": 0.31014159321784973, "learning_rate": 7.95153288097867e-06, "loss": 0.3448, "step": 7723 }, { "epoch": 1.107542299971322, "grad_norm": 0.3243870735168457, "learning_rate": 7.950859456282649e-06, "loss": 0.3017, "step": 7724 }, { "epoch": 1.1076856897046172, "grad_norm": 0.320136159658432, "learning_rate": 7.950185949439242e-06, "loss": 0.3239, "step": 7725 }, { "epoch": 1.1078290794379122, "grad_norm": 0.31702274084091187, "learning_rate": 7.949512360467195e-06, "loss": 0.3146, "step": 7726 }, { "epoch": 1.1079724691712074, "grad_norm": 0.33864355087280273, "learning_rate": 7.94883868938526e-06, "loss": 0.3337, "step": 7727 }, { "epoch": 1.1081158589045024, "grad_norm": 0.29034101963043213, "learning_rate": 7.948164936212196e-06, "loss": 0.3133, "step": 7728 }, { "epoch": 1.1082592486377976, "grad_norm": 0.3188645839691162, "learning_rate": 7.947491100966753e-06, "loss": 0.3018, "step": 7729 }, { "epoch": 1.1084026383710925, "grad_norm": 0.3180297315120697, "learning_rate": 7.946817183667692e-06, "loss": 0.3196, "step": 7730 }, { "epoch": 1.1085460281043877, "grad_norm": 0.31547239422798157, "learning_rate": 7.946143184333772e-06, "loss": 0.2962, "step": 7731 }, { "epoch": 1.1086894178376827, "grad_norm": 0.3140532672405243, "learning_rate": 7.94546910298376e-06, "loss": 0.3123, "step": 7732 }, { "epoch": 1.108832807570978, "grad_norm": 0.3399251103401184, "learning_rate": 7.944794939636417e-06, "loss": 0.2909, "step": 7733 }, { "epoch": 1.108976197304273, "grad_norm": 0.34657251834869385, "learning_rate": 7.944120694310515e-06, "loss": 0.3177, "step": 7734 }, { "epoch": 1.109119587037568, "grad_norm": 0.3062962293624878, "learning_rate": 7.943446367024823e-06, "loss": 0.3044, "step": 7735 }, { "epoch": 1.1092629767708633, "grad_norm": 0.29805466532707214, "learning_rate": 7.942771957798108e-06, "loss": 0.2982, "step": 7736 }, { "epoch": 1.1094063665041582, "grad_norm": 0.3251718282699585, "learning_rate": 7.942097466649151e-06, "loss": 0.3271, "step": 7737 }, { "epoch": 1.1095497562374534, "grad_norm": 0.32192251086235046, "learning_rate": 7.941422893596727e-06, "loss": 0.29, "step": 7738 }, { "epoch": 1.1096931459707484, "grad_norm": 0.3335510790348053, "learning_rate": 7.940748238659612e-06, "loss": 0.3438, "step": 7739 }, { "epoch": 1.1098365357040436, "grad_norm": 0.3091548681259155, "learning_rate": 7.94007350185659e-06, "loss": 0.329, "step": 7740 }, { "epoch": 1.1099799254373386, "grad_norm": 0.3114447593688965, "learning_rate": 7.939398683206447e-06, "loss": 0.3055, "step": 7741 }, { "epoch": 1.1101233151706338, "grad_norm": 0.32888156175613403, "learning_rate": 7.938723782727965e-06, "loss": 0.3368, "step": 7742 }, { "epoch": 1.110266704903929, "grad_norm": 0.3279098868370056, "learning_rate": 7.938048800439932e-06, "loss": 0.2854, "step": 7743 }, { "epoch": 1.110410094637224, "grad_norm": 0.3224320113658905, "learning_rate": 7.937373736361141e-06, "loss": 0.3022, "step": 7744 }, { "epoch": 1.1105534843705192, "grad_norm": 0.28661784529685974, "learning_rate": 7.936698590510384e-06, "loss": 0.309, "step": 7745 }, { "epoch": 1.1106968741038141, "grad_norm": 0.35569438338279724, "learning_rate": 7.936023362906455e-06, "loss": 0.319, "step": 7746 }, { "epoch": 1.1108402638371093, "grad_norm": 0.3157825171947479, "learning_rate": 7.935348053568154e-06, "loss": 0.3257, "step": 7747 }, { "epoch": 1.1109836535704043, "grad_norm": 0.27801889181137085, "learning_rate": 7.934672662514277e-06, "loss": 0.3238, "step": 7748 }, { "epoch": 1.1111270433036995, "grad_norm": 0.32374489307403564, "learning_rate": 7.933997189763628e-06, "loss": 0.3036, "step": 7749 }, { "epoch": 1.1112704330369945, "grad_norm": 0.3186086118221283, "learning_rate": 7.933321635335009e-06, "loss": 0.3, "step": 7750 }, { "epoch": 1.1114138227702897, "grad_norm": 0.30582350492477417, "learning_rate": 7.93264599924723e-06, "loss": 0.3385, "step": 7751 }, { "epoch": 1.1115572125035849, "grad_norm": 0.29115182161331177, "learning_rate": 7.931970281519097e-06, "loss": 0.3112, "step": 7752 }, { "epoch": 1.1117006022368798, "grad_norm": 0.29181838035583496, "learning_rate": 7.931294482169421e-06, "loss": 0.3056, "step": 7753 }, { "epoch": 1.111843991970175, "grad_norm": 0.2849537134170532, "learning_rate": 7.930618601217016e-06, "loss": 0.3139, "step": 7754 }, { "epoch": 1.11198738170347, "grad_norm": 0.2894086241722107, "learning_rate": 7.929942638680698e-06, "loss": 0.3012, "step": 7755 }, { "epoch": 1.1121307714367652, "grad_norm": 0.33175230026245117, "learning_rate": 7.929266594579284e-06, "loss": 0.3232, "step": 7756 }, { "epoch": 1.1122741611700602, "grad_norm": 0.3100886344909668, "learning_rate": 7.928590468931592e-06, "loss": 0.2985, "step": 7757 }, { "epoch": 1.1124175509033554, "grad_norm": 0.2751077115535736, "learning_rate": 7.92791426175645e-06, "loss": 0.3138, "step": 7758 }, { "epoch": 1.1125609406366503, "grad_norm": 0.308142751455307, "learning_rate": 7.927237973072675e-06, "loss": 0.3057, "step": 7759 }, { "epoch": 1.1127043303699455, "grad_norm": 0.296275794506073, "learning_rate": 7.926561602899098e-06, "loss": 0.3226, "step": 7760 }, { "epoch": 1.1128477201032405, "grad_norm": 0.2830805480480194, "learning_rate": 7.92588515125455e-06, "loss": 0.3301, "step": 7761 }, { "epoch": 1.1129911098365357, "grad_norm": 0.317629337310791, "learning_rate": 7.925208618157862e-06, "loss": 0.3175, "step": 7762 }, { "epoch": 1.113134499569831, "grad_norm": 0.30737248063087463, "learning_rate": 7.924532003627863e-06, "loss": 0.313, "step": 7763 }, { "epoch": 1.1132778893031259, "grad_norm": 0.27429136633872986, "learning_rate": 7.923855307683392e-06, "loss": 0.3157, "step": 7764 }, { "epoch": 1.113421279036421, "grad_norm": 0.3016722500324249, "learning_rate": 7.923178530343287e-06, "loss": 0.3084, "step": 7765 }, { "epoch": 1.113564668769716, "grad_norm": 0.2856808006763458, "learning_rate": 7.92250167162639e-06, "loss": 0.3199, "step": 7766 }, { "epoch": 1.1137080585030112, "grad_norm": 0.3041524589061737, "learning_rate": 7.921824731551541e-06, "loss": 0.3068, "step": 7767 }, { "epoch": 1.1138514482363062, "grad_norm": 0.29461774230003357, "learning_rate": 7.921147710137584e-06, "loss": 0.3213, "step": 7768 }, { "epoch": 1.1139948379696014, "grad_norm": 0.3166612982749939, "learning_rate": 7.920470607403372e-06, "loss": 0.3047, "step": 7769 }, { "epoch": 1.1141382277028964, "grad_norm": 0.2929915487766266, "learning_rate": 7.919793423367749e-06, "loss": 0.3042, "step": 7770 }, { "epoch": 1.1142816174361916, "grad_norm": 0.29945141077041626, "learning_rate": 7.919116158049567e-06, "loss": 0.327, "step": 7771 }, { "epoch": 1.1144250071694866, "grad_norm": 0.31918758153915405, "learning_rate": 7.918438811467685e-06, "loss": 0.3164, "step": 7772 }, { "epoch": 1.1145683969027818, "grad_norm": 0.3146626651287079, "learning_rate": 7.917761383640953e-06, "loss": 0.3214, "step": 7773 }, { "epoch": 1.114711786636077, "grad_norm": 0.2834935784339905, "learning_rate": 7.917083874588234e-06, "loss": 0.3107, "step": 7774 }, { "epoch": 1.114855176369372, "grad_norm": 0.3058193325996399, "learning_rate": 7.916406284328387e-06, "loss": 0.314, "step": 7775 }, { "epoch": 1.1149985661026671, "grad_norm": 0.27976298332214355, "learning_rate": 7.915728612880275e-06, "loss": 0.3047, "step": 7776 }, { "epoch": 1.115141955835962, "grad_norm": 0.30153414607048035, "learning_rate": 7.915050860262765e-06, "loss": 0.319, "step": 7777 }, { "epoch": 1.1152853455692573, "grad_norm": 0.29224830865859985, "learning_rate": 7.91437302649472e-06, "loss": 0.296, "step": 7778 }, { "epoch": 1.1154287353025523, "grad_norm": 0.31104230880737305, "learning_rate": 7.913695111595017e-06, "loss": 0.302, "step": 7779 }, { "epoch": 1.1155721250358475, "grad_norm": 0.29242846369743347, "learning_rate": 7.913017115582521e-06, "loss": 0.305, "step": 7780 }, { "epoch": 1.1157155147691424, "grad_norm": 0.2788279950618744, "learning_rate": 7.912339038476111e-06, "loss": 0.3112, "step": 7781 }, { "epoch": 1.1158589045024376, "grad_norm": 0.29584404826164246, "learning_rate": 7.911660880294662e-06, "loss": 0.2931, "step": 7782 }, { "epoch": 1.1160022942357326, "grad_norm": 0.324165940284729, "learning_rate": 7.910982641057054e-06, "loss": 0.3104, "step": 7783 }, { "epoch": 1.1161456839690278, "grad_norm": 0.2961543798446655, "learning_rate": 7.910304320782165e-06, "loss": 0.2993, "step": 7784 }, { "epoch": 1.116289073702323, "grad_norm": 0.2958153188228607, "learning_rate": 7.909625919488884e-06, "loss": 0.2905, "step": 7785 }, { "epoch": 1.116432463435618, "grad_norm": 0.32412660121917725, "learning_rate": 7.908947437196091e-06, "loss": 0.3036, "step": 7786 }, { "epoch": 1.1165758531689132, "grad_norm": 0.29775911569595337, "learning_rate": 7.908268873922675e-06, "loss": 0.3098, "step": 7787 }, { "epoch": 1.1167192429022081, "grad_norm": 0.3023572862148285, "learning_rate": 7.90759022968753e-06, "loss": 0.3075, "step": 7788 }, { "epoch": 1.1168626326355033, "grad_norm": 0.362247109413147, "learning_rate": 7.906911504509544e-06, "loss": 0.33, "step": 7789 }, { "epoch": 1.1170060223687983, "grad_norm": 0.3272019028663635, "learning_rate": 7.906232698407613e-06, "loss": 0.3344, "step": 7790 }, { "epoch": 1.1171494121020935, "grad_norm": 0.32166582345962524, "learning_rate": 7.905553811400635e-06, "loss": 0.3271, "step": 7791 }, { "epoch": 1.1172928018353885, "grad_norm": 0.312557578086853, "learning_rate": 7.904874843507508e-06, "loss": 0.3023, "step": 7792 }, { "epoch": 1.1174361915686837, "grad_norm": 0.32428282499313354, "learning_rate": 7.904195794747136e-06, "loss": 0.3127, "step": 7793 }, { "epoch": 1.1175795813019789, "grad_norm": 0.35566630959510803, "learning_rate": 7.90351666513842e-06, "loss": 0.321, "step": 7794 }, { "epoch": 1.1177229710352738, "grad_norm": 0.3057880103588104, "learning_rate": 7.902837454700264e-06, "loss": 0.3202, "step": 7795 }, { "epoch": 1.117866360768569, "grad_norm": 0.294345885515213, "learning_rate": 7.90215816345158e-06, "loss": 0.3068, "step": 7796 }, { "epoch": 1.118009750501864, "grad_norm": 0.32971757650375366, "learning_rate": 7.901478791411277e-06, "loss": 0.3392, "step": 7797 }, { "epoch": 1.1181531402351592, "grad_norm": 0.3498573303222656, "learning_rate": 7.900799338598268e-06, "loss": 0.3207, "step": 7798 }, { "epoch": 1.1182965299684542, "grad_norm": 0.3022487759590149, "learning_rate": 7.90011980503147e-06, "loss": 0.3072, "step": 7799 }, { "epoch": 1.1184399197017494, "grad_norm": 0.31170853972435, "learning_rate": 7.899440190729794e-06, "loss": 0.2891, "step": 7800 }, { "epoch": 1.1185833094350444, "grad_norm": 0.3391622006893158, "learning_rate": 7.898760495712165e-06, "loss": 0.3179, "step": 7801 }, { "epoch": 1.1187266991683396, "grad_norm": 0.30718299746513367, "learning_rate": 7.898080719997502e-06, "loss": 0.3283, "step": 7802 }, { "epoch": 1.1188700889016348, "grad_norm": 0.3103353977203369, "learning_rate": 7.89740086360473e-06, "loss": 0.301, "step": 7803 }, { "epoch": 1.1190134786349297, "grad_norm": 0.30176305770874023, "learning_rate": 7.896720926552776e-06, "loss": 0.3047, "step": 7804 }, { "epoch": 1.119156868368225, "grad_norm": 0.2804994285106659, "learning_rate": 7.896040908860567e-06, "loss": 0.3135, "step": 7805 }, { "epoch": 1.11930025810152, "grad_norm": 0.3212653696537018, "learning_rate": 7.895360810547033e-06, "loss": 0.2975, "step": 7806 }, { "epoch": 1.119443647834815, "grad_norm": 0.3516649901866913, "learning_rate": 7.894680631631111e-06, "loss": 0.3191, "step": 7807 }, { "epoch": 1.11958703756811, "grad_norm": 0.30343878269195557, "learning_rate": 7.894000372131729e-06, "loss": 0.3024, "step": 7808 }, { "epoch": 1.1197304273014053, "grad_norm": 0.32903194427490234, "learning_rate": 7.893320032067831e-06, "loss": 0.3167, "step": 7809 }, { "epoch": 1.1198738170347002, "grad_norm": 0.3320806920528412, "learning_rate": 7.892639611458354e-06, "loss": 0.3053, "step": 7810 }, { "epoch": 1.1200172067679954, "grad_norm": 0.3041551113128662, "learning_rate": 7.89195911032224e-06, "loss": 0.3198, "step": 7811 }, { "epoch": 1.1201605965012904, "grad_norm": 0.3208443820476532, "learning_rate": 7.891278528678433e-06, "loss": 0.2944, "step": 7812 }, { "epoch": 1.1203039862345856, "grad_norm": 0.29201874136924744, "learning_rate": 7.890597866545876e-06, "loss": 0.2884, "step": 7813 }, { "epoch": 1.1204473759678808, "grad_norm": 0.3106530010700226, "learning_rate": 7.889917123943524e-06, "loss": 0.3077, "step": 7814 }, { "epoch": 1.1205907657011758, "grad_norm": 0.31254902482032776, "learning_rate": 7.889236300890325e-06, "loss": 0.2982, "step": 7815 }, { "epoch": 1.120734155434471, "grad_norm": 0.2841825783252716, "learning_rate": 7.888555397405232e-06, "loss": 0.3206, "step": 7816 }, { "epoch": 1.120877545167766, "grad_norm": 0.2895631790161133, "learning_rate": 7.887874413507199e-06, "loss": 0.316, "step": 7817 }, { "epoch": 1.1210209349010611, "grad_norm": 0.3069536089897156, "learning_rate": 7.887193349215186e-06, "loss": 0.3184, "step": 7818 }, { "epoch": 1.1211643246343561, "grad_norm": 0.30285879969596863, "learning_rate": 7.886512204548151e-06, "loss": 0.3157, "step": 7819 }, { "epoch": 1.1213077143676513, "grad_norm": 0.31505173444747925, "learning_rate": 7.885830979525057e-06, "loss": 0.3191, "step": 7820 }, { "epoch": 1.1214511041009463, "grad_norm": 0.28413107991218567, "learning_rate": 7.885149674164867e-06, "loss": 0.3164, "step": 7821 }, { "epoch": 1.1215944938342415, "grad_norm": 0.3015984296798706, "learning_rate": 7.884468288486548e-06, "loss": 0.3081, "step": 7822 }, { "epoch": 1.1217378835675365, "grad_norm": 0.3080562651157379, "learning_rate": 7.883786822509069e-06, "loss": 0.3154, "step": 7823 }, { "epoch": 1.1218812733008316, "grad_norm": 0.2934343218803406, "learning_rate": 7.883105276251401e-06, "loss": 0.2975, "step": 7824 }, { "epoch": 1.1220246630341268, "grad_norm": 0.2967526316642761, "learning_rate": 7.882423649732518e-06, "loss": 0.2956, "step": 7825 }, { "epoch": 1.1221680527674218, "grad_norm": 0.31548553705215454, "learning_rate": 7.881741942971395e-06, "loss": 0.3329, "step": 7826 }, { "epoch": 1.122311442500717, "grad_norm": 0.2849486470222473, "learning_rate": 7.881060155987008e-06, "loss": 0.3004, "step": 7827 }, { "epoch": 1.122454832234012, "grad_norm": 0.2752079665660858, "learning_rate": 7.880378288798338e-06, "loss": 0.2986, "step": 7828 }, { "epoch": 1.1225982219673072, "grad_norm": 0.3257497251033783, "learning_rate": 7.879696341424367e-06, "loss": 0.3362, "step": 7829 }, { "epoch": 1.1227416117006022, "grad_norm": 0.29813897609710693, "learning_rate": 7.879014313884081e-06, "loss": 0.3234, "step": 7830 }, { "epoch": 1.1228850014338974, "grad_norm": 0.28890156745910645, "learning_rate": 7.878332206196466e-06, "loss": 0.302, "step": 7831 }, { "epoch": 1.1230283911671923, "grad_norm": 0.299893319606781, "learning_rate": 7.877650018380507e-06, "loss": 0.3203, "step": 7832 }, { "epoch": 1.1231717809004875, "grad_norm": 0.31243523955345154, "learning_rate": 7.8769677504552e-06, "loss": 0.3129, "step": 7833 }, { "epoch": 1.1233151706337827, "grad_norm": 0.3214775025844574, "learning_rate": 7.876285402439538e-06, "loss": 0.3094, "step": 7834 }, { "epoch": 1.1234585603670777, "grad_norm": 0.31403249502182007, "learning_rate": 7.875602974352513e-06, "loss": 0.3002, "step": 7835 }, { "epoch": 1.123601950100373, "grad_norm": 0.27723467350006104, "learning_rate": 7.874920466213125e-06, "loss": 0.3086, "step": 7836 }, { "epoch": 1.1237453398336679, "grad_norm": 0.3133869767189026, "learning_rate": 7.874237878040373e-06, "loss": 0.3152, "step": 7837 }, { "epoch": 1.123888729566963, "grad_norm": 0.30352941155433655, "learning_rate": 7.87355520985326e-06, "loss": 0.298, "step": 7838 }, { "epoch": 1.124032119300258, "grad_norm": 0.2871851325035095, "learning_rate": 7.872872461670792e-06, "loss": 0.3141, "step": 7839 }, { "epoch": 1.1241755090335532, "grad_norm": 0.30370068550109863, "learning_rate": 7.872189633511973e-06, "loss": 0.3218, "step": 7840 }, { "epoch": 1.1243188987668482, "grad_norm": 0.2887941300868988, "learning_rate": 7.871506725395811e-06, "loss": 0.309, "step": 7841 }, { "epoch": 1.1244622885001434, "grad_norm": 0.28961506485939026, "learning_rate": 7.870823737341322e-06, "loss": 0.3071, "step": 7842 }, { "epoch": 1.1246056782334386, "grad_norm": 0.2876582443714142, "learning_rate": 7.870140669367514e-06, "loss": 0.3183, "step": 7843 }, { "epoch": 1.1247490679667336, "grad_norm": 0.29735973477363586, "learning_rate": 7.869457521493407e-06, "loss": 0.3226, "step": 7844 }, { "epoch": 1.1248924577000288, "grad_norm": 0.2822556793689728, "learning_rate": 7.868774293738014e-06, "loss": 0.3075, "step": 7845 }, { "epoch": 1.1250358474333237, "grad_norm": 0.28508684039115906, "learning_rate": 7.868090986120361e-06, "loss": 0.3138, "step": 7846 }, { "epoch": 1.125179237166619, "grad_norm": 0.3093230426311493, "learning_rate": 7.867407598659464e-06, "loss": 0.3201, "step": 7847 }, { "epoch": 1.125322626899914, "grad_norm": 0.3147328197956085, "learning_rate": 7.866724131374352e-06, "loss": 0.3238, "step": 7848 }, { "epoch": 1.125466016633209, "grad_norm": 0.2718319892883301, "learning_rate": 7.866040584284049e-06, "loss": 0.3252, "step": 7849 }, { "epoch": 1.125609406366504, "grad_norm": 0.32104095816612244, "learning_rate": 7.865356957407587e-06, "loss": 0.3185, "step": 7850 }, { "epoch": 1.1257527960997993, "grad_norm": 0.3006666600704193, "learning_rate": 7.864673250763993e-06, "loss": 0.3228, "step": 7851 }, { "epoch": 1.1258961858330943, "grad_norm": 0.30223348736763, "learning_rate": 7.863989464372301e-06, "loss": 0.3135, "step": 7852 }, { "epoch": 1.1260395755663895, "grad_norm": 0.2929624617099762, "learning_rate": 7.863305598251551e-06, "loss": 0.3182, "step": 7853 }, { "epoch": 1.1261829652996846, "grad_norm": 0.2958351969718933, "learning_rate": 7.862621652420777e-06, "loss": 0.3147, "step": 7854 }, { "epoch": 1.1263263550329796, "grad_norm": 0.30247944593429565, "learning_rate": 7.86193762689902e-06, "loss": 0.3019, "step": 7855 }, { "epoch": 1.1264697447662748, "grad_norm": 0.3098351061344147, "learning_rate": 7.861253521705319e-06, "loss": 0.3214, "step": 7856 }, { "epoch": 1.1266131344995698, "grad_norm": 0.30214112997055054, "learning_rate": 7.860569336858724e-06, "loss": 0.3207, "step": 7857 }, { "epoch": 1.126756524232865, "grad_norm": 0.30466094613075256, "learning_rate": 7.859885072378279e-06, "loss": 0.3151, "step": 7858 }, { "epoch": 1.12689991396616, "grad_norm": 0.3251175880432129, "learning_rate": 7.859200728283032e-06, "loss": 0.3047, "step": 7859 }, { "epoch": 1.1270433036994552, "grad_norm": 0.2923867702484131, "learning_rate": 7.858516304592036e-06, "loss": 0.3153, "step": 7860 }, { "epoch": 1.1271866934327501, "grad_norm": 0.31988048553466797, "learning_rate": 7.857831801324343e-06, "loss": 0.2908, "step": 7861 }, { "epoch": 1.1273300831660453, "grad_norm": 0.3105470538139343, "learning_rate": 7.857147218499008e-06, "loss": 0.3006, "step": 7862 }, { "epoch": 1.1274734728993403, "grad_norm": 0.3320119082927704, "learning_rate": 7.856462556135092e-06, "loss": 0.3414, "step": 7863 }, { "epoch": 1.1276168626326355, "grad_norm": 0.27904924750328064, "learning_rate": 7.85577781425165e-06, "loss": 0.3071, "step": 7864 }, { "epoch": 1.1277602523659307, "grad_norm": 0.2853800654411316, "learning_rate": 7.855092992867747e-06, "loss": 0.295, "step": 7865 }, { "epoch": 1.1279036420992257, "grad_norm": 0.3543749451637268, "learning_rate": 7.854408092002448e-06, "loss": 0.3218, "step": 7866 }, { "epoch": 1.1280470318325209, "grad_norm": 0.2800387144088745, "learning_rate": 7.853723111674818e-06, "loss": 0.3026, "step": 7867 }, { "epoch": 1.1281904215658158, "grad_norm": 0.302082896232605, "learning_rate": 7.853038051903926e-06, "loss": 0.3157, "step": 7868 }, { "epoch": 1.128333811299111, "grad_norm": 0.2943251430988312, "learning_rate": 7.852352912708844e-06, "loss": 0.3085, "step": 7869 }, { "epoch": 1.128477201032406, "grad_norm": 0.319453626871109, "learning_rate": 7.851667694108644e-06, "loss": 0.3079, "step": 7870 }, { "epoch": 1.1286205907657012, "grad_norm": 0.2785457670688629, "learning_rate": 7.850982396122402e-06, "loss": 0.3024, "step": 7871 }, { "epoch": 1.1287639804989962, "grad_norm": 0.30196884274482727, "learning_rate": 7.850297018769197e-06, "loss": 0.3105, "step": 7872 }, { "epoch": 1.1289073702322914, "grad_norm": 0.2881917655467987, "learning_rate": 7.849611562068106e-06, "loss": 0.303, "step": 7873 }, { "epoch": 1.1290507599655863, "grad_norm": 0.27010610699653625, "learning_rate": 7.848926026038214e-06, "loss": 0.322, "step": 7874 }, { "epoch": 1.1291941496988815, "grad_norm": 0.2871629595756531, "learning_rate": 7.848240410698603e-06, "loss": 0.2968, "step": 7875 }, { "epoch": 1.1293375394321767, "grad_norm": 0.2734958827495575, "learning_rate": 7.847554716068362e-06, "loss": 0.3197, "step": 7876 }, { "epoch": 1.1294809291654717, "grad_norm": 0.29031869769096375, "learning_rate": 7.846868942166577e-06, "loss": 0.3094, "step": 7877 }, { "epoch": 1.129624318898767, "grad_norm": 0.30701372027397156, "learning_rate": 7.84618308901234e-06, "loss": 0.3067, "step": 7878 }, { "epoch": 1.1297677086320619, "grad_norm": 0.27673983573913574, "learning_rate": 7.845497156624743e-06, "loss": 0.3128, "step": 7879 }, { "epoch": 1.129911098365357, "grad_norm": 0.3126148283481598, "learning_rate": 7.844811145022884e-06, "loss": 0.32, "step": 7880 }, { "epoch": 1.130054488098652, "grad_norm": 0.2985067665576935, "learning_rate": 7.844125054225858e-06, "loss": 0.3216, "step": 7881 }, { "epoch": 1.1301978778319473, "grad_norm": 0.3267973065376282, "learning_rate": 7.843438884252767e-06, "loss": 0.301, "step": 7882 }, { "epoch": 1.1303412675652424, "grad_norm": 0.31662049889564514, "learning_rate": 7.84275263512271e-06, "loss": 0.2918, "step": 7883 }, { "epoch": 1.1304846572985374, "grad_norm": 0.30395522713661194, "learning_rate": 7.842066306854791e-06, "loss": 0.3271, "step": 7884 }, { "epoch": 1.1306280470318326, "grad_norm": 0.28948450088500977, "learning_rate": 7.84137989946812e-06, "loss": 0.3107, "step": 7885 }, { "epoch": 1.1307714367651276, "grad_norm": 0.2772187888622284, "learning_rate": 7.840693412981803e-06, "loss": 0.3224, "step": 7886 }, { "epoch": 1.1309148264984228, "grad_norm": 0.276467502117157, "learning_rate": 7.840006847414952e-06, "loss": 0.3304, "step": 7887 }, { "epoch": 1.1310582162317178, "grad_norm": 0.2842901349067688, "learning_rate": 7.839320202786678e-06, "loss": 0.334, "step": 7888 }, { "epoch": 1.131201605965013, "grad_norm": 0.28958529233932495, "learning_rate": 7.838633479116098e-06, "loss": 0.3182, "step": 7889 }, { "epoch": 1.131344995698308, "grad_norm": 0.29728075861930847, "learning_rate": 7.83794667642233e-06, "loss": 0.3002, "step": 7890 }, { "epoch": 1.1314883854316031, "grad_norm": 0.2869042456150055, "learning_rate": 7.83725979472449e-06, "loss": 0.3403, "step": 7891 }, { "epoch": 1.131631775164898, "grad_norm": 0.3245094120502472, "learning_rate": 7.836572834041702e-06, "loss": 0.326, "step": 7892 }, { "epoch": 1.1317751648981933, "grad_norm": 0.31038349866867065, "learning_rate": 7.83588579439309e-06, "loss": 0.2978, "step": 7893 }, { "epoch": 1.1319185546314885, "grad_norm": 0.2826823592185974, "learning_rate": 7.83519867579778e-06, "loss": 0.3097, "step": 7894 }, { "epoch": 1.1320619443647835, "grad_norm": 0.29211553931236267, "learning_rate": 7.8345114782749e-06, "loss": 0.3061, "step": 7895 }, { "epoch": 1.1322053340980787, "grad_norm": 0.28666743636131287, "learning_rate": 7.83382420184358e-06, "loss": 0.3163, "step": 7896 }, { "epoch": 1.1323487238313736, "grad_norm": 0.3080376982688904, "learning_rate": 7.833136846522955e-06, "loss": 0.312, "step": 7897 }, { "epoch": 1.1324921135646688, "grad_norm": 0.31161388754844666, "learning_rate": 7.832449412332158e-06, "loss": 0.3166, "step": 7898 }, { "epoch": 1.1326355032979638, "grad_norm": 0.2966752350330353, "learning_rate": 7.831761899290327e-06, "loss": 0.3298, "step": 7899 }, { "epoch": 1.132778893031259, "grad_norm": 0.28121069073677063, "learning_rate": 7.831074307416599e-06, "loss": 0.2946, "step": 7900 }, { "epoch": 1.132922282764554, "grad_norm": 0.28430941700935364, "learning_rate": 7.830386636730119e-06, "loss": 0.3059, "step": 7901 }, { "epoch": 1.1330656724978492, "grad_norm": 0.30021700263023376, "learning_rate": 7.829698887250028e-06, "loss": 0.3042, "step": 7902 }, { "epoch": 1.1332090622311441, "grad_norm": 0.28637105226516724, "learning_rate": 7.829011058995473e-06, "loss": 0.3023, "step": 7903 }, { "epoch": 1.1333524519644393, "grad_norm": 0.3136342465877533, "learning_rate": 7.828323151985602e-06, "loss": 0.3088, "step": 7904 }, { "epoch": 1.1334958416977345, "grad_norm": 0.29601526260375977, "learning_rate": 7.827635166239564e-06, "loss": 0.3221, "step": 7905 }, { "epoch": 1.1336392314310295, "grad_norm": 0.30266717076301575, "learning_rate": 7.826947101776514e-06, "loss": 0.3348, "step": 7906 }, { "epoch": 1.1337826211643247, "grad_norm": 0.28945451974868774, "learning_rate": 7.826258958615606e-06, "loss": 0.2904, "step": 7907 }, { "epoch": 1.1339260108976197, "grad_norm": 0.2790786325931549, "learning_rate": 7.825570736775996e-06, "loss": 0.3183, "step": 7908 }, { "epoch": 1.1340694006309149, "grad_norm": 0.2993253767490387, "learning_rate": 7.824882436276843e-06, "loss": 0.3152, "step": 7909 }, { "epoch": 1.1342127903642099, "grad_norm": 0.3032740354537964, "learning_rate": 7.82419405713731e-06, "loss": 0.3163, "step": 7910 }, { "epoch": 1.134356180097505, "grad_norm": 0.3088147044181824, "learning_rate": 7.823505599376556e-06, "loss": 0.3274, "step": 7911 }, { "epoch": 1.1344995698308, "grad_norm": 0.29186683893203735, "learning_rate": 7.82281706301375e-06, "loss": 0.3158, "step": 7912 }, { "epoch": 1.1346429595640952, "grad_norm": 0.29576608538627625, "learning_rate": 7.82212844806806e-06, "loss": 0.302, "step": 7913 }, { "epoch": 1.1347863492973902, "grad_norm": 0.286792516708374, "learning_rate": 7.821439754558655e-06, "loss": 0.3285, "step": 7914 }, { "epoch": 1.1349297390306854, "grad_norm": 0.2866379916667938, "learning_rate": 7.820750982504708e-06, "loss": 0.3021, "step": 7915 }, { "epoch": 1.1350731287639806, "grad_norm": 0.31265896558761597, "learning_rate": 7.820062131925393e-06, "loss": 0.3358, "step": 7916 }, { "epoch": 1.1352165184972756, "grad_norm": 0.29440128803253174, "learning_rate": 7.819373202839884e-06, "loss": 0.3278, "step": 7917 }, { "epoch": 1.1353599082305708, "grad_norm": 0.2917652130126953, "learning_rate": 7.818684195267364e-06, "loss": 0.323, "step": 7918 }, { "epoch": 1.1355032979638657, "grad_norm": 0.2760002017021179, "learning_rate": 7.817995109227013e-06, "loss": 0.3297, "step": 7919 }, { "epoch": 1.135646687697161, "grad_norm": 0.2913457155227661, "learning_rate": 7.81730594473801e-06, "loss": 0.3054, "step": 7920 }, { "epoch": 1.135790077430456, "grad_norm": 0.2908361852169037, "learning_rate": 7.816616701819546e-06, "loss": 0.3312, "step": 7921 }, { "epoch": 1.135933467163751, "grad_norm": 0.3003935217857361, "learning_rate": 7.815927380490805e-06, "loss": 0.3048, "step": 7922 }, { "epoch": 1.1360768568970463, "grad_norm": 0.3021721839904785, "learning_rate": 7.815237980770976e-06, "loss": 0.3146, "step": 7923 }, { "epoch": 1.1362202466303413, "grad_norm": 0.27525070309638977, "learning_rate": 7.814548502679254e-06, "loss": 0.2994, "step": 7924 }, { "epoch": 1.1363636363636362, "grad_norm": 0.32330310344696045, "learning_rate": 7.81385894623483e-06, "loss": 0.3018, "step": 7925 }, { "epoch": 1.1365070260969314, "grad_norm": 0.2915939688682556, "learning_rate": 7.813169311456903e-06, "loss": 0.3146, "step": 7926 }, { "epoch": 1.1366504158302266, "grad_norm": 0.28356480598449707, "learning_rate": 7.812479598364668e-06, "loss": 0.3173, "step": 7927 }, { "epoch": 1.1367938055635216, "grad_norm": 0.29400309920310974, "learning_rate": 7.81178980697733e-06, "loss": 0.29, "step": 7928 }, { "epoch": 1.1369371952968168, "grad_norm": 0.311117023229599, "learning_rate": 7.811099937314087e-06, "loss": 0.3133, "step": 7929 }, { "epoch": 1.1370805850301118, "grad_norm": 0.28779590129852295, "learning_rate": 7.810409989394146e-06, "loss": 0.3297, "step": 7930 }, { "epoch": 1.137223974763407, "grad_norm": 0.2961125373840332, "learning_rate": 7.809719963236716e-06, "loss": 0.3131, "step": 7931 }, { "epoch": 1.137367364496702, "grad_norm": 0.2850736379623413, "learning_rate": 7.809029858861002e-06, "loss": 0.3146, "step": 7932 }, { "epoch": 1.1375107542299971, "grad_norm": 0.31633639335632324, "learning_rate": 7.808339676286219e-06, "loss": 0.3267, "step": 7933 }, { "epoch": 1.1376541439632923, "grad_norm": 0.3259209096431732, "learning_rate": 7.807649415531579e-06, "loss": 0.3146, "step": 7934 }, { "epoch": 1.1377975336965873, "grad_norm": 0.3138217031955719, "learning_rate": 7.8069590766163e-06, "loss": 0.3004, "step": 7935 }, { "epoch": 1.1379409234298825, "grad_norm": 0.30725422501564026, "learning_rate": 7.806268659559597e-06, "loss": 0.329, "step": 7936 }, { "epoch": 1.1380843131631775, "grad_norm": 0.32300207018852234, "learning_rate": 7.80557816438069e-06, "loss": 0.3197, "step": 7937 }, { "epoch": 1.1382277028964727, "grad_norm": 0.2737590968608856, "learning_rate": 7.804887591098805e-06, "loss": 0.3141, "step": 7938 }, { "epoch": 1.1383710926297677, "grad_norm": 0.27546945214271545, "learning_rate": 7.804196939733163e-06, "loss": 0.3112, "step": 7939 }, { "epoch": 1.1385144823630629, "grad_norm": 0.31468063592910767, "learning_rate": 7.80350621030299e-06, "loss": 0.3038, "step": 7940 }, { "epoch": 1.1386578720963578, "grad_norm": 0.2889156639575958, "learning_rate": 7.80281540282752e-06, "loss": 0.3266, "step": 7941 }, { "epoch": 1.138801261829653, "grad_norm": 0.28773948550224304, "learning_rate": 7.802124517325979e-06, "loss": 0.2985, "step": 7942 }, { "epoch": 1.138944651562948, "grad_norm": 0.28784582018852234, "learning_rate": 7.801433553817603e-06, "loss": 0.3158, "step": 7943 }, { "epoch": 1.1390880412962432, "grad_norm": 0.3247877061367035, "learning_rate": 7.800742512321627e-06, "loss": 0.3042, "step": 7944 }, { "epoch": 1.1392314310295384, "grad_norm": 0.3118157386779785, "learning_rate": 7.800051392857284e-06, "loss": 0.3016, "step": 7945 }, { "epoch": 1.1393748207628334, "grad_norm": 0.3386211693286896, "learning_rate": 7.79936019544382e-06, "loss": 0.3129, "step": 7946 }, { "epoch": 1.1395182104961286, "grad_norm": 0.2938567101955414, "learning_rate": 7.798668920100475e-06, "loss": 0.3144, "step": 7947 }, { "epoch": 1.1396616002294235, "grad_norm": 0.3559611141681671, "learning_rate": 7.797977566846492e-06, "loss": 0.3392, "step": 7948 }, { "epoch": 1.1398049899627187, "grad_norm": 0.30160799622535706, "learning_rate": 7.797286135701116e-06, "loss": 0.3171, "step": 7949 }, { "epoch": 1.1399483796960137, "grad_norm": 0.3224406838417053, "learning_rate": 7.796594626683599e-06, "loss": 0.3182, "step": 7950 }, { "epoch": 1.140091769429309, "grad_norm": 0.30663490295410156, "learning_rate": 7.795903039813189e-06, "loss": 0.3058, "step": 7951 }, { "epoch": 1.1402351591626039, "grad_norm": 0.299527645111084, "learning_rate": 7.79521137510914e-06, "loss": 0.3273, "step": 7952 }, { "epoch": 1.140378548895899, "grad_norm": 0.2881968319416046, "learning_rate": 7.794519632590705e-06, "loss": 0.3152, "step": 7953 }, { "epoch": 1.140521938629194, "grad_norm": 0.3203224837779999, "learning_rate": 7.793827812277141e-06, "loss": 0.3111, "step": 7954 }, { "epoch": 1.1406653283624892, "grad_norm": 0.30647873878479004, "learning_rate": 7.79313591418771e-06, "loss": 0.3031, "step": 7955 }, { "epoch": 1.1408087180957844, "grad_norm": 0.29996517300605774, "learning_rate": 7.792443938341672e-06, "loss": 0.304, "step": 7956 }, { "epoch": 1.1409521078290794, "grad_norm": 0.3101753890514374, "learning_rate": 7.79175188475829e-06, "loss": 0.3142, "step": 7957 }, { "epoch": 1.1410954975623746, "grad_norm": 0.305690199136734, "learning_rate": 7.791059753456832e-06, "loss": 0.3388, "step": 7958 }, { "epoch": 1.1412388872956696, "grad_norm": 0.3108067214488983, "learning_rate": 7.79036754445656e-06, "loss": 0.3031, "step": 7959 }, { "epoch": 1.1413822770289648, "grad_norm": 0.2935850918292999, "learning_rate": 7.78967525777675e-06, "loss": 0.3142, "step": 7960 }, { "epoch": 1.1415256667622597, "grad_norm": 0.3084850311279297, "learning_rate": 7.788982893436673e-06, "loss": 0.3061, "step": 7961 }, { "epoch": 1.141669056495555, "grad_norm": 0.31191349029541016, "learning_rate": 7.788290451455602e-06, "loss": 0.3108, "step": 7962 }, { "epoch": 1.14181244622885, "grad_norm": 0.3121117353439331, "learning_rate": 7.787597931852814e-06, "loss": 0.3206, "step": 7963 }, { "epoch": 1.1419558359621451, "grad_norm": 0.28401705622673035, "learning_rate": 7.786905334647588e-06, "loss": 0.3051, "step": 7964 }, { "epoch": 1.14209922569544, "grad_norm": 0.32531580328941345, "learning_rate": 7.786212659859203e-06, "loss": 0.3128, "step": 7965 }, { "epoch": 1.1422426154287353, "grad_norm": 0.2802075743675232, "learning_rate": 7.785519907506945e-06, "loss": 0.2852, "step": 7966 }, { "epoch": 1.1423860051620305, "grad_norm": 0.304758220911026, "learning_rate": 7.784827077610098e-06, "loss": 0.3126, "step": 7967 }, { "epoch": 1.1425293948953255, "grad_norm": 0.30402135848999023, "learning_rate": 7.784134170187947e-06, "loss": 0.2903, "step": 7968 }, { "epoch": 1.1426727846286207, "grad_norm": 0.29177579283714294, "learning_rate": 7.783441185259785e-06, "loss": 0.3269, "step": 7969 }, { "epoch": 1.1428161743619156, "grad_norm": 0.2795558273792267, "learning_rate": 7.782748122844901e-06, "loss": 0.3141, "step": 7970 }, { "epoch": 1.1429595640952108, "grad_norm": 0.29807066917419434, "learning_rate": 7.782054982962593e-06, "loss": 0.3123, "step": 7971 }, { "epoch": 1.1431029538285058, "grad_norm": 0.28544750809669495, "learning_rate": 7.78136176563215e-06, "loss": 0.3038, "step": 7972 }, { "epoch": 1.143246343561801, "grad_norm": 0.28206855058670044, "learning_rate": 7.780668470872875e-06, "loss": 0.2964, "step": 7973 }, { "epoch": 1.1433897332950962, "grad_norm": 0.29577168822288513, "learning_rate": 7.779975098704068e-06, "loss": 0.3073, "step": 7974 }, { "epoch": 1.1435331230283912, "grad_norm": 0.2775677442550659, "learning_rate": 7.779281649145032e-06, "loss": 0.3157, "step": 7975 }, { "epoch": 1.1436765127616864, "grad_norm": 0.2883610725402832, "learning_rate": 7.778588122215068e-06, "loss": 0.3019, "step": 7976 }, { "epoch": 1.1438199024949813, "grad_norm": 0.29233965277671814, "learning_rate": 7.777894517933487e-06, "loss": 0.3089, "step": 7977 }, { "epoch": 1.1439632922282765, "grad_norm": 0.28617092967033386, "learning_rate": 7.777200836319592e-06, "loss": 0.3084, "step": 7978 }, { "epoch": 1.1441066819615715, "grad_norm": 0.3065025210380554, "learning_rate": 7.7765070773927e-06, "loss": 0.3171, "step": 7979 }, { "epoch": 1.1442500716948667, "grad_norm": 0.29871273040771484, "learning_rate": 7.775813241172125e-06, "loss": 0.3171, "step": 7980 }, { "epoch": 1.1443934614281617, "grad_norm": 0.3034839928150177, "learning_rate": 7.775119327677176e-06, "loss": 0.3043, "step": 7981 }, { "epoch": 1.1445368511614569, "grad_norm": 0.33387136459350586, "learning_rate": 7.774425336927173e-06, "loss": 0.3247, "step": 7982 }, { "epoch": 1.1446802408947518, "grad_norm": 0.3181147277355194, "learning_rate": 7.77373126894144e-06, "loss": 0.3065, "step": 7983 }, { "epoch": 1.144823630628047, "grad_norm": 0.28624987602233887, "learning_rate": 7.773037123739294e-06, "loss": 0.3049, "step": 7984 }, { "epoch": 1.1449670203613422, "grad_norm": 0.3314606845378876, "learning_rate": 7.772342901340058e-06, "loss": 0.3102, "step": 7985 }, { "epoch": 1.1451104100946372, "grad_norm": 0.36845484375953674, "learning_rate": 7.771648601763061e-06, "loss": 0.3282, "step": 7986 }, { "epoch": 1.1452537998279324, "grad_norm": 0.32993486523628235, "learning_rate": 7.770954225027633e-06, "loss": 0.3153, "step": 7987 }, { "epoch": 1.1453971895612274, "grad_norm": 0.306020051240921, "learning_rate": 7.7702597711531e-06, "loss": 0.3236, "step": 7988 }, { "epoch": 1.1455405792945226, "grad_norm": 0.3107965290546417, "learning_rate": 7.769565240158796e-06, "loss": 0.3187, "step": 7989 }, { "epoch": 1.1456839690278176, "grad_norm": 0.3209761381149292, "learning_rate": 7.768870632064058e-06, "loss": 0.2968, "step": 7990 }, { "epoch": 1.1458273587611127, "grad_norm": 0.35226550698280334, "learning_rate": 7.76817594688822e-06, "loss": 0.3016, "step": 7991 }, { "epoch": 1.1459707484944077, "grad_norm": 0.2999538481235504, "learning_rate": 7.76748118465062e-06, "loss": 0.3092, "step": 7992 }, { "epoch": 1.146114138227703, "grad_norm": 0.2992437183856964, "learning_rate": 7.766786345370605e-06, "loss": 0.3039, "step": 7993 }, { "epoch": 1.146257527960998, "grad_norm": 0.32732322812080383, "learning_rate": 7.766091429067511e-06, "loss": 0.3126, "step": 7994 }, { "epoch": 1.146400917694293, "grad_norm": 0.314676970243454, "learning_rate": 7.765396435760687e-06, "loss": 0.3398, "step": 7995 }, { "epoch": 1.1465443074275883, "grad_norm": 0.3004870116710663, "learning_rate": 7.764701365469482e-06, "loss": 0.3133, "step": 7996 }, { "epoch": 1.1466876971608833, "grad_norm": 0.3215382993221283, "learning_rate": 7.764006218213241e-06, "loss": 0.3078, "step": 7997 }, { "epoch": 1.1468310868941785, "grad_norm": 0.26478758454322815, "learning_rate": 7.763310994011322e-06, "loss": 0.3104, "step": 7998 }, { "epoch": 1.1469744766274734, "grad_norm": 0.33890846371650696, "learning_rate": 7.762615692883073e-06, "loss": 0.3318, "step": 7999 }, { "epoch": 1.1471178663607686, "grad_norm": 0.29889339208602905, "learning_rate": 7.761920314847855e-06, "loss": 0.3126, "step": 8000 }, { "epoch": 1.1472612560940636, "grad_norm": 0.31439724564552307, "learning_rate": 7.761224859925023e-06, "loss": 0.3294, "step": 8001 }, { "epoch": 1.1474046458273588, "grad_norm": 0.2965571880340576, "learning_rate": 7.760529328133937e-06, "loss": 0.3222, "step": 8002 }, { "epoch": 1.1475480355606538, "grad_norm": 0.28890183568000793, "learning_rate": 7.759833719493963e-06, "loss": 0.3207, "step": 8003 }, { "epoch": 1.147691425293949, "grad_norm": 0.2938865125179291, "learning_rate": 7.759138034024462e-06, "loss": 0.3064, "step": 8004 }, { "epoch": 1.147834815027244, "grad_norm": 0.2955913543701172, "learning_rate": 7.758442271744804e-06, "loss": 0.3072, "step": 8005 }, { "epoch": 1.1479782047605391, "grad_norm": 0.2936975061893463, "learning_rate": 7.757746432674356e-06, "loss": 0.3171, "step": 8006 }, { "epoch": 1.1481215944938343, "grad_norm": 0.29569604992866516, "learning_rate": 7.757050516832491e-06, "loss": 0.3179, "step": 8007 }, { "epoch": 1.1482649842271293, "grad_norm": 0.3167276382446289, "learning_rate": 7.75635452423858e-06, "loss": 0.3143, "step": 8008 }, { "epoch": 1.1484083739604245, "grad_norm": 0.29561129212379456, "learning_rate": 7.755658454911997e-06, "loss": 0.3174, "step": 8009 }, { "epoch": 1.1485517636937195, "grad_norm": 0.2998178005218506, "learning_rate": 7.754962308872123e-06, "loss": 0.3002, "step": 8010 }, { "epoch": 1.1486951534270147, "grad_norm": 0.2997888922691345, "learning_rate": 7.754266086138336e-06, "loss": 0.3003, "step": 8011 }, { "epoch": 1.1488385431603096, "grad_norm": 0.29537487030029297, "learning_rate": 7.75356978673002e-06, "loss": 0.3198, "step": 8012 }, { "epoch": 1.1489819328936048, "grad_norm": 0.29895228147506714, "learning_rate": 7.752873410666553e-06, "loss": 0.303, "step": 8013 }, { "epoch": 1.1491253226269, "grad_norm": 0.29373225569725037, "learning_rate": 7.752176957967327e-06, "loss": 0.331, "step": 8014 }, { "epoch": 1.149268712360195, "grad_norm": 0.2696765661239624, "learning_rate": 7.75148042865173e-06, "loss": 0.3077, "step": 8015 }, { "epoch": 1.14941210209349, "grad_norm": 0.3025111258029938, "learning_rate": 7.750783822739147e-06, "loss": 0.311, "step": 8016 }, { "epoch": 1.1495554918267852, "grad_norm": 0.2949944734573364, "learning_rate": 7.750087140248978e-06, "loss": 0.3086, "step": 8017 }, { "epoch": 1.1496988815600804, "grad_norm": 0.29073086380958557, "learning_rate": 7.74939038120061e-06, "loss": 0.3065, "step": 8018 }, { "epoch": 1.1498422712933754, "grad_norm": 0.28596049547195435, "learning_rate": 7.748693545613444e-06, "loss": 0.3267, "step": 8019 }, { "epoch": 1.1499856610266705, "grad_norm": 0.3248399496078491, "learning_rate": 7.747996633506878e-06, "loss": 0.3038, "step": 8020 }, { "epoch": 1.1501290507599655, "grad_norm": 0.31528759002685547, "learning_rate": 7.747299644900313e-06, "loss": 0.3, "step": 8021 }, { "epoch": 1.1502724404932607, "grad_norm": 0.31096839904785156, "learning_rate": 7.746602579813151e-06, "loss": 0.3091, "step": 8022 }, { "epoch": 1.1504158302265557, "grad_norm": 0.3215044140815735, "learning_rate": 7.7459054382648e-06, "loss": 0.3061, "step": 8023 }, { "epoch": 1.1505592199598509, "grad_norm": 0.30427131056785583, "learning_rate": 7.745208220274665e-06, "loss": 0.3037, "step": 8024 }, { "epoch": 1.150702609693146, "grad_norm": 0.3080950677394867, "learning_rate": 7.744510925862156e-06, "loss": 0.2939, "step": 8025 }, { "epoch": 1.150845999426441, "grad_norm": 0.3171447515487671, "learning_rate": 7.743813555046686e-06, "loss": 0.3123, "step": 8026 }, { "epoch": 1.1509893891597363, "grad_norm": 0.2858625054359436, "learning_rate": 7.743116107847666e-06, "loss": 0.3227, "step": 8027 }, { "epoch": 1.1511327788930312, "grad_norm": 0.31607022881507874, "learning_rate": 7.742418584284513e-06, "loss": 0.3167, "step": 8028 }, { "epoch": 1.1512761686263264, "grad_norm": 0.33334019780158997, "learning_rate": 7.741720984376647e-06, "loss": 0.3182, "step": 8029 }, { "epoch": 1.1514195583596214, "grad_norm": 0.30360937118530273, "learning_rate": 7.741023308143484e-06, "loss": 0.3117, "step": 8030 }, { "epoch": 1.1515629480929166, "grad_norm": 0.32216158509254456, "learning_rate": 7.74032555560445e-06, "loss": 0.3024, "step": 8031 }, { "epoch": 1.1517063378262116, "grad_norm": 0.297102153301239, "learning_rate": 7.739627726778967e-06, "loss": 0.29, "step": 8032 }, { "epoch": 1.1518497275595068, "grad_norm": 0.2920666038990021, "learning_rate": 7.738929821686461e-06, "loss": 0.3345, "step": 8033 }, { "epoch": 1.1519931172928017, "grad_norm": 0.31803590059280396, "learning_rate": 7.738231840346366e-06, "loss": 0.3042, "step": 8034 }, { "epoch": 1.152136507026097, "grad_norm": 0.32371899485588074, "learning_rate": 7.737533782778106e-06, "loss": 0.3015, "step": 8035 }, { "epoch": 1.1522798967593921, "grad_norm": 0.29562485218048096, "learning_rate": 7.736835649001117e-06, "loss": 0.3258, "step": 8036 }, { "epoch": 1.152423286492687, "grad_norm": 0.3095666170120239, "learning_rate": 7.736137439034834e-06, "loss": 0.3155, "step": 8037 }, { "epoch": 1.1525666762259823, "grad_norm": 0.2857297360897064, "learning_rate": 7.735439152898694e-06, "loss": 0.3169, "step": 8038 }, { "epoch": 1.1527100659592773, "grad_norm": 0.2856547236442566, "learning_rate": 7.734740790612137e-06, "loss": 0.3165, "step": 8039 }, { "epoch": 1.1528534556925725, "grad_norm": 0.28520169854164124, "learning_rate": 7.7340423521946e-06, "loss": 0.3, "step": 8040 }, { "epoch": 1.1529968454258674, "grad_norm": 0.32009467482566833, "learning_rate": 7.73334383766553e-06, "loss": 0.3312, "step": 8041 }, { "epoch": 1.1531402351591626, "grad_norm": 0.29934796690940857, "learning_rate": 7.732645247044375e-06, "loss": 0.2904, "step": 8042 }, { "epoch": 1.1532836248924576, "grad_norm": 0.32330262660980225, "learning_rate": 7.731946580350577e-06, "loss": 0.3037, "step": 8043 }, { "epoch": 1.1534270146257528, "grad_norm": 0.3065122663974762, "learning_rate": 7.731247837603592e-06, "loss": 0.2991, "step": 8044 }, { "epoch": 1.1535704043590478, "grad_norm": 0.32091838121414185, "learning_rate": 7.730549018822867e-06, "loss": 0.3232, "step": 8045 }, { "epoch": 1.153713794092343, "grad_norm": 0.32909783720970154, "learning_rate": 7.729850124027855e-06, "loss": 0.3114, "step": 8046 }, { "epoch": 1.1538571838256382, "grad_norm": 0.30244001746177673, "learning_rate": 7.72915115323802e-06, "loss": 0.3084, "step": 8047 }, { "epoch": 1.1540005735589332, "grad_norm": 0.2991282641887665, "learning_rate": 7.728452106472812e-06, "loss": 0.2945, "step": 8048 }, { "epoch": 1.1541439632922283, "grad_norm": 0.3142143785953522, "learning_rate": 7.727752983751694e-06, "loss": 0.3196, "step": 8049 }, { "epoch": 1.1542873530255233, "grad_norm": 0.29405108094215393, "learning_rate": 7.72705378509413e-06, "loss": 0.3067, "step": 8050 }, { "epoch": 1.1544307427588185, "grad_norm": 0.28181710839271545, "learning_rate": 7.726354510519583e-06, "loss": 0.2937, "step": 8051 }, { "epoch": 1.1545741324921135, "grad_norm": 0.2907369136810303, "learning_rate": 7.72565516004752e-06, "loss": 0.3156, "step": 8052 }, { "epoch": 1.1547175222254087, "grad_norm": 0.2952151596546173, "learning_rate": 7.724955733697413e-06, "loss": 0.3154, "step": 8053 }, { "epoch": 1.1548609119587037, "grad_norm": 0.3131541311740875, "learning_rate": 7.724256231488729e-06, "loss": 0.3408, "step": 8054 }, { "epoch": 1.1550043016919989, "grad_norm": 0.28463733196258545, "learning_rate": 7.72355665344094e-06, "loss": 0.3193, "step": 8055 }, { "epoch": 1.1551476914252938, "grad_norm": 0.30999264121055603, "learning_rate": 7.722856999573525e-06, "loss": 0.3251, "step": 8056 }, { "epoch": 1.155291081158589, "grad_norm": 0.3202165365219116, "learning_rate": 7.722157269905959e-06, "loss": 0.3062, "step": 8057 }, { "epoch": 1.1554344708918842, "grad_norm": 0.2780923545360565, "learning_rate": 7.721457464457724e-06, "loss": 0.3174, "step": 8058 }, { "epoch": 1.1555778606251792, "grad_norm": 0.364944189786911, "learning_rate": 7.720757583248297e-06, "loss": 0.3182, "step": 8059 }, { "epoch": 1.1557212503584744, "grad_norm": 0.28387540578842163, "learning_rate": 7.720057626297166e-06, "loss": 0.3346, "step": 8060 }, { "epoch": 1.1558646400917694, "grad_norm": 0.31805527210235596, "learning_rate": 7.719357593623813e-06, "loss": 0.3289, "step": 8061 }, { "epoch": 1.1560080298250646, "grad_norm": 0.27582281827926636, "learning_rate": 7.71865748524773e-06, "loss": 0.2989, "step": 8062 }, { "epoch": 1.1561514195583595, "grad_norm": 0.31488877534866333, "learning_rate": 7.717957301188403e-06, "loss": 0.3174, "step": 8063 }, { "epoch": 1.1562948092916547, "grad_norm": 0.278005987405777, "learning_rate": 7.717257041465326e-06, "loss": 0.3072, "step": 8064 }, { "epoch": 1.15643819902495, "grad_norm": 0.2657485604286194, "learning_rate": 7.716556706097994e-06, "loss": 0.3037, "step": 8065 }, { "epoch": 1.156581588758245, "grad_norm": 0.29740649461746216, "learning_rate": 7.715856295105902e-06, "loss": 0.315, "step": 8066 }, { "epoch": 1.15672497849154, "grad_norm": 0.27726051211357117, "learning_rate": 7.715155808508548e-06, "loss": 0.299, "step": 8067 }, { "epoch": 1.156868368224835, "grad_norm": 0.29124394059181213, "learning_rate": 7.714455246325434e-06, "loss": 0.3102, "step": 8068 }, { "epoch": 1.1570117579581303, "grad_norm": 0.2860983610153198, "learning_rate": 7.713754608576059e-06, "loss": 0.3362, "step": 8069 }, { "epoch": 1.1571551476914252, "grad_norm": 0.30322375893592834, "learning_rate": 7.713053895279931e-06, "loss": 0.3163, "step": 8070 }, { "epoch": 1.1572985374247204, "grad_norm": 0.297130286693573, "learning_rate": 7.712353106456558e-06, "loss": 0.3021, "step": 8071 }, { "epoch": 1.1574419271580154, "grad_norm": 0.3124621510505676, "learning_rate": 7.711652242125447e-06, "loss": 0.3265, "step": 8072 }, { "epoch": 1.1575853168913106, "grad_norm": 0.29152804613113403, "learning_rate": 7.710951302306107e-06, "loss": 0.3011, "step": 8073 }, { "epoch": 1.1577287066246056, "grad_norm": 0.29288244247436523, "learning_rate": 7.710250287018052e-06, "loss": 0.3138, "step": 8074 }, { "epoch": 1.1578720963579008, "grad_norm": 0.29021215438842773, "learning_rate": 7.7095491962808e-06, "loss": 0.3122, "step": 8075 }, { "epoch": 1.158015486091196, "grad_norm": 0.3151964247226715, "learning_rate": 7.708848030113867e-06, "loss": 0.3049, "step": 8076 }, { "epoch": 1.158158875824491, "grad_norm": 0.29552289843559265, "learning_rate": 7.70814678853677e-06, "loss": 0.3025, "step": 8077 }, { "epoch": 1.1583022655577861, "grad_norm": 0.2958225905895233, "learning_rate": 7.707445471569034e-06, "loss": 0.3042, "step": 8078 }, { "epoch": 1.1584456552910811, "grad_norm": 0.30499228835105896, "learning_rate": 7.70674407923018e-06, "loss": 0.3146, "step": 8079 }, { "epoch": 1.1585890450243763, "grad_norm": 0.2970174252986908, "learning_rate": 7.706042611539735e-06, "loss": 0.3007, "step": 8080 }, { "epoch": 1.1587324347576713, "grad_norm": 0.3307238519191742, "learning_rate": 7.705341068517226e-06, "loss": 0.3184, "step": 8081 }, { "epoch": 1.1588758244909665, "grad_norm": 0.30029645562171936, "learning_rate": 7.704639450182183e-06, "loss": 0.2993, "step": 8082 }, { "epoch": 1.1590192142242615, "grad_norm": 0.30570095777511597, "learning_rate": 7.703937756554138e-06, "loss": 0.3185, "step": 8083 }, { "epoch": 1.1591626039575567, "grad_norm": 0.29540449380874634, "learning_rate": 7.703235987652626e-06, "loss": 0.2925, "step": 8084 }, { "epoch": 1.1593059936908516, "grad_norm": 0.3004259765148163, "learning_rate": 7.702534143497184e-06, "loss": 0.3101, "step": 8085 }, { "epoch": 1.1594493834241468, "grad_norm": 0.3191605508327484, "learning_rate": 7.701832224107346e-06, "loss": 0.2936, "step": 8086 }, { "epoch": 1.159592773157442, "grad_norm": 0.31048843264579773, "learning_rate": 7.701130229502656e-06, "loss": 0.318, "step": 8087 }, { "epoch": 1.159736162890737, "grad_norm": 0.29145321249961853, "learning_rate": 7.700428159702658e-06, "loss": 0.301, "step": 8088 }, { "epoch": 1.1598795526240322, "grad_norm": 0.30295875668525696, "learning_rate": 7.699726014726889e-06, "loss": 0.3027, "step": 8089 }, { "epoch": 1.1600229423573272, "grad_norm": 0.3222764730453491, "learning_rate": 7.699023794594904e-06, "loss": 0.2976, "step": 8090 }, { "epoch": 1.1601663320906224, "grad_norm": 0.29420047998428345, "learning_rate": 7.698321499326249e-06, "loss": 0.3013, "step": 8091 }, { "epoch": 1.1603097218239173, "grad_norm": 0.28385090827941895, "learning_rate": 7.697619128940472e-06, "loss": 0.3191, "step": 8092 }, { "epoch": 1.1604531115572125, "grad_norm": 0.2931221127510071, "learning_rate": 7.696916683457129e-06, "loss": 0.2899, "step": 8093 }, { "epoch": 1.1605965012905075, "grad_norm": 0.2708449065685272, "learning_rate": 7.696214162895775e-06, "loss": 0.3301, "step": 8094 }, { "epoch": 1.1607398910238027, "grad_norm": 0.27839648723602295, "learning_rate": 7.695511567275965e-06, "loss": 0.3154, "step": 8095 }, { "epoch": 1.1608832807570977, "grad_norm": 0.2914122939109802, "learning_rate": 7.694808896617261e-06, "loss": 0.309, "step": 8096 }, { "epoch": 1.1610266704903929, "grad_norm": 0.28824013471603394, "learning_rate": 7.694106150939223e-06, "loss": 0.3258, "step": 8097 }, { "epoch": 1.161170060223688, "grad_norm": 0.2814546823501587, "learning_rate": 7.693403330261414e-06, "loss": 0.3275, "step": 8098 }, { "epoch": 1.161313449956983, "grad_norm": 0.29304632544517517, "learning_rate": 7.692700434603396e-06, "loss": 0.2976, "step": 8099 }, { "epoch": 1.1614568396902782, "grad_norm": 0.3221886456012726, "learning_rate": 7.691997463984744e-06, "loss": 0.3099, "step": 8100 }, { "epoch": 1.1616002294235732, "grad_norm": 0.2779626250267029, "learning_rate": 7.691294418425022e-06, "loss": 0.3056, "step": 8101 }, { "epoch": 1.1617436191568684, "grad_norm": 0.3193453252315521, "learning_rate": 7.690591297943803e-06, "loss": 0.3095, "step": 8102 }, { "epoch": 1.1618870088901634, "grad_norm": 0.2969612777233124, "learning_rate": 7.689888102560661e-06, "loss": 0.3032, "step": 8103 }, { "epoch": 1.1620303986234586, "grad_norm": 0.2986331880092621, "learning_rate": 7.689184832295174e-06, "loss": 0.3267, "step": 8104 }, { "epoch": 1.1621737883567538, "grad_norm": 0.29380854964256287, "learning_rate": 7.688481487166916e-06, "loss": 0.3053, "step": 8105 }, { "epoch": 1.1623171780900488, "grad_norm": 0.29766082763671875, "learning_rate": 7.68777806719547e-06, "loss": 0.297, "step": 8106 }, { "epoch": 1.1624605678233437, "grad_norm": 0.31769877672195435, "learning_rate": 7.687074572400417e-06, "loss": 0.3011, "step": 8107 }, { "epoch": 1.162603957556639, "grad_norm": 0.2978843152523041, "learning_rate": 7.68637100280134e-06, "loss": 0.3221, "step": 8108 }, { "epoch": 1.1627473472899341, "grad_norm": 0.305696576833725, "learning_rate": 7.68566735841783e-06, "loss": 0.3174, "step": 8109 }, { "epoch": 1.162890737023229, "grad_norm": 0.2734748125076294, "learning_rate": 7.68496363926947e-06, "loss": 0.3048, "step": 8110 }, { "epoch": 1.1630341267565243, "grad_norm": 0.2908850908279419, "learning_rate": 7.684259845375852e-06, "loss": 0.3182, "step": 8111 }, { "epoch": 1.1631775164898193, "grad_norm": 0.2895027697086334, "learning_rate": 7.683555976756572e-06, "loss": 0.3234, "step": 8112 }, { "epoch": 1.1633209062231145, "grad_norm": 0.2824680209159851, "learning_rate": 7.682852033431219e-06, "loss": 0.3058, "step": 8113 }, { "epoch": 1.1634642959564094, "grad_norm": 0.3194195032119751, "learning_rate": 7.682148015419393e-06, "loss": 0.321, "step": 8114 }, { "epoch": 1.1636076856897046, "grad_norm": 0.3224470019340515, "learning_rate": 7.681443922740693e-06, "loss": 0.3051, "step": 8115 }, { "epoch": 1.1637510754229998, "grad_norm": 0.2897818982601166, "learning_rate": 7.680739755414717e-06, "loss": 0.3183, "step": 8116 }, { "epoch": 1.1638944651562948, "grad_norm": 0.34859699010849, "learning_rate": 7.680035513461071e-06, "loss": 0.31, "step": 8117 }, { "epoch": 1.16403785488959, "grad_norm": 0.3145073354244232, "learning_rate": 7.679331196899357e-06, "loss": 0.3015, "step": 8118 }, { "epoch": 1.164181244622885, "grad_norm": 0.2962936758995056, "learning_rate": 7.678626805749187e-06, "loss": 0.3224, "step": 8119 }, { "epoch": 1.1643246343561802, "grad_norm": 0.33265987038612366, "learning_rate": 7.677922340030167e-06, "loss": 0.3196, "step": 8120 }, { "epoch": 1.1644680240894751, "grad_norm": 0.30634695291519165, "learning_rate": 7.677217799761907e-06, "loss": 0.2996, "step": 8121 }, { "epoch": 1.1646114138227703, "grad_norm": 0.3006143271923065, "learning_rate": 7.676513184964022e-06, "loss": 0.3236, "step": 8122 }, { "epoch": 1.1647548035560653, "grad_norm": 0.29956138134002686, "learning_rate": 7.675808495656129e-06, "loss": 0.3033, "step": 8123 }, { "epoch": 1.1648981932893605, "grad_norm": 0.33023321628570557, "learning_rate": 7.675103731857842e-06, "loss": 0.3041, "step": 8124 }, { "epoch": 1.1650415830226555, "grad_norm": 0.3415563702583313, "learning_rate": 7.674398893588783e-06, "loss": 0.3067, "step": 8125 }, { "epoch": 1.1651849727559507, "grad_norm": 0.2986953556537628, "learning_rate": 7.673693980868571e-06, "loss": 0.313, "step": 8126 }, { "epoch": 1.1653283624892459, "grad_norm": 0.3084554672241211, "learning_rate": 7.672988993716834e-06, "loss": 0.3092, "step": 8127 }, { "epoch": 1.1654717522225408, "grad_norm": 0.2953517436981201, "learning_rate": 7.672283932153195e-06, "loss": 0.2933, "step": 8128 }, { "epoch": 1.165615141955836, "grad_norm": 0.3130219876766205, "learning_rate": 7.671578796197282e-06, "loss": 0.3027, "step": 8129 }, { "epoch": 1.165758531689131, "grad_norm": 0.2921542823314667, "learning_rate": 7.670873585868723e-06, "loss": 0.3166, "step": 8130 }, { "epoch": 1.1659019214224262, "grad_norm": 0.2876255214214325, "learning_rate": 7.670168301187153e-06, "loss": 0.3132, "step": 8131 }, { "epoch": 1.1660453111557212, "grad_norm": 0.3053763806819916, "learning_rate": 7.669462942172206e-06, "loss": 0.2947, "step": 8132 }, { "epoch": 1.1661887008890164, "grad_norm": 0.30022475123405457, "learning_rate": 7.668757508843516e-06, "loss": 0.329, "step": 8133 }, { "epoch": 1.1663320906223114, "grad_norm": 0.30012819170951843, "learning_rate": 7.668052001220721e-06, "loss": 0.3138, "step": 8134 }, { "epoch": 1.1664754803556066, "grad_norm": 0.28763166069984436, "learning_rate": 7.667346419323463e-06, "loss": 0.3158, "step": 8135 }, { "epoch": 1.1666188700889015, "grad_norm": 0.3315635323524475, "learning_rate": 7.666640763171386e-06, "loss": 0.2992, "step": 8136 }, { "epoch": 1.1667622598221967, "grad_norm": 0.32218271493911743, "learning_rate": 7.66593503278413e-06, "loss": 0.3045, "step": 8137 }, { "epoch": 1.166905649555492, "grad_norm": 0.29592251777648926, "learning_rate": 7.665229228181345e-06, "loss": 0.3102, "step": 8138 }, { "epoch": 1.167049039288787, "grad_norm": 0.30771711468696594, "learning_rate": 7.66452334938268e-06, "loss": 0.2914, "step": 8139 }, { "epoch": 1.167192429022082, "grad_norm": 0.29887115955352783, "learning_rate": 7.66381739640778e-06, "loss": 0.29, "step": 8140 }, { "epoch": 1.167335818755377, "grad_norm": 0.34670060873031616, "learning_rate": 7.663111369276303e-06, "loss": 0.3061, "step": 8141 }, { "epoch": 1.1674792084886723, "grad_norm": 0.32602164149284363, "learning_rate": 7.662405268007903e-06, "loss": 0.3053, "step": 8142 }, { "epoch": 1.1676225982219672, "grad_norm": 0.2983139753341675, "learning_rate": 7.661699092622235e-06, "loss": 0.3239, "step": 8143 }, { "epoch": 1.1677659879552624, "grad_norm": 0.32486021518707275, "learning_rate": 7.660992843138959e-06, "loss": 0.337, "step": 8144 }, { "epoch": 1.1679093776885574, "grad_norm": 0.3548795282840729, "learning_rate": 7.660286519577735e-06, "loss": 0.3085, "step": 8145 }, { "epoch": 1.1680527674218526, "grad_norm": 0.2990501821041107, "learning_rate": 7.659580121958225e-06, "loss": 0.3068, "step": 8146 }, { "epoch": 1.1681961571551476, "grad_norm": 0.297284334897995, "learning_rate": 7.658873650300099e-06, "loss": 0.3146, "step": 8147 }, { "epoch": 1.1683395468884428, "grad_norm": 0.2958516478538513, "learning_rate": 7.65816710462302e-06, "loss": 0.3078, "step": 8148 }, { "epoch": 1.168482936621738, "grad_norm": 0.3053712248802185, "learning_rate": 7.657460484946655e-06, "loss": 0.2881, "step": 8149 }, { "epoch": 1.168626326355033, "grad_norm": 0.28996455669403076, "learning_rate": 7.656753791290682e-06, "loss": 0.3072, "step": 8150 }, { "epoch": 1.1687697160883281, "grad_norm": 0.3119828701019287, "learning_rate": 7.656047023674769e-06, "loss": 0.317, "step": 8151 }, { "epoch": 1.168913105821623, "grad_norm": 0.3040556311607361, "learning_rate": 7.655340182118591e-06, "loss": 0.3376, "step": 8152 }, { "epoch": 1.1690564955549183, "grad_norm": 0.3123311698436737, "learning_rate": 7.654633266641828e-06, "loss": 0.3106, "step": 8153 }, { "epoch": 1.1691998852882133, "grad_norm": 0.33504530787467957, "learning_rate": 7.653926277264159e-06, "loss": 0.3086, "step": 8154 }, { "epoch": 1.1693432750215085, "grad_norm": 0.31402504444122314, "learning_rate": 7.653219214005265e-06, "loss": 0.3174, "step": 8155 }, { "epoch": 1.1694866647548037, "grad_norm": 0.29926738142967224, "learning_rate": 7.652512076884828e-06, "loss": 0.2893, "step": 8156 }, { "epoch": 1.1696300544880986, "grad_norm": 0.30162733793258667, "learning_rate": 7.651804865922537e-06, "loss": 0.3007, "step": 8157 }, { "epoch": 1.1697734442213938, "grad_norm": 0.34044334292411804, "learning_rate": 7.651097581138076e-06, "loss": 0.3104, "step": 8158 }, { "epoch": 1.1699168339546888, "grad_norm": 0.2882384657859802, "learning_rate": 7.650390222551137e-06, "loss": 0.3207, "step": 8159 }, { "epoch": 1.170060223687984, "grad_norm": 0.3022853136062622, "learning_rate": 7.649682790181412e-06, "loss": 0.3077, "step": 8160 }, { "epoch": 1.170203613421279, "grad_norm": 0.31533658504486084, "learning_rate": 7.648975284048595e-06, "loss": 0.3113, "step": 8161 }, { "epoch": 1.1703470031545742, "grad_norm": 0.35204145312309265, "learning_rate": 7.648267704172378e-06, "loss": 0.3223, "step": 8162 }, { "epoch": 1.1704903928878692, "grad_norm": 0.28677433729171753, "learning_rate": 7.647560050572463e-06, "loss": 0.3082, "step": 8163 }, { "epoch": 1.1706337826211644, "grad_norm": 0.2919042706489563, "learning_rate": 7.64685232326855e-06, "loss": 0.3027, "step": 8164 }, { "epoch": 1.1707771723544593, "grad_norm": 0.3109036386013031, "learning_rate": 7.646144522280339e-06, "loss": 0.3106, "step": 8165 }, { "epoch": 1.1709205620877545, "grad_norm": 0.32698729634284973, "learning_rate": 7.645436647627536e-06, "loss": 0.3222, "step": 8166 }, { "epoch": 1.1710639518210497, "grad_norm": 0.2750118374824524, "learning_rate": 7.644728699329847e-06, "loss": 0.3125, "step": 8167 }, { "epoch": 1.1712073415543447, "grad_norm": 0.2941499352455139, "learning_rate": 7.644020677406976e-06, "loss": 0.3091, "step": 8168 }, { "epoch": 1.17135073128764, "grad_norm": 0.3194320499897003, "learning_rate": 7.643312581878639e-06, "loss": 0.3243, "step": 8169 }, { "epoch": 1.1714941210209349, "grad_norm": 0.3083933889865875, "learning_rate": 7.642604412764545e-06, "loss": 0.3308, "step": 8170 }, { "epoch": 1.17163751075423, "grad_norm": 0.27234598994255066, "learning_rate": 7.64189617008441e-06, "loss": 0.3008, "step": 8171 }, { "epoch": 1.171780900487525, "grad_norm": 0.31463178992271423, "learning_rate": 7.64118785385795e-06, "loss": 0.325, "step": 8172 }, { "epoch": 1.1719242902208202, "grad_norm": 0.30510109663009644, "learning_rate": 7.640479464104881e-06, "loss": 0.3041, "step": 8173 }, { "epoch": 1.1720676799541152, "grad_norm": 0.30364686250686646, "learning_rate": 7.639771000844929e-06, "loss": 0.3078, "step": 8174 }, { "epoch": 1.1722110696874104, "grad_norm": 0.2857247591018677, "learning_rate": 7.63906246409781e-06, "loss": 0.3102, "step": 8175 }, { "epoch": 1.1723544594207054, "grad_norm": 0.28092172741889954, "learning_rate": 7.638353853883253e-06, "loss": 0.3042, "step": 8176 }, { "epoch": 1.1724978491540006, "grad_norm": 0.2784959375858307, "learning_rate": 7.637645170220984e-06, "loss": 0.2939, "step": 8177 }, { "epoch": 1.1726412388872958, "grad_norm": 0.28965944051742554, "learning_rate": 7.636936413130728e-06, "loss": 0.3116, "step": 8178 }, { "epoch": 1.1727846286205907, "grad_norm": 0.27991214394569397, "learning_rate": 7.636227582632221e-06, "loss": 0.3289, "step": 8179 }, { "epoch": 1.172928018353886, "grad_norm": 0.2963997423648834, "learning_rate": 7.635518678745193e-06, "loss": 0.3142, "step": 8180 }, { "epoch": 1.173071408087181, "grad_norm": 0.27813661098480225, "learning_rate": 7.634809701489378e-06, "loss": 0.306, "step": 8181 }, { "epoch": 1.173214797820476, "grad_norm": 0.29381102323532104, "learning_rate": 7.634100650884517e-06, "loss": 0.3305, "step": 8182 }, { "epoch": 1.173358187553771, "grad_norm": 0.2907653748989105, "learning_rate": 7.633391526950343e-06, "loss": 0.3093, "step": 8183 }, { "epoch": 1.1735015772870663, "grad_norm": 0.29615843296051025, "learning_rate": 7.6326823297066e-06, "loss": 0.3174, "step": 8184 }, { "epoch": 1.1736449670203613, "grad_norm": 0.312502920627594, "learning_rate": 7.631973059173032e-06, "loss": 0.3242, "step": 8185 }, { "epoch": 1.1737883567536564, "grad_norm": 0.29383280873298645, "learning_rate": 7.631263715369382e-06, "loss": 0.3031, "step": 8186 }, { "epoch": 1.1739317464869514, "grad_norm": 0.2879014313220978, "learning_rate": 7.630554298315397e-06, "loss": 0.3261, "step": 8187 }, { "epoch": 1.1740751362202466, "grad_norm": 0.2906348407268524, "learning_rate": 7.62984480803083e-06, "loss": 0.3114, "step": 8188 }, { "epoch": 1.1742185259535418, "grad_norm": 0.2807072103023529, "learning_rate": 7.629135244535426e-06, "loss": 0.3038, "step": 8189 }, { "epoch": 1.1743619156868368, "grad_norm": 0.2989142835140228, "learning_rate": 7.628425607848941e-06, "loss": 0.2985, "step": 8190 }, { "epoch": 1.174505305420132, "grad_norm": 0.27056246995925903, "learning_rate": 7.627715897991133e-06, "loss": 0.3133, "step": 8191 }, { "epoch": 1.174648695153427, "grad_norm": 0.2941606938838959, "learning_rate": 7.627006114981754e-06, "loss": 0.3153, "step": 8192 }, { "epoch": 1.1747920848867222, "grad_norm": 0.3146434426307678, "learning_rate": 7.6262962588405675e-06, "loss": 0.3134, "step": 8193 }, { "epoch": 1.1749354746200171, "grad_norm": 0.29387202858924866, "learning_rate": 7.625586329587332e-06, "loss": 0.3265, "step": 8194 }, { "epoch": 1.1750788643533123, "grad_norm": 0.2932213544845581, "learning_rate": 7.6248763272418125e-06, "loss": 0.3165, "step": 8195 }, { "epoch": 1.1752222540866075, "grad_norm": 0.31336042284965515, "learning_rate": 7.6241662518237746e-06, "loss": 0.3384, "step": 8196 }, { "epoch": 1.1753656438199025, "grad_norm": 0.2929931879043579, "learning_rate": 7.6234561033529845e-06, "loss": 0.3219, "step": 8197 }, { "epoch": 1.1755090335531975, "grad_norm": 0.2910572290420532, "learning_rate": 7.622745881849212e-06, "loss": 0.3099, "step": 8198 }, { "epoch": 1.1756524232864927, "grad_norm": 0.2968103587627411, "learning_rate": 7.622035587332228e-06, "loss": 0.3014, "step": 8199 }, { "epoch": 1.1757958130197879, "grad_norm": 0.30284926295280457, "learning_rate": 7.621325219821807e-06, "loss": 0.3251, "step": 8200 }, { "epoch": 1.1759392027530828, "grad_norm": 0.292052298784256, "learning_rate": 7.620614779337725e-06, "loss": 0.3038, "step": 8201 }, { "epoch": 1.176082592486378, "grad_norm": 0.29027223587036133, "learning_rate": 7.6199042658997576e-06, "loss": 0.3188, "step": 8202 }, { "epoch": 1.176225982219673, "grad_norm": 0.29156267642974854, "learning_rate": 7.619193679527687e-06, "loss": 0.3127, "step": 8203 }, { "epoch": 1.1763693719529682, "grad_norm": 0.30371084809303284, "learning_rate": 7.618483020241293e-06, "loss": 0.3217, "step": 8204 }, { "epoch": 1.1765127616862632, "grad_norm": 0.2851085960865021, "learning_rate": 7.617772288060359e-06, "loss": 0.3174, "step": 8205 }, { "epoch": 1.1766561514195584, "grad_norm": 0.2936422824859619, "learning_rate": 7.617061483004673e-06, "loss": 0.3087, "step": 8206 }, { "epoch": 1.1767995411528536, "grad_norm": 0.29768240451812744, "learning_rate": 7.616350605094018e-06, "loss": 0.291, "step": 8207 }, { "epoch": 1.1769429308861485, "grad_norm": 0.30075880885124207, "learning_rate": 7.6156396543481895e-06, "loss": 0.3276, "step": 8208 }, { "epoch": 1.1770863206194437, "grad_norm": 0.3018268644809723, "learning_rate": 7.614928630786976e-06, "loss": 0.3181, "step": 8209 }, { "epoch": 1.1772297103527387, "grad_norm": 0.28714025020599365, "learning_rate": 7.614217534430172e-06, "loss": 0.3223, "step": 8210 }, { "epoch": 1.177373100086034, "grad_norm": 0.2865702509880066, "learning_rate": 7.613506365297573e-06, "loss": 0.3013, "step": 8211 }, { "epoch": 1.1775164898193289, "grad_norm": 0.3293500542640686, "learning_rate": 7.612795123408978e-06, "loss": 0.3265, "step": 8212 }, { "epoch": 1.177659879552624, "grad_norm": 0.28850483894348145, "learning_rate": 7.612083808784186e-06, "loss": 0.292, "step": 8213 }, { "epoch": 1.177803269285919, "grad_norm": 0.3080534040927887, "learning_rate": 7.611372421442996e-06, "loss": 0.3197, "step": 8214 }, { "epoch": 1.1779466590192142, "grad_norm": 0.30355942249298096, "learning_rate": 7.610660961405219e-06, "loss": 0.3143, "step": 8215 }, { "epoch": 1.1780900487525092, "grad_norm": 0.3199644386768341, "learning_rate": 7.609949428690653e-06, "loss": 0.2861, "step": 8216 }, { "epoch": 1.1782334384858044, "grad_norm": 0.30232715606689453, "learning_rate": 7.609237823319111e-06, "loss": 0.3024, "step": 8217 }, { "epoch": 1.1783768282190996, "grad_norm": 0.28928142786026, "learning_rate": 7.608526145310402e-06, "loss": 0.304, "step": 8218 }, { "epoch": 1.1785202179523946, "grad_norm": 0.29891911149024963, "learning_rate": 7.607814394684339e-06, "loss": 0.3293, "step": 8219 }, { "epoch": 1.1786636076856898, "grad_norm": 0.3338285982608795, "learning_rate": 7.607102571460734e-06, "loss": 0.3246, "step": 8220 }, { "epoch": 1.1788069974189848, "grad_norm": 0.2950473725795746, "learning_rate": 7.606390675659403e-06, "loss": 0.302, "step": 8221 }, { "epoch": 1.17895038715228, "grad_norm": 0.2887479066848755, "learning_rate": 7.605678707300166e-06, "loss": 0.3197, "step": 8222 }, { "epoch": 1.179093776885575, "grad_norm": 0.3076346516609192, "learning_rate": 7.604966666402842e-06, "loss": 0.3158, "step": 8223 }, { "epoch": 1.1792371666188701, "grad_norm": 0.28210166096687317, "learning_rate": 7.604254552987254e-06, "loss": 0.3099, "step": 8224 }, { "epoch": 1.179380556352165, "grad_norm": 0.31096869707107544, "learning_rate": 7.603542367073224e-06, "loss": 0.326, "step": 8225 }, { "epoch": 1.1795239460854603, "grad_norm": 0.29304826259613037, "learning_rate": 7.602830108680579e-06, "loss": 0.3077, "step": 8226 }, { "epoch": 1.1796673358187553, "grad_norm": 0.2893598973751068, "learning_rate": 7.602117777829148e-06, "loss": 0.3075, "step": 8227 }, { "epoch": 1.1798107255520505, "grad_norm": 0.29144227504730225, "learning_rate": 7.601405374538762e-06, "loss": 0.3003, "step": 8228 }, { "epoch": 1.1799541152853457, "grad_norm": 0.29102763533592224, "learning_rate": 7.600692898829251e-06, "loss": 0.3093, "step": 8229 }, { "epoch": 1.1800975050186406, "grad_norm": 0.3027946650981903, "learning_rate": 7.599980350720451e-06, "loss": 0.3147, "step": 8230 }, { "epoch": 1.1802408947519358, "grad_norm": 0.2994014322757721, "learning_rate": 7.599267730232198e-06, "loss": 0.3026, "step": 8231 }, { "epoch": 1.1803842844852308, "grad_norm": 0.31163179874420166, "learning_rate": 7.59855503738433e-06, "loss": 0.3124, "step": 8232 }, { "epoch": 1.180527674218526, "grad_norm": 0.28556060791015625, "learning_rate": 7.597842272196686e-06, "loss": 0.3191, "step": 8233 }, { "epoch": 1.180671063951821, "grad_norm": 0.32382822036743164, "learning_rate": 7.59712943468911e-06, "loss": 0.312, "step": 8234 }, { "epoch": 1.1808144536851162, "grad_norm": 0.3123756945133209, "learning_rate": 7.596416524881444e-06, "loss": 0.3218, "step": 8235 }, { "epoch": 1.1809578434184111, "grad_norm": 0.2722536623477936, "learning_rate": 7.595703542793538e-06, "loss": 0.3124, "step": 8236 }, { "epoch": 1.1811012331517063, "grad_norm": 0.2789573669433594, "learning_rate": 7.594990488445237e-06, "loss": 0.3019, "step": 8237 }, { "epoch": 1.1812446228850013, "grad_norm": 0.34511715173721313, "learning_rate": 7.5942773618563925e-06, "loss": 0.3107, "step": 8238 }, { "epoch": 1.1813880126182965, "grad_norm": 0.3331437408924103, "learning_rate": 7.593564163046858e-06, "loss": 0.3039, "step": 8239 }, { "epoch": 1.1815314023515917, "grad_norm": 0.29474887251853943, "learning_rate": 7.5928508920364876e-06, "loss": 0.2935, "step": 8240 }, { "epoch": 1.1816747920848867, "grad_norm": 0.2750208377838135, "learning_rate": 7.5921375488451355e-06, "loss": 0.3107, "step": 8241 }, { "epoch": 1.1818181818181819, "grad_norm": 0.3527318239212036, "learning_rate": 7.591424133492662e-06, "loss": 0.2989, "step": 8242 }, { "epoch": 1.1819615715514769, "grad_norm": 0.33532974123954773, "learning_rate": 7.590710645998927e-06, "loss": 0.3209, "step": 8243 }, { "epoch": 1.182104961284772, "grad_norm": 0.3124264180660248, "learning_rate": 7.589997086383792e-06, "loss": 0.3123, "step": 8244 }, { "epoch": 1.182248351018067, "grad_norm": 0.3460392951965332, "learning_rate": 7.589283454667122e-06, "loss": 0.3009, "step": 8245 }, { "epoch": 1.1823917407513622, "grad_norm": 0.28686264157295227, "learning_rate": 7.588569750868786e-06, "loss": 0.3113, "step": 8246 }, { "epoch": 1.1825351304846574, "grad_norm": 0.2858644425868988, "learning_rate": 7.587855975008649e-06, "loss": 0.3046, "step": 8247 }, { "epoch": 1.1826785202179524, "grad_norm": 0.2958238124847412, "learning_rate": 7.587142127106581e-06, "loss": 0.3106, "step": 8248 }, { "epoch": 1.1828219099512476, "grad_norm": 0.3239971995353699, "learning_rate": 7.586428207182458e-06, "loss": 0.309, "step": 8249 }, { "epoch": 1.1829652996845426, "grad_norm": 0.29112616181373596, "learning_rate": 7.5857142152561526e-06, "loss": 0.3194, "step": 8250 }, { "epoch": 1.1831086894178378, "grad_norm": 0.2974981367588043, "learning_rate": 7.585000151347539e-06, "loss": 0.3071, "step": 8251 }, { "epoch": 1.1832520791511327, "grad_norm": 0.30810993909835815, "learning_rate": 7.5842860154764985e-06, "loss": 0.3108, "step": 8252 }, { "epoch": 1.183395468884428, "grad_norm": 0.2894139289855957, "learning_rate": 7.5835718076629105e-06, "loss": 0.2966, "step": 8253 }, { "epoch": 1.183538858617723, "grad_norm": 0.2820358872413635, "learning_rate": 7.58285752792666e-06, "loss": 0.3173, "step": 8254 }, { "epoch": 1.183682248351018, "grad_norm": 0.29255425930023193, "learning_rate": 7.582143176287627e-06, "loss": 0.2946, "step": 8255 }, { "epoch": 1.183825638084313, "grad_norm": 0.32111111283302307, "learning_rate": 7.581428752765699e-06, "loss": 0.304, "step": 8256 }, { "epoch": 1.1839690278176083, "grad_norm": 0.2769210934638977, "learning_rate": 7.580714257380767e-06, "loss": 0.3078, "step": 8257 }, { "epoch": 1.1841124175509035, "grad_norm": 0.2975304424762726, "learning_rate": 7.579999690152721e-06, "loss": 0.3132, "step": 8258 }, { "epoch": 1.1842558072841984, "grad_norm": 0.3094504773616791, "learning_rate": 7.5792850511014505e-06, "loss": 0.312, "step": 8259 }, { "epoch": 1.1843991970174936, "grad_norm": 0.30862903594970703, "learning_rate": 7.5785703402468535e-06, "loss": 0.3079, "step": 8260 }, { "epoch": 1.1845425867507886, "grad_norm": 0.2936265468597412, "learning_rate": 7.577855557608823e-06, "loss": 0.3207, "step": 8261 }, { "epoch": 1.1846859764840838, "grad_norm": 0.300121933221817, "learning_rate": 7.5771407032072594e-06, "loss": 0.2973, "step": 8262 }, { "epoch": 1.1848293662173788, "grad_norm": 0.27872830629348755, "learning_rate": 7.5764257770620644e-06, "loss": 0.3158, "step": 8263 }, { "epoch": 1.184972755950674, "grad_norm": 0.2862505614757538, "learning_rate": 7.5757107791931375e-06, "loss": 0.2933, "step": 8264 }, { "epoch": 1.185116145683969, "grad_norm": 0.3601638078689575, "learning_rate": 7.5749957096203855e-06, "loss": 0.3123, "step": 8265 }, { "epoch": 1.1852595354172641, "grad_norm": 0.3041173219680786, "learning_rate": 7.5742805683637145e-06, "loss": 0.3097, "step": 8266 }, { "epoch": 1.1854029251505591, "grad_norm": 0.305512398481369, "learning_rate": 7.573565355443031e-06, "loss": 0.3215, "step": 8267 }, { "epoch": 1.1855463148838543, "grad_norm": 0.3029649555683136, "learning_rate": 7.572850070878248e-06, "loss": 0.3101, "step": 8268 }, { "epoch": 1.1856897046171495, "grad_norm": 0.32067081332206726, "learning_rate": 7.5721347146892775e-06, "loss": 0.288, "step": 8269 }, { "epoch": 1.1858330943504445, "grad_norm": 0.3062826097011566, "learning_rate": 7.571419286896032e-06, "loss": 0.2994, "step": 8270 }, { "epoch": 1.1859764840837397, "grad_norm": 0.30205392837524414, "learning_rate": 7.5707037875184296e-06, "loss": 0.3069, "step": 8271 }, { "epoch": 1.1861198738170347, "grad_norm": 0.3060254752635956, "learning_rate": 7.569988216576388e-06, "loss": 0.3167, "step": 8272 }, { "epoch": 1.1862632635503298, "grad_norm": 0.2732356786727905, "learning_rate": 7.569272574089827e-06, "loss": 0.3104, "step": 8273 }, { "epoch": 1.1864066532836248, "grad_norm": 0.3082301914691925, "learning_rate": 7.568556860078673e-06, "loss": 0.3113, "step": 8274 }, { "epoch": 1.18655004301692, "grad_norm": 0.31665289402008057, "learning_rate": 7.567841074562845e-06, "loss": 0.3094, "step": 8275 }, { "epoch": 1.186693432750215, "grad_norm": 0.26865440607070923, "learning_rate": 7.56712521756227e-06, "loss": 0.3155, "step": 8276 }, { "epoch": 1.1868368224835102, "grad_norm": 0.29873085021972656, "learning_rate": 7.566409289096881e-06, "loss": 0.3094, "step": 8277 }, { "epoch": 1.1869802122168052, "grad_norm": 0.30490538477897644, "learning_rate": 7.565693289186604e-06, "loss": 0.3174, "step": 8278 }, { "epoch": 1.1871236019501004, "grad_norm": 0.3111918866634369, "learning_rate": 7.5649772178513725e-06, "loss": 0.2955, "step": 8279 }, { "epoch": 1.1872669916833956, "grad_norm": 0.2870083749294281, "learning_rate": 7.56426107511112e-06, "loss": 0.3168, "step": 8280 }, { "epoch": 1.1874103814166905, "grad_norm": 0.274601548910141, "learning_rate": 7.563544860985784e-06, "loss": 0.2885, "step": 8281 }, { "epoch": 1.1875537711499857, "grad_norm": 0.30498459935188293, "learning_rate": 7.562828575495305e-06, "loss": 0.3246, "step": 8282 }, { "epoch": 1.1876971608832807, "grad_norm": 0.2895614802837372, "learning_rate": 7.5621122186596185e-06, "loss": 0.3079, "step": 8283 }, { "epoch": 1.187840550616576, "grad_norm": 0.29116323590278625, "learning_rate": 7.561395790498669e-06, "loss": 0.2986, "step": 8284 }, { "epoch": 1.1879839403498709, "grad_norm": 0.27553027868270874, "learning_rate": 7.560679291032402e-06, "loss": 0.3052, "step": 8285 }, { "epoch": 1.188127330083166, "grad_norm": 0.3115125298500061, "learning_rate": 7.559962720280762e-06, "loss": 0.3275, "step": 8286 }, { "epoch": 1.1882707198164613, "grad_norm": 0.3058798611164093, "learning_rate": 7.5592460782636975e-06, "loss": 0.2994, "step": 8287 }, { "epoch": 1.1884141095497562, "grad_norm": 0.29206469655036926, "learning_rate": 7.558529365001159e-06, "loss": 0.3243, "step": 8288 }, { "epoch": 1.1885574992830512, "grad_norm": 0.28619638085365295, "learning_rate": 7.5578125805131e-06, "loss": 0.3021, "step": 8289 }, { "epoch": 1.1887008890163464, "grad_norm": 0.30106979608535767, "learning_rate": 7.557095724819472e-06, "loss": 0.3272, "step": 8290 }, { "epoch": 1.1888442787496416, "grad_norm": 0.3452428877353668, "learning_rate": 7.556378797940232e-06, "loss": 0.3266, "step": 8291 }, { "epoch": 1.1889876684829366, "grad_norm": 0.3668346107006073, "learning_rate": 7.55566179989534e-06, "loss": 0.32, "step": 8292 }, { "epoch": 1.1891310582162318, "grad_norm": 0.28875380754470825, "learning_rate": 7.5549447307047565e-06, "loss": 0.3057, "step": 8293 }, { "epoch": 1.1892744479495267, "grad_norm": 0.28557661175727844, "learning_rate": 7.554227590388439e-06, "loss": 0.2888, "step": 8294 }, { "epoch": 1.189417837682822, "grad_norm": 0.3269161283969879, "learning_rate": 7.553510378966355e-06, "loss": 0.3064, "step": 8295 }, { "epoch": 1.189561227416117, "grad_norm": 0.325749933719635, "learning_rate": 7.552793096458472e-06, "loss": 0.3048, "step": 8296 }, { "epoch": 1.1897046171494121, "grad_norm": 0.27753928303718567, "learning_rate": 7.552075742884754e-06, "loss": 0.3228, "step": 8297 }, { "epoch": 1.1898480068827073, "grad_norm": 0.32716062664985657, "learning_rate": 7.551358318265174e-06, "loss": 0.3134, "step": 8298 }, { "epoch": 1.1899913966160023, "grad_norm": 0.33917003870010376, "learning_rate": 7.550640822619702e-06, "loss": 0.3373, "step": 8299 }, { "epoch": 1.1901347863492975, "grad_norm": 0.28438252210617065, "learning_rate": 7.549923255968314e-06, "loss": 0.2852, "step": 8300 }, { "epoch": 1.1902781760825925, "grad_norm": 0.31394681334495544, "learning_rate": 7.549205618330987e-06, "loss": 0.3207, "step": 8301 }, { "epoch": 1.1904215658158877, "grad_norm": 0.29541313648223877, "learning_rate": 7.548487909727695e-06, "loss": 0.3111, "step": 8302 }, { "epoch": 1.1905649555491826, "grad_norm": 0.3224651515483856, "learning_rate": 7.54777013017842e-06, "loss": 0.305, "step": 8303 }, { "epoch": 1.1907083452824778, "grad_norm": 0.33268266916275024, "learning_rate": 7.547052279703144e-06, "loss": 0.3141, "step": 8304 }, { "epoch": 1.1908517350157728, "grad_norm": 0.3079487979412079, "learning_rate": 7.546334358321851e-06, "loss": 0.3124, "step": 8305 }, { "epoch": 1.190995124749068, "grad_norm": 0.2856154143810272, "learning_rate": 7.5456163660545266e-06, "loss": 0.3122, "step": 8306 }, { "epoch": 1.191138514482363, "grad_norm": 0.3275162875652313, "learning_rate": 7.544898302921156e-06, "loss": 0.3126, "step": 8307 }, { "epoch": 1.1912819042156582, "grad_norm": 0.3430725336074829, "learning_rate": 7.5441801689417335e-06, "loss": 0.3178, "step": 8308 }, { "epoch": 1.1914252939489534, "grad_norm": 0.3071078062057495, "learning_rate": 7.543461964136249e-06, "loss": 0.323, "step": 8309 }, { "epoch": 1.1915686836822483, "grad_norm": 0.32401880621910095, "learning_rate": 7.542743688524696e-06, "loss": 0.3088, "step": 8310 }, { "epoch": 1.1917120734155435, "grad_norm": 0.30419066548347473, "learning_rate": 7.542025342127069e-06, "loss": 0.3285, "step": 8311 }, { "epoch": 1.1918554631488385, "grad_norm": 0.2760904133319855, "learning_rate": 7.54130692496337e-06, "loss": 0.296, "step": 8312 }, { "epoch": 1.1919988528821337, "grad_norm": 0.2980201840400696, "learning_rate": 7.540588437053592e-06, "loss": 0.3004, "step": 8313 }, { "epoch": 1.1921422426154287, "grad_norm": 0.3250885307788849, "learning_rate": 7.539869878417741e-06, "loss": 0.296, "step": 8314 }, { "epoch": 1.1922856323487239, "grad_norm": 0.3038807809352875, "learning_rate": 7.53915124907582e-06, "loss": 0.3165, "step": 8315 }, { "epoch": 1.1924290220820188, "grad_norm": 0.27946996688842773, "learning_rate": 7.5384325490478336e-06, "loss": 0.3278, "step": 8316 }, { "epoch": 1.192572411815314, "grad_norm": 0.3223406672477722, "learning_rate": 7.53771377835379e-06, "loss": 0.3345, "step": 8317 }, { "epoch": 1.192715801548609, "grad_norm": 0.275272011756897, "learning_rate": 7.5369949370137e-06, "loss": 0.3127, "step": 8318 }, { "epoch": 1.1928591912819042, "grad_norm": 0.30420660972595215, "learning_rate": 7.536276025047571e-06, "loss": 0.3164, "step": 8319 }, { "epoch": 1.1930025810151994, "grad_norm": 0.2887101173400879, "learning_rate": 7.535557042475421e-06, "loss": 0.316, "step": 8320 }, { "epoch": 1.1931459707484944, "grad_norm": 0.3051097095012665, "learning_rate": 7.534837989317263e-06, "loss": 0.3326, "step": 8321 }, { "epoch": 1.1932893604817896, "grad_norm": 0.29696717858314514, "learning_rate": 7.534118865593114e-06, "loss": 0.3065, "step": 8322 }, { "epoch": 1.1934327502150845, "grad_norm": 0.3251042366027832, "learning_rate": 7.5333996713229965e-06, "loss": 0.3228, "step": 8323 }, { "epoch": 1.1935761399483797, "grad_norm": 0.3062838912010193, "learning_rate": 7.532680406526927e-06, "loss": 0.3133, "step": 8324 }, { "epoch": 1.1937195296816747, "grad_norm": 0.3046051561832428, "learning_rate": 7.531961071224929e-06, "loss": 0.3412, "step": 8325 }, { "epoch": 1.19386291941497, "grad_norm": 0.29464682936668396, "learning_rate": 7.531241665437032e-06, "loss": 0.325, "step": 8326 }, { "epoch": 1.1940063091482649, "grad_norm": 0.282551646232605, "learning_rate": 7.53052218918326e-06, "loss": 0.3124, "step": 8327 }, { "epoch": 1.19414969888156, "grad_norm": 0.3046662211418152, "learning_rate": 7.529802642483646e-06, "loss": 0.3099, "step": 8328 }, { "epoch": 1.194293088614855, "grad_norm": 0.2839185297489166, "learning_rate": 7.529083025358215e-06, "loss": 0.31, "step": 8329 }, { "epoch": 1.1944364783481503, "grad_norm": 0.29043126106262207, "learning_rate": 7.528363337827002e-06, "loss": 0.3126, "step": 8330 }, { "epoch": 1.1945798680814455, "grad_norm": 0.27687302231788635, "learning_rate": 7.527643579910047e-06, "loss": 0.3003, "step": 8331 }, { "epoch": 1.1947232578147404, "grad_norm": 0.30544787645339966, "learning_rate": 7.52692375162738e-06, "loss": 0.3023, "step": 8332 }, { "epoch": 1.1948666475480356, "grad_norm": 0.3042101562023163, "learning_rate": 7.526203852999043e-06, "loss": 0.3211, "step": 8333 }, { "epoch": 1.1950100372813306, "grad_norm": 0.29184186458587646, "learning_rate": 7.525483884045077e-06, "loss": 0.3431, "step": 8334 }, { "epoch": 1.1951534270146258, "grad_norm": 0.28302448987960815, "learning_rate": 7.524763844785524e-06, "loss": 0.3083, "step": 8335 }, { "epoch": 1.1952968167479208, "grad_norm": 0.3072325587272644, "learning_rate": 7.524043735240431e-06, "loss": 0.329, "step": 8336 }, { "epoch": 1.195440206481216, "grad_norm": 0.26840078830718994, "learning_rate": 7.5233235554298426e-06, "loss": 0.2966, "step": 8337 }, { "epoch": 1.1955835962145112, "grad_norm": 0.29760774970054626, "learning_rate": 7.522603305373808e-06, "loss": 0.3087, "step": 8338 }, { "epoch": 1.1957269859478061, "grad_norm": 0.28081586956977844, "learning_rate": 7.521882985092377e-06, "loss": 0.312, "step": 8339 }, { "epoch": 1.1958703756811013, "grad_norm": 0.30610767006874084, "learning_rate": 7.5211625946056046e-06, "loss": 0.3329, "step": 8340 }, { "epoch": 1.1960137654143963, "grad_norm": 0.32191646099090576, "learning_rate": 7.520442133933543e-06, "loss": 0.3052, "step": 8341 }, { "epoch": 1.1961571551476915, "grad_norm": 0.3102070987224579, "learning_rate": 7.519721603096249e-06, "loss": 0.2968, "step": 8342 }, { "epoch": 1.1963005448809865, "grad_norm": 0.28287428617477417, "learning_rate": 7.5190010021137815e-06, "loss": 0.2957, "step": 8343 }, { "epoch": 1.1964439346142817, "grad_norm": 0.2701084613800049, "learning_rate": 7.518280331006202e-06, "loss": 0.2996, "step": 8344 }, { "epoch": 1.1965873243475766, "grad_norm": 0.3172524869441986, "learning_rate": 7.517559589793571e-06, "loss": 0.3159, "step": 8345 }, { "epoch": 1.1967307140808718, "grad_norm": 0.30435189604759216, "learning_rate": 7.516838778495954e-06, "loss": 0.2982, "step": 8346 }, { "epoch": 1.1968741038141668, "grad_norm": 0.29297029972076416, "learning_rate": 7.5161178971334195e-06, "loss": 0.3171, "step": 8347 }, { "epoch": 1.197017493547462, "grad_norm": 0.26183637976646423, "learning_rate": 7.51539694572603e-06, "loss": 0.3135, "step": 8348 }, { "epoch": 1.1971608832807572, "grad_norm": 0.28579798340797424, "learning_rate": 7.514675924293859e-06, "loss": 0.2944, "step": 8349 }, { "epoch": 1.1973042730140522, "grad_norm": 0.2977565824985504, "learning_rate": 7.513954832856981e-06, "loss": 0.3286, "step": 8350 }, { "epoch": 1.1974476627473474, "grad_norm": 0.3207738995552063, "learning_rate": 7.513233671435467e-06, "loss": 0.3156, "step": 8351 }, { "epoch": 1.1975910524806423, "grad_norm": 0.31211984157562256, "learning_rate": 7.512512440049392e-06, "loss": 0.2869, "step": 8352 }, { "epoch": 1.1977344422139375, "grad_norm": 0.27897176146507263, "learning_rate": 7.511791138718836e-06, "loss": 0.3069, "step": 8353 }, { "epoch": 1.1978778319472325, "grad_norm": 0.29156333208084106, "learning_rate": 7.5110697674638785e-06, "loss": 0.3056, "step": 8354 }, { "epoch": 1.1980212216805277, "grad_norm": 0.30980682373046875, "learning_rate": 7.510348326304604e-06, "loss": 0.2864, "step": 8355 }, { "epoch": 1.1981646114138227, "grad_norm": 0.31551313400268555, "learning_rate": 7.509626815261091e-06, "loss": 0.2977, "step": 8356 }, { "epoch": 1.1983080011471179, "grad_norm": 0.29996487498283386, "learning_rate": 7.508905234353428e-06, "loss": 0.3281, "step": 8357 }, { "epoch": 1.1984513908804129, "grad_norm": 0.30237430334091187, "learning_rate": 7.508183583601705e-06, "loss": 0.2858, "step": 8358 }, { "epoch": 1.198594780613708, "grad_norm": 0.3363052010536194, "learning_rate": 7.507461863026007e-06, "loss": 0.3358, "step": 8359 }, { "epoch": 1.1987381703470033, "grad_norm": 0.31068122386932373, "learning_rate": 7.50674007264643e-06, "loss": 0.3162, "step": 8360 }, { "epoch": 1.1988815600802982, "grad_norm": 0.3042331635951996, "learning_rate": 7.506018212483064e-06, "loss": 0.3133, "step": 8361 }, { "epoch": 1.1990249498135934, "grad_norm": 0.2816530764102936, "learning_rate": 7.505296282556006e-06, "loss": 0.3365, "step": 8362 }, { "epoch": 1.1991683395468884, "grad_norm": 0.29861411452293396, "learning_rate": 7.504574282885353e-06, "loss": 0.314, "step": 8363 }, { "epoch": 1.1993117292801836, "grad_norm": 0.2708797752857208, "learning_rate": 7.503852213491205e-06, "loss": 0.3251, "step": 8364 }, { "epoch": 1.1994551190134786, "grad_norm": 0.2940783202648163, "learning_rate": 7.503130074393663e-06, "loss": 0.3035, "step": 8365 }, { "epoch": 1.1995985087467738, "grad_norm": 0.3138585686683655, "learning_rate": 7.5024078656128305e-06, "loss": 0.3097, "step": 8366 }, { "epoch": 1.1997418984800687, "grad_norm": 0.29111000895500183, "learning_rate": 7.501685587168813e-06, "loss": 0.3105, "step": 8367 }, { "epoch": 1.199885288213364, "grad_norm": 0.3084661364555359, "learning_rate": 7.5009632390817175e-06, "loss": 0.3314, "step": 8368 }, { "epoch": 1.200028677946659, "grad_norm": 0.30952396988868713, "learning_rate": 7.500240821371651e-06, "loss": 0.3134, "step": 8369 }, { "epoch": 1.200172067679954, "grad_norm": 0.3227722644805908, "learning_rate": 7.499518334058727e-06, "loss": 0.3178, "step": 8370 }, { "epoch": 1.2003154574132493, "grad_norm": 0.2752142548561096, "learning_rate": 7.498795777163057e-06, "loss": 0.296, "step": 8371 }, { "epoch": 1.2004588471465443, "grad_norm": 0.31915876269340515, "learning_rate": 7.4980731507047565e-06, "loss": 0.3121, "step": 8372 }, { "epoch": 1.2006022368798395, "grad_norm": 0.2952791154384613, "learning_rate": 7.4973504547039425e-06, "loss": 0.3, "step": 8373 }, { "epoch": 1.2007456266131344, "grad_norm": 0.2985933721065521, "learning_rate": 7.496627689180734e-06, "loss": 0.3077, "step": 8374 }, { "epoch": 1.2008890163464296, "grad_norm": 0.2959568500518799, "learning_rate": 7.495904854155251e-06, "loss": 0.3135, "step": 8375 }, { "epoch": 1.2010324060797246, "grad_norm": 0.28196755051612854, "learning_rate": 7.4951819496476154e-06, "loss": 0.3131, "step": 8376 }, { "epoch": 1.2011757958130198, "grad_norm": 0.29013654589653015, "learning_rate": 7.494458975677954e-06, "loss": 0.3165, "step": 8377 }, { "epoch": 1.201319185546315, "grad_norm": 0.2928875684738159, "learning_rate": 7.49373593226639e-06, "loss": 0.306, "step": 8378 }, { "epoch": 1.20146257527961, "grad_norm": 0.2966468930244446, "learning_rate": 7.493012819433057e-06, "loss": 0.3037, "step": 8379 }, { "epoch": 1.201605965012905, "grad_norm": 0.3000963032245636, "learning_rate": 7.492289637198078e-06, "loss": 0.3085, "step": 8380 }, { "epoch": 1.2017493547462001, "grad_norm": 0.2784774899482727, "learning_rate": 7.491566385581591e-06, "loss": 0.3089, "step": 8381 }, { "epoch": 1.2018927444794953, "grad_norm": 0.2877010107040405, "learning_rate": 7.49084306460373e-06, "loss": 0.3055, "step": 8382 }, { "epoch": 1.2020361342127903, "grad_norm": 0.30198749899864197, "learning_rate": 7.490119674284628e-06, "loss": 0.3008, "step": 8383 }, { "epoch": 1.2021795239460855, "grad_norm": 0.27668464183807373, "learning_rate": 7.489396214644425e-06, "loss": 0.3323, "step": 8384 }, { "epoch": 1.2023229136793805, "grad_norm": 0.29745933413505554, "learning_rate": 7.488672685703262e-06, "loss": 0.3119, "step": 8385 }, { "epoch": 1.2024663034126757, "grad_norm": 0.271644651889801, "learning_rate": 7.487949087481279e-06, "loss": 0.306, "step": 8386 }, { "epoch": 1.2026096931459707, "grad_norm": 0.300959974527359, "learning_rate": 7.4872254199986206e-06, "loss": 0.3143, "step": 8387 }, { "epoch": 1.2027530828792659, "grad_norm": 0.2655596435070038, "learning_rate": 7.486501683275432e-06, "loss": 0.28, "step": 8388 }, { "epoch": 1.202896472612561, "grad_norm": 0.31356456875801086, "learning_rate": 7.485777877331862e-06, "loss": 0.3346, "step": 8389 }, { "epoch": 1.203039862345856, "grad_norm": 0.2889194190502167, "learning_rate": 7.4850540021880595e-06, "loss": 0.3183, "step": 8390 }, { "epoch": 1.2031832520791512, "grad_norm": 0.2788618206977844, "learning_rate": 7.484330057864177e-06, "loss": 0.3147, "step": 8391 }, { "epoch": 1.2033266418124462, "grad_norm": 0.30196094512939453, "learning_rate": 7.483606044380366e-06, "loss": 0.3237, "step": 8392 }, { "epoch": 1.2034700315457414, "grad_norm": 0.2852090299129486, "learning_rate": 7.482881961756785e-06, "loss": 0.3029, "step": 8393 }, { "epoch": 1.2036134212790364, "grad_norm": 0.30000680685043335, "learning_rate": 7.482157810013588e-06, "loss": 0.3065, "step": 8394 }, { "epoch": 1.2037568110123316, "grad_norm": 0.30047327280044556, "learning_rate": 7.481433589170937e-06, "loss": 0.3362, "step": 8395 }, { "epoch": 1.2039002007456265, "grad_norm": 0.2797107696533203, "learning_rate": 7.4807092992489914e-06, "loss": 0.311, "step": 8396 }, { "epoch": 1.2040435904789217, "grad_norm": 0.29499679803848267, "learning_rate": 7.479984940267914e-06, "loss": 0.314, "step": 8397 }, { "epoch": 1.2041869802122167, "grad_norm": 0.3220309615135193, "learning_rate": 7.479260512247872e-06, "loss": 0.3062, "step": 8398 }, { "epoch": 1.204330369945512, "grad_norm": 0.29477086663246155, "learning_rate": 7.478536015209031e-06, "loss": 0.3095, "step": 8399 }, { "epoch": 1.204473759678807, "grad_norm": 0.2825600206851959, "learning_rate": 7.477811449171561e-06, "loss": 0.3002, "step": 8400 }, { "epoch": 1.204617149412102, "grad_norm": 0.2733194828033447, "learning_rate": 7.477086814155631e-06, "loss": 0.3, "step": 8401 }, { "epoch": 1.2047605391453973, "grad_norm": 0.34079569578170776, "learning_rate": 7.476362110181414e-06, "loss": 0.319, "step": 8402 }, { "epoch": 1.2049039288786922, "grad_norm": 0.30388590693473816, "learning_rate": 7.475637337269085e-06, "loss": 0.3152, "step": 8403 }, { "epoch": 1.2050473186119874, "grad_norm": 0.2981891930103302, "learning_rate": 7.474912495438822e-06, "loss": 0.2958, "step": 8404 }, { "epoch": 1.2051907083452824, "grad_norm": 0.2986995279788971, "learning_rate": 7.4741875847108015e-06, "loss": 0.3086, "step": 8405 }, { "epoch": 1.2053340980785776, "grad_norm": 0.277882844209671, "learning_rate": 7.473462605105206e-06, "loss": 0.311, "step": 8406 }, { "epoch": 1.2054774878118726, "grad_norm": 0.26871225237846375, "learning_rate": 7.472737556642215e-06, "loss": 0.3, "step": 8407 }, { "epoch": 1.2056208775451678, "grad_norm": 0.2979830205440521, "learning_rate": 7.472012439342014e-06, "loss": 0.3274, "step": 8408 }, { "epoch": 1.2057642672784628, "grad_norm": 0.3031022548675537, "learning_rate": 7.47128725322479e-06, "loss": 0.3256, "step": 8409 }, { "epoch": 1.205907657011758, "grad_norm": 0.2994481325149536, "learning_rate": 7.47056199831073e-06, "loss": 0.2986, "step": 8410 }, { "epoch": 1.2060510467450531, "grad_norm": 0.3114915192127228, "learning_rate": 7.469836674620025e-06, "loss": 0.3122, "step": 8411 }, { "epoch": 1.2061944364783481, "grad_norm": 0.29802024364471436, "learning_rate": 7.469111282172866e-06, "loss": 0.3286, "step": 8412 }, { "epoch": 1.2063378262116433, "grad_norm": 0.2902659475803375, "learning_rate": 7.4683858209894465e-06, "loss": 0.3053, "step": 8413 }, { "epoch": 1.2064812159449383, "grad_norm": 0.30325645208358765, "learning_rate": 7.467660291089964e-06, "loss": 0.3106, "step": 8414 }, { "epoch": 1.2066246056782335, "grad_norm": 0.31111961603164673, "learning_rate": 7.466934692494615e-06, "loss": 0.3062, "step": 8415 }, { "epoch": 1.2067679954115285, "grad_norm": 0.28535598516464233, "learning_rate": 7.466209025223598e-06, "loss": 0.3072, "step": 8416 }, { "epoch": 1.2069113851448237, "grad_norm": 0.28339412808418274, "learning_rate": 7.465483289297115e-06, "loss": 0.2936, "step": 8417 }, { "epoch": 1.2070547748781186, "grad_norm": 0.3016000986099243, "learning_rate": 7.464757484735371e-06, "loss": 0.289, "step": 8418 }, { "epoch": 1.2071981646114138, "grad_norm": 0.3106714189052582, "learning_rate": 7.4640316115585696e-06, "loss": 0.317, "step": 8419 }, { "epoch": 1.2073415543447088, "grad_norm": 0.3151583969593048, "learning_rate": 7.46330566978692e-06, "loss": 0.3175, "step": 8420 }, { "epoch": 1.207484944078004, "grad_norm": 0.30875301361083984, "learning_rate": 7.462579659440628e-06, "loss": 0.3007, "step": 8421 }, { "epoch": 1.2076283338112992, "grad_norm": 0.3075692355632782, "learning_rate": 7.461853580539908e-06, "loss": 0.2992, "step": 8422 }, { "epoch": 1.2077717235445942, "grad_norm": 0.30799400806427, "learning_rate": 7.46112743310497e-06, "loss": 0.3133, "step": 8423 }, { "epoch": 1.2079151132778894, "grad_norm": 0.3193845748901367, "learning_rate": 7.4604012171560305e-06, "loss": 0.3057, "step": 8424 }, { "epoch": 1.2080585030111843, "grad_norm": 0.28373780846595764, "learning_rate": 7.459674932713306e-06, "loss": 0.3036, "step": 8425 }, { "epoch": 1.2082018927444795, "grad_norm": 0.28229793906211853, "learning_rate": 7.458948579797014e-06, "loss": 0.3284, "step": 8426 }, { "epoch": 1.2083452824777745, "grad_norm": 0.31204620003700256, "learning_rate": 7.458222158427377e-06, "loss": 0.3027, "step": 8427 }, { "epoch": 1.2084886722110697, "grad_norm": 0.3274187445640564, "learning_rate": 7.457495668624618e-06, "loss": 0.3051, "step": 8428 }, { "epoch": 1.208632061944365, "grad_norm": 0.29478710889816284, "learning_rate": 7.456769110408959e-06, "loss": 0.3017, "step": 8429 }, { "epoch": 1.2087754516776599, "grad_norm": 0.30537787079811096, "learning_rate": 7.456042483800626e-06, "loss": 0.3316, "step": 8430 }, { "epoch": 1.208918841410955, "grad_norm": 0.30194732546806335, "learning_rate": 7.4553157888198505e-06, "loss": 0.3198, "step": 8431 }, { "epoch": 1.20906223114425, "grad_norm": 0.3006335198879242, "learning_rate": 7.454589025486858e-06, "loss": 0.3092, "step": 8432 }, { "epoch": 1.2092056208775452, "grad_norm": 0.2992352843284607, "learning_rate": 7.453862193821886e-06, "loss": 0.3102, "step": 8433 }, { "epoch": 1.2093490106108402, "grad_norm": 0.3011403977870941, "learning_rate": 7.453135293845162e-06, "loss": 0.31, "step": 8434 }, { "epoch": 1.2094924003441354, "grad_norm": 0.3162034749984741, "learning_rate": 7.4524083255769264e-06, "loss": 0.332, "step": 8435 }, { "epoch": 1.2096357900774304, "grad_norm": 0.30093351006507874, "learning_rate": 7.4516812890374155e-06, "loss": 0.2926, "step": 8436 }, { "epoch": 1.2097791798107256, "grad_norm": 0.30124160647392273, "learning_rate": 7.450954184246868e-06, "loss": 0.3177, "step": 8437 }, { "epoch": 1.2099225695440206, "grad_norm": 0.30745869874954224, "learning_rate": 7.450227011225527e-06, "loss": 0.3142, "step": 8438 }, { "epoch": 1.2100659592773158, "grad_norm": 0.29303205013275146, "learning_rate": 7.4494997699936365e-06, "loss": 0.3076, "step": 8439 }, { "epoch": 1.210209349010611, "grad_norm": 0.298150897026062, "learning_rate": 7.448772460571438e-06, "loss": 0.3011, "step": 8440 }, { "epoch": 1.210352738743906, "grad_norm": 0.3021014928817749, "learning_rate": 7.448045082979184e-06, "loss": 0.3092, "step": 8441 }, { "epoch": 1.2104961284772011, "grad_norm": 0.3056190013885498, "learning_rate": 7.4473176372371194e-06, "loss": 0.3325, "step": 8442 }, { "epoch": 1.210639518210496, "grad_norm": 0.3164081275463104, "learning_rate": 7.446590123365495e-06, "loss": 0.3338, "step": 8443 }, { "epoch": 1.2107829079437913, "grad_norm": 0.2992764711380005, "learning_rate": 7.445862541384566e-06, "loss": 0.313, "step": 8444 }, { "epoch": 1.2109262976770863, "grad_norm": 0.29909658432006836, "learning_rate": 7.445134891314587e-06, "loss": 0.2931, "step": 8445 }, { "epoch": 1.2110696874103815, "grad_norm": 0.3002045750617981, "learning_rate": 7.444407173175813e-06, "loss": 0.3127, "step": 8446 }, { "epoch": 1.2112130771436764, "grad_norm": 0.2722858488559723, "learning_rate": 7.443679386988505e-06, "loss": 0.2969, "step": 8447 }, { "epoch": 1.2113564668769716, "grad_norm": 0.30379313230514526, "learning_rate": 7.442951532772921e-06, "loss": 0.3091, "step": 8448 }, { "epoch": 1.2114998566102666, "grad_norm": 0.29506629705429077, "learning_rate": 7.442223610549324e-06, "loss": 0.3337, "step": 8449 }, { "epoch": 1.2116432463435618, "grad_norm": 0.2968917191028595, "learning_rate": 7.441495620337979e-06, "loss": 0.2991, "step": 8450 }, { "epoch": 1.211786636076857, "grad_norm": 0.2869378626346588, "learning_rate": 7.440767562159151e-06, "loss": 0.3341, "step": 8451 }, { "epoch": 1.211930025810152, "grad_norm": 0.2925001084804535, "learning_rate": 7.440039436033111e-06, "loss": 0.3039, "step": 8452 }, { "epoch": 1.2120734155434472, "grad_norm": 0.29711809754371643, "learning_rate": 7.439311241980123e-06, "loss": 0.3248, "step": 8453 }, { "epoch": 1.2122168052767421, "grad_norm": 0.31459110975265503, "learning_rate": 7.438582980020463e-06, "loss": 0.3201, "step": 8454 }, { "epoch": 1.2123601950100373, "grad_norm": 0.29507267475128174, "learning_rate": 7.437854650174407e-06, "loss": 0.3223, "step": 8455 }, { "epoch": 1.2125035847433323, "grad_norm": 0.273526668548584, "learning_rate": 7.437126252462225e-06, "loss": 0.325, "step": 8456 }, { "epoch": 1.2126469744766275, "grad_norm": 0.33015090227127075, "learning_rate": 7.436397786904197e-06, "loss": 0.3078, "step": 8457 }, { "epoch": 1.2127903642099225, "grad_norm": 0.2931666374206543, "learning_rate": 7.435669253520605e-06, "loss": 0.2817, "step": 8458 }, { "epoch": 1.2129337539432177, "grad_norm": 0.2838224768638611, "learning_rate": 7.4349406523317255e-06, "loss": 0.3128, "step": 8459 }, { "epoch": 1.2130771436765126, "grad_norm": 0.28755906224250793, "learning_rate": 7.434211983357845e-06, "loss": 0.319, "step": 8460 }, { "epoch": 1.2132205334098078, "grad_norm": 0.328275203704834, "learning_rate": 7.433483246619248e-06, "loss": 0.2893, "step": 8461 }, { "epoch": 1.213363923143103, "grad_norm": 0.306510865688324, "learning_rate": 7.432754442136219e-06, "loss": 0.3235, "step": 8462 }, { "epoch": 1.213507312876398, "grad_norm": 0.3150993585586548, "learning_rate": 7.432025569929049e-06, "loss": 0.3147, "step": 8463 }, { "epoch": 1.2136507026096932, "grad_norm": 0.29186227917671204, "learning_rate": 7.431296630018029e-06, "loss": 0.3379, "step": 8464 }, { "epoch": 1.2137940923429882, "grad_norm": 0.3333527147769928, "learning_rate": 7.4305676224234505e-06, "loss": 0.3026, "step": 8465 }, { "epoch": 1.2139374820762834, "grad_norm": 0.348245769739151, "learning_rate": 7.429838547165609e-06, "loss": 0.2976, "step": 8466 }, { "epoch": 1.2140808718095784, "grad_norm": 0.319346159696579, "learning_rate": 7.429109404264798e-06, "loss": 0.2991, "step": 8467 }, { "epoch": 1.2142242615428736, "grad_norm": 0.2863805592060089, "learning_rate": 7.42838019374132e-06, "loss": 0.3299, "step": 8468 }, { "epoch": 1.2143676512761687, "grad_norm": 0.3176558017730713, "learning_rate": 7.427650915615473e-06, "loss": 0.3063, "step": 8469 }, { "epoch": 1.2145110410094637, "grad_norm": 0.29634156823158264, "learning_rate": 7.426921569907558e-06, "loss": 0.2893, "step": 8470 }, { "epoch": 1.2146544307427587, "grad_norm": 0.30555927753448486, "learning_rate": 7.426192156637879e-06, "loss": 0.3331, "step": 8471 }, { "epoch": 1.214797820476054, "grad_norm": 0.2900458574295044, "learning_rate": 7.4254626758267445e-06, "loss": 0.3192, "step": 8472 }, { "epoch": 1.214941210209349, "grad_norm": 0.3022707402706146, "learning_rate": 7.424733127494459e-06, "loss": 0.3053, "step": 8473 }, { "epoch": 1.215084599942644, "grad_norm": 0.2918190360069275, "learning_rate": 7.424003511661333e-06, "loss": 0.3129, "step": 8474 }, { "epoch": 1.2152279896759393, "grad_norm": 0.30141234397888184, "learning_rate": 7.423273828347679e-06, "loss": 0.3186, "step": 8475 }, { "epoch": 1.2153713794092342, "grad_norm": 0.2866091728210449, "learning_rate": 7.4225440775738074e-06, "loss": 0.3016, "step": 8476 }, { "epoch": 1.2155147691425294, "grad_norm": 0.2959143817424774, "learning_rate": 7.421814259360038e-06, "loss": 0.2858, "step": 8477 }, { "epoch": 1.2156581588758244, "grad_norm": 0.30432114005088806, "learning_rate": 7.421084373726684e-06, "loss": 0.2999, "step": 8478 }, { "epoch": 1.2158015486091196, "grad_norm": 0.30964991450309753, "learning_rate": 7.420354420694066e-06, "loss": 0.3168, "step": 8479 }, { "epoch": 1.2159449383424148, "grad_norm": 0.3075701594352722, "learning_rate": 7.419624400282502e-06, "loss": 0.3043, "step": 8480 }, { "epoch": 1.2160883280757098, "grad_norm": 0.3028331398963928, "learning_rate": 7.418894312512317e-06, "loss": 0.3137, "step": 8481 }, { "epoch": 1.216231717809005, "grad_norm": 0.27242034673690796, "learning_rate": 7.418164157403838e-06, "loss": 0.3015, "step": 8482 }, { "epoch": 1.2163751075423, "grad_norm": 0.30368292331695557, "learning_rate": 7.417433934977386e-06, "loss": 0.3105, "step": 8483 }, { "epoch": 1.2165184972755951, "grad_norm": 0.31878188252449036, "learning_rate": 7.4167036452532926e-06, "loss": 0.3131, "step": 8484 }, { "epoch": 1.21666188700889, "grad_norm": 0.28887704014778137, "learning_rate": 7.4159732882518884e-06, "loss": 0.3374, "step": 8485 }, { "epoch": 1.2168052767421853, "grad_norm": 0.27592113614082336, "learning_rate": 7.4152428639935035e-06, "loss": 0.3049, "step": 8486 }, { "epoch": 1.2169486664754803, "grad_norm": 0.3283839821815491, "learning_rate": 7.414512372498473e-06, "loss": 0.3147, "step": 8487 }, { "epoch": 1.2170920562087755, "grad_norm": 0.32303476333618164, "learning_rate": 7.413781813787132e-06, "loss": 0.3143, "step": 8488 }, { "epoch": 1.2172354459420704, "grad_norm": 0.27172982692718506, "learning_rate": 7.4130511878798185e-06, "loss": 0.3079, "step": 8489 }, { "epoch": 1.2173788356753656, "grad_norm": 0.3004227578639984, "learning_rate": 7.412320494796871e-06, "loss": 0.3003, "step": 8490 }, { "epoch": 1.2175222254086608, "grad_norm": 0.31711044907569885, "learning_rate": 7.4115897345586315e-06, "loss": 0.3223, "step": 8491 }, { "epoch": 1.2176656151419558, "grad_norm": 0.2748687267303467, "learning_rate": 7.410858907185445e-06, "loss": 0.3109, "step": 8492 }, { "epoch": 1.217809004875251, "grad_norm": 0.2724786400794983, "learning_rate": 7.410128012697655e-06, "loss": 0.3032, "step": 8493 }, { "epoch": 1.217952394608546, "grad_norm": 0.3075132369995117, "learning_rate": 7.4093970511156074e-06, "loss": 0.3223, "step": 8494 }, { "epoch": 1.2180957843418412, "grad_norm": 0.28725942969322205, "learning_rate": 7.408666022459654e-06, "loss": 0.3149, "step": 8495 }, { "epoch": 1.2182391740751362, "grad_norm": 0.30169668793678284, "learning_rate": 7.407934926750143e-06, "loss": 0.3085, "step": 8496 }, { "epoch": 1.2183825638084314, "grad_norm": 0.28664445877075195, "learning_rate": 7.4072037640074265e-06, "loss": 0.3434, "step": 8497 }, { "epoch": 1.2185259535417263, "grad_norm": 0.3069029748439789, "learning_rate": 7.406472534251862e-06, "loss": 0.3061, "step": 8498 }, { "epoch": 1.2186693432750215, "grad_norm": 0.28981995582580566, "learning_rate": 7.405741237503803e-06, "loss": 0.2949, "step": 8499 }, { "epoch": 1.2188127330083165, "grad_norm": 0.30638036131858826, "learning_rate": 7.405009873783609e-06, "loss": 0.3037, "step": 8500 }, { "epoch": 1.2189561227416117, "grad_norm": 0.26814791560173035, "learning_rate": 7.40427844311164e-06, "loss": 0.3003, "step": 8501 }, { "epoch": 1.2190995124749069, "grad_norm": 0.2800411581993103, "learning_rate": 7.403546945508257e-06, "loss": 0.3082, "step": 8502 }, { "epoch": 1.2192429022082019, "grad_norm": 0.3047848343849182, "learning_rate": 7.402815380993824e-06, "loss": 0.3201, "step": 8503 }, { "epoch": 1.219386291941497, "grad_norm": 0.3017423748970032, "learning_rate": 7.402083749588709e-06, "loss": 0.3048, "step": 8504 }, { "epoch": 1.219529681674792, "grad_norm": 0.3044057786464691, "learning_rate": 7.401352051313275e-06, "loss": 0.2987, "step": 8505 }, { "epoch": 1.2196730714080872, "grad_norm": 0.30649417638778687, "learning_rate": 7.4006202861878965e-06, "loss": 0.3117, "step": 8506 }, { "epoch": 1.2198164611413822, "grad_norm": 0.30514103174209595, "learning_rate": 7.39988845423294e-06, "loss": 0.2931, "step": 8507 }, { "epoch": 1.2199598508746774, "grad_norm": 0.29371780157089233, "learning_rate": 7.3991565554687805e-06, "loss": 0.3001, "step": 8508 }, { "epoch": 1.2201032406079724, "grad_norm": 0.301944375038147, "learning_rate": 7.398424589915794e-06, "loss": 0.313, "step": 8509 }, { "epoch": 1.2202466303412676, "grad_norm": 0.3021430969238281, "learning_rate": 7.397692557594356e-06, "loss": 0.3055, "step": 8510 }, { "epoch": 1.2203900200745625, "grad_norm": 0.2890189588069916, "learning_rate": 7.396960458524845e-06, "loss": 0.2928, "step": 8511 }, { "epoch": 1.2205334098078577, "grad_norm": 0.3354071080684662, "learning_rate": 7.396228292727644e-06, "loss": 0.3127, "step": 8512 }, { "epoch": 1.220676799541153, "grad_norm": 0.3079644739627838, "learning_rate": 7.395496060223132e-06, "loss": 0.314, "step": 8513 }, { "epoch": 1.220820189274448, "grad_norm": 0.29837915301322937, "learning_rate": 7.394763761031696e-06, "loss": 0.3085, "step": 8514 }, { "epoch": 1.220963579007743, "grad_norm": 0.27450135350227356, "learning_rate": 7.394031395173718e-06, "loss": 0.2973, "step": 8515 }, { "epoch": 1.221106968741038, "grad_norm": 0.2904146909713745, "learning_rate": 7.393298962669591e-06, "loss": 0.3067, "step": 8516 }, { "epoch": 1.2212503584743333, "grad_norm": 0.28134506940841675, "learning_rate": 7.392566463539701e-06, "loss": 0.3151, "step": 8517 }, { "epoch": 1.2213937482076282, "grad_norm": 0.2735748589038849, "learning_rate": 7.391833897804441e-06, "loss": 0.2979, "step": 8518 }, { "epoch": 1.2215371379409234, "grad_norm": 0.2817707657814026, "learning_rate": 7.3911012654842065e-06, "loss": 0.3067, "step": 8519 }, { "epoch": 1.2216805276742186, "grad_norm": 0.2837630808353424, "learning_rate": 7.390368566599391e-06, "loss": 0.3098, "step": 8520 }, { "epoch": 1.2218239174075136, "grad_norm": 0.29354915022850037, "learning_rate": 7.389635801170389e-06, "loss": 0.3105, "step": 8521 }, { "epoch": 1.2219673071408088, "grad_norm": 0.30183473229408264, "learning_rate": 7.388902969217605e-06, "loss": 0.3273, "step": 8522 }, { "epoch": 1.2221106968741038, "grad_norm": 0.2878810167312622, "learning_rate": 7.388170070761436e-06, "loss": 0.3077, "step": 8523 }, { "epoch": 1.222254086607399, "grad_norm": 0.31804072856903076, "learning_rate": 7.387437105822286e-06, "loss": 0.2939, "step": 8524 }, { "epoch": 1.222397476340694, "grad_norm": 0.3021048307418823, "learning_rate": 7.386704074420559e-06, "loss": 0.3103, "step": 8525 }, { "epoch": 1.2225408660739892, "grad_norm": 0.30757543444633484, "learning_rate": 7.3859709765766616e-06, "loss": 0.3134, "step": 8526 }, { "epoch": 1.2226842558072841, "grad_norm": 0.30699557065963745, "learning_rate": 7.385237812311004e-06, "loss": 0.3191, "step": 8527 }, { "epoch": 1.2228276455405793, "grad_norm": 0.279002845287323, "learning_rate": 7.384504581643994e-06, "loss": 0.3308, "step": 8528 }, { "epoch": 1.2229710352738743, "grad_norm": 0.29057180881500244, "learning_rate": 7.383771284596045e-06, "loss": 0.3277, "step": 8529 }, { "epoch": 1.2231144250071695, "grad_norm": 0.2836024761199951, "learning_rate": 7.3830379211875705e-06, "loss": 0.2974, "step": 8530 }, { "epoch": 1.2232578147404647, "grad_norm": 0.2759989798069, "learning_rate": 7.382304491438987e-06, "loss": 0.3125, "step": 8531 }, { "epoch": 1.2234012044737597, "grad_norm": 0.293340265750885, "learning_rate": 7.381570995370708e-06, "loss": 0.297, "step": 8532 }, { "epoch": 1.2235445942070549, "grad_norm": 0.2594553828239441, "learning_rate": 7.38083743300316e-06, "loss": 0.3082, "step": 8533 }, { "epoch": 1.2236879839403498, "grad_norm": 0.2932437062263489, "learning_rate": 7.380103804356757e-06, "loss": 0.3101, "step": 8534 }, { "epoch": 1.223831373673645, "grad_norm": 0.2919692099094391, "learning_rate": 7.379370109451927e-06, "loss": 0.3172, "step": 8535 }, { "epoch": 1.22397476340694, "grad_norm": 0.3202299475669861, "learning_rate": 7.378636348309092e-06, "loss": 0.3027, "step": 8536 }, { "epoch": 1.2241181531402352, "grad_norm": 0.2966419756412506, "learning_rate": 7.377902520948681e-06, "loss": 0.3177, "step": 8537 }, { "epoch": 1.2242615428735302, "grad_norm": 0.3074162006378174, "learning_rate": 7.377168627391122e-06, "loss": 0.3213, "step": 8538 }, { "epoch": 1.2244049326068254, "grad_norm": 0.2906610667705536, "learning_rate": 7.376434667656844e-06, "loss": 0.3078, "step": 8539 }, { "epoch": 1.2245483223401203, "grad_norm": 0.333021879196167, "learning_rate": 7.3757006417662815e-06, "loss": 0.3219, "step": 8540 }, { "epoch": 1.2246917120734155, "grad_norm": 0.31542307138442993, "learning_rate": 7.374966549739868e-06, "loss": 0.2963, "step": 8541 }, { "epoch": 1.2248351018067107, "grad_norm": 0.269542396068573, "learning_rate": 7.374232391598038e-06, "loss": 0.3107, "step": 8542 }, { "epoch": 1.2249784915400057, "grad_norm": 0.29768505692481995, "learning_rate": 7.373498167361231e-06, "loss": 0.2986, "step": 8543 }, { "epoch": 1.225121881273301, "grad_norm": 0.3278637230396271, "learning_rate": 7.372763877049887e-06, "loss": 0.3009, "step": 8544 }, { "epoch": 1.2252652710065959, "grad_norm": 0.2871881127357483, "learning_rate": 7.372029520684445e-06, "loss": 0.3232, "step": 8545 }, { "epoch": 1.225408660739891, "grad_norm": 0.31462883949279785, "learning_rate": 7.371295098285352e-06, "loss": 0.3038, "step": 8546 }, { "epoch": 1.225552050473186, "grad_norm": 0.28582197427749634, "learning_rate": 7.3705606098730495e-06, "loss": 0.317, "step": 8547 }, { "epoch": 1.2256954402064812, "grad_norm": 0.3025568425655365, "learning_rate": 7.369826055467987e-06, "loss": 0.306, "step": 8548 }, { "epoch": 1.2258388299397762, "grad_norm": 0.3145342767238617, "learning_rate": 7.369091435090613e-06, "loss": 0.3205, "step": 8549 }, { "epoch": 1.2259822196730714, "grad_norm": 0.2867349088191986, "learning_rate": 7.3683567487613774e-06, "loss": 0.3056, "step": 8550 }, { "epoch": 1.2261256094063664, "grad_norm": 0.28925782442092896, "learning_rate": 7.367621996500732e-06, "loss": 0.2959, "step": 8551 }, { "epoch": 1.2262689991396616, "grad_norm": 0.32233911752700806, "learning_rate": 7.366887178329134e-06, "loss": 0.3307, "step": 8552 }, { "epoch": 1.2264123888729568, "grad_norm": 0.3019695281982422, "learning_rate": 7.366152294267036e-06, "loss": 0.2925, "step": 8553 }, { "epoch": 1.2265557786062518, "grad_norm": 0.2821122705936432, "learning_rate": 7.3654173443349e-06, "loss": 0.3141, "step": 8554 }, { "epoch": 1.226699168339547, "grad_norm": 0.2675609290599823, "learning_rate": 7.364682328553183e-06, "loss": 0.3093, "step": 8555 }, { "epoch": 1.226842558072842, "grad_norm": 0.2745543122291565, "learning_rate": 7.363947246942347e-06, "loss": 0.3214, "step": 8556 }, { "epoch": 1.2269859478061371, "grad_norm": 0.29582032561302185, "learning_rate": 7.3632120995228565e-06, "loss": 0.3085, "step": 8557 }, { "epoch": 1.227129337539432, "grad_norm": 0.33622875809669495, "learning_rate": 7.362476886315177e-06, "loss": 0.3275, "step": 8558 }, { "epoch": 1.2272727272727273, "grad_norm": 0.2631092965602875, "learning_rate": 7.361741607339774e-06, "loss": 0.3137, "step": 8559 }, { "epoch": 1.2274161170060225, "grad_norm": 0.3109482228755951, "learning_rate": 7.36100626261712e-06, "loss": 0.2956, "step": 8560 }, { "epoch": 1.2275595067393175, "grad_norm": 0.28673848509788513, "learning_rate": 7.360270852167681e-06, "loss": 0.299, "step": 8561 }, { "epoch": 1.2277028964726124, "grad_norm": 0.2834494113922119, "learning_rate": 7.359535376011932e-06, "loss": 0.3212, "step": 8562 }, { "epoch": 1.2278462862059076, "grad_norm": 0.2709423899650574, "learning_rate": 7.358799834170349e-06, "loss": 0.3348, "step": 8563 }, { "epoch": 1.2279896759392028, "grad_norm": 0.27814817428588867, "learning_rate": 7.358064226663408e-06, "loss": 0.3084, "step": 8564 }, { "epoch": 1.2281330656724978, "grad_norm": 0.3028680086135864, "learning_rate": 7.357328553511585e-06, "loss": 0.338, "step": 8565 }, { "epoch": 1.228276455405793, "grad_norm": 0.27551087737083435, "learning_rate": 7.356592814735361e-06, "loss": 0.308, "step": 8566 }, { "epoch": 1.228419845139088, "grad_norm": 0.30090096592903137, "learning_rate": 7.355857010355219e-06, "loss": 0.3049, "step": 8567 }, { "epoch": 1.2285632348723832, "grad_norm": 0.28069666028022766, "learning_rate": 7.355121140391644e-06, "loss": 0.3228, "step": 8568 }, { "epoch": 1.2287066246056781, "grad_norm": 0.2948710024356842, "learning_rate": 7.354385204865116e-06, "loss": 0.3118, "step": 8569 }, { "epoch": 1.2288500143389733, "grad_norm": 0.2835288941860199, "learning_rate": 7.353649203796126e-06, "loss": 0.312, "step": 8570 }, { "epoch": 1.2289934040722685, "grad_norm": 0.28773680329322815, "learning_rate": 7.352913137205163e-06, "loss": 0.3163, "step": 8571 }, { "epoch": 1.2291367938055635, "grad_norm": 0.28615209460258484, "learning_rate": 7.352177005112718e-06, "loss": 0.3091, "step": 8572 }, { "epoch": 1.2292801835388587, "grad_norm": 0.2960365414619446, "learning_rate": 7.351440807539283e-06, "loss": 0.3144, "step": 8573 }, { "epoch": 1.2294235732721537, "grad_norm": 0.3077586591243744, "learning_rate": 7.350704544505355e-06, "loss": 0.3195, "step": 8574 }, { "epoch": 1.2295669630054489, "grad_norm": 0.3000026047229767, "learning_rate": 7.3499682160314264e-06, "loss": 0.314, "step": 8575 }, { "epoch": 1.2297103527387439, "grad_norm": 0.2969186305999756, "learning_rate": 7.349231822138e-06, "loss": 0.2996, "step": 8576 }, { "epoch": 1.229853742472039, "grad_norm": 0.29543155431747437, "learning_rate": 7.348495362845571e-06, "loss": 0.3091, "step": 8577 }, { "epoch": 1.229997132205334, "grad_norm": 0.27787694334983826, "learning_rate": 7.347758838174645e-06, "loss": 0.2937, "step": 8578 }, { "epoch": 1.2301405219386292, "grad_norm": 0.32926487922668457, "learning_rate": 7.347022248145724e-06, "loss": 0.2913, "step": 8579 }, { "epoch": 1.2302839116719242, "grad_norm": 0.3131504952907562, "learning_rate": 7.346285592779314e-06, "loss": 0.3147, "step": 8580 }, { "epoch": 1.2304273014052194, "grad_norm": 0.30125728249549866, "learning_rate": 7.345548872095922e-06, "loss": 0.2916, "step": 8581 }, { "epoch": 1.2305706911385146, "grad_norm": 0.3071560263633728, "learning_rate": 7.344812086116059e-06, "loss": 0.3119, "step": 8582 }, { "epoch": 1.2307140808718096, "grad_norm": 0.3224151134490967, "learning_rate": 7.3440752348602325e-06, "loss": 0.3094, "step": 8583 }, { "epoch": 1.2308574706051048, "grad_norm": 0.3083095848560333, "learning_rate": 7.343338318348958e-06, "loss": 0.3096, "step": 8584 }, { "epoch": 1.2310008603383997, "grad_norm": 0.3283744752407074, "learning_rate": 7.342601336602751e-06, "loss": 0.3211, "step": 8585 }, { "epoch": 1.231144250071695, "grad_norm": 0.284742534160614, "learning_rate": 7.341864289642124e-06, "loss": 0.3249, "step": 8586 }, { "epoch": 1.23128763980499, "grad_norm": 0.30802682042121887, "learning_rate": 7.3411271774876e-06, "loss": 0.2964, "step": 8587 }, { "epoch": 1.231431029538285, "grad_norm": 0.31733018159866333, "learning_rate": 7.340390000159696e-06, "loss": 0.3063, "step": 8588 }, { "epoch": 1.23157441927158, "grad_norm": 0.29424306750297546, "learning_rate": 7.339652757678933e-06, "loss": 0.3078, "step": 8589 }, { "epoch": 1.2317178090048753, "grad_norm": 0.31702232360839844, "learning_rate": 7.338915450065837e-06, "loss": 0.318, "step": 8590 }, { "epoch": 1.2318611987381702, "grad_norm": 0.3030143082141876, "learning_rate": 7.338178077340934e-06, "loss": 0.3122, "step": 8591 }, { "epoch": 1.2320045884714654, "grad_norm": 0.2841840982437134, "learning_rate": 7.337440639524749e-06, "loss": 0.3092, "step": 8592 }, { "epoch": 1.2321479782047606, "grad_norm": 0.3032972514629364, "learning_rate": 7.336703136637812e-06, "loss": 0.3099, "step": 8593 }, { "epoch": 1.2322913679380556, "grad_norm": 0.2850486934185028, "learning_rate": 7.335965568700656e-06, "loss": 0.3112, "step": 8594 }, { "epoch": 1.2324347576713508, "grad_norm": 0.30670273303985596, "learning_rate": 7.335227935733811e-06, "loss": 0.2854, "step": 8595 }, { "epoch": 1.2325781474046458, "grad_norm": 0.32978352904319763, "learning_rate": 7.334490237757812e-06, "loss": 0.3044, "step": 8596 }, { "epoch": 1.232721537137941, "grad_norm": 0.31765949726104736, "learning_rate": 7.3337524747931965e-06, "loss": 0.3393, "step": 8597 }, { "epoch": 1.232864926871236, "grad_norm": 0.2810361087322235, "learning_rate": 7.3330146468605015e-06, "loss": 0.314, "step": 8598 }, { "epoch": 1.2330083166045311, "grad_norm": 0.3074067533016205, "learning_rate": 7.332276753980268e-06, "loss": 0.3038, "step": 8599 }, { "epoch": 1.2331517063378261, "grad_norm": 0.3358158469200134, "learning_rate": 7.331538796173037e-06, "loss": 0.3163, "step": 8600 }, { "epoch": 1.2332950960711213, "grad_norm": 0.3236925005912781, "learning_rate": 7.330800773459352e-06, "loss": 0.3192, "step": 8601 }, { "epoch": 1.2334384858044163, "grad_norm": 0.29112571477890015, "learning_rate": 7.33006268585976e-06, "loss": 0.3275, "step": 8602 }, { "epoch": 1.2335818755377115, "grad_norm": 0.3291448950767517, "learning_rate": 7.329324533394808e-06, "loss": 0.3242, "step": 8603 }, { "epoch": 1.2337252652710067, "grad_norm": 0.2974938452243805, "learning_rate": 7.3285863160850425e-06, "loss": 0.3172, "step": 8604 }, { "epoch": 1.2338686550043017, "grad_norm": 0.30876001715660095, "learning_rate": 7.327848033951016e-06, "loss": 0.3088, "step": 8605 }, { "epoch": 1.2340120447375968, "grad_norm": 0.33545491099357605, "learning_rate": 7.327109687013283e-06, "loss": 0.2952, "step": 8606 }, { "epoch": 1.2341554344708918, "grad_norm": 0.2902178466320038, "learning_rate": 7.326371275292394e-06, "loss": 0.3235, "step": 8607 }, { "epoch": 1.234298824204187, "grad_norm": 0.3152084946632385, "learning_rate": 7.325632798808907e-06, "loss": 0.3176, "step": 8608 }, { "epoch": 1.234442213937482, "grad_norm": 0.29608070850372314, "learning_rate": 7.324894257583382e-06, "loss": 0.2941, "step": 8609 }, { "epoch": 1.2345856036707772, "grad_norm": 0.28728288412094116, "learning_rate": 7.324155651636375e-06, "loss": 0.3035, "step": 8610 }, { "epoch": 1.2347289934040724, "grad_norm": 0.29272592067718506, "learning_rate": 7.323416980988451e-06, "loss": 0.311, "step": 8611 }, { "epoch": 1.2348723831373674, "grad_norm": 0.28591516613960266, "learning_rate": 7.322678245660174e-06, "loss": 0.3073, "step": 8612 }, { "epoch": 1.2350157728706626, "grad_norm": 0.2997543215751648, "learning_rate": 7.321939445672106e-06, "loss": 0.284, "step": 8613 }, { "epoch": 1.2351591626039575, "grad_norm": 0.2905084192752838, "learning_rate": 7.321200581044817e-06, "loss": 0.3217, "step": 8614 }, { "epoch": 1.2353025523372527, "grad_norm": 0.2820388376712799, "learning_rate": 7.3204616517988736e-06, "loss": 0.3194, "step": 8615 }, { "epoch": 1.2354459420705477, "grad_norm": 0.297570139169693, "learning_rate": 7.3197226579548474e-06, "loss": 0.3348, "step": 8616 }, { "epoch": 1.235589331803843, "grad_norm": 0.299167275428772, "learning_rate": 7.318983599533311e-06, "loss": 0.3241, "step": 8617 }, { "epoch": 1.2357327215371379, "grad_norm": 0.28625571727752686, "learning_rate": 7.318244476554837e-06, "loss": 0.2981, "step": 8618 }, { "epoch": 1.235876111270433, "grad_norm": 0.3200264573097229, "learning_rate": 7.3175052890400035e-06, "loss": 0.3018, "step": 8619 }, { "epoch": 1.236019501003728, "grad_norm": 0.2972361147403717, "learning_rate": 7.31676603700939e-06, "loss": 0.308, "step": 8620 }, { "epoch": 1.2361628907370232, "grad_norm": 0.29473865032196045, "learning_rate": 7.316026720483571e-06, "loss": 0.3143, "step": 8621 }, { "epoch": 1.2363062804703184, "grad_norm": 0.3096379041671753, "learning_rate": 7.315287339483132e-06, "loss": 0.3269, "step": 8622 }, { "epoch": 1.2364496702036134, "grad_norm": 0.29470378160476685, "learning_rate": 7.314547894028656e-06, "loss": 0.3184, "step": 8623 }, { "epoch": 1.2365930599369086, "grad_norm": 0.2938840687274933, "learning_rate": 7.313808384140726e-06, "loss": 0.3085, "step": 8624 }, { "epoch": 1.2367364496702036, "grad_norm": 0.2721192240715027, "learning_rate": 7.313068809839931e-06, "loss": 0.2931, "step": 8625 }, { "epoch": 1.2368798394034988, "grad_norm": 0.2919374108314514, "learning_rate": 7.3123291711468555e-06, "loss": 0.3009, "step": 8626 }, { "epoch": 1.2370232291367937, "grad_norm": 0.3033451437950134, "learning_rate": 7.311589468082094e-06, "loss": 0.3214, "step": 8627 }, { "epoch": 1.237166618870089, "grad_norm": 0.2964542806148529, "learning_rate": 7.31084970066624e-06, "loss": 0.3042, "step": 8628 }, { "epoch": 1.237310008603384, "grad_norm": 0.30024316906929016, "learning_rate": 7.310109868919883e-06, "loss": 0.3076, "step": 8629 }, { "epoch": 1.2374533983366791, "grad_norm": 0.29183247685432434, "learning_rate": 7.309369972863623e-06, "loss": 0.3186, "step": 8630 }, { "epoch": 1.237596788069974, "grad_norm": 0.28413495421409607, "learning_rate": 7.308630012518053e-06, "loss": 0.3095, "step": 8631 }, { "epoch": 1.2377401778032693, "grad_norm": 0.3066634237766266, "learning_rate": 7.307889987903776e-06, "loss": 0.3123, "step": 8632 }, { "epoch": 1.2378835675365645, "grad_norm": 0.30483928322792053, "learning_rate": 7.307149899041392e-06, "loss": 0.3196, "step": 8633 }, { "epoch": 1.2380269572698595, "grad_norm": 0.2897167205810547, "learning_rate": 7.306409745951503e-06, "loss": 0.3042, "step": 8634 }, { "epoch": 1.2381703470031546, "grad_norm": 0.2901362478733063, "learning_rate": 7.305669528654716e-06, "loss": 0.3064, "step": 8635 }, { "epoch": 1.2383137367364496, "grad_norm": 0.2811182737350464, "learning_rate": 7.3049292471716335e-06, "loss": 0.3221, "step": 8636 }, { "epoch": 1.2384571264697448, "grad_norm": 0.2973730266094208, "learning_rate": 7.304188901522868e-06, "loss": 0.2981, "step": 8637 }, { "epoch": 1.2386005162030398, "grad_norm": 0.292390376329422, "learning_rate": 7.303448491729029e-06, "loss": 0.2944, "step": 8638 }, { "epoch": 1.238743905936335, "grad_norm": 0.2624102532863617, "learning_rate": 7.302708017810726e-06, "loss": 0.3192, "step": 8639 }, { "epoch": 1.23888729566963, "grad_norm": 0.2694525718688965, "learning_rate": 7.301967479788574e-06, "loss": 0.3128, "step": 8640 }, { "epoch": 1.2390306854029252, "grad_norm": 0.3062796890735626, "learning_rate": 7.301226877683189e-06, "loss": 0.3118, "step": 8641 }, { "epoch": 1.2391740751362201, "grad_norm": 0.29059484601020813, "learning_rate": 7.300486211515187e-06, "loss": 0.3022, "step": 8642 }, { "epoch": 1.2393174648695153, "grad_norm": 0.2756847143173218, "learning_rate": 7.299745481305189e-06, "loss": 0.302, "step": 8643 }, { "epoch": 1.2394608546028105, "grad_norm": 0.2759334146976471, "learning_rate": 7.299004687073814e-06, "loss": 0.318, "step": 8644 }, { "epoch": 1.2396042443361055, "grad_norm": 0.28363117575645447, "learning_rate": 7.298263828841684e-06, "loss": 0.3115, "step": 8645 }, { "epoch": 1.2397476340694007, "grad_norm": 0.298856258392334, "learning_rate": 7.297522906629425e-06, "loss": 0.3078, "step": 8646 }, { "epoch": 1.2398910238026957, "grad_norm": 0.2851812243461609, "learning_rate": 7.296781920457663e-06, "loss": 0.3293, "step": 8647 }, { "epoch": 1.2400344135359909, "grad_norm": 0.29132306575775146, "learning_rate": 7.2960408703470255e-06, "loss": 0.3034, "step": 8648 }, { "epoch": 1.2401778032692858, "grad_norm": 0.3347403407096863, "learning_rate": 7.295299756318142e-06, "loss": 0.3158, "step": 8649 }, { "epoch": 1.240321193002581, "grad_norm": 0.29165974259376526, "learning_rate": 7.294558578391644e-06, "loss": 0.3055, "step": 8650 }, { "epoch": 1.2404645827358762, "grad_norm": 0.2920282483100891, "learning_rate": 7.293817336588165e-06, "loss": 0.3347, "step": 8651 }, { "epoch": 1.2406079724691712, "grad_norm": 0.2863326072692871, "learning_rate": 7.29307603092834e-06, "loss": 0.3111, "step": 8652 }, { "epoch": 1.2407513622024662, "grad_norm": 0.2928757667541504, "learning_rate": 7.292334661432805e-06, "loss": 0.3179, "step": 8653 }, { "epoch": 1.2408947519357614, "grad_norm": 0.315321683883667, "learning_rate": 7.291593228122198e-06, "loss": 0.3153, "step": 8654 }, { "epoch": 1.2410381416690566, "grad_norm": 0.3067969083786011, "learning_rate": 7.290851731017164e-06, "loss": 0.3079, "step": 8655 }, { "epoch": 1.2411815314023515, "grad_norm": 0.2844197452068329, "learning_rate": 7.290110170138339e-06, "loss": 0.2984, "step": 8656 }, { "epoch": 1.2413249211356467, "grad_norm": 0.29116591811180115, "learning_rate": 7.289368545506372e-06, "loss": 0.2821, "step": 8657 }, { "epoch": 1.2414683108689417, "grad_norm": 0.32068464159965515, "learning_rate": 7.288626857141905e-06, "loss": 0.2889, "step": 8658 }, { "epoch": 1.241611700602237, "grad_norm": 0.3207329213619232, "learning_rate": 7.287885105065586e-06, "loss": 0.3163, "step": 8659 }, { "epoch": 1.2417550903355319, "grad_norm": 0.29126083850860596, "learning_rate": 7.287143289298067e-06, "loss": 0.3209, "step": 8660 }, { "epoch": 1.241898480068827, "grad_norm": 0.2882631719112396, "learning_rate": 7.2864014098599965e-06, "loss": 0.3204, "step": 8661 }, { "epoch": 1.2420418698021223, "grad_norm": 0.2723895311355591, "learning_rate": 7.285659466772028e-06, "loss": 0.3167, "step": 8662 }, { "epoch": 1.2421852595354173, "grad_norm": 0.2886141538619995, "learning_rate": 7.2849174600548155e-06, "loss": 0.3109, "step": 8663 }, { "epoch": 1.2423286492687124, "grad_norm": 0.29733067750930786, "learning_rate": 7.284175389729016e-06, "loss": 0.3143, "step": 8664 }, { "epoch": 1.2424720390020074, "grad_norm": 0.3001081943511963, "learning_rate": 7.283433255815287e-06, "loss": 0.2924, "step": 8665 }, { "epoch": 1.2426154287353026, "grad_norm": 0.32856205105781555, "learning_rate": 7.282691058334289e-06, "loss": 0.3319, "step": 8666 }, { "epoch": 1.2427588184685976, "grad_norm": 0.3049009442329407, "learning_rate": 7.281948797306684e-06, "loss": 0.3122, "step": 8667 }, { "epoch": 1.2429022082018928, "grad_norm": 0.27437543869018555, "learning_rate": 7.281206472753134e-06, "loss": 0.2874, "step": 8668 }, { "epoch": 1.2430455979351878, "grad_norm": 0.3119664192199707, "learning_rate": 7.280464084694305e-06, "loss": 0.3027, "step": 8669 }, { "epoch": 1.243188987668483, "grad_norm": 0.297746866941452, "learning_rate": 7.279721633150865e-06, "loss": 0.3183, "step": 8670 }, { "epoch": 1.243332377401778, "grad_norm": 0.2890869081020355, "learning_rate": 7.278979118143479e-06, "loss": 0.3069, "step": 8671 }, { "epoch": 1.2434757671350731, "grad_norm": 0.316605806350708, "learning_rate": 7.278236539692823e-06, "loss": 0.3098, "step": 8672 }, { "epoch": 1.2436191568683683, "grad_norm": 0.29593533277511597, "learning_rate": 7.277493897819563e-06, "loss": 0.3212, "step": 8673 }, { "epoch": 1.2437625466016633, "grad_norm": 0.29404184222221375, "learning_rate": 7.27675119254438e-06, "loss": 0.2943, "step": 8674 }, { "epoch": 1.2439059363349585, "grad_norm": 0.32101839780807495, "learning_rate": 7.276008423887944e-06, "loss": 0.3054, "step": 8675 }, { "epoch": 1.2440493260682535, "grad_norm": 0.3417908549308777, "learning_rate": 7.275265591870936e-06, "loss": 0.3223, "step": 8676 }, { "epoch": 1.2441927158015487, "grad_norm": 0.28659385442733765, "learning_rate": 7.274522696514033e-06, "loss": 0.3207, "step": 8677 }, { "epoch": 1.2443361055348436, "grad_norm": 0.2956914007663727, "learning_rate": 7.273779737837916e-06, "loss": 0.3219, "step": 8678 }, { "epoch": 1.2444794952681388, "grad_norm": 0.324893593788147, "learning_rate": 7.2730367158632705e-06, "loss": 0.3148, "step": 8679 }, { "epoch": 1.2446228850014338, "grad_norm": 0.30211377143859863, "learning_rate": 7.272293630610779e-06, "loss": 0.2971, "step": 8680 }, { "epoch": 1.244766274734729, "grad_norm": 0.28796660900115967, "learning_rate": 7.271550482101129e-06, "loss": 0.3253, "step": 8681 }, { "epoch": 1.244909664468024, "grad_norm": 0.30446261167526245, "learning_rate": 7.270807270355006e-06, "loss": 0.3271, "step": 8682 }, { "epoch": 1.2450530542013192, "grad_norm": 0.3405527174472809, "learning_rate": 7.270063995393102e-06, "loss": 0.3191, "step": 8683 }, { "epoch": 1.2451964439346144, "grad_norm": 0.2723120152950287, "learning_rate": 7.2693206572361095e-06, "loss": 0.2966, "step": 8684 }, { "epoch": 1.2453398336679093, "grad_norm": 0.2781153917312622, "learning_rate": 7.2685772559047205e-06, "loss": 0.3113, "step": 8685 }, { "epoch": 1.2454832234012045, "grad_norm": 0.3146764636039734, "learning_rate": 7.26783379141963e-06, "loss": 0.318, "step": 8686 }, { "epoch": 1.2456266131344995, "grad_norm": 0.29241156578063965, "learning_rate": 7.2670902638015365e-06, "loss": 0.3137, "step": 8687 }, { "epoch": 1.2457700028677947, "grad_norm": 0.28448885679244995, "learning_rate": 7.266346673071136e-06, "loss": 0.2883, "step": 8688 }, { "epoch": 1.2459133926010897, "grad_norm": 0.28753963112831116, "learning_rate": 7.2656030192491314e-06, "loss": 0.2981, "step": 8689 }, { "epoch": 1.2460567823343849, "grad_norm": 0.28151193261146545, "learning_rate": 7.264859302356223e-06, "loss": 0.317, "step": 8690 }, { "epoch": 1.24620017206768, "grad_norm": 0.29464322328567505, "learning_rate": 7.2641155224131156e-06, "loss": 0.2998, "step": 8691 }, { "epoch": 1.246343561800975, "grad_norm": 0.2833932042121887, "learning_rate": 7.263371679440515e-06, "loss": 0.2948, "step": 8692 }, { "epoch": 1.24648695153427, "grad_norm": 0.2800164222717285, "learning_rate": 7.2626277734591295e-06, "loss": 0.3152, "step": 8693 }, { "epoch": 1.2466303412675652, "grad_norm": 0.2935073971748352, "learning_rate": 7.261883804489666e-06, "loss": 0.3092, "step": 8694 }, { "epoch": 1.2467737310008604, "grad_norm": 0.2895967960357666, "learning_rate": 7.261139772552839e-06, "loss": 0.2981, "step": 8695 }, { "epoch": 1.2469171207341554, "grad_norm": 0.32461756467819214, "learning_rate": 7.260395677669359e-06, "loss": 0.3232, "step": 8696 }, { "epoch": 1.2470605104674506, "grad_norm": 0.2771952450275421, "learning_rate": 7.2596515198599385e-06, "loss": 0.3013, "step": 8697 }, { "epoch": 1.2472039002007456, "grad_norm": 0.29425787925720215, "learning_rate": 7.258907299145297e-06, "loss": 0.3097, "step": 8698 }, { "epoch": 1.2473472899340408, "grad_norm": 0.2994607090950012, "learning_rate": 7.2581630155461495e-06, "loss": 0.3163, "step": 8699 }, { "epoch": 1.2474906796673357, "grad_norm": 0.3034024238586426, "learning_rate": 7.257418669083217e-06, "loss": 0.3176, "step": 8700 }, { "epoch": 1.247634069400631, "grad_norm": 0.2953297793865204, "learning_rate": 7.256674259777223e-06, "loss": 0.3076, "step": 8701 }, { "epoch": 1.2477774591339261, "grad_norm": 0.2943107783794403, "learning_rate": 7.255929787648889e-06, "loss": 0.307, "step": 8702 }, { "epoch": 1.247920848867221, "grad_norm": 0.28420397639274597, "learning_rate": 7.25518525271894e-06, "loss": 0.3059, "step": 8703 }, { "epoch": 1.2480642386005163, "grad_norm": 0.2792198061943054, "learning_rate": 7.254440655008103e-06, "loss": 0.3119, "step": 8704 }, { "epoch": 1.2482076283338113, "grad_norm": 0.3149910271167755, "learning_rate": 7.253695994537105e-06, "loss": 0.3247, "step": 8705 }, { "epoch": 1.2483510180671065, "grad_norm": 0.2795620560646057, "learning_rate": 7.252951271326678e-06, "loss": 0.3143, "step": 8706 }, { "epoch": 1.2484944078004014, "grad_norm": 0.27638497948646545, "learning_rate": 7.252206485397553e-06, "loss": 0.3416, "step": 8707 }, { "epoch": 1.2486377975336966, "grad_norm": 0.29895907640457153, "learning_rate": 7.251461636770464e-06, "loss": 0.3092, "step": 8708 }, { "epoch": 1.2487811872669916, "grad_norm": 0.32137444615364075, "learning_rate": 7.250716725466147e-06, "loss": 0.2974, "step": 8709 }, { "epoch": 1.2489245770002868, "grad_norm": 0.29400601983070374, "learning_rate": 7.249971751505337e-06, "loss": 0.2897, "step": 8710 }, { "epoch": 1.2490679667335818, "grad_norm": 0.3053878843784332, "learning_rate": 7.249226714908777e-06, "loss": 0.3289, "step": 8711 }, { "epoch": 1.249211356466877, "grad_norm": 0.28842946887016296, "learning_rate": 7.248481615697204e-06, "loss": 0.3059, "step": 8712 }, { "epoch": 1.2493547462001722, "grad_norm": 0.32333990931510925, "learning_rate": 7.247736453891361e-06, "loss": 0.3037, "step": 8713 }, { "epoch": 1.2494981359334671, "grad_norm": 0.3010600507259369, "learning_rate": 7.246991229511994e-06, "loss": 0.3085, "step": 8714 }, { "epoch": 1.2496415256667623, "grad_norm": 0.296029269695282, "learning_rate": 7.246245942579846e-06, "loss": 0.2934, "step": 8715 }, { "epoch": 1.2497849154000573, "grad_norm": 0.29676204919815063, "learning_rate": 7.245500593115668e-06, "loss": 0.3026, "step": 8716 }, { "epoch": 1.2499283051333525, "grad_norm": 0.2776613235473633, "learning_rate": 7.244755181140206e-06, "loss": 0.2982, "step": 8717 }, { "epoch": 1.2500716948666475, "grad_norm": 0.2819637656211853, "learning_rate": 7.244009706674214e-06, "loss": 0.2888, "step": 8718 }, { "epoch": 1.2502150845999427, "grad_norm": 0.3062545955181122, "learning_rate": 7.243264169738443e-06, "loss": 0.335, "step": 8719 }, { "epoch": 1.2503584743332377, "grad_norm": 0.2882266938686371, "learning_rate": 7.242518570353651e-06, "loss": 0.3043, "step": 8720 }, { "epoch": 1.2505018640665329, "grad_norm": 0.2823723554611206, "learning_rate": 7.2417729085405885e-06, "loss": 0.2828, "step": 8721 }, { "epoch": 1.2506452537998278, "grad_norm": 0.28699225187301636, "learning_rate": 7.2410271843200185e-06, "loss": 0.3377, "step": 8722 }, { "epoch": 1.250788643533123, "grad_norm": 0.30195558071136475, "learning_rate": 7.240281397712699e-06, "loss": 0.3136, "step": 8723 }, { "epoch": 1.2509320332664182, "grad_norm": 0.31716063618659973, "learning_rate": 7.239535548739389e-06, "loss": 0.3151, "step": 8724 }, { "epoch": 1.2510754229997132, "grad_norm": 0.3118349611759186, "learning_rate": 7.238789637420858e-06, "loss": 0.321, "step": 8725 }, { "epoch": 1.2512188127330084, "grad_norm": 0.2877378761768341, "learning_rate": 7.238043663777865e-06, "loss": 0.3193, "step": 8726 }, { "epoch": 1.2513622024663034, "grad_norm": 0.2673962712287903, "learning_rate": 7.237297627831179e-06, "loss": 0.2954, "step": 8727 }, { "epoch": 1.2515055921995986, "grad_norm": 0.33336177468299866, "learning_rate": 7.23655152960157e-06, "loss": 0.3182, "step": 8728 }, { "epoch": 1.2516489819328935, "grad_norm": 0.3201740086078644, "learning_rate": 7.2358053691098065e-06, "loss": 0.2956, "step": 8729 }, { "epoch": 1.2517923716661887, "grad_norm": 0.28157228231430054, "learning_rate": 7.235059146376661e-06, "loss": 0.3076, "step": 8730 }, { "epoch": 1.251935761399484, "grad_norm": 0.2908746600151062, "learning_rate": 7.234312861422906e-06, "loss": 0.3009, "step": 8731 }, { "epoch": 1.252079151132779, "grad_norm": 0.32015958428382874, "learning_rate": 7.233566514269319e-06, "loss": 0.3112, "step": 8732 }, { "epoch": 1.2522225408660739, "grad_norm": 0.270368367433548, "learning_rate": 7.232820104936677e-06, "loss": 0.3014, "step": 8733 }, { "epoch": 1.252365930599369, "grad_norm": 0.30480271577835083, "learning_rate": 7.232073633445756e-06, "loss": 0.3039, "step": 8734 }, { "epoch": 1.2525093203326643, "grad_norm": 0.3037753403186798, "learning_rate": 7.231327099817339e-06, "loss": 0.2956, "step": 8735 }, { "epoch": 1.2526527100659592, "grad_norm": 0.293768048286438, "learning_rate": 7.230580504072209e-06, "loss": 0.2883, "step": 8736 }, { "epoch": 1.2527960997992544, "grad_norm": 0.3060978949069977, "learning_rate": 7.229833846231148e-06, "loss": 0.3012, "step": 8737 }, { "epoch": 1.2529394895325494, "grad_norm": 0.27997806668281555, "learning_rate": 7.229087126314944e-06, "loss": 0.2985, "step": 8738 }, { "epoch": 1.2530828792658446, "grad_norm": 0.32443946599960327, "learning_rate": 7.228340344344382e-06, "loss": 0.3186, "step": 8739 }, { "epoch": 1.2532262689991396, "grad_norm": 0.33366167545318604, "learning_rate": 7.227593500340253e-06, "loss": 0.2992, "step": 8740 }, { "epoch": 1.2533696587324348, "grad_norm": 0.2806553840637207, "learning_rate": 7.226846594323349e-06, "loss": 0.2973, "step": 8741 }, { "epoch": 1.25351304846573, "grad_norm": 0.2978278398513794, "learning_rate": 7.22609962631446e-06, "loss": 0.3162, "step": 8742 }, { "epoch": 1.253656438199025, "grad_norm": 0.29558244347572327, "learning_rate": 7.225352596334381e-06, "loss": 0.2967, "step": 8743 }, { "epoch": 1.25379982793232, "grad_norm": 0.33212873339653015, "learning_rate": 7.22460550440391e-06, "loss": 0.3168, "step": 8744 }, { "epoch": 1.2539432176656151, "grad_norm": 0.28623470664024353, "learning_rate": 7.223858350543845e-06, "loss": 0.3192, "step": 8745 }, { "epoch": 1.2540866073989103, "grad_norm": 0.3114282488822937, "learning_rate": 7.223111134774981e-06, "loss": 0.3096, "step": 8746 }, { "epoch": 1.2542299971322053, "grad_norm": 0.31509971618652344, "learning_rate": 7.2223638571181265e-06, "loss": 0.2946, "step": 8747 }, { "epoch": 1.2543733868655005, "grad_norm": 0.30495935678482056, "learning_rate": 7.2216165175940785e-06, "loss": 0.2952, "step": 8748 }, { "epoch": 1.2545167765987955, "grad_norm": 0.2934850752353668, "learning_rate": 7.220869116223646e-06, "loss": 0.3291, "step": 8749 }, { "epoch": 1.2546601663320907, "grad_norm": 0.29085683822631836, "learning_rate": 7.220121653027633e-06, "loss": 0.3178, "step": 8750 }, { "epoch": 1.2548035560653856, "grad_norm": 0.2799593210220337, "learning_rate": 7.2193741280268484e-06, "loss": 0.3031, "step": 8751 }, { "epoch": 1.2549469457986808, "grad_norm": 0.2674263119697571, "learning_rate": 7.218626541242102e-06, "loss": 0.308, "step": 8752 }, { "epoch": 1.255090335531976, "grad_norm": 0.30569911003112793, "learning_rate": 7.217878892694205e-06, "loss": 0.3195, "step": 8753 }, { "epoch": 1.255233725265271, "grad_norm": 0.30426329374313354, "learning_rate": 7.217131182403972e-06, "loss": 0.3194, "step": 8754 }, { "epoch": 1.255377114998566, "grad_norm": 0.30924850702285767, "learning_rate": 7.216383410392217e-06, "loss": 0.3184, "step": 8755 }, { "epoch": 1.2555205047318612, "grad_norm": 0.30032822489738464, "learning_rate": 7.215635576679757e-06, "loss": 0.304, "step": 8756 }, { "epoch": 1.2556638944651564, "grad_norm": 0.29061710834503174, "learning_rate": 7.2148876812874125e-06, "loss": 0.3205, "step": 8757 }, { "epoch": 1.2558072841984513, "grad_norm": 0.29341238737106323, "learning_rate": 7.214139724236e-06, "loss": 0.2987, "step": 8758 }, { "epoch": 1.2559506739317465, "grad_norm": 0.28503599762916565, "learning_rate": 7.213391705546344e-06, "loss": 0.3076, "step": 8759 }, { "epoch": 1.2560940636650415, "grad_norm": 0.29099908471107483, "learning_rate": 7.212643625239269e-06, "loss": 0.2972, "step": 8760 }, { "epoch": 1.2562374533983367, "grad_norm": 0.2777843177318573, "learning_rate": 7.211895483335599e-06, "loss": 0.3195, "step": 8761 }, { "epoch": 1.2563808431316317, "grad_norm": 0.2866547107696533, "learning_rate": 7.21114727985616e-06, "loss": 0.3156, "step": 8762 }, { "epoch": 1.2565242328649269, "grad_norm": 0.2852779030799866, "learning_rate": 7.2103990148217825e-06, "loss": 0.3026, "step": 8763 }, { "epoch": 1.256667622598222, "grad_norm": 0.27673661708831787, "learning_rate": 7.209650688253297e-06, "loss": 0.3166, "step": 8764 }, { "epoch": 1.256811012331517, "grad_norm": 0.27151787281036377, "learning_rate": 7.208902300171536e-06, "loss": 0.3144, "step": 8765 }, { "epoch": 1.2569544020648122, "grad_norm": 0.2922763228416443, "learning_rate": 7.208153850597333e-06, "loss": 0.2991, "step": 8766 }, { "epoch": 1.2570977917981072, "grad_norm": 0.2782735228538513, "learning_rate": 7.207405339551522e-06, "loss": 0.2945, "step": 8767 }, { "epoch": 1.2572411815314024, "grad_norm": 0.2753453850746155, "learning_rate": 7.206656767054944e-06, "loss": 0.3177, "step": 8768 }, { "epoch": 1.2573845712646974, "grad_norm": 0.2899932563304901, "learning_rate": 7.205908133128434e-06, "loss": 0.3098, "step": 8769 }, { "epoch": 1.2575279609979926, "grad_norm": 0.279778391122818, "learning_rate": 7.205159437792836e-06, "loss": 0.2958, "step": 8770 }, { "epoch": 1.2576713507312878, "grad_norm": 0.26594415307044983, "learning_rate": 7.204410681068991e-06, "loss": 0.3001, "step": 8771 }, { "epoch": 1.2578147404645827, "grad_norm": 0.2894894480705261, "learning_rate": 7.203661862977743e-06, "loss": 0.2909, "step": 8772 }, { "epoch": 1.2579581301978777, "grad_norm": 0.29007506370544434, "learning_rate": 7.202912983539939e-06, "loss": 0.3034, "step": 8773 }, { "epoch": 1.258101519931173, "grad_norm": 0.2874026596546173, "learning_rate": 7.202164042776427e-06, "loss": 0.3145, "step": 8774 }, { "epoch": 1.2582449096644681, "grad_norm": 0.29293787479400635, "learning_rate": 7.201415040708055e-06, "loss": 0.2865, "step": 8775 }, { "epoch": 1.258388299397763, "grad_norm": 0.28569579124450684, "learning_rate": 7.200665977355676e-06, "loss": 0.3196, "step": 8776 }, { "epoch": 1.2585316891310583, "grad_norm": 0.27160513401031494, "learning_rate": 7.199916852740141e-06, "loss": 0.3156, "step": 8777 }, { "epoch": 1.2586750788643533, "grad_norm": 0.32682105898857117, "learning_rate": 7.199167666882304e-06, "loss": 0.3064, "step": 8778 }, { "epoch": 1.2588184685976485, "grad_norm": 0.32057681679725647, "learning_rate": 7.198418419803024e-06, "loss": 0.3039, "step": 8779 }, { "epoch": 1.2589618583309434, "grad_norm": 0.2821493446826935, "learning_rate": 7.197669111523155e-06, "loss": 0.3312, "step": 8780 }, { "epoch": 1.2591052480642386, "grad_norm": 0.26925501227378845, "learning_rate": 7.196919742063558e-06, "loss": 0.2867, "step": 8781 }, { "epoch": 1.2592486377975338, "grad_norm": 0.29298368096351624, "learning_rate": 7.1961703114450966e-06, "loss": 0.3008, "step": 8782 }, { "epoch": 1.2593920275308288, "grad_norm": 0.3210178315639496, "learning_rate": 7.19542081968863e-06, "loss": 0.2831, "step": 8783 }, { "epoch": 1.2595354172641238, "grad_norm": 0.28093934059143066, "learning_rate": 7.1946712668150275e-06, "loss": 0.335, "step": 8784 }, { "epoch": 1.259678806997419, "grad_norm": 0.31263023614883423, "learning_rate": 7.193921652845152e-06, "loss": 0.2929, "step": 8785 }, { "epoch": 1.2598221967307142, "grad_norm": 0.31395480036735535, "learning_rate": 7.193171977799872e-06, "loss": 0.305, "step": 8786 }, { "epoch": 1.2599655864640091, "grad_norm": 0.30423781275749207, "learning_rate": 7.192422241700059e-06, "loss": 0.3133, "step": 8787 }, { "epoch": 1.2601089761973043, "grad_norm": 0.34484776854515076, "learning_rate": 7.191672444566582e-06, "loss": 0.3204, "step": 8788 }, { "epoch": 1.2602523659305993, "grad_norm": 0.2803398072719574, "learning_rate": 7.190922586420317e-06, "loss": 0.3098, "step": 8789 }, { "epoch": 1.2603957556638945, "grad_norm": 0.2772492468357086, "learning_rate": 7.190172667282136e-06, "loss": 0.3132, "step": 8790 }, { "epoch": 1.2605391453971895, "grad_norm": 0.2957431972026825, "learning_rate": 7.1894226871729175e-06, "loss": 0.3158, "step": 8791 }, { "epoch": 1.2606825351304847, "grad_norm": 0.3337222635746002, "learning_rate": 7.18867264611354e-06, "loss": 0.3206, "step": 8792 }, { "epoch": 1.2608259248637799, "grad_norm": 0.2704012095928192, "learning_rate": 7.187922544124882e-06, "loss": 0.3133, "step": 8793 }, { "epoch": 1.2609693145970748, "grad_norm": 0.29338231682777405, "learning_rate": 7.187172381227827e-06, "loss": 0.3005, "step": 8794 }, { "epoch": 1.2611127043303698, "grad_norm": 0.2853092551231384, "learning_rate": 7.186422157443258e-06, "loss": 0.3229, "step": 8795 }, { "epoch": 1.261256094063665, "grad_norm": 0.31486067175865173, "learning_rate": 7.185671872792057e-06, "loss": 0.3097, "step": 8796 }, { "epoch": 1.2613994837969602, "grad_norm": 0.29497745633125305, "learning_rate": 7.184921527295114e-06, "loss": 0.3127, "step": 8797 }, { "epoch": 1.2615428735302552, "grad_norm": 0.27257126569747925, "learning_rate": 7.184171120973319e-06, "loss": 0.3284, "step": 8798 }, { "epoch": 1.2616862632635504, "grad_norm": 0.2949841022491455, "learning_rate": 7.183420653847557e-06, "loss": 0.316, "step": 8799 }, { "epoch": 1.2618296529968454, "grad_norm": 0.30636805295944214, "learning_rate": 7.182670125938723e-06, "loss": 0.3183, "step": 8800 }, { "epoch": 1.2619730427301405, "grad_norm": 0.309195876121521, "learning_rate": 7.181919537267712e-06, "loss": 0.3175, "step": 8801 }, { "epoch": 1.2621164324634355, "grad_norm": 0.28121417760849, "learning_rate": 7.181168887855416e-06, "loss": 0.3113, "step": 8802 }, { "epoch": 1.2622598221967307, "grad_norm": 0.3120093047618866, "learning_rate": 7.180418177722735e-06, "loss": 0.322, "step": 8803 }, { "epoch": 1.262403211930026, "grad_norm": 0.2925284504890442, "learning_rate": 7.179667406890566e-06, "loss": 0.2997, "step": 8804 }, { "epoch": 1.2625466016633209, "grad_norm": 0.29020461440086365, "learning_rate": 7.178916575379808e-06, "loss": 0.314, "step": 8805 }, { "epoch": 1.262689991396616, "grad_norm": 0.31040772795677185, "learning_rate": 7.1781656832113655e-06, "loss": 0.2925, "step": 8806 }, { "epoch": 1.262833381129911, "grad_norm": 0.2884023189544678, "learning_rate": 7.17741473040614e-06, "loss": 0.3178, "step": 8807 }, { "epoch": 1.2629767708632063, "grad_norm": 0.2882987856864929, "learning_rate": 7.176663716985039e-06, "loss": 0.3055, "step": 8808 }, { "epoch": 1.2631201605965012, "grad_norm": 0.3239581286907196, "learning_rate": 7.175912642968968e-06, "loss": 0.3093, "step": 8809 }, { "epoch": 1.2632635503297964, "grad_norm": 0.3025755286216736, "learning_rate": 7.175161508378835e-06, "loss": 0.3126, "step": 8810 }, { "epoch": 1.2634069400630916, "grad_norm": 0.2666126489639282, "learning_rate": 7.174410313235554e-06, "loss": 0.3015, "step": 8811 }, { "epoch": 1.2635503297963866, "grad_norm": 0.33082300424575806, "learning_rate": 7.1736590575600316e-06, "loss": 0.3144, "step": 8812 }, { "epoch": 1.2636937195296816, "grad_norm": 0.29693225026130676, "learning_rate": 7.172907741373187e-06, "loss": 0.3051, "step": 8813 }, { "epoch": 1.2638371092629768, "grad_norm": 0.2774236798286438, "learning_rate": 7.1721563646959335e-06, "loss": 0.301, "step": 8814 }, { "epoch": 1.263980498996272, "grad_norm": 0.28951260447502136, "learning_rate": 7.171404927549187e-06, "loss": 0.3252, "step": 8815 }, { "epoch": 1.264123888729567, "grad_norm": 0.2993400990962982, "learning_rate": 7.1706534299538685e-06, "loss": 0.2982, "step": 8816 }, { "epoch": 1.2642672784628621, "grad_norm": 0.28749188780784607, "learning_rate": 7.169901871930896e-06, "loss": 0.3122, "step": 8817 }, { "epoch": 1.264410668196157, "grad_norm": 0.2914106249809265, "learning_rate": 7.169150253501196e-06, "loss": 0.2935, "step": 8818 }, { "epoch": 1.2645540579294523, "grad_norm": 0.2965393364429474, "learning_rate": 7.168398574685688e-06, "loss": 0.3033, "step": 8819 }, { "epoch": 1.2646974476627473, "grad_norm": 0.3131621479988098, "learning_rate": 7.1676468355052996e-06, "loss": 0.3135, "step": 8820 }, { "epoch": 1.2648408373960425, "grad_norm": 0.29363930225372314, "learning_rate": 7.166895035980957e-06, "loss": 0.3204, "step": 8821 }, { "epoch": 1.2649842271293377, "grad_norm": 0.2964355945587158, "learning_rate": 7.166143176133592e-06, "loss": 0.2887, "step": 8822 }, { "epoch": 1.2651276168626326, "grad_norm": 0.3132568299770355, "learning_rate": 7.165391255984132e-06, "loss": 0.2887, "step": 8823 }, { "epoch": 1.2652710065959276, "grad_norm": 0.3044864535331726, "learning_rate": 7.16463927555351e-06, "loss": 0.3129, "step": 8824 }, { "epoch": 1.2654143963292228, "grad_norm": 0.30958786606788635, "learning_rate": 7.163887234862661e-06, "loss": 0.3239, "step": 8825 }, { "epoch": 1.265557786062518, "grad_norm": 0.3049752712249756, "learning_rate": 7.163135133932519e-06, "loss": 0.3025, "step": 8826 }, { "epoch": 1.265701175795813, "grad_norm": 0.28581535816192627, "learning_rate": 7.162382972784023e-06, "loss": 0.2958, "step": 8827 }, { "epoch": 1.2658445655291082, "grad_norm": 0.29681816697120667, "learning_rate": 7.161630751438112e-06, "loss": 0.3014, "step": 8828 }, { "epoch": 1.2659879552624032, "grad_norm": 0.31064799427986145, "learning_rate": 7.160878469915725e-06, "loss": 0.3041, "step": 8829 }, { "epoch": 1.2661313449956983, "grad_norm": 0.297882080078125, "learning_rate": 7.160126128237807e-06, "loss": 0.3264, "step": 8830 }, { "epoch": 1.2662747347289933, "grad_norm": 0.28886932134628296, "learning_rate": 7.1593737264253e-06, "loss": 0.3089, "step": 8831 }, { "epoch": 1.2664181244622885, "grad_norm": 0.30260196328163147, "learning_rate": 7.158621264499149e-06, "loss": 0.3098, "step": 8832 }, { "epoch": 1.2665615141955837, "grad_norm": 0.29199621081352234, "learning_rate": 7.157868742480305e-06, "loss": 0.3157, "step": 8833 }, { "epoch": 1.2667049039288787, "grad_norm": 0.3047754466533661, "learning_rate": 7.157116160389712e-06, "loss": 0.3213, "step": 8834 }, { "epoch": 1.2668482936621737, "grad_norm": 0.3080720901489258, "learning_rate": 7.156363518248324e-06, "loss": 0.2902, "step": 8835 }, { "epoch": 1.2669916833954689, "grad_norm": 0.29069784283638, "learning_rate": 7.1556108160770935e-06, "loss": 0.2983, "step": 8836 }, { "epoch": 1.267135073128764, "grad_norm": 0.27554401755332947, "learning_rate": 7.154858053896972e-06, "loss": 0.3238, "step": 8837 }, { "epoch": 1.267278462862059, "grad_norm": 0.2875080704689026, "learning_rate": 7.154105231728921e-06, "loss": 0.2999, "step": 8838 }, { "epoch": 1.2674218525953542, "grad_norm": 0.31746727228164673, "learning_rate": 7.15335234959389e-06, "loss": 0.3151, "step": 8839 }, { "epoch": 1.2675652423286492, "grad_norm": 0.29746803641319275, "learning_rate": 7.152599407512844e-06, "loss": 0.3012, "step": 8840 }, { "epoch": 1.2677086320619444, "grad_norm": 0.307328999042511, "learning_rate": 7.151846405506741e-06, "loss": 0.3484, "step": 8841 }, { "epoch": 1.2678520217952394, "grad_norm": 0.29007279872894287, "learning_rate": 7.151093343596544e-06, "loss": 0.312, "step": 8842 }, { "epoch": 1.2679954115285346, "grad_norm": 0.3111342489719391, "learning_rate": 7.150340221803219e-06, "loss": 0.3113, "step": 8843 }, { "epoch": 1.2681388012618298, "grad_norm": 0.3050839900970459, "learning_rate": 7.149587040147728e-06, "loss": 0.3053, "step": 8844 }, { "epoch": 1.2682821909951247, "grad_norm": 0.29854878783226013, "learning_rate": 7.148833798651042e-06, "loss": 0.3007, "step": 8845 }, { "epoch": 1.2684255807284197, "grad_norm": 0.31110531091690063, "learning_rate": 7.148080497334129e-06, "loss": 0.3068, "step": 8846 }, { "epoch": 1.268568970461715, "grad_norm": 0.28847289085388184, "learning_rate": 7.147327136217957e-06, "loss": 0.297, "step": 8847 }, { "epoch": 1.26871236019501, "grad_norm": 0.2852863669395447, "learning_rate": 7.146573715323503e-06, "loss": 0.3226, "step": 8848 }, { "epoch": 1.268855749928305, "grad_norm": 0.2969259023666382, "learning_rate": 7.145820234671738e-06, "loss": 0.303, "step": 8849 }, { "epoch": 1.2689991396616003, "grad_norm": 0.2691451609134674, "learning_rate": 7.145066694283639e-06, "loss": 0.2869, "step": 8850 }, { "epoch": 1.2691425293948952, "grad_norm": 0.29372239112854004, "learning_rate": 7.144313094180182e-06, "loss": 0.3093, "step": 8851 }, { "epoch": 1.2692859191281904, "grad_norm": 0.2774810791015625, "learning_rate": 7.143559434382348e-06, "loss": 0.3172, "step": 8852 }, { "epoch": 1.2694293088614854, "grad_norm": 0.2891347408294678, "learning_rate": 7.142805714911117e-06, "loss": 0.3159, "step": 8853 }, { "epoch": 1.2695726985947806, "grad_norm": 0.29556065797805786, "learning_rate": 7.142051935787468e-06, "loss": 0.3121, "step": 8854 }, { "epoch": 1.2697160883280758, "grad_norm": 0.3070358335971832, "learning_rate": 7.141298097032392e-06, "loss": 0.3263, "step": 8855 }, { "epoch": 1.2698594780613708, "grad_norm": 0.2925666272640228, "learning_rate": 7.140544198666869e-06, "loss": 0.3181, "step": 8856 }, { "epoch": 1.270002867794666, "grad_norm": 0.2877579927444458, "learning_rate": 7.139790240711889e-06, "loss": 0.3163, "step": 8857 }, { "epoch": 1.270146257527961, "grad_norm": 0.29852405190467834, "learning_rate": 7.139036223188441e-06, "loss": 0.3173, "step": 8858 }, { "epoch": 1.2702896472612561, "grad_norm": 0.29561111330986023, "learning_rate": 7.138282146117513e-06, "loss": 0.3251, "step": 8859 }, { "epoch": 1.2704330369945511, "grad_norm": 0.2650661766529083, "learning_rate": 7.1375280095201e-06, "loss": 0.3026, "step": 8860 }, { "epoch": 1.2705764267278463, "grad_norm": 0.310686856508255, "learning_rate": 7.136773813417197e-06, "loss": 0.3063, "step": 8861 }, { "epoch": 1.2707198164611415, "grad_norm": 0.2796441316604614, "learning_rate": 7.136019557829796e-06, "loss": 0.3048, "step": 8862 }, { "epoch": 1.2708632061944365, "grad_norm": 0.3086746633052826, "learning_rate": 7.135265242778897e-06, "loss": 0.3351, "step": 8863 }, { "epoch": 1.2710065959277315, "grad_norm": 0.28300872445106506, "learning_rate": 7.134510868285498e-06, "loss": 0.3024, "step": 8864 }, { "epoch": 1.2711499856610267, "grad_norm": 0.29402783513069153, "learning_rate": 7.133756434370601e-06, "loss": 0.3257, "step": 8865 }, { "epoch": 1.2712933753943219, "grad_norm": 0.26727795600891113, "learning_rate": 7.1330019410552065e-06, "loss": 0.305, "step": 8866 }, { "epoch": 1.2714367651276168, "grad_norm": 0.2924080491065979, "learning_rate": 7.132247388360319e-06, "loss": 0.29, "step": 8867 }, { "epoch": 1.271580154860912, "grad_norm": 0.27572211623191833, "learning_rate": 7.1314927763069456e-06, "loss": 0.3089, "step": 8868 }, { "epoch": 1.271723544594207, "grad_norm": 0.28298190236091614, "learning_rate": 7.130738104916091e-06, "loss": 0.2993, "step": 8869 }, { "epoch": 1.2718669343275022, "grad_norm": 0.29141008853912354, "learning_rate": 7.129983374208764e-06, "loss": 0.31, "step": 8870 }, { "epoch": 1.2720103240607972, "grad_norm": 0.27902525663375854, "learning_rate": 7.129228584205979e-06, "loss": 0.3117, "step": 8871 }, { "epoch": 1.2721537137940924, "grad_norm": 0.29923078417778015, "learning_rate": 7.128473734928742e-06, "loss": 0.3225, "step": 8872 }, { "epoch": 1.2722971035273876, "grad_norm": 0.2944694757461548, "learning_rate": 7.127718826398073e-06, "loss": 0.3144, "step": 8873 }, { "epoch": 1.2724404932606825, "grad_norm": 0.2967674434185028, "learning_rate": 7.1269638586349855e-06, "loss": 0.3158, "step": 8874 }, { "epoch": 1.2725838829939775, "grad_norm": 0.29589220881462097, "learning_rate": 7.126208831660495e-06, "loss": 0.3083, "step": 8875 }, { "epoch": 1.2727272727272727, "grad_norm": 0.28513190150260925, "learning_rate": 7.125453745495622e-06, "loss": 0.3033, "step": 8876 }, { "epoch": 1.272870662460568, "grad_norm": 0.31156978011131287, "learning_rate": 7.124698600161385e-06, "loss": 0.3167, "step": 8877 }, { "epoch": 1.2730140521938629, "grad_norm": 0.3008568584918976, "learning_rate": 7.123943395678808e-06, "loss": 0.2996, "step": 8878 }, { "epoch": 1.273157441927158, "grad_norm": 0.30380117893218994, "learning_rate": 7.123188132068914e-06, "loss": 0.3095, "step": 8879 }, { "epoch": 1.273300831660453, "grad_norm": 0.3101857304573059, "learning_rate": 7.12243280935273e-06, "loss": 0.3158, "step": 8880 }, { "epoch": 1.2734442213937482, "grad_norm": 0.2989276349544525, "learning_rate": 7.121677427551279e-06, "loss": 0.3211, "step": 8881 }, { "epoch": 1.2735876111270432, "grad_norm": 0.2875279486179352, "learning_rate": 7.120921986685594e-06, "loss": 0.3109, "step": 8882 }, { "epoch": 1.2737310008603384, "grad_norm": 0.2920842170715332, "learning_rate": 7.120166486776702e-06, "loss": 0.3155, "step": 8883 }, { "epoch": 1.2738743905936336, "grad_norm": 0.27240490913391113, "learning_rate": 7.119410927845638e-06, "loss": 0.3113, "step": 8884 }, { "epoch": 1.2740177803269286, "grad_norm": 0.29078030586242676, "learning_rate": 7.118655309913432e-06, "loss": 0.2936, "step": 8885 }, { "epoch": 1.2741611700602236, "grad_norm": 0.2789754569530487, "learning_rate": 7.117899633001123e-06, "loss": 0.3011, "step": 8886 }, { "epoch": 1.2743045597935188, "grad_norm": 0.29147687554359436, "learning_rate": 7.117143897129747e-06, "loss": 0.314, "step": 8887 }, { "epoch": 1.274447949526814, "grad_norm": 0.2855668067932129, "learning_rate": 7.11638810232034e-06, "loss": 0.3296, "step": 8888 }, { "epoch": 1.274591339260109, "grad_norm": 0.29503166675567627, "learning_rate": 7.115632248593945e-06, "loss": 0.3096, "step": 8889 }, { "epoch": 1.2747347289934041, "grad_norm": 0.29177507758140564, "learning_rate": 7.1148763359716e-06, "loss": 0.3042, "step": 8890 }, { "epoch": 1.274878118726699, "grad_norm": 0.28369587659835815, "learning_rate": 7.114120364474353e-06, "loss": 0.3002, "step": 8891 }, { "epoch": 1.2750215084599943, "grad_norm": 0.299210786819458, "learning_rate": 7.113364334123248e-06, "loss": 0.307, "step": 8892 }, { "epoch": 1.2751648981932893, "grad_norm": 0.2747876048088074, "learning_rate": 7.11260824493933e-06, "loss": 0.3004, "step": 8893 }, { "epoch": 1.2753082879265845, "grad_norm": 0.26733359694480896, "learning_rate": 7.1118520969436476e-06, "loss": 0.3036, "step": 8894 }, { "epoch": 1.2754516776598797, "grad_norm": 0.32073265314102173, "learning_rate": 7.111095890157254e-06, "loss": 0.2965, "step": 8895 }, { "epoch": 1.2755950673931746, "grad_norm": 0.29598796367645264, "learning_rate": 7.110339624601196e-06, "loss": 0.2891, "step": 8896 }, { "epoch": 1.2757384571264698, "grad_norm": 0.28023484349250793, "learning_rate": 7.109583300296529e-06, "loss": 0.3248, "step": 8897 }, { "epoch": 1.2758818468597648, "grad_norm": 0.3152143955230713, "learning_rate": 7.108826917264309e-06, "loss": 0.325, "step": 8898 }, { "epoch": 1.27602523659306, "grad_norm": 0.28762251138687134, "learning_rate": 7.108070475525591e-06, "loss": 0.3272, "step": 8899 }, { "epoch": 1.276168626326355, "grad_norm": 0.3049728274345398, "learning_rate": 7.107313975101433e-06, "loss": 0.3231, "step": 8900 }, { "epoch": 1.2763120160596502, "grad_norm": 0.2778717875480652, "learning_rate": 7.106557416012898e-06, "loss": 0.32, "step": 8901 }, { "epoch": 1.2764554057929454, "grad_norm": 0.29861289262771606, "learning_rate": 7.1058007982810425e-06, "loss": 0.3351, "step": 8902 }, { "epoch": 1.2765987955262403, "grad_norm": 0.2932252883911133, "learning_rate": 7.105044121926935e-06, "loss": 0.291, "step": 8903 }, { "epoch": 1.2767421852595353, "grad_norm": 0.30173006653785706, "learning_rate": 7.104287386971635e-06, "loss": 0.3184, "step": 8904 }, { "epoch": 1.2768855749928305, "grad_norm": 0.2866261303424835, "learning_rate": 7.103530593436212e-06, "loss": 0.3339, "step": 8905 }, { "epoch": 1.2770289647261257, "grad_norm": 0.2961163818836212, "learning_rate": 7.102773741341733e-06, "loss": 0.292, "step": 8906 }, { "epoch": 1.2771723544594207, "grad_norm": 0.2667045593261719, "learning_rate": 7.1020168307092675e-06, "loss": 0.3129, "step": 8907 }, { "epoch": 1.2773157441927159, "grad_norm": 0.326156347990036, "learning_rate": 7.101259861559887e-06, "loss": 0.3154, "step": 8908 }, { "epoch": 1.2774591339260108, "grad_norm": 0.2894308865070343, "learning_rate": 7.100502833914664e-06, "loss": 0.3021, "step": 8909 }, { "epoch": 1.277602523659306, "grad_norm": 0.28311797976493835, "learning_rate": 7.099745747794673e-06, "loss": 0.3254, "step": 8910 }, { "epoch": 1.277745913392601, "grad_norm": 0.2784097492694855, "learning_rate": 7.098988603220992e-06, "loss": 0.3028, "step": 8911 }, { "epoch": 1.2778893031258962, "grad_norm": 0.31685730814933777, "learning_rate": 7.098231400214695e-06, "loss": 0.3208, "step": 8912 }, { "epoch": 1.2780326928591914, "grad_norm": 0.3327064514160156, "learning_rate": 7.097474138796865e-06, "loss": 0.3207, "step": 8913 }, { "epoch": 1.2781760825924864, "grad_norm": 0.271025687456131, "learning_rate": 7.096716818988582e-06, "loss": 0.3061, "step": 8914 }, { "epoch": 1.2783194723257814, "grad_norm": 0.3121850788593292, "learning_rate": 7.095959440810928e-06, "loss": 0.3074, "step": 8915 }, { "epoch": 1.2784628620590766, "grad_norm": 0.30085498094558716, "learning_rate": 7.095202004284985e-06, "loss": 0.3217, "step": 8916 }, { "epoch": 1.2786062517923718, "grad_norm": 0.3016907572746277, "learning_rate": 7.0944445094318434e-06, "loss": 0.3065, "step": 8917 }, { "epoch": 1.2787496415256667, "grad_norm": 0.28641360998153687, "learning_rate": 7.093686956272587e-06, "loss": 0.3053, "step": 8918 }, { "epoch": 1.278893031258962, "grad_norm": 0.29009580612182617, "learning_rate": 7.0929293448283085e-06, "loss": 0.3104, "step": 8919 }, { "epoch": 1.279036420992257, "grad_norm": 0.2968136966228485, "learning_rate": 7.092171675120095e-06, "loss": 0.3131, "step": 8920 }, { "epoch": 1.279179810725552, "grad_norm": 0.30572864413261414, "learning_rate": 7.091413947169041e-06, "loss": 0.3174, "step": 8921 }, { "epoch": 1.279323200458847, "grad_norm": 0.29714521765708923, "learning_rate": 7.090656160996241e-06, "loss": 0.3216, "step": 8922 }, { "epoch": 1.2794665901921423, "grad_norm": 0.29336684942245483, "learning_rate": 7.089898316622788e-06, "loss": 0.3132, "step": 8923 }, { "epoch": 1.2796099799254375, "grad_norm": 0.2880493998527527, "learning_rate": 7.089140414069781e-06, "loss": 0.3308, "step": 8924 }, { "epoch": 1.2797533696587324, "grad_norm": 0.30033785104751587, "learning_rate": 7.088382453358319e-06, "loss": 0.3301, "step": 8925 }, { "epoch": 1.2798967593920274, "grad_norm": 0.301974892616272, "learning_rate": 7.087624434509501e-06, "loss": 0.3091, "step": 8926 }, { "epoch": 1.2800401491253226, "grad_norm": 0.28614434599876404, "learning_rate": 7.086866357544431e-06, "loss": 0.2988, "step": 8927 }, { "epoch": 1.2801835388586178, "grad_norm": 0.29532623291015625, "learning_rate": 7.086108222484212e-06, "loss": 0.3096, "step": 8928 }, { "epoch": 1.2803269285919128, "grad_norm": 0.2825176417827606, "learning_rate": 7.085350029349948e-06, "loss": 0.3158, "step": 8929 }, { "epoch": 1.280470318325208, "grad_norm": 0.27765706181526184, "learning_rate": 7.084591778162748e-06, "loss": 0.3141, "step": 8930 }, { "epoch": 1.280613708058503, "grad_norm": 0.26835259795188904, "learning_rate": 7.08383346894372e-06, "loss": 0.3193, "step": 8931 }, { "epoch": 1.2807570977917981, "grad_norm": 0.2969425916671753, "learning_rate": 7.083075101713972e-06, "loss": 0.2981, "step": 8932 }, { "epoch": 1.2809004875250931, "grad_norm": 0.2854432165622711, "learning_rate": 7.08231667649462e-06, "loss": 0.2932, "step": 8933 }, { "epoch": 1.2810438772583883, "grad_norm": 0.30902525782585144, "learning_rate": 7.081558193306772e-06, "loss": 0.2999, "step": 8934 }, { "epoch": 1.2811872669916835, "grad_norm": 0.3106372654438019, "learning_rate": 7.080799652171548e-06, "loss": 0.312, "step": 8935 }, { "epoch": 1.2813306567249785, "grad_norm": 0.31750962138175964, "learning_rate": 7.0800410531100615e-06, "loss": 0.3181, "step": 8936 }, { "epoch": 1.2814740464582735, "grad_norm": 0.2962348163127899, "learning_rate": 7.079282396143432e-06, "loss": 0.3116, "step": 8937 }, { "epoch": 1.2816174361915686, "grad_norm": 0.3256145119667053, "learning_rate": 7.07852368129278e-06, "loss": 0.2949, "step": 8938 }, { "epoch": 1.2817608259248638, "grad_norm": 0.29519525170326233, "learning_rate": 7.077764908579225e-06, "loss": 0.325, "step": 8939 }, { "epoch": 1.2819042156581588, "grad_norm": 0.28054186701774597, "learning_rate": 7.077006078023893e-06, "loss": 0.3081, "step": 8940 }, { "epoch": 1.282047605391454, "grad_norm": 0.27654552459716797, "learning_rate": 7.076247189647906e-06, "loss": 0.2902, "step": 8941 }, { "epoch": 1.282190995124749, "grad_norm": 0.2649373710155487, "learning_rate": 7.07548824347239e-06, "loss": 0.3123, "step": 8942 }, { "epoch": 1.2823343848580442, "grad_norm": 0.28672298789024353, "learning_rate": 7.074729239518474e-06, "loss": 0.2801, "step": 8943 }, { "epoch": 1.2824777745913392, "grad_norm": 0.28388234972953796, "learning_rate": 7.07397017780729e-06, "loss": 0.2986, "step": 8944 }, { "epoch": 1.2826211643246344, "grad_norm": 0.2938278615474701, "learning_rate": 7.073211058359963e-06, "loss": 0.3211, "step": 8945 }, { "epoch": 1.2827645540579296, "grad_norm": 0.28211507201194763, "learning_rate": 7.072451881197631e-06, "loss": 0.3052, "step": 8946 }, { "epoch": 1.2829079437912245, "grad_norm": 0.2921520173549652, "learning_rate": 7.071692646341427e-06, "loss": 0.3081, "step": 8947 }, { "epoch": 1.2830513335245197, "grad_norm": 0.29950082302093506, "learning_rate": 7.0709333538124855e-06, "loss": 0.3277, "step": 8948 }, { "epoch": 1.2831947232578147, "grad_norm": 0.28581398725509644, "learning_rate": 7.070174003631948e-06, "loss": 0.3142, "step": 8949 }, { "epoch": 1.28333811299111, "grad_norm": 0.29818007349967957, "learning_rate": 7.069414595820948e-06, "loss": 0.3116, "step": 8950 }, { "epoch": 1.2834815027244049, "grad_norm": 0.28139743208885193, "learning_rate": 7.06865513040063e-06, "loss": 0.3068, "step": 8951 }, { "epoch": 1.2836248924577, "grad_norm": 0.2885740101337433, "learning_rate": 7.067895607392136e-06, "loss": 0.3072, "step": 8952 }, { "epoch": 1.2837682821909953, "grad_norm": 0.2781113088130951, "learning_rate": 7.067136026816608e-06, "loss": 0.3234, "step": 8953 }, { "epoch": 1.2839116719242902, "grad_norm": 0.28312748670578003, "learning_rate": 7.066376388695193e-06, "loss": 0.298, "step": 8954 }, { "epoch": 1.2840550616575852, "grad_norm": 0.2865164279937744, "learning_rate": 7.065616693049037e-06, "loss": 0.2931, "step": 8955 }, { "epoch": 1.2841984513908804, "grad_norm": 0.28675714135169983, "learning_rate": 7.064856939899292e-06, "loss": 0.3013, "step": 8956 }, { "epoch": 1.2843418411241756, "grad_norm": 0.27851682901382446, "learning_rate": 7.064097129267105e-06, "loss": 0.2986, "step": 8957 }, { "epoch": 1.2844852308574706, "grad_norm": 0.2886710464954376, "learning_rate": 7.063337261173629e-06, "loss": 0.3025, "step": 8958 }, { "epoch": 1.2846286205907658, "grad_norm": 0.29401639103889465, "learning_rate": 7.062577335640018e-06, "loss": 0.3083, "step": 8959 }, { "epoch": 1.2847720103240607, "grad_norm": 0.28487879037857056, "learning_rate": 7.0618173526874275e-06, "loss": 0.3065, "step": 8960 }, { "epoch": 1.284915400057356, "grad_norm": 0.2789449989795685, "learning_rate": 7.061057312337012e-06, "loss": 0.3093, "step": 8961 }, { "epoch": 1.285058789790651, "grad_norm": 0.29664692282676697, "learning_rate": 7.060297214609933e-06, "loss": 0.329, "step": 8962 }, { "epoch": 1.285202179523946, "grad_norm": 0.28862500190734863, "learning_rate": 7.059537059527347e-06, "loss": 0.3067, "step": 8963 }, { "epoch": 1.2853455692572413, "grad_norm": 0.2916072607040405, "learning_rate": 7.058776847110419e-06, "loss": 0.3196, "step": 8964 }, { "epoch": 1.2854889589905363, "grad_norm": 0.30118268728256226, "learning_rate": 7.058016577380311e-06, "loss": 0.2958, "step": 8965 }, { "epoch": 1.2856323487238313, "grad_norm": 0.27073490619659424, "learning_rate": 7.057256250358185e-06, "loss": 0.2974, "step": 8966 }, { "epoch": 1.2857757384571264, "grad_norm": 0.270974725484848, "learning_rate": 7.0564958660652115e-06, "loss": 0.3132, "step": 8967 }, { "epoch": 1.2859191281904216, "grad_norm": 0.27763596177101135, "learning_rate": 7.055735424522557e-06, "loss": 0.3141, "step": 8968 }, { "epoch": 1.2860625179237166, "grad_norm": 0.28647279739379883, "learning_rate": 7.05497492575139e-06, "loss": 0.3172, "step": 8969 }, { "epoch": 1.2862059076570118, "grad_norm": 0.27136489748954773, "learning_rate": 7.054214369772881e-06, "loss": 0.2957, "step": 8970 }, { "epoch": 1.2863492973903068, "grad_norm": 0.29085108637809753, "learning_rate": 7.053453756608206e-06, "loss": 0.3064, "step": 8971 }, { "epoch": 1.286492687123602, "grad_norm": 0.29293397068977356, "learning_rate": 7.052693086278536e-06, "loss": 0.3016, "step": 8972 }, { "epoch": 1.286636076856897, "grad_norm": 0.3318578898906708, "learning_rate": 7.051932358805047e-06, "loss": 0.3028, "step": 8973 }, { "epoch": 1.2867794665901922, "grad_norm": 0.29463818669319153, "learning_rate": 7.051171574208919e-06, "loss": 0.3087, "step": 8974 }, { "epoch": 1.2869228563234874, "grad_norm": 0.28193917870521545, "learning_rate": 7.050410732511329e-06, "loss": 0.3207, "step": 8975 }, { "epoch": 1.2870662460567823, "grad_norm": 0.32258984446525574, "learning_rate": 7.0496498337334594e-06, "loss": 0.3115, "step": 8976 }, { "epoch": 1.2872096357900773, "grad_norm": 0.2821591794490814, "learning_rate": 7.048888877896492e-06, "loss": 0.3114, "step": 8977 }, { "epoch": 1.2873530255233725, "grad_norm": 0.2648545801639557, "learning_rate": 7.048127865021608e-06, "loss": 0.2969, "step": 8978 }, { "epoch": 1.2874964152566677, "grad_norm": 0.2808851897716522, "learning_rate": 7.047366795129996e-06, "loss": 0.3118, "step": 8979 }, { "epoch": 1.2876398049899627, "grad_norm": 0.29552364349365234, "learning_rate": 7.04660566824284e-06, "loss": 0.34, "step": 8980 }, { "epoch": 1.2877831947232579, "grad_norm": 0.30721813440322876, "learning_rate": 7.045844484381332e-06, "loss": 0.326, "step": 8981 }, { "epoch": 1.2879265844565528, "grad_norm": 0.27355656027793884, "learning_rate": 7.045083243566662e-06, "loss": 0.2987, "step": 8982 }, { "epoch": 1.288069974189848, "grad_norm": 0.2877947688102722, "learning_rate": 7.0443219458200185e-06, "loss": 0.3055, "step": 8983 }, { "epoch": 1.288213363923143, "grad_norm": 0.2707597613334656, "learning_rate": 7.043560591162597e-06, "loss": 0.2983, "step": 8984 }, { "epoch": 1.2883567536564382, "grad_norm": 0.30702492594718933, "learning_rate": 7.042799179615593e-06, "loss": 0.3133, "step": 8985 }, { "epoch": 1.2885001433897334, "grad_norm": 0.2871286869049072, "learning_rate": 7.042037711200201e-06, "loss": 0.3102, "step": 8986 }, { "epoch": 1.2886435331230284, "grad_norm": 0.2845320701599121, "learning_rate": 7.041276185937622e-06, "loss": 0.3184, "step": 8987 }, { "epoch": 1.2887869228563236, "grad_norm": 0.2935425043106079, "learning_rate": 7.0405146038490534e-06, "loss": 0.3371, "step": 8988 }, { "epoch": 1.2889303125896185, "grad_norm": 0.2778969705104828, "learning_rate": 7.039752964955697e-06, "loss": 0.3139, "step": 8989 }, { "epoch": 1.2890737023229137, "grad_norm": 0.28652092814445496, "learning_rate": 7.0389912692787554e-06, "loss": 0.3187, "step": 8990 }, { "epoch": 1.2892170920562087, "grad_norm": 0.27303704619407654, "learning_rate": 7.038229516839433e-06, "loss": 0.2855, "step": 8991 }, { "epoch": 1.289360481789504, "grad_norm": 0.27063286304473877, "learning_rate": 7.037467707658938e-06, "loss": 0.3111, "step": 8992 }, { "epoch": 1.289503871522799, "grad_norm": 0.2982173562049866, "learning_rate": 7.036705841758476e-06, "loss": 0.3208, "step": 8993 }, { "epoch": 1.289647261256094, "grad_norm": 0.26783522963523865, "learning_rate": 7.0359439191592545e-06, "loss": 0.3292, "step": 8994 }, { "epoch": 1.289790650989389, "grad_norm": 0.28694477677345276, "learning_rate": 7.035181939882489e-06, "loss": 0.3254, "step": 8995 }, { "epoch": 1.2899340407226842, "grad_norm": 0.293633371591568, "learning_rate": 7.0344199039493865e-06, "loss": 0.3155, "step": 8996 }, { "epoch": 1.2900774304559794, "grad_norm": 0.29562681913375854, "learning_rate": 7.033657811381164e-06, "loss": 0.3295, "step": 8997 }, { "epoch": 1.2902208201892744, "grad_norm": 0.27780136466026306, "learning_rate": 7.032895662199037e-06, "loss": 0.3095, "step": 8998 }, { "epoch": 1.2903642099225696, "grad_norm": 0.28276416659355164, "learning_rate": 7.032133456424221e-06, "loss": 0.3039, "step": 8999 }, { "epoch": 1.2905075996558646, "grad_norm": 0.27990755438804626, "learning_rate": 7.031371194077936e-06, "loss": 0.3057, "step": 9000 }, { "epoch": 1.2906509893891598, "grad_norm": 0.280729740858078, "learning_rate": 7.030608875181402e-06, "loss": 0.3034, "step": 9001 }, { "epoch": 1.2907943791224548, "grad_norm": 0.29875972867012024, "learning_rate": 7.02984649975584e-06, "loss": 0.2996, "step": 9002 }, { "epoch": 1.29093776885575, "grad_norm": 0.2857564091682434, "learning_rate": 7.029084067822475e-06, "loss": 0.3121, "step": 9003 }, { "epoch": 1.2910811585890452, "grad_norm": 0.29145604372024536, "learning_rate": 7.028321579402531e-06, "loss": 0.3081, "step": 9004 }, { "epoch": 1.2912245483223401, "grad_norm": 0.2846568822860718, "learning_rate": 7.027559034517233e-06, "loss": 0.3187, "step": 9005 }, { "epoch": 1.291367938055635, "grad_norm": 0.31690728664398193, "learning_rate": 7.026796433187811e-06, "loss": 0.3239, "step": 9006 }, { "epoch": 1.2915113277889303, "grad_norm": 0.3673117756843567, "learning_rate": 7.026033775435495e-06, "loss": 0.3082, "step": 9007 }, { "epoch": 1.2916547175222255, "grad_norm": 0.27920016646385193, "learning_rate": 7.025271061281513e-06, "loss": 0.3108, "step": 9008 }, { "epoch": 1.2917981072555205, "grad_norm": 0.2896348536014557, "learning_rate": 7.024508290747102e-06, "loss": 0.3093, "step": 9009 }, { "epoch": 1.2919414969888157, "grad_norm": 0.28755900263786316, "learning_rate": 7.023745463853493e-06, "loss": 0.3048, "step": 9010 }, { "epoch": 1.2920848867221106, "grad_norm": 0.28618523478507996, "learning_rate": 7.022982580621927e-06, "loss": 0.3254, "step": 9011 }, { "epoch": 1.2922282764554058, "grad_norm": 0.28266361355781555, "learning_rate": 7.022219641073634e-06, "loss": 0.321, "step": 9012 }, { "epoch": 1.2923716661887008, "grad_norm": 0.30835458636283875, "learning_rate": 7.021456645229858e-06, "loss": 0.3034, "step": 9013 }, { "epoch": 1.292515055921996, "grad_norm": 0.2918625473976135, "learning_rate": 7.02069359311184e-06, "loss": 0.3018, "step": 9014 }, { "epoch": 1.2926584456552912, "grad_norm": 0.31545180082321167, "learning_rate": 7.019930484740819e-06, "loss": 0.3153, "step": 9015 }, { "epoch": 1.2928018353885862, "grad_norm": 0.28180015087127686, "learning_rate": 7.019167320138041e-06, "loss": 0.3117, "step": 9016 }, { "epoch": 1.2929452251218811, "grad_norm": 0.30792832374572754, "learning_rate": 7.01840409932475e-06, "loss": 0.3294, "step": 9017 }, { "epoch": 1.2930886148551763, "grad_norm": 0.30343732237815857, "learning_rate": 7.017640822322196e-06, "loss": 0.3303, "step": 9018 }, { "epoch": 1.2932320045884715, "grad_norm": 0.28996819257736206, "learning_rate": 7.0168774891516245e-06, "loss": 0.3182, "step": 9019 }, { "epoch": 1.2933753943217665, "grad_norm": 0.30372512340545654, "learning_rate": 7.016114099834286e-06, "loss": 0.3015, "step": 9020 }, { "epoch": 1.2935187840550617, "grad_norm": 0.31320449709892273, "learning_rate": 7.015350654391431e-06, "loss": 0.3329, "step": 9021 }, { "epoch": 1.2936621737883567, "grad_norm": 0.3174489140510559, "learning_rate": 7.014587152844316e-06, "loss": 0.3365, "step": 9022 }, { "epoch": 1.2938055635216519, "grad_norm": 0.29806190729141235, "learning_rate": 7.013823595214193e-06, "loss": 0.3219, "step": 9023 }, { "epoch": 1.2939489532549469, "grad_norm": 0.31553369760513306, "learning_rate": 7.013059981522318e-06, "loss": 0.2996, "step": 9024 }, { "epoch": 1.294092342988242, "grad_norm": 0.29523035883903503, "learning_rate": 7.012296311789951e-06, "loss": 0.3058, "step": 9025 }, { "epoch": 1.2942357327215372, "grad_norm": 0.2924611270427704, "learning_rate": 7.01153258603835e-06, "loss": 0.3122, "step": 9026 }, { "epoch": 1.2943791224548322, "grad_norm": 0.30620405077934265, "learning_rate": 7.0107688042887745e-06, "loss": 0.3133, "step": 9027 }, { "epoch": 1.2945225121881272, "grad_norm": 0.29147377610206604, "learning_rate": 7.01000496656249e-06, "loss": 0.3035, "step": 9028 }, { "epoch": 1.2946659019214224, "grad_norm": 0.2695026993751526, "learning_rate": 7.009241072880759e-06, "loss": 0.3075, "step": 9029 }, { "epoch": 1.2948092916547176, "grad_norm": 0.28617167472839355, "learning_rate": 7.008477123264849e-06, "loss": 0.2969, "step": 9030 }, { "epoch": 1.2949526813880126, "grad_norm": 0.26492226123809814, "learning_rate": 7.007713117736023e-06, "loss": 0.2953, "step": 9031 }, { "epoch": 1.2950960711213078, "grad_norm": 0.29107269644737244, "learning_rate": 7.006949056315553e-06, "loss": 0.328, "step": 9032 }, { "epoch": 1.2952394608546027, "grad_norm": 0.2832891345024109, "learning_rate": 7.006184939024709e-06, "loss": 0.3078, "step": 9033 }, { "epoch": 1.295382850587898, "grad_norm": 0.27163392305374146, "learning_rate": 7.005420765884763e-06, "loss": 0.3189, "step": 9034 }, { "epoch": 1.295526240321193, "grad_norm": 0.29293519258499146, "learning_rate": 7.004656536916986e-06, "loss": 0.282, "step": 9035 }, { "epoch": 1.295669630054488, "grad_norm": 0.27731096744537354, "learning_rate": 7.003892252142656e-06, "loss": 0.3041, "step": 9036 }, { "epoch": 1.2958130197877833, "grad_norm": 0.2929533123970032, "learning_rate": 7.0031279115830476e-06, "loss": 0.3331, "step": 9037 }, { "epoch": 1.2959564095210783, "grad_norm": 0.29531458020210266, "learning_rate": 7.00236351525944e-06, "loss": 0.3165, "step": 9038 }, { "epoch": 1.2960997992543735, "grad_norm": 0.278363436460495, "learning_rate": 7.001599063193112e-06, "loss": 0.3015, "step": 9039 }, { "epoch": 1.2962431889876684, "grad_norm": 0.31585997343063354, "learning_rate": 7.000834555405345e-06, "loss": 0.3183, "step": 9040 }, { "epoch": 1.2963865787209636, "grad_norm": 0.2873170077800751, "learning_rate": 7.000069991917423e-06, "loss": 0.3155, "step": 9041 }, { "epoch": 1.2965299684542586, "grad_norm": 0.25972455739974976, "learning_rate": 6.9993053727506275e-06, "loss": 0.3151, "step": 9042 }, { "epoch": 1.2966733581875538, "grad_norm": 0.3042570948600769, "learning_rate": 6.998540697926247e-06, "loss": 0.3266, "step": 9043 }, { "epoch": 1.296816747920849, "grad_norm": 0.28623801469802856, "learning_rate": 6.997775967465567e-06, "loss": 0.3118, "step": 9044 }, { "epoch": 1.296960137654144, "grad_norm": 0.28654563426971436, "learning_rate": 6.997011181389878e-06, "loss": 0.298, "step": 9045 }, { "epoch": 1.297103527387439, "grad_norm": 0.28293099999427795, "learning_rate": 6.99624633972047e-06, "loss": 0.3034, "step": 9046 }, { "epoch": 1.2972469171207341, "grad_norm": 0.29857248067855835, "learning_rate": 6.995481442478633e-06, "loss": 0.3165, "step": 9047 }, { "epoch": 1.2973903068540293, "grad_norm": 0.3038206994533539, "learning_rate": 6.994716489685663e-06, "loss": 0.2939, "step": 9048 }, { "epoch": 1.2975336965873243, "grad_norm": 0.30443090200424194, "learning_rate": 6.993951481362856e-06, "loss": 0.3081, "step": 9049 }, { "epoch": 1.2976770863206195, "grad_norm": 0.29429182410240173, "learning_rate": 6.993186417531506e-06, "loss": 0.3247, "step": 9050 }, { "epoch": 1.2978204760539145, "grad_norm": 0.2798042595386505, "learning_rate": 6.992421298212911e-06, "loss": 0.3157, "step": 9051 }, { "epoch": 1.2979638657872097, "grad_norm": 0.3130089044570923, "learning_rate": 6.991656123428374e-06, "loss": 0.3158, "step": 9052 }, { "epoch": 1.2981072555205047, "grad_norm": 0.28293585777282715, "learning_rate": 6.9908908931991935e-06, "loss": 0.3136, "step": 9053 }, { "epoch": 1.2982506452537999, "grad_norm": 0.2706996202468872, "learning_rate": 6.990125607546673e-06, "loss": 0.3218, "step": 9054 }, { "epoch": 1.298394034987095, "grad_norm": 0.30101296305656433, "learning_rate": 6.989360266492116e-06, "loss": 0.3194, "step": 9055 }, { "epoch": 1.29853742472039, "grad_norm": 0.2937144637107849, "learning_rate": 6.9885948700568305e-06, "loss": 0.2914, "step": 9056 }, { "epoch": 1.298680814453685, "grad_norm": 0.36008578538894653, "learning_rate": 6.987829418262123e-06, "loss": 0.3373, "step": 9057 }, { "epoch": 1.2988242041869802, "grad_norm": 0.2768896520137787, "learning_rate": 6.987063911129302e-06, "loss": 0.2971, "step": 9058 }, { "epoch": 1.2989675939202754, "grad_norm": 0.277900367975235, "learning_rate": 6.986298348679678e-06, "loss": 0.3026, "step": 9059 }, { "epoch": 1.2991109836535704, "grad_norm": 0.3003822863101959, "learning_rate": 6.985532730934565e-06, "loss": 0.3077, "step": 9060 }, { "epoch": 1.2992543733868656, "grad_norm": 0.3017962574958801, "learning_rate": 6.984767057915272e-06, "loss": 0.311, "step": 9061 }, { "epoch": 1.2993977631201605, "grad_norm": 0.2855139374732971, "learning_rate": 6.9840013296431195e-06, "loss": 0.3118, "step": 9062 }, { "epoch": 1.2995411528534557, "grad_norm": 0.27536681294441223, "learning_rate": 6.983235546139422e-06, "loss": 0.3096, "step": 9063 }, { "epoch": 1.2996845425867507, "grad_norm": 0.29020562767982483, "learning_rate": 6.982469707425497e-06, "loss": 0.3341, "step": 9064 }, { "epoch": 1.299827932320046, "grad_norm": 0.3001447021961212, "learning_rate": 6.981703813522666e-06, "loss": 0.2945, "step": 9065 }, { "epoch": 1.299971322053341, "grad_norm": 0.28021493554115295, "learning_rate": 6.9809378644522475e-06, "loss": 0.3073, "step": 9066 }, { "epoch": 1.300114711786636, "grad_norm": 0.28204116225242615, "learning_rate": 6.980171860235567e-06, "loss": 0.3117, "step": 9067 }, { "epoch": 1.300258101519931, "grad_norm": 0.31429192423820496, "learning_rate": 6.979405800893949e-06, "loss": 0.314, "step": 9068 }, { "epoch": 1.3004014912532262, "grad_norm": 0.29087749123573303, "learning_rate": 6.978639686448717e-06, "loss": 0.3076, "step": 9069 }, { "epoch": 1.3005448809865214, "grad_norm": 0.27822351455688477, "learning_rate": 6.9778735169212e-06, "loss": 0.3277, "step": 9070 }, { "epoch": 1.3006882707198164, "grad_norm": 0.32662996649742126, "learning_rate": 6.9771072923327275e-06, "loss": 0.3277, "step": 9071 }, { "epoch": 1.3008316604531116, "grad_norm": 0.28664955496788025, "learning_rate": 6.9763410127046295e-06, "loss": 0.3031, "step": 9072 }, { "epoch": 1.3009750501864066, "grad_norm": 0.2813522517681122, "learning_rate": 6.9755746780582365e-06, "loss": 0.3128, "step": 9073 }, { "epoch": 1.3011184399197018, "grad_norm": 0.29715245962142944, "learning_rate": 6.9748082884148845e-06, "loss": 0.3049, "step": 9074 }, { "epoch": 1.3012618296529967, "grad_norm": 0.3202231526374817, "learning_rate": 6.974041843795907e-06, "loss": 0.3259, "step": 9075 }, { "epoch": 1.301405219386292, "grad_norm": 0.2648209035396576, "learning_rate": 6.973275344222643e-06, "loss": 0.2956, "step": 9076 }, { "epoch": 1.3015486091195871, "grad_norm": 0.3014819324016571, "learning_rate": 6.9725087897164266e-06, "loss": 0.3072, "step": 9077 }, { "epoch": 1.3016919988528821, "grad_norm": 0.3010725677013397, "learning_rate": 6.971742180298601e-06, "loss": 0.3221, "step": 9078 }, { "epoch": 1.3018353885861773, "grad_norm": 0.2765785753726959, "learning_rate": 6.970975515990506e-06, "loss": 0.3025, "step": 9079 }, { "epoch": 1.3019787783194723, "grad_norm": 0.31767821311950684, "learning_rate": 6.9702087968134844e-06, "loss": 0.3196, "step": 9080 }, { "epoch": 1.3021221680527675, "grad_norm": 0.28064343333244324, "learning_rate": 6.969442022788881e-06, "loss": 0.3048, "step": 9081 }, { "epoch": 1.3022655577860625, "grad_norm": 0.30385494232177734, "learning_rate": 6.968675193938041e-06, "loss": 0.2891, "step": 9082 }, { "epoch": 1.3024089475193577, "grad_norm": 0.27373167872428894, "learning_rate": 6.967908310282311e-06, "loss": 0.3136, "step": 9083 }, { "epoch": 1.3025523372526528, "grad_norm": 0.3120664060115814, "learning_rate": 6.967141371843043e-06, "loss": 0.3071, "step": 9084 }, { "epoch": 1.3026957269859478, "grad_norm": 0.30428817868232727, "learning_rate": 6.9663743786415825e-06, "loss": 0.3072, "step": 9085 }, { "epoch": 1.3028391167192428, "grad_norm": 0.28672337532043457, "learning_rate": 6.965607330699287e-06, "loss": 0.3144, "step": 9086 }, { "epoch": 1.302982506452538, "grad_norm": 0.2978293001651764, "learning_rate": 6.964840228037506e-06, "loss": 0.3109, "step": 9087 }, { "epoch": 1.3031258961858332, "grad_norm": 0.28719374537467957, "learning_rate": 6.964073070677595e-06, "loss": 0.3138, "step": 9088 }, { "epoch": 1.3032692859191282, "grad_norm": 0.2873504161834717, "learning_rate": 6.963305858640911e-06, "loss": 0.2884, "step": 9089 }, { "epoch": 1.3034126756524234, "grad_norm": 0.3106197416782379, "learning_rate": 6.962538591948812e-06, "loss": 0.2905, "step": 9090 }, { "epoch": 1.3035560653857183, "grad_norm": 0.30595555901527405, "learning_rate": 6.961771270622658e-06, "loss": 0.3358, "step": 9091 }, { "epoch": 1.3036994551190135, "grad_norm": 0.29810836911201477, "learning_rate": 6.9610038946838096e-06, "loss": 0.3175, "step": 9092 }, { "epoch": 1.3038428448523085, "grad_norm": 0.28810250759124756, "learning_rate": 6.960236464153629e-06, "loss": 0.3161, "step": 9093 }, { "epoch": 1.3039862345856037, "grad_norm": 0.27752551436424255, "learning_rate": 6.959468979053481e-06, "loss": 0.284, "step": 9094 }, { "epoch": 1.304129624318899, "grad_norm": 0.28831174969673157, "learning_rate": 6.958701439404732e-06, "loss": 0.3072, "step": 9095 }, { "epoch": 1.3042730140521939, "grad_norm": 0.3169008493423462, "learning_rate": 6.957933845228746e-06, "loss": 0.3289, "step": 9096 }, { "epoch": 1.3044164037854888, "grad_norm": 0.3053828179836273, "learning_rate": 6.957166196546894e-06, "loss": 0.3056, "step": 9097 }, { "epoch": 1.304559793518784, "grad_norm": 0.28765392303466797, "learning_rate": 6.956398493380548e-06, "loss": 0.3109, "step": 9098 }, { "epoch": 1.3047031832520792, "grad_norm": 0.3026915490627289, "learning_rate": 6.9556307357510744e-06, "loss": 0.3098, "step": 9099 }, { "epoch": 1.3048465729853742, "grad_norm": 0.3078605532646179, "learning_rate": 6.95486292367985e-06, "loss": 0.3141, "step": 9100 }, { "epoch": 1.3049899627186694, "grad_norm": 0.309217631816864, "learning_rate": 6.954095057188251e-06, "loss": 0.305, "step": 9101 }, { "epoch": 1.3051333524519644, "grad_norm": 0.2780417501926422, "learning_rate": 6.953327136297648e-06, "loss": 0.3075, "step": 9102 }, { "epoch": 1.3052767421852596, "grad_norm": 0.29973870515823364, "learning_rate": 6.9525591610294265e-06, "loss": 0.3193, "step": 9103 }, { "epoch": 1.3054201319185545, "grad_norm": 0.2800758183002472, "learning_rate": 6.951791131404959e-06, "loss": 0.3225, "step": 9104 }, { "epoch": 1.3055635216518497, "grad_norm": 0.28081077337265015, "learning_rate": 6.9510230474456286e-06, "loss": 0.3299, "step": 9105 }, { "epoch": 1.305706911385145, "grad_norm": 0.293484628200531, "learning_rate": 6.95025490917282e-06, "loss": 0.3145, "step": 9106 }, { "epoch": 1.30585030111844, "grad_norm": 0.29339101910591125, "learning_rate": 6.949486716607913e-06, "loss": 0.3153, "step": 9107 }, { "epoch": 1.305993690851735, "grad_norm": 0.2685920298099518, "learning_rate": 6.948718469772294e-06, "loss": 0.3055, "step": 9108 }, { "epoch": 1.30613708058503, "grad_norm": 0.28860583901405334, "learning_rate": 6.947950168687352e-06, "loss": 0.3016, "step": 9109 }, { "epoch": 1.3062804703183253, "grad_norm": 0.326825350522995, "learning_rate": 6.9471818133744726e-06, "loss": 0.3059, "step": 9110 }, { "epoch": 1.3064238600516203, "grad_norm": 0.2946116030216217, "learning_rate": 6.946413403855048e-06, "loss": 0.3154, "step": 9111 }, { "epoch": 1.3065672497849155, "grad_norm": 0.2905232608318329, "learning_rate": 6.945644940150468e-06, "loss": 0.2923, "step": 9112 }, { "epoch": 1.3067106395182104, "grad_norm": 0.2672406733036041, "learning_rate": 6.944876422282125e-06, "loss": 0.2995, "step": 9113 }, { "epoch": 1.3068540292515056, "grad_norm": 0.26747244596481323, "learning_rate": 6.944107850271417e-06, "loss": 0.3044, "step": 9114 }, { "epoch": 1.3069974189848006, "grad_norm": 0.3195725381374359, "learning_rate": 6.943339224139733e-06, "loss": 0.2978, "step": 9115 }, { "epoch": 1.3071408087180958, "grad_norm": 0.2989429831504822, "learning_rate": 6.9425705439084765e-06, "loss": 0.3267, "step": 9116 }, { "epoch": 1.307284198451391, "grad_norm": 0.28673920035362244, "learning_rate": 6.941801809599046e-06, "loss": 0.2912, "step": 9117 }, { "epoch": 1.307427588184686, "grad_norm": 0.28672394156455994, "learning_rate": 6.941033021232837e-06, "loss": 0.3052, "step": 9118 }, { "epoch": 1.307570977917981, "grad_norm": 0.27999651432037354, "learning_rate": 6.940264178831257e-06, "loss": 0.308, "step": 9119 }, { "epoch": 1.3077143676512761, "grad_norm": 0.28333914279937744, "learning_rate": 6.939495282415705e-06, "loss": 0.3075, "step": 9120 }, { "epoch": 1.3078577573845713, "grad_norm": 0.3117774426937103, "learning_rate": 6.938726332007588e-06, "loss": 0.311, "step": 9121 }, { "epoch": 1.3080011471178663, "grad_norm": 0.3098946213722229, "learning_rate": 6.937957327628314e-06, "loss": 0.3012, "step": 9122 }, { "epoch": 1.3081445368511615, "grad_norm": 0.28682661056518555, "learning_rate": 6.937188269299287e-06, "loss": 0.3232, "step": 9123 }, { "epoch": 1.3082879265844565, "grad_norm": 0.27435970306396484, "learning_rate": 6.936419157041921e-06, "loss": 0.3099, "step": 9124 }, { "epoch": 1.3084313163177517, "grad_norm": 0.2842264175415039, "learning_rate": 6.935649990877623e-06, "loss": 0.3005, "step": 9125 }, { "epoch": 1.3085747060510466, "grad_norm": 0.29319390654563904, "learning_rate": 6.934880770827807e-06, "loss": 0.3003, "step": 9126 }, { "epoch": 1.3087180957843418, "grad_norm": 0.2942257523536682, "learning_rate": 6.9341114969138855e-06, "loss": 0.3121, "step": 9127 }, { "epoch": 1.308861485517637, "grad_norm": 0.295086532831192, "learning_rate": 6.933342169157276e-06, "loss": 0.3012, "step": 9128 }, { "epoch": 1.309004875250932, "grad_norm": 0.30146974325180054, "learning_rate": 6.932572787579395e-06, "loss": 0.3288, "step": 9129 }, { "epoch": 1.3091482649842272, "grad_norm": 0.3014776110649109, "learning_rate": 6.931803352201661e-06, "loss": 0.2998, "step": 9130 }, { "epoch": 1.3092916547175222, "grad_norm": 0.28898197412490845, "learning_rate": 6.931033863045492e-06, "loss": 0.3015, "step": 9131 }, { "epoch": 1.3094350444508174, "grad_norm": 0.27766478061676025, "learning_rate": 6.930264320132311e-06, "loss": 0.3048, "step": 9132 }, { "epoch": 1.3095784341841123, "grad_norm": 0.28622764348983765, "learning_rate": 6.929494723483543e-06, "loss": 0.2862, "step": 9133 }, { "epoch": 1.3097218239174075, "grad_norm": 0.2994665503501892, "learning_rate": 6.9287250731206065e-06, "loss": 0.2918, "step": 9134 }, { "epoch": 1.3098652136507027, "grad_norm": 0.2827533483505249, "learning_rate": 6.9279553690649325e-06, "loss": 0.2931, "step": 9135 }, { "epoch": 1.3100086033839977, "grad_norm": 0.2961355447769165, "learning_rate": 6.927185611337947e-06, "loss": 0.3096, "step": 9136 }, { "epoch": 1.3101519931172927, "grad_norm": 0.3063865900039673, "learning_rate": 6.926415799961078e-06, "loss": 0.3054, "step": 9137 }, { "epoch": 1.3102953828505879, "grad_norm": 0.2675045132637024, "learning_rate": 6.925645934955758e-06, "loss": 0.3046, "step": 9138 }, { "epoch": 1.310438772583883, "grad_norm": 0.28727155923843384, "learning_rate": 6.924876016343416e-06, "loss": 0.3007, "step": 9139 }, { "epoch": 1.310582162317178, "grad_norm": 0.2882876992225647, "learning_rate": 6.924106044145486e-06, "loss": 0.3074, "step": 9140 }, { "epoch": 1.3107255520504733, "grad_norm": 0.30038970708847046, "learning_rate": 6.9233360183834055e-06, "loss": 0.2935, "step": 9141 }, { "epoch": 1.3108689417837682, "grad_norm": 0.28481605648994446, "learning_rate": 6.922565939078607e-06, "loss": 0.2963, "step": 9142 }, { "epoch": 1.3110123315170634, "grad_norm": 0.29740744829177856, "learning_rate": 6.921795806252532e-06, "loss": 0.3302, "step": 9143 }, { "epoch": 1.3111557212503584, "grad_norm": 0.29527631402015686, "learning_rate": 6.921025619926617e-06, "loss": 0.32, "step": 9144 }, { "epoch": 1.3112991109836536, "grad_norm": 0.29594686627388, "learning_rate": 6.920255380122304e-06, "loss": 0.3133, "step": 9145 }, { "epoch": 1.3114425007169488, "grad_norm": 0.28527724742889404, "learning_rate": 6.919485086861035e-06, "loss": 0.3093, "step": 9146 }, { "epoch": 1.3115858904502438, "grad_norm": 0.2632976770401001, "learning_rate": 6.918714740164254e-06, "loss": 0.3102, "step": 9147 }, { "epoch": 1.3117292801835387, "grad_norm": 0.2855773866176605, "learning_rate": 6.917944340053405e-06, "loss": 0.3035, "step": 9148 }, { "epoch": 1.311872669916834, "grad_norm": 0.2913200259208679, "learning_rate": 6.917173886549939e-06, "loss": 0.2908, "step": 9149 }, { "epoch": 1.3120160596501291, "grad_norm": 0.2806011736392975, "learning_rate": 6.916403379675298e-06, "loss": 0.3076, "step": 9150 }, { "epoch": 1.312159449383424, "grad_norm": 0.2930750548839569, "learning_rate": 6.915632819450936e-06, "loss": 0.3172, "step": 9151 }, { "epoch": 1.3123028391167193, "grad_norm": 0.2982984185218811, "learning_rate": 6.9148622058983035e-06, "loss": 0.3067, "step": 9152 }, { "epoch": 1.3124462288500143, "grad_norm": 0.26929154992103577, "learning_rate": 6.914091539038853e-06, "loss": 0.3218, "step": 9153 }, { "epoch": 1.3125896185833095, "grad_norm": 0.25287729501724243, "learning_rate": 6.9133208188940385e-06, "loss": 0.3183, "step": 9154 }, { "epoch": 1.3127330083166044, "grad_norm": 0.26571348309516907, "learning_rate": 6.912550045485315e-06, "loss": 0.2977, "step": 9155 }, { "epoch": 1.3128763980498996, "grad_norm": 0.2813708484172821, "learning_rate": 6.91177921883414e-06, "loss": 0.286, "step": 9156 }, { "epoch": 1.3130197877831948, "grad_norm": 0.28651362657546997, "learning_rate": 6.911008338961973e-06, "loss": 0.3178, "step": 9157 }, { "epoch": 1.3131631775164898, "grad_norm": 0.2705436646938324, "learning_rate": 6.9102374058902735e-06, "loss": 0.2986, "step": 9158 }, { "epoch": 1.3133065672497848, "grad_norm": 0.29528871178627014, "learning_rate": 6.9094664196405025e-06, "loss": 0.3099, "step": 9159 }, { "epoch": 1.31344995698308, "grad_norm": 0.29807618260383606, "learning_rate": 6.908695380234125e-06, "loss": 0.3184, "step": 9160 }, { "epoch": 1.3135933467163752, "grad_norm": 0.2819671332836151, "learning_rate": 6.907924287692604e-06, "loss": 0.3361, "step": 9161 }, { "epoch": 1.3137367364496702, "grad_norm": 0.280566543340683, "learning_rate": 6.907153142037405e-06, "loss": 0.3174, "step": 9162 }, { "epoch": 1.3138801261829653, "grad_norm": 0.29558852314949036, "learning_rate": 6.906381943289995e-06, "loss": 0.305, "step": 9163 }, { "epoch": 1.3140235159162603, "grad_norm": 0.28219830989837646, "learning_rate": 6.905610691471846e-06, "loss": 0.3006, "step": 9164 }, { "epoch": 1.3141669056495555, "grad_norm": 0.2922211289405823, "learning_rate": 6.904839386604427e-06, "loss": 0.3378, "step": 9165 }, { "epoch": 1.3143102953828505, "grad_norm": 0.29432547092437744, "learning_rate": 6.904068028709209e-06, "loss": 0.322, "step": 9166 }, { "epoch": 1.3144536851161457, "grad_norm": 0.2678074836730957, "learning_rate": 6.903296617807666e-06, "loss": 0.307, "step": 9167 }, { "epoch": 1.3145970748494409, "grad_norm": 0.27807533740997314, "learning_rate": 6.902525153921273e-06, "loss": 0.3228, "step": 9168 }, { "epoch": 1.3147404645827359, "grad_norm": 0.30018123984336853, "learning_rate": 6.9017536370715065e-06, "loss": 0.2953, "step": 9169 }, { "epoch": 1.314883854316031, "grad_norm": 0.27909234166145325, "learning_rate": 6.900982067279844e-06, "loss": 0.3037, "step": 9170 }, { "epoch": 1.315027244049326, "grad_norm": 0.2921897768974304, "learning_rate": 6.900210444567767e-06, "loss": 0.3243, "step": 9171 }, { "epoch": 1.3151706337826212, "grad_norm": 0.2924421429634094, "learning_rate": 6.899438768956752e-06, "loss": 0.3264, "step": 9172 }, { "epoch": 1.3153140235159162, "grad_norm": 0.26291605830192566, "learning_rate": 6.8986670404682834e-06, "loss": 0.2992, "step": 9173 }, { "epoch": 1.3154574132492114, "grad_norm": 0.2959395945072174, "learning_rate": 6.897895259123846e-06, "loss": 0.3189, "step": 9174 }, { "epoch": 1.3156008029825066, "grad_norm": 0.27339914441108704, "learning_rate": 6.8971234249449235e-06, "loss": 0.3056, "step": 9175 }, { "epoch": 1.3157441927158016, "grad_norm": 0.3141060769557953, "learning_rate": 6.896351537953004e-06, "loss": 0.3139, "step": 9176 }, { "epoch": 1.3158875824490965, "grad_norm": 0.2656818926334381, "learning_rate": 6.895579598169574e-06, "loss": 0.2886, "step": 9177 }, { "epoch": 1.3160309721823917, "grad_norm": 0.29237237572669983, "learning_rate": 6.894807605616124e-06, "loss": 0.3013, "step": 9178 }, { "epoch": 1.316174361915687, "grad_norm": 0.2866918444633484, "learning_rate": 6.8940355603141465e-06, "loss": 0.3008, "step": 9179 }, { "epoch": 1.316317751648982, "grad_norm": 0.290340781211853, "learning_rate": 6.893263462285131e-06, "loss": 0.3023, "step": 9180 }, { "epoch": 1.316461141382277, "grad_norm": 0.2773926258087158, "learning_rate": 6.892491311550573e-06, "loss": 0.3025, "step": 9181 }, { "epoch": 1.316604531115572, "grad_norm": 0.2856194078922272, "learning_rate": 6.891719108131969e-06, "loss": 0.3027, "step": 9182 }, { "epoch": 1.3167479208488673, "grad_norm": 0.3016517460346222, "learning_rate": 6.890946852050814e-06, "loss": 0.3165, "step": 9183 }, { "epoch": 1.3168913105821622, "grad_norm": 0.2858574092388153, "learning_rate": 6.890174543328609e-06, "loss": 0.309, "step": 9184 }, { "epoch": 1.3170347003154574, "grad_norm": 0.29654747247695923, "learning_rate": 6.889402181986851e-06, "loss": 0.3135, "step": 9185 }, { "epoch": 1.3171780900487526, "grad_norm": 0.2820121943950653, "learning_rate": 6.8886297680470425e-06, "loss": 0.3083, "step": 9186 }, { "epoch": 1.3173214797820476, "grad_norm": 0.32318994402885437, "learning_rate": 6.887857301530688e-06, "loss": 0.3114, "step": 9187 }, { "epoch": 1.3174648695153426, "grad_norm": 0.31605711579322815, "learning_rate": 6.88708478245929e-06, "loss": 0.2932, "step": 9188 }, { "epoch": 1.3176082592486378, "grad_norm": 0.29630884528160095, "learning_rate": 6.886312210854354e-06, "loss": 0.3133, "step": 9189 }, { "epoch": 1.317751648981933, "grad_norm": 0.2887170910835266, "learning_rate": 6.885539586737388e-06, "loss": 0.3103, "step": 9190 }, { "epoch": 1.317895038715228, "grad_norm": 0.31363430619239807, "learning_rate": 6.884766910129899e-06, "loss": 0.3079, "step": 9191 }, { "epoch": 1.3180384284485231, "grad_norm": 0.28742098808288574, "learning_rate": 6.883994181053399e-06, "loss": 0.3226, "step": 9192 }, { "epoch": 1.3181818181818181, "grad_norm": 0.29883137345314026, "learning_rate": 6.8832213995294005e-06, "loss": 0.2875, "step": 9193 }, { "epoch": 1.3183252079151133, "grad_norm": 0.2740711569786072, "learning_rate": 6.882448565579414e-06, "loss": 0.3116, "step": 9194 }, { "epoch": 1.3184685976484083, "grad_norm": 0.29586124420166016, "learning_rate": 6.881675679224958e-06, "loss": 0.3045, "step": 9195 }, { "epoch": 1.3186119873817035, "grad_norm": 0.314750999212265, "learning_rate": 6.880902740487542e-06, "loss": 0.3036, "step": 9196 }, { "epoch": 1.3187553771149987, "grad_norm": 0.30129778385162354, "learning_rate": 6.880129749388688e-06, "loss": 0.3162, "step": 9197 }, { "epoch": 1.3188987668482937, "grad_norm": 0.2798502743244171, "learning_rate": 6.879356705949914e-06, "loss": 0.2901, "step": 9198 }, { "epoch": 1.3190421565815886, "grad_norm": 0.3016893267631531, "learning_rate": 6.878583610192742e-06, "loss": 0.2909, "step": 9199 }, { "epoch": 1.3191855463148838, "grad_norm": 0.3118900656700134, "learning_rate": 6.8778104621386894e-06, "loss": 0.2956, "step": 9200 }, { "epoch": 1.319328936048179, "grad_norm": 0.29489201307296753, "learning_rate": 6.877037261809283e-06, "loss": 0.3073, "step": 9201 }, { "epoch": 1.319472325781474, "grad_norm": 0.27851971983909607, "learning_rate": 6.876264009226048e-06, "loss": 0.3007, "step": 9202 }, { "epoch": 1.3196157155147692, "grad_norm": 0.292653352022171, "learning_rate": 6.8754907044105075e-06, "loss": 0.2994, "step": 9203 }, { "epoch": 1.3197591052480642, "grad_norm": 0.2957090735435486, "learning_rate": 6.874717347384192e-06, "loss": 0.3088, "step": 9204 }, { "epoch": 1.3199024949813594, "grad_norm": 0.28816795349121094, "learning_rate": 6.873943938168629e-06, "loss": 0.3044, "step": 9205 }, { "epoch": 1.3200458847146543, "grad_norm": 0.3319052755832672, "learning_rate": 6.87317047678535e-06, "loss": 0.33, "step": 9206 }, { "epoch": 1.3201892744479495, "grad_norm": 0.28039243817329407, "learning_rate": 6.872396963255886e-06, "loss": 0.3097, "step": 9207 }, { "epoch": 1.3203326641812447, "grad_norm": 0.2953474521636963, "learning_rate": 6.871623397601771e-06, "loss": 0.31, "step": 9208 }, { "epoch": 1.3204760539145397, "grad_norm": 0.28906989097595215, "learning_rate": 6.870849779844538e-06, "loss": 0.2906, "step": 9209 }, { "epoch": 1.3206194436478347, "grad_norm": 0.2972225248813629, "learning_rate": 6.870076110005726e-06, "loss": 0.2925, "step": 9210 }, { "epoch": 1.3207628333811299, "grad_norm": 0.3014054298400879, "learning_rate": 6.869302388106875e-06, "loss": 0.3171, "step": 9211 }, { "epoch": 1.320906223114425, "grad_norm": 0.2930872440338135, "learning_rate": 6.868528614169517e-06, "loss": 0.3074, "step": 9212 }, { "epoch": 1.32104961284772, "grad_norm": 0.30165648460388184, "learning_rate": 6.867754788215198e-06, "loss": 0.3152, "step": 9213 }, { "epoch": 1.3211930025810152, "grad_norm": 0.30737221240997314, "learning_rate": 6.866980910265459e-06, "loss": 0.3034, "step": 9214 }, { "epoch": 1.3213363923143102, "grad_norm": 0.3043826222419739, "learning_rate": 6.866206980341844e-06, "loss": 0.3059, "step": 9215 }, { "epoch": 1.3214797820476054, "grad_norm": 0.30075985193252563, "learning_rate": 6.865432998465898e-06, "loss": 0.3306, "step": 9216 }, { "epoch": 1.3216231717809004, "grad_norm": 0.2940007448196411, "learning_rate": 6.864658964659165e-06, "loss": 0.3172, "step": 9217 }, { "epoch": 1.3217665615141956, "grad_norm": 0.2899128794670105, "learning_rate": 6.863884878943194e-06, "loss": 0.2928, "step": 9218 }, { "epoch": 1.3219099512474908, "grad_norm": 0.2919662594795227, "learning_rate": 6.863110741339537e-06, "loss": 0.3115, "step": 9219 }, { "epoch": 1.3220533409807858, "grad_norm": 0.2785018980503082, "learning_rate": 6.862336551869742e-06, "loss": 0.3131, "step": 9220 }, { "epoch": 1.322196730714081, "grad_norm": 0.2719246447086334, "learning_rate": 6.8615623105553635e-06, "loss": 0.3024, "step": 9221 }, { "epoch": 1.322340120447376, "grad_norm": 0.2997671961784363, "learning_rate": 6.860788017417953e-06, "loss": 0.311, "step": 9222 }, { "epoch": 1.3224835101806711, "grad_norm": 0.2984482944011688, "learning_rate": 6.860013672479067e-06, "loss": 0.3171, "step": 9223 }, { "epoch": 1.322626899913966, "grad_norm": 0.2699795365333557, "learning_rate": 6.859239275760261e-06, "loss": 0.3072, "step": 9224 }, { "epoch": 1.3227702896472613, "grad_norm": 0.31114593148231506, "learning_rate": 6.8584648272830946e-06, "loss": 0.3275, "step": 9225 }, { "epoch": 1.3229136793805565, "grad_norm": 0.2685728073120117, "learning_rate": 6.857690327069125e-06, "loss": 0.3081, "step": 9226 }, { "epoch": 1.3230570691138515, "grad_norm": 0.3001975119113922, "learning_rate": 6.856915775139915e-06, "loss": 0.2918, "step": 9227 }, { "epoch": 1.3232004588471464, "grad_norm": 0.27809369564056396, "learning_rate": 6.856141171517025e-06, "loss": 0.3161, "step": 9228 }, { "epoch": 1.3233438485804416, "grad_norm": 0.2752116024494171, "learning_rate": 6.8553665162220214e-06, "loss": 0.2931, "step": 9229 }, { "epoch": 1.3234872383137368, "grad_norm": 0.28508666157722473, "learning_rate": 6.85459180927647e-06, "loss": 0.2957, "step": 9230 }, { "epoch": 1.3236306280470318, "grad_norm": 0.28651824593544006, "learning_rate": 6.853817050701932e-06, "loss": 0.3061, "step": 9231 }, { "epoch": 1.323774017780327, "grad_norm": 0.277485728263855, "learning_rate": 6.85304224051998e-06, "loss": 0.3088, "step": 9232 }, { "epoch": 1.323917407513622, "grad_norm": 0.2889009118080139, "learning_rate": 6.852267378752184e-06, "loss": 0.2936, "step": 9233 }, { "epoch": 1.3240607972469172, "grad_norm": 0.303158164024353, "learning_rate": 6.8514924654201136e-06, "loss": 0.3071, "step": 9234 }, { "epoch": 1.3242041869802121, "grad_norm": 0.2814585566520691, "learning_rate": 6.850717500545338e-06, "loss": 0.2967, "step": 9235 }, { "epoch": 1.3243475767135073, "grad_norm": 0.2789193391799927, "learning_rate": 6.849942484149437e-06, "loss": 0.3105, "step": 9236 }, { "epoch": 1.3244909664468025, "grad_norm": 0.2863723337650299, "learning_rate": 6.849167416253981e-06, "loss": 0.3123, "step": 9237 }, { "epoch": 1.3246343561800975, "grad_norm": 0.2930777966976166, "learning_rate": 6.84839229688055e-06, "loss": 0.3057, "step": 9238 }, { "epoch": 1.3247777459133925, "grad_norm": 0.2916547358036041, "learning_rate": 6.847617126050722e-06, "loss": 0.2949, "step": 9239 }, { "epoch": 1.3249211356466877, "grad_norm": 0.27811047434806824, "learning_rate": 6.846841903786073e-06, "loss": 0.2945, "step": 9240 }, { "epoch": 1.3250645253799829, "grad_norm": 0.2924503684043884, "learning_rate": 6.846066630108188e-06, "loss": 0.314, "step": 9241 }, { "epoch": 1.3252079151132778, "grad_norm": 0.3141407370567322, "learning_rate": 6.845291305038647e-06, "loss": 0.3266, "step": 9242 }, { "epoch": 1.325351304846573, "grad_norm": 0.2950359880924225, "learning_rate": 6.844515928599036e-06, "loss": 0.3387, "step": 9243 }, { "epoch": 1.325494694579868, "grad_norm": 0.28099504113197327, "learning_rate": 6.843740500810937e-06, "loss": 0.3123, "step": 9244 }, { "epoch": 1.3256380843131632, "grad_norm": 0.31891605257987976, "learning_rate": 6.84296502169594e-06, "loss": 0.3228, "step": 9245 }, { "epoch": 1.3257814740464582, "grad_norm": 0.3032852113246918, "learning_rate": 6.84218949127563e-06, "loss": 0.2999, "step": 9246 }, { "epoch": 1.3259248637797534, "grad_norm": 0.2856594920158386, "learning_rate": 6.8414139095716005e-06, "loss": 0.3113, "step": 9247 }, { "epoch": 1.3260682535130486, "grad_norm": 0.2735936939716339, "learning_rate": 6.840638276605439e-06, "loss": 0.3271, "step": 9248 }, { "epoch": 1.3262116432463436, "grad_norm": 0.29005831480026245, "learning_rate": 6.83986259239874e-06, "loss": 0.3026, "step": 9249 }, { "epoch": 1.3263550329796385, "grad_norm": 0.32549503445625305, "learning_rate": 6.839086856973096e-06, "loss": 0.3003, "step": 9250 }, { "epoch": 1.3264984227129337, "grad_norm": 0.27986857295036316, "learning_rate": 6.838311070350103e-06, "loss": 0.3085, "step": 9251 }, { "epoch": 1.326641812446229, "grad_norm": 0.29690372943878174, "learning_rate": 6.83753523255136e-06, "loss": 0.2855, "step": 9252 }, { "epoch": 1.326785202179524, "grad_norm": 0.28549861907958984, "learning_rate": 6.83675934359846e-06, "loss": 0.3046, "step": 9253 }, { "epoch": 1.326928591912819, "grad_norm": 0.29299670457839966, "learning_rate": 6.835983403513007e-06, "loss": 0.3049, "step": 9254 }, { "epoch": 1.327071981646114, "grad_norm": 0.3110141456127167, "learning_rate": 6.835207412316598e-06, "loss": 0.3041, "step": 9255 }, { "epoch": 1.3272153713794093, "grad_norm": 0.2884756028652191, "learning_rate": 6.83443137003084e-06, "loss": 0.3, "step": 9256 }, { "epoch": 1.3273587611127042, "grad_norm": 0.2905522286891937, "learning_rate": 6.8336552766773336e-06, "loss": 0.3045, "step": 9257 }, { "epoch": 1.3275021508459994, "grad_norm": 0.28730615973472595, "learning_rate": 6.832879132277686e-06, "loss": 0.3343, "step": 9258 }, { "epoch": 1.3276455405792946, "grad_norm": 0.305040180683136, "learning_rate": 6.832102936853501e-06, "loss": 0.3158, "step": 9259 }, { "epoch": 1.3277889303125896, "grad_norm": 0.30033692717552185, "learning_rate": 6.83132669042639e-06, "loss": 0.3024, "step": 9260 }, { "epoch": 1.3279323200458848, "grad_norm": 0.30882012844085693, "learning_rate": 6.830550393017961e-06, "loss": 0.3236, "step": 9261 }, { "epoch": 1.3280757097791798, "grad_norm": 0.2761790156364441, "learning_rate": 6.829774044649825e-06, "loss": 0.3304, "step": 9262 }, { "epoch": 1.328219099512475, "grad_norm": 0.3319164514541626, "learning_rate": 6.828997645343592e-06, "loss": 0.3192, "step": 9263 }, { "epoch": 1.32836248924577, "grad_norm": 0.2912995517253876, "learning_rate": 6.828221195120881e-06, "loss": 0.3097, "step": 9264 }, { "epoch": 1.3285058789790651, "grad_norm": 0.26604771614074707, "learning_rate": 6.827444694003303e-06, "loss": 0.3103, "step": 9265 }, { "epoch": 1.3286492687123603, "grad_norm": 0.2704184949398041, "learning_rate": 6.826668142012476e-06, "loss": 0.3019, "step": 9266 }, { "epoch": 1.3287926584456553, "grad_norm": 0.30328741669654846, "learning_rate": 6.825891539170017e-06, "loss": 0.2999, "step": 9267 }, { "epoch": 1.3289360481789503, "grad_norm": 0.2684308588504791, "learning_rate": 6.8251148854975495e-06, "loss": 0.3081, "step": 9268 }, { "epoch": 1.3290794379122455, "grad_norm": 0.29546183347702026, "learning_rate": 6.824338181016689e-06, "loss": 0.3193, "step": 9269 }, { "epoch": 1.3292228276455407, "grad_norm": 0.2745134234428406, "learning_rate": 6.823561425749059e-06, "loss": 0.3048, "step": 9270 }, { "epoch": 1.3293662173788356, "grad_norm": 0.26529404520988464, "learning_rate": 6.822784619716285e-06, "loss": 0.3013, "step": 9271 }, { "epoch": 1.3295096071121308, "grad_norm": 0.28645455837249756, "learning_rate": 6.8220077629399915e-06, "loss": 0.3141, "step": 9272 }, { "epoch": 1.3296529968454258, "grad_norm": 0.26821383833885193, "learning_rate": 6.821230855441804e-06, "loss": 0.316, "step": 9273 }, { "epoch": 1.329796386578721, "grad_norm": 0.30347320437431335, "learning_rate": 6.820453897243351e-06, "loss": 0.319, "step": 9274 }, { "epoch": 1.329939776312016, "grad_norm": 0.2956514358520508, "learning_rate": 6.819676888366261e-06, "loss": 0.319, "step": 9275 }, { "epoch": 1.3300831660453112, "grad_norm": 0.28718671202659607, "learning_rate": 6.818899828832168e-06, "loss": 0.3105, "step": 9276 }, { "epoch": 1.3302265557786064, "grad_norm": 0.2975008487701416, "learning_rate": 6.8181227186627e-06, "loss": 0.3228, "step": 9277 }, { "epoch": 1.3303699455119014, "grad_norm": 0.27986711263656616, "learning_rate": 6.817345557879494e-06, "loss": 0.3036, "step": 9278 }, { "epoch": 1.3305133352451963, "grad_norm": 0.29258131980895996, "learning_rate": 6.816568346504182e-06, "loss": 0.2937, "step": 9279 }, { "epoch": 1.3306567249784915, "grad_norm": 0.2958401143550873, "learning_rate": 6.815791084558401e-06, "loss": 0.2981, "step": 9280 }, { "epoch": 1.3308001147117867, "grad_norm": 0.29163217544555664, "learning_rate": 6.81501377206379e-06, "loss": 0.3372, "step": 9281 }, { "epoch": 1.3309435044450817, "grad_norm": 0.2929461896419525, "learning_rate": 6.814236409041988e-06, "loss": 0.3227, "step": 9282 }, { "epoch": 1.331086894178377, "grad_norm": 0.28148549795150757, "learning_rate": 6.813458995514635e-06, "loss": 0.2954, "step": 9283 }, { "epoch": 1.3312302839116719, "grad_norm": 0.2973315715789795, "learning_rate": 6.812681531503373e-06, "loss": 0.3058, "step": 9284 }, { "epoch": 1.331373673644967, "grad_norm": 0.2864097058773041, "learning_rate": 6.811904017029846e-06, "loss": 0.3262, "step": 9285 }, { "epoch": 1.331517063378262, "grad_norm": 0.26980432868003845, "learning_rate": 6.811126452115698e-06, "loss": 0.3173, "step": 9286 }, { "epoch": 1.3316604531115572, "grad_norm": 0.27580970525741577, "learning_rate": 6.810348836782577e-06, "loss": 0.3147, "step": 9287 }, { "epoch": 1.3318038428448524, "grad_norm": 0.2741210460662842, "learning_rate": 6.809571171052127e-06, "loss": 0.2861, "step": 9288 }, { "epoch": 1.3319472325781474, "grad_norm": 0.28895437717437744, "learning_rate": 6.808793454946001e-06, "loss": 0.3072, "step": 9289 }, { "epoch": 1.3320906223114424, "grad_norm": 0.3201027512550354, "learning_rate": 6.808015688485846e-06, "loss": 0.3021, "step": 9290 }, { "epoch": 1.3322340120447376, "grad_norm": 0.2952989935874939, "learning_rate": 6.807237871693316e-06, "loss": 0.3315, "step": 9291 }, { "epoch": 1.3323774017780328, "grad_norm": 0.31146442890167236, "learning_rate": 6.8064600045900645e-06, "loss": 0.2966, "step": 9292 }, { "epoch": 1.3325207915113277, "grad_norm": 0.3041491210460663, "learning_rate": 6.805682087197745e-06, "loss": 0.3149, "step": 9293 }, { "epoch": 1.332664181244623, "grad_norm": 0.2852412164211273, "learning_rate": 6.8049041195380125e-06, "loss": 0.2976, "step": 9294 }, { "epoch": 1.332807570977918, "grad_norm": 0.31043264269828796, "learning_rate": 6.804126101632528e-06, "loss": 0.3043, "step": 9295 }, { "epoch": 1.332950960711213, "grad_norm": 0.2927826941013336, "learning_rate": 6.803348033502947e-06, "loss": 0.3059, "step": 9296 }, { "epoch": 1.333094350444508, "grad_norm": 0.27578532695770264, "learning_rate": 6.8025699151709305e-06, "loss": 0.3158, "step": 9297 }, { "epoch": 1.3332377401778033, "grad_norm": 0.2845565676689148, "learning_rate": 6.801791746658141e-06, "loss": 0.3337, "step": 9298 }, { "epoch": 1.3333811299110985, "grad_norm": 0.28714698553085327, "learning_rate": 6.801013527986241e-06, "loss": 0.312, "step": 9299 }, { "epoch": 1.3335245196443934, "grad_norm": 0.2876453399658203, "learning_rate": 6.800235259176894e-06, "loss": 0.3023, "step": 9300 }, { "epoch": 1.3336679093776884, "grad_norm": 0.2755659222602844, "learning_rate": 6.799456940251768e-06, "loss": 0.3171, "step": 9301 }, { "epoch": 1.3338112991109836, "grad_norm": 0.26928913593292236, "learning_rate": 6.798678571232528e-06, "loss": 0.3095, "step": 9302 }, { "epoch": 1.3339546888442788, "grad_norm": 0.2753047049045563, "learning_rate": 6.797900152140844e-06, "loss": 0.3197, "step": 9303 }, { "epoch": 1.3340980785775738, "grad_norm": 0.275633305311203, "learning_rate": 6.797121682998384e-06, "loss": 0.3187, "step": 9304 }, { "epoch": 1.334241468310869, "grad_norm": 0.28047189116477966, "learning_rate": 6.7963431638268215e-06, "loss": 0.3056, "step": 9305 }, { "epoch": 1.334384858044164, "grad_norm": 0.28339824080467224, "learning_rate": 6.79556459464783e-06, "loss": 0.3067, "step": 9306 }, { "epoch": 1.3345282477774592, "grad_norm": 0.30016836524009705, "learning_rate": 6.794785975483082e-06, "loss": 0.3206, "step": 9307 }, { "epoch": 1.3346716375107541, "grad_norm": 0.2943519353866577, "learning_rate": 6.794007306354253e-06, "loss": 0.3269, "step": 9308 }, { "epoch": 1.3348150272440493, "grad_norm": 0.3004223704338074, "learning_rate": 6.793228587283018e-06, "loss": 0.3202, "step": 9309 }, { "epoch": 1.3349584169773445, "grad_norm": 0.269686222076416, "learning_rate": 6.7924498182910605e-06, "loss": 0.3073, "step": 9310 }, { "epoch": 1.3351018067106395, "grad_norm": 0.30994752049446106, "learning_rate": 6.791670999400056e-06, "loss": 0.2884, "step": 9311 }, { "epoch": 1.3352451964439347, "grad_norm": 0.30989164113998413, "learning_rate": 6.790892130631688e-06, "loss": 0.3116, "step": 9312 }, { "epoch": 1.3353885861772297, "grad_norm": 0.2866893708705902, "learning_rate": 6.7901132120076365e-06, "loss": 0.3234, "step": 9313 }, { "epoch": 1.3355319759105249, "grad_norm": 0.28025007247924805, "learning_rate": 6.789334243549589e-06, "loss": 0.3176, "step": 9314 }, { "epoch": 1.3356753656438198, "grad_norm": 0.30789798498153687, "learning_rate": 6.788555225279227e-06, "loss": 0.3137, "step": 9315 }, { "epoch": 1.335818755377115, "grad_norm": 0.31221526861190796, "learning_rate": 6.78777615721824e-06, "loss": 0.3089, "step": 9316 }, { "epoch": 1.3359621451104102, "grad_norm": 0.25426313281059265, "learning_rate": 6.786997039388314e-06, "loss": 0.3183, "step": 9317 }, { "epoch": 1.3361055348437052, "grad_norm": 0.2719162106513977, "learning_rate": 6.7862178718111396e-06, "loss": 0.3312, "step": 9318 }, { "epoch": 1.3362489245770002, "grad_norm": 0.29235729575157166, "learning_rate": 6.785438654508407e-06, "loss": 0.3001, "step": 9319 }, { "epoch": 1.3363923143102954, "grad_norm": 0.31006500124931335, "learning_rate": 6.784659387501808e-06, "loss": 0.316, "step": 9320 }, { "epoch": 1.3365357040435906, "grad_norm": 0.2884176969528198, "learning_rate": 6.7838800708130384e-06, "loss": 0.3033, "step": 9321 }, { "epoch": 1.3366790937768855, "grad_norm": 0.29077234864234924, "learning_rate": 6.783100704463792e-06, "loss": 0.3235, "step": 9322 }, { "epoch": 1.3368224835101807, "grad_norm": 0.2853916883468628, "learning_rate": 6.7823212884757636e-06, "loss": 0.3126, "step": 9323 }, { "epoch": 1.3369658732434757, "grad_norm": 0.2851167917251587, "learning_rate": 6.781541822870654e-06, "loss": 0.296, "step": 9324 }, { "epoch": 1.337109262976771, "grad_norm": 0.2919459640979767, "learning_rate": 6.78076230767016e-06, "loss": 0.2941, "step": 9325 }, { "epoch": 1.3372526527100659, "grad_norm": 0.2869925796985626, "learning_rate": 6.779982742895982e-06, "loss": 0.2915, "step": 9326 }, { "epoch": 1.337396042443361, "grad_norm": 0.28608185052871704, "learning_rate": 6.7792031285698225e-06, "loss": 0.3116, "step": 9327 }, { "epoch": 1.3375394321766563, "grad_norm": 0.29085662961006165, "learning_rate": 6.778423464713385e-06, "loss": 0.3043, "step": 9328 }, { "epoch": 1.3376828219099512, "grad_norm": 0.30264967679977417, "learning_rate": 6.777643751348374e-06, "loss": 0.3091, "step": 9329 }, { "epoch": 1.3378262116432462, "grad_norm": 0.3022099733352661, "learning_rate": 6.776863988496496e-06, "loss": 0.2996, "step": 9330 }, { "epoch": 1.3379696013765414, "grad_norm": 0.3066062331199646, "learning_rate": 6.776084176179457e-06, "loss": 0.3169, "step": 9331 }, { "epoch": 1.3381129911098366, "grad_norm": 0.2808508574962616, "learning_rate": 6.7753043144189666e-06, "loss": 0.2975, "step": 9332 }, { "epoch": 1.3382563808431316, "grad_norm": 0.28026098012924194, "learning_rate": 6.774524403236737e-06, "loss": 0.313, "step": 9333 }, { "epoch": 1.3383997705764268, "grad_norm": 0.2908580005168915, "learning_rate": 6.773744442654476e-06, "loss": 0.308, "step": 9334 }, { "epoch": 1.3385431603097218, "grad_norm": 0.3058621287345886, "learning_rate": 6.7729644326938995e-06, "loss": 0.3038, "step": 9335 }, { "epoch": 1.338686550043017, "grad_norm": 0.3039468824863434, "learning_rate": 6.772184373376718e-06, "loss": 0.3081, "step": 9336 }, { "epoch": 1.338829939776312, "grad_norm": 0.28064584732055664, "learning_rate": 6.771404264724651e-06, "loss": 0.2954, "step": 9337 }, { "epoch": 1.3389733295096071, "grad_norm": 0.30265435576438904, "learning_rate": 6.770624106759416e-06, "loss": 0.3168, "step": 9338 }, { "epoch": 1.3391167192429023, "grad_norm": 0.27508029341697693, "learning_rate": 6.769843899502727e-06, "loss": 0.3187, "step": 9339 }, { "epoch": 1.3392601089761973, "grad_norm": 0.28256139159202576, "learning_rate": 6.769063642976308e-06, "loss": 0.3178, "step": 9340 }, { "epoch": 1.3394034987094923, "grad_norm": 0.29094889760017395, "learning_rate": 6.768283337201878e-06, "loss": 0.3118, "step": 9341 }, { "epoch": 1.3395468884427875, "grad_norm": 0.30545487999916077, "learning_rate": 6.767502982201159e-06, "loss": 0.3217, "step": 9342 }, { "epoch": 1.3396902781760827, "grad_norm": 0.2797369658946991, "learning_rate": 6.766722577995879e-06, "loss": 0.2965, "step": 9343 }, { "epoch": 1.3398336679093776, "grad_norm": 0.2983033359050751, "learning_rate": 6.765942124607757e-06, "loss": 0.3022, "step": 9344 }, { "epoch": 1.3399770576426728, "grad_norm": 0.30315154790878296, "learning_rate": 6.765161622058524e-06, "loss": 0.2978, "step": 9345 }, { "epoch": 1.3401204473759678, "grad_norm": 0.3091549873352051, "learning_rate": 6.764381070369905e-06, "loss": 0.3018, "step": 9346 }, { "epoch": 1.340263837109263, "grad_norm": 0.305159330368042, "learning_rate": 6.763600469563633e-06, "loss": 0.306, "step": 9347 }, { "epoch": 1.340407226842558, "grad_norm": 0.3256121575832367, "learning_rate": 6.762819819661437e-06, "loss": 0.3122, "step": 9348 }, { "epoch": 1.3405506165758532, "grad_norm": 0.27970513701438904, "learning_rate": 6.762039120685049e-06, "loss": 0.3042, "step": 9349 }, { "epoch": 1.3406940063091484, "grad_norm": 0.29773709177970886, "learning_rate": 6.761258372656201e-06, "loss": 0.2874, "step": 9350 }, { "epoch": 1.3408373960424433, "grad_norm": 0.3316262364387512, "learning_rate": 6.76047757559663e-06, "loss": 0.2947, "step": 9351 }, { "epoch": 1.3409807857757385, "grad_norm": 0.2796347439289093, "learning_rate": 6.759696729528073e-06, "loss": 0.3089, "step": 9352 }, { "epoch": 1.3411241755090335, "grad_norm": 0.2879191040992737, "learning_rate": 6.7589158344722635e-06, "loss": 0.2993, "step": 9353 }, { "epoch": 1.3412675652423287, "grad_norm": 0.2989988625049591, "learning_rate": 6.758134890450945e-06, "loss": 0.3111, "step": 9354 }, { "epoch": 1.3414109549756237, "grad_norm": 0.2867092788219452, "learning_rate": 6.757353897485853e-06, "loss": 0.3149, "step": 9355 }, { "epoch": 1.3415543447089189, "grad_norm": 0.27695468068122864, "learning_rate": 6.756572855598733e-06, "loss": 0.2977, "step": 9356 }, { "epoch": 1.341697734442214, "grad_norm": 0.29420673847198486, "learning_rate": 6.755791764811328e-06, "loss": 0.3047, "step": 9357 }, { "epoch": 1.341841124175509, "grad_norm": 0.3073940575122833, "learning_rate": 6.755010625145381e-06, "loss": 0.3293, "step": 9358 }, { "epoch": 1.341984513908804, "grad_norm": 0.2753315269947052, "learning_rate": 6.754229436622637e-06, "loss": 0.3147, "step": 9359 }, { "epoch": 1.3421279036420992, "grad_norm": 0.2803100049495697, "learning_rate": 6.753448199264845e-06, "loss": 0.2977, "step": 9360 }, { "epoch": 1.3422712933753944, "grad_norm": 0.3096829652786255, "learning_rate": 6.752666913093751e-06, "loss": 0.2999, "step": 9361 }, { "epoch": 1.3424146831086894, "grad_norm": 0.29677659273147583, "learning_rate": 6.751885578131108e-06, "loss": 0.3285, "step": 9362 }, { "epoch": 1.3425580728419846, "grad_norm": 0.3025569021701813, "learning_rate": 6.751104194398665e-06, "loss": 0.3089, "step": 9363 }, { "epoch": 1.3427014625752796, "grad_norm": 0.3344983458518982, "learning_rate": 6.750322761918172e-06, "loss": 0.3059, "step": 9364 }, { "epoch": 1.3428448523085748, "grad_norm": 0.31388577818870544, "learning_rate": 6.749541280711389e-06, "loss": 0.3261, "step": 9365 }, { "epoch": 1.3429882420418697, "grad_norm": 0.28102874755859375, "learning_rate": 6.748759750800067e-06, "loss": 0.3223, "step": 9366 }, { "epoch": 1.343131631775165, "grad_norm": 0.30496957898139954, "learning_rate": 6.747978172205963e-06, "loss": 0.3049, "step": 9367 }, { "epoch": 1.3432750215084601, "grad_norm": 0.3212635815143585, "learning_rate": 6.747196544950838e-06, "loss": 0.2919, "step": 9368 }, { "epoch": 1.343418411241755, "grad_norm": 0.3070417642593384, "learning_rate": 6.746414869056447e-06, "loss": 0.3047, "step": 9369 }, { "epoch": 1.34356180097505, "grad_norm": 0.3133942186832428, "learning_rate": 6.7456331445445535e-06, "loss": 0.3002, "step": 9370 }, { "epoch": 1.3437051907083453, "grad_norm": 0.2911102771759033, "learning_rate": 6.744851371436917e-06, "loss": 0.3044, "step": 9371 }, { "epoch": 1.3438485804416405, "grad_norm": 0.2728109061717987, "learning_rate": 6.7440695497553034e-06, "loss": 0.2911, "step": 9372 }, { "epoch": 1.3439919701749354, "grad_norm": 0.2954670488834381, "learning_rate": 6.743287679521475e-06, "loss": 0.3025, "step": 9373 }, { "epoch": 1.3441353599082306, "grad_norm": 0.32220056653022766, "learning_rate": 6.742505760757201e-06, "loss": 0.3098, "step": 9374 }, { "epoch": 1.3442787496415256, "grad_norm": 0.31817615032196045, "learning_rate": 6.741723793484246e-06, "loss": 0.299, "step": 9375 }, { "epoch": 1.3444221393748208, "grad_norm": 0.299466073513031, "learning_rate": 6.740941777724381e-06, "loss": 0.2979, "step": 9376 }, { "epoch": 1.3445655291081158, "grad_norm": 0.2923892140388489, "learning_rate": 6.740159713499374e-06, "loss": 0.315, "step": 9377 }, { "epoch": 1.344708918841411, "grad_norm": 0.30498582124710083, "learning_rate": 6.739377600830999e-06, "loss": 0.2982, "step": 9378 }, { "epoch": 1.3448523085747062, "grad_norm": 0.28764113783836365, "learning_rate": 6.738595439741027e-06, "loss": 0.3166, "step": 9379 }, { "epoch": 1.3449956983080011, "grad_norm": 0.2804200053215027, "learning_rate": 6.737813230251231e-06, "loss": 0.2965, "step": 9380 }, { "epoch": 1.3451390880412961, "grad_norm": 0.31908825039863586, "learning_rate": 6.7370309723833884e-06, "loss": 0.3134, "step": 9381 }, { "epoch": 1.3452824777745913, "grad_norm": 0.2908811569213867, "learning_rate": 6.736248666159275e-06, "loss": 0.3062, "step": 9382 }, { "epoch": 1.3454258675078865, "grad_norm": 0.27813610434532166, "learning_rate": 6.7354663116006705e-06, "loss": 0.2971, "step": 9383 }, { "epoch": 1.3455692572411815, "grad_norm": 0.3195444345474243, "learning_rate": 6.7346839087293535e-06, "loss": 0.3157, "step": 9384 }, { "epoch": 1.3457126469744767, "grad_norm": 0.296041876077652, "learning_rate": 6.733901457567104e-06, "loss": 0.2972, "step": 9385 }, { "epoch": 1.3458560367077717, "grad_norm": 0.2934604287147522, "learning_rate": 6.733118958135706e-06, "loss": 0.2965, "step": 9386 }, { "epoch": 1.3459994264410668, "grad_norm": 0.31924375891685486, "learning_rate": 6.732336410456943e-06, "loss": 0.3015, "step": 9387 }, { "epoch": 1.3461428161743618, "grad_norm": 0.3024488091468811, "learning_rate": 6.731553814552598e-06, "loss": 0.3099, "step": 9388 }, { "epoch": 1.346286205907657, "grad_norm": 0.2798789143562317, "learning_rate": 6.7307711704444604e-06, "loss": 0.2996, "step": 9389 }, { "epoch": 1.3464295956409522, "grad_norm": 0.2794923484325409, "learning_rate": 6.729988478154313e-06, "loss": 0.3061, "step": 9390 }, { "epoch": 1.3465729853742472, "grad_norm": 0.3315746486186981, "learning_rate": 6.729205737703949e-06, "loss": 0.3102, "step": 9391 }, { "epoch": 1.3467163751075422, "grad_norm": 0.2900593876838684, "learning_rate": 6.728422949115157e-06, "loss": 0.3028, "step": 9392 }, { "epoch": 1.3468597648408374, "grad_norm": 0.27776703238487244, "learning_rate": 6.727640112409728e-06, "loss": 0.3149, "step": 9393 }, { "epoch": 1.3470031545741326, "grad_norm": 0.27849847078323364, "learning_rate": 6.726857227609457e-06, "loss": 0.3036, "step": 9394 }, { "epoch": 1.3471465443074275, "grad_norm": 0.3077472448348999, "learning_rate": 6.726074294736138e-06, "loss": 0.3117, "step": 9395 }, { "epoch": 1.3472899340407227, "grad_norm": 0.3109146058559418, "learning_rate": 6.725291313811564e-06, "loss": 0.2856, "step": 9396 }, { "epoch": 1.3474333237740177, "grad_norm": 0.2778247892856598, "learning_rate": 6.724508284857536e-06, "loss": 0.3061, "step": 9397 }, { "epoch": 1.347576713507313, "grad_norm": 0.3187851309776306, "learning_rate": 6.7237252078958485e-06, "loss": 0.3305, "step": 9398 }, { "epoch": 1.3477201032406079, "grad_norm": 0.2908826768398285, "learning_rate": 6.722942082948303e-06, "loss": 0.3085, "step": 9399 }, { "epoch": 1.347863492973903, "grad_norm": 0.3099987804889679, "learning_rate": 6.7221589100367e-06, "loss": 0.2984, "step": 9400 }, { "epoch": 1.3480068827071983, "grad_norm": 0.2854495048522949, "learning_rate": 6.721375689182842e-06, "loss": 0.304, "step": 9401 }, { "epoch": 1.3481502724404932, "grad_norm": 0.2928667962551117, "learning_rate": 6.720592420408534e-06, "loss": 0.2973, "step": 9402 }, { "epoch": 1.3482936621737884, "grad_norm": 0.2921009063720703, "learning_rate": 6.71980910373558e-06, "loss": 0.3107, "step": 9403 }, { "epoch": 1.3484370519070834, "grad_norm": 0.3005473017692566, "learning_rate": 6.719025739185784e-06, "loss": 0.3395, "step": 9404 }, { "epoch": 1.3485804416403786, "grad_norm": 0.2930971682071686, "learning_rate": 6.7182423267809575e-06, "loss": 0.3128, "step": 9405 }, { "epoch": 1.3487238313736736, "grad_norm": 0.28247401118278503, "learning_rate": 6.717458866542908e-06, "loss": 0.3079, "step": 9406 }, { "epoch": 1.3488672211069688, "grad_norm": 0.28976404666900635, "learning_rate": 6.716675358493444e-06, "loss": 0.3057, "step": 9407 }, { "epoch": 1.349010610840264, "grad_norm": 0.2972602844238281, "learning_rate": 6.7158918026543804e-06, "loss": 0.2946, "step": 9408 }, { "epoch": 1.349154000573559, "grad_norm": 0.2927791476249695, "learning_rate": 6.715108199047526e-06, "loss": 0.3001, "step": 9409 }, { "epoch": 1.349297390306854, "grad_norm": 0.2936805188655853, "learning_rate": 6.7143245476947e-06, "loss": 0.3022, "step": 9410 }, { "epoch": 1.3494407800401491, "grad_norm": 0.30917850136756897, "learning_rate": 6.713540848617716e-06, "loss": 0.3005, "step": 9411 }, { "epoch": 1.3495841697734443, "grad_norm": 0.29069292545318604, "learning_rate": 6.7127571018383894e-06, "loss": 0.2947, "step": 9412 }, { "epoch": 1.3497275595067393, "grad_norm": 0.36233052611351013, "learning_rate": 6.71197330737854e-06, "loss": 0.3118, "step": 9413 }, { "epoch": 1.3498709492400345, "grad_norm": 0.2841882109642029, "learning_rate": 6.711189465259989e-06, "loss": 0.2918, "step": 9414 }, { "epoch": 1.3500143389733295, "grad_norm": 0.3000665307044983, "learning_rate": 6.710405575504553e-06, "loss": 0.3038, "step": 9415 }, { "epoch": 1.3501577287066246, "grad_norm": 0.30216845870018005, "learning_rate": 6.709621638134058e-06, "loss": 0.2972, "step": 9416 }, { "epoch": 1.3503011184399196, "grad_norm": 0.30794835090637207, "learning_rate": 6.708837653170327e-06, "loss": 0.3022, "step": 9417 }, { "epoch": 1.3504445081732148, "grad_norm": 0.30382227897644043, "learning_rate": 6.708053620635183e-06, "loss": 0.3221, "step": 9418 }, { "epoch": 1.35058789790651, "grad_norm": 0.3114356994628906, "learning_rate": 6.707269540550454e-06, "loss": 0.3033, "step": 9419 }, { "epoch": 1.350731287639805, "grad_norm": 0.2965868413448334, "learning_rate": 6.706485412937969e-06, "loss": 0.3129, "step": 9420 }, { "epoch": 1.3508746773731, "grad_norm": 0.27808254957199097, "learning_rate": 6.705701237819553e-06, "loss": 0.3025, "step": 9421 }, { "epoch": 1.3510180671063952, "grad_norm": 0.2902170717716217, "learning_rate": 6.704917015217041e-06, "loss": 0.3052, "step": 9422 }, { "epoch": 1.3511614568396904, "grad_norm": 0.3240424394607544, "learning_rate": 6.704132745152259e-06, "loss": 0.3075, "step": 9423 }, { "epoch": 1.3513048465729853, "grad_norm": 0.29438745975494385, "learning_rate": 6.703348427647045e-06, "loss": 0.3121, "step": 9424 }, { "epoch": 1.3514482363062805, "grad_norm": 0.27890029549598694, "learning_rate": 6.70256406272323e-06, "loss": 0.2999, "step": 9425 }, { "epoch": 1.3515916260395755, "grad_norm": 0.3276202082633972, "learning_rate": 6.701779650402651e-06, "loss": 0.3033, "step": 9426 }, { "epoch": 1.3517350157728707, "grad_norm": 0.3251951038837433, "learning_rate": 6.700995190707143e-06, "loss": 0.3066, "step": 9427 }, { "epoch": 1.3518784055061657, "grad_norm": 0.2754380702972412, "learning_rate": 6.700210683658547e-06, "loss": 0.3102, "step": 9428 }, { "epoch": 1.3520217952394609, "grad_norm": 0.30484580993652344, "learning_rate": 6.6994261292787e-06, "loss": 0.3212, "step": 9429 }, { "epoch": 1.352165184972756, "grad_norm": 0.3237491250038147, "learning_rate": 6.698641527589444e-06, "loss": 0.3131, "step": 9430 }, { "epoch": 1.352308574706051, "grad_norm": 0.2947068214416504, "learning_rate": 6.697856878612622e-06, "loss": 0.3052, "step": 9431 }, { "epoch": 1.352451964439346, "grad_norm": 0.2808997631072998, "learning_rate": 6.6970721823700745e-06, "loss": 0.3464, "step": 9432 }, { "epoch": 1.3525953541726412, "grad_norm": 0.3039402663707733, "learning_rate": 6.696287438883651e-06, "loss": 0.3165, "step": 9433 }, { "epoch": 1.3527387439059364, "grad_norm": 0.3115203082561493, "learning_rate": 6.695502648175192e-06, "loss": 0.3061, "step": 9434 }, { "epoch": 1.3528821336392314, "grad_norm": 0.28950196504592896, "learning_rate": 6.694717810266549e-06, "loss": 0.3132, "step": 9435 }, { "epoch": 1.3530255233725266, "grad_norm": 0.2756613492965698, "learning_rate": 6.693932925179567e-06, "loss": 0.2986, "step": 9436 }, { "epoch": 1.3531689131058215, "grad_norm": 0.2774347960948944, "learning_rate": 6.693147992936098e-06, "loss": 0.3088, "step": 9437 }, { "epoch": 1.3533123028391167, "grad_norm": 0.3009651005268097, "learning_rate": 6.6923630135579966e-06, "loss": 0.2902, "step": 9438 }, { "epoch": 1.3534556925724117, "grad_norm": 0.3100873827934265, "learning_rate": 6.691577987067109e-06, "loss": 0.3187, "step": 9439 }, { "epoch": 1.353599082305707, "grad_norm": 0.2775067687034607, "learning_rate": 6.6907929134852935e-06, "loss": 0.3253, "step": 9440 }, { "epoch": 1.353742472039002, "grad_norm": 0.305483877658844, "learning_rate": 6.690007792834405e-06, "loss": 0.306, "step": 9441 }, { "epoch": 1.353885861772297, "grad_norm": 0.29793187975883484, "learning_rate": 6.689222625136299e-06, "loss": 0.3068, "step": 9442 }, { "epoch": 1.3540292515055923, "grad_norm": 0.2939492166042328, "learning_rate": 6.688437410412834e-06, "loss": 0.3031, "step": 9443 }, { "epoch": 1.3541726412388873, "grad_norm": 0.2947210669517517, "learning_rate": 6.687652148685868e-06, "loss": 0.29, "step": 9444 }, { "epoch": 1.3543160309721824, "grad_norm": 0.29624122381210327, "learning_rate": 6.686866839977262e-06, "loss": 0.2971, "step": 9445 }, { "epoch": 1.3544594207054774, "grad_norm": 0.27712419629096985, "learning_rate": 6.686081484308879e-06, "loss": 0.2929, "step": 9446 }, { "epoch": 1.3546028104387726, "grad_norm": 0.30807507038116455, "learning_rate": 6.685296081702579e-06, "loss": 0.3173, "step": 9447 }, { "epoch": 1.3547462001720678, "grad_norm": 0.29132407903671265, "learning_rate": 6.68451063218023e-06, "loss": 0.3033, "step": 9448 }, { "epoch": 1.3548895899053628, "grad_norm": 0.29061436653137207, "learning_rate": 6.683725135763698e-06, "loss": 0.2892, "step": 9449 }, { "epoch": 1.3550329796386578, "grad_norm": 0.2809451222419739, "learning_rate": 6.682939592474846e-06, "loss": 0.2976, "step": 9450 }, { "epoch": 1.355176369371953, "grad_norm": 0.3183281421661377, "learning_rate": 6.682154002335546e-06, "loss": 0.3022, "step": 9451 }, { "epoch": 1.3553197591052482, "grad_norm": 0.28258463740348816, "learning_rate": 6.681368365367665e-06, "loss": 0.3067, "step": 9452 }, { "epoch": 1.3554631488385431, "grad_norm": 0.27843061089515686, "learning_rate": 6.680582681593077e-06, "loss": 0.2892, "step": 9453 }, { "epoch": 1.3556065385718383, "grad_norm": 0.309196412563324, "learning_rate": 6.6797969510336505e-06, "loss": 0.2885, "step": 9454 }, { "epoch": 1.3557499283051333, "grad_norm": 0.2850508689880371, "learning_rate": 6.679011173711261e-06, "loss": 0.3008, "step": 9455 }, { "epoch": 1.3558933180384285, "grad_norm": 0.2734035849571228, "learning_rate": 6.678225349647783e-06, "loss": 0.2988, "step": 9456 }, { "epoch": 1.3560367077717235, "grad_norm": 0.2844671308994293, "learning_rate": 6.677439478865094e-06, "loss": 0.3031, "step": 9457 }, { "epoch": 1.3561800975050187, "grad_norm": 0.30188846588134766, "learning_rate": 6.676653561385069e-06, "loss": 0.2856, "step": 9458 }, { "epoch": 1.3563234872383139, "grad_norm": 0.2848140597343445, "learning_rate": 6.675867597229589e-06, "loss": 0.324, "step": 9459 }, { "epoch": 1.3564668769716088, "grad_norm": 0.27633950114250183, "learning_rate": 6.675081586420535e-06, "loss": 0.3092, "step": 9460 }, { "epoch": 1.3566102667049038, "grad_norm": 0.28916823863983154, "learning_rate": 6.674295528979783e-06, "loss": 0.2947, "step": 9461 }, { "epoch": 1.356753656438199, "grad_norm": 0.284132719039917, "learning_rate": 6.6735094249292206e-06, "loss": 0.3, "step": 9462 }, { "epoch": 1.3568970461714942, "grad_norm": 0.2887108325958252, "learning_rate": 6.67272327429073e-06, "loss": 0.3028, "step": 9463 }, { "epoch": 1.3570404359047892, "grad_norm": 0.30694982409477234, "learning_rate": 6.671937077086197e-06, "loss": 0.3054, "step": 9464 }, { "epoch": 1.3571838256380844, "grad_norm": 0.30322277545928955, "learning_rate": 6.671150833337507e-06, "loss": 0.3036, "step": 9465 }, { "epoch": 1.3573272153713793, "grad_norm": 0.25601065158843994, "learning_rate": 6.6703645430665475e-06, "loss": 0.3095, "step": 9466 }, { "epoch": 1.3574706051046745, "grad_norm": 0.2750462591648102, "learning_rate": 6.66957820629521e-06, "loss": 0.3081, "step": 9467 }, { "epoch": 1.3576139948379695, "grad_norm": 0.28809624910354614, "learning_rate": 6.668791823045384e-06, "loss": 0.3153, "step": 9468 }, { "epoch": 1.3577573845712647, "grad_norm": 0.30857887864112854, "learning_rate": 6.668005393338959e-06, "loss": 0.3099, "step": 9469 }, { "epoch": 1.35790077430456, "grad_norm": 0.2785240411758423, "learning_rate": 6.6672189171978315e-06, "loss": 0.299, "step": 9470 }, { "epoch": 1.3580441640378549, "grad_norm": 0.28050827980041504, "learning_rate": 6.666432394643894e-06, "loss": 0.2818, "step": 9471 }, { "epoch": 1.3581875537711499, "grad_norm": 0.3036881387233734, "learning_rate": 6.665645825699041e-06, "loss": 0.3038, "step": 9472 }, { "epoch": 1.358330943504445, "grad_norm": 0.41535302996635437, "learning_rate": 6.664859210385171e-06, "loss": 0.3231, "step": 9473 }, { "epoch": 1.3584743332377403, "grad_norm": 0.29424500465393066, "learning_rate": 6.664072548724181e-06, "loss": 0.312, "step": 9474 }, { "epoch": 1.3586177229710352, "grad_norm": 0.2906794250011444, "learning_rate": 6.663285840737971e-06, "loss": 0.3095, "step": 9475 }, { "epoch": 1.3587611127043304, "grad_norm": 0.28634458780288696, "learning_rate": 6.6624990864484426e-06, "loss": 0.2984, "step": 9476 }, { "epoch": 1.3589045024376254, "grad_norm": 0.27243995666503906, "learning_rate": 6.661712285877496e-06, "loss": 0.3044, "step": 9477 }, { "epoch": 1.3590478921709206, "grad_norm": 0.28771623969078064, "learning_rate": 6.660925439047037e-06, "loss": 0.3098, "step": 9478 }, { "epoch": 1.3591912819042156, "grad_norm": 0.2761134207248688, "learning_rate": 6.660138545978968e-06, "loss": 0.3141, "step": 9479 }, { "epoch": 1.3593346716375108, "grad_norm": 0.2770461440086365, "learning_rate": 6.659351606695196e-06, "loss": 0.3234, "step": 9480 }, { "epoch": 1.359478061370806, "grad_norm": 0.2866925597190857, "learning_rate": 6.658564621217627e-06, "loss": 0.3087, "step": 9481 }, { "epoch": 1.359621451104101, "grad_norm": 0.2919536232948303, "learning_rate": 6.657777589568171e-06, "loss": 0.3169, "step": 9482 }, { "epoch": 1.359764840837396, "grad_norm": 0.28826186060905457, "learning_rate": 6.656990511768736e-06, "loss": 0.312, "step": 9483 }, { "epoch": 1.359908230570691, "grad_norm": 0.2638234496116638, "learning_rate": 6.6562033878412354e-06, "loss": 0.3081, "step": 9484 }, { "epoch": 1.3600516203039863, "grad_norm": 0.27381062507629395, "learning_rate": 6.6554162178075795e-06, "loss": 0.2928, "step": 9485 }, { "epoch": 1.3601950100372813, "grad_norm": 0.3000953495502472, "learning_rate": 6.654629001689683e-06, "loss": 0.296, "step": 9486 }, { "epoch": 1.3603383997705765, "grad_norm": 0.30345502495765686, "learning_rate": 6.653841739509461e-06, "loss": 0.3197, "step": 9487 }, { "epoch": 1.3604817895038714, "grad_norm": 0.27994200587272644, "learning_rate": 6.6530544312888275e-06, "loss": 0.3291, "step": 9488 }, { "epoch": 1.3606251792371666, "grad_norm": 0.2776723802089691, "learning_rate": 6.652267077049704e-06, "loss": 0.3261, "step": 9489 }, { "epoch": 1.3607685689704616, "grad_norm": 0.27782225608825684, "learning_rate": 6.651479676814005e-06, "loss": 0.3145, "step": 9490 }, { "epoch": 1.3609119587037568, "grad_norm": 0.2574920058250427, "learning_rate": 6.650692230603651e-06, "loss": 0.3078, "step": 9491 }, { "epoch": 1.361055348437052, "grad_norm": 0.3162756562232971, "learning_rate": 6.649904738440566e-06, "loss": 0.316, "step": 9492 }, { "epoch": 1.361198738170347, "grad_norm": 0.27266284823417664, "learning_rate": 6.649117200346671e-06, "loss": 0.3234, "step": 9493 }, { "epoch": 1.3613421279036422, "grad_norm": 0.276806503534317, "learning_rate": 6.64832961634389e-06, "loss": 0.3292, "step": 9494 }, { "epoch": 1.3614855176369371, "grad_norm": 0.2837899625301361, "learning_rate": 6.647541986454148e-06, "loss": 0.3263, "step": 9495 }, { "epoch": 1.3616289073702323, "grad_norm": 0.27429628372192383, "learning_rate": 6.6467543106993715e-06, "loss": 0.3084, "step": 9496 }, { "epoch": 1.3617722971035273, "grad_norm": 0.2752460837364197, "learning_rate": 6.645966589101489e-06, "loss": 0.3072, "step": 9497 }, { "epoch": 1.3619156868368225, "grad_norm": 0.27598893642425537, "learning_rate": 6.645178821682428e-06, "loss": 0.2999, "step": 9498 }, { "epoch": 1.3620590765701177, "grad_norm": 0.2657751739025116, "learning_rate": 6.644391008464118e-06, "loss": 0.3178, "step": 9499 }, { "epoch": 1.3622024663034127, "grad_norm": 0.2669149339199066, "learning_rate": 6.643603149468493e-06, "loss": 0.3342, "step": 9500 }, { "epoch": 1.3623458560367077, "grad_norm": 0.292450487613678, "learning_rate": 6.642815244717484e-06, "loss": 0.3146, "step": 9501 }, { "epoch": 1.3624892457700029, "grad_norm": 0.27785322070121765, "learning_rate": 6.642027294233027e-06, "loss": 0.307, "step": 9502 }, { "epoch": 1.362632635503298, "grad_norm": 0.268096387386322, "learning_rate": 6.641239298037055e-06, "loss": 0.3165, "step": 9503 }, { "epoch": 1.362776025236593, "grad_norm": 0.28879570960998535, "learning_rate": 6.640451256151507e-06, "loss": 0.3066, "step": 9504 }, { "epoch": 1.3629194149698882, "grad_norm": 0.2891318202018738, "learning_rate": 6.639663168598318e-06, "loss": 0.3126, "step": 9505 }, { "epoch": 1.3630628047031832, "grad_norm": 0.2577175796031952, "learning_rate": 6.6388750353994305e-06, "loss": 0.3065, "step": 9506 }, { "epoch": 1.3632061944364784, "grad_norm": 0.2850992679595947, "learning_rate": 6.638086856576781e-06, "loss": 0.3016, "step": 9507 }, { "epoch": 1.3633495841697734, "grad_norm": 0.29935362935066223, "learning_rate": 6.637298632152314e-06, "loss": 0.3096, "step": 9508 }, { "epoch": 1.3634929739030686, "grad_norm": 0.2825947105884552, "learning_rate": 6.636510362147971e-06, "loss": 0.2903, "step": 9509 }, { "epoch": 1.3636363636363638, "grad_norm": 0.29006317257881165, "learning_rate": 6.6357220465856974e-06, "loss": 0.3071, "step": 9510 }, { "epoch": 1.3637797533696587, "grad_norm": 0.2538118064403534, "learning_rate": 6.634933685487439e-06, "loss": 0.2944, "step": 9511 }, { "epoch": 1.3639231431029537, "grad_norm": 0.2815398573875427, "learning_rate": 6.634145278875142e-06, "loss": 0.2898, "step": 9512 }, { "epoch": 1.364066532836249, "grad_norm": 0.3011009991168976, "learning_rate": 6.633356826770752e-06, "loss": 0.328, "step": 9513 }, { "epoch": 1.364209922569544, "grad_norm": 0.29422083497047424, "learning_rate": 6.632568329196224e-06, "loss": 0.3071, "step": 9514 }, { "epoch": 1.364353312302839, "grad_norm": 0.27527257800102234, "learning_rate": 6.631779786173502e-06, "loss": 0.3007, "step": 9515 }, { "epoch": 1.3644967020361343, "grad_norm": 0.30180197954177856, "learning_rate": 6.630991197724543e-06, "loss": 0.3121, "step": 9516 }, { "epoch": 1.3646400917694292, "grad_norm": 0.2835593819618225, "learning_rate": 6.630202563871298e-06, "loss": 0.3023, "step": 9517 }, { "epoch": 1.3647834815027244, "grad_norm": 0.30813097953796387, "learning_rate": 6.629413884635721e-06, "loss": 0.3156, "step": 9518 }, { "epoch": 1.3649268712360194, "grad_norm": 0.3147718012332916, "learning_rate": 6.628625160039766e-06, "loss": 0.3014, "step": 9519 }, { "epoch": 1.3650702609693146, "grad_norm": 0.3163696825504303, "learning_rate": 6.627836390105393e-06, "loss": 0.3211, "step": 9520 }, { "epoch": 1.3652136507026098, "grad_norm": 0.29275938868522644, "learning_rate": 6.627047574854559e-06, "loss": 0.3238, "step": 9521 }, { "epoch": 1.3653570404359048, "grad_norm": 0.3020709156990051, "learning_rate": 6.626258714309224e-06, "loss": 0.3035, "step": 9522 }, { "epoch": 1.3655004301691998, "grad_norm": 0.26611119508743286, "learning_rate": 6.625469808491347e-06, "loss": 0.2949, "step": 9523 }, { "epoch": 1.365643819902495, "grad_norm": 0.28682225942611694, "learning_rate": 6.624680857422893e-06, "loss": 0.296, "step": 9524 }, { "epoch": 1.3657872096357901, "grad_norm": 0.29651620984077454, "learning_rate": 6.623891861125822e-06, "loss": 0.319, "step": 9525 }, { "epoch": 1.3659305993690851, "grad_norm": 0.30858445167541504, "learning_rate": 6.623102819622098e-06, "loss": 0.297, "step": 9526 }, { "epoch": 1.3660739891023803, "grad_norm": 0.28320035338401794, "learning_rate": 6.62231373293369e-06, "loss": 0.318, "step": 9527 }, { "epoch": 1.3662173788356753, "grad_norm": 0.2952359616756439, "learning_rate": 6.621524601082563e-06, "loss": 0.3183, "step": 9528 }, { "epoch": 1.3663607685689705, "grad_norm": 0.2902381122112274, "learning_rate": 6.620735424090685e-06, "loss": 0.3185, "step": 9529 }, { "epoch": 1.3665041583022655, "grad_norm": 0.2795674204826355, "learning_rate": 6.6199462019800266e-06, "loss": 0.2709, "step": 9530 }, { "epoch": 1.3666475480355607, "grad_norm": 0.2914450466632843, "learning_rate": 6.619156934772557e-06, "loss": 0.322, "step": 9531 }, { "epoch": 1.3667909377688559, "grad_norm": 0.3256256878376007, "learning_rate": 6.618367622490251e-06, "loss": 0.3106, "step": 9532 }, { "epoch": 1.3669343275021508, "grad_norm": 0.27053302526474, "learning_rate": 6.6175782651550775e-06, "loss": 0.3173, "step": 9533 }, { "epoch": 1.367077717235446, "grad_norm": 0.2656233310699463, "learning_rate": 6.616788862789015e-06, "loss": 0.3181, "step": 9534 }, { "epoch": 1.367221106968741, "grad_norm": 0.3162373900413513, "learning_rate": 6.615999415414038e-06, "loss": 0.3316, "step": 9535 }, { "epoch": 1.3673644967020362, "grad_norm": 0.30670779943466187, "learning_rate": 6.615209923052122e-06, "loss": 0.3149, "step": 9536 }, { "epoch": 1.3675078864353312, "grad_norm": 0.2789866328239441, "learning_rate": 6.614420385725245e-06, "loss": 0.2968, "step": 9537 }, { "epoch": 1.3676512761686264, "grad_norm": 0.2838498055934906, "learning_rate": 6.613630803455391e-06, "loss": 0.3171, "step": 9538 }, { "epoch": 1.3677946659019216, "grad_norm": 0.3275901973247528, "learning_rate": 6.612841176264536e-06, "loss": 0.3352, "step": 9539 }, { "epoch": 1.3679380556352165, "grad_norm": 0.2771708369255066, "learning_rate": 6.612051504174663e-06, "loss": 0.2976, "step": 9540 }, { "epoch": 1.3680814453685115, "grad_norm": 0.3049936294555664, "learning_rate": 6.611261787207758e-06, "loss": 0.3318, "step": 9541 }, { "epoch": 1.3682248351018067, "grad_norm": 0.28368666768074036, "learning_rate": 6.6104720253858025e-06, "loss": 0.3075, "step": 9542 }, { "epoch": 1.368368224835102, "grad_norm": 0.297553688287735, "learning_rate": 6.609682218730784e-06, "loss": 0.2908, "step": 9543 }, { "epoch": 1.3685116145683969, "grad_norm": 0.2799660563468933, "learning_rate": 6.608892367264686e-06, "loss": 0.3079, "step": 9544 }, { "epoch": 1.368655004301692, "grad_norm": 0.30468034744262695, "learning_rate": 6.6081024710095025e-06, "loss": 0.3232, "step": 9545 }, { "epoch": 1.368798394034987, "grad_norm": 0.2944154143333435, "learning_rate": 6.607312529987218e-06, "loss": 0.3035, "step": 9546 }, { "epoch": 1.3689417837682822, "grad_norm": 0.2847762107849121, "learning_rate": 6.606522544219824e-06, "loss": 0.3035, "step": 9547 }, { "epoch": 1.3690851735015772, "grad_norm": 0.2975311875343323, "learning_rate": 6.605732513729316e-06, "loss": 0.3014, "step": 9548 }, { "epoch": 1.3692285632348724, "grad_norm": 0.310676634311676, "learning_rate": 6.604942438537685e-06, "loss": 0.3167, "step": 9549 }, { "epoch": 1.3693719529681676, "grad_norm": 0.2968237102031708, "learning_rate": 6.604152318666924e-06, "loss": 0.297, "step": 9550 }, { "epoch": 1.3695153427014626, "grad_norm": 0.28196582198143005, "learning_rate": 6.603362154139033e-06, "loss": 0.3142, "step": 9551 }, { "epoch": 1.3696587324347576, "grad_norm": 0.3191290497779846, "learning_rate": 6.602571944976003e-06, "loss": 0.2941, "step": 9552 }, { "epoch": 1.3698021221680527, "grad_norm": 0.3027784824371338, "learning_rate": 6.601781691199835e-06, "loss": 0.3144, "step": 9553 }, { "epoch": 1.369945511901348, "grad_norm": 0.3077189326286316, "learning_rate": 6.60099139283253e-06, "loss": 0.3117, "step": 9554 }, { "epoch": 1.370088901634643, "grad_norm": 0.27832427620887756, "learning_rate": 6.600201049896087e-06, "loss": 0.3109, "step": 9555 }, { "epoch": 1.3702322913679381, "grad_norm": 0.31802916526794434, "learning_rate": 6.599410662412507e-06, "loss": 0.3119, "step": 9556 }, { "epoch": 1.370375681101233, "grad_norm": 0.2792312502861023, "learning_rate": 6.598620230403798e-06, "loss": 0.3123, "step": 9557 }, { "epoch": 1.3705190708345283, "grad_norm": 0.3103174865245819, "learning_rate": 6.597829753891959e-06, "loss": 0.3072, "step": 9558 }, { "epoch": 1.3706624605678233, "grad_norm": 0.2825697958469391, "learning_rate": 6.597039232898998e-06, "loss": 0.3123, "step": 9559 }, { "epoch": 1.3708058503011185, "grad_norm": 0.28420954942703247, "learning_rate": 6.596248667446921e-06, "loss": 0.2962, "step": 9560 }, { "epoch": 1.3709492400344137, "grad_norm": 0.2887920141220093, "learning_rate": 6.595458057557735e-06, "loss": 0.3275, "step": 9561 }, { "epoch": 1.3710926297677086, "grad_norm": 0.31433868408203125, "learning_rate": 6.5946674032534545e-06, "loss": 0.3028, "step": 9562 }, { "epoch": 1.3712360195010036, "grad_norm": 0.27962765097618103, "learning_rate": 6.593876704556084e-06, "loss": 0.2891, "step": 9563 }, { "epoch": 1.3713794092342988, "grad_norm": 0.2876311242580414, "learning_rate": 6.593085961487638e-06, "loss": 0.3227, "step": 9564 }, { "epoch": 1.371522798967594, "grad_norm": 0.2850568890571594, "learning_rate": 6.59229517407013e-06, "loss": 0.3069, "step": 9565 }, { "epoch": 1.371666188700889, "grad_norm": 0.28279241919517517, "learning_rate": 6.591504342325573e-06, "loss": 0.3113, "step": 9566 }, { "epoch": 1.3718095784341842, "grad_norm": 0.28249606490135193, "learning_rate": 6.590713466275984e-06, "loss": 0.318, "step": 9567 }, { "epoch": 1.3719529681674791, "grad_norm": 0.3150680959224701, "learning_rate": 6.5899225459433806e-06, "loss": 0.2896, "step": 9568 }, { "epoch": 1.3720963579007743, "grad_norm": 0.3029012680053711, "learning_rate": 6.589131581349777e-06, "loss": 0.3103, "step": 9569 }, { "epoch": 1.3722397476340693, "grad_norm": 0.30287447571754456, "learning_rate": 6.588340572517195e-06, "loss": 0.3083, "step": 9570 }, { "epoch": 1.3723831373673645, "grad_norm": 0.28119051456451416, "learning_rate": 6.587549519467657e-06, "loss": 0.3049, "step": 9571 }, { "epoch": 1.3725265271006597, "grad_norm": 0.2924005091190338, "learning_rate": 6.586758422223179e-06, "loss": 0.2907, "step": 9572 }, { "epoch": 1.3726699168339547, "grad_norm": 0.30381813645362854, "learning_rate": 6.585967280805789e-06, "loss": 0.3037, "step": 9573 }, { "epoch": 1.3728133065672496, "grad_norm": 0.2968769967556, "learning_rate": 6.58517609523751e-06, "loss": 0.3167, "step": 9574 }, { "epoch": 1.3729566963005448, "grad_norm": 0.2769958972930908, "learning_rate": 6.584384865540366e-06, "loss": 0.312, "step": 9575 }, { "epoch": 1.37310008603384, "grad_norm": 0.2973882257938385, "learning_rate": 6.583593591736387e-06, "loss": 0.2904, "step": 9576 }, { "epoch": 1.373243475767135, "grad_norm": 0.2961469888687134, "learning_rate": 6.582802273847596e-06, "loss": 0.3147, "step": 9577 }, { "epoch": 1.3733868655004302, "grad_norm": 0.31377261877059937, "learning_rate": 6.582010911896026e-06, "loss": 0.3046, "step": 9578 }, { "epoch": 1.3735302552337252, "grad_norm": 0.277773380279541, "learning_rate": 6.581219505903705e-06, "loss": 0.3063, "step": 9579 }, { "epoch": 1.3736736449670204, "grad_norm": 0.2864339351654053, "learning_rate": 6.580428055892665e-06, "loss": 0.2946, "step": 9580 }, { "epoch": 1.3738170347003154, "grad_norm": 0.29319125413894653, "learning_rate": 6.57963656188494e-06, "loss": 0.3003, "step": 9581 }, { "epoch": 1.3739604244336105, "grad_norm": 0.309000164270401, "learning_rate": 6.578845023902561e-06, "loss": 0.315, "step": 9582 }, { "epoch": 1.3741038141669057, "grad_norm": 0.30881571769714355, "learning_rate": 6.578053441967567e-06, "loss": 0.3241, "step": 9583 }, { "epoch": 1.3742472039002007, "grad_norm": 0.30601420998573303, "learning_rate": 6.577261816101993e-06, "loss": 0.3149, "step": 9584 }, { "epoch": 1.374390593633496, "grad_norm": 0.26972514390945435, "learning_rate": 6.576470146327876e-06, "loss": 0.3132, "step": 9585 }, { "epoch": 1.374533983366791, "grad_norm": 0.3051667809486389, "learning_rate": 6.575678432667255e-06, "loss": 0.2784, "step": 9586 }, { "epoch": 1.374677373100086, "grad_norm": 0.29542285203933716, "learning_rate": 6.574886675142171e-06, "loss": 0.3212, "step": 9587 }, { "epoch": 1.374820762833381, "grad_norm": 0.27812814712524414, "learning_rate": 6.574094873774663e-06, "loss": 0.3279, "step": 9588 }, { "epoch": 1.3749641525666763, "grad_norm": 0.3226223587989807, "learning_rate": 6.573303028586777e-06, "loss": 0.3112, "step": 9589 }, { "epoch": 1.3751075422999715, "grad_norm": 0.3063323199748993, "learning_rate": 6.572511139600554e-06, "loss": 0.3186, "step": 9590 }, { "epoch": 1.3752509320332664, "grad_norm": 0.2866271734237671, "learning_rate": 6.571719206838039e-06, "loss": 0.2993, "step": 9591 }, { "epoch": 1.3753943217665614, "grad_norm": 0.29648691415786743, "learning_rate": 6.570927230321279e-06, "loss": 0.2975, "step": 9592 }, { "epoch": 1.3755377114998566, "grad_norm": 0.29166534543037415, "learning_rate": 6.570135210072322e-06, "loss": 0.3022, "step": 9593 }, { "epoch": 1.3756811012331518, "grad_norm": 0.3039245009422302, "learning_rate": 6.5693431461132165e-06, "loss": 0.3253, "step": 9594 }, { "epoch": 1.3758244909664468, "grad_norm": 0.2807421386241913, "learning_rate": 6.568551038466013e-06, "loss": 0.3152, "step": 9595 }, { "epoch": 1.375967880699742, "grad_norm": 0.33942148089408875, "learning_rate": 6.567758887152759e-06, "loss": 0.3158, "step": 9596 }, { "epoch": 1.376111270433037, "grad_norm": 0.30427882075309753, "learning_rate": 6.566966692195511e-06, "loss": 0.324, "step": 9597 }, { "epoch": 1.3762546601663321, "grad_norm": 0.2901782691478729, "learning_rate": 6.566174453616319e-06, "loss": 0.3078, "step": 9598 }, { "epoch": 1.376398049899627, "grad_norm": 0.30513596534729004, "learning_rate": 6.5653821714372415e-06, "loss": 0.3092, "step": 9599 }, { "epoch": 1.3765414396329223, "grad_norm": 0.2892796993255615, "learning_rate": 6.56458984568033e-06, "loss": 0.3171, "step": 9600 }, { "epoch": 1.3766848293662175, "grad_norm": 0.2877497673034668, "learning_rate": 6.563797476367646e-06, "loss": 0.3079, "step": 9601 }, { "epoch": 1.3768282190995125, "grad_norm": 0.28823161125183105, "learning_rate": 6.563005063521245e-06, "loss": 0.3108, "step": 9602 }, { "epoch": 1.3769716088328074, "grad_norm": 0.2995104193687439, "learning_rate": 6.562212607163188e-06, "loss": 0.3116, "step": 9603 }, { "epoch": 1.3771149985661026, "grad_norm": 0.3193005621433258, "learning_rate": 6.561420107315535e-06, "loss": 0.327, "step": 9604 }, { "epoch": 1.3772583882993978, "grad_norm": 0.284772127866745, "learning_rate": 6.560627564000348e-06, "loss": 0.2895, "step": 9605 }, { "epoch": 1.3774017780326928, "grad_norm": 0.3071857690811157, "learning_rate": 6.5598349772396896e-06, "loss": 0.3412, "step": 9606 }, { "epoch": 1.377545167765988, "grad_norm": 0.3181094229221344, "learning_rate": 6.559042347055626e-06, "loss": 0.3109, "step": 9607 }, { "epoch": 1.377688557499283, "grad_norm": 0.28533607721328735, "learning_rate": 6.558249673470221e-06, "loss": 0.3257, "step": 9608 }, { "epoch": 1.3778319472325782, "grad_norm": 0.28391388058662415, "learning_rate": 6.557456956505543e-06, "loss": 0.3214, "step": 9609 }, { "epoch": 1.3779753369658732, "grad_norm": 0.2906380891799927, "learning_rate": 6.5566641961836575e-06, "loss": 0.3224, "step": 9610 }, { "epoch": 1.3781187266991684, "grad_norm": 0.29843810200691223, "learning_rate": 6.555871392526636e-06, "loss": 0.3311, "step": 9611 }, { "epoch": 1.3782621164324635, "grad_norm": 0.2782810628414154, "learning_rate": 6.5550785455565495e-06, "loss": 0.3145, "step": 9612 }, { "epoch": 1.3784055061657585, "grad_norm": 0.3043227791786194, "learning_rate": 6.554285655295468e-06, "loss": 0.3432, "step": 9613 }, { "epoch": 1.3785488958990535, "grad_norm": 0.27921658754348755, "learning_rate": 6.553492721765464e-06, "loss": 0.3179, "step": 9614 }, { "epoch": 1.3786922856323487, "grad_norm": 0.27024227380752563, "learning_rate": 6.5526997449886135e-06, "loss": 0.3005, "step": 9615 }, { "epoch": 1.3788356753656439, "grad_norm": 0.2855016887187958, "learning_rate": 6.551906724986989e-06, "loss": 0.2928, "step": 9616 }, { "epoch": 1.3789790650989389, "grad_norm": 0.2950887382030487, "learning_rate": 6.55111366178267e-06, "loss": 0.3238, "step": 9617 }, { "epoch": 1.379122454832234, "grad_norm": 0.3017479479312897, "learning_rate": 6.550320555397731e-06, "loss": 0.3203, "step": 9618 }, { "epoch": 1.379265844565529, "grad_norm": 0.29646962881088257, "learning_rate": 6.549527405854255e-06, "loss": 0.3139, "step": 9619 }, { "epoch": 1.3794092342988242, "grad_norm": 0.28634315729141235, "learning_rate": 6.548734213174317e-06, "loss": 0.3148, "step": 9620 }, { "epoch": 1.3795526240321192, "grad_norm": 0.28504037857055664, "learning_rate": 6.547940977380003e-06, "loss": 0.3048, "step": 9621 }, { "epoch": 1.3796960137654144, "grad_norm": 0.28526437282562256, "learning_rate": 6.547147698493392e-06, "loss": 0.3292, "step": 9622 }, { "epoch": 1.3798394034987096, "grad_norm": 0.2836337387561798, "learning_rate": 6.54635437653657e-06, "loss": 0.3064, "step": 9623 }, { "epoch": 1.3799827932320046, "grad_norm": 0.2643950879573822, "learning_rate": 6.545561011531621e-06, "loss": 0.2867, "step": 9624 }, { "epoch": 1.3801261829652998, "grad_norm": 0.27391332387924194, "learning_rate": 6.54476760350063e-06, "loss": 0.307, "step": 9625 }, { "epoch": 1.3802695726985947, "grad_norm": 0.28960856795310974, "learning_rate": 6.543974152465687e-06, "loss": 0.328, "step": 9626 }, { "epoch": 1.38041296243189, "grad_norm": 0.2852765619754791, "learning_rate": 6.543180658448877e-06, "loss": 0.3172, "step": 9627 }, { "epoch": 1.380556352165185, "grad_norm": 0.2787036895751953, "learning_rate": 6.542387121472291e-06, "loss": 0.3188, "step": 9628 }, { "epoch": 1.38069974189848, "grad_norm": 0.29198935627937317, "learning_rate": 6.54159354155802e-06, "loss": 0.2987, "step": 9629 }, { "epoch": 1.3808431316317753, "grad_norm": 0.3021796643733978, "learning_rate": 6.540799918728158e-06, "loss": 0.2976, "step": 9630 }, { "epoch": 1.3809865213650703, "grad_norm": 0.2721233665943146, "learning_rate": 6.540006253004796e-06, "loss": 0.3155, "step": 9631 }, { "epoch": 1.3811299110983652, "grad_norm": 0.2769593298435211, "learning_rate": 6.539212544410028e-06, "loss": 0.3142, "step": 9632 }, { "epoch": 1.3812733008316604, "grad_norm": 0.2736526131629944, "learning_rate": 6.538418792965952e-06, "loss": 0.3078, "step": 9633 }, { "epoch": 1.3814166905649556, "grad_norm": 0.3102923631668091, "learning_rate": 6.537624998694662e-06, "loss": 0.316, "step": 9634 }, { "epoch": 1.3815600802982506, "grad_norm": 0.29559600353240967, "learning_rate": 6.5368311616182575e-06, "loss": 0.3157, "step": 9635 }, { "epoch": 1.3817034700315458, "grad_norm": 0.3120094835758209, "learning_rate": 6.536037281758838e-06, "loss": 0.3233, "step": 9636 }, { "epoch": 1.3818468597648408, "grad_norm": 0.3154425621032715, "learning_rate": 6.535243359138503e-06, "loss": 0.3081, "step": 9637 }, { "epoch": 1.381990249498136, "grad_norm": 0.3011285960674286, "learning_rate": 6.534449393779354e-06, "loss": 0.3149, "step": 9638 }, { "epoch": 1.382133639231431, "grad_norm": 0.29823341965675354, "learning_rate": 6.533655385703495e-06, "loss": 0.2979, "step": 9639 }, { "epoch": 1.3822770289647262, "grad_norm": 0.26079586148262024, "learning_rate": 6.532861334933029e-06, "loss": 0.322, "step": 9640 }, { "epoch": 1.3824204186980213, "grad_norm": 0.28421446681022644, "learning_rate": 6.5320672414900625e-06, "loss": 0.316, "step": 9641 }, { "epoch": 1.3825638084313163, "grad_norm": 0.29986757040023804, "learning_rate": 6.531273105396699e-06, "loss": 0.3045, "step": 9642 }, { "epoch": 1.3827071981646113, "grad_norm": 0.28719621896743774, "learning_rate": 6.53047892667505e-06, "loss": 0.3024, "step": 9643 }, { "epoch": 1.3828505878979065, "grad_norm": 0.2746962904930115, "learning_rate": 6.529684705347221e-06, "loss": 0.3017, "step": 9644 }, { "epoch": 1.3829939776312017, "grad_norm": 0.2584628462791443, "learning_rate": 6.528890441435321e-06, "loss": 0.2863, "step": 9645 }, { "epoch": 1.3831373673644967, "grad_norm": 0.284883588552475, "learning_rate": 6.528096134961465e-06, "loss": 0.3044, "step": 9646 }, { "epoch": 1.3832807570977919, "grad_norm": 0.2949979305267334, "learning_rate": 6.527301785947763e-06, "loss": 0.309, "step": 9647 }, { "epoch": 1.3834241468310868, "grad_norm": 0.29177722334861755, "learning_rate": 6.526507394416328e-06, "loss": 0.3186, "step": 9648 }, { "epoch": 1.383567536564382, "grad_norm": 0.29694968461990356, "learning_rate": 6.5257129603892765e-06, "loss": 0.311, "step": 9649 }, { "epoch": 1.383710926297677, "grad_norm": 0.30861136317253113, "learning_rate": 6.5249184838887224e-06, "loss": 0.3122, "step": 9650 }, { "epoch": 1.3838543160309722, "grad_norm": 0.3249705731868744, "learning_rate": 6.524123964936785e-06, "loss": 0.3087, "step": 9651 }, { "epoch": 1.3839977057642674, "grad_norm": 0.27674952149391174, "learning_rate": 6.52332940355558e-06, "loss": 0.3102, "step": 9652 }, { "epoch": 1.3841410954975624, "grad_norm": 0.2808094024658203, "learning_rate": 6.522534799767226e-06, "loss": 0.313, "step": 9653 }, { "epoch": 1.3842844852308573, "grad_norm": 0.29826194047927856, "learning_rate": 6.521740153593847e-06, "loss": 0.3259, "step": 9654 }, { "epoch": 1.3844278749641525, "grad_norm": 0.2995552718639374, "learning_rate": 6.520945465057562e-06, "loss": 0.2856, "step": 9655 }, { "epoch": 1.3845712646974477, "grad_norm": 0.2943849563598633, "learning_rate": 6.520150734180495e-06, "loss": 0.3239, "step": 9656 }, { "epoch": 1.3847146544307427, "grad_norm": 0.27670836448669434, "learning_rate": 6.519355960984771e-06, "loss": 0.3015, "step": 9657 }, { "epoch": 1.384858044164038, "grad_norm": 0.28812846541404724, "learning_rate": 6.518561145492514e-06, "loss": 0.3058, "step": 9658 }, { "epoch": 1.3850014338973329, "grad_norm": 0.2988668978214264, "learning_rate": 6.51776628772585e-06, "loss": 0.3031, "step": 9659 }, { "epoch": 1.385144823630628, "grad_norm": 0.322284996509552, "learning_rate": 6.516971387706908e-06, "loss": 0.3335, "step": 9660 }, { "epoch": 1.385288213363923, "grad_norm": 0.3144535720348358, "learning_rate": 6.516176445457816e-06, "loss": 0.3269, "step": 9661 }, { "epoch": 1.3854316030972182, "grad_norm": 0.28546959161758423, "learning_rate": 6.515381461000705e-06, "loss": 0.2998, "step": 9662 }, { "epoch": 1.3855749928305134, "grad_norm": 0.29378318786621094, "learning_rate": 6.514586434357704e-06, "loss": 0.3045, "step": 9663 }, { "epoch": 1.3857183825638084, "grad_norm": 0.3302965462207794, "learning_rate": 6.513791365550947e-06, "loss": 0.3257, "step": 9664 }, { "epoch": 1.3858617722971034, "grad_norm": 0.2884938716888428, "learning_rate": 6.512996254602566e-06, "loss": 0.3083, "step": 9665 }, { "epoch": 1.3860051620303986, "grad_norm": 0.2788415849208832, "learning_rate": 6.512201101534696e-06, "loss": 0.2874, "step": 9666 }, { "epoch": 1.3861485517636938, "grad_norm": 0.2717237174510956, "learning_rate": 6.511405906369476e-06, "loss": 0.3004, "step": 9667 }, { "epoch": 1.3862919414969888, "grad_norm": 0.2842947244644165, "learning_rate": 6.510610669129041e-06, "loss": 0.3356, "step": 9668 }, { "epoch": 1.386435331230284, "grad_norm": 0.28118300437927246, "learning_rate": 6.509815389835526e-06, "loss": 0.3065, "step": 9669 }, { "epoch": 1.386578720963579, "grad_norm": 0.2932130694389343, "learning_rate": 6.509020068511075e-06, "loss": 0.3105, "step": 9670 }, { "epoch": 1.3867221106968741, "grad_norm": 0.26645100116729736, "learning_rate": 6.508224705177826e-06, "loss": 0.2874, "step": 9671 }, { "epoch": 1.386865500430169, "grad_norm": 0.28458282351493835, "learning_rate": 6.50742929985792e-06, "loss": 0.3184, "step": 9672 }, { "epoch": 1.3870088901634643, "grad_norm": 0.283953458070755, "learning_rate": 6.506633852573503e-06, "loss": 0.3119, "step": 9673 }, { "epoch": 1.3871522798967595, "grad_norm": 0.2994985282421112, "learning_rate": 6.505838363346716e-06, "loss": 0.3169, "step": 9674 }, { "epoch": 1.3872956696300545, "grad_norm": 0.2702264189720154, "learning_rate": 6.505042832199706e-06, "loss": 0.3114, "step": 9675 }, { "epoch": 1.3874390593633497, "grad_norm": 0.26758044958114624, "learning_rate": 6.504247259154619e-06, "loss": 0.3019, "step": 9676 }, { "epoch": 1.3875824490966446, "grad_norm": 0.29468920826911926, "learning_rate": 6.5034516442336015e-06, "loss": 0.3146, "step": 9677 }, { "epoch": 1.3877258388299398, "grad_norm": 0.2783571481704712, "learning_rate": 6.502655987458805e-06, "loss": 0.3096, "step": 9678 }, { "epoch": 1.3878692285632348, "grad_norm": 0.29187482595443726, "learning_rate": 6.5018602888523755e-06, "loss": 0.319, "step": 9679 }, { "epoch": 1.38801261829653, "grad_norm": 0.2791212499141693, "learning_rate": 6.501064548436465e-06, "loss": 0.308, "step": 9680 }, { "epoch": 1.3881560080298252, "grad_norm": 0.2716267704963684, "learning_rate": 6.500268766233229e-06, "loss": 0.3032, "step": 9681 }, { "epoch": 1.3882993977631202, "grad_norm": 0.2745632231235504, "learning_rate": 6.499472942264816e-06, "loss": 0.2968, "step": 9682 }, { "epoch": 1.3884427874964151, "grad_norm": 0.2751932442188263, "learning_rate": 6.498677076553383e-06, "loss": 0.3092, "step": 9683 }, { "epoch": 1.3885861772297103, "grad_norm": 0.2888490557670593, "learning_rate": 6.4978811691210874e-06, "loss": 0.2928, "step": 9684 }, { "epoch": 1.3887295669630055, "grad_norm": 0.27028337121009827, "learning_rate": 6.497085219990082e-06, "loss": 0.3166, "step": 9685 }, { "epoch": 1.3888729566963005, "grad_norm": 0.30618008971214294, "learning_rate": 6.496289229182529e-06, "loss": 0.3082, "step": 9686 }, { "epoch": 1.3890163464295957, "grad_norm": 0.27366903424263, "learning_rate": 6.4954931967205855e-06, "loss": 0.2896, "step": 9687 }, { "epoch": 1.3891597361628907, "grad_norm": 0.28591427206993103, "learning_rate": 6.494697122626411e-06, "loss": 0.3293, "step": 9688 }, { "epoch": 1.3893031258961859, "grad_norm": 0.291781485080719, "learning_rate": 6.49390100692217e-06, "loss": 0.3244, "step": 9689 }, { "epoch": 1.3894465156294808, "grad_norm": 0.27040594816207886, "learning_rate": 6.493104849630021e-06, "loss": 0.2964, "step": 9690 }, { "epoch": 1.389589905362776, "grad_norm": 0.2810063362121582, "learning_rate": 6.492308650772129e-06, "loss": 0.3179, "step": 9691 }, { "epoch": 1.3897332950960712, "grad_norm": 0.2748139500617981, "learning_rate": 6.49151241037066e-06, "loss": 0.3164, "step": 9692 }, { "epoch": 1.3898766848293662, "grad_norm": 0.2890108823776245, "learning_rate": 6.49071612844778e-06, "loss": 0.3139, "step": 9693 }, { "epoch": 1.3900200745626612, "grad_norm": 0.29470399022102356, "learning_rate": 6.4899198050256575e-06, "loss": 0.3017, "step": 9694 }, { "epoch": 1.3901634642959564, "grad_norm": 0.3241841197013855, "learning_rate": 6.489123440126459e-06, "loss": 0.3092, "step": 9695 }, { "epoch": 1.3903068540292516, "grad_norm": 0.26873722672462463, "learning_rate": 6.488327033772355e-06, "loss": 0.3037, "step": 9696 }, { "epoch": 1.3904502437625466, "grad_norm": 0.2906787693500519, "learning_rate": 6.4875305859855165e-06, "loss": 0.325, "step": 9697 }, { "epoch": 1.3905936334958418, "grad_norm": 0.2890699505805969, "learning_rate": 6.4867340967881134e-06, "loss": 0.312, "step": 9698 }, { "epoch": 1.3907370232291367, "grad_norm": 0.3047208786010742, "learning_rate": 6.485937566202319e-06, "loss": 0.3107, "step": 9699 }, { "epoch": 1.390880412962432, "grad_norm": 0.2745514512062073, "learning_rate": 6.4851409942503095e-06, "loss": 0.3148, "step": 9700 }, { "epoch": 1.391023802695727, "grad_norm": 0.26402586698532104, "learning_rate": 6.484344380954259e-06, "loss": 0.2988, "step": 9701 }, { "epoch": 1.391167192429022, "grad_norm": 0.30363771319389343, "learning_rate": 6.483547726336345e-06, "loss": 0.2911, "step": 9702 }, { "epoch": 1.3913105821623173, "grad_norm": 0.29051852226257324, "learning_rate": 6.482751030418746e-06, "loss": 0.305, "step": 9703 }, { "epoch": 1.3914539718956123, "grad_norm": 0.28471243381500244, "learning_rate": 6.481954293223637e-06, "loss": 0.2896, "step": 9704 }, { "epoch": 1.3915973616289072, "grad_norm": 0.2796446681022644, "learning_rate": 6.481157514773202e-06, "loss": 0.3092, "step": 9705 }, { "epoch": 1.3917407513622024, "grad_norm": 0.2912810742855072, "learning_rate": 6.480360695089618e-06, "loss": 0.3037, "step": 9706 }, { "epoch": 1.3918841410954976, "grad_norm": 0.29410144686698914, "learning_rate": 6.4795638341950705e-06, "loss": 0.3141, "step": 9707 }, { "epoch": 1.3920275308287926, "grad_norm": 0.3101145029067993, "learning_rate": 6.478766932111742e-06, "loss": 0.3039, "step": 9708 }, { "epoch": 1.3921709205620878, "grad_norm": 0.2744574248790741, "learning_rate": 6.477969988861817e-06, "loss": 0.3097, "step": 9709 }, { "epoch": 1.3923143102953828, "grad_norm": 0.3129562735557556, "learning_rate": 6.4771730044674795e-06, "loss": 0.2938, "step": 9710 }, { "epoch": 1.392457700028678, "grad_norm": 0.28738903999328613, "learning_rate": 6.476375978950919e-06, "loss": 0.3179, "step": 9711 }, { "epoch": 1.392601089761973, "grad_norm": 0.2995333969593048, "learning_rate": 6.475578912334322e-06, "loss": 0.3202, "step": 9712 }, { "epoch": 1.3927444794952681, "grad_norm": 0.297626793384552, "learning_rate": 6.47478180463988e-06, "loss": 0.3077, "step": 9713 }, { "epoch": 1.3928878692285633, "grad_norm": 0.2790102958679199, "learning_rate": 6.4739846558897786e-06, "loss": 0.3029, "step": 9714 }, { "epoch": 1.3930312589618583, "grad_norm": 0.2899385094642639, "learning_rate": 6.473187466106212e-06, "loss": 0.3136, "step": 9715 }, { "epoch": 1.3931746486951535, "grad_norm": 0.30765822529792786, "learning_rate": 6.472390235311375e-06, "loss": 0.3086, "step": 9716 }, { "epoch": 1.3933180384284485, "grad_norm": 0.2798106074333191, "learning_rate": 6.471592963527457e-06, "loss": 0.2798, "step": 9717 }, { "epoch": 1.3934614281617437, "grad_norm": 0.3154885768890381, "learning_rate": 6.470795650776653e-06, "loss": 0.3015, "step": 9718 }, { "epoch": 1.3936048178950387, "grad_norm": 0.3012638986110687, "learning_rate": 6.4699982970811635e-06, "loss": 0.3233, "step": 9719 }, { "epoch": 1.3937482076283338, "grad_norm": 0.31495386362075806, "learning_rate": 6.46920090246318e-06, "loss": 0.3011, "step": 9720 }, { "epoch": 1.393891597361629, "grad_norm": 0.2950159013271332, "learning_rate": 6.468403466944906e-06, "loss": 0.309, "step": 9721 }, { "epoch": 1.394034987094924, "grad_norm": 0.29708683490753174, "learning_rate": 6.467605990548538e-06, "loss": 0.3079, "step": 9722 }, { "epoch": 1.394178376828219, "grad_norm": 0.29627367854118347, "learning_rate": 6.466808473296275e-06, "loss": 0.3107, "step": 9723 }, { "epoch": 1.3943217665615142, "grad_norm": 0.270683228969574, "learning_rate": 6.466010915210322e-06, "loss": 0.2966, "step": 9724 }, { "epoch": 1.3944651562948094, "grad_norm": 0.28999707102775574, "learning_rate": 6.4652133163128795e-06, "loss": 0.3193, "step": 9725 }, { "epoch": 1.3946085460281044, "grad_norm": 0.2980046570301056, "learning_rate": 6.464415676626152e-06, "loss": 0.3002, "step": 9726 }, { "epoch": 1.3947519357613996, "grad_norm": 0.3065378665924072, "learning_rate": 6.463617996172346e-06, "loss": 0.3054, "step": 9727 }, { "epoch": 1.3948953254946945, "grad_norm": 0.28121551871299744, "learning_rate": 6.462820274973664e-06, "loss": 0.32, "step": 9728 }, { "epoch": 1.3950387152279897, "grad_norm": 0.27425670623779297, "learning_rate": 6.462022513052319e-06, "loss": 0.3109, "step": 9729 }, { "epoch": 1.3951821049612847, "grad_norm": 0.3064637780189514, "learning_rate": 6.461224710430515e-06, "loss": 0.3212, "step": 9730 }, { "epoch": 1.39532549469458, "grad_norm": 0.2652207016944885, "learning_rate": 6.460426867130463e-06, "loss": 0.3163, "step": 9731 }, { "epoch": 1.395468884427875, "grad_norm": 0.3071940541267395, "learning_rate": 6.459628983174373e-06, "loss": 0.313, "step": 9732 }, { "epoch": 1.39561227416117, "grad_norm": 0.27949848771095276, "learning_rate": 6.4588310585844575e-06, "loss": 0.2946, "step": 9733 }, { "epoch": 1.395755663894465, "grad_norm": 0.27710291743278503, "learning_rate": 6.4580330933829295e-06, "loss": 0.3072, "step": 9734 }, { "epoch": 1.3958990536277602, "grad_norm": 0.28361207246780396, "learning_rate": 6.457235087592003e-06, "loss": 0.3028, "step": 9735 }, { "epoch": 1.3960424433610554, "grad_norm": 0.28694212436676025, "learning_rate": 6.456437041233894e-06, "loss": 0.2883, "step": 9736 }, { "epoch": 1.3961858330943504, "grad_norm": 0.29535889625549316, "learning_rate": 6.455638954330817e-06, "loss": 0.3169, "step": 9737 }, { "epoch": 1.3963292228276456, "grad_norm": 0.29382261633872986, "learning_rate": 6.454840826904991e-06, "loss": 0.3155, "step": 9738 }, { "epoch": 1.3964726125609406, "grad_norm": 0.2942008376121521, "learning_rate": 6.454042658978635e-06, "loss": 0.2936, "step": 9739 }, { "epoch": 1.3966160022942358, "grad_norm": 0.31785091757774353, "learning_rate": 6.453244450573968e-06, "loss": 0.3141, "step": 9740 }, { "epoch": 1.3967593920275307, "grad_norm": 0.30553022027015686, "learning_rate": 6.452446201713211e-06, "loss": 0.2982, "step": 9741 }, { "epoch": 1.396902781760826, "grad_norm": 0.28922876715660095, "learning_rate": 6.4516479124185846e-06, "loss": 0.3034, "step": 9742 }, { "epoch": 1.3970461714941211, "grad_norm": 0.30536362528800964, "learning_rate": 6.4508495827123155e-06, "loss": 0.3266, "step": 9743 }, { "epoch": 1.397189561227416, "grad_norm": 0.2956172823905945, "learning_rate": 6.450051212616625e-06, "loss": 0.2981, "step": 9744 }, { "epoch": 1.397332950960711, "grad_norm": 0.2794894576072693, "learning_rate": 6.449252802153738e-06, "loss": 0.3057, "step": 9745 }, { "epoch": 1.3974763406940063, "grad_norm": 0.32975339889526367, "learning_rate": 6.4484543513458845e-06, "loss": 0.3311, "step": 9746 }, { "epoch": 1.3976197304273015, "grad_norm": 0.28553441166877747, "learning_rate": 6.447655860215288e-06, "loss": 0.3274, "step": 9747 }, { "epoch": 1.3977631201605965, "grad_norm": 0.291128009557724, "learning_rate": 6.44685732878418e-06, "loss": 0.3066, "step": 9748 }, { "epoch": 1.3979065098938916, "grad_norm": 0.31353262066841125, "learning_rate": 6.4460587570747915e-06, "loss": 0.3105, "step": 9749 }, { "epoch": 1.3980498996271866, "grad_norm": 0.2780213952064514, "learning_rate": 6.445260145109349e-06, "loss": 0.2968, "step": 9750 }, { "epoch": 1.3981932893604818, "grad_norm": 0.28491127490997314, "learning_rate": 6.44446149291009e-06, "loss": 0.3086, "step": 9751 }, { "epoch": 1.3983366790937768, "grad_norm": 0.3245478868484497, "learning_rate": 6.443662800499243e-06, "loss": 0.3401, "step": 9752 }, { "epoch": 1.398480068827072, "grad_norm": 0.2956888675689697, "learning_rate": 6.442864067899044e-06, "loss": 0.3107, "step": 9753 }, { "epoch": 1.3986234585603672, "grad_norm": 0.298942893743515, "learning_rate": 6.442065295131731e-06, "loss": 0.2996, "step": 9754 }, { "epoch": 1.3987668482936622, "grad_norm": 0.30441591143608093, "learning_rate": 6.4412664822195355e-06, "loss": 0.3012, "step": 9755 }, { "epoch": 1.3989102380269571, "grad_norm": 0.310421347618103, "learning_rate": 6.4404676291847e-06, "loss": 0.309, "step": 9756 }, { "epoch": 1.3990536277602523, "grad_norm": 0.28899338841438293, "learning_rate": 6.439668736049462e-06, "loss": 0.2897, "step": 9757 }, { "epoch": 1.3991970174935475, "grad_norm": 0.2921152710914612, "learning_rate": 6.438869802836061e-06, "loss": 0.2952, "step": 9758 }, { "epoch": 1.3993404072268425, "grad_norm": 0.27517950534820557, "learning_rate": 6.438070829566738e-06, "loss": 0.3122, "step": 9759 }, { "epoch": 1.3994837969601377, "grad_norm": 0.3138998746871948, "learning_rate": 6.437271816263735e-06, "loss": 0.3013, "step": 9760 }, { "epoch": 1.3996271866934327, "grad_norm": 0.2798402011394501, "learning_rate": 6.4364727629492954e-06, "loss": 0.3216, "step": 9761 }, { "epoch": 1.3997705764267279, "grad_norm": 0.31949329376220703, "learning_rate": 6.4356736696456646e-06, "loss": 0.3173, "step": 9762 }, { "epoch": 1.3999139661600228, "grad_norm": 0.287109375, "learning_rate": 6.4348745363750865e-06, "loss": 0.3212, "step": 9763 }, { "epoch": 1.400057355893318, "grad_norm": 0.27945271134376526, "learning_rate": 6.434075363159808e-06, "loss": 0.3083, "step": 9764 }, { "epoch": 1.4002007456266132, "grad_norm": 0.2755700647830963, "learning_rate": 6.433276150022078e-06, "loss": 0.2864, "step": 9765 }, { "epoch": 1.4003441353599082, "grad_norm": 0.2742178440093994, "learning_rate": 6.432476896984145e-06, "loss": 0.3031, "step": 9766 }, { "epoch": 1.4004875250932034, "grad_norm": 0.27578866481781006, "learning_rate": 6.431677604068258e-06, "loss": 0.3032, "step": 9767 }, { "epoch": 1.4006309148264984, "grad_norm": 0.27770763635635376, "learning_rate": 6.43087827129667e-06, "loss": 0.3027, "step": 9768 }, { "epoch": 1.4007743045597936, "grad_norm": 0.2796512842178345, "learning_rate": 6.43007889869163e-06, "loss": 0.3079, "step": 9769 }, { "epoch": 1.4009176942930885, "grad_norm": 0.28709354996681213, "learning_rate": 6.429279486275396e-06, "loss": 0.3064, "step": 9770 }, { "epoch": 1.4010610840263837, "grad_norm": 0.29055067896842957, "learning_rate": 6.428480034070219e-06, "loss": 0.3239, "step": 9771 }, { "epoch": 1.401204473759679, "grad_norm": 0.2711305320262909, "learning_rate": 6.4276805420983535e-06, "loss": 0.2911, "step": 9772 }, { "epoch": 1.401347863492974, "grad_norm": 0.2716304361820221, "learning_rate": 6.42688101038206e-06, "loss": 0.3074, "step": 9773 }, { "epoch": 1.4014912532262689, "grad_norm": 0.3015204966068268, "learning_rate": 6.426081438943592e-06, "loss": 0.3056, "step": 9774 }, { "epoch": 1.401634642959564, "grad_norm": 0.2885790467262268, "learning_rate": 6.425281827805212e-06, "loss": 0.2961, "step": 9775 }, { "epoch": 1.4017780326928593, "grad_norm": 0.28113555908203125, "learning_rate": 6.424482176989179e-06, "loss": 0.3044, "step": 9776 }, { "epoch": 1.4019214224261543, "grad_norm": 0.2663462460041046, "learning_rate": 6.4236824865177525e-06, "loss": 0.3079, "step": 9777 }, { "epoch": 1.4020648121594494, "grad_norm": 0.28368079662323, "learning_rate": 6.422882756413197e-06, "loss": 0.3181, "step": 9778 }, { "epoch": 1.4022082018927444, "grad_norm": 0.28315675258636475, "learning_rate": 6.422082986697772e-06, "loss": 0.3244, "step": 9779 }, { "epoch": 1.4023515916260396, "grad_norm": 0.28397345542907715, "learning_rate": 6.421283177393746e-06, "loss": 0.2806, "step": 9780 }, { "epoch": 1.4024949813593346, "grad_norm": 0.31563472747802734, "learning_rate": 6.420483328523383e-06, "loss": 0.3011, "step": 9781 }, { "epoch": 1.4026383710926298, "grad_norm": 0.269232839345932, "learning_rate": 6.419683440108947e-06, "loss": 0.2899, "step": 9782 }, { "epoch": 1.402781760825925, "grad_norm": 0.30651751160621643, "learning_rate": 6.4188835121727075e-06, "loss": 0.3265, "step": 9783 }, { "epoch": 1.40292515055922, "grad_norm": 0.2747986316680908, "learning_rate": 6.418083544736937e-06, "loss": 0.3084, "step": 9784 }, { "epoch": 1.403068540292515, "grad_norm": 0.27194666862487793, "learning_rate": 6.4172835378239e-06, "loss": 0.3163, "step": 9785 }, { "epoch": 1.4032119300258101, "grad_norm": 0.2940751612186432, "learning_rate": 6.41648349145587e-06, "loss": 0.3079, "step": 9786 }, { "epoch": 1.4033553197591053, "grad_norm": 0.2816057801246643, "learning_rate": 6.415683405655119e-06, "loss": 0.3033, "step": 9787 }, { "epoch": 1.4034987094924003, "grad_norm": 0.28509455919265747, "learning_rate": 6.414883280443919e-06, "loss": 0.3177, "step": 9788 }, { "epoch": 1.4036420992256955, "grad_norm": 0.2786916196346283, "learning_rate": 6.414083115844545e-06, "loss": 0.292, "step": 9789 }, { "epoch": 1.4037854889589905, "grad_norm": 0.2787301540374756, "learning_rate": 6.413282911879273e-06, "loss": 0.3087, "step": 9790 }, { "epoch": 1.4039288786922857, "grad_norm": 0.2725811004638672, "learning_rate": 6.412482668570378e-06, "loss": 0.2902, "step": 9791 }, { "epoch": 1.4040722684255806, "grad_norm": 0.2918435037136078, "learning_rate": 6.411682385940138e-06, "loss": 0.3167, "step": 9792 }, { "epoch": 1.4042156581588758, "grad_norm": 0.2880952060222626, "learning_rate": 6.410882064010832e-06, "loss": 0.3092, "step": 9793 }, { "epoch": 1.404359047892171, "grad_norm": 0.293971985578537, "learning_rate": 6.410081702804741e-06, "loss": 0.3107, "step": 9794 }, { "epoch": 1.404502437625466, "grad_norm": 0.2997080683708191, "learning_rate": 6.4092813023441435e-06, "loss": 0.3129, "step": 9795 }, { "epoch": 1.404645827358761, "grad_norm": 0.2950601279735565, "learning_rate": 6.408480862651322e-06, "loss": 0.2923, "step": 9796 }, { "epoch": 1.4047892170920562, "grad_norm": 0.2835730016231537, "learning_rate": 6.407680383748561e-06, "loss": 0.3169, "step": 9797 }, { "epoch": 1.4049326068253514, "grad_norm": 0.28240421414375305, "learning_rate": 6.4068798656581435e-06, "loss": 0.3055, "step": 9798 }, { "epoch": 1.4050759965586463, "grad_norm": 0.27581721544265747, "learning_rate": 6.406079308402353e-06, "loss": 0.2931, "step": 9799 }, { "epoch": 1.4052193862919415, "grad_norm": 0.302911639213562, "learning_rate": 6.40527871200348e-06, "loss": 0.3233, "step": 9800 }, { "epoch": 1.4053627760252365, "grad_norm": 0.29338279366493225, "learning_rate": 6.4044780764838065e-06, "loss": 0.3179, "step": 9801 }, { "epoch": 1.4055061657585317, "grad_norm": 0.29745981097221375, "learning_rate": 6.403677401865625e-06, "loss": 0.3001, "step": 9802 }, { "epoch": 1.4056495554918267, "grad_norm": 0.29275989532470703, "learning_rate": 6.402876688171227e-06, "loss": 0.3095, "step": 9803 }, { "epoch": 1.4057929452251219, "grad_norm": 0.2778570055961609, "learning_rate": 6.402075935422895e-06, "loss": 0.3025, "step": 9804 }, { "epoch": 1.405936334958417, "grad_norm": 0.2941867411136627, "learning_rate": 6.4012751436429295e-06, "loss": 0.3205, "step": 9805 }, { "epoch": 1.406079724691712, "grad_norm": 0.29910019040107727, "learning_rate": 6.400474312853619e-06, "loss": 0.3384, "step": 9806 }, { "epoch": 1.4062231144250072, "grad_norm": 0.28411155939102173, "learning_rate": 6.399673443077256e-06, "loss": 0.3072, "step": 9807 }, { "epoch": 1.4063665041583022, "grad_norm": 0.29129332304000854, "learning_rate": 6.39887253433614e-06, "loss": 0.3133, "step": 9808 }, { "epoch": 1.4065098938915974, "grad_norm": 0.30002182722091675, "learning_rate": 6.398071586652563e-06, "loss": 0.3172, "step": 9809 }, { "epoch": 1.4066532836248924, "grad_norm": 0.2876976728439331, "learning_rate": 6.397270600048824e-06, "loss": 0.3143, "step": 9810 }, { "epoch": 1.4067966733581876, "grad_norm": 0.2847335636615753, "learning_rate": 6.39646957454722e-06, "loss": 0.2999, "step": 9811 }, { "epoch": 1.4069400630914828, "grad_norm": 0.3055844306945801, "learning_rate": 6.395668510170052e-06, "loss": 0.3185, "step": 9812 }, { "epoch": 1.4070834528247778, "grad_norm": 0.28475090861320496, "learning_rate": 6.39486740693962e-06, "loss": 0.2917, "step": 9813 }, { "epoch": 1.4072268425580727, "grad_norm": 0.3052779734134674, "learning_rate": 6.394066264878225e-06, "loss": 0.3333, "step": 9814 }, { "epoch": 1.407370232291368, "grad_norm": 0.28440338373184204, "learning_rate": 6.393265084008169e-06, "loss": 0.2876, "step": 9815 }, { "epoch": 1.4075136220246631, "grad_norm": 0.30644696950912476, "learning_rate": 6.392463864351757e-06, "loss": 0.3344, "step": 9816 }, { "epoch": 1.407657011757958, "grad_norm": 0.2861213684082031, "learning_rate": 6.391662605931292e-06, "loss": 0.2978, "step": 9817 }, { "epoch": 1.4078004014912533, "grad_norm": 0.2730924189090729, "learning_rate": 6.390861308769082e-06, "loss": 0.3052, "step": 9818 }, { "epoch": 1.4079437912245483, "grad_norm": 0.30742135643959045, "learning_rate": 6.390059972887432e-06, "loss": 0.3286, "step": 9819 }, { "epoch": 1.4080871809578435, "grad_norm": 0.28010115027427673, "learning_rate": 6.389258598308649e-06, "loss": 0.3108, "step": 9820 }, { "epoch": 1.4082305706911384, "grad_norm": 0.2912260890007019, "learning_rate": 6.388457185055048e-06, "loss": 0.2963, "step": 9821 }, { "epoch": 1.4083739604244336, "grad_norm": 0.2838417589664459, "learning_rate": 6.387655733148932e-06, "loss": 0.3124, "step": 9822 }, { "epoch": 1.4085173501577288, "grad_norm": 0.2970995306968689, "learning_rate": 6.3868542426126145e-06, "loss": 0.2993, "step": 9823 }, { "epoch": 1.4086607398910238, "grad_norm": 0.2943817377090454, "learning_rate": 6.3860527134684105e-06, "loss": 0.31, "step": 9824 }, { "epoch": 1.4088041296243188, "grad_norm": 0.301656574010849, "learning_rate": 6.385251145738629e-06, "loss": 0.3125, "step": 9825 }, { "epoch": 1.408947519357614, "grad_norm": 0.2787485420703888, "learning_rate": 6.384449539445587e-06, "loss": 0.3104, "step": 9826 }, { "epoch": 1.4090909090909092, "grad_norm": 0.2816474139690399, "learning_rate": 6.383647894611601e-06, "loss": 0.3027, "step": 9827 }, { "epoch": 1.4092342988242041, "grad_norm": 0.3054127097129822, "learning_rate": 6.382846211258984e-06, "loss": 0.31, "step": 9828 }, { "epoch": 1.4093776885574993, "grad_norm": 0.288986474275589, "learning_rate": 6.382044489410055e-06, "loss": 0.3139, "step": 9829 }, { "epoch": 1.4095210782907943, "grad_norm": 0.29851457476615906, "learning_rate": 6.381242729087137e-06, "loss": 0.3203, "step": 9830 }, { "epoch": 1.4096644680240895, "grad_norm": 0.30339309573173523, "learning_rate": 6.380440930312543e-06, "loss": 0.3157, "step": 9831 }, { "epoch": 1.4098078577573845, "grad_norm": 0.3021126687526703, "learning_rate": 6.379639093108599e-06, "loss": 0.3128, "step": 9832 }, { "epoch": 1.4099512474906797, "grad_norm": 0.2790358066558838, "learning_rate": 6.378837217497625e-06, "loss": 0.3002, "step": 9833 }, { "epoch": 1.4100946372239749, "grad_norm": 0.2844253182411194, "learning_rate": 6.378035303501941e-06, "loss": 0.3103, "step": 9834 }, { "epoch": 1.4102380269572699, "grad_norm": 0.3152313530445099, "learning_rate": 6.377233351143878e-06, "loss": 0.3085, "step": 9835 }, { "epoch": 1.4103814166905648, "grad_norm": 0.2738412916660309, "learning_rate": 6.376431360445754e-06, "loss": 0.3095, "step": 9836 }, { "epoch": 1.41052480642386, "grad_norm": 0.31927111744880676, "learning_rate": 6.3756293314299e-06, "loss": 0.3294, "step": 9837 }, { "epoch": 1.4106681961571552, "grad_norm": 0.28596651554107666, "learning_rate": 6.374827264118641e-06, "loss": 0.3105, "step": 9838 }, { "epoch": 1.4108115858904502, "grad_norm": 0.2736095190048218, "learning_rate": 6.374025158534305e-06, "loss": 0.3043, "step": 9839 }, { "epoch": 1.4109549756237454, "grad_norm": 0.27783823013305664, "learning_rate": 6.373223014699223e-06, "loss": 0.3085, "step": 9840 }, { "epoch": 1.4110983653570404, "grad_norm": 0.2554070055484772, "learning_rate": 6.372420832635724e-06, "loss": 0.2999, "step": 9841 }, { "epoch": 1.4112417550903356, "grad_norm": 0.3108367621898651, "learning_rate": 6.371618612366141e-06, "loss": 0.3134, "step": 9842 }, { "epoch": 1.4113851448236305, "grad_norm": 0.2639109194278717, "learning_rate": 6.370816353912806e-06, "loss": 0.308, "step": 9843 }, { "epoch": 1.4115285345569257, "grad_norm": 0.27756112813949585, "learning_rate": 6.370014057298053e-06, "loss": 0.2983, "step": 9844 }, { "epoch": 1.411671924290221, "grad_norm": 0.30829647183418274, "learning_rate": 6.369211722544214e-06, "loss": 0.3313, "step": 9845 }, { "epoch": 1.411815314023516, "grad_norm": 0.2716359794139862, "learning_rate": 6.368409349673627e-06, "loss": 0.3149, "step": 9846 }, { "epoch": 1.411958703756811, "grad_norm": 0.258122980594635, "learning_rate": 6.3676069387086305e-06, "loss": 0.287, "step": 9847 }, { "epoch": 1.412102093490106, "grad_norm": 0.2816442847251892, "learning_rate": 6.366804489671561e-06, "loss": 0.3117, "step": 9848 }, { "epoch": 1.4122454832234013, "grad_norm": 0.27723264694213867, "learning_rate": 6.366002002584754e-06, "loss": 0.3026, "step": 9849 }, { "epoch": 1.4123888729566962, "grad_norm": 0.3055294156074524, "learning_rate": 6.365199477470555e-06, "loss": 0.3155, "step": 9850 }, { "epoch": 1.4125322626899914, "grad_norm": 0.2721804678440094, "learning_rate": 6.364396914351303e-06, "loss": 0.3158, "step": 9851 }, { "epoch": 1.4126756524232864, "grad_norm": 0.28328853845596313, "learning_rate": 6.363594313249338e-06, "loss": 0.322, "step": 9852 }, { "epoch": 1.4128190421565816, "grad_norm": 0.2851565182209015, "learning_rate": 6.362791674187006e-06, "loss": 0.3233, "step": 9853 }, { "epoch": 1.4129624318898766, "grad_norm": 0.26701775193214417, "learning_rate": 6.36198899718665e-06, "loss": 0.3093, "step": 9854 }, { "epoch": 1.4131058216231718, "grad_norm": 0.2808655798435211, "learning_rate": 6.361186282270615e-06, "loss": 0.3135, "step": 9855 }, { "epoch": 1.413249211356467, "grad_norm": 0.2734655737876892, "learning_rate": 6.360383529461247e-06, "loss": 0.3146, "step": 9856 }, { "epoch": 1.413392601089762, "grad_norm": 0.26913982629776, "learning_rate": 6.359580738780896e-06, "loss": 0.2977, "step": 9857 }, { "epoch": 1.4135359908230571, "grad_norm": 0.2880840003490448, "learning_rate": 6.358777910251907e-06, "loss": 0.2936, "step": 9858 }, { "epoch": 1.4136793805563521, "grad_norm": 0.2858120799064636, "learning_rate": 6.357975043896634e-06, "loss": 0.3126, "step": 9859 }, { "epoch": 1.4138227702896473, "grad_norm": 0.27629971504211426, "learning_rate": 6.357172139737422e-06, "loss": 0.3173, "step": 9860 }, { "epoch": 1.4139661600229423, "grad_norm": 0.292692631483078, "learning_rate": 6.356369197796627e-06, "loss": 0.3102, "step": 9861 }, { "epoch": 1.4141095497562375, "grad_norm": 0.30223917961120605, "learning_rate": 6.3555662180966e-06, "loss": 0.3198, "step": 9862 }, { "epoch": 1.4142529394895327, "grad_norm": 0.28711292147636414, "learning_rate": 6.354763200659694e-06, "loss": 0.3095, "step": 9863 }, { "epoch": 1.4143963292228277, "grad_norm": 0.29329052567481995, "learning_rate": 6.353960145508263e-06, "loss": 0.3136, "step": 9864 }, { "epoch": 1.4145397189561226, "grad_norm": 0.27778807282447815, "learning_rate": 6.353157052664666e-06, "loss": 0.3066, "step": 9865 }, { "epoch": 1.4146831086894178, "grad_norm": 0.3152677118778229, "learning_rate": 6.352353922151258e-06, "loss": 0.2878, "step": 9866 }, { "epoch": 1.414826498422713, "grad_norm": 0.2735220789909363, "learning_rate": 6.351550753990397e-06, "loss": 0.2905, "step": 9867 }, { "epoch": 1.414969888156008, "grad_norm": 0.28085777163505554, "learning_rate": 6.350747548204443e-06, "loss": 0.3006, "step": 9868 }, { "epoch": 1.4151132778893032, "grad_norm": 0.28010472655296326, "learning_rate": 6.3499443048157535e-06, "loss": 0.2996, "step": 9869 }, { "epoch": 1.4152566676225982, "grad_norm": 0.27042073011398315, "learning_rate": 6.3491410238466935e-06, "loss": 0.2949, "step": 9870 }, { "epoch": 1.4154000573558934, "grad_norm": 0.28286513686180115, "learning_rate": 6.348337705319621e-06, "loss": 0.2785, "step": 9871 }, { "epoch": 1.4155434470891883, "grad_norm": 0.2770330011844635, "learning_rate": 6.347534349256901e-06, "loss": 0.3026, "step": 9872 }, { "epoch": 1.4156868368224835, "grad_norm": 0.29073867201805115, "learning_rate": 6.346730955680898e-06, "loss": 0.311, "step": 9873 }, { "epoch": 1.4158302265557787, "grad_norm": 0.2976713180541992, "learning_rate": 6.345927524613975e-06, "loss": 0.3026, "step": 9874 }, { "epoch": 1.4159736162890737, "grad_norm": 0.2843164801597595, "learning_rate": 6.3451240560785035e-06, "loss": 0.2923, "step": 9875 }, { "epoch": 1.4161170060223687, "grad_norm": 0.272294282913208, "learning_rate": 6.344320550096845e-06, "loss": 0.313, "step": 9876 }, { "epoch": 1.4162603957556639, "grad_norm": 0.2821584939956665, "learning_rate": 6.343517006691371e-06, "loss": 0.3239, "step": 9877 }, { "epoch": 1.416403785488959, "grad_norm": 0.2969529330730438, "learning_rate": 6.34271342588445e-06, "loss": 0.2971, "step": 9878 }, { "epoch": 1.416547175222254, "grad_norm": 0.31033897399902344, "learning_rate": 6.341909807698453e-06, "loss": 0.2888, "step": 9879 }, { "epoch": 1.4166905649555492, "grad_norm": 0.2734255790710449, "learning_rate": 6.341106152155751e-06, "loss": 0.319, "step": 9880 }, { "epoch": 1.4168339546888442, "grad_norm": 0.29327356815338135, "learning_rate": 6.340302459278718e-06, "loss": 0.3076, "step": 9881 }, { "epoch": 1.4169773444221394, "grad_norm": 0.30027446150779724, "learning_rate": 6.3394987290897245e-06, "loss": 0.3074, "step": 9882 }, { "epoch": 1.4171207341554344, "grad_norm": 0.28056222200393677, "learning_rate": 6.3386949616111485e-06, "loss": 0.3061, "step": 9883 }, { "epoch": 1.4172641238887296, "grad_norm": 0.3097200095653534, "learning_rate": 6.337891156865362e-06, "loss": 0.3086, "step": 9884 }, { "epoch": 1.4174075136220248, "grad_norm": 0.27043142914772034, "learning_rate": 6.3370873148747455e-06, "loss": 0.3123, "step": 9885 }, { "epoch": 1.4175509033553197, "grad_norm": 0.2850412130355835, "learning_rate": 6.336283435661675e-06, "loss": 0.304, "step": 9886 }, { "epoch": 1.4176942930886147, "grad_norm": 0.29600033164024353, "learning_rate": 6.3354795192485284e-06, "loss": 0.3127, "step": 9887 }, { "epoch": 1.41783768282191, "grad_norm": 0.30385103821754456, "learning_rate": 6.334675565657686e-06, "loss": 0.3032, "step": 9888 }, { "epoch": 1.4179810725552051, "grad_norm": 0.2987128794193268, "learning_rate": 6.333871574911531e-06, "loss": 0.3042, "step": 9889 }, { "epoch": 1.4181244622885, "grad_norm": 0.26663896441459656, "learning_rate": 6.333067547032441e-06, "loss": 0.3114, "step": 9890 }, { "epoch": 1.4182678520217953, "grad_norm": 0.2751738727092743, "learning_rate": 6.332263482042802e-06, "loss": 0.3055, "step": 9891 }, { "epoch": 1.4184112417550903, "grad_norm": 0.32352837920188904, "learning_rate": 6.331459379964997e-06, "loss": 0.3203, "step": 9892 }, { "epoch": 1.4185546314883855, "grad_norm": 0.2682934105396271, "learning_rate": 6.330655240821411e-06, "loss": 0.3164, "step": 9893 }, { "epoch": 1.4186980212216804, "grad_norm": 0.27533072233200073, "learning_rate": 6.329851064634432e-06, "loss": 0.3072, "step": 9894 }, { "epoch": 1.4188414109549756, "grad_norm": 0.27342966198921204, "learning_rate": 6.329046851426441e-06, "loss": 0.3021, "step": 9895 }, { "epoch": 1.4189848006882708, "grad_norm": 0.3185541033744812, "learning_rate": 6.328242601219833e-06, "loss": 0.3025, "step": 9896 }, { "epoch": 1.4191281904215658, "grad_norm": 0.25952577590942383, "learning_rate": 6.327438314036993e-06, "loss": 0.3019, "step": 9897 }, { "epoch": 1.419271580154861, "grad_norm": 0.2680981159210205, "learning_rate": 6.326633989900314e-06, "loss": 0.3009, "step": 9898 }, { "epoch": 1.419414969888156, "grad_norm": 0.26688551902770996, "learning_rate": 6.325829628832183e-06, "loss": 0.2894, "step": 9899 }, { "epoch": 1.4195583596214512, "grad_norm": 0.3029724359512329, "learning_rate": 6.3250252308549974e-06, "loss": 0.3003, "step": 9900 }, { "epoch": 1.4197017493547461, "grad_norm": 0.2880224883556366, "learning_rate": 6.324220795991145e-06, "loss": 0.3139, "step": 9901 }, { "epoch": 1.4198451390880413, "grad_norm": 0.28858494758605957, "learning_rate": 6.323416324263024e-06, "loss": 0.2904, "step": 9902 }, { "epoch": 1.4199885288213365, "grad_norm": 0.2921887934207916, "learning_rate": 6.322611815693028e-06, "loss": 0.3228, "step": 9903 }, { "epoch": 1.4201319185546315, "grad_norm": 0.31384769082069397, "learning_rate": 6.321807270303553e-06, "loss": 0.3028, "step": 9904 }, { "epoch": 1.4202753082879265, "grad_norm": 0.2871011197566986, "learning_rate": 6.3210026881169995e-06, "loss": 0.2869, "step": 9905 }, { "epoch": 1.4204186980212217, "grad_norm": 0.2964648902416229, "learning_rate": 6.320198069155761e-06, "loss": 0.3095, "step": 9906 }, { "epoch": 1.4205620877545169, "grad_norm": 0.31697380542755127, "learning_rate": 6.319393413442238e-06, "loss": 0.3186, "step": 9907 }, { "epoch": 1.4207054774878118, "grad_norm": 0.2664302587509155, "learning_rate": 6.318588720998835e-06, "loss": 0.2978, "step": 9908 }, { "epoch": 1.420848867221107, "grad_norm": 0.2943909764289856, "learning_rate": 6.3177839918479485e-06, "loss": 0.3099, "step": 9909 }, { "epoch": 1.420992256954402, "grad_norm": 0.29461491107940674, "learning_rate": 6.3169792260119835e-06, "loss": 0.3046, "step": 9910 }, { "epoch": 1.4211356466876972, "grad_norm": 0.28274640440940857, "learning_rate": 6.316174423513342e-06, "loss": 0.3224, "step": 9911 }, { "epoch": 1.4212790364209922, "grad_norm": 0.2793442904949188, "learning_rate": 6.315369584374429e-06, "loss": 0.3051, "step": 9912 }, { "epoch": 1.4214224261542874, "grad_norm": 0.266631543636322, "learning_rate": 6.314564708617652e-06, "loss": 0.2994, "step": 9913 }, { "epoch": 1.4215658158875826, "grad_norm": 0.28857070207595825, "learning_rate": 6.313759796265414e-06, "loss": 0.3135, "step": 9914 }, { "epoch": 1.4217092056208775, "grad_norm": 0.2721942067146301, "learning_rate": 6.312954847340125e-06, "loss": 0.2867, "step": 9915 }, { "epoch": 1.4218525953541725, "grad_norm": 0.2833845317363739, "learning_rate": 6.312149861864193e-06, "loss": 0.2909, "step": 9916 }, { "epoch": 1.4219959850874677, "grad_norm": 0.28198739886283875, "learning_rate": 6.311344839860026e-06, "loss": 0.3058, "step": 9917 }, { "epoch": 1.422139374820763, "grad_norm": 0.278985857963562, "learning_rate": 6.3105397813500365e-06, "loss": 0.2923, "step": 9918 }, { "epoch": 1.4222827645540579, "grad_norm": 0.2788979709148407, "learning_rate": 6.309734686356635e-06, "loss": 0.3068, "step": 9919 }, { "epoch": 1.422426154287353, "grad_norm": 0.27698713541030884, "learning_rate": 6.308929554902236e-06, "loss": 0.3071, "step": 9920 }, { "epoch": 1.422569544020648, "grad_norm": 0.2824403941631317, "learning_rate": 6.3081243870092516e-06, "loss": 0.2959, "step": 9921 }, { "epoch": 1.4227129337539433, "grad_norm": 0.30628710985183716, "learning_rate": 6.307319182700096e-06, "loss": 0.2845, "step": 9922 }, { "epoch": 1.4228563234872382, "grad_norm": 0.3059035837650299, "learning_rate": 6.306513941997184e-06, "loss": 0.3158, "step": 9923 }, { "epoch": 1.4229997132205334, "grad_norm": 0.28785744309425354, "learning_rate": 6.305708664922936e-06, "loss": 0.3103, "step": 9924 }, { "epoch": 1.4231431029538286, "grad_norm": 0.279885470867157, "learning_rate": 6.304903351499766e-06, "loss": 0.3119, "step": 9925 }, { "epoch": 1.4232864926871236, "grad_norm": 0.28872939944267273, "learning_rate": 6.3040980017500944e-06, "loss": 0.3035, "step": 9926 }, { "epoch": 1.4234298824204186, "grad_norm": 0.3014453053474426, "learning_rate": 6.30329261569634e-06, "loss": 0.3059, "step": 9927 }, { "epoch": 1.4235732721537138, "grad_norm": 0.2954407334327698, "learning_rate": 6.302487193360925e-06, "loss": 0.3131, "step": 9928 }, { "epoch": 1.423716661887009, "grad_norm": 0.29717522859573364, "learning_rate": 6.3016817347662685e-06, "loss": 0.3106, "step": 9929 }, { "epoch": 1.423860051620304, "grad_norm": 0.31011074781417847, "learning_rate": 6.300876239934796e-06, "loss": 0.3219, "step": 9930 }, { "epoch": 1.4240034413535991, "grad_norm": 0.2964383065700531, "learning_rate": 6.30007070888893e-06, "loss": 0.2958, "step": 9931 }, { "epoch": 1.424146831086894, "grad_norm": 0.27319207787513733, "learning_rate": 6.299265141651096e-06, "loss": 0.2989, "step": 9932 }, { "epoch": 1.4242902208201893, "grad_norm": 0.29976990818977356, "learning_rate": 6.298459538243717e-06, "loss": 0.2987, "step": 9933 }, { "epoch": 1.4244336105534843, "grad_norm": 0.2761455774307251, "learning_rate": 6.297653898689224e-06, "loss": 0.3022, "step": 9934 }, { "epoch": 1.4245770002867795, "grad_norm": 0.3061254024505615, "learning_rate": 6.296848223010042e-06, "loss": 0.3076, "step": 9935 }, { "epoch": 1.4247203900200747, "grad_norm": 0.2853112518787384, "learning_rate": 6.296042511228601e-06, "loss": 0.3017, "step": 9936 }, { "epoch": 1.4248637797533696, "grad_norm": 0.296245813369751, "learning_rate": 6.295236763367329e-06, "loss": 0.3211, "step": 9937 }, { "epoch": 1.4250071694866648, "grad_norm": 0.27539166808128357, "learning_rate": 6.294430979448658e-06, "loss": 0.2941, "step": 9938 }, { "epoch": 1.4251505592199598, "grad_norm": 0.2901139259338379, "learning_rate": 6.29362515949502e-06, "loss": 0.2991, "step": 9939 }, { "epoch": 1.425293948953255, "grad_norm": 0.30463507771492004, "learning_rate": 6.292819303528849e-06, "loss": 0.3089, "step": 9940 }, { "epoch": 1.42543733868655, "grad_norm": 0.2706157863140106, "learning_rate": 6.292013411572575e-06, "loss": 0.3065, "step": 9941 }, { "epoch": 1.4255807284198452, "grad_norm": 0.271452397108078, "learning_rate": 6.291207483648636e-06, "loss": 0.2912, "step": 9942 }, { "epoch": 1.4257241181531402, "grad_norm": 0.3108054995536804, "learning_rate": 6.290401519779468e-06, "loss": 0.3005, "step": 9943 }, { "epoch": 1.4258675078864353, "grad_norm": 0.2869246304035187, "learning_rate": 6.289595519987506e-06, "loss": 0.2923, "step": 9944 }, { "epoch": 1.4260108976197303, "grad_norm": 0.28516122698783875, "learning_rate": 6.288789484295187e-06, "loss": 0.3209, "step": 9945 }, { "epoch": 1.4261542873530255, "grad_norm": 0.2762434482574463, "learning_rate": 6.287983412724953e-06, "loss": 0.3194, "step": 9946 }, { "epoch": 1.4262976770863207, "grad_norm": 0.2712821960449219, "learning_rate": 6.287177305299241e-06, "loss": 0.2929, "step": 9947 }, { "epoch": 1.4264410668196157, "grad_norm": 0.2990119457244873, "learning_rate": 6.286371162040494e-06, "loss": 0.3242, "step": 9948 }, { "epoch": 1.4265844565529109, "grad_norm": 0.2843547761440277, "learning_rate": 6.285564982971152e-06, "loss": 0.2925, "step": 9949 }, { "epoch": 1.4267278462862059, "grad_norm": 0.29874545335769653, "learning_rate": 6.284758768113658e-06, "loss": 0.3171, "step": 9950 }, { "epoch": 1.426871236019501, "grad_norm": 0.28832074999809265, "learning_rate": 6.283952517490458e-06, "loss": 0.2911, "step": 9951 }, { "epoch": 1.427014625752796, "grad_norm": 0.3101811707019806, "learning_rate": 6.2831462311239935e-06, "loss": 0.3052, "step": 9952 }, { "epoch": 1.4271580154860912, "grad_norm": 0.3035535514354706, "learning_rate": 6.282339909036712e-06, "loss": 0.3125, "step": 9953 }, { "epoch": 1.4273014052193864, "grad_norm": 0.269685298204422, "learning_rate": 6.28153355125106e-06, "loss": 0.3202, "step": 9954 }, { "epoch": 1.4274447949526814, "grad_norm": 0.2767483592033386, "learning_rate": 6.2807271577894855e-06, "loss": 0.3049, "step": 9955 }, { "epoch": 1.4275881846859764, "grad_norm": 0.2929939329624176, "learning_rate": 6.279920728674437e-06, "loss": 0.3005, "step": 9956 }, { "epoch": 1.4277315744192716, "grad_norm": 0.29852238297462463, "learning_rate": 6.2791142639283645e-06, "loss": 0.3164, "step": 9957 }, { "epoch": 1.4278749641525668, "grad_norm": 0.2607012093067169, "learning_rate": 6.2783077635737174e-06, "loss": 0.3105, "step": 9958 }, { "epoch": 1.4280183538858617, "grad_norm": 0.2665756642818451, "learning_rate": 6.27750122763295e-06, "loss": 0.3289, "step": 9959 }, { "epoch": 1.428161743619157, "grad_norm": 0.29176172614097595, "learning_rate": 6.276694656128512e-06, "loss": 0.3099, "step": 9960 }, { "epoch": 1.428305133352452, "grad_norm": 0.28640690445899963, "learning_rate": 6.2758880490828615e-06, "loss": 0.3309, "step": 9961 }, { "epoch": 1.428448523085747, "grad_norm": 0.28815388679504395, "learning_rate": 6.2750814065184485e-06, "loss": 0.2923, "step": 9962 }, { "epoch": 1.428591912819042, "grad_norm": 0.2667834460735321, "learning_rate": 6.27427472845773e-06, "loss": 0.3135, "step": 9963 }, { "epoch": 1.4287353025523373, "grad_norm": 0.27916857600212097, "learning_rate": 6.273468014923164e-06, "loss": 0.3153, "step": 9964 }, { "epoch": 1.4288786922856325, "grad_norm": 0.30971112847328186, "learning_rate": 6.272661265937208e-06, "loss": 0.3273, "step": 9965 }, { "epoch": 1.4290220820189274, "grad_norm": 0.27996018528938293, "learning_rate": 6.271854481522318e-06, "loss": 0.2985, "step": 9966 }, { "epoch": 1.4291654717522224, "grad_norm": 0.2831032872200012, "learning_rate": 6.2710476617009585e-06, "loss": 0.3202, "step": 9967 }, { "epoch": 1.4293088614855176, "grad_norm": 0.27197790145874023, "learning_rate": 6.270240806495585e-06, "loss": 0.3268, "step": 9968 }, { "epoch": 1.4294522512188128, "grad_norm": 0.281305193901062, "learning_rate": 6.269433915928663e-06, "loss": 0.3029, "step": 9969 }, { "epoch": 1.4295956409521078, "grad_norm": 0.29141587018966675, "learning_rate": 6.268626990022653e-06, "loss": 0.303, "step": 9970 }, { "epoch": 1.429739030685403, "grad_norm": 0.2963758111000061, "learning_rate": 6.26782002880002e-06, "loss": 0.3373, "step": 9971 }, { "epoch": 1.429882420418698, "grad_norm": 0.2960495948791504, "learning_rate": 6.267013032283227e-06, "loss": 0.312, "step": 9972 }, { "epoch": 1.4300258101519931, "grad_norm": 0.29528141021728516, "learning_rate": 6.266206000494739e-06, "loss": 0.3092, "step": 9973 }, { "epoch": 1.4301691998852881, "grad_norm": 0.2710185647010803, "learning_rate": 6.265398933457024e-06, "loss": 0.2905, "step": 9974 }, { "epoch": 1.4303125896185833, "grad_norm": 0.2894740402698517, "learning_rate": 6.264591831192552e-06, "loss": 0.3112, "step": 9975 }, { "epoch": 1.4304559793518785, "grad_norm": 0.3072216510772705, "learning_rate": 6.263784693723786e-06, "loss": 0.2934, "step": 9976 }, { "epoch": 1.4305993690851735, "grad_norm": 0.2954992949962616, "learning_rate": 6.262977521073199e-06, "loss": 0.3046, "step": 9977 }, { "epoch": 1.4307427588184685, "grad_norm": 0.29820749163627625, "learning_rate": 6.2621703132632605e-06, "loss": 0.2952, "step": 9978 }, { "epoch": 1.4308861485517637, "grad_norm": 0.30178868770599365, "learning_rate": 6.261363070316442e-06, "loss": 0.3081, "step": 9979 }, { "epoch": 1.4310295382850589, "grad_norm": 0.28713539242744446, "learning_rate": 6.2605557922552165e-06, "loss": 0.3212, "step": 9980 }, { "epoch": 1.4311729280183538, "grad_norm": 0.3177052438259125, "learning_rate": 6.259748479102057e-06, "loss": 0.3168, "step": 9981 }, { "epoch": 1.431316317751649, "grad_norm": 0.2847013473510742, "learning_rate": 6.2589411308794364e-06, "loss": 0.3105, "step": 9982 }, { "epoch": 1.431459707484944, "grad_norm": 0.29878073930740356, "learning_rate": 6.2581337476098315e-06, "loss": 0.3114, "step": 9983 }, { "epoch": 1.4316030972182392, "grad_norm": 0.2923088073730469, "learning_rate": 6.257326329315719e-06, "loss": 0.3073, "step": 9984 }, { "epoch": 1.4317464869515342, "grad_norm": 0.2786877751350403, "learning_rate": 6.256518876019575e-06, "loss": 0.3028, "step": 9985 }, { "epoch": 1.4318898766848294, "grad_norm": 0.29992055892944336, "learning_rate": 6.2557113877438804e-06, "loss": 0.3049, "step": 9986 }, { "epoch": 1.4320332664181246, "grad_norm": 0.29600855708122253, "learning_rate": 6.25490386451111e-06, "loss": 0.3041, "step": 9987 }, { "epoch": 1.4321766561514195, "grad_norm": 0.29955950379371643, "learning_rate": 6.254096306343748e-06, "loss": 0.297, "step": 9988 }, { "epoch": 1.4323200458847147, "grad_norm": 0.3143478333950043, "learning_rate": 6.253288713264274e-06, "loss": 0.3205, "step": 9989 }, { "epoch": 1.4324634356180097, "grad_norm": 0.2724894881248474, "learning_rate": 6.252481085295169e-06, "loss": 0.3021, "step": 9990 }, { "epoch": 1.432606825351305, "grad_norm": 0.2857901453971863, "learning_rate": 6.251673422458918e-06, "loss": 0.3161, "step": 9991 }, { "epoch": 1.4327502150845999, "grad_norm": 0.2860810160636902, "learning_rate": 6.2508657247780035e-06, "loss": 0.298, "step": 9992 }, { "epoch": 1.432893604817895, "grad_norm": 0.29509109258651733, "learning_rate": 6.250057992274912e-06, "loss": 0.3068, "step": 9993 }, { "epoch": 1.4330369945511903, "grad_norm": 0.3016388714313507, "learning_rate": 6.24925022497213e-06, "loss": 0.3037, "step": 9994 }, { "epoch": 1.4331803842844852, "grad_norm": 0.27672332525253296, "learning_rate": 6.248442422892143e-06, "loss": 0.3121, "step": 9995 }, { "epoch": 1.4333237740177802, "grad_norm": 0.2770809233188629, "learning_rate": 6.247634586057437e-06, "loss": 0.3165, "step": 9996 }, { "epoch": 1.4334671637510754, "grad_norm": 0.31262385845184326, "learning_rate": 6.246826714490507e-06, "loss": 0.316, "step": 9997 }, { "epoch": 1.4336105534843706, "grad_norm": 0.2934456169605255, "learning_rate": 6.246018808213837e-06, "loss": 0.3158, "step": 9998 }, { "epoch": 1.4337539432176656, "grad_norm": 0.26672986149787903, "learning_rate": 6.24521086724992e-06, "loss": 0.3069, "step": 9999 }, { "epoch": 1.4338973329509608, "grad_norm": 0.28537628054618835, "learning_rate": 6.24440289162125e-06, "loss": 0.3105, "step": 10000 }, { "epoch": 1.4340407226842558, "grad_norm": 0.30282673239707947, "learning_rate": 6.243594881350314e-06, "loss": 0.2954, "step": 10001 }, { "epoch": 1.434184112417551, "grad_norm": 0.2780386507511139, "learning_rate": 6.2427868364596135e-06, "loss": 0.2974, "step": 10002 }, { "epoch": 1.434327502150846, "grad_norm": 0.28959009051322937, "learning_rate": 6.2419787569716376e-06, "loss": 0.3034, "step": 10003 }, { "epoch": 1.4344708918841411, "grad_norm": 0.2841393053531647, "learning_rate": 6.241170642908883e-06, "loss": 0.3019, "step": 10004 }, { "epoch": 1.4346142816174363, "grad_norm": 0.27344202995300293, "learning_rate": 6.240362494293848e-06, "loss": 0.2978, "step": 10005 }, { "epoch": 1.4347576713507313, "grad_norm": 0.2938859462738037, "learning_rate": 6.239554311149029e-06, "loss": 0.3249, "step": 10006 }, { "epoch": 1.4349010610840263, "grad_norm": 0.2721102833747864, "learning_rate": 6.238746093496923e-06, "loss": 0.3056, "step": 10007 }, { "epoch": 1.4350444508173215, "grad_norm": 0.26966747641563416, "learning_rate": 6.237937841360034e-06, "loss": 0.3113, "step": 10008 }, { "epoch": 1.4351878405506167, "grad_norm": 0.2881878912448883, "learning_rate": 6.2371295547608594e-06, "loss": 0.3261, "step": 10009 }, { "epoch": 1.4353312302839116, "grad_norm": 0.2796050012111664, "learning_rate": 6.236321233721899e-06, "loss": 0.3038, "step": 10010 }, { "epoch": 1.4354746200172068, "grad_norm": 0.294297993183136, "learning_rate": 6.2355128782656585e-06, "loss": 0.3104, "step": 10011 }, { "epoch": 1.4356180097505018, "grad_norm": 0.2679382860660553, "learning_rate": 6.23470448841464e-06, "loss": 0.3099, "step": 10012 }, { "epoch": 1.435761399483797, "grad_norm": 0.2879916727542877, "learning_rate": 6.233896064191348e-06, "loss": 0.3124, "step": 10013 }, { "epoch": 1.435904789217092, "grad_norm": 0.2937769591808319, "learning_rate": 6.233087605618288e-06, "loss": 0.3124, "step": 10014 }, { "epoch": 1.4360481789503872, "grad_norm": 0.2993439733982086, "learning_rate": 6.232279112717965e-06, "loss": 0.3232, "step": 10015 }, { "epoch": 1.4361915686836824, "grad_norm": 0.26145800948143005, "learning_rate": 6.2314705855128885e-06, "loss": 0.3025, "step": 10016 }, { "epoch": 1.4363349584169773, "grad_norm": 0.26578354835510254, "learning_rate": 6.230662024025564e-06, "loss": 0.3085, "step": 10017 }, { "epoch": 1.4364783481502723, "grad_norm": 0.28009429574012756, "learning_rate": 6.229853428278501e-06, "loss": 0.3106, "step": 10018 }, { "epoch": 1.4366217378835675, "grad_norm": 0.30315226316452026, "learning_rate": 6.229044798294213e-06, "loss": 0.3137, "step": 10019 }, { "epoch": 1.4367651276168627, "grad_norm": 0.27536430954933167, "learning_rate": 6.228236134095207e-06, "loss": 0.3057, "step": 10020 }, { "epoch": 1.4369085173501577, "grad_norm": 0.27249929308891296, "learning_rate": 6.227427435703997e-06, "loss": 0.3111, "step": 10021 }, { "epoch": 1.4370519070834529, "grad_norm": 0.285084068775177, "learning_rate": 6.226618703143095e-06, "loss": 0.2931, "step": 10022 }, { "epoch": 1.4371952968167478, "grad_norm": 0.27448877692222595, "learning_rate": 6.225809936435014e-06, "loss": 0.3024, "step": 10023 }, { "epoch": 1.437338686550043, "grad_norm": 0.28181952238082886, "learning_rate": 6.225001135602272e-06, "loss": 0.3007, "step": 10024 }, { "epoch": 1.437482076283338, "grad_norm": 0.2694409191608429, "learning_rate": 6.224192300667381e-06, "loss": 0.3071, "step": 10025 }, { "epoch": 1.4376254660166332, "grad_norm": 0.27415895462036133, "learning_rate": 6.223383431652861e-06, "loss": 0.3101, "step": 10026 }, { "epoch": 1.4377688557499284, "grad_norm": 0.26836711168289185, "learning_rate": 6.2225745285812275e-06, "loss": 0.2856, "step": 10027 }, { "epoch": 1.4379122454832234, "grad_norm": 0.2567427158355713, "learning_rate": 6.221765591475e-06, "loss": 0.312, "step": 10028 }, { "epoch": 1.4380556352165186, "grad_norm": 0.29014989733695984, "learning_rate": 6.2209566203566974e-06, "loss": 0.3001, "step": 10029 }, { "epoch": 1.4381990249498136, "grad_norm": 0.284986287355423, "learning_rate": 6.22014761524884e-06, "loss": 0.2962, "step": 10030 }, { "epoch": 1.4383424146831087, "grad_norm": 0.25863465666770935, "learning_rate": 6.219338576173951e-06, "loss": 0.3159, "step": 10031 }, { "epoch": 1.4384858044164037, "grad_norm": 0.28195375204086304, "learning_rate": 6.218529503154554e-06, "loss": 0.3172, "step": 10032 }, { "epoch": 1.438629194149699, "grad_norm": 0.27447038888931274, "learning_rate": 6.217720396213166e-06, "loss": 0.3212, "step": 10033 }, { "epoch": 1.4387725838829941, "grad_norm": 0.26742279529571533, "learning_rate": 6.216911255372318e-06, "loss": 0.3031, "step": 10034 }, { "epoch": 1.438915973616289, "grad_norm": 0.2774152457714081, "learning_rate": 6.216102080654532e-06, "loss": 0.3124, "step": 10035 }, { "epoch": 1.439059363349584, "grad_norm": 0.26262998580932617, "learning_rate": 6.215292872082333e-06, "loss": 0.3064, "step": 10036 }, { "epoch": 1.4392027530828793, "grad_norm": 0.2723035216331482, "learning_rate": 6.214483629678252e-06, "loss": 0.303, "step": 10037 }, { "epoch": 1.4393461428161745, "grad_norm": 0.26838263869285583, "learning_rate": 6.2136743534648135e-06, "loss": 0.2856, "step": 10038 }, { "epoch": 1.4394895325494694, "grad_norm": 0.26554298400878906, "learning_rate": 6.212865043464549e-06, "loss": 0.3091, "step": 10039 }, { "epoch": 1.4396329222827646, "grad_norm": 0.2643146514892578, "learning_rate": 6.212055699699988e-06, "loss": 0.3145, "step": 10040 }, { "epoch": 1.4397763120160596, "grad_norm": 0.2936064302921295, "learning_rate": 6.2112463221936606e-06, "loss": 0.3048, "step": 10041 }, { "epoch": 1.4399197017493548, "grad_norm": 0.2805175483226776, "learning_rate": 6.210436910968097e-06, "loss": 0.2925, "step": 10042 }, { "epoch": 1.4400630914826498, "grad_norm": 0.28806403279304504, "learning_rate": 6.209627466045835e-06, "loss": 0.3077, "step": 10043 }, { "epoch": 1.440206481215945, "grad_norm": 0.2808836102485657, "learning_rate": 6.2088179874494025e-06, "loss": 0.2934, "step": 10044 }, { "epoch": 1.4403498709492402, "grad_norm": 0.27903464436531067, "learning_rate": 6.208008475201337e-06, "loss": 0.3158, "step": 10045 }, { "epoch": 1.4404932606825351, "grad_norm": 0.2702551782131195, "learning_rate": 6.207198929324174e-06, "loss": 0.3108, "step": 10046 }, { "epoch": 1.4406366504158301, "grad_norm": 0.259684294462204, "learning_rate": 6.2063893498404504e-06, "loss": 0.3186, "step": 10047 }, { "epoch": 1.4407800401491253, "grad_norm": 0.28255900740623474, "learning_rate": 6.205579736772703e-06, "loss": 0.3131, "step": 10048 }, { "epoch": 1.4409234298824205, "grad_norm": 0.2915664315223694, "learning_rate": 6.2047700901434704e-06, "loss": 0.326, "step": 10049 }, { "epoch": 1.4410668196157155, "grad_norm": 0.26810553669929504, "learning_rate": 6.203960409975291e-06, "loss": 0.2952, "step": 10050 }, { "epoch": 1.4412102093490107, "grad_norm": 0.28574734926223755, "learning_rate": 6.203150696290708e-06, "loss": 0.3106, "step": 10051 }, { "epoch": 1.4413535990823056, "grad_norm": 0.294846773147583, "learning_rate": 6.202340949112257e-06, "loss": 0.3032, "step": 10052 }, { "epoch": 1.4414969888156008, "grad_norm": 0.26235663890838623, "learning_rate": 6.2015311684624845e-06, "loss": 0.3055, "step": 10053 }, { "epoch": 1.4416403785488958, "grad_norm": 0.29655471444129944, "learning_rate": 6.2007213543639344e-06, "loss": 0.3018, "step": 10054 }, { "epoch": 1.441783768282191, "grad_norm": 0.2863929271697998, "learning_rate": 6.199911506839148e-06, "loss": 0.3053, "step": 10055 }, { "epoch": 1.4419271580154862, "grad_norm": 0.2877381443977356, "learning_rate": 6.1991016259106705e-06, "loss": 0.2946, "step": 10056 }, { "epoch": 1.4420705477487812, "grad_norm": 0.2677145004272461, "learning_rate": 6.1982917116010475e-06, "loss": 0.2987, "step": 10057 }, { "epoch": 1.4422139374820762, "grad_norm": 0.2817995846271515, "learning_rate": 6.197481763932828e-06, "loss": 0.3138, "step": 10058 }, { "epoch": 1.4423573272153714, "grad_norm": 0.30021414160728455, "learning_rate": 6.196671782928558e-06, "loss": 0.3317, "step": 10059 }, { "epoch": 1.4425007169486666, "grad_norm": 0.29085564613342285, "learning_rate": 6.195861768610787e-06, "loss": 0.3163, "step": 10060 }, { "epoch": 1.4426441066819615, "grad_norm": 0.2732047736644745, "learning_rate": 6.195051721002062e-06, "loss": 0.3227, "step": 10061 }, { "epoch": 1.4427874964152567, "grad_norm": 0.28582409024238586, "learning_rate": 6.194241640124938e-06, "loss": 0.3015, "step": 10062 }, { "epoch": 1.4429308861485517, "grad_norm": 0.28200697898864746, "learning_rate": 6.193431526001962e-06, "loss": 0.2799, "step": 10063 }, { "epoch": 1.443074275881847, "grad_norm": 0.30119046568870544, "learning_rate": 6.192621378655689e-06, "loss": 0.2925, "step": 10064 }, { "epoch": 1.4432176656151419, "grad_norm": 0.28858765959739685, "learning_rate": 6.19181119810867e-06, "loss": 0.2926, "step": 10065 }, { "epoch": 1.443361055348437, "grad_norm": 0.2937643527984619, "learning_rate": 6.191000984383462e-06, "loss": 0.2981, "step": 10066 }, { "epoch": 1.4435044450817323, "grad_norm": 0.31294065713882446, "learning_rate": 6.190190737502619e-06, "loss": 0.2987, "step": 10067 }, { "epoch": 1.4436478348150272, "grad_norm": 0.2974889576435089, "learning_rate": 6.189380457488696e-06, "loss": 0.3058, "step": 10068 }, { "epoch": 1.4437912245483222, "grad_norm": 0.29039835929870605, "learning_rate": 6.18857014436425e-06, "loss": 0.3052, "step": 10069 }, { "epoch": 1.4439346142816174, "grad_norm": 0.3181246519088745, "learning_rate": 6.187759798151841e-06, "loss": 0.3059, "step": 10070 }, { "epoch": 1.4440780040149126, "grad_norm": 0.3046399652957916, "learning_rate": 6.186949418874026e-06, "loss": 0.3019, "step": 10071 }, { "epoch": 1.4442213937482076, "grad_norm": 0.2834036946296692, "learning_rate": 6.186139006553364e-06, "loss": 0.2977, "step": 10072 }, { "epoch": 1.4443647834815028, "grad_norm": 0.29751572012901306, "learning_rate": 6.185328561212418e-06, "loss": 0.309, "step": 10073 }, { "epoch": 1.4445081732147977, "grad_norm": 0.28273338079452515, "learning_rate": 6.184518082873746e-06, "loss": 0.3243, "step": 10074 }, { "epoch": 1.444651562948093, "grad_norm": 0.2703677713871002, "learning_rate": 6.183707571559916e-06, "loss": 0.2889, "step": 10075 }, { "epoch": 1.444794952681388, "grad_norm": 0.2708137333393097, "learning_rate": 6.182897027293485e-06, "loss": 0.3085, "step": 10076 }, { "epoch": 1.444938342414683, "grad_norm": 0.2771112620830536, "learning_rate": 6.182086450097022e-06, "loss": 0.2933, "step": 10077 }, { "epoch": 1.4450817321479783, "grad_norm": 0.29898443818092346, "learning_rate": 6.181275839993091e-06, "loss": 0.318, "step": 10078 }, { "epoch": 1.4452251218812733, "grad_norm": 0.25881996750831604, "learning_rate": 6.180465197004257e-06, "loss": 0.3168, "step": 10079 }, { "epoch": 1.4453685116145685, "grad_norm": 0.3056766390800476, "learning_rate": 6.179654521153088e-06, "loss": 0.3258, "step": 10080 }, { "epoch": 1.4455119013478634, "grad_norm": 0.29039695858955383, "learning_rate": 6.178843812462152e-06, "loss": 0.3209, "step": 10081 }, { "epoch": 1.4456552910811586, "grad_norm": 0.27088990807533264, "learning_rate": 6.178033070954017e-06, "loss": 0.3132, "step": 10082 }, { "epoch": 1.4457986808144536, "grad_norm": 0.26845860481262207, "learning_rate": 6.177222296651254e-06, "loss": 0.2906, "step": 10083 }, { "epoch": 1.4459420705477488, "grad_norm": 0.27503108978271484, "learning_rate": 6.1764114895764325e-06, "loss": 0.3367, "step": 10084 }, { "epoch": 1.446085460281044, "grad_norm": 0.2659834623336792, "learning_rate": 6.1756006497521245e-06, "loss": 0.3103, "step": 10085 }, { "epoch": 1.446228850014339, "grad_norm": 0.31247478723526, "learning_rate": 6.174789777200905e-06, "loss": 0.2912, "step": 10086 }, { "epoch": 1.446372239747634, "grad_norm": 0.2693461775779724, "learning_rate": 6.1739788719453445e-06, "loss": 0.2848, "step": 10087 }, { "epoch": 1.4465156294809292, "grad_norm": 0.29287683963775635, "learning_rate": 6.173167934008017e-06, "loss": 0.3114, "step": 10088 }, { "epoch": 1.4466590192142244, "grad_norm": 0.2721313238143921, "learning_rate": 6.1723569634115e-06, "loss": 0.3213, "step": 10089 }, { "epoch": 1.4468024089475193, "grad_norm": 0.2644851803779602, "learning_rate": 6.171545960178368e-06, "loss": 0.3207, "step": 10090 }, { "epoch": 1.4469457986808145, "grad_norm": 0.34444719552993774, "learning_rate": 6.170734924331198e-06, "loss": 0.2991, "step": 10091 }, { "epoch": 1.4470891884141095, "grad_norm": 0.3433673083782196, "learning_rate": 6.169923855892569e-06, "loss": 0.292, "step": 10092 }, { "epoch": 1.4472325781474047, "grad_norm": 0.2832504212856293, "learning_rate": 6.169112754885059e-06, "loss": 0.3046, "step": 10093 }, { "epoch": 1.4473759678806997, "grad_norm": 0.29937905073165894, "learning_rate": 6.168301621331251e-06, "loss": 0.3014, "step": 10094 }, { "epoch": 1.4475193576139949, "grad_norm": 0.3224915564060211, "learning_rate": 6.167490455253721e-06, "loss": 0.3111, "step": 10095 }, { "epoch": 1.44766274734729, "grad_norm": 0.26921558380126953, "learning_rate": 6.166679256675052e-06, "loss": 0.2953, "step": 10096 }, { "epoch": 1.447806137080585, "grad_norm": 0.2709158658981323, "learning_rate": 6.1658680256178284e-06, "loss": 0.314, "step": 10097 }, { "epoch": 1.44794952681388, "grad_norm": 0.28912153840065, "learning_rate": 6.165056762104632e-06, "loss": 0.3033, "step": 10098 }, { "epoch": 1.4480929165471752, "grad_norm": 0.29541298747062683, "learning_rate": 6.164245466158048e-06, "loss": 0.3092, "step": 10099 }, { "epoch": 1.4482363062804704, "grad_norm": 0.27491146326065063, "learning_rate": 6.163434137800661e-06, "loss": 0.2936, "step": 10100 }, { "epoch": 1.4483796960137654, "grad_norm": 0.28015366196632385, "learning_rate": 6.162622777055056e-06, "loss": 0.326, "step": 10101 }, { "epoch": 1.4485230857470606, "grad_norm": 0.27719059586524963, "learning_rate": 6.161811383943823e-06, "loss": 0.3017, "step": 10102 }, { "epoch": 1.4486664754803555, "grad_norm": 0.28825536370277405, "learning_rate": 6.160999958489548e-06, "loss": 0.3205, "step": 10103 }, { "epoch": 1.4488098652136507, "grad_norm": 0.25170832872390747, "learning_rate": 6.160188500714819e-06, "loss": 0.3144, "step": 10104 }, { "epoch": 1.4489532549469457, "grad_norm": 0.2589491307735443, "learning_rate": 6.159377010642228e-06, "loss": 0.3035, "step": 10105 }, { "epoch": 1.449096644680241, "grad_norm": 0.27879923582077026, "learning_rate": 6.158565488294363e-06, "loss": 0.2987, "step": 10106 }, { "epoch": 1.449240034413536, "grad_norm": 0.2840350866317749, "learning_rate": 6.1577539336938185e-06, "loss": 0.2836, "step": 10107 }, { "epoch": 1.449383424146831, "grad_norm": 0.29653167724609375, "learning_rate": 6.156942346863186e-06, "loss": 0.3152, "step": 10108 }, { "epoch": 1.449526813880126, "grad_norm": 0.27508050203323364, "learning_rate": 6.156130727825057e-06, "loss": 0.3165, "step": 10109 }, { "epoch": 1.4496702036134212, "grad_norm": 0.2695540487766266, "learning_rate": 6.1553190766020265e-06, "loss": 0.2933, "step": 10110 }, { "epoch": 1.4498135933467164, "grad_norm": 0.2785841226577759, "learning_rate": 6.154507393216691e-06, "loss": 0.3148, "step": 10111 }, { "epoch": 1.4499569830800114, "grad_norm": 0.2746407389640808, "learning_rate": 6.153695677691646e-06, "loss": 0.3131, "step": 10112 }, { "epoch": 1.4501003728133066, "grad_norm": 0.2796590328216553, "learning_rate": 6.152883930049488e-06, "loss": 0.3122, "step": 10113 }, { "epoch": 1.4502437625466016, "grad_norm": 0.2836593985557556, "learning_rate": 6.152072150312814e-06, "loss": 0.2811, "step": 10114 }, { "epoch": 1.4503871522798968, "grad_norm": 0.2664434015750885, "learning_rate": 6.151260338504223e-06, "loss": 0.2924, "step": 10115 }, { "epoch": 1.4505305420131918, "grad_norm": 0.29420405626296997, "learning_rate": 6.150448494646317e-06, "loss": 0.3213, "step": 10116 }, { "epoch": 1.450673931746487, "grad_norm": 0.27529260516166687, "learning_rate": 6.149636618761694e-06, "loss": 0.3078, "step": 10117 }, { "epoch": 1.4508173214797822, "grad_norm": 0.2939074635505676, "learning_rate": 6.148824710872956e-06, "loss": 0.2913, "step": 10118 }, { "epoch": 1.4509607112130771, "grad_norm": 0.2979799211025238, "learning_rate": 6.148012771002705e-06, "loss": 0.297, "step": 10119 }, { "epoch": 1.4511041009463723, "grad_norm": 0.2873917520046234, "learning_rate": 6.147200799173544e-06, "loss": 0.2924, "step": 10120 }, { "epoch": 1.4512474906796673, "grad_norm": 0.28498974442481995, "learning_rate": 6.146388795408078e-06, "loss": 0.3087, "step": 10121 }, { "epoch": 1.4513908804129625, "grad_norm": 0.32359859347343445, "learning_rate": 6.145576759728912e-06, "loss": 0.2934, "step": 10122 }, { "epoch": 1.4515342701462575, "grad_norm": 0.2663693130016327, "learning_rate": 6.14476469215865e-06, "loss": 0.3006, "step": 10123 }, { "epoch": 1.4516776598795527, "grad_norm": 0.29209303855895996, "learning_rate": 6.143952592719902e-06, "loss": 0.2993, "step": 10124 }, { "epoch": 1.4518210496128479, "grad_norm": 0.278568834066391, "learning_rate": 6.1431404614352706e-06, "loss": 0.3356, "step": 10125 }, { "epoch": 1.4519644393461428, "grad_norm": 0.283634752035141, "learning_rate": 6.142328298327369e-06, "loss": 0.3181, "step": 10126 }, { "epoch": 1.4521078290794378, "grad_norm": 0.25888174772262573, "learning_rate": 6.141516103418805e-06, "loss": 0.2998, "step": 10127 }, { "epoch": 1.452251218812733, "grad_norm": 0.28617358207702637, "learning_rate": 6.140703876732188e-06, "loss": 0.3182, "step": 10128 }, { "epoch": 1.4523946085460282, "grad_norm": 0.30308690667152405, "learning_rate": 6.13989161829013e-06, "loss": 0.3166, "step": 10129 }, { "epoch": 1.4525379982793232, "grad_norm": 0.27620163559913635, "learning_rate": 6.139079328115243e-06, "loss": 0.3047, "step": 10130 }, { "epoch": 1.4526813880126184, "grad_norm": 0.2907574474811554, "learning_rate": 6.13826700623014e-06, "loss": 0.3037, "step": 10131 }, { "epoch": 1.4528247777459133, "grad_norm": 0.27037355303764343, "learning_rate": 6.137454652657434e-06, "loss": 0.3068, "step": 10132 }, { "epoch": 1.4529681674792085, "grad_norm": 0.29064780473709106, "learning_rate": 6.13664226741974e-06, "loss": 0.3079, "step": 10133 }, { "epoch": 1.4531115572125035, "grad_norm": 0.2984747290611267, "learning_rate": 6.135829850539674e-06, "loss": 0.2996, "step": 10134 }, { "epoch": 1.4532549469457987, "grad_norm": 0.28802311420440674, "learning_rate": 6.135017402039854e-06, "loss": 0.3084, "step": 10135 }, { "epoch": 1.453398336679094, "grad_norm": 0.3029254972934723, "learning_rate": 6.1342049219428925e-06, "loss": 0.3018, "step": 10136 }, { "epoch": 1.4535417264123889, "grad_norm": 0.3114657998085022, "learning_rate": 6.133392410271411e-06, "loss": 0.3101, "step": 10137 }, { "epoch": 1.4536851161456839, "grad_norm": 0.2919487953186035, "learning_rate": 6.13257986704803e-06, "loss": 0.2924, "step": 10138 }, { "epoch": 1.453828505878979, "grad_norm": 0.3017646372318268, "learning_rate": 6.131767292295366e-06, "loss": 0.3238, "step": 10139 }, { "epoch": 1.4539718956122742, "grad_norm": 0.2726421058177948, "learning_rate": 6.130954686036043e-06, "loss": 0.313, "step": 10140 }, { "epoch": 1.4541152853455692, "grad_norm": 0.27065348625183105, "learning_rate": 6.130142048292682e-06, "loss": 0.3129, "step": 10141 }, { "epoch": 1.4542586750788644, "grad_norm": 0.2937443256378174, "learning_rate": 6.129329379087902e-06, "loss": 0.3291, "step": 10142 }, { "epoch": 1.4544020648121594, "grad_norm": 0.29719653725624084, "learning_rate": 6.128516678444333e-06, "loss": 0.3075, "step": 10143 }, { "epoch": 1.4545454545454546, "grad_norm": 0.30646437406539917, "learning_rate": 6.127703946384593e-06, "loss": 0.3058, "step": 10144 }, { "epoch": 1.4546888442787496, "grad_norm": 0.27865898609161377, "learning_rate": 6.12689118293131e-06, "loss": 0.2896, "step": 10145 }, { "epoch": 1.4548322340120448, "grad_norm": 0.29220640659332275, "learning_rate": 6.126078388107112e-06, "loss": 0.3118, "step": 10146 }, { "epoch": 1.45497562374534, "grad_norm": 0.30017513036727905, "learning_rate": 6.125265561934622e-06, "loss": 0.29, "step": 10147 }, { "epoch": 1.455119013478635, "grad_norm": 0.29740509390830994, "learning_rate": 6.12445270443647e-06, "loss": 0.3157, "step": 10148 }, { "epoch": 1.45526240321193, "grad_norm": 0.26375073194503784, "learning_rate": 6.123639815635285e-06, "loss": 0.3093, "step": 10149 }, { "epoch": 1.455405792945225, "grad_norm": 0.28107911348342896, "learning_rate": 6.122826895553695e-06, "loss": 0.3086, "step": 10150 }, { "epoch": 1.4555491826785203, "grad_norm": 0.33235904574394226, "learning_rate": 6.122013944214332e-06, "loss": 0.2964, "step": 10151 }, { "epoch": 1.4556925724118153, "grad_norm": 0.2923640310764313, "learning_rate": 6.121200961639827e-06, "loss": 0.3017, "step": 10152 }, { "epoch": 1.4558359621451105, "grad_norm": 0.28339773416519165, "learning_rate": 6.120387947852812e-06, "loss": 0.3081, "step": 10153 }, { "epoch": 1.4559793518784054, "grad_norm": 0.30608996748924255, "learning_rate": 6.11957490287592e-06, "loss": 0.3035, "step": 10154 }, { "epoch": 1.4561227416117006, "grad_norm": 0.3396325707435608, "learning_rate": 6.118761826731785e-06, "loss": 0.3169, "step": 10155 }, { "epoch": 1.4562661313449956, "grad_norm": 0.2807021737098694, "learning_rate": 6.117948719443041e-06, "loss": 0.2961, "step": 10156 }, { "epoch": 1.4564095210782908, "grad_norm": 0.27059948444366455, "learning_rate": 6.117135581032325e-06, "loss": 0.3229, "step": 10157 }, { "epoch": 1.456552910811586, "grad_norm": 0.2938852310180664, "learning_rate": 6.1163224115222715e-06, "loss": 0.298, "step": 10158 }, { "epoch": 1.456696300544881, "grad_norm": 0.3127705156803131, "learning_rate": 6.115509210935522e-06, "loss": 0.3058, "step": 10159 }, { "epoch": 1.456839690278176, "grad_norm": 0.29991084337234497, "learning_rate": 6.11469597929471e-06, "loss": 0.2956, "step": 10160 }, { "epoch": 1.4569830800114711, "grad_norm": 0.28272920846939087, "learning_rate": 6.1138827166224765e-06, "loss": 0.299, "step": 10161 }, { "epoch": 1.4571264697447663, "grad_norm": 0.31393465399742126, "learning_rate": 6.113069422941465e-06, "loss": 0.3174, "step": 10162 }, { "epoch": 1.4572698594780613, "grad_norm": 0.2920064330101013, "learning_rate": 6.11225609827431e-06, "loss": 0.2987, "step": 10163 }, { "epoch": 1.4574132492113565, "grad_norm": 0.27359461784362793, "learning_rate": 6.111442742643655e-06, "loss": 0.2898, "step": 10164 }, { "epoch": 1.4575566389446515, "grad_norm": 0.2961874008178711, "learning_rate": 6.110629356072145e-06, "loss": 0.2989, "step": 10165 }, { "epoch": 1.4577000286779467, "grad_norm": 0.30637326836586, "learning_rate": 6.109815938582423e-06, "loss": 0.3143, "step": 10166 }, { "epoch": 1.4578434184112417, "grad_norm": 0.3147766888141632, "learning_rate": 6.109002490197132e-06, "loss": 0.3179, "step": 10167 }, { "epoch": 1.4579868081445368, "grad_norm": 0.2882409393787384, "learning_rate": 6.108189010938917e-06, "loss": 0.3049, "step": 10168 }, { "epoch": 1.458130197877832, "grad_norm": 0.3240111172199249, "learning_rate": 6.107375500830426e-06, "loss": 0.3097, "step": 10169 }, { "epoch": 1.458273587611127, "grad_norm": 0.29384034872055054, "learning_rate": 6.106561959894305e-06, "loss": 0.291, "step": 10170 }, { "epoch": 1.4584169773444222, "grad_norm": 0.3046671152114868, "learning_rate": 6.1057483881532e-06, "loss": 0.3164, "step": 10171 }, { "epoch": 1.4585603670777172, "grad_norm": 0.28367090225219727, "learning_rate": 6.104934785629761e-06, "loss": 0.2971, "step": 10172 }, { "epoch": 1.4587037568110124, "grad_norm": 0.32139328122138977, "learning_rate": 6.1041211523466374e-06, "loss": 0.3256, "step": 10173 }, { "epoch": 1.4588471465443074, "grad_norm": 0.3079017996788025, "learning_rate": 6.103307488326478e-06, "loss": 0.2979, "step": 10174 }, { "epoch": 1.4589905362776026, "grad_norm": 0.27987855672836304, "learning_rate": 6.102493793591937e-06, "loss": 0.3223, "step": 10175 }, { "epoch": 1.4591339260108978, "grad_norm": 0.2899288237094879, "learning_rate": 6.101680068165664e-06, "loss": 0.3257, "step": 10176 }, { "epoch": 1.4592773157441927, "grad_norm": 0.2977556884288788, "learning_rate": 6.100866312070312e-06, "loss": 0.2979, "step": 10177 }, { "epoch": 1.4594207054774877, "grad_norm": 0.2958015501499176, "learning_rate": 6.100052525328537e-06, "loss": 0.3019, "step": 10178 }, { "epoch": 1.459564095210783, "grad_norm": 0.30635616183280945, "learning_rate": 6.099238707962991e-06, "loss": 0.2976, "step": 10179 }, { "epoch": 1.459707484944078, "grad_norm": 0.28403913974761963, "learning_rate": 6.0984248599963305e-06, "loss": 0.2998, "step": 10180 }, { "epoch": 1.459850874677373, "grad_norm": 0.2884121239185333, "learning_rate": 6.097610981451212e-06, "loss": 0.3159, "step": 10181 }, { "epoch": 1.4599942644106683, "grad_norm": 0.32379603385925293, "learning_rate": 6.0967970723502915e-06, "loss": 0.321, "step": 10182 }, { "epoch": 1.4601376541439632, "grad_norm": 0.26746612787246704, "learning_rate": 6.095983132716228e-06, "loss": 0.3201, "step": 10183 }, { "epoch": 1.4602810438772584, "grad_norm": 0.3037252724170685, "learning_rate": 6.095169162571679e-06, "loss": 0.3166, "step": 10184 }, { "epoch": 1.4604244336105534, "grad_norm": 0.30386340618133545, "learning_rate": 6.094355161939306e-06, "loss": 0.2958, "step": 10185 }, { "epoch": 1.4605678233438486, "grad_norm": 0.2983323931694031, "learning_rate": 6.093541130841769e-06, "loss": 0.2986, "step": 10186 }, { "epoch": 1.4607112130771438, "grad_norm": 0.2924639880657196, "learning_rate": 6.092727069301729e-06, "loss": 0.3274, "step": 10187 }, { "epoch": 1.4608546028104388, "grad_norm": 0.29206234216690063, "learning_rate": 6.091912977341848e-06, "loss": 0.3121, "step": 10188 }, { "epoch": 1.4609979925437337, "grad_norm": 0.2822849154472351, "learning_rate": 6.0910988549847904e-06, "loss": 0.3062, "step": 10189 }, { "epoch": 1.461141382277029, "grad_norm": 0.2836343050003052, "learning_rate": 6.090284702253218e-06, "loss": 0.3103, "step": 10190 }, { "epoch": 1.4612847720103241, "grad_norm": 0.28089725971221924, "learning_rate": 6.0894705191697964e-06, "loss": 0.2939, "step": 10191 }, { "epoch": 1.4614281617436191, "grad_norm": 0.29392486810684204, "learning_rate": 6.0886563057571924e-06, "loss": 0.3202, "step": 10192 }, { "epoch": 1.4615715514769143, "grad_norm": 0.2684233486652374, "learning_rate": 6.087842062038071e-06, "loss": 0.2991, "step": 10193 }, { "epoch": 1.4617149412102093, "grad_norm": 0.27588725090026855, "learning_rate": 6.0870277880351e-06, "loss": 0.2945, "step": 10194 }, { "epoch": 1.4618583309435045, "grad_norm": 0.3083708584308624, "learning_rate": 6.086213483770948e-06, "loss": 0.3044, "step": 10195 }, { "epoch": 1.4620017206767995, "grad_norm": 0.28789222240448, "learning_rate": 6.085399149268283e-06, "loss": 0.3175, "step": 10196 }, { "epoch": 1.4621451104100947, "grad_norm": 0.28425535559654236, "learning_rate": 6.084584784549776e-06, "loss": 0.3058, "step": 10197 }, { "epoch": 1.4622885001433898, "grad_norm": 0.2603021264076233, "learning_rate": 6.083770389638096e-06, "loss": 0.2853, "step": 10198 }, { "epoch": 1.4624318898766848, "grad_norm": 0.28096240758895874, "learning_rate": 6.082955964555917e-06, "loss": 0.303, "step": 10199 }, { "epoch": 1.4625752796099798, "grad_norm": 0.2705037593841553, "learning_rate": 6.08214150932591e-06, "loss": 0.3292, "step": 10200 }, { "epoch": 1.462718669343275, "grad_norm": 0.29183390736579895, "learning_rate": 6.0813270239707455e-06, "loss": 0.3123, "step": 10201 }, { "epoch": 1.4628620590765702, "grad_norm": 0.286003977060318, "learning_rate": 6.080512508513101e-06, "loss": 0.3198, "step": 10202 }, { "epoch": 1.4630054488098652, "grad_norm": 0.28593093156814575, "learning_rate": 6.079697962975651e-06, "loss": 0.3057, "step": 10203 }, { "epoch": 1.4631488385431604, "grad_norm": 0.2858513295650482, "learning_rate": 6.07888338738107e-06, "loss": 0.3049, "step": 10204 }, { "epoch": 1.4632922282764553, "grad_norm": 0.2837754786014557, "learning_rate": 6.078068781752037e-06, "loss": 0.3043, "step": 10205 }, { "epoch": 1.4634356180097505, "grad_norm": 0.26947811245918274, "learning_rate": 6.077254146111225e-06, "loss": 0.3119, "step": 10206 }, { "epoch": 1.4635790077430455, "grad_norm": 0.2811641991138458, "learning_rate": 6.076439480481316e-06, "loss": 0.3196, "step": 10207 }, { "epoch": 1.4637223974763407, "grad_norm": 0.29085126519203186, "learning_rate": 6.075624784884989e-06, "loss": 0.32, "step": 10208 }, { "epoch": 1.463865787209636, "grad_norm": 0.28969019651412964, "learning_rate": 6.074810059344921e-06, "loss": 0.3074, "step": 10209 }, { "epoch": 1.4640091769429309, "grad_norm": 0.277334988117218, "learning_rate": 6.073995303883794e-06, "loss": 0.314, "step": 10210 }, { "epoch": 1.464152566676226, "grad_norm": 0.286762535572052, "learning_rate": 6.07318051852429e-06, "loss": 0.3026, "step": 10211 }, { "epoch": 1.464295956409521, "grad_norm": 0.2711355984210968, "learning_rate": 6.072365703289092e-06, "loss": 0.298, "step": 10212 }, { "epoch": 1.4644393461428162, "grad_norm": 0.2959134578704834, "learning_rate": 6.071550858200882e-06, "loss": 0.3025, "step": 10213 }, { "epoch": 1.4645827358761112, "grad_norm": 0.27995726466178894, "learning_rate": 6.070735983282344e-06, "loss": 0.3129, "step": 10214 }, { "epoch": 1.4647261256094064, "grad_norm": 0.2849302887916565, "learning_rate": 6.069921078556163e-06, "loss": 0.3232, "step": 10215 }, { "epoch": 1.4648695153427016, "grad_norm": 0.29760152101516724, "learning_rate": 6.069106144045027e-06, "loss": 0.314, "step": 10216 }, { "epoch": 1.4650129050759966, "grad_norm": 0.2985958755016327, "learning_rate": 6.068291179771619e-06, "loss": 0.3216, "step": 10217 }, { "epoch": 1.4651562948092915, "grad_norm": 0.2846147418022156, "learning_rate": 6.067476185758629e-06, "loss": 0.3075, "step": 10218 }, { "epoch": 1.4652996845425867, "grad_norm": 0.2661682665348053, "learning_rate": 6.066661162028742e-06, "loss": 0.2857, "step": 10219 }, { "epoch": 1.465443074275882, "grad_norm": 0.2778984606266022, "learning_rate": 6.065846108604651e-06, "loss": 0.2859, "step": 10220 }, { "epoch": 1.465586464009177, "grad_norm": 0.29724371433258057, "learning_rate": 6.065031025509044e-06, "loss": 0.2946, "step": 10221 }, { "epoch": 1.465729853742472, "grad_norm": 0.2726490795612335, "learning_rate": 6.06421591276461e-06, "loss": 0.2937, "step": 10222 }, { "epoch": 1.465873243475767, "grad_norm": 0.28241342306137085, "learning_rate": 6.063400770394043e-06, "loss": 0.3002, "step": 10223 }, { "epoch": 1.4660166332090623, "grad_norm": 0.28049492835998535, "learning_rate": 6.062585598420036e-06, "loss": 0.3299, "step": 10224 }, { "epoch": 1.4661600229423573, "grad_norm": 0.2973136007785797, "learning_rate": 6.061770396865277e-06, "loss": 0.3296, "step": 10225 }, { "epoch": 1.4663034126756525, "grad_norm": 0.25886425375938416, "learning_rate": 6.060955165752466e-06, "loss": 0.2844, "step": 10226 }, { "epoch": 1.4664468024089476, "grad_norm": 0.29132720828056335, "learning_rate": 6.060139905104295e-06, "loss": 0.3238, "step": 10227 }, { "epoch": 1.4665901921422426, "grad_norm": 0.2810533344745636, "learning_rate": 6.0593246149434595e-06, "loss": 0.3049, "step": 10228 }, { "epoch": 1.4667335818755376, "grad_norm": 0.2737239897251129, "learning_rate": 6.058509295292655e-06, "loss": 0.301, "step": 10229 }, { "epoch": 1.4668769716088328, "grad_norm": 0.28223443031311035, "learning_rate": 6.057693946174581e-06, "loss": 0.2895, "step": 10230 }, { "epoch": 1.467020361342128, "grad_norm": 0.292929470539093, "learning_rate": 6.056878567611935e-06, "loss": 0.3165, "step": 10231 }, { "epoch": 1.467163751075423, "grad_norm": 0.27775678038597107, "learning_rate": 6.056063159627415e-06, "loss": 0.3013, "step": 10232 }, { "epoch": 1.4673071408087182, "grad_norm": 0.2790108919143677, "learning_rate": 6.055247722243722e-06, "loss": 0.3115, "step": 10233 }, { "epoch": 1.4674505305420131, "grad_norm": 0.2829984724521637, "learning_rate": 6.054432255483555e-06, "loss": 0.3266, "step": 10234 }, { "epoch": 1.4675939202753083, "grad_norm": 0.3009800314903259, "learning_rate": 6.053616759369616e-06, "loss": 0.3061, "step": 10235 }, { "epoch": 1.4677373100086033, "grad_norm": 0.28055399656295776, "learning_rate": 6.052801233924608e-06, "loss": 0.3053, "step": 10236 }, { "epoch": 1.4678806997418985, "grad_norm": 0.26914912462234497, "learning_rate": 6.051985679171232e-06, "loss": 0.2966, "step": 10237 }, { "epoch": 1.4680240894751937, "grad_norm": 0.2971605956554413, "learning_rate": 6.0511700951321924e-06, "loss": 0.3118, "step": 10238 }, { "epoch": 1.4681674792084887, "grad_norm": 0.2911428213119507, "learning_rate": 6.050354481830195e-06, "loss": 0.3129, "step": 10239 }, { "epoch": 1.4683108689417836, "grad_norm": 0.3286452889442444, "learning_rate": 6.0495388392879465e-06, "loss": 0.3228, "step": 10240 }, { "epoch": 1.4684542586750788, "grad_norm": 0.2938348352909088, "learning_rate": 6.048723167528149e-06, "loss": 0.2991, "step": 10241 }, { "epoch": 1.468597648408374, "grad_norm": 0.31170716881752014, "learning_rate": 6.0479074665735115e-06, "loss": 0.2963, "step": 10242 }, { "epoch": 1.468741038141669, "grad_norm": 0.2721485495567322, "learning_rate": 6.047091736446744e-06, "loss": 0.3004, "step": 10243 }, { "epoch": 1.4688844278749642, "grad_norm": 0.3106519281864166, "learning_rate": 6.046275977170552e-06, "loss": 0.3032, "step": 10244 }, { "epoch": 1.4690278176082592, "grad_norm": 0.30690455436706543, "learning_rate": 6.045460188767648e-06, "loss": 0.3042, "step": 10245 }, { "epoch": 1.4691712073415544, "grad_norm": 0.27082329988479614, "learning_rate": 6.044644371260739e-06, "loss": 0.2856, "step": 10246 }, { "epoch": 1.4693145970748493, "grad_norm": 0.2921239137649536, "learning_rate": 6.043828524672536e-06, "loss": 0.3288, "step": 10247 }, { "epoch": 1.4694579868081445, "grad_norm": 0.2742648720741272, "learning_rate": 6.043012649025755e-06, "loss": 0.3105, "step": 10248 }, { "epoch": 1.4696013765414397, "grad_norm": 0.32121092081069946, "learning_rate": 6.0421967443431055e-06, "loss": 0.3025, "step": 10249 }, { "epoch": 1.4697447662747347, "grad_norm": 0.2950989007949829, "learning_rate": 6.041380810647302e-06, "loss": 0.3258, "step": 10250 }, { "epoch": 1.4698881560080297, "grad_norm": 0.27440017461776733, "learning_rate": 6.040564847961059e-06, "loss": 0.2921, "step": 10251 }, { "epoch": 1.4700315457413249, "grad_norm": 0.31941959261894226, "learning_rate": 6.03974885630709e-06, "loss": 0.2952, "step": 10252 }, { "epoch": 1.47017493547462, "grad_norm": 0.2988276481628418, "learning_rate": 6.038932835708113e-06, "loss": 0.2852, "step": 10253 }, { "epoch": 1.470318325207915, "grad_norm": 0.2781754732131958, "learning_rate": 6.038116786186845e-06, "loss": 0.2972, "step": 10254 }, { "epoch": 1.4704617149412103, "grad_norm": 0.3353421092033386, "learning_rate": 6.0373007077660014e-06, "loss": 0.3258, "step": 10255 }, { "epoch": 1.4706051046745052, "grad_norm": 0.2915118932723999, "learning_rate": 6.036484600468303e-06, "loss": 0.3092, "step": 10256 }, { "epoch": 1.4707484944078004, "grad_norm": 0.2891678810119629, "learning_rate": 6.035668464316466e-06, "loss": 0.2878, "step": 10257 }, { "epoch": 1.4708918841410954, "grad_norm": 0.301624059677124, "learning_rate": 6.034852299333214e-06, "loss": 0.3202, "step": 10258 }, { "epoch": 1.4710352738743906, "grad_norm": 0.297436386346817, "learning_rate": 6.034036105541265e-06, "loss": 0.2986, "step": 10259 }, { "epoch": 1.4711786636076858, "grad_norm": 0.28205606341362, "learning_rate": 6.033219882963342e-06, "loss": 0.3027, "step": 10260 }, { "epoch": 1.4713220533409808, "grad_norm": 0.3077172338962555, "learning_rate": 6.032403631622165e-06, "loss": 0.3311, "step": 10261 }, { "epoch": 1.471465443074276, "grad_norm": 0.30094069242477417, "learning_rate": 6.031587351540461e-06, "loss": 0.305, "step": 10262 }, { "epoch": 1.471608832807571, "grad_norm": 0.2968714237213135, "learning_rate": 6.030771042740952e-06, "loss": 0.306, "step": 10263 }, { "epoch": 1.4717522225408661, "grad_norm": 0.2782132029533386, "learning_rate": 6.0299547052463614e-06, "loss": 0.3086, "step": 10264 }, { "epoch": 1.471895612274161, "grad_norm": 0.2749565541744232, "learning_rate": 6.029138339079417e-06, "loss": 0.3088, "step": 10265 }, { "epoch": 1.4720390020074563, "grad_norm": 0.29389578104019165, "learning_rate": 6.028321944262842e-06, "loss": 0.3044, "step": 10266 }, { "epoch": 1.4721823917407515, "grad_norm": 0.31069934368133545, "learning_rate": 6.02750552081937e-06, "loss": 0.3097, "step": 10267 }, { "epoch": 1.4723257814740465, "grad_norm": 0.2662378251552582, "learning_rate": 6.026689068771722e-06, "loss": 0.3147, "step": 10268 }, { "epoch": 1.4724691712073414, "grad_norm": 0.28365975618362427, "learning_rate": 6.025872588142631e-06, "loss": 0.3173, "step": 10269 }, { "epoch": 1.4726125609406366, "grad_norm": 0.28911292552948, "learning_rate": 6.025056078954827e-06, "loss": 0.3057, "step": 10270 }, { "epoch": 1.4727559506739318, "grad_norm": 0.30938881635665894, "learning_rate": 6.024239541231036e-06, "loss": 0.3269, "step": 10271 }, { "epoch": 1.4728993404072268, "grad_norm": 0.2975444793701172, "learning_rate": 6.0234229749939935e-06, "loss": 0.3231, "step": 10272 }, { "epoch": 1.473042730140522, "grad_norm": 0.2994506061077118, "learning_rate": 6.022606380266427e-06, "loss": 0.2991, "step": 10273 }, { "epoch": 1.473186119873817, "grad_norm": 0.2996737062931061, "learning_rate": 6.021789757071073e-06, "loss": 0.3063, "step": 10274 }, { "epoch": 1.4733295096071122, "grad_norm": 0.28416895866394043, "learning_rate": 6.020973105430665e-06, "loss": 0.3099, "step": 10275 }, { "epoch": 1.4734728993404071, "grad_norm": 0.261669397354126, "learning_rate": 6.020156425367936e-06, "loss": 0.3165, "step": 10276 }, { "epoch": 1.4736162890737023, "grad_norm": 0.2770110070705414, "learning_rate": 6.01933971690562e-06, "loss": 0.314, "step": 10277 }, { "epoch": 1.4737596788069975, "grad_norm": 0.28476494550704956, "learning_rate": 6.0185229800664565e-06, "loss": 0.2959, "step": 10278 }, { "epoch": 1.4739030685402925, "grad_norm": 0.2984701693058014, "learning_rate": 6.017706214873178e-06, "loss": 0.2914, "step": 10279 }, { "epoch": 1.4740464582735875, "grad_norm": 0.29796352982521057, "learning_rate": 6.016889421348525e-06, "loss": 0.3178, "step": 10280 }, { "epoch": 1.4741898480068827, "grad_norm": 0.28463295102119446, "learning_rate": 6.016072599515236e-06, "loss": 0.2799, "step": 10281 }, { "epoch": 1.4743332377401779, "grad_norm": 0.2642899751663208, "learning_rate": 6.015255749396047e-06, "loss": 0.3104, "step": 10282 }, { "epoch": 1.4744766274734729, "grad_norm": 0.29017356038093567, "learning_rate": 6.0144388710137e-06, "loss": 0.3095, "step": 10283 }, { "epoch": 1.474620017206768, "grad_norm": 0.30618545413017273, "learning_rate": 6.013621964390935e-06, "loss": 0.3446, "step": 10284 }, { "epoch": 1.474763406940063, "grad_norm": 0.26711955666542053, "learning_rate": 6.012805029550495e-06, "loss": 0.3184, "step": 10285 }, { "epoch": 1.4749067966733582, "grad_norm": 0.26339495182037354, "learning_rate": 6.011988066515121e-06, "loss": 0.2839, "step": 10286 }, { "epoch": 1.4750501864066532, "grad_norm": 0.2845524847507477, "learning_rate": 6.011171075307555e-06, "loss": 0.3078, "step": 10287 }, { "epoch": 1.4751935761399484, "grad_norm": 0.30139443278312683, "learning_rate": 6.010354055950541e-06, "loss": 0.303, "step": 10288 }, { "epoch": 1.4753369658732436, "grad_norm": 0.2729882001876831, "learning_rate": 6.009537008466827e-06, "loss": 0.3118, "step": 10289 }, { "epoch": 1.4754803556065386, "grad_norm": 0.28481152653694153, "learning_rate": 6.008719932879154e-06, "loss": 0.311, "step": 10290 }, { "epoch": 1.4756237453398335, "grad_norm": 0.3108441233634949, "learning_rate": 6.0079028292102705e-06, "loss": 0.2939, "step": 10291 }, { "epoch": 1.4757671350731287, "grad_norm": 0.2868598997592926, "learning_rate": 6.0070856974829195e-06, "loss": 0.2988, "step": 10292 }, { "epoch": 1.475910524806424, "grad_norm": 0.27657780051231384, "learning_rate": 6.0062685377198544e-06, "loss": 0.2939, "step": 10293 }, { "epoch": 1.476053914539719, "grad_norm": 0.28787752985954285, "learning_rate": 6.005451349943822e-06, "loss": 0.2847, "step": 10294 }, { "epoch": 1.476197304273014, "grad_norm": 0.2899489104747772, "learning_rate": 6.004634134177569e-06, "loss": 0.329, "step": 10295 }, { "epoch": 1.476340694006309, "grad_norm": 0.2699485719203949, "learning_rate": 6.003816890443848e-06, "loss": 0.2997, "step": 10296 }, { "epoch": 1.4764840837396043, "grad_norm": 0.2785831391811371, "learning_rate": 6.0029996187654095e-06, "loss": 0.303, "step": 10297 }, { "epoch": 1.4766274734728992, "grad_norm": 0.2845688462257385, "learning_rate": 6.002182319165003e-06, "loss": 0.2938, "step": 10298 }, { "epoch": 1.4767708632061944, "grad_norm": 0.2908737361431122, "learning_rate": 6.001364991665384e-06, "loss": 0.2911, "step": 10299 }, { "epoch": 1.4769142529394896, "grad_norm": 0.3049168288707733, "learning_rate": 6.000547636289303e-06, "loss": 0.3054, "step": 10300 }, { "epoch": 1.4770576426727846, "grad_norm": 0.2874380946159363, "learning_rate": 5.999730253059515e-06, "loss": 0.3362, "step": 10301 }, { "epoch": 1.4772010324060798, "grad_norm": 0.2874349355697632, "learning_rate": 5.998912841998774e-06, "loss": 0.3085, "step": 10302 }, { "epoch": 1.4773444221393748, "grad_norm": 0.29803696274757385, "learning_rate": 5.998095403129836e-06, "loss": 0.3089, "step": 10303 }, { "epoch": 1.47748781187267, "grad_norm": 0.2690209746360779, "learning_rate": 5.997277936475459e-06, "loss": 0.2928, "step": 10304 }, { "epoch": 1.477631201605965, "grad_norm": 0.28000208735466003, "learning_rate": 5.996460442058398e-06, "loss": 0.2978, "step": 10305 }, { "epoch": 1.4777745913392601, "grad_norm": 0.29361259937286377, "learning_rate": 5.995642919901411e-06, "loss": 0.3086, "step": 10306 }, { "epoch": 1.4779179810725553, "grad_norm": 0.28464335203170776, "learning_rate": 5.994825370027255e-06, "loss": 0.2793, "step": 10307 }, { "epoch": 1.4780613708058503, "grad_norm": 0.2979539632797241, "learning_rate": 5.994007792458694e-06, "loss": 0.3009, "step": 10308 }, { "epoch": 1.4782047605391453, "grad_norm": 0.27143824100494385, "learning_rate": 5.9931901872184826e-06, "loss": 0.309, "step": 10309 }, { "epoch": 1.4783481502724405, "grad_norm": 0.2924184501171112, "learning_rate": 5.9923725543293855e-06, "loss": 0.31, "step": 10310 }, { "epoch": 1.4784915400057357, "grad_norm": 0.2795676290988922, "learning_rate": 5.991554893814162e-06, "loss": 0.2881, "step": 10311 }, { "epoch": 1.4786349297390307, "grad_norm": 0.27941349148750305, "learning_rate": 5.990737205695576e-06, "loss": 0.3067, "step": 10312 }, { "epoch": 1.4787783194723259, "grad_norm": 0.2764676511287689, "learning_rate": 5.989919489996392e-06, "loss": 0.3006, "step": 10313 }, { "epoch": 1.4789217092056208, "grad_norm": 0.26937565207481384, "learning_rate": 5.98910174673937e-06, "loss": 0.3054, "step": 10314 }, { "epoch": 1.479065098938916, "grad_norm": 0.28074344992637634, "learning_rate": 5.9882839759472775e-06, "loss": 0.3064, "step": 10315 }, { "epoch": 1.479208488672211, "grad_norm": 0.2899446487426758, "learning_rate": 5.98746617764288e-06, "loss": 0.2918, "step": 10316 }, { "epoch": 1.4793518784055062, "grad_norm": 0.2751944959163666, "learning_rate": 5.986648351848942e-06, "loss": 0.3074, "step": 10317 }, { "epoch": 1.4794952681388014, "grad_norm": 0.2808387875556946, "learning_rate": 5.985830498588233e-06, "loss": 0.3165, "step": 10318 }, { "epoch": 1.4796386578720964, "grad_norm": 0.3236287534236908, "learning_rate": 5.985012617883519e-06, "loss": 0.3189, "step": 10319 }, { "epoch": 1.4797820476053913, "grad_norm": 0.2802340090274811, "learning_rate": 5.984194709757567e-06, "loss": 0.3007, "step": 10320 }, { "epoch": 1.4799254373386865, "grad_norm": 0.2836288511753082, "learning_rate": 5.983376774233149e-06, "loss": 0.3222, "step": 10321 }, { "epoch": 1.4800688270719817, "grad_norm": 0.2982088029384613, "learning_rate": 5.9825588113330355e-06, "loss": 0.3168, "step": 10322 }, { "epoch": 1.4802122168052767, "grad_norm": 0.2888930141925812, "learning_rate": 5.981740821079995e-06, "loss": 0.3067, "step": 10323 }, { "epoch": 1.480355606538572, "grad_norm": 0.2797313630580902, "learning_rate": 5.980922803496802e-06, "loss": 0.3073, "step": 10324 }, { "epoch": 1.4804989962718669, "grad_norm": 0.2888546288013458, "learning_rate": 5.980104758606227e-06, "loss": 0.3231, "step": 10325 }, { "epoch": 1.480642386005162, "grad_norm": 0.2677598297595978, "learning_rate": 5.979286686431043e-06, "loss": 0.2956, "step": 10326 }, { "epoch": 1.480785775738457, "grad_norm": 0.281417578458786, "learning_rate": 5.978468586994023e-06, "loss": 0.3043, "step": 10327 }, { "epoch": 1.4809291654717522, "grad_norm": 0.29763472080230713, "learning_rate": 5.977650460317942e-06, "loss": 0.3177, "step": 10328 }, { "epoch": 1.4810725552050474, "grad_norm": 0.2826950252056122, "learning_rate": 5.9768323064255775e-06, "loss": 0.3083, "step": 10329 }, { "epoch": 1.4812159449383424, "grad_norm": 0.2877267301082611, "learning_rate": 5.976014125339703e-06, "loss": 0.2997, "step": 10330 }, { "epoch": 1.4813593346716374, "grad_norm": 0.27320483326911926, "learning_rate": 5.975195917083098e-06, "loss": 0.3076, "step": 10331 }, { "epoch": 1.4815027244049326, "grad_norm": 0.2752592861652374, "learning_rate": 5.974377681678538e-06, "loss": 0.2839, "step": 10332 }, { "epoch": 1.4816461141382278, "grad_norm": 0.2660152018070221, "learning_rate": 5.9735594191488035e-06, "loss": 0.3059, "step": 10333 }, { "epoch": 1.4817895038715228, "grad_norm": 0.3217853009700775, "learning_rate": 5.972741129516671e-06, "loss": 0.3256, "step": 10334 }, { "epoch": 1.481932893604818, "grad_norm": 0.2798680067062378, "learning_rate": 5.971922812804922e-06, "loss": 0.3325, "step": 10335 }, { "epoch": 1.482076283338113, "grad_norm": 0.2831534743309021, "learning_rate": 5.971104469036337e-06, "loss": 0.2946, "step": 10336 }, { "epoch": 1.4822196730714081, "grad_norm": 0.3093545436859131, "learning_rate": 5.9702860982336985e-06, "loss": 0.314, "step": 10337 }, { "epoch": 1.482363062804703, "grad_norm": 0.27997609972953796, "learning_rate": 5.9694677004197865e-06, "loss": 0.2918, "step": 10338 }, { "epoch": 1.4825064525379983, "grad_norm": 0.2851056456565857, "learning_rate": 5.9686492756173846e-06, "loss": 0.3234, "step": 10339 }, { "epoch": 1.4826498422712935, "grad_norm": 0.30133622884750366, "learning_rate": 5.967830823849279e-06, "loss": 0.3194, "step": 10340 }, { "epoch": 1.4827932320045885, "grad_norm": 0.3090994358062744, "learning_rate": 5.96701234513825e-06, "loss": 0.308, "step": 10341 }, { "epoch": 1.4829366217378834, "grad_norm": 0.3095860183238983, "learning_rate": 5.966193839507085e-06, "loss": 0.3024, "step": 10342 }, { "epoch": 1.4830800114711786, "grad_norm": 0.2720130980014801, "learning_rate": 5.965375306978572e-06, "loss": 0.297, "step": 10343 }, { "epoch": 1.4832234012044738, "grad_norm": 0.3013571798801422, "learning_rate": 5.964556747575493e-06, "loss": 0.3102, "step": 10344 }, { "epoch": 1.4833667909377688, "grad_norm": 0.3113076090812683, "learning_rate": 5.963738161320639e-06, "loss": 0.3289, "step": 10345 }, { "epoch": 1.483510180671064, "grad_norm": 0.2946680784225464, "learning_rate": 5.962919548236798e-06, "loss": 0.3098, "step": 10346 }, { "epoch": 1.483653570404359, "grad_norm": 0.2713093161582947, "learning_rate": 5.962100908346755e-06, "loss": 0.311, "step": 10347 }, { "epoch": 1.4837969601376542, "grad_norm": 0.2743735909461975, "learning_rate": 5.961282241673305e-06, "loss": 0.3054, "step": 10348 }, { "epoch": 1.4839403498709491, "grad_norm": 0.29848548769950867, "learning_rate": 5.9604635482392346e-06, "loss": 0.2952, "step": 10349 }, { "epoch": 1.4840837396042443, "grad_norm": 0.27816852927207947, "learning_rate": 5.959644828067338e-06, "loss": 0.3187, "step": 10350 }, { "epoch": 1.4842271293375395, "grad_norm": 0.2741817831993103, "learning_rate": 5.958826081180406e-06, "loss": 0.2796, "step": 10351 }, { "epoch": 1.4843705190708345, "grad_norm": 0.3022693991661072, "learning_rate": 5.95800730760123e-06, "loss": 0.3108, "step": 10352 }, { "epoch": 1.4845139088041297, "grad_norm": 0.2796775698661804, "learning_rate": 5.957188507352605e-06, "loss": 0.311, "step": 10353 }, { "epoch": 1.4846572985374247, "grad_norm": 0.2872070074081421, "learning_rate": 5.956369680457324e-06, "loss": 0.3102, "step": 10354 }, { "epoch": 1.4848006882707199, "grad_norm": 0.28737154603004456, "learning_rate": 5.955550826938181e-06, "loss": 0.3066, "step": 10355 }, { "epoch": 1.4849440780040148, "grad_norm": 0.28151440620422363, "learning_rate": 5.954731946817973e-06, "loss": 0.3227, "step": 10356 }, { "epoch": 1.48508746773731, "grad_norm": 0.2912636399269104, "learning_rate": 5.953913040119497e-06, "loss": 0.3188, "step": 10357 }, { "epoch": 1.4852308574706052, "grad_norm": 0.3098083734512329, "learning_rate": 5.953094106865549e-06, "loss": 0.3198, "step": 10358 }, { "epoch": 1.4853742472039002, "grad_norm": 0.28107762336730957, "learning_rate": 5.9522751470789286e-06, "loss": 0.295, "step": 10359 }, { "epoch": 1.4855176369371952, "grad_norm": 0.300393283367157, "learning_rate": 5.951456160782431e-06, "loss": 0.3303, "step": 10360 }, { "epoch": 1.4856610266704904, "grad_norm": 0.2698756456375122, "learning_rate": 5.950637147998859e-06, "loss": 0.2932, "step": 10361 }, { "epoch": 1.4858044164037856, "grad_norm": 0.2679443359375, "learning_rate": 5.949818108751011e-06, "loss": 0.321, "step": 10362 }, { "epoch": 1.4859478061370806, "grad_norm": 0.31023699045181274, "learning_rate": 5.948999043061687e-06, "loss": 0.2991, "step": 10363 }, { "epoch": 1.4860911958703757, "grad_norm": 0.2926761209964752, "learning_rate": 5.948179950953692e-06, "loss": 0.3267, "step": 10364 }, { "epoch": 1.4862345856036707, "grad_norm": 0.27157506346702576, "learning_rate": 5.947360832449822e-06, "loss": 0.3178, "step": 10365 }, { "epoch": 1.486377975336966, "grad_norm": 0.26900485157966614, "learning_rate": 5.9465416875728855e-06, "loss": 0.3117, "step": 10366 }, { "epoch": 1.486521365070261, "grad_norm": 0.30479463934898376, "learning_rate": 5.945722516345686e-06, "loss": 0.2904, "step": 10367 }, { "epoch": 1.486664754803556, "grad_norm": 0.277802437543869, "learning_rate": 5.944903318791025e-06, "loss": 0.3128, "step": 10368 }, { "epoch": 1.4868081445368513, "grad_norm": 0.3241278827190399, "learning_rate": 5.944084094931708e-06, "loss": 0.2986, "step": 10369 }, { "epoch": 1.4869515342701463, "grad_norm": 0.29856595396995544, "learning_rate": 5.943264844790544e-06, "loss": 0.3232, "step": 10370 }, { "epoch": 1.4870949240034412, "grad_norm": 0.2765192985534668, "learning_rate": 5.942445568390336e-06, "loss": 0.3043, "step": 10371 }, { "epoch": 1.4872383137367364, "grad_norm": 0.2805075943470001, "learning_rate": 5.941626265753895e-06, "loss": 0.2807, "step": 10372 }, { "epoch": 1.4873817034700316, "grad_norm": 0.2887674868106842, "learning_rate": 5.940806936904027e-06, "loss": 0.3231, "step": 10373 }, { "epoch": 1.4875250932033266, "grad_norm": 0.2760879397392273, "learning_rate": 5.939987581863539e-06, "loss": 0.3045, "step": 10374 }, { "epoch": 1.4876684829366218, "grad_norm": 0.2752663195133209, "learning_rate": 5.939168200655244e-06, "loss": 0.3157, "step": 10375 }, { "epoch": 1.4878118726699168, "grad_norm": 0.2750839591026306, "learning_rate": 5.938348793301951e-06, "loss": 0.3158, "step": 10376 }, { "epoch": 1.487955262403212, "grad_norm": 0.2939258813858032, "learning_rate": 5.93752935982647e-06, "loss": 0.3135, "step": 10377 }, { "epoch": 1.488098652136507, "grad_norm": 0.2909191846847534, "learning_rate": 5.936709900251616e-06, "loss": 0.3251, "step": 10378 }, { "epoch": 1.4882420418698021, "grad_norm": 0.2779396176338196, "learning_rate": 5.935890414600199e-06, "loss": 0.2981, "step": 10379 }, { "epoch": 1.4883854316030973, "grad_norm": 0.2945590317249298, "learning_rate": 5.935070902895032e-06, "loss": 0.304, "step": 10380 }, { "epoch": 1.4885288213363923, "grad_norm": 0.26664453744888306, "learning_rate": 5.934251365158929e-06, "loss": 0.2908, "step": 10381 }, { "epoch": 1.4886722110696873, "grad_norm": 0.29848387837409973, "learning_rate": 5.933431801414705e-06, "loss": 0.3135, "step": 10382 }, { "epoch": 1.4888156008029825, "grad_norm": 0.28140631318092346, "learning_rate": 5.932612211685176e-06, "loss": 0.305, "step": 10383 }, { "epoch": 1.4889589905362777, "grad_norm": 0.30484890937805176, "learning_rate": 5.931792595993156e-06, "loss": 0.2894, "step": 10384 }, { "epoch": 1.4891023802695726, "grad_norm": 0.2946866452693939, "learning_rate": 5.930972954361466e-06, "loss": 0.2939, "step": 10385 }, { "epoch": 1.4892457700028678, "grad_norm": 0.2932693660259247, "learning_rate": 5.930153286812921e-06, "loss": 0.3386, "step": 10386 }, { "epoch": 1.4893891597361628, "grad_norm": 0.2736310660839081, "learning_rate": 5.9293335933703375e-06, "loss": 0.3075, "step": 10387 }, { "epoch": 1.489532549469458, "grad_norm": 0.28579220175743103, "learning_rate": 5.928513874056537e-06, "loss": 0.3101, "step": 10388 }, { "epoch": 1.489675939202753, "grad_norm": 0.25316253304481506, "learning_rate": 5.9276941288943405e-06, "loss": 0.2779, "step": 10389 }, { "epoch": 1.4898193289360482, "grad_norm": 0.2826220691204071, "learning_rate": 5.926874357906565e-06, "loss": 0.2968, "step": 10390 }, { "epoch": 1.4899627186693434, "grad_norm": 0.2726476192474365, "learning_rate": 5.926054561116034e-06, "loss": 0.3052, "step": 10391 }, { "epoch": 1.4901061084026384, "grad_norm": 0.2761729061603546, "learning_rate": 5.925234738545566e-06, "loss": 0.3193, "step": 10392 }, { "epoch": 1.4902494981359335, "grad_norm": 0.2829453647136688, "learning_rate": 5.9244148902179865e-06, "loss": 0.3061, "step": 10393 }, { "epoch": 1.4903928878692285, "grad_norm": 0.2631286382675171, "learning_rate": 5.923595016156121e-06, "loss": 0.3, "step": 10394 }, { "epoch": 1.4905362776025237, "grad_norm": 0.28194528818130493, "learning_rate": 5.92277511638279e-06, "loss": 0.2897, "step": 10395 }, { "epoch": 1.4906796673358187, "grad_norm": 0.29896390438079834, "learning_rate": 5.9219551909208185e-06, "loss": 0.3104, "step": 10396 }, { "epoch": 1.4908230570691139, "grad_norm": 0.27478694915771484, "learning_rate": 5.921135239793034e-06, "loss": 0.2835, "step": 10397 }, { "epoch": 1.490966446802409, "grad_norm": 0.26918259263038635, "learning_rate": 5.920315263022261e-06, "loss": 0.2986, "step": 10398 }, { "epoch": 1.491109836535704, "grad_norm": 0.3024241328239441, "learning_rate": 5.919495260631326e-06, "loss": 0.3046, "step": 10399 }, { "epoch": 1.491253226268999, "grad_norm": 0.2839204967021942, "learning_rate": 5.918675232643059e-06, "loss": 0.3263, "step": 10400 }, { "epoch": 1.4913966160022942, "grad_norm": 0.27871379256248474, "learning_rate": 5.917855179080284e-06, "loss": 0.2962, "step": 10401 }, { "epoch": 1.4915400057355894, "grad_norm": 0.3094111382961273, "learning_rate": 5.917035099965834e-06, "loss": 0.3229, "step": 10402 }, { "epoch": 1.4916833954688844, "grad_norm": 0.294037789106369, "learning_rate": 5.916214995322538e-06, "loss": 0.2879, "step": 10403 }, { "epoch": 1.4918267852021796, "grad_norm": 0.30952200293540955, "learning_rate": 5.9153948651732255e-06, "loss": 0.2938, "step": 10404 }, { "epoch": 1.4919701749354746, "grad_norm": 0.2799701690673828, "learning_rate": 5.9145747095407295e-06, "loss": 0.3128, "step": 10405 }, { "epoch": 1.4921135646687698, "grad_norm": 0.2978966236114502, "learning_rate": 5.913754528447878e-06, "loss": 0.3079, "step": 10406 }, { "epoch": 1.4922569544020647, "grad_norm": 0.31318095326423645, "learning_rate": 5.912934321917509e-06, "loss": 0.2991, "step": 10407 }, { "epoch": 1.49240034413536, "grad_norm": 0.2843029499053955, "learning_rate": 5.912114089972451e-06, "loss": 0.3218, "step": 10408 }, { "epoch": 1.4925437338686551, "grad_norm": 0.27497178316116333, "learning_rate": 5.911293832635538e-06, "loss": 0.3142, "step": 10409 }, { "epoch": 1.49268712360195, "grad_norm": 0.27790090441703796, "learning_rate": 5.910473549929609e-06, "loss": 0.2908, "step": 10410 }, { "epoch": 1.492830513335245, "grad_norm": 0.3138846457004547, "learning_rate": 5.909653241877497e-06, "loss": 0.3188, "step": 10411 }, { "epoch": 1.4929739030685403, "grad_norm": 0.2942313253879547, "learning_rate": 5.908832908502036e-06, "loss": 0.3158, "step": 10412 }, { "epoch": 1.4931172928018355, "grad_norm": 0.26724129915237427, "learning_rate": 5.908012549826068e-06, "loss": 0.3293, "step": 10413 }, { "epoch": 1.4932606825351304, "grad_norm": 0.2853638529777527, "learning_rate": 5.9071921658724255e-06, "loss": 0.2924, "step": 10414 }, { "epoch": 1.4934040722684256, "grad_norm": 0.28419390320777893, "learning_rate": 5.906371756663948e-06, "loss": 0.328, "step": 10415 }, { "epoch": 1.4935474620017206, "grad_norm": 0.2810748517513275, "learning_rate": 5.905551322223477e-06, "loss": 0.3212, "step": 10416 }, { "epoch": 1.4936908517350158, "grad_norm": 0.28365856409072876, "learning_rate": 5.904730862573849e-06, "loss": 0.3006, "step": 10417 }, { "epoch": 1.4938342414683108, "grad_norm": 0.28943607211112976, "learning_rate": 5.903910377737906e-06, "loss": 0.3025, "step": 10418 }, { "epoch": 1.493977631201606, "grad_norm": 0.268242210149765, "learning_rate": 5.9030898677384874e-06, "loss": 0.311, "step": 10419 }, { "epoch": 1.4941210209349012, "grad_norm": 0.28901407122612, "learning_rate": 5.902269332598437e-06, "loss": 0.2955, "step": 10420 }, { "epoch": 1.4942644106681962, "grad_norm": 0.29035454988479614, "learning_rate": 5.9014487723405964e-06, "loss": 0.3157, "step": 10421 }, { "epoch": 1.4944078004014911, "grad_norm": 0.2826346457004547, "learning_rate": 5.9006281869878076e-06, "loss": 0.3086, "step": 10422 }, { "epoch": 1.4945511901347863, "grad_norm": 0.2652440071105957, "learning_rate": 5.899807576562917e-06, "loss": 0.3089, "step": 10423 }, { "epoch": 1.4946945798680815, "grad_norm": 0.26645734906196594, "learning_rate": 5.898986941088767e-06, "loss": 0.2844, "step": 10424 }, { "epoch": 1.4948379696013765, "grad_norm": 0.28626900911331177, "learning_rate": 5.898166280588204e-06, "loss": 0.289, "step": 10425 }, { "epoch": 1.4949813593346717, "grad_norm": 0.30448034405708313, "learning_rate": 5.897345595084073e-06, "loss": 0.3109, "step": 10426 }, { "epoch": 1.4951247490679667, "grad_norm": 0.26476770639419556, "learning_rate": 5.896524884599221e-06, "loss": 0.3078, "step": 10427 }, { "epoch": 1.4952681388012619, "grad_norm": 0.27852317690849304, "learning_rate": 5.895704149156495e-06, "loss": 0.3159, "step": 10428 }, { "epoch": 1.4954115285345568, "grad_norm": 0.2919239401817322, "learning_rate": 5.894883388778743e-06, "loss": 0.3003, "step": 10429 }, { "epoch": 1.495554918267852, "grad_norm": 0.2946164309978485, "learning_rate": 5.894062603488815e-06, "loss": 0.2991, "step": 10430 }, { "epoch": 1.4956983080011472, "grad_norm": 0.2719046175479889, "learning_rate": 5.8932417933095576e-06, "loss": 0.3108, "step": 10431 }, { "epoch": 1.4958416977344422, "grad_norm": 0.2876795828342438, "learning_rate": 5.892420958263826e-06, "loss": 0.2896, "step": 10432 }, { "epoch": 1.4959850874677372, "grad_norm": 0.28596314787864685, "learning_rate": 5.8916000983744646e-06, "loss": 0.319, "step": 10433 }, { "epoch": 1.4961284772010324, "grad_norm": 0.27730652689933777, "learning_rate": 5.890779213664331e-06, "loss": 0.3178, "step": 10434 }, { "epoch": 1.4962718669343276, "grad_norm": 0.3004304766654968, "learning_rate": 5.8899583041562715e-06, "loss": 0.2932, "step": 10435 }, { "epoch": 1.4964152566676225, "grad_norm": 0.2921658158302307, "learning_rate": 5.889137369873143e-06, "loss": 0.2956, "step": 10436 }, { "epoch": 1.4965586464009177, "grad_norm": 0.264109343290329, "learning_rate": 5.888316410837798e-06, "loss": 0.308, "step": 10437 }, { "epoch": 1.4967020361342127, "grad_norm": 0.28909194469451904, "learning_rate": 5.88749542707309e-06, "loss": 0.3255, "step": 10438 }, { "epoch": 1.496845425867508, "grad_norm": 0.2735157012939453, "learning_rate": 5.8866744186018746e-06, "loss": 0.3015, "step": 10439 }, { "epoch": 1.4969888156008029, "grad_norm": 0.27278026938438416, "learning_rate": 5.885853385447009e-06, "loss": 0.3, "step": 10440 }, { "epoch": 1.497132205334098, "grad_norm": 0.27965936064720154, "learning_rate": 5.885032327631346e-06, "loss": 0.3, "step": 10441 }, { "epoch": 1.4972755950673933, "grad_norm": 0.26250895857810974, "learning_rate": 5.884211245177744e-06, "loss": 0.3013, "step": 10442 }, { "epoch": 1.4974189848006882, "grad_norm": 0.29242098331451416, "learning_rate": 5.883390138109064e-06, "loss": 0.3209, "step": 10443 }, { "epoch": 1.4975623745339834, "grad_norm": 0.2934950292110443, "learning_rate": 5.882569006448159e-06, "loss": 0.313, "step": 10444 }, { "epoch": 1.4977057642672784, "grad_norm": 0.29231756925582886, "learning_rate": 5.881747850217891e-06, "loss": 0.2992, "step": 10445 }, { "epoch": 1.4978491540005736, "grad_norm": 0.26700398325920105, "learning_rate": 5.88092666944112e-06, "loss": 0.3103, "step": 10446 }, { "epoch": 1.4979925437338686, "grad_norm": 0.288185179233551, "learning_rate": 5.880105464140704e-06, "loss": 0.3109, "step": 10447 }, { "epoch": 1.4981359334671638, "grad_norm": 0.3077104091644287, "learning_rate": 5.879284234339508e-06, "loss": 0.2907, "step": 10448 }, { "epoch": 1.498279323200459, "grad_norm": 0.2687305510044098, "learning_rate": 5.878462980060389e-06, "loss": 0.2855, "step": 10449 }, { "epoch": 1.498422712933754, "grad_norm": 0.2883019745349884, "learning_rate": 5.877641701326213e-06, "loss": 0.3254, "step": 10450 }, { "epoch": 1.498566102667049, "grad_norm": 0.2670714855194092, "learning_rate": 5.876820398159843e-06, "loss": 0.2967, "step": 10451 }, { "epoch": 1.4987094924003441, "grad_norm": 0.2863713204860687, "learning_rate": 5.8759990705841406e-06, "loss": 0.3241, "step": 10452 }, { "epoch": 1.4988528821336393, "grad_norm": 0.2778911292552948, "learning_rate": 5.875177718621974e-06, "loss": 0.3047, "step": 10453 }, { "epoch": 1.4989962718669343, "grad_norm": 0.29599615931510925, "learning_rate": 5.8743563422962035e-06, "loss": 0.3145, "step": 10454 }, { "epoch": 1.4991396616002295, "grad_norm": 0.2852290868759155, "learning_rate": 5.873534941629697e-06, "loss": 0.3048, "step": 10455 }, { "epoch": 1.4992830513335245, "grad_norm": 0.27271342277526855, "learning_rate": 5.872713516645323e-06, "loss": 0.2888, "step": 10456 }, { "epoch": 1.4994264410668197, "grad_norm": 0.30614224076271057, "learning_rate": 5.871892067365946e-06, "loss": 0.3125, "step": 10457 }, { "epoch": 1.4995698308001146, "grad_norm": 0.2675994038581848, "learning_rate": 5.8710705938144365e-06, "loss": 0.297, "step": 10458 }, { "epoch": 1.4997132205334098, "grad_norm": 0.27186402678489685, "learning_rate": 5.870249096013661e-06, "loss": 0.2879, "step": 10459 }, { "epoch": 1.499856610266705, "grad_norm": 0.2701382637023926, "learning_rate": 5.869427573986489e-06, "loss": 0.2928, "step": 10460 }, { "epoch": 1.5, "grad_norm": 0.2617662250995636, "learning_rate": 5.868606027755792e-06, "loss": 0.2966, "step": 10461 }, { "epoch": 1.500143389733295, "grad_norm": 0.2796574532985687, "learning_rate": 5.867784457344439e-06, "loss": 0.2977, "step": 10462 }, { "epoch": 1.5002867794665902, "grad_norm": 0.28204548358917236, "learning_rate": 5.8669628627753e-06, "loss": 0.2867, "step": 10463 }, { "epoch": 1.5004301691998854, "grad_norm": 0.27655404806137085, "learning_rate": 5.8661412440712504e-06, "loss": 0.3089, "step": 10464 }, { "epoch": 1.5005735589331803, "grad_norm": 0.2748253345489502, "learning_rate": 5.865319601255159e-06, "loss": 0.3108, "step": 10465 }, { "epoch": 1.5007169486664753, "grad_norm": 0.2837110161781311, "learning_rate": 5.864497934349902e-06, "loss": 0.3094, "step": 10466 }, { "epoch": 1.5008603383997707, "grad_norm": 0.29096290469169617, "learning_rate": 5.863676243378353e-06, "loss": 0.2919, "step": 10467 }, { "epoch": 1.5010037281330657, "grad_norm": 0.2994256317615509, "learning_rate": 5.862854528363386e-06, "loss": 0.2983, "step": 10468 }, { "epoch": 1.5011471178663607, "grad_norm": 0.2758235037326813, "learning_rate": 5.862032789327875e-06, "loss": 0.326, "step": 10469 }, { "epoch": 1.5012905075996559, "grad_norm": 0.2675608694553375, "learning_rate": 5.861211026294699e-06, "loss": 0.2954, "step": 10470 }, { "epoch": 1.501433897332951, "grad_norm": 0.2792085111141205, "learning_rate": 5.860389239286731e-06, "loss": 0.2995, "step": 10471 }, { "epoch": 1.501577287066246, "grad_norm": 0.2831604778766632, "learning_rate": 5.859567428326852e-06, "loss": 0.2997, "step": 10472 }, { "epoch": 1.501720676799541, "grad_norm": 0.28799691796302795, "learning_rate": 5.858745593437936e-06, "loss": 0.2937, "step": 10473 }, { "epoch": 1.5018640665328362, "grad_norm": 0.27849072217941284, "learning_rate": 5.857923734642865e-06, "loss": 0.3126, "step": 10474 }, { "epoch": 1.5020074562661314, "grad_norm": 0.3266056478023529, "learning_rate": 5.857101851964516e-06, "loss": 0.3022, "step": 10475 }, { "epoch": 1.5021508459994264, "grad_norm": 0.28729215264320374, "learning_rate": 5.856279945425771e-06, "loss": 0.2975, "step": 10476 }, { "epoch": 1.5022942357327216, "grad_norm": 0.29019874334335327, "learning_rate": 5.855458015049509e-06, "loss": 0.3149, "step": 10477 }, { "epoch": 1.5024376254660168, "grad_norm": 0.28437647223472595, "learning_rate": 5.854636060858612e-06, "loss": 0.3098, "step": 10478 }, { "epoch": 1.5025810151993118, "grad_norm": 0.3067822754383087, "learning_rate": 5.8538140828759615e-06, "loss": 0.3045, "step": 10479 }, { "epoch": 1.5027244049326067, "grad_norm": 0.2920849621295929, "learning_rate": 5.852992081124441e-06, "loss": 0.3221, "step": 10480 }, { "epoch": 1.502867794665902, "grad_norm": 0.3010897934436798, "learning_rate": 5.852170055626932e-06, "loss": 0.3166, "step": 10481 }, { "epoch": 1.5030111843991971, "grad_norm": 0.29052767157554626, "learning_rate": 5.85134800640632e-06, "loss": 0.3192, "step": 10482 }, { "epoch": 1.503154574132492, "grad_norm": 0.30930763483047485, "learning_rate": 5.850525933485489e-06, "loss": 0.3295, "step": 10483 }, { "epoch": 1.503297963865787, "grad_norm": 0.28134557604789734, "learning_rate": 5.849703836887324e-06, "loss": 0.2803, "step": 10484 }, { "epoch": 1.5034413535990823, "grad_norm": 0.295219749212265, "learning_rate": 5.848881716634711e-06, "loss": 0.3061, "step": 10485 }, { "epoch": 1.5035847433323775, "grad_norm": 0.30101045966148376, "learning_rate": 5.848059572750539e-06, "loss": 0.3042, "step": 10486 }, { "epoch": 1.5037281330656724, "grad_norm": 0.33995798230171204, "learning_rate": 5.847237405257692e-06, "loss": 0.3213, "step": 10487 }, { "epoch": 1.5038715227989676, "grad_norm": 0.2935691177845001, "learning_rate": 5.846415214179058e-06, "loss": 0.3077, "step": 10488 }, { "epoch": 1.5040149125322628, "grad_norm": 0.273894727230072, "learning_rate": 5.845592999537528e-06, "loss": 0.2916, "step": 10489 }, { "epoch": 1.5041583022655578, "grad_norm": 0.2646960914134979, "learning_rate": 5.844770761355989e-06, "loss": 0.3138, "step": 10490 }, { "epoch": 1.5043016919988528, "grad_norm": 0.3065727651119232, "learning_rate": 5.843948499657331e-06, "loss": 0.2925, "step": 10491 }, { "epoch": 1.504445081732148, "grad_norm": 0.2854693830013275, "learning_rate": 5.8431262144644456e-06, "loss": 0.3111, "step": 10492 }, { "epoch": 1.5045884714654432, "grad_norm": 0.26893988251686096, "learning_rate": 5.842303905800222e-06, "loss": 0.3137, "step": 10493 }, { "epoch": 1.5047318611987381, "grad_norm": 0.27687814831733704, "learning_rate": 5.841481573687555e-06, "loss": 0.3061, "step": 10494 }, { "epoch": 1.5048752509320331, "grad_norm": 0.30540746450424194, "learning_rate": 5.8406592181493335e-06, "loss": 0.3243, "step": 10495 }, { "epoch": 1.5050186406653283, "grad_norm": 0.304168701171875, "learning_rate": 5.839836839208455e-06, "loss": 0.3157, "step": 10496 }, { "epoch": 1.5051620303986235, "grad_norm": 0.29829180240631104, "learning_rate": 5.83901443688781e-06, "loss": 0.3171, "step": 10497 }, { "epoch": 1.5053054201319185, "grad_norm": 0.2909087538719177, "learning_rate": 5.838192011210293e-06, "loss": 0.3019, "step": 10498 }, { "epoch": 1.5054488098652137, "grad_norm": 0.30252891778945923, "learning_rate": 5.837369562198802e-06, "loss": 0.3045, "step": 10499 }, { "epoch": 1.5055921995985089, "grad_norm": 0.30919018387794495, "learning_rate": 5.836547089876228e-06, "loss": 0.3175, "step": 10500 }, { "epoch": 1.5057355893318038, "grad_norm": 0.2848929464817047, "learning_rate": 5.835724594265469e-06, "loss": 0.3006, "step": 10501 }, { "epoch": 1.5058789790650988, "grad_norm": 0.2935403883457184, "learning_rate": 5.834902075389425e-06, "loss": 0.2975, "step": 10502 }, { "epoch": 1.506022368798394, "grad_norm": 0.2563394606113434, "learning_rate": 5.834079533270991e-06, "loss": 0.3019, "step": 10503 }, { "epoch": 1.5061657585316892, "grad_norm": 0.28369441628456116, "learning_rate": 5.833256967933065e-06, "loss": 0.2834, "step": 10504 }, { "epoch": 1.5063091482649842, "grad_norm": 0.2861303389072418, "learning_rate": 5.832434379398548e-06, "loss": 0.3099, "step": 10505 }, { "epoch": 1.5064525379982792, "grad_norm": 0.29383420944213867, "learning_rate": 5.831611767690338e-06, "loss": 0.2972, "step": 10506 }, { "epoch": 1.5065959277315746, "grad_norm": 0.2986448109149933, "learning_rate": 5.830789132831337e-06, "loss": 0.3137, "step": 10507 }, { "epoch": 1.5067393174648696, "grad_norm": 0.2691614031791687, "learning_rate": 5.829966474844443e-06, "loss": 0.3032, "step": 10508 }, { "epoch": 1.5068827071981645, "grad_norm": 0.26484763622283936, "learning_rate": 5.829143793752559e-06, "loss": 0.2806, "step": 10509 }, { "epoch": 1.5070260969314597, "grad_norm": 0.2761209011077881, "learning_rate": 5.8283210895785865e-06, "loss": 0.3117, "step": 10510 }, { "epoch": 1.507169486664755, "grad_norm": 0.27983278036117554, "learning_rate": 5.82749836234543e-06, "loss": 0.2917, "step": 10511 }, { "epoch": 1.50731287639805, "grad_norm": 0.29203036427497864, "learning_rate": 5.826675612075991e-06, "loss": 0.3096, "step": 10512 }, { "epoch": 1.5074562661313449, "grad_norm": 0.2806970775127411, "learning_rate": 5.8258528387931775e-06, "loss": 0.3273, "step": 10513 }, { "epoch": 1.50759965586464, "grad_norm": 0.27167627215385437, "learning_rate": 5.825030042519889e-06, "loss": 0.322, "step": 10514 }, { "epoch": 1.5077430455979353, "grad_norm": 0.29122379422187805, "learning_rate": 5.8242072232790335e-06, "loss": 0.3184, "step": 10515 }, { "epoch": 1.5078864353312302, "grad_norm": 0.28054919838905334, "learning_rate": 5.823384381093518e-06, "loss": 0.3121, "step": 10516 }, { "epoch": 1.5080298250645254, "grad_norm": 0.26935023069381714, "learning_rate": 5.822561515986247e-06, "loss": 0.2987, "step": 10517 }, { "epoch": 1.5081732147978206, "grad_norm": 0.27847158908843994, "learning_rate": 5.821738627980129e-06, "loss": 0.3142, "step": 10518 }, { "epoch": 1.5083166045311156, "grad_norm": 0.2778750956058502, "learning_rate": 5.820915717098073e-06, "loss": 0.3022, "step": 10519 }, { "epoch": 1.5084599942644106, "grad_norm": 0.29255369305610657, "learning_rate": 5.820092783362983e-06, "loss": 0.3089, "step": 10520 }, { "epoch": 1.5086033839977058, "grad_norm": 0.2729185223579407, "learning_rate": 5.819269826797773e-06, "loss": 0.3045, "step": 10521 }, { "epoch": 1.508746773731001, "grad_norm": 0.26713308691978455, "learning_rate": 5.818446847425352e-06, "loss": 0.2945, "step": 10522 }, { "epoch": 1.508890163464296, "grad_norm": 0.27999091148376465, "learning_rate": 5.817623845268628e-06, "loss": 0.3103, "step": 10523 }, { "epoch": 1.509033553197591, "grad_norm": 0.276650071144104, "learning_rate": 5.816800820350517e-06, "loss": 0.3111, "step": 10524 }, { "epoch": 1.5091769429308861, "grad_norm": 0.2805730700492859, "learning_rate": 5.815977772693925e-06, "loss": 0.3331, "step": 10525 }, { "epoch": 1.5093203326641813, "grad_norm": 0.2947385609149933, "learning_rate": 5.815154702321768e-06, "loss": 0.3073, "step": 10526 }, { "epoch": 1.5094637223974763, "grad_norm": 0.30620527267456055, "learning_rate": 5.814331609256958e-06, "loss": 0.3048, "step": 10527 }, { "epoch": 1.5096071121307715, "grad_norm": 0.2801668047904968, "learning_rate": 5.813508493522408e-06, "loss": 0.318, "step": 10528 }, { "epoch": 1.5097505018640667, "grad_norm": 0.2655732035636902, "learning_rate": 5.812685355141033e-06, "loss": 0.3182, "step": 10529 }, { "epoch": 1.5098938915973616, "grad_norm": 0.28476306796073914, "learning_rate": 5.811862194135748e-06, "loss": 0.3015, "step": 10530 }, { "epoch": 1.5100372813306566, "grad_norm": 0.3014811873435974, "learning_rate": 5.811039010529469e-06, "loss": 0.2962, "step": 10531 }, { "epoch": 1.5101806710639518, "grad_norm": 0.29547131061553955, "learning_rate": 5.810215804345112e-06, "loss": 0.3143, "step": 10532 }, { "epoch": 1.510324060797247, "grad_norm": 0.27613648772239685, "learning_rate": 5.809392575605591e-06, "loss": 0.2942, "step": 10533 }, { "epoch": 1.510467450530542, "grad_norm": 0.2882791757583618, "learning_rate": 5.808569324333829e-06, "loss": 0.3024, "step": 10534 }, { "epoch": 1.510610840263837, "grad_norm": 0.2669578492641449, "learning_rate": 5.807746050552738e-06, "loss": 0.3123, "step": 10535 }, { "epoch": 1.5107542299971322, "grad_norm": 0.2953546643257141, "learning_rate": 5.806922754285241e-06, "loss": 0.3069, "step": 10536 }, { "epoch": 1.5108976197304274, "grad_norm": 0.28321927785873413, "learning_rate": 5.806099435554257e-06, "loss": 0.299, "step": 10537 }, { "epoch": 1.5110410094637223, "grad_norm": 0.3177148103713989, "learning_rate": 5.805276094382701e-06, "loss": 0.3229, "step": 10538 }, { "epoch": 1.5111843991970175, "grad_norm": 0.28632164001464844, "learning_rate": 5.804452730793499e-06, "loss": 0.3136, "step": 10539 }, { "epoch": 1.5113277889303127, "grad_norm": 0.2802364230155945, "learning_rate": 5.8036293448095724e-06, "loss": 0.3161, "step": 10540 }, { "epoch": 1.5114711786636077, "grad_norm": 0.2887043356895447, "learning_rate": 5.8028059364538395e-06, "loss": 0.319, "step": 10541 }, { "epoch": 1.5116145683969027, "grad_norm": 0.2981891930103302, "learning_rate": 5.801982505749224e-06, "loss": 0.3, "step": 10542 }, { "epoch": 1.5117579581301979, "grad_norm": 0.29790738224983215, "learning_rate": 5.8011590527186504e-06, "loss": 0.3074, "step": 10543 }, { "epoch": 1.511901347863493, "grad_norm": 0.278186559677124, "learning_rate": 5.80033557738504e-06, "loss": 0.305, "step": 10544 }, { "epoch": 1.512044737596788, "grad_norm": 0.29606759548187256, "learning_rate": 5.799512079771319e-06, "loss": 0.3095, "step": 10545 }, { "epoch": 1.512188127330083, "grad_norm": 0.29769837856292725, "learning_rate": 5.798688559900412e-06, "loss": 0.2978, "step": 10546 }, { "epoch": 1.5123315170633784, "grad_norm": 0.2693241238594055, "learning_rate": 5.7978650177952424e-06, "loss": 0.3051, "step": 10547 }, { "epoch": 1.5124749067966734, "grad_norm": 0.2954132854938507, "learning_rate": 5.797041453478739e-06, "loss": 0.3046, "step": 10548 }, { "epoch": 1.5126182965299684, "grad_norm": 0.29378175735473633, "learning_rate": 5.796217866973827e-06, "loss": 0.3318, "step": 10549 }, { "epoch": 1.5127616862632636, "grad_norm": 0.2909759283065796, "learning_rate": 5.795394258303435e-06, "loss": 0.3111, "step": 10550 }, { "epoch": 1.5129050759965588, "grad_norm": 0.25621742010116577, "learning_rate": 5.794570627490491e-06, "loss": 0.3021, "step": 10551 }, { "epoch": 1.5130484657298537, "grad_norm": 0.27234867215156555, "learning_rate": 5.7937469745579224e-06, "loss": 0.2896, "step": 10552 }, { "epoch": 1.5131918554631487, "grad_norm": 0.31147873401641846, "learning_rate": 5.792923299528659e-06, "loss": 0.3175, "step": 10553 }, { "epoch": 1.513335245196444, "grad_norm": 0.3131963610649109, "learning_rate": 5.792099602425631e-06, "loss": 0.314, "step": 10554 }, { "epoch": 1.513478634929739, "grad_norm": 0.2959126830101013, "learning_rate": 5.791275883271768e-06, "loss": 0.2974, "step": 10555 }, { "epoch": 1.513622024663034, "grad_norm": 0.2788486182689667, "learning_rate": 5.790452142090002e-06, "loss": 0.3059, "step": 10556 }, { "epoch": 1.513765414396329, "grad_norm": 0.28389912843704224, "learning_rate": 5.789628378903264e-06, "loss": 0.3231, "step": 10557 }, { "epoch": 1.5139088041296245, "grad_norm": 0.2887387275695801, "learning_rate": 5.788804593734486e-06, "loss": 0.3014, "step": 10558 }, { "epoch": 1.5140521938629194, "grad_norm": 0.3106641173362732, "learning_rate": 5.7879807866066025e-06, "loss": 0.2963, "step": 10559 }, { "epoch": 1.5141955835962144, "grad_norm": 0.2751919627189636, "learning_rate": 5.787156957542546e-06, "loss": 0.2952, "step": 10560 }, { "epoch": 1.5143389733295096, "grad_norm": 0.2924976944923401, "learning_rate": 5.786333106565252e-06, "loss": 0.3057, "step": 10561 }, { "epoch": 1.5144823630628048, "grad_norm": 0.3020036220550537, "learning_rate": 5.785509233697652e-06, "loss": 0.3229, "step": 10562 }, { "epoch": 1.5146257527960998, "grad_norm": 0.3081560730934143, "learning_rate": 5.784685338962683e-06, "loss": 0.3052, "step": 10563 }, { "epoch": 1.5147691425293948, "grad_norm": 0.2959660291671753, "learning_rate": 5.783861422383283e-06, "loss": 0.3019, "step": 10564 }, { "epoch": 1.51491253226269, "grad_norm": 0.2956826984882355, "learning_rate": 5.7830374839823855e-06, "loss": 0.3098, "step": 10565 }, { "epoch": 1.5150559219959852, "grad_norm": 0.29132384061813354, "learning_rate": 5.782213523782928e-06, "loss": 0.2977, "step": 10566 }, { "epoch": 1.5151993117292801, "grad_norm": 0.27909374237060547, "learning_rate": 5.781389541807851e-06, "loss": 0.3223, "step": 10567 }, { "epoch": 1.5153427014625753, "grad_norm": 0.30484166741371155, "learning_rate": 5.780565538080091e-06, "loss": 0.3096, "step": 10568 }, { "epoch": 1.5154860911958705, "grad_norm": 0.2902411222457886, "learning_rate": 5.779741512622587e-06, "loss": 0.3089, "step": 10569 }, { "epoch": 1.5156294809291655, "grad_norm": 0.296438604593277, "learning_rate": 5.778917465458278e-06, "loss": 0.3008, "step": 10570 }, { "epoch": 1.5157728706624605, "grad_norm": 0.27892154455184937, "learning_rate": 5.778093396610106e-06, "loss": 0.3089, "step": 10571 }, { "epoch": 1.5159162603957557, "grad_norm": 0.2763272523880005, "learning_rate": 5.777269306101011e-06, "loss": 0.2927, "step": 10572 }, { "epoch": 1.5160596501290509, "grad_norm": 0.3059494197368622, "learning_rate": 5.776445193953934e-06, "loss": 0.3286, "step": 10573 }, { "epoch": 1.5162030398623458, "grad_norm": 0.2966470718383789, "learning_rate": 5.7756210601918165e-06, "loss": 0.3078, "step": 10574 }, { "epoch": 1.5163464295956408, "grad_norm": 0.2730928063392639, "learning_rate": 5.7747969048376e-06, "loss": 0.3074, "step": 10575 }, { "epoch": 1.516489819328936, "grad_norm": 0.28790783882141113, "learning_rate": 5.773972727914232e-06, "loss": 0.3097, "step": 10576 }, { "epoch": 1.5166332090622312, "grad_norm": 0.28525781631469727, "learning_rate": 5.773148529444653e-06, "loss": 0.3058, "step": 10577 }, { "epoch": 1.5167765987955262, "grad_norm": 0.31588685512542725, "learning_rate": 5.77232430945181e-06, "loss": 0.3005, "step": 10578 }, { "epoch": 1.5169199885288214, "grad_norm": 0.28183719515800476, "learning_rate": 5.771500067958644e-06, "loss": 0.2939, "step": 10579 }, { "epoch": 1.5170633782621166, "grad_norm": 0.28212597966194153, "learning_rate": 5.770675804988105e-06, "loss": 0.298, "step": 10580 }, { "epoch": 1.5172067679954115, "grad_norm": 0.2751804292201996, "learning_rate": 5.769851520563135e-06, "loss": 0.3219, "step": 10581 }, { "epoch": 1.5173501577287065, "grad_norm": 0.2608332633972168, "learning_rate": 5.769027214706684e-06, "loss": 0.276, "step": 10582 }, { "epoch": 1.5174935474620017, "grad_norm": 0.28017595410346985, "learning_rate": 5.768202887441698e-06, "loss": 0.3009, "step": 10583 }, { "epoch": 1.517636937195297, "grad_norm": 0.2908375859260559, "learning_rate": 5.767378538791125e-06, "loss": 0.3087, "step": 10584 }, { "epoch": 1.5177803269285919, "grad_norm": 0.2894022762775421, "learning_rate": 5.766554168777915e-06, "loss": 0.3084, "step": 10585 }, { "epoch": 1.5179237166618869, "grad_norm": 0.28983962535858154, "learning_rate": 5.765729777425015e-06, "loss": 0.3007, "step": 10586 }, { "epoch": 1.518067106395182, "grad_norm": 0.2951201796531677, "learning_rate": 5.764905364755377e-06, "loss": 0.3125, "step": 10587 }, { "epoch": 1.5182104961284772, "grad_norm": 0.28235650062561035, "learning_rate": 5.764080930791951e-06, "loss": 0.3019, "step": 10588 }, { "epoch": 1.5183538858617722, "grad_norm": 0.2869741916656494, "learning_rate": 5.763256475557686e-06, "loss": 0.3061, "step": 10589 }, { "epoch": 1.5184972755950674, "grad_norm": 0.2570304870605469, "learning_rate": 5.762431999075534e-06, "loss": 0.2867, "step": 10590 }, { "epoch": 1.5186406653283626, "grad_norm": 0.2786663770675659, "learning_rate": 5.76160750136845e-06, "loss": 0.3041, "step": 10591 }, { "epoch": 1.5187840550616576, "grad_norm": 0.2885739505290985, "learning_rate": 5.760782982459383e-06, "loss": 0.3053, "step": 10592 }, { "epoch": 1.5189274447949526, "grad_norm": 0.2829780876636505, "learning_rate": 5.759958442371288e-06, "loss": 0.3115, "step": 10593 }, { "epoch": 1.5190708345282478, "grad_norm": 0.2758095860481262, "learning_rate": 5.75913388112712e-06, "loss": 0.3231, "step": 10594 }, { "epoch": 1.519214224261543, "grad_norm": 0.26676100492477417, "learning_rate": 5.758309298749831e-06, "loss": 0.315, "step": 10595 }, { "epoch": 1.519357613994838, "grad_norm": 0.26474058628082275, "learning_rate": 5.75748469526238e-06, "loss": 0.2962, "step": 10596 }, { "epoch": 1.519501003728133, "grad_norm": 0.2981119751930237, "learning_rate": 5.756660070687719e-06, "loss": 0.3166, "step": 10597 }, { "epoch": 1.5196443934614283, "grad_norm": 0.2605963945388794, "learning_rate": 5.755835425048806e-06, "loss": 0.3021, "step": 10598 }, { "epoch": 1.5197877831947233, "grad_norm": 0.28956297039985657, "learning_rate": 5.755010758368598e-06, "loss": 0.3134, "step": 10599 }, { "epoch": 1.5199311729280183, "grad_norm": 0.27202486991882324, "learning_rate": 5.75418607067005e-06, "loss": 0.3101, "step": 10600 }, { "epoch": 1.5200745626613135, "grad_norm": 0.27551719546318054, "learning_rate": 5.753361361976123e-06, "loss": 0.3173, "step": 10601 }, { "epoch": 1.5202179523946087, "grad_norm": 0.2820758819580078, "learning_rate": 5.752536632309774e-06, "loss": 0.3006, "step": 10602 }, { "epoch": 1.5203613421279036, "grad_norm": 0.2849753797054291, "learning_rate": 5.7517118816939645e-06, "loss": 0.3019, "step": 10603 }, { "epoch": 1.5205047318611986, "grad_norm": 0.26457563042640686, "learning_rate": 5.7508871101516506e-06, "loss": 0.3075, "step": 10604 }, { "epoch": 1.5206481215944938, "grad_norm": 0.27369436621665955, "learning_rate": 5.750062317705796e-06, "loss": 0.3072, "step": 10605 }, { "epoch": 1.520791511327789, "grad_norm": 0.28003668785095215, "learning_rate": 5.749237504379359e-06, "loss": 0.3065, "step": 10606 }, { "epoch": 1.520934901061084, "grad_norm": 0.2825862169265747, "learning_rate": 5.748412670195305e-06, "loss": 0.3035, "step": 10607 }, { "epoch": 1.5210782907943792, "grad_norm": 0.26459357142448425, "learning_rate": 5.7475878151765905e-06, "loss": 0.2771, "step": 10608 }, { "epoch": 1.5212216805276744, "grad_norm": 0.28814372420310974, "learning_rate": 5.7467629393461825e-06, "loss": 0.2908, "step": 10609 }, { "epoch": 1.5213650702609693, "grad_norm": 0.27751314640045166, "learning_rate": 5.745938042727044e-06, "loss": 0.3097, "step": 10610 }, { "epoch": 1.5215084599942643, "grad_norm": 0.29760584235191345, "learning_rate": 5.745113125342136e-06, "loss": 0.3032, "step": 10611 }, { "epoch": 1.5216518497275595, "grad_norm": 0.27376797795295715, "learning_rate": 5.744288187214425e-06, "loss": 0.3039, "step": 10612 }, { "epoch": 1.5217952394608547, "grad_norm": 0.3018963932991028, "learning_rate": 5.743463228366876e-06, "loss": 0.3138, "step": 10613 }, { "epoch": 1.5219386291941497, "grad_norm": 0.2898373007774353, "learning_rate": 5.742638248822455e-06, "loss": 0.2924, "step": 10614 }, { "epoch": 1.5220820189274447, "grad_norm": 0.3054846525192261, "learning_rate": 5.741813248604129e-06, "loss": 0.2987, "step": 10615 }, { "epoch": 1.5222254086607399, "grad_norm": 0.2976175844669342, "learning_rate": 5.740988227734861e-06, "loss": 0.3017, "step": 10616 }, { "epoch": 1.522368798394035, "grad_norm": 0.29756343364715576, "learning_rate": 5.7401631862376205e-06, "loss": 0.3037, "step": 10617 }, { "epoch": 1.52251218812733, "grad_norm": 0.27817341685295105, "learning_rate": 5.739338124135377e-06, "loss": 0.2904, "step": 10618 }, { "epoch": 1.5226555778606252, "grad_norm": 0.28216877579689026, "learning_rate": 5.738513041451097e-06, "loss": 0.3017, "step": 10619 }, { "epoch": 1.5227989675939204, "grad_norm": 0.2993202805519104, "learning_rate": 5.737687938207749e-06, "loss": 0.3044, "step": 10620 }, { "epoch": 1.5229423573272154, "grad_norm": 0.2807871699333191, "learning_rate": 5.736862814428303e-06, "loss": 0.2922, "step": 10621 }, { "epoch": 1.5230857470605104, "grad_norm": 0.28387656807899475, "learning_rate": 5.736037670135731e-06, "loss": 0.313, "step": 10622 }, { "epoch": 1.5232291367938056, "grad_norm": 0.28234386444091797, "learning_rate": 5.735212505353003e-06, "loss": 0.3185, "step": 10623 }, { "epoch": 1.5233725265271008, "grad_norm": 0.3071710765361786, "learning_rate": 5.73438732010309e-06, "loss": 0.2943, "step": 10624 }, { "epoch": 1.5235159162603957, "grad_norm": 0.2736336886882782, "learning_rate": 5.733562114408963e-06, "loss": 0.3016, "step": 10625 }, { "epoch": 1.5236593059936907, "grad_norm": 0.2669825255870819, "learning_rate": 5.732736888293595e-06, "loss": 0.3076, "step": 10626 }, { "epoch": 1.523802695726986, "grad_norm": 0.28995439410209656, "learning_rate": 5.731911641779959e-06, "loss": 0.3161, "step": 10627 }, { "epoch": 1.523946085460281, "grad_norm": 0.2555431127548218, "learning_rate": 5.7310863748910295e-06, "loss": 0.2985, "step": 10628 }, { "epoch": 1.524089475193576, "grad_norm": 0.2992325723171234, "learning_rate": 5.7302610876497784e-06, "loss": 0.315, "step": 10629 }, { "epoch": 1.5242328649268713, "grad_norm": 0.30089354515075684, "learning_rate": 5.7294357800791835e-06, "loss": 0.3137, "step": 10630 }, { "epoch": 1.5243762546601665, "grad_norm": 0.2748752534389496, "learning_rate": 5.728610452202218e-06, "loss": 0.3001, "step": 10631 }, { "epoch": 1.5245196443934614, "grad_norm": 0.2945925295352936, "learning_rate": 5.7277851040418585e-06, "loss": 0.3176, "step": 10632 }, { "epoch": 1.5246630341267564, "grad_norm": 0.2721045911312103, "learning_rate": 5.7269597356210815e-06, "loss": 0.298, "step": 10633 }, { "epoch": 1.5248064238600516, "grad_norm": 0.28985777497291565, "learning_rate": 5.726134346962864e-06, "loss": 0.3244, "step": 10634 }, { "epoch": 1.5249498135933468, "grad_norm": 0.2806375026702881, "learning_rate": 5.725308938090183e-06, "loss": 0.2999, "step": 10635 }, { "epoch": 1.5250932033266418, "grad_norm": 0.32280591130256653, "learning_rate": 5.724483509026017e-06, "loss": 0.3159, "step": 10636 }, { "epoch": 1.5252365930599368, "grad_norm": 0.2791832387447357, "learning_rate": 5.723658059793344e-06, "loss": 0.3028, "step": 10637 }, { "epoch": 1.5253799827932322, "grad_norm": 0.2840718924999237, "learning_rate": 5.722832590415144e-06, "loss": 0.3007, "step": 10638 }, { "epoch": 1.5255233725265271, "grad_norm": 0.31586915254592896, "learning_rate": 5.722007100914395e-06, "loss": 0.2978, "step": 10639 }, { "epoch": 1.5256667622598221, "grad_norm": 0.29166480898857117, "learning_rate": 5.7211815913140826e-06, "loss": 0.309, "step": 10640 }, { "epoch": 1.5258101519931173, "grad_norm": 0.28636428713798523, "learning_rate": 5.7203560616371815e-06, "loss": 0.3163, "step": 10641 }, { "epoch": 1.5259535417264125, "grad_norm": 0.2985537052154541, "learning_rate": 5.719530511906677e-06, "loss": 0.3136, "step": 10642 }, { "epoch": 1.5260969314597075, "grad_norm": 0.2664405405521393, "learning_rate": 5.7187049421455485e-06, "loss": 0.3053, "step": 10643 }, { "epoch": 1.5262403211930025, "grad_norm": 0.2918829917907715, "learning_rate": 5.717879352376781e-06, "loss": 0.3122, "step": 10644 }, { "epoch": 1.5263837109262977, "grad_norm": 0.263472318649292, "learning_rate": 5.717053742623356e-06, "loss": 0.3026, "step": 10645 }, { "epoch": 1.5265271006595929, "grad_norm": 0.26627781987190247, "learning_rate": 5.716228112908259e-06, "loss": 0.294, "step": 10646 }, { "epoch": 1.5266704903928878, "grad_norm": 0.30463242530822754, "learning_rate": 5.715402463254471e-06, "loss": 0.2877, "step": 10647 }, { "epoch": 1.526813880126183, "grad_norm": 0.28911086916923523, "learning_rate": 5.714576793684981e-06, "loss": 0.3332, "step": 10648 }, { "epoch": 1.5269572698594782, "grad_norm": 0.30316174030303955, "learning_rate": 5.713751104222771e-06, "loss": 0.3097, "step": 10649 }, { "epoch": 1.5271006595927732, "grad_norm": 0.2744029760360718, "learning_rate": 5.712925394890828e-06, "loss": 0.2865, "step": 10650 }, { "epoch": 1.5272440493260682, "grad_norm": 0.2941582500934601, "learning_rate": 5.712099665712141e-06, "loss": 0.3274, "step": 10651 }, { "epoch": 1.5273874390593634, "grad_norm": 0.31730642914772034, "learning_rate": 5.711273916709692e-06, "loss": 0.3024, "step": 10652 }, { "epoch": 1.5275308287926586, "grad_norm": 0.28622931241989136, "learning_rate": 5.710448147906473e-06, "loss": 0.3086, "step": 10653 }, { "epoch": 1.5276742185259535, "grad_norm": 0.29688888788223267, "learning_rate": 5.7096223593254705e-06, "loss": 0.3166, "step": 10654 }, { "epoch": 1.5278176082592485, "grad_norm": 0.2921222448348999, "learning_rate": 5.708796550989672e-06, "loss": 0.3048, "step": 10655 }, { "epoch": 1.5279609979925437, "grad_norm": 0.2770419418811798, "learning_rate": 5.707970722922068e-06, "loss": 0.3087, "step": 10656 }, { "epoch": 1.528104387725839, "grad_norm": 0.28171321749687195, "learning_rate": 5.707144875145648e-06, "loss": 0.3233, "step": 10657 }, { "epoch": 1.5282477774591339, "grad_norm": 0.2821024954319, "learning_rate": 5.706319007683403e-06, "loss": 0.2962, "step": 10658 }, { "epoch": 1.528391167192429, "grad_norm": 0.2734706997871399, "learning_rate": 5.705493120558324e-06, "loss": 0.3195, "step": 10659 }, { "epoch": 1.5285345569257243, "grad_norm": 0.2785516381263733, "learning_rate": 5.7046672137934015e-06, "loss": 0.3041, "step": 10660 }, { "epoch": 1.5286779466590192, "grad_norm": 0.29801034927368164, "learning_rate": 5.703841287411627e-06, "loss": 0.2987, "step": 10661 }, { "epoch": 1.5288213363923142, "grad_norm": 0.27697959542274475, "learning_rate": 5.703015341435995e-06, "loss": 0.3084, "step": 10662 }, { "epoch": 1.5289647261256094, "grad_norm": 0.30328282713890076, "learning_rate": 5.702189375889495e-06, "loss": 0.3052, "step": 10663 }, { "epoch": 1.5291081158589046, "grad_norm": 0.2903916835784912, "learning_rate": 5.701363390795126e-06, "loss": 0.292, "step": 10664 }, { "epoch": 1.5292515055921996, "grad_norm": 0.27907681465148926, "learning_rate": 5.700537386175877e-06, "loss": 0.3143, "step": 10665 }, { "epoch": 1.5293948953254946, "grad_norm": 0.28488895297050476, "learning_rate": 5.699711362054745e-06, "loss": 0.2982, "step": 10666 }, { "epoch": 1.5295382850587897, "grad_norm": 0.265144944190979, "learning_rate": 5.6988853184547255e-06, "loss": 0.2969, "step": 10667 }, { "epoch": 1.529681674792085, "grad_norm": 0.30500614643096924, "learning_rate": 5.698059255398813e-06, "loss": 0.3142, "step": 10668 }, { "epoch": 1.52982506452538, "grad_norm": 0.2954779863357544, "learning_rate": 5.697233172910006e-06, "loss": 0.282, "step": 10669 }, { "epoch": 1.5299684542586751, "grad_norm": 0.2793963849544525, "learning_rate": 5.6964070710112996e-06, "loss": 0.306, "step": 10670 }, { "epoch": 1.5301118439919703, "grad_norm": 0.2980007529258728, "learning_rate": 5.695580949725691e-06, "loss": 0.2971, "step": 10671 }, { "epoch": 1.5302552337252653, "grad_norm": 0.28814399242401123, "learning_rate": 5.6947548090761796e-06, "loss": 0.2898, "step": 10672 }, { "epoch": 1.5303986234585603, "grad_norm": 0.2728666663169861, "learning_rate": 5.693928649085763e-06, "loss": 0.2941, "step": 10673 }, { "epoch": 1.5305420131918555, "grad_norm": 0.29601383209228516, "learning_rate": 5.693102469777438e-06, "loss": 0.3052, "step": 10674 }, { "epoch": 1.5306854029251507, "grad_norm": 0.26074376702308655, "learning_rate": 5.692276271174209e-06, "loss": 0.3118, "step": 10675 }, { "epoch": 1.5308287926584456, "grad_norm": 0.2851516008377075, "learning_rate": 5.6914500532990725e-06, "loss": 0.3192, "step": 10676 }, { "epoch": 1.5309721823917406, "grad_norm": 0.2768588662147522, "learning_rate": 5.690623816175029e-06, "loss": 0.3347, "step": 10677 }, { "epoch": 1.5311155721250358, "grad_norm": 0.26923874020576477, "learning_rate": 5.689797559825083e-06, "loss": 0.3133, "step": 10678 }, { "epoch": 1.531258961858331, "grad_norm": 0.2793920338153839, "learning_rate": 5.6889712842722335e-06, "loss": 0.2879, "step": 10679 }, { "epoch": 1.531402351591626, "grad_norm": 0.27732396125793457, "learning_rate": 5.688144989539485e-06, "loss": 0.3042, "step": 10680 }, { "epoch": 1.5315457413249212, "grad_norm": 0.2956683337688446, "learning_rate": 5.687318675649836e-06, "loss": 0.2999, "step": 10681 }, { "epoch": 1.5316891310582164, "grad_norm": 0.28450655937194824, "learning_rate": 5.686492342626292e-06, "loss": 0.3128, "step": 10682 }, { "epoch": 1.5318325207915113, "grad_norm": 0.2931583821773529, "learning_rate": 5.685665990491858e-06, "loss": 0.297, "step": 10683 }, { "epoch": 1.5319759105248063, "grad_norm": 0.30646705627441406, "learning_rate": 5.684839619269538e-06, "loss": 0.3135, "step": 10684 }, { "epoch": 1.5321193002581015, "grad_norm": 0.2833075225353241, "learning_rate": 5.684013228982336e-06, "loss": 0.2987, "step": 10685 }, { "epoch": 1.5322626899913967, "grad_norm": 0.2756175994873047, "learning_rate": 5.6831868196532596e-06, "loss": 0.3066, "step": 10686 }, { "epoch": 1.5324060797246917, "grad_norm": 0.3140583038330078, "learning_rate": 5.6823603913053105e-06, "loss": 0.3045, "step": 10687 }, { "epoch": 1.5325494694579866, "grad_norm": 0.2963291108608246, "learning_rate": 5.6815339439615e-06, "loss": 0.2931, "step": 10688 }, { "epoch": 1.532692859191282, "grad_norm": 0.2759450078010559, "learning_rate": 5.680707477644831e-06, "loss": 0.3213, "step": 10689 }, { "epoch": 1.532836248924577, "grad_norm": 0.29753413796424866, "learning_rate": 5.679880992378314e-06, "loss": 0.295, "step": 10690 }, { "epoch": 1.532979638657872, "grad_norm": 0.2939017713069916, "learning_rate": 5.679054488184956e-06, "loss": 0.3082, "step": 10691 }, { "epoch": 1.5331230283911672, "grad_norm": 0.28288331627845764, "learning_rate": 5.678227965087766e-06, "loss": 0.2955, "step": 10692 }, { "epoch": 1.5332664181244624, "grad_norm": 0.2681933045387268, "learning_rate": 5.6774014231097516e-06, "loss": 0.305, "step": 10693 }, { "epoch": 1.5334098078577574, "grad_norm": 0.2791990339756012, "learning_rate": 5.676574862273923e-06, "loss": 0.3305, "step": 10694 }, { "epoch": 1.5335531975910524, "grad_norm": 0.2944927215576172, "learning_rate": 5.675748282603293e-06, "loss": 0.3059, "step": 10695 }, { "epoch": 1.5336965873243475, "grad_norm": 0.28979480266571045, "learning_rate": 5.67492168412087e-06, "loss": 0.3197, "step": 10696 }, { "epoch": 1.5338399770576427, "grad_norm": 0.2775234878063202, "learning_rate": 5.674095066849666e-06, "loss": 0.3131, "step": 10697 }, { "epoch": 1.5339833667909377, "grad_norm": 0.30546024441719055, "learning_rate": 5.67326843081269e-06, "loss": 0.3102, "step": 10698 }, { "epoch": 1.534126756524233, "grad_norm": 0.2599218785762787, "learning_rate": 5.672441776032959e-06, "loss": 0.306, "step": 10699 }, { "epoch": 1.5342701462575281, "grad_norm": 0.27450644969940186, "learning_rate": 5.671615102533482e-06, "loss": 0.3277, "step": 10700 }, { "epoch": 1.534413535990823, "grad_norm": 0.28785213828086853, "learning_rate": 5.670788410337273e-06, "loss": 0.2985, "step": 10701 }, { "epoch": 1.534556925724118, "grad_norm": 0.3008304536342621, "learning_rate": 5.669961699467348e-06, "loss": 0.2996, "step": 10702 }, { "epoch": 1.5347003154574133, "grad_norm": 0.2910330593585968, "learning_rate": 5.669134969946719e-06, "loss": 0.2845, "step": 10703 }, { "epoch": 1.5348437051907085, "grad_norm": 0.2791098654270172, "learning_rate": 5.668308221798402e-06, "loss": 0.3069, "step": 10704 }, { "epoch": 1.5349870949240034, "grad_norm": 0.2585640847682953, "learning_rate": 5.6674814550454125e-06, "loss": 0.2856, "step": 10705 }, { "epoch": 1.5351304846572984, "grad_norm": 0.31495776772499084, "learning_rate": 5.666654669710766e-06, "loss": 0.2954, "step": 10706 }, { "epoch": 1.5352738743905936, "grad_norm": 0.2887283265590668, "learning_rate": 5.665827865817479e-06, "loss": 0.2708, "step": 10707 }, { "epoch": 1.5354172641238888, "grad_norm": 0.27581098675727844, "learning_rate": 5.665001043388568e-06, "loss": 0.315, "step": 10708 }, { "epoch": 1.5355606538571838, "grad_norm": 0.26925939321517944, "learning_rate": 5.664174202447051e-06, "loss": 0.2905, "step": 10709 }, { "epoch": 1.535704043590479, "grad_norm": 0.27767929434776306, "learning_rate": 5.663347343015948e-06, "loss": 0.3156, "step": 10710 }, { "epoch": 1.5358474333237742, "grad_norm": 0.28023582696914673, "learning_rate": 5.662520465118272e-06, "loss": 0.2976, "step": 10711 }, { "epoch": 1.5359908230570691, "grad_norm": 0.29287368059158325, "learning_rate": 5.661693568777047e-06, "loss": 0.3065, "step": 10712 }, { "epoch": 1.536134212790364, "grad_norm": 0.2962742745876312, "learning_rate": 5.660866654015291e-06, "loss": 0.3136, "step": 10713 }, { "epoch": 1.5362776025236593, "grad_norm": 0.2697288393974304, "learning_rate": 5.6600397208560235e-06, "loss": 0.3119, "step": 10714 }, { "epoch": 1.5364209922569545, "grad_norm": 0.2763066291809082, "learning_rate": 5.659212769322266e-06, "loss": 0.3096, "step": 10715 }, { "epoch": 1.5365643819902495, "grad_norm": 0.27868789434432983, "learning_rate": 5.658385799437039e-06, "loss": 0.3125, "step": 10716 }, { "epoch": 1.5367077717235444, "grad_norm": 0.26200026273727417, "learning_rate": 5.657558811223364e-06, "loss": 0.3044, "step": 10717 }, { "epoch": 1.5368511614568396, "grad_norm": 0.2834409475326538, "learning_rate": 5.656731804704264e-06, "loss": 0.3004, "step": 10718 }, { "epoch": 1.5369945511901348, "grad_norm": 0.27769461274147034, "learning_rate": 5.65590477990276e-06, "loss": 0.3181, "step": 10719 }, { "epoch": 1.5371379409234298, "grad_norm": 0.29123422503471375, "learning_rate": 5.655077736841875e-06, "loss": 0.3106, "step": 10720 }, { "epoch": 1.537281330656725, "grad_norm": 0.26964887976646423, "learning_rate": 5.654250675544635e-06, "loss": 0.3042, "step": 10721 }, { "epoch": 1.5374247203900202, "grad_norm": 0.26787126064300537, "learning_rate": 5.653423596034061e-06, "loss": 0.2835, "step": 10722 }, { "epoch": 1.5375681101233152, "grad_norm": 0.273959219455719, "learning_rate": 5.652596498333182e-06, "loss": 0.2904, "step": 10723 }, { "epoch": 1.5377114998566102, "grad_norm": 0.27836212515830994, "learning_rate": 5.651769382465018e-06, "loss": 0.3051, "step": 10724 }, { "epoch": 1.5378548895899053, "grad_norm": 0.27959784865379333, "learning_rate": 5.650942248452596e-06, "loss": 0.3008, "step": 10725 }, { "epoch": 1.5379982793232005, "grad_norm": 0.3055291175842285, "learning_rate": 5.650115096318947e-06, "loss": 0.3388, "step": 10726 }, { "epoch": 1.5381416690564955, "grad_norm": 0.2722946107387543, "learning_rate": 5.64928792608709e-06, "loss": 0.2849, "step": 10727 }, { "epoch": 1.5382850587897905, "grad_norm": 0.2741874158382416, "learning_rate": 5.6484607377800585e-06, "loss": 0.3057, "step": 10728 }, { "epoch": 1.538428448523086, "grad_norm": 0.2865138053894043, "learning_rate": 5.647633531420875e-06, "loss": 0.3091, "step": 10729 }, { "epoch": 1.5385718382563809, "grad_norm": 0.27474063634872437, "learning_rate": 5.646806307032572e-06, "loss": 0.2832, "step": 10730 }, { "epoch": 1.5387152279896759, "grad_norm": 0.28119009733200073, "learning_rate": 5.645979064638176e-06, "loss": 0.3, "step": 10731 }, { "epoch": 1.538858617722971, "grad_norm": 0.26508063077926636, "learning_rate": 5.645151804260718e-06, "loss": 0.307, "step": 10732 }, { "epoch": 1.5390020074562663, "grad_norm": 0.28226104378700256, "learning_rate": 5.644324525923224e-06, "loss": 0.3068, "step": 10733 }, { "epoch": 1.5391453971895612, "grad_norm": 0.29148146510124207, "learning_rate": 5.643497229648729e-06, "loss": 0.3, "step": 10734 }, { "epoch": 1.5392887869228562, "grad_norm": 0.28093284368515015, "learning_rate": 5.642669915460258e-06, "loss": 0.2997, "step": 10735 }, { "epoch": 1.5394321766561514, "grad_norm": 0.2993864119052887, "learning_rate": 5.641842583380846e-06, "loss": 0.2962, "step": 10736 }, { "epoch": 1.5395755663894466, "grad_norm": 0.2807031273841858, "learning_rate": 5.641015233433525e-06, "loss": 0.2973, "step": 10737 }, { "epoch": 1.5397189561227416, "grad_norm": 0.26209786534309387, "learning_rate": 5.640187865641325e-06, "loss": 0.3033, "step": 10738 }, { "epoch": 1.5398623458560368, "grad_norm": 0.2867724597454071, "learning_rate": 5.639360480027279e-06, "loss": 0.3181, "step": 10739 }, { "epoch": 1.540005735589332, "grad_norm": 0.3054603934288025, "learning_rate": 5.638533076614423e-06, "loss": 0.3234, "step": 10740 }, { "epoch": 1.540149125322627, "grad_norm": 0.27360522747039795, "learning_rate": 5.637705655425786e-06, "loss": 0.3026, "step": 10741 }, { "epoch": 1.540292515055922, "grad_norm": 0.27404385805130005, "learning_rate": 5.636878216484407e-06, "loss": 0.3162, "step": 10742 }, { "epoch": 1.540435904789217, "grad_norm": 0.26361891627311707, "learning_rate": 5.636050759813316e-06, "loss": 0.308, "step": 10743 }, { "epoch": 1.5405792945225123, "grad_norm": 0.292110800743103, "learning_rate": 5.635223285435551e-06, "loss": 0.3205, "step": 10744 }, { "epoch": 1.5407226842558073, "grad_norm": 0.2787691056728363, "learning_rate": 5.634395793374148e-06, "loss": 0.3021, "step": 10745 }, { "epoch": 1.5408660739891022, "grad_norm": 0.25809574127197266, "learning_rate": 5.63356828365214e-06, "loss": 0.3017, "step": 10746 }, { "epoch": 1.5410094637223974, "grad_norm": 0.27600204944610596, "learning_rate": 5.632740756292566e-06, "loss": 0.2873, "step": 10747 }, { "epoch": 1.5411528534556926, "grad_norm": 0.286260724067688, "learning_rate": 5.631913211318462e-06, "loss": 0.3151, "step": 10748 }, { "epoch": 1.5412962431889876, "grad_norm": 0.30126670002937317, "learning_rate": 5.631085648752867e-06, "loss": 0.3128, "step": 10749 }, { "epoch": 1.5414396329222828, "grad_norm": 0.3030562400817871, "learning_rate": 5.630258068618821e-06, "loss": 0.2943, "step": 10750 }, { "epoch": 1.541583022655578, "grad_norm": 0.2829354703426361, "learning_rate": 5.629430470939357e-06, "loss": 0.3044, "step": 10751 }, { "epoch": 1.541726412388873, "grad_norm": 0.2729699909687042, "learning_rate": 5.628602855737517e-06, "loss": 0.292, "step": 10752 }, { "epoch": 1.541869802122168, "grad_norm": 0.28619781136512756, "learning_rate": 5.627775223036342e-06, "loss": 0.3198, "step": 10753 }, { "epoch": 1.5420131918554632, "grad_norm": 0.2891788184642792, "learning_rate": 5.62694757285887e-06, "loss": 0.3313, "step": 10754 }, { "epoch": 1.5421565815887583, "grad_norm": 0.28715723752975464, "learning_rate": 5.62611990522814e-06, "loss": 0.3102, "step": 10755 }, { "epoch": 1.5422999713220533, "grad_norm": 0.28291022777557373, "learning_rate": 5.625292220167197e-06, "loss": 0.3156, "step": 10756 }, { "epoch": 1.5424433610553483, "grad_norm": 0.24992559850215912, "learning_rate": 5.62446451769908e-06, "loss": 0.3069, "step": 10757 }, { "epoch": 1.5425867507886435, "grad_norm": 0.2982621192932129, "learning_rate": 5.623636797846831e-06, "loss": 0.3101, "step": 10758 }, { "epoch": 1.5427301405219387, "grad_norm": 0.29449570178985596, "learning_rate": 5.622809060633494e-06, "loss": 0.3031, "step": 10759 }, { "epoch": 1.5428735302552337, "grad_norm": 0.2775757312774658, "learning_rate": 5.621981306082109e-06, "loss": 0.3124, "step": 10760 }, { "epoch": 1.5430169199885289, "grad_norm": 0.28756555914878845, "learning_rate": 5.621153534215723e-06, "loss": 0.2978, "step": 10761 }, { "epoch": 1.543160309721824, "grad_norm": 0.2857030928134918, "learning_rate": 5.620325745057378e-06, "loss": 0.2888, "step": 10762 }, { "epoch": 1.543303699455119, "grad_norm": 0.27778321504592896, "learning_rate": 5.619497938630117e-06, "loss": 0.299, "step": 10763 }, { "epoch": 1.543447089188414, "grad_norm": 0.28624722361564636, "learning_rate": 5.618670114956989e-06, "loss": 0.3038, "step": 10764 }, { "epoch": 1.5435904789217092, "grad_norm": 0.2641546428203583, "learning_rate": 5.617842274061034e-06, "loss": 0.2974, "step": 10765 }, { "epoch": 1.5437338686550044, "grad_norm": 0.2954287528991699, "learning_rate": 5.617014415965302e-06, "loss": 0.3185, "step": 10766 }, { "epoch": 1.5438772583882994, "grad_norm": 0.29168903827667236, "learning_rate": 5.616186540692836e-06, "loss": 0.3292, "step": 10767 }, { "epoch": 1.5440206481215943, "grad_norm": 0.267287939786911, "learning_rate": 5.615358648266685e-06, "loss": 0.3128, "step": 10768 }, { "epoch": 1.5441640378548895, "grad_norm": 0.27801916003227234, "learning_rate": 5.614530738709898e-06, "loss": 0.3167, "step": 10769 }, { "epoch": 1.5443074275881847, "grad_norm": 0.27630695700645447, "learning_rate": 5.613702812045519e-06, "loss": 0.2926, "step": 10770 }, { "epoch": 1.5444508173214797, "grad_norm": 0.27180230617523193, "learning_rate": 5.612874868296599e-06, "loss": 0.2949, "step": 10771 }, { "epoch": 1.544594207054775, "grad_norm": 0.29283764958381653, "learning_rate": 5.612046907486185e-06, "loss": 0.2975, "step": 10772 }, { "epoch": 1.54473759678807, "grad_norm": 0.27313515543937683, "learning_rate": 5.6112189296373255e-06, "loss": 0.2923, "step": 10773 }, { "epoch": 1.544880986521365, "grad_norm": 0.27948006987571716, "learning_rate": 5.610390934773071e-06, "loss": 0.3139, "step": 10774 }, { "epoch": 1.54502437625466, "grad_norm": 0.26792675256729126, "learning_rate": 5.609562922916473e-06, "loss": 0.2948, "step": 10775 }, { "epoch": 1.5451677659879552, "grad_norm": 0.3051839768886566, "learning_rate": 5.608734894090579e-06, "loss": 0.3148, "step": 10776 }, { "epoch": 1.5453111557212504, "grad_norm": 0.30167779326438904, "learning_rate": 5.607906848318445e-06, "loss": 0.2997, "step": 10777 }, { "epoch": 1.5454545454545454, "grad_norm": 0.28174978494644165, "learning_rate": 5.607078785623118e-06, "loss": 0.2935, "step": 10778 }, { "epoch": 1.5455979351878404, "grad_norm": 0.27544814348220825, "learning_rate": 5.606250706027652e-06, "loss": 0.2876, "step": 10779 }, { "epoch": 1.5457413249211358, "grad_norm": 0.2762960195541382, "learning_rate": 5.605422609555098e-06, "loss": 0.2951, "step": 10780 }, { "epoch": 1.5458847146544308, "grad_norm": 0.29444512724876404, "learning_rate": 5.60459449622851e-06, "loss": 0.3207, "step": 10781 }, { "epoch": 1.5460281043877258, "grad_norm": 0.286054790019989, "learning_rate": 5.603766366070942e-06, "loss": 0.3045, "step": 10782 }, { "epoch": 1.546171494121021, "grad_norm": 0.2877815067768097, "learning_rate": 5.602938219105447e-06, "loss": 0.315, "step": 10783 }, { "epoch": 1.5463148838543161, "grad_norm": 0.28378280997276306, "learning_rate": 5.602110055355078e-06, "loss": 0.2995, "step": 10784 }, { "epoch": 1.5464582735876111, "grad_norm": 0.2699953317642212, "learning_rate": 5.601281874842892e-06, "loss": 0.3166, "step": 10785 }, { "epoch": 1.546601663320906, "grad_norm": 0.278766006231308, "learning_rate": 5.600453677591945e-06, "loss": 0.3245, "step": 10786 }, { "epoch": 1.5467450530542013, "grad_norm": 0.2761225402355194, "learning_rate": 5.59962546362529e-06, "loss": 0.307, "step": 10787 }, { "epoch": 1.5468884427874965, "grad_norm": 0.29314181208610535, "learning_rate": 5.598797232965986e-06, "loss": 0.2988, "step": 10788 }, { "epoch": 1.5470318325207915, "grad_norm": 0.2740495800971985, "learning_rate": 5.597968985637086e-06, "loss": 0.3015, "step": 10789 }, { "epoch": 1.5471752222540867, "grad_norm": 0.2908056974411011, "learning_rate": 5.597140721661649e-06, "loss": 0.3232, "step": 10790 }, { "epoch": 1.5473186119873819, "grad_norm": 0.280517578125, "learning_rate": 5.596312441062735e-06, "loss": 0.324, "step": 10791 }, { "epoch": 1.5474620017206768, "grad_norm": 0.2797052562236786, "learning_rate": 5.595484143863399e-06, "loss": 0.2921, "step": 10792 }, { "epoch": 1.5476053914539718, "grad_norm": 0.29636266827583313, "learning_rate": 5.594655830086701e-06, "loss": 0.3028, "step": 10793 }, { "epoch": 1.547748781187267, "grad_norm": 0.2915203273296356, "learning_rate": 5.593827499755699e-06, "loss": 0.2905, "step": 10794 }, { "epoch": 1.5478921709205622, "grad_norm": 0.2981165945529938, "learning_rate": 5.592999152893451e-06, "loss": 0.3047, "step": 10795 }, { "epoch": 1.5480355606538572, "grad_norm": 0.285543829202652, "learning_rate": 5.592170789523022e-06, "loss": 0.2925, "step": 10796 }, { "epoch": 1.5481789503871521, "grad_norm": 0.2819730043411255, "learning_rate": 5.591342409667467e-06, "loss": 0.291, "step": 10797 }, { "epoch": 1.5483223401204473, "grad_norm": 0.27512291073799133, "learning_rate": 5.59051401334985e-06, "loss": 0.2982, "step": 10798 }, { "epoch": 1.5484657298537425, "grad_norm": 0.3004457354545593, "learning_rate": 5.589685600593232e-06, "loss": 0.3053, "step": 10799 }, { "epoch": 1.5486091195870375, "grad_norm": 0.26888230443000793, "learning_rate": 5.588857171420672e-06, "loss": 0.3007, "step": 10800 }, { "epoch": 1.5487525093203327, "grad_norm": 0.27007654309272766, "learning_rate": 5.588028725855234e-06, "loss": 0.3038, "step": 10801 }, { "epoch": 1.548895899053628, "grad_norm": 0.2715979218482971, "learning_rate": 5.587200263919983e-06, "loss": 0.3122, "step": 10802 }, { "epoch": 1.5490392887869229, "grad_norm": 0.2771078944206238, "learning_rate": 5.5863717856379785e-06, "loss": 0.3063, "step": 10803 }, { "epoch": 1.5491826785202178, "grad_norm": 0.2836255431175232, "learning_rate": 5.585543291032286e-06, "loss": 0.2986, "step": 10804 }, { "epoch": 1.549326068253513, "grad_norm": 0.28585782647132874, "learning_rate": 5.584714780125969e-06, "loss": 0.3105, "step": 10805 }, { "epoch": 1.5494694579868082, "grad_norm": 0.2704434394836426, "learning_rate": 5.583886252942091e-06, "loss": 0.3104, "step": 10806 }, { "epoch": 1.5496128477201032, "grad_norm": 0.27172911167144775, "learning_rate": 5.58305770950372e-06, "loss": 0.3027, "step": 10807 }, { "epoch": 1.5497562374533982, "grad_norm": 0.2885196805000305, "learning_rate": 5.582229149833918e-06, "loss": 0.335, "step": 10808 }, { "epoch": 1.5498996271866934, "grad_norm": 0.2532136142253876, "learning_rate": 5.581400573955752e-06, "loss": 0.2877, "step": 10809 }, { "epoch": 1.5500430169199886, "grad_norm": 0.288032203912735, "learning_rate": 5.5805719818922885e-06, "loss": 0.3255, "step": 10810 }, { "epoch": 1.5501864066532836, "grad_norm": 0.27033278346061707, "learning_rate": 5.579743373666595e-06, "loss": 0.2842, "step": 10811 }, { "epoch": 1.5503297963865788, "grad_norm": 0.28053319454193115, "learning_rate": 5.578914749301734e-06, "loss": 0.2999, "step": 10812 }, { "epoch": 1.550473186119874, "grad_norm": 0.28289908170700073, "learning_rate": 5.578086108820782e-06, "loss": 0.2895, "step": 10813 }, { "epoch": 1.550616575853169, "grad_norm": 0.2784622609615326, "learning_rate": 5.577257452246799e-06, "loss": 0.3182, "step": 10814 }, { "epoch": 1.550759965586464, "grad_norm": 0.2667880058288574, "learning_rate": 5.576428779602858e-06, "loss": 0.3077, "step": 10815 }, { "epoch": 1.550903355319759, "grad_norm": 0.2699058949947357, "learning_rate": 5.5756000909120255e-06, "loss": 0.2994, "step": 10816 }, { "epoch": 1.5510467450530543, "grad_norm": 0.28758007287979126, "learning_rate": 5.574771386197373e-06, "loss": 0.302, "step": 10817 }, { "epoch": 1.5511901347863493, "grad_norm": 0.2614811956882477, "learning_rate": 5.573942665481969e-06, "loss": 0.3189, "step": 10818 }, { "epoch": 1.5513335245196442, "grad_norm": 0.2745198607444763, "learning_rate": 5.573113928788884e-06, "loss": 0.3274, "step": 10819 }, { "epoch": 1.5514769142529397, "grad_norm": 0.29507169127464294, "learning_rate": 5.572285176141188e-06, "loss": 0.2844, "step": 10820 }, { "epoch": 1.5516203039862346, "grad_norm": 0.28188663721084595, "learning_rate": 5.571456407561953e-06, "loss": 0.2926, "step": 10821 }, { "epoch": 1.5517636937195296, "grad_norm": 0.28793036937713623, "learning_rate": 5.5706276230742505e-06, "loss": 0.3207, "step": 10822 }, { "epoch": 1.5519070834528248, "grad_norm": 0.27503007650375366, "learning_rate": 5.569798822701154e-06, "loss": 0.2993, "step": 10823 }, { "epoch": 1.55205047318612, "grad_norm": 0.28117460012435913, "learning_rate": 5.568970006465734e-06, "loss": 0.3148, "step": 10824 }, { "epoch": 1.552193862919415, "grad_norm": 0.2762623429298401, "learning_rate": 5.568141174391064e-06, "loss": 0.3062, "step": 10825 }, { "epoch": 1.55233725265271, "grad_norm": 0.26952460408210754, "learning_rate": 5.56731232650022e-06, "loss": 0.294, "step": 10826 }, { "epoch": 1.5524806423860051, "grad_norm": 0.259937047958374, "learning_rate": 5.566483462816271e-06, "loss": 0.3053, "step": 10827 }, { "epoch": 1.5526240321193003, "grad_norm": 0.2763090431690216, "learning_rate": 5.565654583362295e-06, "loss": 0.3203, "step": 10828 }, { "epoch": 1.5527674218525953, "grad_norm": 0.2690390646457672, "learning_rate": 5.564825688161364e-06, "loss": 0.307, "step": 10829 }, { "epoch": 1.5529108115858905, "grad_norm": 0.2729184031486511, "learning_rate": 5.563996777236555e-06, "loss": 0.2923, "step": 10830 }, { "epoch": 1.5530542013191857, "grad_norm": 0.27691444754600525, "learning_rate": 5.563167850610946e-06, "loss": 0.3082, "step": 10831 }, { "epoch": 1.5531975910524807, "grad_norm": 0.27969300746917725, "learning_rate": 5.562338908307607e-06, "loss": 0.3203, "step": 10832 }, { "epoch": 1.5533409807857756, "grad_norm": 0.28051117062568665, "learning_rate": 5.5615099503496195e-06, "loss": 0.3169, "step": 10833 }, { "epoch": 1.5534843705190708, "grad_norm": 0.3024868965148926, "learning_rate": 5.560680976760059e-06, "loss": 0.3128, "step": 10834 }, { "epoch": 1.553627760252366, "grad_norm": 0.2672401964664459, "learning_rate": 5.559851987562002e-06, "loss": 0.2902, "step": 10835 }, { "epoch": 1.553771149985661, "grad_norm": 0.28214678168296814, "learning_rate": 5.559022982778526e-06, "loss": 0.3188, "step": 10836 }, { "epoch": 1.553914539718956, "grad_norm": 0.2731657922267914, "learning_rate": 5.558193962432712e-06, "loss": 0.3298, "step": 10837 }, { "epoch": 1.5540579294522512, "grad_norm": 0.26256072521209717, "learning_rate": 5.557364926547636e-06, "loss": 0.2905, "step": 10838 }, { "epoch": 1.5542013191855464, "grad_norm": 0.2791909873485565, "learning_rate": 5.556535875146377e-06, "loss": 0.3036, "step": 10839 }, { "epoch": 1.5543447089188414, "grad_norm": 0.28114789724349976, "learning_rate": 5.555706808252016e-06, "loss": 0.2904, "step": 10840 }, { "epoch": 1.5544880986521366, "grad_norm": 0.27929818630218506, "learning_rate": 5.554877725887632e-06, "loss": 0.3027, "step": 10841 }, { "epoch": 1.5546314883854317, "grad_norm": 0.2674924433231354, "learning_rate": 5.554048628076306e-06, "loss": 0.3201, "step": 10842 }, { "epoch": 1.5547748781187267, "grad_norm": 0.2916492819786072, "learning_rate": 5.553219514841118e-06, "loss": 0.3076, "step": 10843 }, { "epoch": 1.5549182678520217, "grad_norm": 0.28144213557243347, "learning_rate": 5.552390386205151e-06, "loss": 0.3248, "step": 10844 }, { "epoch": 1.555061657585317, "grad_norm": 0.2684120535850525, "learning_rate": 5.551561242191485e-06, "loss": 0.3035, "step": 10845 }, { "epoch": 1.555205047318612, "grad_norm": 0.30212944746017456, "learning_rate": 5.550732082823201e-06, "loss": 0.3159, "step": 10846 }, { "epoch": 1.555348437051907, "grad_norm": 0.2734038829803467, "learning_rate": 5.549902908123385e-06, "loss": 0.2948, "step": 10847 }, { "epoch": 1.555491826785202, "grad_norm": 0.2824833393096924, "learning_rate": 5.549073718115116e-06, "loss": 0.308, "step": 10848 }, { "epoch": 1.5556352165184972, "grad_norm": 0.28555628657341003, "learning_rate": 5.54824451282148e-06, "loss": 0.3067, "step": 10849 }, { "epoch": 1.5557786062517924, "grad_norm": 0.2782444953918457, "learning_rate": 5.547415292265561e-06, "loss": 0.3106, "step": 10850 }, { "epoch": 1.5559219959850874, "grad_norm": 0.26850053668022156, "learning_rate": 5.546586056470441e-06, "loss": 0.3113, "step": 10851 }, { "epoch": 1.5560653857183826, "grad_norm": 0.28189074993133545, "learning_rate": 5.545756805459207e-06, "loss": 0.289, "step": 10852 }, { "epoch": 1.5562087754516778, "grad_norm": 0.2794930040836334, "learning_rate": 5.544927539254944e-06, "loss": 0.3112, "step": 10853 }, { "epoch": 1.5563521651849728, "grad_norm": 0.25971415638923645, "learning_rate": 5.544098257880735e-06, "loss": 0.3115, "step": 10854 }, { "epoch": 1.5564955549182677, "grad_norm": 0.2955414652824402, "learning_rate": 5.543268961359667e-06, "loss": 0.3385, "step": 10855 }, { "epoch": 1.556638944651563, "grad_norm": 0.2617688477039337, "learning_rate": 5.542439649714828e-06, "loss": 0.3113, "step": 10856 }, { "epoch": 1.5567823343848581, "grad_norm": 0.28591659665107727, "learning_rate": 5.541610322969302e-06, "loss": 0.3111, "step": 10857 }, { "epoch": 1.556925724118153, "grad_norm": 0.2738703489303589, "learning_rate": 5.54078098114618e-06, "loss": 0.306, "step": 10858 }, { "epoch": 1.557069113851448, "grad_norm": 0.29973170161247253, "learning_rate": 5.539951624268547e-06, "loss": 0.3047, "step": 10859 }, { "epoch": 1.5572125035847433, "grad_norm": 0.29142194986343384, "learning_rate": 5.53912225235949e-06, "loss": 0.304, "step": 10860 }, { "epoch": 1.5573558933180385, "grad_norm": 0.27878478169441223, "learning_rate": 5.538292865442099e-06, "loss": 0.2992, "step": 10861 }, { "epoch": 1.5574992830513334, "grad_norm": 0.30842918157577515, "learning_rate": 5.537463463539464e-06, "loss": 0.2991, "step": 10862 }, { "epoch": 1.5576426727846286, "grad_norm": 0.29401299357414246, "learning_rate": 5.536634046674672e-06, "loss": 0.3012, "step": 10863 }, { "epoch": 1.5577860625179238, "grad_norm": 0.27702465653419495, "learning_rate": 5.535804614870815e-06, "loss": 0.2802, "step": 10864 }, { "epoch": 1.5579294522512188, "grad_norm": 0.2823193371295929, "learning_rate": 5.53497516815098e-06, "loss": 0.3155, "step": 10865 }, { "epoch": 1.5580728419845138, "grad_norm": 0.31012919545173645, "learning_rate": 5.5341457065382594e-06, "loss": 0.2943, "step": 10866 }, { "epoch": 1.558216231717809, "grad_norm": 0.2918863892555237, "learning_rate": 5.533316230055744e-06, "loss": 0.288, "step": 10867 }, { "epoch": 1.5583596214511042, "grad_norm": 0.281221479177475, "learning_rate": 5.5324867387265255e-06, "loss": 0.3269, "step": 10868 }, { "epoch": 1.5585030111843992, "grad_norm": 0.2752634584903717, "learning_rate": 5.531657232573696e-06, "loss": 0.3094, "step": 10869 }, { "epoch": 1.5586464009176941, "grad_norm": 0.28687193989753723, "learning_rate": 5.5308277116203465e-06, "loss": 0.2857, "step": 10870 }, { "epoch": 1.5587897906509895, "grad_norm": 0.2862531244754791, "learning_rate": 5.5299981758895695e-06, "loss": 0.3083, "step": 10871 }, { "epoch": 1.5589331803842845, "grad_norm": 0.2886158227920532, "learning_rate": 5.5291686254044594e-06, "loss": 0.3176, "step": 10872 }, { "epoch": 1.5590765701175795, "grad_norm": 0.27736619114875793, "learning_rate": 5.528339060188108e-06, "loss": 0.3008, "step": 10873 }, { "epoch": 1.5592199598508747, "grad_norm": 0.2675442695617676, "learning_rate": 5.527509480263611e-06, "loss": 0.2883, "step": 10874 }, { "epoch": 1.5593633495841699, "grad_norm": 0.26664066314697266, "learning_rate": 5.526679885654061e-06, "loss": 0.2984, "step": 10875 }, { "epoch": 1.5595067393174649, "grad_norm": 0.2736108899116516, "learning_rate": 5.525850276382552e-06, "loss": 0.3112, "step": 10876 }, { "epoch": 1.5596501290507598, "grad_norm": 0.26715004444122314, "learning_rate": 5.5250206524721815e-06, "loss": 0.3082, "step": 10877 }, { "epoch": 1.559793518784055, "grad_norm": 0.30070483684539795, "learning_rate": 5.524191013946044e-06, "loss": 0.3138, "step": 10878 }, { "epoch": 1.5599369085173502, "grad_norm": 0.270510196685791, "learning_rate": 5.523361360827234e-06, "loss": 0.301, "step": 10879 }, { "epoch": 1.5600802982506452, "grad_norm": 0.2994433343410492, "learning_rate": 5.52253169313885e-06, "loss": 0.3053, "step": 10880 }, { "epoch": 1.5602236879839404, "grad_norm": 0.29539036750793457, "learning_rate": 5.521702010903987e-06, "loss": 0.3265, "step": 10881 }, { "epoch": 1.5603670777172356, "grad_norm": 0.30924463272094727, "learning_rate": 5.520872314145741e-06, "loss": 0.3075, "step": 10882 }, { "epoch": 1.5605104674505306, "grad_norm": 0.31022873520851135, "learning_rate": 5.5200426028872135e-06, "loss": 0.321, "step": 10883 }, { "epoch": 1.5606538571838255, "grad_norm": 0.2994551658630371, "learning_rate": 5.519212877151498e-06, "loss": 0.3086, "step": 10884 }, { "epoch": 1.5607972469171207, "grad_norm": 0.2686080038547516, "learning_rate": 5.518383136961696e-06, "loss": 0.3118, "step": 10885 }, { "epoch": 1.560940636650416, "grad_norm": 0.28820839524269104, "learning_rate": 5.517553382340904e-06, "loss": 0.3106, "step": 10886 }, { "epoch": 1.561084026383711, "grad_norm": 0.3112561106681824, "learning_rate": 5.5167236133122215e-06, "loss": 0.3066, "step": 10887 }, { "epoch": 1.5612274161170059, "grad_norm": 0.25844186544418335, "learning_rate": 5.51589382989875e-06, "loss": 0.3069, "step": 10888 }, { "epoch": 1.561370805850301, "grad_norm": 0.27747178077697754, "learning_rate": 5.515064032123587e-06, "loss": 0.3089, "step": 10889 }, { "epoch": 1.5615141955835963, "grad_norm": 0.2861360013484955, "learning_rate": 5.514234220009833e-06, "loss": 0.2785, "step": 10890 }, { "epoch": 1.5616575853168913, "grad_norm": 0.2687772810459137, "learning_rate": 5.513404393580592e-06, "loss": 0.2967, "step": 10891 }, { "epoch": 1.5618009750501864, "grad_norm": 0.3021532893180847, "learning_rate": 5.5125745528589596e-06, "loss": 0.3174, "step": 10892 }, { "epoch": 1.5619443647834816, "grad_norm": 0.3145824074745178, "learning_rate": 5.51174469786804e-06, "loss": 0.3313, "step": 10893 }, { "epoch": 1.5620877545167766, "grad_norm": 0.29071325063705444, "learning_rate": 5.510914828630936e-06, "loss": 0.3174, "step": 10894 }, { "epoch": 1.5622311442500716, "grad_norm": 0.2801872789859772, "learning_rate": 5.510084945170749e-06, "loss": 0.3135, "step": 10895 }, { "epoch": 1.5623745339833668, "grad_norm": 0.28201842308044434, "learning_rate": 5.509255047510583e-06, "loss": 0.3034, "step": 10896 }, { "epoch": 1.562517923716662, "grad_norm": 0.280356764793396, "learning_rate": 5.508425135673539e-06, "loss": 0.3032, "step": 10897 }, { "epoch": 1.562661313449957, "grad_norm": 0.2958986759185791, "learning_rate": 5.5075952096827216e-06, "loss": 0.2941, "step": 10898 }, { "epoch": 1.562804703183252, "grad_norm": 0.2868100702762604, "learning_rate": 5.506765269561235e-06, "loss": 0.2955, "step": 10899 }, { "epoch": 1.5629480929165471, "grad_norm": 0.2679787874221802, "learning_rate": 5.505935315332181e-06, "loss": 0.2926, "step": 10900 }, { "epoch": 1.5630914826498423, "grad_norm": 0.265066534280777, "learning_rate": 5.505105347018668e-06, "loss": 0.315, "step": 10901 }, { "epoch": 1.5632348723831373, "grad_norm": 0.2793273329734802, "learning_rate": 5.5042753646437975e-06, "loss": 0.3167, "step": 10902 }, { "epoch": 1.5633782621164325, "grad_norm": 0.2948303818702698, "learning_rate": 5.503445368230677e-06, "loss": 0.294, "step": 10903 }, { "epoch": 1.5635216518497277, "grad_norm": 0.2829528748989105, "learning_rate": 5.502615357802413e-06, "loss": 0.3133, "step": 10904 }, { "epoch": 1.5636650415830227, "grad_norm": 0.29698413610458374, "learning_rate": 5.501785333382109e-06, "loss": 0.3243, "step": 10905 }, { "epoch": 1.5638084313163176, "grad_norm": 0.2886514961719513, "learning_rate": 5.500955294992874e-06, "loss": 0.332, "step": 10906 }, { "epoch": 1.5639518210496128, "grad_norm": 0.2804073989391327, "learning_rate": 5.500125242657816e-06, "loss": 0.3004, "step": 10907 }, { "epoch": 1.564095210782908, "grad_norm": 0.2676175534725189, "learning_rate": 5.499295176400038e-06, "loss": 0.2833, "step": 10908 }, { "epoch": 1.564238600516203, "grad_norm": 0.2681015729904175, "learning_rate": 5.498465096242651e-06, "loss": 0.2937, "step": 10909 }, { "epoch": 1.564381990249498, "grad_norm": 0.2938169538974762, "learning_rate": 5.497635002208763e-06, "loss": 0.3159, "step": 10910 }, { "epoch": 1.5645253799827934, "grad_norm": 0.2906375527381897, "learning_rate": 5.496804894321481e-06, "loss": 0.2989, "step": 10911 }, { "epoch": 1.5646687697160884, "grad_norm": 0.26844894886016846, "learning_rate": 5.495974772603916e-06, "loss": 0.2883, "step": 10912 }, { "epoch": 1.5648121594493833, "grad_norm": 0.27193838357925415, "learning_rate": 5.495144637079176e-06, "loss": 0.2838, "step": 10913 }, { "epoch": 1.5649555491826785, "grad_norm": 0.28253957629203796, "learning_rate": 5.49431448777037e-06, "loss": 0.3089, "step": 10914 }, { "epoch": 1.5650989389159737, "grad_norm": 0.27442601323127747, "learning_rate": 5.493484324700611e-06, "loss": 0.309, "step": 10915 }, { "epoch": 1.5652423286492687, "grad_norm": 0.27956461906433105, "learning_rate": 5.492654147893006e-06, "loss": 0.3035, "step": 10916 }, { "epoch": 1.5653857183825637, "grad_norm": 0.268474817276001, "learning_rate": 5.491823957370668e-06, "loss": 0.3063, "step": 10917 }, { "epoch": 1.5655291081158589, "grad_norm": 0.2829592823982239, "learning_rate": 5.490993753156708e-06, "loss": 0.3006, "step": 10918 }, { "epoch": 1.565672497849154, "grad_norm": 0.29574793577194214, "learning_rate": 5.490163535274237e-06, "loss": 0.31, "step": 10919 }, { "epoch": 1.565815887582449, "grad_norm": 0.2767406404018402, "learning_rate": 5.489333303746367e-06, "loss": 0.3042, "step": 10920 }, { "epoch": 1.5659592773157442, "grad_norm": 0.26525065302848816, "learning_rate": 5.4885030585962095e-06, "loss": 0.3197, "step": 10921 }, { "epoch": 1.5661026670490394, "grad_norm": 0.2875279188156128, "learning_rate": 5.487672799846879e-06, "loss": 0.3086, "step": 10922 }, { "epoch": 1.5662460567823344, "grad_norm": 0.2907387316226959, "learning_rate": 5.486842527521489e-06, "loss": 0.3056, "step": 10923 }, { "epoch": 1.5663894465156294, "grad_norm": 0.2846996784210205, "learning_rate": 5.486012241643151e-06, "loss": 0.2878, "step": 10924 }, { "epoch": 1.5665328362489246, "grad_norm": 0.2844243049621582, "learning_rate": 5.485181942234979e-06, "loss": 0.3185, "step": 10925 }, { "epoch": 1.5666762259822198, "grad_norm": 0.3034854233264923, "learning_rate": 5.4843516293200905e-06, "loss": 0.2891, "step": 10926 }, { "epoch": 1.5668196157155148, "grad_norm": 0.27326104044914246, "learning_rate": 5.483521302921596e-06, "loss": 0.2968, "step": 10927 }, { "epoch": 1.5669630054488097, "grad_norm": 0.28379133343696594, "learning_rate": 5.482690963062612e-06, "loss": 0.2916, "step": 10928 }, { "epoch": 1.567106395182105, "grad_norm": 0.28613999485969543, "learning_rate": 5.4818606097662545e-06, "loss": 0.2804, "step": 10929 }, { "epoch": 1.5672497849154001, "grad_norm": 0.2954059839248657, "learning_rate": 5.4810302430556385e-06, "loss": 0.3188, "step": 10930 }, { "epoch": 1.567393174648695, "grad_norm": 0.25969207286834717, "learning_rate": 5.480199862953882e-06, "loss": 0.2976, "step": 10931 }, { "epoch": 1.5675365643819903, "grad_norm": 0.2742288410663605, "learning_rate": 5.4793694694840985e-06, "loss": 0.3126, "step": 10932 }, { "epoch": 1.5676799541152855, "grad_norm": 0.30267903208732605, "learning_rate": 5.4785390626694065e-06, "loss": 0.3155, "step": 10933 }, { "epoch": 1.5678233438485805, "grad_norm": 0.2877718210220337, "learning_rate": 5.477708642532924e-06, "loss": 0.3121, "step": 10934 }, { "epoch": 1.5679667335818754, "grad_norm": 0.2652618885040283, "learning_rate": 5.476878209097767e-06, "loss": 0.2976, "step": 10935 }, { "epoch": 1.5681101233151706, "grad_norm": 0.27755266427993774, "learning_rate": 5.476047762387054e-06, "loss": 0.3042, "step": 10936 }, { "epoch": 1.5682535130484658, "grad_norm": 0.27560654282569885, "learning_rate": 5.475217302423905e-06, "loss": 0.2973, "step": 10937 }, { "epoch": 1.5683969027817608, "grad_norm": 0.28045907616615295, "learning_rate": 5.4743868292314365e-06, "loss": 0.2915, "step": 10938 }, { "epoch": 1.5685402925150558, "grad_norm": 0.29103147983551025, "learning_rate": 5.473556342832768e-06, "loss": 0.2964, "step": 10939 }, { "epoch": 1.568683682248351, "grad_norm": 0.2958067059516907, "learning_rate": 5.472725843251019e-06, "loss": 0.2987, "step": 10940 }, { "epoch": 1.5688270719816462, "grad_norm": 0.2785238027572632, "learning_rate": 5.471895330509309e-06, "loss": 0.303, "step": 10941 }, { "epoch": 1.5689704617149411, "grad_norm": 0.2766047418117523, "learning_rate": 5.4710648046307615e-06, "loss": 0.2901, "step": 10942 }, { "epoch": 1.5691138514482363, "grad_norm": 0.27225902676582336, "learning_rate": 5.470234265638493e-06, "loss": 0.2899, "step": 10943 }, { "epoch": 1.5692572411815315, "grad_norm": 0.270171582698822, "learning_rate": 5.4694037135556256e-06, "loss": 0.3068, "step": 10944 }, { "epoch": 1.5694006309148265, "grad_norm": 0.2937600612640381, "learning_rate": 5.4685731484052824e-06, "loss": 0.3011, "step": 10945 }, { "epoch": 1.5695440206481215, "grad_norm": 0.2967345118522644, "learning_rate": 5.467742570210583e-06, "loss": 0.3193, "step": 10946 }, { "epoch": 1.5696874103814167, "grad_norm": 0.283065527677536, "learning_rate": 5.466911978994649e-06, "loss": 0.3203, "step": 10947 }, { "epoch": 1.5698308001147119, "grad_norm": 0.26129886507987976, "learning_rate": 5.466081374780604e-06, "loss": 0.3035, "step": 10948 }, { "epoch": 1.5699741898480069, "grad_norm": 0.26728183031082153, "learning_rate": 5.46525075759157e-06, "loss": 0.3142, "step": 10949 }, { "epoch": 1.5701175795813018, "grad_norm": 0.30616241693496704, "learning_rate": 5.464420127450673e-06, "loss": 0.2994, "step": 10950 }, { "epoch": 1.570260969314597, "grad_norm": 0.2853200435638428, "learning_rate": 5.463589484381031e-06, "loss": 0.327, "step": 10951 }, { "epoch": 1.5704043590478922, "grad_norm": 0.2594764232635498, "learning_rate": 5.4627588284057734e-06, "loss": 0.3018, "step": 10952 }, { "epoch": 1.5705477487811872, "grad_norm": 0.2804476320743561, "learning_rate": 5.461928159548021e-06, "loss": 0.3247, "step": 10953 }, { "epoch": 1.5706911385144824, "grad_norm": 0.2933101952075958, "learning_rate": 5.461097477830899e-06, "loss": 0.3248, "step": 10954 }, { "epoch": 1.5708345282477776, "grad_norm": 0.26826274394989014, "learning_rate": 5.4602667832775316e-06, "loss": 0.3133, "step": 10955 }, { "epoch": 1.5709779179810726, "grad_norm": 0.29355356097221375, "learning_rate": 5.459436075911047e-06, "loss": 0.2972, "step": 10956 }, { "epoch": 1.5711213077143675, "grad_norm": 0.28364303708076477, "learning_rate": 5.458605355754566e-06, "loss": 0.3114, "step": 10957 }, { "epoch": 1.5712646974476627, "grad_norm": 0.28996601700782776, "learning_rate": 5.45777462283122e-06, "loss": 0.3326, "step": 10958 }, { "epoch": 1.571408087180958, "grad_norm": 0.2766745388507843, "learning_rate": 5.456943877164131e-06, "loss": 0.2966, "step": 10959 }, { "epoch": 1.571551476914253, "grad_norm": 0.2731979489326477, "learning_rate": 5.456113118776428e-06, "loss": 0.2946, "step": 10960 }, { "epoch": 1.5716948666475479, "grad_norm": 0.28859448432922363, "learning_rate": 5.455282347691239e-06, "loss": 0.315, "step": 10961 }, { "epoch": 1.5718382563808433, "grad_norm": 0.26964086294174194, "learning_rate": 5.454451563931688e-06, "loss": 0.3057, "step": 10962 }, { "epoch": 1.5719816461141383, "grad_norm": 0.2652161419391632, "learning_rate": 5.453620767520904e-06, "loss": 0.2917, "step": 10963 }, { "epoch": 1.5721250358474332, "grad_norm": 0.27781298756599426, "learning_rate": 5.4527899584820176e-06, "loss": 0.3056, "step": 10964 }, { "epoch": 1.5722684255807284, "grad_norm": 0.27720287442207336, "learning_rate": 5.451959136838154e-06, "loss": 0.3004, "step": 10965 }, { "epoch": 1.5724118153140236, "grad_norm": 0.26961854100227356, "learning_rate": 5.451128302612443e-06, "loss": 0.3041, "step": 10966 }, { "epoch": 1.5725552050473186, "grad_norm": 0.26557299494743347, "learning_rate": 5.4502974558280144e-06, "loss": 0.3101, "step": 10967 }, { "epoch": 1.5726985947806136, "grad_norm": 0.28986942768096924, "learning_rate": 5.4494665965079975e-06, "loss": 0.3039, "step": 10968 }, { "epoch": 1.5728419845139088, "grad_norm": 0.27274540066719055, "learning_rate": 5.448635724675522e-06, "loss": 0.3079, "step": 10969 }, { "epoch": 1.572985374247204, "grad_norm": 0.2674765884876251, "learning_rate": 5.447804840353718e-06, "loss": 0.3151, "step": 10970 }, { "epoch": 1.573128763980499, "grad_norm": 0.24374833703041077, "learning_rate": 5.446973943565715e-06, "loss": 0.2951, "step": 10971 }, { "epoch": 1.5732721537137941, "grad_norm": 0.2806694805622101, "learning_rate": 5.446143034334648e-06, "loss": 0.3107, "step": 10972 }, { "epoch": 1.5734155434470893, "grad_norm": 0.2966693639755249, "learning_rate": 5.445312112683642e-06, "loss": 0.3065, "step": 10973 }, { "epoch": 1.5735589331803843, "grad_norm": 0.27805423736572266, "learning_rate": 5.444481178635833e-06, "loss": 0.3006, "step": 10974 }, { "epoch": 1.5737023229136793, "grad_norm": 0.2759474217891693, "learning_rate": 5.443650232214352e-06, "loss": 0.3081, "step": 10975 }, { "epoch": 1.5738457126469745, "grad_norm": 0.2643778622150421, "learning_rate": 5.4428192734423305e-06, "loss": 0.3174, "step": 10976 }, { "epoch": 1.5739891023802697, "grad_norm": 0.29042986035346985, "learning_rate": 5.4419883023429025e-06, "loss": 0.3143, "step": 10977 }, { "epoch": 1.5741324921135647, "grad_norm": 0.27897128462791443, "learning_rate": 5.441157318939199e-06, "loss": 0.2979, "step": 10978 }, { "epoch": 1.5742758818468596, "grad_norm": 0.2785356938838959, "learning_rate": 5.440326323254355e-06, "loss": 0.3067, "step": 10979 }, { "epoch": 1.5744192715801548, "grad_norm": 0.2799576222896576, "learning_rate": 5.4394953153115025e-06, "loss": 0.3054, "step": 10980 }, { "epoch": 1.57456266131345, "grad_norm": 0.28220334649086, "learning_rate": 5.438664295133778e-06, "loss": 0.3034, "step": 10981 }, { "epoch": 1.574706051046745, "grad_norm": 0.26708805561065674, "learning_rate": 5.437833262744313e-06, "loss": 0.322, "step": 10982 }, { "epoch": 1.5748494407800402, "grad_norm": 0.26715049147605896, "learning_rate": 5.4370022181662455e-06, "loss": 0.3005, "step": 10983 }, { "epoch": 1.5749928305133354, "grad_norm": 0.2661004066467285, "learning_rate": 5.436171161422706e-06, "loss": 0.3222, "step": 10984 }, { "epoch": 1.5751362202466304, "grad_norm": 0.27538061141967773, "learning_rate": 5.435340092536833e-06, "loss": 0.2841, "step": 10985 }, { "epoch": 1.5752796099799253, "grad_norm": 0.2841825485229492, "learning_rate": 5.434509011531762e-06, "loss": 0.3023, "step": 10986 }, { "epoch": 1.5754229997132205, "grad_norm": 0.2895328998565674, "learning_rate": 5.433677918430628e-06, "loss": 0.3136, "step": 10987 }, { "epoch": 1.5755663894465157, "grad_norm": 0.2627426087856293, "learning_rate": 5.432846813256569e-06, "loss": 0.2936, "step": 10988 }, { "epoch": 1.5757097791798107, "grad_norm": 0.272257536649704, "learning_rate": 5.43201569603272e-06, "loss": 0.2945, "step": 10989 }, { "epoch": 1.5758531689131057, "grad_norm": 0.29429617524147034, "learning_rate": 5.431184566782217e-06, "loss": 0.312, "step": 10990 }, { "epoch": 1.5759965586464009, "grad_norm": 0.28242769837379456, "learning_rate": 5.430353425528202e-06, "loss": 0.2889, "step": 10991 }, { "epoch": 1.576139948379696, "grad_norm": 0.27784210443496704, "learning_rate": 5.4295222722938075e-06, "loss": 0.3076, "step": 10992 }, { "epoch": 1.576283338112991, "grad_norm": 0.28730645775794983, "learning_rate": 5.4286911071021744e-06, "loss": 0.2952, "step": 10993 }, { "epoch": 1.5764267278462862, "grad_norm": 0.27971479296684265, "learning_rate": 5.427859929976439e-06, "loss": 0.3081, "step": 10994 }, { "epoch": 1.5765701175795814, "grad_norm": 0.3105645179748535, "learning_rate": 5.427028740939743e-06, "loss": 0.3146, "step": 10995 }, { "epoch": 1.5767135073128764, "grad_norm": 0.2758674621582031, "learning_rate": 5.4261975400152235e-06, "loss": 0.3109, "step": 10996 }, { "epoch": 1.5768568970461714, "grad_norm": 0.27268898487091064, "learning_rate": 5.425366327226019e-06, "loss": 0.3132, "step": 10997 }, { "epoch": 1.5770002867794666, "grad_norm": 0.2993077039718628, "learning_rate": 5.424535102595271e-06, "loss": 0.3209, "step": 10998 }, { "epoch": 1.5771436765127618, "grad_norm": 0.2836141884326935, "learning_rate": 5.42370386614612e-06, "loss": 0.3202, "step": 10999 }, { "epoch": 1.5772870662460567, "grad_norm": 0.2868611216545105, "learning_rate": 5.422872617901703e-06, "loss": 0.3199, "step": 11000 }, { "epoch": 1.5774304559793517, "grad_norm": 0.2659319341182709, "learning_rate": 5.422041357885164e-06, "loss": 0.3193, "step": 11001 }, { "epoch": 1.5775738457126471, "grad_norm": 0.2900662124156952, "learning_rate": 5.421210086119643e-06, "loss": 0.3341, "step": 11002 }, { "epoch": 1.5777172354459421, "grad_norm": 0.27978894114494324, "learning_rate": 5.42037880262828e-06, "loss": 0.3156, "step": 11003 }, { "epoch": 1.577860625179237, "grad_norm": 0.27816274762153625, "learning_rate": 5.41954750743422e-06, "loss": 0.3006, "step": 11004 }, { "epoch": 1.5780040149125323, "grad_norm": 0.2799372971057892, "learning_rate": 5.418716200560601e-06, "loss": 0.2997, "step": 11005 }, { "epoch": 1.5781474046458275, "grad_norm": 0.27876996994018555, "learning_rate": 5.417884882030566e-06, "loss": 0.2956, "step": 11006 }, { "epoch": 1.5782907943791225, "grad_norm": 0.3129137456417084, "learning_rate": 5.417053551867262e-06, "loss": 0.3081, "step": 11007 }, { "epoch": 1.5784341841124174, "grad_norm": 0.28085392713546753, "learning_rate": 5.4162222100938265e-06, "loss": 0.2944, "step": 11008 }, { "epoch": 1.5785775738457126, "grad_norm": 0.28197839856147766, "learning_rate": 5.415390856733405e-06, "loss": 0.3018, "step": 11009 }, { "epoch": 1.5787209635790078, "grad_norm": 0.28210553526878357, "learning_rate": 5.414559491809141e-06, "loss": 0.3048, "step": 11010 }, { "epoch": 1.5788643533123028, "grad_norm": 0.29688048362731934, "learning_rate": 5.413728115344179e-06, "loss": 0.3148, "step": 11011 }, { "epoch": 1.579007743045598, "grad_norm": 0.2852081060409546, "learning_rate": 5.412896727361663e-06, "loss": 0.293, "step": 11012 }, { "epoch": 1.5791511327788932, "grad_norm": 0.29058247804641724, "learning_rate": 5.412065327884736e-06, "loss": 0.2963, "step": 11013 }, { "epoch": 1.5792945225121882, "grad_norm": 0.2873106002807617, "learning_rate": 5.411233916936545e-06, "loss": 0.3094, "step": 11014 }, { "epoch": 1.5794379122454831, "grad_norm": 0.29674986004829407, "learning_rate": 5.410402494540234e-06, "loss": 0.3086, "step": 11015 }, { "epoch": 1.5795813019787783, "grad_norm": 0.270035058259964, "learning_rate": 5.409571060718949e-06, "loss": 0.3157, "step": 11016 }, { "epoch": 1.5797246917120735, "grad_norm": 0.2737485468387604, "learning_rate": 5.408739615495836e-06, "loss": 0.307, "step": 11017 }, { "epoch": 1.5798680814453685, "grad_norm": 0.26271310448646545, "learning_rate": 5.407908158894042e-06, "loss": 0.2786, "step": 11018 }, { "epoch": 1.5800114711786635, "grad_norm": 0.28610703349113464, "learning_rate": 5.40707669093671e-06, "loss": 0.319, "step": 11019 }, { "epoch": 1.5801548609119587, "grad_norm": 0.2976306974887848, "learning_rate": 5.40624521164699e-06, "loss": 0.3002, "step": 11020 }, { "epoch": 1.5802982506452539, "grad_norm": 0.2652752995491028, "learning_rate": 5.4054137210480285e-06, "loss": 0.2965, "step": 11021 }, { "epoch": 1.5804416403785488, "grad_norm": 0.26102131605148315, "learning_rate": 5.4045822191629715e-06, "loss": 0.3057, "step": 11022 }, { "epoch": 1.580585030111844, "grad_norm": 0.2759573459625244, "learning_rate": 5.40375070601497e-06, "loss": 0.312, "step": 11023 }, { "epoch": 1.5807284198451392, "grad_norm": 0.29554423689842224, "learning_rate": 5.402919181627169e-06, "loss": 0.3007, "step": 11024 }, { "epoch": 1.5808718095784342, "grad_norm": 0.2639051675796509, "learning_rate": 5.402087646022716e-06, "loss": 0.2922, "step": 11025 }, { "epoch": 1.5810151993117292, "grad_norm": 0.27520525455474854, "learning_rate": 5.401256099224764e-06, "loss": 0.3325, "step": 11026 }, { "epoch": 1.5811585890450244, "grad_norm": 0.28095000982284546, "learning_rate": 5.4004245412564595e-06, "loss": 0.3059, "step": 11027 }, { "epoch": 1.5813019787783196, "grad_norm": 0.28323355317115784, "learning_rate": 5.39959297214095e-06, "loss": 0.3134, "step": 11028 }, { "epoch": 1.5814453685116145, "grad_norm": 0.2939389944076538, "learning_rate": 5.398761391901388e-06, "loss": 0.3118, "step": 11029 }, { "epoch": 1.5815887582449095, "grad_norm": 0.2914779484272003, "learning_rate": 5.397929800560921e-06, "loss": 0.3061, "step": 11030 }, { "epoch": 1.5817321479782047, "grad_norm": 0.28293490409851074, "learning_rate": 5.397098198142702e-06, "loss": 0.3283, "step": 11031 }, { "epoch": 1.5818755377115, "grad_norm": 0.24339881539344788, "learning_rate": 5.396266584669879e-06, "loss": 0.2817, "step": 11032 }, { "epoch": 1.5820189274447949, "grad_norm": 0.28508564829826355, "learning_rate": 5.395434960165604e-06, "loss": 0.2902, "step": 11033 }, { "epoch": 1.58216231717809, "grad_norm": 0.27617397904396057, "learning_rate": 5.394603324653029e-06, "loss": 0.3124, "step": 11034 }, { "epoch": 1.5823057069113853, "grad_norm": 0.2739664614200592, "learning_rate": 5.3937716781553025e-06, "loss": 0.3033, "step": 11035 }, { "epoch": 1.5824490966446803, "grad_norm": 0.2684577703475952, "learning_rate": 5.39294002069558e-06, "loss": 0.3164, "step": 11036 }, { "epoch": 1.5825924863779752, "grad_norm": 0.28301143646240234, "learning_rate": 5.3921083522970115e-06, "loss": 0.3138, "step": 11037 }, { "epoch": 1.5827358761112704, "grad_norm": 0.26616084575653076, "learning_rate": 5.391276672982749e-06, "loss": 0.2984, "step": 11038 }, { "epoch": 1.5828792658445656, "grad_norm": 0.2727208137512207, "learning_rate": 5.3904449827759466e-06, "loss": 0.312, "step": 11039 }, { "epoch": 1.5830226555778606, "grad_norm": 0.26395273208618164, "learning_rate": 5.3896132816997545e-06, "loss": 0.2965, "step": 11040 }, { "epoch": 1.5831660453111556, "grad_norm": 0.2937076985836029, "learning_rate": 5.38878156977733e-06, "loss": 0.3223, "step": 11041 }, { "epoch": 1.5833094350444508, "grad_norm": 0.28452518582344055, "learning_rate": 5.3879498470318245e-06, "loss": 0.3127, "step": 11042 }, { "epoch": 1.583452824777746, "grad_norm": 0.2781379222869873, "learning_rate": 5.387118113486391e-06, "loss": 0.2847, "step": 11043 }, { "epoch": 1.583596214511041, "grad_norm": 0.288991242647171, "learning_rate": 5.386286369164187e-06, "loss": 0.3177, "step": 11044 }, { "epoch": 1.5837396042443361, "grad_norm": 0.268421471118927, "learning_rate": 5.385454614088364e-06, "loss": 0.3049, "step": 11045 }, { "epoch": 1.5838829939776313, "grad_norm": 0.2914149761199951, "learning_rate": 5.384622848282075e-06, "loss": 0.3215, "step": 11046 }, { "epoch": 1.5840263837109263, "grad_norm": 0.2705555558204651, "learning_rate": 5.3837910717684795e-06, "loss": 0.2982, "step": 11047 }, { "epoch": 1.5841697734442213, "grad_norm": 0.2824259400367737, "learning_rate": 5.38295928457073e-06, "loss": 0.3098, "step": 11048 }, { "epoch": 1.5843131631775165, "grad_norm": 0.27255716919898987, "learning_rate": 5.382127486711983e-06, "loss": 0.3063, "step": 11049 }, { "epoch": 1.5844565529108117, "grad_norm": 0.3083907663822174, "learning_rate": 5.3812956782153965e-06, "loss": 0.3246, "step": 11050 }, { "epoch": 1.5845999426441066, "grad_norm": 0.2867782711982727, "learning_rate": 5.380463859104124e-06, "loss": 0.31, "step": 11051 }, { "epoch": 1.5847433323774016, "grad_norm": 0.2740146815776825, "learning_rate": 5.379632029401322e-06, "loss": 0.302, "step": 11052 }, { "epoch": 1.584886722110697, "grad_norm": 0.27060651779174805, "learning_rate": 5.378800189130149e-06, "loss": 0.3112, "step": 11053 }, { "epoch": 1.585030111843992, "grad_norm": 0.27667707204818726, "learning_rate": 5.3779683383137615e-06, "loss": 0.3076, "step": 11054 }, { "epoch": 1.585173501577287, "grad_norm": 0.2857342064380646, "learning_rate": 5.377136476975316e-06, "loss": 0.3028, "step": 11055 }, { "epoch": 1.5853168913105822, "grad_norm": 0.2781306505203247, "learning_rate": 5.376304605137973e-06, "loss": 0.2922, "step": 11056 }, { "epoch": 1.5854602810438774, "grad_norm": 0.2896175980567932, "learning_rate": 5.375472722824887e-06, "loss": 0.3041, "step": 11057 }, { "epoch": 1.5856036707771723, "grad_norm": 0.2857030928134918, "learning_rate": 5.374640830059219e-06, "loss": 0.2938, "step": 11058 }, { "epoch": 1.5857470605104673, "grad_norm": 0.2592315375804901, "learning_rate": 5.373808926864126e-06, "loss": 0.307, "step": 11059 }, { "epoch": 1.5858904502437625, "grad_norm": 0.2899799048900604, "learning_rate": 5.372977013262767e-06, "loss": 0.3168, "step": 11060 }, { "epoch": 1.5860338399770577, "grad_norm": 0.2886877655982971, "learning_rate": 5.372145089278302e-06, "loss": 0.3314, "step": 11061 }, { "epoch": 1.5861772297103527, "grad_norm": 0.2814736068248749, "learning_rate": 5.37131315493389e-06, "loss": 0.2995, "step": 11062 }, { "epoch": 1.5863206194436479, "grad_norm": 0.28002825379371643, "learning_rate": 5.370481210252692e-06, "loss": 0.2821, "step": 11063 }, { "epoch": 1.586464009176943, "grad_norm": 0.26904138922691345, "learning_rate": 5.369649255257867e-06, "loss": 0.3122, "step": 11064 }, { "epoch": 1.586607398910238, "grad_norm": 0.2596297264099121, "learning_rate": 5.368817289972575e-06, "loss": 0.3011, "step": 11065 }, { "epoch": 1.586750788643533, "grad_norm": 0.29108625650405884, "learning_rate": 5.367985314419977e-06, "loss": 0.3229, "step": 11066 }, { "epoch": 1.5868941783768282, "grad_norm": 0.2809394299983978, "learning_rate": 5.367153328623233e-06, "loss": 0.2986, "step": 11067 }, { "epoch": 1.5870375681101234, "grad_norm": 0.2789689898490906, "learning_rate": 5.366321332605505e-06, "loss": 0.3014, "step": 11068 }, { "epoch": 1.5871809578434184, "grad_norm": 0.2862038016319275, "learning_rate": 5.365489326389956e-06, "loss": 0.3161, "step": 11069 }, { "epoch": 1.5873243475767134, "grad_norm": 0.3042299449443817, "learning_rate": 5.364657309999746e-06, "loss": 0.3289, "step": 11070 }, { "epoch": 1.5874677373100086, "grad_norm": 0.2801152169704437, "learning_rate": 5.363825283458037e-06, "loss": 0.3026, "step": 11071 }, { "epoch": 1.5876111270433038, "grad_norm": 0.2861170172691345, "learning_rate": 5.362993246787993e-06, "loss": 0.3132, "step": 11072 }, { "epoch": 1.5877545167765987, "grad_norm": 0.2761545479297638, "learning_rate": 5.362161200012774e-06, "loss": 0.3127, "step": 11073 }, { "epoch": 1.587897906509894, "grad_norm": 0.3021491765975952, "learning_rate": 5.361329143155545e-06, "loss": 0.3358, "step": 11074 }, { "epoch": 1.5880412962431891, "grad_norm": 0.2911977767944336, "learning_rate": 5.360497076239467e-06, "loss": 0.3184, "step": 11075 }, { "epoch": 1.588184685976484, "grad_norm": 0.2972029745578766, "learning_rate": 5.359664999287706e-06, "loss": 0.3115, "step": 11076 }, { "epoch": 1.588328075709779, "grad_norm": 0.2973502576351166, "learning_rate": 5.358832912323426e-06, "loss": 0.2993, "step": 11077 }, { "epoch": 1.5884714654430743, "grad_norm": 0.27484744787216187, "learning_rate": 5.358000815369789e-06, "loss": 0.2959, "step": 11078 }, { "epoch": 1.5886148551763695, "grad_norm": 0.2902079224586487, "learning_rate": 5.357168708449958e-06, "loss": 0.3055, "step": 11079 }, { "epoch": 1.5887582449096644, "grad_norm": 0.31296104192733765, "learning_rate": 5.356336591587102e-06, "loss": 0.314, "step": 11080 }, { "epoch": 1.5889016346429594, "grad_norm": 0.3079182803630829, "learning_rate": 5.355504464804382e-06, "loss": 0.2982, "step": 11081 }, { "epoch": 1.5890450243762546, "grad_norm": 0.2857376039028168, "learning_rate": 5.3546723281249655e-06, "loss": 0.301, "step": 11082 }, { "epoch": 1.5891884141095498, "grad_norm": 0.27617111802101135, "learning_rate": 5.353840181572018e-06, "loss": 0.2851, "step": 11083 }, { "epoch": 1.5893318038428448, "grad_norm": 0.29683879017829895, "learning_rate": 5.353008025168702e-06, "loss": 0.2994, "step": 11084 }, { "epoch": 1.58947519357614, "grad_norm": 0.3006671369075775, "learning_rate": 5.352175858938185e-06, "loss": 0.3057, "step": 11085 }, { "epoch": 1.5896185833094352, "grad_norm": 0.303938090801239, "learning_rate": 5.351343682903636e-06, "loss": 0.322, "step": 11086 }, { "epoch": 1.5897619730427301, "grad_norm": 0.2686164379119873, "learning_rate": 5.350511497088218e-06, "loss": 0.2829, "step": 11087 }, { "epoch": 1.5899053627760251, "grad_norm": 0.2628910541534424, "learning_rate": 5.3496793015151e-06, "loss": 0.3075, "step": 11088 }, { "epoch": 1.5900487525093203, "grad_norm": 0.28042933344841003, "learning_rate": 5.348847096207448e-06, "loss": 0.3118, "step": 11089 }, { "epoch": 1.5901921422426155, "grad_norm": 0.3082119822502136, "learning_rate": 5.348014881188429e-06, "loss": 0.3156, "step": 11090 }, { "epoch": 1.5903355319759105, "grad_norm": 0.28258535265922546, "learning_rate": 5.347182656481212e-06, "loss": 0.2868, "step": 11091 }, { "epoch": 1.5904789217092055, "grad_norm": 0.32513928413391113, "learning_rate": 5.346350422108963e-06, "loss": 0.3111, "step": 11092 }, { "epoch": 1.5906223114425009, "grad_norm": 0.28094446659088135, "learning_rate": 5.34551817809485e-06, "loss": 0.2857, "step": 11093 }, { "epoch": 1.5907657011757959, "grad_norm": 0.28406575322151184, "learning_rate": 5.344685924462044e-06, "loss": 0.3074, "step": 11094 }, { "epoch": 1.5909090909090908, "grad_norm": 0.2961629033088684, "learning_rate": 5.343853661233712e-06, "loss": 0.3162, "step": 11095 }, { "epoch": 1.591052480642386, "grad_norm": 0.2999196946620941, "learning_rate": 5.343021388433024e-06, "loss": 0.2959, "step": 11096 }, { "epoch": 1.5911958703756812, "grad_norm": 0.28244683146476746, "learning_rate": 5.342189106083147e-06, "loss": 0.3178, "step": 11097 }, { "epoch": 1.5913392601089762, "grad_norm": 0.2953186631202698, "learning_rate": 5.34135681420725e-06, "loss": 0.3139, "step": 11098 }, { "epoch": 1.5914826498422712, "grad_norm": 0.30328673124313354, "learning_rate": 5.340524512828507e-06, "loss": 0.3067, "step": 11099 }, { "epoch": 1.5916260395755664, "grad_norm": 0.2889690101146698, "learning_rate": 5.339692201970085e-06, "loss": 0.3041, "step": 11100 }, { "epoch": 1.5917694293088616, "grad_norm": 0.311828076839447, "learning_rate": 5.338859881655154e-06, "loss": 0.3005, "step": 11101 }, { "epoch": 1.5919128190421565, "grad_norm": 0.27996644377708435, "learning_rate": 5.338027551906886e-06, "loss": 0.2943, "step": 11102 }, { "epoch": 1.5920562087754517, "grad_norm": 0.26370492577552795, "learning_rate": 5.337195212748448e-06, "loss": 0.2844, "step": 11103 }, { "epoch": 1.592199598508747, "grad_norm": 0.27952659130096436, "learning_rate": 5.336362864203017e-06, "loss": 0.2963, "step": 11104 }, { "epoch": 1.592342988242042, "grad_norm": 0.2982017993927002, "learning_rate": 5.3355305062937615e-06, "loss": 0.2968, "step": 11105 }, { "epoch": 1.5924863779753369, "grad_norm": 0.277219295501709, "learning_rate": 5.334698139043851e-06, "loss": 0.3126, "step": 11106 }, { "epoch": 1.592629767708632, "grad_norm": 0.2786380648612976, "learning_rate": 5.333865762476461e-06, "loss": 0.3083, "step": 11107 }, { "epoch": 1.5927731574419273, "grad_norm": 0.2846886217594147, "learning_rate": 5.33303337661476e-06, "loss": 0.3137, "step": 11108 }, { "epoch": 1.5929165471752222, "grad_norm": 0.2657207250595093, "learning_rate": 5.332200981481923e-06, "loss": 0.3051, "step": 11109 }, { "epoch": 1.5930599369085172, "grad_norm": 0.2673497796058655, "learning_rate": 5.331368577101122e-06, "loss": 0.2965, "step": 11110 }, { "epoch": 1.5932033266418124, "grad_norm": 0.28333306312561035, "learning_rate": 5.330536163495529e-06, "loss": 0.2937, "step": 11111 }, { "epoch": 1.5933467163751076, "grad_norm": 0.26155635714530945, "learning_rate": 5.329703740688318e-06, "loss": 0.3069, "step": 11112 }, { "epoch": 1.5934901061084026, "grad_norm": 0.273902028799057, "learning_rate": 5.328871308702661e-06, "loss": 0.3035, "step": 11113 }, { "epoch": 1.5936334958416978, "grad_norm": 0.2742288410663605, "learning_rate": 5.328038867561734e-06, "loss": 0.288, "step": 11114 }, { "epoch": 1.593776885574993, "grad_norm": 0.2750427722930908, "learning_rate": 5.32720641728871e-06, "loss": 0.2893, "step": 11115 }, { "epoch": 1.593920275308288, "grad_norm": 0.287418931722641, "learning_rate": 5.3263739579067604e-06, "loss": 0.3166, "step": 11116 }, { "epoch": 1.594063665041583, "grad_norm": 0.28191012144088745, "learning_rate": 5.325541489439063e-06, "loss": 0.3282, "step": 11117 }, { "epoch": 1.5942070547748781, "grad_norm": 0.2750903069972992, "learning_rate": 5.324709011908792e-06, "loss": 0.3046, "step": 11118 }, { "epoch": 1.5943504445081733, "grad_norm": 0.26011601090431213, "learning_rate": 5.323876525339121e-06, "loss": 0.3268, "step": 11119 }, { "epoch": 1.5944938342414683, "grad_norm": 0.2738891541957855, "learning_rate": 5.323044029753226e-06, "loss": 0.301, "step": 11120 }, { "epoch": 1.5946372239747633, "grad_norm": 0.28557437658309937, "learning_rate": 5.322211525174281e-06, "loss": 0.2754, "step": 11121 }, { "epoch": 1.5947806137080585, "grad_norm": 0.28030309081077576, "learning_rate": 5.321379011625464e-06, "loss": 0.3177, "step": 11122 }, { "epoch": 1.5949240034413537, "grad_norm": 0.28311675786972046, "learning_rate": 5.32054648912995e-06, "loss": 0.3029, "step": 11123 }, { "epoch": 1.5950673931746486, "grad_norm": 0.28926485776901245, "learning_rate": 5.319713957710914e-06, "loss": 0.303, "step": 11124 }, { "epoch": 1.5952107829079438, "grad_norm": 0.2672427296638489, "learning_rate": 5.318881417391534e-06, "loss": 0.2959, "step": 11125 }, { "epoch": 1.595354172641239, "grad_norm": 0.27716970443725586, "learning_rate": 5.318048868194985e-06, "loss": 0.3215, "step": 11126 }, { "epoch": 1.595497562374534, "grad_norm": 0.2712556719779968, "learning_rate": 5.317216310144445e-06, "loss": 0.3187, "step": 11127 }, { "epoch": 1.595640952107829, "grad_norm": 0.2861711382865906, "learning_rate": 5.3163837432630914e-06, "loss": 0.2847, "step": 11128 }, { "epoch": 1.5957843418411242, "grad_norm": 0.27181276679039, "learning_rate": 5.315551167574101e-06, "loss": 0.3141, "step": 11129 }, { "epoch": 1.5959277315744194, "grad_norm": 0.28287163376808167, "learning_rate": 5.314718583100651e-06, "loss": 0.302, "step": 11130 }, { "epoch": 1.5960711213077143, "grad_norm": 0.2642899751663208, "learning_rate": 5.31388598986592e-06, "loss": 0.2891, "step": 11131 }, { "epoch": 1.5962145110410093, "grad_norm": 0.2629319727420807, "learning_rate": 5.313053387893086e-06, "loss": 0.3136, "step": 11132 }, { "epoch": 1.5963579007743045, "grad_norm": 0.27498868107795715, "learning_rate": 5.312220777205327e-06, "loss": 0.2929, "step": 11133 }, { "epoch": 1.5965012905075997, "grad_norm": 0.2710088789463043, "learning_rate": 5.311388157825822e-06, "loss": 0.3021, "step": 11134 }, { "epoch": 1.5966446802408947, "grad_norm": 0.2775570750236511, "learning_rate": 5.31055552977775e-06, "loss": 0.3038, "step": 11135 }, { "epoch": 1.5967880699741899, "grad_norm": 0.27479031682014465, "learning_rate": 5.309722893084289e-06, "loss": 0.3142, "step": 11136 }, { "epoch": 1.596931459707485, "grad_norm": 0.2781761884689331, "learning_rate": 5.30889024776862e-06, "loss": 0.2997, "step": 11137 }, { "epoch": 1.59707484944078, "grad_norm": 0.2714761793613434, "learning_rate": 5.308057593853922e-06, "loss": 0.2918, "step": 11138 }, { "epoch": 1.597218239174075, "grad_norm": 0.2762371599674225, "learning_rate": 5.307224931363373e-06, "loss": 0.2901, "step": 11139 }, { "epoch": 1.5973616289073702, "grad_norm": 0.261618047952652, "learning_rate": 5.306392260320155e-06, "loss": 0.293, "step": 11140 }, { "epoch": 1.5975050186406654, "grad_norm": 0.2829287648200989, "learning_rate": 5.305559580747447e-06, "loss": 0.2872, "step": 11141 }, { "epoch": 1.5976484083739604, "grad_norm": 0.3003058433532715, "learning_rate": 5.304726892668433e-06, "loss": 0.3169, "step": 11142 }, { "epoch": 1.5977917981072554, "grad_norm": 0.2681909203529358, "learning_rate": 5.303894196106288e-06, "loss": 0.3065, "step": 11143 }, { "epoch": 1.5979351878405508, "grad_norm": 0.2816358506679535, "learning_rate": 5.303061491084197e-06, "loss": 0.3144, "step": 11144 }, { "epoch": 1.5980785775738457, "grad_norm": 0.3028623163700104, "learning_rate": 5.302228777625342e-06, "loss": 0.2992, "step": 11145 }, { "epoch": 1.5982219673071407, "grad_norm": 0.2706950604915619, "learning_rate": 5.301396055752901e-06, "loss": 0.2961, "step": 11146 }, { "epoch": 1.598365357040436, "grad_norm": 0.284679114818573, "learning_rate": 5.300563325490058e-06, "loss": 0.301, "step": 11147 }, { "epoch": 1.5985087467737311, "grad_norm": 0.30277273058891296, "learning_rate": 5.2997305868599934e-06, "loss": 0.3234, "step": 11148 }, { "epoch": 1.598652136507026, "grad_norm": 0.27412086725234985, "learning_rate": 5.298897839885891e-06, "loss": 0.3055, "step": 11149 }, { "epoch": 1.598795526240321, "grad_norm": 0.25473281741142273, "learning_rate": 5.298065084590934e-06, "loss": 0.2986, "step": 11150 }, { "epoch": 1.5989389159736163, "grad_norm": 0.2884891629219055, "learning_rate": 5.297232320998302e-06, "loss": 0.3055, "step": 11151 }, { "epoch": 1.5990823057069115, "grad_norm": 0.25951188802719116, "learning_rate": 5.296399549131179e-06, "loss": 0.2931, "step": 11152 }, { "epoch": 1.5992256954402064, "grad_norm": 0.2828448712825775, "learning_rate": 5.29556676901275e-06, "loss": 0.3337, "step": 11153 }, { "epoch": 1.5993690851735016, "grad_norm": 0.26948150992393494, "learning_rate": 5.294733980666196e-06, "loss": 0.2917, "step": 11154 }, { "epoch": 1.5995124749067968, "grad_norm": 0.2658928334712982, "learning_rate": 5.293901184114701e-06, "loss": 0.3, "step": 11155 }, { "epoch": 1.5996558646400918, "grad_norm": 0.2803137004375458, "learning_rate": 5.293068379381451e-06, "loss": 0.2972, "step": 11156 }, { "epoch": 1.5997992543733868, "grad_norm": 0.28430455923080444, "learning_rate": 5.292235566489626e-06, "loss": 0.3051, "step": 11157 }, { "epoch": 1.599942644106682, "grad_norm": 0.28866344690322876, "learning_rate": 5.291402745462413e-06, "loss": 0.3106, "step": 11158 }, { "epoch": 1.6000860338399772, "grad_norm": 0.27051985263824463, "learning_rate": 5.290569916322995e-06, "loss": 0.3028, "step": 11159 }, { "epoch": 1.6002294235732721, "grad_norm": 0.2668592631816864, "learning_rate": 5.2897370790945575e-06, "loss": 0.3014, "step": 11160 }, { "epoch": 1.600372813306567, "grad_norm": 0.2808753550052643, "learning_rate": 5.2889042338002885e-06, "loss": 0.306, "step": 11161 }, { "epoch": 1.6005162030398623, "grad_norm": 0.2872437834739685, "learning_rate": 5.288071380463366e-06, "loss": 0.2919, "step": 11162 }, { "epoch": 1.6006595927731575, "grad_norm": 0.2906237542629242, "learning_rate": 5.2872385191069816e-06, "loss": 0.2948, "step": 11163 }, { "epoch": 1.6008029825064525, "grad_norm": 0.27069729566574097, "learning_rate": 5.28640564975432e-06, "loss": 0.3044, "step": 11164 }, { "epoch": 1.6009463722397477, "grad_norm": 0.29115933179855347, "learning_rate": 5.285572772428563e-06, "loss": 0.3081, "step": 11165 }, { "epoch": 1.6010897619730429, "grad_norm": 0.2722826898097992, "learning_rate": 5.2847398871528996e-06, "loss": 0.3059, "step": 11166 }, { "epoch": 1.6012331517063378, "grad_norm": 0.28557127714157104, "learning_rate": 5.283906993950517e-06, "loss": 0.3168, "step": 11167 }, { "epoch": 1.6013765414396328, "grad_norm": 0.26946955919265747, "learning_rate": 5.2830740928446e-06, "loss": 0.3233, "step": 11168 }, { "epoch": 1.601519931172928, "grad_norm": 0.2873290777206421, "learning_rate": 5.282241183858337e-06, "loss": 0.2859, "step": 11169 }, { "epoch": 1.6016633209062232, "grad_norm": 0.3002883195877075, "learning_rate": 5.2814082670149116e-06, "loss": 0.2946, "step": 11170 }, { "epoch": 1.6018067106395182, "grad_norm": 0.2866988182067871, "learning_rate": 5.280575342337513e-06, "loss": 0.3122, "step": 11171 }, { "epoch": 1.6019501003728132, "grad_norm": 0.2599697411060333, "learning_rate": 5.27974240984933e-06, "loss": 0.2946, "step": 11172 }, { "epoch": 1.6020934901061084, "grad_norm": 0.27198249101638794, "learning_rate": 5.278909469573546e-06, "loss": 0.3068, "step": 11173 }, { "epoch": 1.6022368798394035, "grad_norm": 0.2822997570037842, "learning_rate": 5.278076521533355e-06, "loss": 0.3135, "step": 11174 }, { "epoch": 1.6023802695726985, "grad_norm": 0.28891250491142273, "learning_rate": 5.277243565751937e-06, "loss": 0.3083, "step": 11175 }, { "epoch": 1.6025236593059937, "grad_norm": 0.2600944936275482, "learning_rate": 5.276410602252487e-06, "loss": 0.3002, "step": 11176 }, { "epoch": 1.602667049039289, "grad_norm": 0.2683252692222595, "learning_rate": 5.275577631058193e-06, "loss": 0.3065, "step": 11177 }, { "epoch": 1.602810438772584, "grad_norm": 0.2675146162509918, "learning_rate": 5.274744652192239e-06, "loss": 0.3173, "step": 11178 }, { "epoch": 1.6029538285058789, "grad_norm": 0.26976484060287476, "learning_rate": 5.273911665677817e-06, "loss": 0.3004, "step": 11179 }, { "epoch": 1.603097218239174, "grad_norm": 0.2727532684803009, "learning_rate": 5.273078671538116e-06, "loss": 0.3155, "step": 11180 }, { "epoch": 1.6032406079724693, "grad_norm": 0.2663733661174774, "learning_rate": 5.272245669796325e-06, "loss": 0.3193, "step": 11181 }, { "epoch": 1.6033839977057642, "grad_norm": 0.2688981592655182, "learning_rate": 5.271412660475633e-06, "loss": 0.3205, "step": 11182 }, { "epoch": 1.6035273874390592, "grad_norm": 0.25780215859413147, "learning_rate": 5.270579643599231e-06, "loss": 0.3021, "step": 11183 }, { "epoch": 1.6036707771723546, "grad_norm": 0.2693292796611786, "learning_rate": 5.269746619190307e-06, "loss": 0.3272, "step": 11184 }, { "epoch": 1.6038141669056496, "grad_norm": 0.2649100124835968, "learning_rate": 5.268913587272052e-06, "loss": 0.3065, "step": 11185 }, { "epoch": 1.6039575566389446, "grad_norm": 0.28488627076148987, "learning_rate": 5.268080547867656e-06, "loss": 0.3039, "step": 11186 }, { "epoch": 1.6041009463722398, "grad_norm": 0.27884772419929504, "learning_rate": 5.26724750100031e-06, "loss": 0.2953, "step": 11187 }, { "epoch": 1.604244336105535, "grad_norm": 0.2790772020816803, "learning_rate": 5.266414446693207e-06, "loss": 0.3139, "step": 11188 }, { "epoch": 1.60438772583883, "grad_norm": 0.2807718515396118, "learning_rate": 5.265581384969533e-06, "loss": 0.2917, "step": 11189 }, { "epoch": 1.604531115572125, "grad_norm": 0.3178863227367401, "learning_rate": 5.264748315852482e-06, "loss": 0.3047, "step": 11190 }, { "epoch": 1.60467450530542, "grad_norm": 0.2569635212421417, "learning_rate": 5.263915239365246e-06, "loss": 0.2981, "step": 11191 }, { "epoch": 1.6048178950387153, "grad_norm": 0.2936462461948395, "learning_rate": 5.263082155531015e-06, "loss": 0.3183, "step": 11192 }, { "epoch": 1.6049612847720103, "grad_norm": 0.2594601511955261, "learning_rate": 5.262249064372982e-06, "loss": 0.2882, "step": 11193 }, { "epoch": 1.6051046745053055, "grad_norm": 0.28993284702301025, "learning_rate": 5.2614159659143385e-06, "loss": 0.3068, "step": 11194 }, { "epoch": 1.6052480642386007, "grad_norm": 0.26771071553230286, "learning_rate": 5.260582860178276e-06, "loss": 0.2976, "step": 11195 }, { "epoch": 1.6053914539718956, "grad_norm": 0.262466162443161, "learning_rate": 5.259749747187989e-06, "loss": 0.3027, "step": 11196 }, { "epoch": 1.6055348437051906, "grad_norm": 0.29744842648506165, "learning_rate": 5.258916626966667e-06, "loss": 0.3031, "step": 11197 }, { "epoch": 1.6056782334384858, "grad_norm": 0.2920137047767639, "learning_rate": 5.258083499537505e-06, "loss": 0.3007, "step": 11198 }, { "epoch": 1.605821623171781, "grad_norm": 0.26705828309059143, "learning_rate": 5.257250364923696e-06, "loss": 0.3055, "step": 11199 }, { "epoch": 1.605965012905076, "grad_norm": 0.2916750907897949, "learning_rate": 5.256417223148432e-06, "loss": 0.3232, "step": 11200 }, { "epoch": 1.606108402638371, "grad_norm": 0.3006017804145813, "learning_rate": 5.255584074234907e-06, "loss": 0.3071, "step": 11201 }, { "epoch": 1.6062517923716662, "grad_norm": 0.28509801626205444, "learning_rate": 5.254750918206315e-06, "loss": 0.2892, "step": 11202 }, { "epoch": 1.6063951821049613, "grad_norm": 0.2827090919017792, "learning_rate": 5.253917755085847e-06, "loss": 0.3204, "step": 11203 }, { "epoch": 1.6065385718382563, "grad_norm": 0.2792262136936188, "learning_rate": 5.253084584896702e-06, "loss": 0.3111, "step": 11204 }, { "epoch": 1.6066819615715515, "grad_norm": 0.27451783418655396, "learning_rate": 5.25225140766207e-06, "loss": 0.3176, "step": 11205 }, { "epoch": 1.6068253513048467, "grad_norm": 0.2974218726158142, "learning_rate": 5.251418223405147e-06, "loss": 0.3111, "step": 11206 }, { "epoch": 1.6069687410381417, "grad_norm": 0.2869349420070648, "learning_rate": 5.2505850321491296e-06, "loss": 0.2997, "step": 11207 }, { "epoch": 1.6071121307714367, "grad_norm": 0.2809063196182251, "learning_rate": 5.249751833917208e-06, "loss": 0.3079, "step": 11208 }, { "epoch": 1.6072555205047319, "grad_norm": 0.26756951212882996, "learning_rate": 5.24891862873258e-06, "loss": 0.317, "step": 11209 }, { "epoch": 1.607398910238027, "grad_norm": 0.2858055830001831, "learning_rate": 5.248085416618441e-06, "loss": 0.3195, "step": 11210 }, { "epoch": 1.607542299971322, "grad_norm": 0.28498297929763794, "learning_rate": 5.247252197597985e-06, "loss": 0.3054, "step": 11211 }, { "epoch": 1.607685689704617, "grad_norm": 0.29270032048225403, "learning_rate": 5.246418971694408e-06, "loss": 0.3116, "step": 11212 }, { "epoch": 1.6078290794379122, "grad_norm": 0.28027161955833435, "learning_rate": 5.245585738930905e-06, "loss": 0.2824, "step": 11213 }, { "epoch": 1.6079724691712074, "grad_norm": 0.3103381097316742, "learning_rate": 5.244752499330674e-06, "loss": 0.3029, "step": 11214 }, { "epoch": 1.6081158589045024, "grad_norm": 0.31960463523864746, "learning_rate": 5.2439192529169105e-06, "loss": 0.2893, "step": 11215 }, { "epoch": 1.6082592486377976, "grad_norm": 0.30506375432014465, "learning_rate": 5.243085999712809e-06, "loss": 0.2808, "step": 11216 }, { "epoch": 1.6084026383710928, "grad_norm": 0.2692209780216217, "learning_rate": 5.2422527397415676e-06, "loss": 0.3111, "step": 11217 }, { "epoch": 1.6085460281043877, "grad_norm": 0.2834418714046478, "learning_rate": 5.2414194730263835e-06, "loss": 0.2997, "step": 11218 }, { "epoch": 1.6086894178376827, "grad_norm": 0.2776368260383606, "learning_rate": 5.240586199590451e-06, "loss": 0.2874, "step": 11219 }, { "epoch": 1.608832807570978, "grad_norm": 0.2712301015853882, "learning_rate": 5.239752919456971e-06, "loss": 0.3049, "step": 11220 }, { "epoch": 1.608976197304273, "grad_norm": 0.26695334911346436, "learning_rate": 5.238919632649136e-06, "loss": 0.3101, "step": 11221 }, { "epoch": 1.609119587037568, "grad_norm": 0.2701549530029297, "learning_rate": 5.2380863391901474e-06, "loss": 0.2993, "step": 11222 }, { "epoch": 1.609262976770863, "grad_norm": 0.2986767590045929, "learning_rate": 5.237253039103202e-06, "loss": 0.2899, "step": 11223 }, { "epoch": 1.6094063665041582, "grad_norm": 0.29319998621940613, "learning_rate": 5.236419732411495e-06, "loss": 0.3074, "step": 11224 }, { "epoch": 1.6095497562374534, "grad_norm": 0.266117662191391, "learning_rate": 5.235586419138228e-06, "loss": 0.2839, "step": 11225 }, { "epoch": 1.6096931459707484, "grad_norm": 0.27894124388694763, "learning_rate": 5.234753099306598e-06, "loss": 0.3077, "step": 11226 }, { "epoch": 1.6098365357040436, "grad_norm": 0.2855842411518097, "learning_rate": 5.233919772939801e-06, "loss": 0.2877, "step": 11227 }, { "epoch": 1.6099799254373388, "grad_norm": 0.2801937162876129, "learning_rate": 5.233086440061038e-06, "loss": 0.3324, "step": 11228 }, { "epoch": 1.6101233151706338, "grad_norm": 0.2846713960170746, "learning_rate": 5.232253100693507e-06, "loss": 0.2854, "step": 11229 }, { "epoch": 1.6102667049039288, "grad_norm": 0.2688537538051605, "learning_rate": 5.2314197548604075e-06, "loss": 0.3058, "step": 11230 }, { "epoch": 1.610410094637224, "grad_norm": 0.25932809710502625, "learning_rate": 5.230586402584937e-06, "loss": 0.3259, "step": 11231 }, { "epoch": 1.6105534843705192, "grad_norm": 0.2801551818847656, "learning_rate": 5.229753043890296e-06, "loss": 0.3133, "step": 11232 }, { "epoch": 1.6106968741038141, "grad_norm": 0.2662923038005829, "learning_rate": 5.228919678799684e-06, "loss": 0.2794, "step": 11233 }, { "epoch": 1.610840263837109, "grad_norm": 0.2956511378288269, "learning_rate": 5.228086307336301e-06, "loss": 0.2986, "step": 11234 }, { "epoch": 1.6109836535704045, "grad_norm": 0.28279271721839905, "learning_rate": 5.227252929523346e-06, "loss": 0.3061, "step": 11235 }, { "epoch": 1.6111270433036995, "grad_norm": 0.28694552183151245, "learning_rate": 5.226419545384017e-06, "loss": 0.3106, "step": 11236 }, { "epoch": 1.6112704330369945, "grad_norm": 0.273787260055542, "learning_rate": 5.225586154941519e-06, "loss": 0.2913, "step": 11237 }, { "epoch": 1.6114138227702897, "grad_norm": 0.3165587782859802, "learning_rate": 5.224752758219047e-06, "loss": 0.3126, "step": 11238 }, { "epoch": 1.6115572125035849, "grad_norm": 0.2758154273033142, "learning_rate": 5.223919355239805e-06, "loss": 0.3245, "step": 11239 }, { "epoch": 1.6117006022368798, "grad_norm": 0.27464959025382996, "learning_rate": 5.223085946026992e-06, "loss": 0.2979, "step": 11240 }, { "epoch": 1.6118439919701748, "grad_norm": 0.284028559923172, "learning_rate": 5.222252530603811e-06, "loss": 0.3036, "step": 11241 }, { "epoch": 1.61198738170347, "grad_norm": 0.2728291153907776, "learning_rate": 5.22141910899346e-06, "loss": 0.3, "step": 11242 }, { "epoch": 1.6121307714367652, "grad_norm": 0.26572316884994507, "learning_rate": 5.220585681219142e-06, "loss": 0.2985, "step": 11243 }, { "epoch": 1.6122741611700602, "grad_norm": 0.28003591299057007, "learning_rate": 5.219752247304058e-06, "loss": 0.298, "step": 11244 }, { "epoch": 1.6124175509033554, "grad_norm": 0.28686386346817017, "learning_rate": 5.21891880727141e-06, "loss": 0.2992, "step": 11245 }, { "epoch": 1.6125609406366506, "grad_norm": 0.2662133276462555, "learning_rate": 5.2180853611443994e-06, "loss": 0.2915, "step": 11246 }, { "epoch": 1.6127043303699455, "grad_norm": 0.2653360664844513, "learning_rate": 5.217251908946227e-06, "loss": 0.299, "step": 11247 }, { "epoch": 1.6128477201032405, "grad_norm": 0.2803959250450134, "learning_rate": 5.216418450700095e-06, "loss": 0.3048, "step": 11248 }, { "epoch": 1.6129911098365357, "grad_norm": 0.28663426637649536, "learning_rate": 5.215584986429206e-06, "loss": 0.3108, "step": 11249 }, { "epoch": 1.613134499569831, "grad_norm": 0.28065869212150574, "learning_rate": 5.214751516156765e-06, "loss": 0.3075, "step": 11250 }, { "epoch": 1.6132778893031259, "grad_norm": 0.28011056780815125, "learning_rate": 5.21391803990597e-06, "loss": 0.2975, "step": 11251 }, { "epoch": 1.6134212790364209, "grad_norm": 0.291246235370636, "learning_rate": 5.213084557700027e-06, "loss": 0.312, "step": 11252 }, { "epoch": 1.613564668769716, "grad_norm": 0.2694132328033447, "learning_rate": 5.212251069562138e-06, "loss": 0.3007, "step": 11253 }, { "epoch": 1.6137080585030112, "grad_norm": 0.2700774371623993, "learning_rate": 5.211417575515505e-06, "loss": 0.3007, "step": 11254 }, { "epoch": 1.6138514482363062, "grad_norm": 0.28341594338417053, "learning_rate": 5.210584075583333e-06, "loss": 0.2973, "step": 11255 }, { "epoch": 1.6139948379696014, "grad_norm": 0.28376197814941406, "learning_rate": 5.209750569788822e-06, "loss": 0.3063, "step": 11256 }, { "epoch": 1.6141382277028966, "grad_norm": 0.2676944434642792, "learning_rate": 5.208917058155178e-06, "loss": 0.3, "step": 11257 }, { "epoch": 1.6142816174361916, "grad_norm": 0.2737267017364502, "learning_rate": 5.2080835407056066e-06, "loss": 0.3033, "step": 11258 }, { "epoch": 1.6144250071694866, "grad_norm": 0.27316057682037354, "learning_rate": 5.207250017463308e-06, "loss": 0.3088, "step": 11259 }, { "epoch": 1.6145683969027818, "grad_norm": 0.2755665183067322, "learning_rate": 5.2064164884514875e-06, "loss": 0.3297, "step": 11260 }, { "epoch": 1.614711786636077, "grad_norm": 0.2741144001483917, "learning_rate": 5.205582953693351e-06, "loss": 0.3151, "step": 11261 }, { "epoch": 1.614855176369372, "grad_norm": 0.2665899991989136, "learning_rate": 5.2047494132121e-06, "loss": 0.3074, "step": 11262 }, { "epoch": 1.614998566102667, "grad_norm": 0.2566157579421997, "learning_rate": 5.203915867030941e-06, "loss": 0.3179, "step": 11263 }, { "epoch": 1.615141955835962, "grad_norm": 0.30276334285736084, "learning_rate": 5.203082315173078e-06, "loss": 0.2993, "step": 11264 }, { "epoch": 1.6152853455692573, "grad_norm": 0.2792852222919464, "learning_rate": 5.202248757661715e-06, "loss": 0.3075, "step": 11265 }, { "epoch": 1.6154287353025523, "grad_norm": 0.2760462760925293, "learning_rate": 5.201415194520058e-06, "loss": 0.291, "step": 11266 }, { "epoch": 1.6155721250358475, "grad_norm": 0.26705992221832275, "learning_rate": 5.200581625771311e-06, "loss": 0.306, "step": 11267 }, { "epoch": 1.6157155147691427, "grad_norm": 0.2779959738254547, "learning_rate": 5.199748051438681e-06, "loss": 0.3217, "step": 11268 }, { "epoch": 1.6158589045024376, "grad_norm": 0.2764549255371094, "learning_rate": 5.198914471545373e-06, "loss": 0.2946, "step": 11269 }, { "epoch": 1.6160022942357326, "grad_norm": 0.2722167372703552, "learning_rate": 5.198080886114592e-06, "loss": 0.3107, "step": 11270 }, { "epoch": 1.6161456839690278, "grad_norm": 0.29506000876426697, "learning_rate": 5.197247295169543e-06, "loss": 0.2844, "step": 11271 }, { "epoch": 1.616289073702323, "grad_norm": 0.28686606884002686, "learning_rate": 5.196413698733434e-06, "loss": 0.3145, "step": 11272 }, { "epoch": 1.616432463435618, "grad_norm": 0.2604048550128937, "learning_rate": 5.195580096829469e-06, "loss": 0.3061, "step": 11273 }, { "epoch": 1.616575853168913, "grad_norm": 0.27859175205230713, "learning_rate": 5.1947464894808555e-06, "loss": 0.3129, "step": 11274 }, { "epoch": 1.6167192429022084, "grad_norm": 0.2906319499015808, "learning_rate": 5.193912876710798e-06, "loss": 0.3024, "step": 11275 }, { "epoch": 1.6168626326355033, "grad_norm": 0.2786504328250885, "learning_rate": 5.193079258542505e-06, "loss": 0.3154, "step": 11276 }, { "epoch": 1.6170060223687983, "grad_norm": 0.27951809763908386, "learning_rate": 5.192245634999182e-06, "loss": 0.3072, "step": 11277 }, { "epoch": 1.6171494121020935, "grad_norm": 0.2656498849391937, "learning_rate": 5.191412006104037e-06, "loss": 0.3034, "step": 11278 }, { "epoch": 1.6172928018353887, "grad_norm": 0.27184683084487915, "learning_rate": 5.190578371880276e-06, "loss": 0.3186, "step": 11279 }, { "epoch": 1.6174361915686837, "grad_norm": 0.28355059027671814, "learning_rate": 5.1897447323511075e-06, "loss": 0.2969, "step": 11280 }, { "epoch": 1.6175795813019787, "grad_norm": 0.2654508054256439, "learning_rate": 5.188911087539735e-06, "loss": 0.286, "step": 11281 }, { "epoch": 1.6177229710352738, "grad_norm": 0.28497231006622314, "learning_rate": 5.188077437469371e-06, "loss": 0.3061, "step": 11282 }, { "epoch": 1.617866360768569, "grad_norm": 0.2999533712863922, "learning_rate": 5.18724378216322e-06, "loss": 0.3022, "step": 11283 }, { "epoch": 1.618009750501864, "grad_norm": 0.27769312262535095, "learning_rate": 5.186410121644488e-06, "loss": 0.317, "step": 11284 }, { "epoch": 1.6181531402351592, "grad_norm": 0.2823673486709595, "learning_rate": 5.185576455936386e-06, "loss": 0.323, "step": 11285 }, { "epoch": 1.6182965299684544, "grad_norm": 0.2730986475944519, "learning_rate": 5.184742785062122e-06, "loss": 0.2838, "step": 11286 }, { "epoch": 1.6184399197017494, "grad_norm": 0.2769012153148651, "learning_rate": 5.183909109044902e-06, "loss": 0.2926, "step": 11287 }, { "epoch": 1.6185833094350444, "grad_norm": 0.26888492703437805, "learning_rate": 5.183075427907936e-06, "loss": 0.2995, "step": 11288 }, { "epoch": 1.6187266991683396, "grad_norm": 0.2694670855998993, "learning_rate": 5.1822417416744295e-06, "loss": 0.3108, "step": 11289 }, { "epoch": 1.6188700889016348, "grad_norm": 0.26698657870292664, "learning_rate": 5.181408050367596e-06, "loss": 0.3099, "step": 11290 }, { "epoch": 1.6190134786349297, "grad_norm": 0.2599262595176697, "learning_rate": 5.18057435401064e-06, "loss": 0.2906, "step": 11291 }, { "epoch": 1.6191568683682247, "grad_norm": 0.2799701392650604, "learning_rate": 5.179740652626772e-06, "loss": 0.3105, "step": 11292 }, { "epoch": 1.61930025810152, "grad_norm": 0.2721315324306488, "learning_rate": 5.178906946239201e-06, "loss": 0.2898, "step": 11293 }, { "epoch": 1.619443647834815, "grad_norm": 0.2579253613948822, "learning_rate": 5.178073234871136e-06, "loss": 0.2972, "step": 11294 }, { "epoch": 1.61958703756811, "grad_norm": 0.248313307762146, "learning_rate": 5.177239518545785e-06, "loss": 0.2958, "step": 11295 }, { "epoch": 1.6197304273014053, "grad_norm": 0.25953200459480286, "learning_rate": 5.1764057972863605e-06, "loss": 0.299, "step": 11296 }, { "epoch": 1.6198738170347005, "grad_norm": 0.2538604438304901, "learning_rate": 5.175572071116068e-06, "loss": 0.3069, "step": 11297 }, { "epoch": 1.6200172067679954, "grad_norm": 0.26291581988334656, "learning_rate": 5.1747383400581195e-06, "loss": 0.3009, "step": 11298 }, { "epoch": 1.6201605965012904, "grad_norm": 0.2822171151638031, "learning_rate": 5.173904604135727e-06, "loss": 0.3049, "step": 11299 }, { "epoch": 1.6203039862345856, "grad_norm": 0.26477330923080444, "learning_rate": 5.1730708633720935e-06, "loss": 0.3236, "step": 11300 }, { "epoch": 1.6204473759678808, "grad_norm": 0.2625761330127716, "learning_rate": 5.172237117790438e-06, "loss": 0.3007, "step": 11301 }, { "epoch": 1.6205907657011758, "grad_norm": 0.27886685729026794, "learning_rate": 5.171403367413963e-06, "loss": 0.3134, "step": 11302 }, { "epoch": 1.6207341554344707, "grad_norm": 0.2925677001476288, "learning_rate": 5.170569612265882e-06, "loss": 0.3072, "step": 11303 }, { "epoch": 1.620877545167766, "grad_norm": 0.2770528793334961, "learning_rate": 5.169735852369405e-06, "loss": 0.3104, "step": 11304 }, { "epoch": 1.6210209349010611, "grad_norm": 0.2558972239494324, "learning_rate": 5.168902087747744e-06, "loss": 0.2969, "step": 11305 }, { "epoch": 1.6211643246343561, "grad_norm": 0.2707234025001526, "learning_rate": 5.168068318424109e-06, "loss": 0.3063, "step": 11306 }, { "epoch": 1.6213077143676513, "grad_norm": 0.2557883858680725, "learning_rate": 5.16723454442171e-06, "loss": 0.3037, "step": 11307 }, { "epoch": 1.6214511041009465, "grad_norm": 0.28193581104278564, "learning_rate": 5.166400765763758e-06, "loss": 0.2969, "step": 11308 }, { "epoch": 1.6215944938342415, "grad_norm": 0.28668156266212463, "learning_rate": 5.165566982473465e-06, "loss": 0.322, "step": 11309 }, { "epoch": 1.6217378835675365, "grad_norm": 0.26951295137405396, "learning_rate": 5.164733194574044e-06, "loss": 0.304, "step": 11310 }, { "epoch": 1.6218812733008316, "grad_norm": 0.27635183930397034, "learning_rate": 5.163899402088702e-06, "loss": 0.2963, "step": 11311 }, { "epoch": 1.6220246630341268, "grad_norm": 0.2678948938846588, "learning_rate": 5.163065605040653e-06, "loss": 0.2876, "step": 11312 }, { "epoch": 1.6221680527674218, "grad_norm": 0.27297258377075195, "learning_rate": 5.162231803453108e-06, "loss": 0.2981, "step": 11313 }, { "epoch": 1.6223114425007168, "grad_norm": 0.291874498128891, "learning_rate": 5.16139799734928e-06, "loss": 0.306, "step": 11314 }, { "epoch": 1.622454832234012, "grad_norm": 0.26737159490585327, "learning_rate": 5.16056418675238e-06, "loss": 0.3267, "step": 11315 }, { "epoch": 1.6225982219673072, "grad_norm": 0.2913995683193207, "learning_rate": 5.159730371685621e-06, "loss": 0.3041, "step": 11316 }, { "epoch": 1.6227416117006022, "grad_norm": 0.2718349099159241, "learning_rate": 5.158896552172212e-06, "loss": 0.3058, "step": 11317 }, { "epoch": 1.6228850014338974, "grad_norm": 0.27107155323028564, "learning_rate": 5.158062728235369e-06, "loss": 0.2971, "step": 11318 }, { "epoch": 1.6230283911671926, "grad_norm": 0.27029818296432495, "learning_rate": 5.157228899898302e-06, "loss": 0.2962, "step": 11319 }, { "epoch": 1.6231717809004875, "grad_norm": 0.291180282831192, "learning_rate": 5.156395067184226e-06, "loss": 0.3252, "step": 11320 }, { "epoch": 1.6233151706337825, "grad_norm": 0.27857109904289246, "learning_rate": 5.155561230116348e-06, "loss": 0.2893, "step": 11321 }, { "epoch": 1.6234585603670777, "grad_norm": 0.2610001564025879, "learning_rate": 5.154727388717887e-06, "loss": 0.312, "step": 11322 }, { "epoch": 1.623601950100373, "grad_norm": 0.25911808013916016, "learning_rate": 5.153893543012055e-06, "loss": 0.3125, "step": 11323 }, { "epoch": 1.6237453398336679, "grad_norm": 0.2691013514995575, "learning_rate": 5.153059693022061e-06, "loss": 0.3043, "step": 11324 }, { "epoch": 1.6238887295669628, "grad_norm": 0.29427993297576904, "learning_rate": 5.152225838771122e-06, "loss": 0.3019, "step": 11325 }, { "epoch": 1.6240321193002583, "grad_norm": 0.27047592401504517, "learning_rate": 5.15139198028245e-06, "loss": 0.3285, "step": 11326 }, { "epoch": 1.6241755090335532, "grad_norm": 0.27148282527923584, "learning_rate": 5.150558117579258e-06, "loss": 0.2998, "step": 11327 }, { "epoch": 1.6243188987668482, "grad_norm": 0.26883089542388916, "learning_rate": 5.14972425068476e-06, "loss": 0.3153, "step": 11328 }, { "epoch": 1.6244622885001434, "grad_norm": 0.27534398436546326, "learning_rate": 5.148890379622168e-06, "loss": 0.2972, "step": 11329 }, { "epoch": 1.6246056782334386, "grad_norm": 0.29100218415260315, "learning_rate": 5.148056504414697e-06, "loss": 0.3243, "step": 11330 }, { "epoch": 1.6247490679667336, "grad_norm": 0.27952954173088074, "learning_rate": 5.14722262508556e-06, "loss": 0.3009, "step": 11331 }, { "epoch": 1.6248924577000285, "grad_norm": 0.2681542932987213, "learning_rate": 5.146388741657973e-06, "loss": 0.3259, "step": 11332 }, { "epoch": 1.6250358474333237, "grad_norm": 0.2778647840023041, "learning_rate": 5.145554854155148e-06, "loss": 0.3078, "step": 11333 }, { "epoch": 1.625179237166619, "grad_norm": 0.2726464569568634, "learning_rate": 5.144720962600301e-06, "loss": 0.2847, "step": 11334 }, { "epoch": 1.625322626899914, "grad_norm": 0.272042840719223, "learning_rate": 5.143887067016644e-06, "loss": 0.2991, "step": 11335 }, { "epoch": 1.625466016633209, "grad_norm": 0.2752375900745392, "learning_rate": 5.143053167427392e-06, "loss": 0.3195, "step": 11336 }, { "epoch": 1.6256094063665043, "grad_norm": 0.272816002368927, "learning_rate": 5.142219263855761e-06, "loss": 0.302, "step": 11337 }, { "epoch": 1.6257527960997993, "grad_norm": 0.26364949345588684, "learning_rate": 5.141385356324963e-06, "loss": 0.295, "step": 11338 }, { "epoch": 1.6258961858330943, "grad_norm": 0.2793393135070801, "learning_rate": 5.140551444858215e-06, "loss": 0.3019, "step": 11339 }, { "epoch": 1.6260395755663895, "grad_norm": 0.25607091188430786, "learning_rate": 5.139717529478731e-06, "loss": 0.3099, "step": 11340 }, { "epoch": 1.6261829652996846, "grad_norm": 0.26395368576049805, "learning_rate": 5.138883610209725e-06, "loss": 0.3349, "step": 11341 }, { "epoch": 1.6263263550329796, "grad_norm": 0.28866586089134216, "learning_rate": 5.1380496870744145e-06, "loss": 0.3059, "step": 11342 }, { "epoch": 1.6264697447662746, "grad_norm": 0.2652982175350189, "learning_rate": 5.137215760096011e-06, "loss": 0.3087, "step": 11343 }, { "epoch": 1.6266131344995698, "grad_norm": 0.28205975890159607, "learning_rate": 5.136381829297734e-06, "loss": 0.3112, "step": 11344 }, { "epoch": 1.626756524232865, "grad_norm": 0.2886606454849243, "learning_rate": 5.135547894702797e-06, "loss": 0.3033, "step": 11345 }, { "epoch": 1.62689991396616, "grad_norm": 0.27332958579063416, "learning_rate": 5.134713956334413e-06, "loss": 0.326, "step": 11346 }, { "epoch": 1.6270433036994552, "grad_norm": 0.29423949122428894, "learning_rate": 5.133880014215802e-06, "loss": 0.3085, "step": 11347 }, { "epoch": 1.6271866934327504, "grad_norm": 0.27772316336631775, "learning_rate": 5.133046068370176e-06, "loss": 0.3131, "step": 11348 }, { "epoch": 1.6273300831660453, "grad_norm": 0.306321918964386, "learning_rate": 5.132212118820749e-06, "loss": 0.2912, "step": 11349 }, { "epoch": 1.6274734728993403, "grad_norm": 0.27282607555389404, "learning_rate": 5.1313781655907445e-06, "loss": 0.3193, "step": 11350 }, { "epoch": 1.6276168626326355, "grad_norm": 0.2916145324707031, "learning_rate": 5.130544208703373e-06, "loss": 0.2906, "step": 11351 }, { "epoch": 1.6277602523659307, "grad_norm": 0.30970674753189087, "learning_rate": 5.1297102481818505e-06, "loss": 0.2952, "step": 11352 }, { "epoch": 1.6279036420992257, "grad_norm": 0.2865631878376007, "learning_rate": 5.128876284049396e-06, "loss": 0.3126, "step": 11353 }, { "epoch": 1.6280470318325206, "grad_norm": 0.26396018266677856, "learning_rate": 5.128042316329222e-06, "loss": 0.282, "step": 11354 }, { "epoch": 1.6281904215658158, "grad_norm": 0.2956077754497528, "learning_rate": 5.127208345044549e-06, "loss": 0.2992, "step": 11355 }, { "epoch": 1.628333811299111, "grad_norm": 0.3062543570995331, "learning_rate": 5.1263743702185884e-06, "loss": 0.2939, "step": 11356 }, { "epoch": 1.628477201032406, "grad_norm": 0.28235164284706116, "learning_rate": 5.1255403918745615e-06, "loss": 0.2933, "step": 11357 }, { "epoch": 1.6286205907657012, "grad_norm": 0.27603209018707275, "learning_rate": 5.124706410035683e-06, "loss": 0.3064, "step": 11358 }, { "epoch": 1.6287639804989964, "grad_norm": 0.28394168615341187, "learning_rate": 5.1238724247251704e-06, "loss": 0.293, "step": 11359 }, { "epoch": 1.6289073702322914, "grad_norm": 0.2854621410369873, "learning_rate": 5.123038435966239e-06, "loss": 0.3201, "step": 11360 }, { "epoch": 1.6290507599655863, "grad_norm": 0.29336801171302795, "learning_rate": 5.122204443782109e-06, "loss": 0.295, "step": 11361 }, { "epoch": 1.6291941496988815, "grad_norm": 0.2968822121620178, "learning_rate": 5.1213704481959926e-06, "loss": 0.3088, "step": 11362 }, { "epoch": 1.6293375394321767, "grad_norm": 0.28732338547706604, "learning_rate": 5.12053644923111e-06, "loss": 0.2885, "step": 11363 }, { "epoch": 1.6294809291654717, "grad_norm": 0.2675488591194153, "learning_rate": 5.119702446910679e-06, "loss": 0.2943, "step": 11364 }, { "epoch": 1.6296243188987667, "grad_norm": 0.28172987699508667, "learning_rate": 5.118868441257916e-06, "loss": 0.2725, "step": 11365 }, { "epoch": 1.629767708632062, "grad_norm": 0.308885395526886, "learning_rate": 5.118034432296038e-06, "loss": 0.3217, "step": 11366 }, { "epoch": 1.629911098365357, "grad_norm": 0.26906606554985046, "learning_rate": 5.117200420048262e-06, "loss": 0.3192, "step": 11367 }, { "epoch": 1.630054488098652, "grad_norm": 0.29567840695381165, "learning_rate": 5.1163664045378095e-06, "loss": 0.3014, "step": 11368 }, { "epoch": 1.6301978778319473, "grad_norm": 0.28138095140457153, "learning_rate": 5.1155323857878936e-06, "loss": 0.2999, "step": 11369 }, { "epoch": 1.6303412675652424, "grad_norm": 0.26958173513412476, "learning_rate": 5.114698363821734e-06, "loss": 0.3, "step": 11370 }, { "epoch": 1.6304846572985374, "grad_norm": 0.27822062373161316, "learning_rate": 5.113864338662548e-06, "loss": 0.3137, "step": 11371 }, { "epoch": 1.6306280470318324, "grad_norm": 0.27247315645217896, "learning_rate": 5.113030310333554e-06, "loss": 0.3001, "step": 11372 }, { "epoch": 1.6307714367651276, "grad_norm": 0.29444795846939087, "learning_rate": 5.11219627885797e-06, "loss": 0.2854, "step": 11373 }, { "epoch": 1.6309148264984228, "grad_norm": 0.2627123296260834, "learning_rate": 5.1113622442590165e-06, "loss": 0.2902, "step": 11374 }, { "epoch": 1.6310582162317178, "grad_norm": 0.28369033336639404, "learning_rate": 5.110528206559906e-06, "loss": 0.313, "step": 11375 }, { "epoch": 1.631201605965013, "grad_norm": 0.2670440673828125, "learning_rate": 5.1096941657838615e-06, "loss": 0.3038, "step": 11376 }, { "epoch": 1.6313449956983082, "grad_norm": 0.29813382029533386, "learning_rate": 5.1088601219541004e-06, "loss": 0.3002, "step": 11377 }, { "epoch": 1.6314883854316031, "grad_norm": 0.28045639395713806, "learning_rate": 5.10802607509384e-06, "loss": 0.2748, "step": 11378 }, { "epoch": 1.631631775164898, "grad_norm": 0.256557434797287, "learning_rate": 5.107192025226301e-06, "loss": 0.3199, "step": 11379 }, { "epoch": 1.6317751648981933, "grad_norm": 0.28437939286231995, "learning_rate": 5.106357972374701e-06, "loss": 0.3437, "step": 11380 }, { "epoch": 1.6319185546314885, "grad_norm": 0.28250473737716675, "learning_rate": 5.1055239165622575e-06, "loss": 0.3083, "step": 11381 }, { "epoch": 1.6320619443647835, "grad_norm": 0.30515775084495544, "learning_rate": 5.1046898578121915e-06, "loss": 0.3195, "step": 11382 }, { "epoch": 1.6322053340980784, "grad_norm": 0.27741739153862, "learning_rate": 5.1038557961477205e-06, "loss": 0.2988, "step": 11383 }, { "epoch": 1.6323487238313736, "grad_norm": 0.2787160277366638, "learning_rate": 5.103021731592064e-06, "loss": 0.3024, "step": 11384 }, { "epoch": 1.6324921135646688, "grad_norm": 0.29140904545783997, "learning_rate": 5.102187664168441e-06, "loss": 0.3239, "step": 11385 }, { "epoch": 1.6326355032979638, "grad_norm": 0.27954235672950745, "learning_rate": 5.1013535939000704e-06, "loss": 0.2925, "step": 11386 }, { "epoch": 1.632778893031259, "grad_norm": 0.26342684030532837, "learning_rate": 5.100519520810171e-06, "loss": 0.3006, "step": 11387 }, { "epoch": 1.6329222827645542, "grad_norm": 0.2847212255001068, "learning_rate": 5.099685444921965e-06, "loss": 0.3274, "step": 11388 }, { "epoch": 1.6330656724978492, "grad_norm": 0.26815423369407654, "learning_rate": 5.098851366258668e-06, "loss": 0.3263, "step": 11389 }, { "epoch": 1.6332090622311441, "grad_norm": 0.26838529109954834, "learning_rate": 5.0980172848435006e-06, "loss": 0.2906, "step": 11390 }, { "epoch": 1.6333524519644393, "grad_norm": 0.29014647006988525, "learning_rate": 5.097183200699685e-06, "loss": 0.3225, "step": 11391 }, { "epoch": 1.6334958416977345, "grad_norm": 0.2771168649196625, "learning_rate": 5.096349113850436e-06, "loss": 0.3187, "step": 11392 }, { "epoch": 1.6336392314310295, "grad_norm": 0.2677691578865051, "learning_rate": 5.0955150243189766e-06, "loss": 0.2936, "step": 11393 }, { "epoch": 1.6337826211643245, "grad_norm": 0.26998692750930786, "learning_rate": 5.094680932128526e-06, "loss": 0.3044, "step": 11394 }, { "epoch": 1.6339260108976197, "grad_norm": 0.263971745967865, "learning_rate": 5.093846837302303e-06, "loss": 0.3119, "step": 11395 }, { "epoch": 1.6340694006309149, "grad_norm": 0.3018858730792999, "learning_rate": 5.0930127398635305e-06, "loss": 0.3244, "step": 11396 }, { "epoch": 1.6342127903642099, "grad_norm": 0.2628677487373352, "learning_rate": 5.0921786398354255e-06, "loss": 0.2993, "step": 11397 }, { "epoch": 1.634356180097505, "grad_norm": 0.2653985023498535, "learning_rate": 5.091344537241209e-06, "loss": 0.3083, "step": 11398 }, { "epoch": 1.6344995698308002, "grad_norm": 0.27030715346336365, "learning_rate": 5.090510432104101e-06, "loss": 0.3016, "step": 11399 }, { "epoch": 1.6346429595640952, "grad_norm": 0.283182293176651, "learning_rate": 5.089676324447321e-06, "loss": 0.3104, "step": 11400 }, { "epoch": 1.6347863492973902, "grad_norm": 0.2832607924938202, "learning_rate": 5.088842214294092e-06, "loss": 0.3089, "step": 11401 }, { "epoch": 1.6349297390306854, "grad_norm": 0.2652934193611145, "learning_rate": 5.08800810166763e-06, "loss": 0.2889, "step": 11402 }, { "epoch": 1.6350731287639806, "grad_norm": 0.2820352613925934, "learning_rate": 5.087173986591159e-06, "loss": 0.299, "step": 11403 }, { "epoch": 1.6352165184972756, "grad_norm": 0.26881685853004456, "learning_rate": 5.086339869087899e-06, "loss": 0.3032, "step": 11404 }, { "epoch": 1.6353599082305705, "grad_norm": 0.27443090081214905, "learning_rate": 5.085505749181069e-06, "loss": 0.3011, "step": 11405 }, { "epoch": 1.6355032979638657, "grad_norm": 0.260871946811676, "learning_rate": 5.08467162689389e-06, "loss": 0.3052, "step": 11406 }, { "epoch": 1.635646687697161, "grad_norm": 0.3056044280529022, "learning_rate": 5.083837502249584e-06, "loss": 0.3022, "step": 11407 }, { "epoch": 1.635790077430456, "grad_norm": 0.26990950107574463, "learning_rate": 5.08300337527137e-06, "loss": 0.3151, "step": 11408 }, { "epoch": 1.635933467163751, "grad_norm": 0.2912845313549042, "learning_rate": 5.082169245982472e-06, "loss": 0.3225, "step": 11409 }, { "epoch": 1.6360768568970463, "grad_norm": 0.2870226800441742, "learning_rate": 5.081335114406107e-06, "loss": 0.3111, "step": 11410 }, { "epoch": 1.6362202466303413, "grad_norm": 0.26957032084465027, "learning_rate": 5.080500980565497e-06, "loss": 0.3086, "step": 11411 }, { "epoch": 1.6363636363636362, "grad_norm": 0.2813988924026489, "learning_rate": 5.079666844483864e-06, "loss": 0.3026, "step": 11412 }, { "epoch": 1.6365070260969314, "grad_norm": 0.2775883674621582, "learning_rate": 5.078832706184428e-06, "loss": 0.3112, "step": 11413 }, { "epoch": 1.6366504158302266, "grad_norm": 0.2554285526275635, "learning_rate": 5.077998565690412e-06, "loss": 0.2873, "step": 11414 }, { "epoch": 1.6367938055635216, "grad_norm": 0.26908400654792786, "learning_rate": 5.0771644230250346e-06, "loss": 0.2915, "step": 11415 }, { "epoch": 1.6369371952968166, "grad_norm": 0.261156290769577, "learning_rate": 5.07633027821152e-06, "loss": 0.2998, "step": 11416 }, { "epoch": 1.637080585030112, "grad_norm": 0.28372815251350403, "learning_rate": 5.075496131273086e-06, "loss": 0.3166, "step": 11417 }, { "epoch": 1.637223974763407, "grad_norm": 0.29797935485839844, "learning_rate": 5.074661982232958e-06, "loss": 0.2981, "step": 11418 }, { "epoch": 1.637367364496702, "grad_norm": 0.2967672049999237, "learning_rate": 5.073827831114354e-06, "loss": 0.315, "step": 11419 }, { "epoch": 1.6375107542299971, "grad_norm": 0.3049044907093048, "learning_rate": 5.072993677940497e-06, "loss": 0.3142, "step": 11420 }, { "epoch": 1.6376541439632923, "grad_norm": 0.2916841208934784, "learning_rate": 5.072159522734608e-06, "loss": 0.314, "step": 11421 }, { "epoch": 1.6377975336965873, "grad_norm": 0.2789624333381653, "learning_rate": 5.071325365519908e-06, "loss": 0.3072, "step": 11422 }, { "epoch": 1.6379409234298823, "grad_norm": 0.28272032737731934, "learning_rate": 5.070491206319621e-06, "loss": 0.3147, "step": 11423 }, { "epoch": 1.6380843131631775, "grad_norm": 0.28715693950653076, "learning_rate": 5.069657045156967e-06, "loss": 0.3035, "step": 11424 }, { "epoch": 1.6382277028964727, "grad_norm": 0.29318857192993164, "learning_rate": 5.0688228820551675e-06, "loss": 0.3166, "step": 11425 }, { "epoch": 1.6383710926297677, "grad_norm": 0.27504780888557434, "learning_rate": 5.067988717037446e-06, "loss": 0.2874, "step": 11426 }, { "epoch": 1.6385144823630629, "grad_norm": 0.2668953835964203, "learning_rate": 5.067154550127021e-06, "loss": 0.2954, "step": 11427 }, { "epoch": 1.638657872096358, "grad_norm": 0.3090948462486267, "learning_rate": 5.066320381347119e-06, "loss": 0.3044, "step": 11428 }, { "epoch": 1.638801261829653, "grad_norm": 0.3045520484447479, "learning_rate": 5.065486210720959e-06, "loss": 0.3091, "step": 11429 }, { "epoch": 1.638944651562948, "grad_norm": 0.2888672947883606, "learning_rate": 5.064652038271763e-06, "loss": 0.3166, "step": 11430 }, { "epoch": 1.6390880412962432, "grad_norm": 0.2647220194339752, "learning_rate": 5.063817864022753e-06, "loss": 0.3026, "step": 11431 }, { "epoch": 1.6392314310295384, "grad_norm": 0.28019681572914124, "learning_rate": 5.062983687997153e-06, "loss": 0.3119, "step": 11432 }, { "epoch": 1.6393748207628334, "grad_norm": 0.27465033531188965, "learning_rate": 5.062149510218183e-06, "loss": 0.3138, "step": 11433 }, { "epoch": 1.6395182104961283, "grad_norm": 0.29925239086151123, "learning_rate": 5.061315330709068e-06, "loss": 0.3002, "step": 11434 }, { "epoch": 1.6396616002294235, "grad_norm": 0.29613080620765686, "learning_rate": 5.060481149493028e-06, "loss": 0.3148, "step": 11435 }, { "epoch": 1.6398049899627187, "grad_norm": 0.2939320504665375, "learning_rate": 5.059646966593286e-06, "loss": 0.2868, "step": 11436 }, { "epoch": 1.6399483796960137, "grad_norm": 0.28866952657699585, "learning_rate": 5.058812782033062e-06, "loss": 0.2972, "step": 11437 }, { "epoch": 1.640091769429309, "grad_norm": 0.34019234776496887, "learning_rate": 5.057978595835583e-06, "loss": 0.2977, "step": 11438 }, { "epoch": 1.640235159162604, "grad_norm": 0.2910217046737671, "learning_rate": 5.057144408024067e-06, "loss": 0.3073, "step": 11439 }, { "epoch": 1.640378548895899, "grad_norm": 0.2947792410850525, "learning_rate": 5.056310218621741e-06, "loss": 0.3186, "step": 11440 }, { "epoch": 1.640521938629194, "grad_norm": 0.30181288719177246, "learning_rate": 5.055476027651824e-06, "loss": 0.2976, "step": 11441 }, { "epoch": 1.6406653283624892, "grad_norm": 0.28518757224082947, "learning_rate": 5.05464183513754e-06, "loss": 0.3075, "step": 11442 }, { "epoch": 1.6408087180957844, "grad_norm": 0.29502996802330017, "learning_rate": 5.05380764110211e-06, "loss": 0.3193, "step": 11443 }, { "epoch": 1.6409521078290794, "grad_norm": 0.2855472266674042, "learning_rate": 5.05297344556876e-06, "loss": 0.3176, "step": 11444 }, { "epoch": 1.6410954975623744, "grad_norm": 0.26044654846191406, "learning_rate": 5.05213924856071e-06, "loss": 0.2774, "step": 11445 }, { "epoch": 1.6412388872956696, "grad_norm": 0.2909714877605438, "learning_rate": 5.051305050101183e-06, "loss": 0.3061, "step": 11446 }, { "epoch": 1.6413822770289648, "grad_norm": 0.2977979779243469, "learning_rate": 5.050470850213403e-06, "loss": 0.3179, "step": 11447 }, { "epoch": 1.6415256667622597, "grad_norm": 0.29514458775520325, "learning_rate": 5.049636648920592e-06, "loss": 0.2995, "step": 11448 }, { "epoch": 1.641669056495555, "grad_norm": 0.2981281280517578, "learning_rate": 5.048802446245972e-06, "loss": 0.3044, "step": 11449 }, { "epoch": 1.6418124462288501, "grad_norm": 0.29458266496658325, "learning_rate": 5.047968242212768e-06, "loss": 0.2963, "step": 11450 }, { "epoch": 1.6419558359621451, "grad_norm": 0.2795877158641815, "learning_rate": 5.047134036844203e-06, "loss": 0.2967, "step": 11451 }, { "epoch": 1.64209922569544, "grad_norm": 0.2649778723716736, "learning_rate": 5.046299830163497e-06, "loss": 0.3089, "step": 11452 }, { "epoch": 1.6422426154287353, "grad_norm": 0.309272438287735, "learning_rate": 5.045465622193876e-06, "loss": 0.3144, "step": 11453 }, { "epoch": 1.6423860051620305, "grad_norm": 0.3134736120700836, "learning_rate": 5.044631412958562e-06, "loss": 0.2978, "step": 11454 }, { "epoch": 1.6425293948953255, "grad_norm": 0.2874220907688141, "learning_rate": 5.043797202480778e-06, "loss": 0.2941, "step": 11455 }, { "epoch": 1.6426727846286204, "grad_norm": 0.268576979637146, "learning_rate": 5.0429629907837465e-06, "loss": 0.2924, "step": 11456 }, { "epoch": 1.6428161743619158, "grad_norm": 0.28839313983917236, "learning_rate": 5.0421287778906925e-06, "loss": 0.3314, "step": 11457 }, { "epoch": 1.6429595640952108, "grad_norm": 0.2925068438053131, "learning_rate": 5.041294563824837e-06, "loss": 0.2916, "step": 11458 }, { "epoch": 1.6431029538285058, "grad_norm": 0.270536333322525, "learning_rate": 5.040460348609405e-06, "loss": 0.2964, "step": 11459 }, { "epoch": 1.643246343561801, "grad_norm": 0.2735770642757416, "learning_rate": 5.039626132267619e-06, "loss": 0.3008, "step": 11460 }, { "epoch": 1.6433897332950962, "grad_norm": 0.3235979676246643, "learning_rate": 5.038791914822703e-06, "loss": 0.3138, "step": 11461 }, { "epoch": 1.6435331230283912, "grad_norm": 0.28026923537254333, "learning_rate": 5.037957696297879e-06, "loss": 0.2986, "step": 11462 }, { "epoch": 1.6436765127616861, "grad_norm": 0.28369590640068054, "learning_rate": 5.037123476716373e-06, "loss": 0.301, "step": 11463 }, { "epoch": 1.6438199024949813, "grad_norm": 0.2819611728191376, "learning_rate": 5.036289256101405e-06, "loss": 0.3079, "step": 11464 }, { "epoch": 1.6439632922282765, "grad_norm": 0.2853083908557892, "learning_rate": 5.035455034476199e-06, "loss": 0.3121, "step": 11465 }, { "epoch": 1.6441066819615715, "grad_norm": 0.26196303963661194, "learning_rate": 5.03462081186398e-06, "loss": 0.3159, "step": 11466 }, { "epoch": 1.6442500716948667, "grad_norm": 0.2971433699131012, "learning_rate": 5.033786588287971e-06, "loss": 0.3, "step": 11467 }, { "epoch": 1.644393461428162, "grad_norm": 0.31231966614723206, "learning_rate": 5.0329523637713955e-06, "loss": 0.3049, "step": 11468 }, { "epoch": 1.6445368511614569, "grad_norm": 0.2679732143878937, "learning_rate": 5.032118138337477e-06, "loss": 0.3013, "step": 11469 }, { "epoch": 1.6446802408947518, "grad_norm": 0.2627663314342499, "learning_rate": 5.031283912009438e-06, "loss": 0.3129, "step": 11470 }, { "epoch": 1.644823630628047, "grad_norm": 0.27054718136787415, "learning_rate": 5.030449684810504e-06, "loss": 0.3169, "step": 11471 }, { "epoch": 1.6449670203613422, "grad_norm": 0.2846892476081848, "learning_rate": 5.029615456763899e-06, "loss": 0.2909, "step": 11472 }, { "epoch": 1.6451104100946372, "grad_norm": 0.28213000297546387, "learning_rate": 5.028781227892843e-06, "loss": 0.303, "step": 11473 }, { "epoch": 1.6452537998279322, "grad_norm": 0.2697395384311676, "learning_rate": 5.027946998220563e-06, "loss": 0.3048, "step": 11474 }, { "epoch": 1.6453971895612274, "grad_norm": 0.26257622241973877, "learning_rate": 5.0271127677702815e-06, "loss": 0.3096, "step": 11475 }, { "epoch": 1.6455405792945226, "grad_norm": 0.3019216060638428, "learning_rate": 5.026278536565221e-06, "loss": 0.2896, "step": 11476 }, { "epoch": 1.6456839690278176, "grad_norm": 0.3062422573566437, "learning_rate": 5.025444304628607e-06, "loss": 0.2981, "step": 11477 }, { "epoch": 1.6458273587611127, "grad_norm": 0.27163827419281006, "learning_rate": 5.0246100719836635e-06, "loss": 0.2874, "step": 11478 }, { "epoch": 1.645970748494408, "grad_norm": 0.2787711024284363, "learning_rate": 5.023775838653613e-06, "loss": 0.2864, "step": 11479 }, { "epoch": 1.646114138227703, "grad_norm": 0.2649170160293579, "learning_rate": 5.02294160466168e-06, "loss": 0.298, "step": 11480 }, { "epoch": 1.646257527960998, "grad_norm": 0.26550450921058655, "learning_rate": 5.022107370031088e-06, "loss": 0.3022, "step": 11481 }, { "epoch": 1.646400917694293, "grad_norm": 0.27137333154678345, "learning_rate": 5.0212731347850605e-06, "loss": 0.2928, "step": 11482 }, { "epoch": 1.6465443074275883, "grad_norm": 0.26946237683296204, "learning_rate": 5.0204388989468224e-06, "loss": 0.2983, "step": 11483 }, { "epoch": 1.6466876971608833, "grad_norm": 0.2803806662559509, "learning_rate": 5.019604662539596e-06, "loss": 0.3219, "step": 11484 }, { "epoch": 1.6468310868941782, "grad_norm": 0.27494698762893677, "learning_rate": 5.018770425586606e-06, "loss": 0.3132, "step": 11485 }, { "epoch": 1.6469744766274734, "grad_norm": 0.2726796567440033, "learning_rate": 5.017936188111076e-06, "loss": 0.2935, "step": 11486 }, { "epoch": 1.6471178663607686, "grad_norm": 0.2745305597782135, "learning_rate": 5.0171019501362295e-06, "loss": 0.2735, "step": 11487 }, { "epoch": 1.6472612560940636, "grad_norm": 0.29037371277809143, "learning_rate": 5.016267711685293e-06, "loss": 0.3224, "step": 11488 }, { "epoch": 1.6474046458273588, "grad_norm": 0.27789387106895447, "learning_rate": 5.015433472781487e-06, "loss": 0.2854, "step": 11489 }, { "epoch": 1.647548035560654, "grad_norm": 0.2882958650588989, "learning_rate": 5.0145992334480375e-06, "loss": 0.3091, "step": 11490 }, { "epoch": 1.647691425293949, "grad_norm": 0.2917881906032562, "learning_rate": 5.0137649937081675e-06, "loss": 0.3063, "step": 11491 }, { "epoch": 1.647834815027244, "grad_norm": 0.2822194993495941, "learning_rate": 5.0129307535851015e-06, "loss": 0.2954, "step": 11492 }, { "epoch": 1.6479782047605391, "grad_norm": 0.27144700288772583, "learning_rate": 5.0120965131020634e-06, "loss": 0.2988, "step": 11493 }, { "epoch": 1.6481215944938343, "grad_norm": 0.263462632894516, "learning_rate": 5.0112622722822756e-06, "loss": 0.3126, "step": 11494 }, { "epoch": 1.6482649842271293, "grad_norm": 0.27625802159309387, "learning_rate": 5.0104280311489635e-06, "loss": 0.2942, "step": 11495 }, { "epoch": 1.6484083739604243, "grad_norm": 0.2886893153190613, "learning_rate": 5.009593789725353e-06, "loss": 0.3073, "step": 11496 }, { "epoch": 1.6485517636937197, "grad_norm": 0.2991674542427063, "learning_rate": 5.008759548034665e-06, "loss": 0.3028, "step": 11497 }, { "epoch": 1.6486951534270147, "grad_norm": 0.28428879380226135, "learning_rate": 5.0079253061001244e-06, "loss": 0.3184, "step": 11498 }, { "epoch": 1.6488385431603096, "grad_norm": 0.2856106460094452, "learning_rate": 5.007091063944957e-06, "loss": 0.2975, "step": 11499 }, { "epoch": 1.6489819328936048, "grad_norm": 0.28092071413993835, "learning_rate": 5.006256821592383e-06, "loss": 0.3061, "step": 11500 }, { "epoch": 1.6491253226269, "grad_norm": 0.27666139602661133, "learning_rate": 5.005422579065632e-06, "loss": 0.2992, "step": 11501 }, { "epoch": 1.649268712360195, "grad_norm": 0.2820690870285034, "learning_rate": 5.004588336387922e-06, "loss": 0.2936, "step": 11502 }, { "epoch": 1.64941210209349, "grad_norm": 0.2643912434577942, "learning_rate": 5.0037540935824805e-06, "loss": 0.2975, "step": 11503 }, { "epoch": 1.6495554918267852, "grad_norm": 0.26881229877471924, "learning_rate": 5.0029198506725295e-06, "loss": 0.2989, "step": 11504 }, { "epoch": 1.6496988815600804, "grad_norm": 0.2926347851753235, "learning_rate": 5.002085607681295e-06, "loss": 0.3037, "step": 11505 }, { "epoch": 1.6498422712933754, "grad_norm": 0.28916963934898376, "learning_rate": 5.001251364632002e-06, "loss": 0.2957, "step": 11506 }, { "epoch": 1.6499856610266703, "grad_norm": 0.2789333760738373, "learning_rate": 5.000417121547871e-06, "loss": 0.3035, "step": 11507 }, { "epoch": 1.6501290507599657, "grad_norm": 0.2831238806247711, "learning_rate": 4.9995828784521296e-06, "loss": 0.2992, "step": 11508 }, { "epoch": 1.6502724404932607, "grad_norm": 0.2857605516910553, "learning_rate": 4.998748635367999e-06, "loss": 0.3013, "step": 11509 }, { "epoch": 1.6504158302265557, "grad_norm": 0.2731504738330841, "learning_rate": 4.997914392318704e-06, "loss": 0.3061, "step": 11510 }, { "epoch": 1.6505592199598509, "grad_norm": 0.29888173937797546, "learning_rate": 4.997080149327472e-06, "loss": 0.2988, "step": 11511 }, { "epoch": 1.650702609693146, "grad_norm": 0.2766183316707611, "learning_rate": 4.996245906417522e-06, "loss": 0.3022, "step": 11512 }, { "epoch": 1.650845999426441, "grad_norm": 0.28289902210235596, "learning_rate": 4.99541166361208e-06, "loss": 0.3007, "step": 11513 }, { "epoch": 1.650989389159736, "grad_norm": 0.27543729543685913, "learning_rate": 4.99457742093437e-06, "loss": 0.3166, "step": 11514 }, { "epoch": 1.6511327788930312, "grad_norm": 0.2870433032512665, "learning_rate": 4.993743178407617e-06, "loss": 0.2982, "step": 11515 }, { "epoch": 1.6512761686263264, "grad_norm": 0.2759719491004944, "learning_rate": 4.992908936055046e-06, "loss": 0.3259, "step": 11516 }, { "epoch": 1.6514195583596214, "grad_norm": 0.26164743304252625, "learning_rate": 4.992074693899877e-06, "loss": 0.2937, "step": 11517 }, { "epoch": 1.6515629480929166, "grad_norm": 0.2552756071090698, "learning_rate": 4.991240451965336e-06, "loss": 0.2903, "step": 11518 }, { "epoch": 1.6517063378262118, "grad_norm": 0.2781226634979248, "learning_rate": 4.990406210274648e-06, "loss": 0.3102, "step": 11519 }, { "epoch": 1.6518497275595068, "grad_norm": 0.2756653428077698, "learning_rate": 4.9895719688510365e-06, "loss": 0.2929, "step": 11520 }, { "epoch": 1.6519931172928017, "grad_norm": 0.28260812163352966, "learning_rate": 4.988737727717725e-06, "loss": 0.2933, "step": 11521 }, { "epoch": 1.652136507026097, "grad_norm": 0.28325459361076355, "learning_rate": 4.987903486897939e-06, "loss": 0.2913, "step": 11522 }, { "epoch": 1.6522798967593921, "grad_norm": 0.2586992681026459, "learning_rate": 4.9870692464149e-06, "loss": 0.2932, "step": 11523 }, { "epoch": 1.652423286492687, "grad_norm": 0.26445844769477844, "learning_rate": 4.986235006291834e-06, "loss": 0.309, "step": 11524 }, { "epoch": 1.652566676225982, "grad_norm": 0.2638854682445526, "learning_rate": 4.985400766551964e-06, "loss": 0.3111, "step": 11525 }, { "epoch": 1.6527100659592773, "grad_norm": 0.2651262879371643, "learning_rate": 4.984566527218513e-06, "loss": 0.3184, "step": 11526 }, { "epoch": 1.6528534556925725, "grad_norm": 0.2774631977081299, "learning_rate": 4.98373228831471e-06, "loss": 0.3053, "step": 11527 }, { "epoch": 1.6529968454258674, "grad_norm": 0.2661406099796295, "learning_rate": 4.982898049863772e-06, "loss": 0.2955, "step": 11528 }, { "epoch": 1.6531402351591626, "grad_norm": 0.28250330686569214, "learning_rate": 4.982063811888927e-06, "loss": 0.315, "step": 11529 }, { "epoch": 1.6532836248924578, "grad_norm": 0.27232450246810913, "learning_rate": 4.981229574413397e-06, "loss": 0.296, "step": 11530 }, { "epoch": 1.6534270146257528, "grad_norm": 0.29833880066871643, "learning_rate": 4.980395337460406e-06, "loss": 0.3047, "step": 11531 }, { "epoch": 1.6535704043590478, "grad_norm": 0.28192138671875, "learning_rate": 4.979561101053181e-06, "loss": 0.3102, "step": 11532 }, { "epoch": 1.653713794092343, "grad_norm": 0.28372326493263245, "learning_rate": 4.978726865214942e-06, "loss": 0.3175, "step": 11533 }, { "epoch": 1.6538571838256382, "grad_norm": 0.28031083941459656, "learning_rate": 4.977892629968914e-06, "loss": 0.3025, "step": 11534 }, { "epoch": 1.6540005735589332, "grad_norm": 0.2926732003688812, "learning_rate": 4.977058395338321e-06, "loss": 0.3024, "step": 11535 }, { "epoch": 1.6541439632922281, "grad_norm": 0.2999529242515564, "learning_rate": 4.976224161346388e-06, "loss": 0.3064, "step": 11536 }, { "epoch": 1.6542873530255233, "grad_norm": 0.29757925868034363, "learning_rate": 4.975389928016337e-06, "loss": 0.3027, "step": 11537 }, { "epoch": 1.6544307427588185, "grad_norm": 0.2864454984664917, "learning_rate": 4.974555695371394e-06, "loss": 0.3192, "step": 11538 }, { "epoch": 1.6545741324921135, "grad_norm": 0.292108952999115, "learning_rate": 4.97372146343478e-06, "loss": 0.3173, "step": 11539 }, { "epoch": 1.6547175222254087, "grad_norm": 0.28614023327827454, "learning_rate": 4.97288723222972e-06, "loss": 0.2944, "step": 11540 }, { "epoch": 1.6548609119587039, "grad_norm": 0.29201093316078186, "learning_rate": 4.972053001779438e-06, "loss": 0.3038, "step": 11541 }, { "epoch": 1.6550043016919989, "grad_norm": 0.3093642294406891, "learning_rate": 4.971218772107157e-06, "loss": 0.3011, "step": 11542 }, { "epoch": 1.6551476914252938, "grad_norm": 0.3017522096633911, "learning_rate": 4.970384543236104e-06, "loss": 0.2998, "step": 11543 }, { "epoch": 1.655291081158589, "grad_norm": 0.27811720967292786, "learning_rate": 4.9695503151894975e-06, "loss": 0.3092, "step": 11544 }, { "epoch": 1.6554344708918842, "grad_norm": 0.28071942925453186, "learning_rate": 4.968716087990563e-06, "loss": 0.3235, "step": 11545 }, { "epoch": 1.6555778606251792, "grad_norm": 0.31795957684516907, "learning_rate": 4.967881861662524e-06, "loss": 0.2875, "step": 11546 }, { "epoch": 1.6557212503584742, "grad_norm": 0.3085651099681854, "learning_rate": 4.967047636228605e-06, "loss": 0.3012, "step": 11547 }, { "epoch": 1.6558646400917696, "grad_norm": 0.2728627026081085, "learning_rate": 4.96621341171203e-06, "loss": 0.3009, "step": 11548 }, { "epoch": 1.6560080298250646, "grad_norm": 0.26519080996513367, "learning_rate": 4.965379188136022e-06, "loss": 0.2982, "step": 11549 }, { "epoch": 1.6561514195583595, "grad_norm": 0.2924976050853729, "learning_rate": 4.964544965523803e-06, "loss": 0.2992, "step": 11550 }, { "epoch": 1.6562948092916547, "grad_norm": 0.310303658246994, "learning_rate": 4.963710743898598e-06, "loss": 0.3213, "step": 11551 }, { "epoch": 1.65643819902495, "grad_norm": 0.2874809503555298, "learning_rate": 4.962876523283629e-06, "loss": 0.3086, "step": 11552 }, { "epoch": 1.656581588758245, "grad_norm": 0.287561297416687, "learning_rate": 4.962042303702122e-06, "loss": 0.3138, "step": 11553 }, { "epoch": 1.6567249784915399, "grad_norm": 0.27656984329223633, "learning_rate": 4.9612080851773e-06, "loss": 0.2766, "step": 11554 }, { "epoch": 1.656868368224835, "grad_norm": 0.24619124829769135, "learning_rate": 4.960373867732383e-06, "loss": 0.3061, "step": 11555 }, { "epoch": 1.6570117579581303, "grad_norm": 0.30802351236343384, "learning_rate": 4.959539651390597e-06, "loss": 0.2929, "step": 11556 }, { "epoch": 1.6571551476914252, "grad_norm": 0.3216029405593872, "learning_rate": 4.958705436175165e-06, "loss": 0.3106, "step": 11557 }, { "epoch": 1.6572985374247204, "grad_norm": 0.2786623537540436, "learning_rate": 4.957871222109309e-06, "loss": 0.2892, "step": 11558 }, { "epoch": 1.6574419271580156, "grad_norm": 0.26374563574790955, "learning_rate": 4.957037009216256e-06, "loss": 0.3153, "step": 11559 }, { "epoch": 1.6575853168913106, "grad_norm": 0.2656203508377075, "learning_rate": 4.956202797519224e-06, "loss": 0.3012, "step": 11560 }, { "epoch": 1.6577287066246056, "grad_norm": 0.27853524684906006, "learning_rate": 4.95536858704144e-06, "loss": 0.3136, "step": 11561 }, { "epoch": 1.6578720963579008, "grad_norm": 0.2971031367778778, "learning_rate": 4.954534377806126e-06, "loss": 0.2978, "step": 11562 }, { "epoch": 1.658015486091196, "grad_norm": 0.29291653633117676, "learning_rate": 4.953700169836504e-06, "loss": 0.315, "step": 11563 }, { "epoch": 1.658158875824491, "grad_norm": 0.26399654150009155, "learning_rate": 4.952865963155798e-06, "loss": 0.3061, "step": 11564 }, { "epoch": 1.658302265557786, "grad_norm": 0.26962798833847046, "learning_rate": 4.952031757787234e-06, "loss": 0.2891, "step": 11565 }, { "epoch": 1.6584456552910811, "grad_norm": 0.2814284861087799, "learning_rate": 4.95119755375403e-06, "loss": 0.3076, "step": 11566 }, { "epoch": 1.6585890450243763, "grad_norm": 0.26376208662986755, "learning_rate": 4.950363351079409e-06, "loss": 0.3075, "step": 11567 }, { "epoch": 1.6587324347576713, "grad_norm": 0.29406654834747314, "learning_rate": 4.949529149786598e-06, "loss": 0.3001, "step": 11568 }, { "epoch": 1.6588758244909665, "grad_norm": 0.26092013716697693, "learning_rate": 4.948694949898817e-06, "loss": 0.2988, "step": 11569 }, { "epoch": 1.6590192142242617, "grad_norm": 0.28497564792633057, "learning_rate": 4.947860751439293e-06, "loss": 0.3247, "step": 11570 }, { "epoch": 1.6591626039575567, "grad_norm": 0.2797105312347412, "learning_rate": 4.947026554431242e-06, "loss": 0.2862, "step": 11571 }, { "epoch": 1.6593059936908516, "grad_norm": 0.28600212931632996, "learning_rate": 4.946192358897891e-06, "loss": 0.3053, "step": 11572 }, { "epoch": 1.6594493834241468, "grad_norm": 0.28023970127105713, "learning_rate": 4.945358164862462e-06, "loss": 0.3132, "step": 11573 }, { "epoch": 1.659592773157442, "grad_norm": 0.27714014053344727, "learning_rate": 4.944523972348177e-06, "loss": 0.2928, "step": 11574 }, { "epoch": 1.659736162890737, "grad_norm": 0.3297550678253174, "learning_rate": 4.94368978137826e-06, "loss": 0.3132, "step": 11575 }, { "epoch": 1.659879552624032, "grad_norm": 0.26718461513519287, "learning_rate": 4.942855591975934e-06, "loss": 0.2917, "step": 11576 }, { "epoch": 1.6600229423573272, "grad_norm": 0.2682610750198364, "learning_rate": 4.942021404164419e-06, "loss": 0.296, "step": 11577 }, { "epoch": 1.6601663320906224, "grad_norm": 0.2680729329586029, "learning_rate": 4.941187217966939e-06, "loss": 0.3063, "step": 11578 }, { "epoch": 1.6603097218239173, "grad_norm": 0.2701486349105835, "learning_rate": 4.940353033406716e-06, "loss": 0.3142, "step": 11579 }, { "epoch": 1.6604531115572125, "grad_norm": 0.26492685079574585, "learning_rate": 4.939518850506973e-06, "loss": 0.3019, "step": 11580 }, { "epoch": 1.6605965012905077, "grad_norm": 0.27362746000289917, "learning_rate": 4.938684669290935e-06, "loss": 0.3009, "step": 11581 }, { "epoch": 1.6607398910238027, "grad_norm": 0.2747173011302948, "learning_rate": 4.937850489781818e-06, "loss": 0.3077, "step": 11582 }, { "epoch": 1.6608832807570977, "grad_norm": 0.29417484998703003, "learning_rate": 4.937016312002849e-06, "loss": 0.3053, "step": 11583 }, { "epoch": 1.6610266704903929, "grad_norm": 0.27128565311431885, "learning_rate": 4.9361821359772485e-06, "loss": 0.3001, "step": 11584 }, { "epoch": 1.661170060223688, "grad_norm": 0.2998932898044586, "learning_rate": 4.935347961728238e-06, "loss": 0.3102, "step": 11585 }, { "epoch": 1.661313449956983, "grad_norm": 0.25150468945503235, "learning_rate": 4.934513789279044e-06, "loss": 0.3246, "step": 11586 }, { "epoch": 1.661456839690278, "grad_norm": 0.2842532694339752, "learning_rate": 4.933679618652883e-06, "loss": 0.3159, "step": 11587 }, { "epoch": 1.6616002294235734, "grad_norm": 0.2898695766925812, "learning_rate": 4.93284544987298e-06, "loss": 0.293, "step": 11588 }, { "epoch": 1.6617436191568684, "grad_norm": 0.29298993945121765, "learning_rate": 4.932011282962555e-06, "loss": 0.3215, "step": 11589 }, { "epoch": 1.6618870088901634, "grad_norm": 0.2553147077560425, "learning_rate": 4.931177117944833e-06, "loss": 0.2875, "step": 11590 }, { "epoch": 1.6620303986234586, "grad_norm": 0.29113537073135376, "learning_rate": 4.930342954843034e-06, "loss": 0.2983, "step": 11591 }, { "epoch": 1.6621737883567538, "grad_norm": 0.25114843249320984, "learning_rate": 4.929508793680382e-06, "loss": 0.2805, "step": 11592 }, { "epoch": 1.6623171780900488, "grad_norm": 0.2610419690608978, "learning_rate": 4.928674634480093e-06, "loss": 0.3214, "step": 11593 }, { "epoch": 1.6624605678233437, "grad_norm": 0.24578474462032318, "learning_rate": 4.927840477265394e-06, "loss": 0.3124, "step": 11594 }, { "epoch": 1.662603957556639, "grad_norm": 0.29452505707740784, "learning_rate": 4.9270063220595035e-06, "loss": 0.3136, "step": 11595 }, { "epoch": 1.6627473472899341, "grad_norm": 0.2753407955169678, "learning_rate": 4.926172168885647e-06, "loss": 0.2879, "step": 11596 }, { "epoch": 1.662890737023229, "grad_norm": 0.26802587509155273, "learning_rate": 4.9253380177670445e-06, "loss": 0.3239, "step": 11597 }, { "epoch": 1.663034126756524, "grad_norm": 0.3069066107273102, "learning_rate": 4.924503868726915e-06, "loss": 0.3078, "step": 11598 }, { "epoch": 1.6631775164898195, "grad_norm": 0.2802796959877014, "learning_rate": 4.9236697217884815e-06, "loss": 0.3103, "step": 11599 }, { "epoch": 1.6633209062231145, "grad_norm": 0.2856316864490509, "learning_rate": 4.922835576974966e-06, "loss": 0.3194, "step": 11600 }, { "epoch": 1.6634642959564094, "grad_norm": 0.27723878622055054, "learning_rate": 4.922001434309589e-06, "loss": 0.2973, "step": 11601 }, { "epoch": 1.6636076856897046, "grad_norm": 0.2645808160305023, "learning_rate": 4.921167293815572e-06, "loss": 0.2983, "step": 11602 }, { "epoch": 1.6637510754229998, "grad_norm": 0.26228439807891846, "learning_rate": 4.920333155516138e-06, "loss": 0.2935, "step": 11603 }, { "epoch": 1.6638944651562948, "grad_norm": 0.2919025421142578, "learning_rate": 4.919499019434504e-06, "loss": 0.3234, "step": 11604 }, { "epoch": 1.6640378548895898, "grad_norm": 0.28321585059165955, "learning_rate": 4.918664885593894e-06, "loss": 0.3047, "step": 11605 }, { "epoch": 1.664181244622885, "grad_norm": 0.28151586651802063, "learning_rate": 4.9178307540175295e-06, "loss": 0.3121, "step": 11606 }, { "epoch": 1.6643246343561802, "grad_norm": 0.2871166169643402, "learning_rate": 4.91699662472863e-06, "loss": 0.2899, "step": 11607 }, { "epoch": 1.6644680240894751, "grad_norm": 0.2890799641609192, "learning_rate": 4.916162497750418e-06, "loss": 0.2893, "step": 11608 }, { "epoch": 1.6646114138227703, "grad_norm": 0.30300137400627136, "learning_rate": 4.915328373106112e-06, "loss": 0.2999, "step": 11609 }, { "epoch": 1.6647548035560655, "grad_norm": 0.2945578694343567, "learning_rate": 4.914494250818933e-06, "loss": 0.3046, "step": 11610 }, { "epoch": 1.6648981932893605, "grad_norm": 0.27368849515914917, "learning_rate": 4.913660130912103e-06, "loss": 0.2947, "step": 11611 }, { "epoch": 1.6650415830226555, "grad_norm": 0.2914464473724365, "learning_rate": 4.912826013408842e-06, "loss": 0.3139, "step": 11612 }, { "epoch": 1.6651849727559507, "grad_norm": 0.2993679642677307, "learning_rate": 4.911991898332371e-06, "loss": 0.2968, "step": 11613 }, { "epoch": 1.6653283624892459, "grad_norm": 0.2882043421268463, "learning_rate": 4.911157785705911e-06, "loss": 0.3062, "step": 11614 }, { "epoch": 1.6654717522225408, "grad_norm": 0.3087776303291321, "learning_rate": 4.91032367555268e-06, "loss": 0.2955, "step": 11615 }, { "epoch": 1.6656151419558358, "grad_norm": 0.2874678373336792, "learning_rate": 4.9094895678959e-06, "loss": 0.3222, "step": 11616 }, { "epoch": 1.665758531689131, "grad_norm": 0.2805917263031006, "learning_rate": 4.908655462758792e-06, "loss": 0.3194, "step": 11617 }, { "epoch": 1.6659019214224262, "grad_norm": 0.302714467048645, "learning_rate": 4.907821360164575e-06, "loss": 0.2962, "step": 11618 }, { "epoch": 1.6660453111557212, "grad_norm": 0.28702813386917114, "learning_rate": 4.906987260136472e-06, "loss": 0.285, "step": 11619 }, { "epoch": 1.6661887008890164, "grad_norm": 0.2919777035713196, "learning_rate": 4.9061531626976975e-06, "loss": 0.313, "step": 11620 }, { "epoch": 1.6663320906223116, "grad_norm": 0.28250551223754883, "learning_rate": 4.905319067871476e-06, "loss": 0.3018, "step": 11621 }, { "epoch": 1.6664754803556066, "grad_norm": 0.26592251658439636, "learning_rate": 4.904484975681025e-06, "loss": 0.2878, "step": 11622 }, { "epoch": 1.6666188700889015, "grad_norm": 0.29209187626838684, "learning_rate": 4.903650886149564e-06, "loss": 0.3043, "step": 11623 }, { "epoch": 1.6667622598221967, "grad_norm": 0.28215324878692627, "learning_rate": 4.9028167993003184e-06, "loss": 0.3072, "step": 11624 }, { "epoch": 1.666905649555492, "grad_norm": 0.30284014344215393, "learning_rate": 4.901982715156501e-06, "loss": 0.3127, "step": 11625 }, { "epoch": 1.667049039288787, "grad_norm": 0.292000949382782, "learning_rate": 4.9011486337413335e-06, "loss": 0.3196, "step": 11626 }, { "epoch": 1.6671924290220819, "grad_norm": 0.28845030069351196, "learning_rate": 4.900314555078037e-06, "loss": 0.3148, "step": 11627 }, { "epoch": 1.667335818755377, "grad_norm": 0.30742692947387695, "learning_rate": 4.899480479189829e-06, "loss": 0.3054, "step": 11628 }, { "epoch": 1.6674792084886723, "grad_norm": 0.2686268389225006, "learning_rate": 4.89864640609993e-06, "loss": 0.3045, "step": 11629 }, { "epoch": 1.6676225982219672, "grad_norm": 0.2862958610057831, "learning_rate": 4.8978123358315605e-06, "loss": 0.3213, "step": 11630 }, { "epoch": 1.6677659879552624, "grad_norm": 0.30187520384788513, "learning_rate": 4.896978268407937e-06, "loss": 0.3285, "step": 11631 }, { "epoch": 1.6679093776885576, "grad_norm": 0.2658444046974182, "learning_rate": 4.89614420385228e-06, "loss": 0.2915, "step": 11632 }, { "epoch": 1.6680527674218526, "grad_norm": 0.2893458604812622, "learning_rate": 4.895310142187809e-06, "loss": 0.292, "step": 11633 }, { "epoch": 1.6681961571551476, "grad_norm": 0.27920296788215637, "learning_rate": 4.8944760834377425e-06, "loss": 0.3062, "step": 11634 }, { "epoch": 1.6683395468884428, "grad_norm": 0.25707492232322693, "learning_rate": 4.893642027625302e-06, "loss": 0.315, "step": 11635 }, { "epoch": 1.668482936621738, "grad_norm": 0.28373777866363525, "learning_rate": 4.892807974773701e-06, "loss": 0.3316, "step": 11636 }, { "epoch": 1.668626326355033, "grad_norm": 0.2862107753753662, "learning_rate": 4.8919739249061615e-06, "loss": 0.3035, "step": 11637 }, { "epoch": 1.668769716088328, "grad_norm": 0.2942878007888794, "learning_rate": 4.891139878045901e-06, "loss": 0.3181, "step": 11638 }, { "epoch": 1.6689131058216233, "grad_norm": 0.2720692455768585, "learning_rate": 4.89030583421614e-06, "loss": 0.3168, "step": 11639 }, { "epoch": 1.6690564955549183, "grad_norm": 0.2684500217437744, "learning_rate": 4.889471793440096e-06, "loss": 0.3166, "step": 11640 }, { "epoch": 1.6691998852882133, "grad_norm": 0.24889960885047913, "learning_rate": 4.888637755740987e-06, "loss": 0.3008, "step": 11641 }, { "epoch": 1.6693432750215085, "grad_norm": 0.26587703824043274, "learning_rate": 4.887803721142031e-06, "loss": 0.2904, "step": 11642 }, { "epoch": 1.6694866647548037, "grad_norm": 0.27211815118789673, "learning_rate": 4.886969689666447e-06, "loss": 0.3046, "step": 11643 }, { "epoch": 1.6696300544880986, "grad_norm": 0.27153244614601135, "learning_rate": 4.886135661337453e-06, "loss": 0.2964, "step": 11644 }, { "epoch": 1.6697734442213936, "grad_norm": 0.2648993730545044, "learning_rate": 4.885301636178267e-06, "loss": 0.3045, "step": 11645 }, { "epoch": 1.6699168339546888, "grad_norm": 0.28289496898651123, "learning_rate": 4.884467614212109e-06, "loss": 0.3189, "step": 11646 }, { "epoch": 1.670060223687984, "grad_norm": 0.2946791350841522, "learning_rate": 4.883633595462193e-06, "loss": 0.2873, "step": 11647 }, { "epoch": 1.670203613421279, "grad_norm": 0.2614760398864746, "learning_rate": 4.8827995799517384e-06, "loss": 0.2864, "step": 11648 }, { "epoch": 1.6703470031545742, "grad_norm": 0.2780163586139679, "learning_rate": 4.881965567703963e-06, "loss": 0.3029, "step": 11649 }, { "epoch": 1.6704903928878694, "grad_norm": 0.26744788885116577, "learning_rate": 4.881131558742085e-06, "loss": 0.2894, "step": 11650 }, { "epoch": 1.6706337826211644, "grad_norm": 0.2691989839076996, "learning_rate": 4.880297553089323e-06, "loss": 0.3029, "step": 11651 }, { "epoch": 1.6707771723544593, "grad_norm": 0.2873574197292328, "learning_rate": 4.879463550768891e-06, "loss": 0.2999, "step": 11652 }, { "epoch": 1.6709205620877545, "grad_norm": 0.2818404734134674, "learning_rate": 4.878629551804009e-06, "loss": 0.3223, "step": 11653 }, { "epoch": 1.6710639518210497, "grad_norm": 0.2716718316078186, "learning_rate": 4.877795556217894e-06, "loss": 0.2878, "step": 11654 }, { "epoch": 1.6712073415543447, "grad_norm": 0.2826581299304962, "learning_rate": 4.876961564033761e-06, "loss": 0.3037, "step": 11655 }, { "epoch": 1.6713507312876397, "grad_norm": 0.27694737911224365, "learning_rate": 4.87612757527483e-06, "loss": 0.291, "step": 11656 }, { "epoch": 1.6714941210209349, "grad_norm": 0.28550586104393005, "learning_rate": 4.875293589964318e-06, "loss": 0.3333, "step": 11657 }, { "epoch": 1.67163751075423, "grad_norm": 0.2759539783000946, "learning_rate": 4.874459608125439e-06, "loss": 0.2995, "step": 11658 }, { "epoch": 1.671780900487525, "grad_norm": 0.2658621668815613, "learning_rate": 4.873625629781412e-06, "loss": 0.3048, "step": 11659 }, { "epoch": 1.6719242902208202, "grad_norm": 0.2613339424133301, "learning_rate": 4.872791654955453e-06, "loss": 0.2896, "step": 11660 }, { "epoch": 1.6720676799541154, "grad_norm": 0.2882988154888153, "learning_rate": 4.871957683670778e-06, "loss": 0.2908, "step": 11661 }, { "epoch": 1.6722110696874104, "grad_norm": 0.26980674266815186, "learning_rate": 4.871123715950607e-06, "loss": 0.308, "step": 11662 }, { "epoch": 1.6723544594207054, "grad_norm": 0.2794094383716583, "learning_rate": 4.870289751818151e-06, "loss": 0.3044, "step": 11663 }, { "epoch": 1.6724978491540006, "grad_norm": 0.3223720192909241, "learning_rate": 4.869455791296629e-06, "loss": 0.2977, "step": 11664 }, { "epoch": 1.6726412388872958, "grad_norm": 0.2844674289226532, "learning_rate": 4.868621834409256e-06, "loss": 0.3032, "step": 11665 }, { "epoch": 1.6727846286205907, "grad_norm": 0.2878175675868988, "learning_rate": 4.867787881179251e-06, "loss": 0.3044, "step": 11666 }, { "epoch": 1.6729280183538857, "grad_norm": 0.28379756212234497, "learning_rate": 4.8669539316298275e-06, "loss": 0.2936, "step": 11667 }, { "epoch": 1.673071408087181, "grad_norm": 0.2679119110107422, "learning_rate": 4.866119985784202e-06, "loss": 0.3208, "step": 11668 }, { "epoch": 1.673214797820476, "grad_norm": 0.29270413517951965, "learning_rate": 4.8652860436655884e-06, "loss": 0.2914, "step": 11669 }, { "epoch": 1.673358187553771, "grad_norm": 0.25773102045059204, "learning_rate": 4.864452105297205e-06, "loss": 0.2993, "step": 11670 }, { "epoch": 1.6735015772870663, "grad_norm": 0.26671162247657776, "learning_rate": 4.8636181707022665e-06, "loss": 0.2906, "step": 11671 }, { "epoch": 1.6736449670203615, "grad_norm": 0.29516690969467163, "learning_rate": 4.862784239903988e-06, "loss": 0.3039, "step": 11672 }, { "epoch": 1.6737883567536564, "grad_norm": 0.2840808629989624, "learning_rate": 4.861950312925588e-06, "loss": 0.3018, "step": 11673 }, { "epoch": 1.6739317464869514, "grad_norm": 0.2742656171321869, "learning_rate": 4.861116389790277e-06, "loss": 0.2992, "step": 11674 }, { "epoch": 1.6740751362202466, "grad_norm": 0.272508442401886, "learning_rate": 4.860282470521271e-06, "loss": 0.3175, "step": 11675 }, { "epoch": 1.6742185259535418, "grad_norm": 0.2762914001941681, "learning_rate": 4.859448555141786e-06, "loss": 0.3199, "step": 11676 }, { "epoch": 1.6743619156868368, "grad_norm": 0.29523736238479614, "learning_rate": 4.858614643675038e-06, "loss": 0.3098, "step": 11677 }, { "epoch": 1.6745053054201318, "grad_norm": 0.27341315150260925, "learning_rate": 4.857780736144242e-06, "loss": 0.2975, "step": 11678 }, { "epoch": 1.6746486951534272, "grad_norm": 0.2836295962333679, "learning_rate": 4.85694683257261e-06, "loss": 0.2965, "step": 11679 }, { "epoch": 1.6747920848867222, "grad_norm": 0.28514859080314636, "learning_rate": 4.856112932983358e-06, "loss": 0.286, "step": 11680 }, { "epoch": 1.6749354746200171, "grad_norm": 0.27095597982406616, "learning_rate": 4.8552790373997004e-06, "loss": 0.303, "step": 11681 }, { "epoch": 1.6750788643533123, "grad_norm": 0.26139482855796814, "learning_rate": 4.854445145844852e-06, "loss": 0.3113, "step": 11682 }, { "epoch": 1.6752222540866075, "grad_norm": 0.26928436756134033, "learning_rate": 4.853611258342027e-06, "loss": 0.2863, "step": 11683 }, { "epoch": 1.6753656438199025, "grad_norm": 0.2821832001209259, "learning_rate": 4.8527773749144405e-06, "loss": 0.301, "step": 11684 }, { "epoch": 1.6755090335531975, "grad_norm": 0.25488269329071045, "learning_rate": 4.851943495585305e-06, "loss": 0.3099, "step": 11685 }, { "epoch": 1.6756524232864927, "grad_norm": 0.27578070759773254, "learning_rate": 4.851109620377833e-06, "loss": 0.3076, "step": 11686 }, { "epoch": 1.6757958130197879, "grad_norm": 0.27537819743156433, "learning_rate": 4.850275749315241e-06, "loss": 0.3093, "step": 11687 }, { "epoch": 1.6759392027530828, "grad_norm": 0.2814653217792511, "learning_rate": 4.849441882420742e-06, "loss": 0.3045, "step": 11688 }, { "epoch": 1.6760825924863778, "grad_norm": 0.27903252840042114, "learning_rate": 4.848608019717552e-06, "loss": 0.3101, "step": 11689 }, { "epoch": 1.6762259822196732, "grad_norm": 0.26709434390068054, "learning_rate": 4.8477741612288795e-06, "loss": 0.293, "step": 11690 }, { "epoch": 1.6763693719529682, "grad_norm": 0.2741333246231079, "learning_rate": 4.84694030697794e-06, "loss": 0.2961, "step": 11691 }, { "epoch": 1.6765127616862632, "grad_norm": 0.284958153963089, "learning_rate": 4.846106456987947e-06, "loss": 0.3216, "step": 11692 }, { "epoch": 1.6766561514195584, "grad_norm": 0.28026434779167175, "learning_rate": 4.845272611282113e-06, "loss": 0.2919, "step": 11693 }, { "epoch": 1.6767995411528536, "grad_norm": 0.27420032024383545, "learning_rate": 4.844438769883653e-06, "loss": 0.3062, "step": 11694 }, { "epoch": 1.6769429308861485, "grad_norm": 0.27302250266075134, "learning_rate": 4.843604932815777e-06, "loss": 0.2872, "step": 11695 }, { "epoch": 1.6770863206194435, "grad_norm": 0.2830207347869873, "learning_rate": 4.842771100101699e-06, "loss": 0.3091, "step": 11696 }, { "epoch": 1.6772297103527387, "grad_norm": 0.31444334983825684, "learning_rate": 4.841937271764632e-06, "loss": 0.3135, "step": 11697 }, { "epoch": 1.677373100086034, "grad_norm": 0.297761470079422, "learning_rate": 4.841103447827789e-06, "loss": 0.3263, "step": 11698 }, { "epoch": 1.6775164898193289, "grad_norm": 0.27896222472190857, "learning_rate": 4.84026962831438e-06, "loss": 0.3084, "step": 11699 }, { "epoch": 1.677659879552624, "grad_norm": 0.2754208445549011, "learning_rate": 4.8394358132476224e-06, "loss": 0.3217, "step": 11700 }, { "epoch": 1.6778032692859193, "grad_norm": 0.279039204120636, "learning_rate": 4.838602002650721e-06, "loss": 0.3084, "step": 11701 }, { "epoch": 1.6779466590192142, "grad_norm": 0.2908633351325989, "learning_rate": 4.8377681965468935e-06, "loss": 0.3013, "step": 11702 }, { "epoch": 1.6780900487525092, "grad_norm": 0.27805095911026, "learning_rate": 4.836934394959348e-06, "loss": 0.3124, "step": 11703 }, { "epoch": 1.6782334384858044, "grad_norm": 0.25542593002319336, "learning_rate": 4.8361005979112996e-06, "loss": 0.2947, "step": 11704 }, { "epoch": 1.6783768282190996, "grad_norm": 0.29851967096328735, "learning_rate": 4.835266805425959e-06, "loss": 0.3411, "step": 11705 }, { "epoch": 1.6785202179523946, "grad_norm": 0.28586339950561523, "learning_rate": 4.834433017526536e-06, "loss": 0.3243, "step": 11706 }, { "epoch": 1.6786636076856896, "grad_norm": 0.2719639837741852, "learning_rate": 4.833599234236243e-06, "loss": 0.3038, "step": 11707 }, { "epoch": 1.6788069974189848, "grad_norm": 0.27127400040626526, "learning_rate": 4.832765455578291e-06, "loss": 0.304, "step": 11708 }, { "epoch": 1.67895038715228, "grad_norm": 0.28316831588745117, "learning_rate": 4.831931681575892e-06, "loss": 0.3014, "step": 11709 }, { "epoch": 1.679093776885575, "grad_norm": 0.2743045389652252, "learning_rate": 4.831097912252256e-06, "loss": 0.3098, "step": 11710 }, { "epoch": 1.6792371666188701, "grad_norm": 0.2780587077140808, "learning_rate": 4.830264147630597e-06, "loss": 0.3129, "step": 11711 }, { "epoch": 1.6793805563521653, "grad_norm": 0.279151052236557, "learning_rate": 4.82943038773412e-06, "loss": 0.3018, "step": 11712 }, { "epoch": 1.6795239460854603, "grad_norm": 0.2740218937397003, "learning_rate": 4.828596632586039e-06, "loss": 0.3026, "step": 11713 }, { "epoch": 1.6796673358187553, "grad_norm": 0.27280113101005554, "learning_rate": 4.827762882209564e-06, "loss": 0.3132, "step": 11714 }, { "epoch": 1.6798107255520505, "grad_norm": 0.2651737928390503, "learning_rate": 4.826929136627906e-06, "loss": 0.3008, "step": 11715 }, { "epoch": 1.6799541152853457, "grad_norm": 0.2774522006511688, "learning_rate": 4.826095395864277e-06, "loss": 0.2981, "step": 11716 }, { "epoch": 1.6800975050186406, "grad_norm": 0.2968536913394928, "learning_rate": 4.825261659941881e-06, "loss": 0.301, "step": 11717 }, { "epoch": 1.6802408947519356, "grad_norm": 0.27103981375694275, "learning_rate": 4.824427928883933e-06, "loss": 0.3008, "step": 11718 }, { "epoch": 1.6803842844852308, "grad_norm": 0.28168419003486633, "learning_rate": 4.823594202713641e-06, "loss": 0.3236, "step": 11719 }, { "epoch": 1.680527674218526, "grad_norm": 0.2753331661224365, "learning_rate": 4.822760481454215e-06, "loss": 0.3085, "step": 11720 }, { "epoch": 1.680671063951821, "grad_norm": 0.28362220525741577, "learning_rate": 4.821926765128866e-06, "loss": 0.3285, "step": 11721 }, { "epoch": 1.6808144536851162, "grad_norm": 0.270174115896225, "learning_rate": 4.8210930537608e-06, "loss": 0.2975, "step": 11722 }, { "epoch": 1.6809578434184114, "grad_norm": 0.2778874337673187, "learning_rate": 4.820259347373229e-06, "loss": 0.2919, "step": 11723 }, { "epoch": 1.6811012331517063, "grad_norm": 0.2801847457885742, "learning_rate": 4.8194256459893605e-06, "loss": 0.3146, "step": 11724 }, { "epoch": 1.6812446228850013, "grad_norm": 0.2771594822406769, "learning_rate": 4.818591949632405e-06, "loss": 0.324, "step": 11725 }, { "epoch": 1.6813880126182965, "grad_norm": 0.2675286829471588, "learning_rate": 4.81775825832557e-06, "loss": 0.2936, "step": 11726 }, { "epoch": 1.6815314023515917, "grad_norm": 0.27038079500198364, "learning_rate": 4.816924572092068e-06, "loss": 0.29, "step": 11727 }, { "epoch": 1.6816747920848867, "grad_norm": 0.2571374177932739, "learning_rate": 4.816090890955101e-06, "loss": 0.2869, "step": 11728 }, { "epoch": 1.6818181818181817, "grad_norm": 0.27016782760620117, "learning_rate": 4.815257214937881e-06, "loss": 0.3149, "step": 11729 }, { "epoch": 1.681961571551477, "grad_norm": 0.2763213813304901, "learning_rate": 4.8144235440636154e-06, "loss": 0.3081, "step": 11730 }, { "epoch": 1.682104961284772, "grad_norm": 0.2584781348705292, "learning_rate": 4.813589878355513e-06, "loss": 0.2938, "step": 11731 }, { "epoch": 1.682248351018067, "grad_norm": 0.26912766695022583, "learning_rate": 4.812756217836784e-06, "loss": 0.3001, "step": 11732 }, { "epoch": 1.6823917407513622, "grad_norm": 0.29011642932891846, "learning_rate": 4.811922562530631e-06, "loss": 0.288, "step": 11733 }, { "epoch": 1.6825351304846574, "grad_norm": 0.28061527013778687, "learning_rate": 4.811088912460266e-06, "loss": 0.325, "step": 11734 }, { "epoch": 1.6826785202179524, "grad_norm": 0.2895067036151886, "learning_rate": 4.810255267648894e-06, "loss": 0.2895, "step": 11735 }, { "epoch": 1.6828219099512474, "grad_norm": 0.27620381116867065, "learning_rate": 4.809421628119724e-06, "loss": 0.2935, "step": 11736 }, { "epoch": 1.6829652996845426, "grad_norm": 0.2724960446357727, "learning_rate": 4.808587993895963e-06, "loss": 0.3139, "step": 11737 }, { "epoch": 1.6831086894178378, "grad_norm": 0.2519948184490204, "learning_rate": 4.807754365000819e-06, "loss": 0.299, "step": 11738 }, { "epoch": 1.6832520791511327, "grad_norm": 0.27550897002220154, "learning_rate": 4.806920741457497e-06, "loss": 0.3072, "step": 11739 }, { "epoch": 1.683395468884428, "grad_norm": 0.2846537232398987, "learning_rate": 4.806087123289203e-06, "loss": 0.2964, "step": 11740 }, { "epoch": 1.6835388586177231, "grad_norm": 0.2722121775150299, "learning_rate": 4.805253510519145e-06, "loss": 0.2971, "step": 11741 }, { "epoch": 1.683682248351018, "grad_norm": 0.2605588436126709, "learning_rate": 4.8044199031705314e-06, "loss": 0.3081, "step": 11742 }, { "epoch": 1.683825638084313, "grad_norm": 0.2656603753566742, "learning_rate": 4.803586301266568e-06, "loss": 0.318, "step": 11743 }, { "epoch": 1.6839690278176083, "grad_norm": 0.2709615230560303, "learning_rate": 4.802752704830458e-06, "loss": 0.3037, "step": 11744 }, { "epoch": 1.6841124175509035, "grad_norm": 0.31446772813796997, "learning_rate": 4.801919113885409e-06, "loss": 0.3098, "step": 11745 }, { "epoch": 1.6842558072841984, "grad_norm": 0.2870716452598572, "learning_rate": 4.801085528454628e-06, "loss": 0.309, "step": 11746 }, { "epoch": 1.6843991970174934, "grad_norm": 0.28375208377838135, "learning_rate": 4.800251948561319e-06, "loss": 0.2818, "step": 11747 }, { "epoch": 1.6845425867507886, "grad_norm": 0.2853306829929352, "learning_rate": 4.79941837422869e-06, "loss": 0.2847, "step": 11748 }, { "epoch": 1.6846859764840838, "grad_norm": 0.2880123555660248, "learning_rate": 4.798584805479943e-06, "loss": 0.3159, "step": 11749 }, { "epoch": 1.6848293662173788, "grad_norm": 0.27943938970565796, "learning_rate": 4.797751242338286e-06, "loss": 0.3111, "step": 11750 }, { "epoch": 1.684972755950674, "grad_norm": 0.27324846386909485, "learning_rate": 4.7969176848269235e-06, "loss": 0.2949, "step": 11751 }, { "epoch": 1.6851161456839692, "grad_norm": 0.25658461451530457, "learning_rate": 4.79608413296906e-06, "loss": 0.2795, "step": 11752 }, { "epoch": 1.6852595354172641, "grad_norm": 0.26647236943244934, "learning_rate": 4.7952505867879005e-06, "loss": 0.3035, "step": 11753 }, { "epoch": 1.6854029251505591, "grad_norm": 0.28059133887290955, "learning_rate": 4.794417046306652e-06, "loss": 0.307, "step": 11754 }, { "epoch": 1.6855463148838543, "grad_norm": 0.27079832553863525, "learning_rate": 4.793583511548514e-06, "loss": 0.3025, "step": 11755 }, { "epoch": 1.6856897046171495, "grad_norm": 0.26876571774482727, "learning_rate": 4.792749982536694e-06, "loss": 0.2819, "step": 11756 }, { "epoch": 1.6858330943504445, "grad_norm": 0.2792525887489319, "learning_rate": 4.791916459294395e-06, "loss": 0.301, "step": 11757 }, { "epoch": 1.6859764840837395, "grad_norm": 0.2881861627101898, "learning_rate": 4.791082941844823e-06, "loss": 0.2942, "step": 11758 }, { "epoch": 1.6861198738170347, "grad_norm": 0.29268455505371094, "learning_rate": 4.79024943021118e-06, "loss": 0.3003, "step": 11759 }, { "epoch": 1.6862632635503298, "grad_norm": 0.2926754951477051, "learning_rate": 4.789415924416671e-06, "loss": 0.3041, "step": 11760 }, { "epoch": 1.6864066532836248, "grad_norm": 0.26380136609077454, "learning_rate": 4.788582424484497e-06, "loss": 0.3039, "step": 11761 }, { "epoch": 1.68655004301692, "grad_norm": 0.26478642225265503, "learning_rate": 4.787748930437864e-06, "loss": 0.2993, "step": 11762 }, { "epoch": 1.6866934327502152, "grad_norm": 0.29504790902137756, "learning_rate": 4.786915442299974e-06, "loss": 0.3124, "step": 11763 }, { "epoch": 1.6868368224835102, "grad_norm": 0.2818008363246918, "learning_rate": 4.78608196009403e-06, "loss": 0.3047, "step": 11764 }, { "epoch": 1.6869802122168052, "grad_norm": 0.27167654037475586, "learning_rate": 4.7852484838432376e-06, "loss": 0.2999, "step": 11765 }, { "epoch": 1.6871236019501004, "grad_norm": 0.2694788873195648, "learning_rate": 4.784415013570795e-06, "loss": 0.281, "step": 11766 }, { "epoch": 1.6872669916833956, "grad_norm": 0.290202796459198, "learning_rate": 4.7835815492999065e-06, "loss": 0.3245, "step": 11767 }, { "epoch": 1.6874103814166905, "grad_norm": 0.28531700372695923, "learning_rate": 4.782748091053774e-06, "loss": 0.2962, "step": 11768 }, { "epoch": 1.6875537711499855, "grad_norm": 0.28542372584342957, "learning_rate": 4.781914638855601e-06, "loss": 0.3009, "step": 11769 }, { "epoch": 1.687697160883281, "grad_norm": 0.2661232352256775, "learning_rate": 4.7810811927285926e-06, "loss": 0.2839, "step": 11770 }, { "epoch": 1.687840550616576, "grad_norm": 0.2890852689743042, "learning_rate": 4.7802477526959434e-06, "loss": 0.3095, "step": 11771 }, { "epoch": 1.6879839403498709, "grad_norm": 0.2579394578933716, "learning_rate": 4.779414318780859e-06, "loss": 0.3005, "step": 11772 }, { "epoch": 1.688127330083166, "grad_norm": 0.29870522022247314, "learning_rate": 4.7785808910065405e-06, "loss": 0.3002, "step": 11773 }, { "epoch": 1.6882707198164613, "grad_norm": 0.2763112783432007, "learning_rate": 4.77774746939619e-06, "loss": 0.2993, "step": 11774 }, { "epoch": 1.6884141095497562, "grad_norm": 0.28768154978752136, "learning_rate": 4.776914053973009e-06, "loss": 0.2906, "step": 11775 }, { "epoch": 1.6885574992830512, "grad_norm": 0.28458696603775024, "learning_rate": 4.7760806447601955e-06, "loss": 0.2846, "step": 11776 }, { "epoch": 1.6887008890163464, "grad_norm": 0.2850988805294037, "learning_rate": 4.7752472417809536e-06, "loss": 0.3263, "step": 11777 }, { "epoch": 1.6888442787496416, "grad_norm": 0.2881912291049957, "learning_rate": 4.774413845058482e-06, "loss": 0.3018, "step": 11778 }, { "epoch": 1.6889876684829366, "grad_norm": 0.25072774291038513, "learning_rate": 4.7735804546159835e-06, "loss": 0.2862, "step": 11779 }, { "epoch": 1.6891310582162318, "grad_norm": 0.27067312598228455, "learning_rate": 4.772747070476655e-06, "loss": 0.3111, "step": 11780 }, { "epoch": 1.689274447949527, "grad_norm": 0.27549296617507935, "learning_rate": 4.771913692663701e-06, "loss": 0.3105, "step": 11781 }, { "epoch": 1.689417837682822, "grad_norm": 0.2825244069099426, "learning_rate": 4.771080321200317e-06, "loss": 0.2989, "step": 11782 }, { "epoch": 1.689561227416117, "grad_norm": 0.25431951880455017, "learning_rate": 4.770246956109705e-06, "loss": 0.2969, "step": 11783 }, { "epoch": 1.6897046171494121, "grad_norm": 0.2758287489414215, "learning_rate": 4.769413597415064e-06, "loss": 0.299, "step": 11784 }, { "epoch": 1.6898480068827073, "grad_norm": 0.2658444344997406, "learning_rate": 4.768580245139594e-06, "loss": 0.3174, "step": 11785 }, { "epoch": 1.6899913966160023, "grad_norm": 0.27291879057884216, "learning_rate": 4.767746899306495e-06, "loss": 0.3154, "step": 11786 }, { "epoch": 1.6901347863492973, "grad_norm": 0.27624791860580444, "learning_rate": 4.766913559938964e-06, "loss": 0.298, "step": 11787 }, { "epoch": 1.6902781760825925, "grad_norm": 0.2771160900592804, "learning_rate": 4.766080227060201e-06, "loss": 0.3027, "step": 11788 }, { "epoch": 1.6904215658158877, "grad_norm": 0.26677918434143066, "learning_rate": 4.765246900693404e-06, "loss": 0.3068, "step": 11789 }, { "epoch": 1.6905649555491826, "grad_norm": 0.2985932528972626, "learning_rate": 4.764413580861772e-06, "loss": 0.3089, "step": 11790 }, { "epoch": 1.6907083452824778, "grad_norm": 0.2648945450782776, "learning_rate": 4.763580267588505e-06, "loss": 0.282, "step": 11791 }, { "epoch": 1.690851735015773, "grad_norm": 0.28723639249801636, "learning_rate": 4.762746960896801e-06, "loss": 0.3163, "step": 11792 }, { "epoch": 1.690995124749068, "grad_norm": 0.2714298665523529, "learning_rate": 4.761913660809854e-06, "loss": 0.3266, "step": 11793 }, { "epoch": 1.691138514482363, "grad_norm": 0.27222371101379395, "learning_rate": 4.761080367350865e-06, "loss": 0.2871, "step": 11794 }, { "epoch": 1.6912819042156582, "grad_norm": 0.2839101254940033, "learning_rate": 4.760247080543031e-06, "loss": 0.3021, "step": 11795 }, { "epoch": 1.6914252939489534, "grad_norm": 0.2556997835636139, "learning_rate": 4.759413800409549e-06, "loss": 0.31, "step": 11796 }, { "epoch": 1.6915686836822483, "grad_norm": 0.27596068382263184, "learning_rate": 4.758580526973618e-06, "loss": 0.3141, "step": 11797 }, { "epoch": 1.6917120734155433, "grad_norm": 0.2940691411495209, "learning_rate": 4.757747260258433e-06, "loss": 0.3178, "step": 11798 }, { "epoch": 1.6918554631488385, "grad_norm": 0.263416051864624, "learning_rate": 4.7569140002871926e-06, "loss": 0.2929, "step": 11799 }, { "epoch": 1.6919988528821337, "grad_norm": 0.28960275650024414, "learning_rate": 4.75608074708309e-06, "loss": 0.3019, "step": 11800 }, { "epoch": 1.6921422426154287, "grad_norm": 0.2714770436286926, "learning_rate": 4.755247500669326e-06, "loss": 0.3165, "step": 11801 }, { "epoch": 1.6922856323487239, "grad_norm": 0.2674355208873749, "learning_rate": 4.7544142610690956e-06, "loss": 0.2948, "step": 11802 }, { "epoch": 1.692429022082019, "grad_norm": 0.26419511437416077, "learning_rate": 4.753581028305594e-06, "loss": 0.2871, "step": 11803 }, { "epoch": 1.692572411815314, "grad_norm": 0.2954922020435333, "learning_rate": 4.752747802402017e-06, "loss": 0.3192, "step": 11804 }, { "epoch": 1.692715801548609, "grad_norm": 0.27305373549461365, "learning_rate": 4.751914583381561e-06, "loss": 0.3112, "step": 11805 }, { "epoch": 1.6928591912819042, "grad_norm": 0.27317023277282715, "learning_rate": 4.751081371267421e-06, "loss": 0.2911, "step": 11806 }, { "epoch": 1.6930025810151994, "grad_norm": 0.26745009422302246, "learning_rate": 4.750248166082793e-06, "loss": 0.2971, "step": 11807 }, { "epoch": 1.6931459707484944, "grad_norm": 0.28355926275253296, "learning_rate": 4.749414967850873e-06, "loss": 0.3188, "step": 11808 }, { "epoch": 1.6932893604817894, "grad_norm": 0.2844724655151367, "learning_rate": 4.748581776594854e-06, "loss": 0.3041, "step": 11809 }, { "epoch": 1.6934327502150845, "grad_norm": 0.2851604223251343, "learning_rate": 4.747748592337931e-06, "loss": 0.3188, "step": 11810 }, { "epoch": 1.6935761399483797, "grad_norm": 0.28933513164520264, "learning_rate": 4.746915415103299e-06, "loss": 0.311, "step": 11811 }, { "epoch": 1.6937195296816747, "grad_norm": 0.2957046627998352, "learning_rate": 4.746082244914153e-06, "loss": 0.2903, "step": 11812 }, { "epoch": 1.69386291941497, "grad_norm": 0.27362188696861267, "learning_rate": 4.745249081793688e-06, "loss": 0.2931, "step": 11813 }, { "epoch": 1.694006309148265, "grad_norm": 0.28772634267807007, "learning_rate": 4.744415925765095e-06, "loss": 0.2955, "step": 11814 }, { "epoch": 1.69414969888156, "grad_norm": 0.27470335364341736, "learning_rate": 4.74358277685157e-06, "loss": 0.3425, "step": 11815 }, { "epoch": 1.694293088614855, "grad_norm": 0.28112906217575073, "learning_rate": 4.742749635076305e-06, "loss": 0.2808, "step": 11816 }, { "epoch": 1.6944364783481503, "grad_norm": 0.2634749710559845, "learning_rate": 4.741916500462496e-06, "loss": 0.3009, "step": 11817 }, { "epoch": 1.6945798680814455, "grad_norm": 0.2778724730014801, "learning_rate": 4.741083373033333e-06, "loss": 0.2851, "step": 11818 }, { "epoch": 1.6947232578147404, "grad_norm": 0.2939727306365967, "learning_rate": 4.740250252812014e-06, "loss": 0.2949, "step": 11819 }, { "epoch": 1.6948666475480354, "grad_norm": 0.26385682821273804, "learning_rate": 4.739417139821726e-06, "loss": 0.2767, "step": 11820 }, { "epoch": 1.6950100372813308, "grad_norm": 0.27652984857559204, "learning_rate": 4.738584034085663e-06, "loss": 0.3003, "step": 11821 }, { "epoch": 1.6951534270146258, "grad_norm": 0.29331639409065247, "learning_rate": 4.73775093562702e-06, "loss": 0.3043, "step": 11822 }, { "epoch": 1.6952968167479208, "grad_norm": 0.2873956263065338, "learning_rate": 4.736917844468986e-06, "loss": 0.2995, "step": 11823 }, { "epoch": 1.695440206481216, "grad_norm": 0.2726956903934479, "learning_rate": 4.736084760634756e-06, "loss": 0.3096, "step": 11824 }, { "epoch": 1.6955835962145112, "grad_norm": 0.2808007001876831, "learning_rate": 4.73525168414752e-06, "loss": 0.3049, "step": 11825 }, { "epoch": 1.6957269859478061, "grad_norm": 0.2687961757183075, "learning_rate": 4.734418615030469e-06, "loss": 0.2965, "step": 11826 }, { "epoch": 1.695870375681101, "grad_norm": 0.27769163250923157, "learning_rate": 4.733585553306796e-06, "loss": 0.302, "step": 11827 }, { "epoch": 1.6960137654143963, "grad_norm": 0.2921682894229889, "learning_rate": 4.73275249899969e-06, "loss": 0.3124, "step": 11828 }, { "epoch": 1.6961571551476915, "grad_norm": 0.28153350949287415, "learning_rate": 4.731919452132346e-06, "loss": 0.324, "step": 11829 }, { "epoch": 1.6963005448809865, "grad_norm": 0.26938068866729736, "learning_rate": 4.73108641272795e-06, "loss": 0.2974, "step": 11830 }, { "epoch": 1.6964439346142817, "grad_norm": 0.2738614082336426, "learning_rate": 4.730253380809694e-06, "loss": 0.2992, "step": 11831 }, { "epoch": 1.6965873243475769, "grad_norm": 0.2943468987941742, "learning_rate": 4.729420356400771e-06, "loss": 0.3095, "step": 11832 }, { "epoch": 1.6967307140808718, "grad_norm": 0.26700568199157715, "learning_rate": 4.7285873395243674e-06, "loss": 0.304, "step": 11833 }, { "epoch": 1.6968741038141668, "grad_norm": 0.2579081356525421, "learning_rate": 4.727754330203675e-06, "loss": 0.3039, "step": 11834 }, { "epoch": 1.697017493547462, "grad_norm": 0.27117645740509033, "learning_rate": 4.726921328461886e-06, "loss": 0.3103, "step": 11835 }, { "epoch": 1.6971608832807572, "grad_norm": 0.28866031765937805, "learning_rate": 4.726088334322184e-06, "loss": 0.297, "step": 11836 }, { "epoch": 1.6973042730140522, "grad_norm": 0.29306063055992126, "learning_rate": 4.725255347807762e-06, "loss": 0.3068, "step": 11837 }, { "epoch": 1.6974476627473472, "grad_norm": 0.2650917172431946, "learning_rate": 4.724422368941809e-06, "loss": 0.3173, "step": 11838 }, { "epoch": 1.6975910524806423, "grad_norm": 0.2945965826511383, "learning_rate": 4.723589397747513e-06, "loss": 0.2998, "step": 11839 }, { "epoch": 1.6977344422139375, "grad_norm": 0.2865038216114044, "learning_rate": 4.722756434248064e-06, "loss": 0.3216, "step": 11840 }, { "epoch": 1.6978778319472325, "grad_norm": 0.2955990135669708, "learning_rate": 4.721923478466648e-06, "loss": 0.2941, "step": 11841 }, { "epoch": 1.6980212216805277, "grad_norm": 0.2885490357875824, "learning_rate": 4.7210905304264546e-06, "loss": 0.2949, "step": 11842 }, { "epoch": 1.698164611413823, "grad_norm": 0.275172621011734, "learning_rate": 4.720257590150672e-06, "loss": 0.2957, "step": 11843 }, { "epoch": 1.6983080011471179, "grad_norm": 0.2775900065898895, "learning_rate": 4.719424657662487e-06, "loss": 0.2857, "step": 11844 }, { "epoch": 1.6984513908804129, "grad_norm": 0.27138015627861023, "learning_rate": 4.718591732985089e-06, "loss": 0.3064, "step": 11845 }, { "epoch": 1.698594780613708, "grad_norm": 0.2731626629829407, "learning_rate": 4.717758816141667e-06, "loss": 0.2941, "step": 11846 }, { "epoch": 1.6987381703470033, "grad_norm": 0.26939284801483154, "learning_rate": 4.7169259071554015e-06, "loss": 0.2747, "step": 11847 }, { "epoch": 1.6988815600802982, "grad_norm": 0.2807253301143646, "learning_rate": 4.716093006049484e-06, "loss": 0.3064, "step": 11848 }, { "epoch": 1.6990249498135932, "grad_norm": 0.29591283202171326, "learning_rate": 4.715260112847101e-06, "loss": 0.3116, "step": 11849 }, { "epoch": 1.6991683395468884, "grad_norm": 0.27862975001335144, "learning_rate": 4.714427227571438e-06, "loss": 0.2876, "step": 11850 }, { "epoch": 1.6993117292801836, "grad_norm": 0.3020261526107788, "learning_rate": 4.713594350245683e-06, "loss": 0.3113, "step": 11851 }, { "epoch": 1.6994551190134786, "grad_norm": 0.2792055904865265, "learning_rate": 4.712761480893019e-06, "loss": 0.2906, "step": 11852 }, { "epoch": 1.6995985087467738, "grad_norm": 0.2666606307029724, "learning_rate": 4.711928619536635e-06, "loss": 0.2896, "step": 11853 }, { "epoch": 1.699741898480069, "grad_norm": 0.28451451659202576, "learning_rate": 4.711095766199714e-06, "loss": 0.2963, "step": 11854 }, { "epoch": 1.699885288213364, "grad_norm": 0.31188464164733887, "learning_rate": 4.710262920905442e-06, "loss": 0.3067, "step": 11855 }, { "epoch": 1.700028677946659, "grad_norm": 0.2922573983669281, "learning_rate": 4.709430083677007e-06, "loss": 0.2986, "step": 11856 }, { "epoch": 1.700172067679954, "grad_norm": 0.28431370854377747, "learning_rate": 4.708597254537589e-06, "loss": 0.2973, "step": 11857 }, { "epoch": 1.7003154574132493, "grad_norm": 0.2927282750606537, "learning_rate": 4.7077644335103756e-06, "loss": 0.3068, "step": 11858 }, { "epoch": 1.7004588471465443, "grad_norm": 0.2751113772392273, "learning_rate": 4.706931620618551e-06, "loss": 0.3116, "step": 11859 }, { "epoch": 1.7006022368798392, "grad_norm": 0.30480605363845825, "learning_rate": 4.706098815885299e-06, "loss": 0.2959, "step": 11860 }, { "epoch": 1.7007456266131347, "grad_norm": 0.2797972559928894, "learning_rate": 4.705266019333804e-06, "loss": 0.3087, "step": 11861 }, { "epoch": 1.7008890163464296, "grad_norm": 0.28371748328208923, "learning_rate": 4.704433230987252e-06, "loss": 0.3008, "step": 11862 }, { "epoch": 1.7010324060797246, "grad_norm": 0.2941613793373108, "learning_rate": 4.703600450868822e-06, "loss": 0.2891, "step": 11863 }, { "epoch": 1.7011757958130198, "grad_norm": 0.28200653195381165, "learning_rate": 4.7027676790016994e-06, "loss": 0.3116, "step": 11864 }, { "epoch": 1.701319185546315, "grad_norm": 0.2657807767391205, "learning_rate": 4.701934915409067e-06, "loss": 0.3054, "step": 11865 }, { "epoch": 1.70146257527961, "grad_norm": 0.2945448160171509, "learning_rate": 4.701102160114109e-06, "loss": 0.3184, "step": 11866 }, { "epoch": 1.701605965012905, "grad_norm": 0.262630432844162, "learning_rate": 4.700269413140007e-06, "loss": 0.3007, "step": 11867 }, { "epoch": 1.7017493547462001, "grad_norm": 0.28045815229415894, "learning_rate": 4.699436674509944e-06, "loss": 0.3192, "step": 11868 }, { "epoch": 1.7018927444794953, "grad_norm": 0.2830614745616913, "learning_rate": 4.698603944247101e-06, "loss": 0.2989, "step": 11869 }, { "epoch": 1.7020361342127903, "grad_norm": 0.2643522322177887, "learning_rate": 4.697771222374659e-06, "loss": 0.2973, "step": 11870 }, { "epoch": 1.7021795239460855, "grad_norm": 0.25833743810653687, "learning_rate": 4.696938508915803e-06, "loss": 0.3054, "step": 11871 }, { "epoch": 1.7023229136793807, "grad_norm": 0.27677059173583984, "learning_rate": 4.696105803893712e-06, "loss": 0.3069, "step": 11872 }, { "epoch": 1.7024663034126757, "grad_norm": 0.2707580626010895, "learning_rate": 4.695273107331571e-06, "loss": 0.2917, "step": 11873 }, { "epoch": 1.7026096931459707, "grad_norm": 0.25657036900520325, "learning_rate": 4.694440419252554e-06, "loss": 0.3106, "step": 11874 }, { "epoch": 1.7027530828792659, "grad_norm": 0.2771605849266052, "learning_rate": 4.693607739679847e-06, "loss": 0.3078, "step": 11875 }, { "epoch": 1.702896472612561, "grad_norm": 0.2880862355232239, "learning_rate": 4.692775068636629e-06, "loss": 0.3027, "step": 11876 }, { "epoch": 1.703039862345856, "grad_norm": 0.28210726380348206, "learning_rate": 4.691942406146081e-06, "loss": 0.3091, "step": 11877 }, { "epoch": 1.703183252079151, "grad_norm": 0.2856706976890564, "learning_rate": 4.691109752231383e-06, "loss": 0.334, "step": 11878 }, { "epoch": 1.7033266418124462, "grad_norm": 0.2590310573577881, "learning_rate": 4.6902771069157125e-06, "loss": 0.3083, "step": 11879 }, { "epoch": 1.7034700315457414, "grad_norm": 0.27392271161079407, "learning_rate": 4.689444470222252e-06, "loss": 0.3041, "step": 11880 }, { "epoch": 1.7036134212790364, "grad_norm": 0.2920782268047333, "learning_rate": 4.6886118421741795e-06, "loss": 0.3074, "step": 11881 }, { "epoch": 1.7037568110123316, "grad_norm": 0.2743551433086395, "learning_rate": 4.687779222794674e-06, "loss": 0.2918, "step": 11882 }, { "epoch": 1.7039002007456268, "grad_norm": 0.2681410014629364, "learning_rate": 4.686946612106917e-06, "loss": 0.3118, "step": 11883 }, { "epoch": 1.7040435904789217, "grad_norm": 0.2616022229194641, "learning_rate": 4.6861140101340825e-06, "loss": 0.2982, "step": 11884 }, { "epoch": 1.7041869802122167, "grad_norm": 0.2860231399536133, "learning_rate": 4.68528141689935e-06, "loss": 0.3137, "step": 11885 }, { "epoch": 1.704330369945512, "grad_norm": 0.2900969684123993, "learning_rate": 4.6844488324259005e-06, "loss": 0.3029, "step": 11886 }, { "epoch": 1.704473759678807, "grad_norm": 0.2796248495578766, "learning_rate": 4.683616256736909e-06, "loss": 0.306, "step": 11887 }, { "epoch": 1.704617149412102, "grad_norm": 0.25681591033935547, "learning_rate": 4.682783689855555e-06, "loss": 0.3119, "step": 11888 }, { "epoch": 1.704760539145397, "grad_norm": 0.2690257132053375, "learning_rate": 4.681951131805017e-06, "loss": 0.3046, "step": 11889 }, { "epoch": 1.7049039288786922, "grad_norm": 0.26255786418914795, "learning_rate": 4.6811185826084685e-06, "loss": 0.3045, "step": 11890 }, { "epoch": 1.7050473186119874, "grad_norm": 0.28111302852630615, "learning_rate": 4.680286042289087e-06, "loss": 0.2883, "step": 11891 }, { "epoch": 1.7051907083452824, "grad_norm": 0.27532631158828735, "learning_rate": 4.679453510870051e-06, "loss": 0.3008, "step": 11892 }, { "epoch": 1.7053340980785776, "grad_norm": 0.2629443109035492, "learning_rate": 4.678620988374537e-06, "loss": 0.2999, "step": 11893 }, { "epoch": 1.7054774878118728, "grad_norm": 0.2826916575431824, "learning_rate": 4.677788474825721e-06, "loss": 0.3014, "step": 11894 }, { "epoch": 1.7056208775451678, "grad_norm": 0.2847304940223694, "learning_rate": 4.676955970246776e-06, "loss": 0.2909, "step": 11895 }, { "epoch": 1.7057642672784628, "grad_norm": 0.27280646562576294, "learning_rate": 4.67612347466088e-06, "loss": 0.2801, "step": 11896 }, { "epoch": 1.705907657011758, "grad_norm": 0.29174283146858215, "learning_rate": 4.675290988091209e-06, "loss": 0.3175, "step": 11897 }, { "epoch": 1.7060510467450531, "grad_norm": 0.2890873849391937, "learning_rate": 4.674458510560938e-06, "loss": 0.3233, "step": 11898 }, { "epoch": 1.7061944364783481, "grad_norm": 0.29470396041870117, "learning_rate": 4.67362604209324e-06, "loss": 0.3161, "step": 11899 }, { "epoch": 1.706337826211643, "grad_norm": 0.30403947830200195, "learning_rate": 4.6727935827112936e-06, "loss": 0.3074, "step": 11900 }, { "epoch": 1.7064812159449383, "grad_norm": 0.2804306745529175, "learning_rate": 4.671961132438269e-06, "loss": 0.3017, "step": 11901 }, { "epoch": 1.7066246056782335, "grad_norm": 0.28628045320510864, "learning_rate": 4.671128691297341e-06, "loss": 0.2972, "step": 11902 }, { "epoch": 1.7067679954115285, "grad_norm": 0.2600151300430298, "learning_rate": 4.6702962593116845e-06, "loss": 0.3092, "step": 11903 }, { "epoch": 1.7069113851448237, "grad_norm": 0.29635390639305115, "learning_rate": 4.669463836504473e-06, "loss": 0.3074, "step": 11904 }, { "epoch": 1.7070547748781189, "grad_norm": 0.29159700870513916, "learning_rate": 4.66863142289888e-06, "loss": 0.3183, "step": 11905 }, { "epoch": 1.7071981646114138, "grad_norm": 0.28228825330734253, "learning_rate": 4.6677990185180784e-06, "loss": 0.2977, "step": 11906 }, { "epoch": 1.7073415543447088, "grad_norm": 0.2828816771507263, "learning_rate": 4.666966623385241e-06, "loss": 0.3051, "step": 11907 }, { "epoch": 1.707484944078004, "grad_norm": 0.29540470242500305, "learning_rate": 4.6661342375235406e-06, "loss": 0.2962, "step": 11908 }, { "epoch": 1.7076283338112992, "grad_norm": 0.27740705013275146, "learning_rate": 4.66530186095615e-06, "loss": 0.2896, "step": 11909 }, { "epoch": 1.7077717235445942, "grad_norm": 0.2832775413990021, "learning_rate": 4.664469493706242e-06, "loss": 0.2934, "step": 11910 }, { "epoch": 1.7079151132778891, "grad_norm": 0.2786891460418701, "learning_rate": 4.663637135796985e-06, "loss": 0.3035, "step": 11911 }, { "epoch": 1.7080585030111846, "grad_norm": 0.2696746289730072, "learning_rate": 4.662804787251553e-06, "loss": 0.308, "step": 11912 }, { "epoch": 1.7082018927444795, "grad_norm": 0.2802963852882385, "learning_rate": 4.661972448093116e-06, "loss": 0.3037, "step": 11913 }, { "epoch": 1.7083452824777745, "grad_norm": 0.27601358294487, "learning_rate": 4.661140118344847e-06, "loss": 0.2929, "step": 11914 }, { "epoch": 1.7084886722110697, "grad_norm": 0.3006778955459595, "learning_rate": 4.660307798029915e-06, "loss": 0.3074, "step": 11915 }, { "epoch": 1.708632061944365, "grad_norm": 0.27888181805610657, "learning_rate": 4.659475487171495e-06, "loss": 0.286, "step": 11916 }, { "epoch": 1.7087754516776599, "grad_norm": 0.2667906880378723, "learning_rate": 4.658643185792751e-06, "loss": 0.2927, "step": 11917 }, { "epoch": 1.7089188414109548, "grad_norm": 0.28446164727211, "learning_rate": 4.657810893916856e-06, "loss": 0.293, "step": 11918 }, { "epoch": 1.70906223114425, "grad_norm": 0.2736666202545166, "learning_rate": 4.656978611566978e-06, "loss": 0.2927, "step": 11919 }, { "epoch": 1.7092056208775452, "grad_norm": 0.26664093136787415, "learning_rate": 4.656146338766289e-06, "loss": 0.3246, "step": 11920 }, { "epoch": 1.7093490106108402, "grad_norm": 0.276748389005661, "learning_rate": 4.655314075537957e-06, "loss": 0.3012, "step": 11921 }, { "epoch": 1.7094924003441354, "grad_norm": 0.26033806800842285, "learning_rate": 4.6544818219051505e-06, "loss": 0.3067, "step": 11922 }, { "epoch": 1.7096357900774306, "grad_norm": 0.2681826055049896, "learning_rate": 4.653649577891038e-06, "loss": 0.317, "step": 11923 }, { "epoch": 1.7097791798107256, "grad_norm": 0.2727421820163727, "learning_rate": 4.6528173435187895e-06, "loss": 0.3013, "step": 11924 }, { "epoch": 1.7099225695440206, "grad_norm": 0.2726120948791504, "learning_rate": 4.651985118811572e-06, "loss": 0.2982, "step": 11925 }, { "epoch": 1.7100659592773158, "grad_norm": 0.3190760016441345, "learning_rate": 4.651152903792552e-06, "loss": 0.2858, "step": 11926 }, { "epoch": 1.710209349010611, "grad_norm": 0.2798575162887573, "learning_rate": 4.650320698484902e-06, "loss": 0.321, "step": 11927 }, { "epoch": 1.710352738743906, "grad_norm": 0.28721028566360474, "learning_rate": 4.649488502911784e-06, "loss": 0.2883, "step": 11928 }, { "epoch": 1.710496128477201, "grad_norm": 0.28746098279953003, "learning_rate": 4.648656317096365e-06, "loss": 0.3223, "step": 11929 }, { "epoch": 1.710639518210496, "grad_norm": 0.3043254315853119, "learning_rate": 4.647824141061816e-06, "loss": 0.3176, "step": 11930 }, { "epoch": 1.7107829079437913, "grad_norm": 0.29284659028053284, "learning_rate": 4.6469919748313e-06, "loss": 0.316, "step": 11931 }, { "epoch": 1.7109262976770863, "grad_norm": 0.2925458252429962, "learning_rate": 4.646159818427986e-06, "loss": 0.3073, "step": 11932 }, { "epoch": 1.7110696874103815, "grad_norm": 0.2880323827266693, "learning_rate": 4.645327671875036e-06, "loss": 0.3035, "step": 11933 }, { "epoch": 1.7112130771436767, "grad_norm": 0.2857211232185364, "learning_rate": 4.644495535195619e-06, "loss": 0.3143, "step": 11934 }, { "epoch": 1.7113564668769716, "grad_norm": 0.28211739659309387, "learning_rate": 4.643663408412899e-06, "loss": 0.2921, "step": 11935 }, { "epoch": 1.7114998566102666, "grad_norm": 0.2834619879722595, "learning_rate": 4.642831291550042e-06, "loss": 0.3085, "step": 11936 }, { "epoch": 1.7116432463435618, "grad_norm": 0.29091089963912964, "learning_rate": 4.641999184630215e-06, "loss": 0.3303, "step": 11937 }, { "epoch": 1.711786636076857, "grad_norm": 0.31087779998779297, "learning_rate": 4.641167087676577e-06, "loss": 0.3162, "step": 11938 }, { "epoch": 1.711930025810152, "grad_norm": 0.29541921615600586, "learning_rate": 4.640335000712296e-06, "loss": 0.3112, "step": 11939 }, { "epoch": 1.712073415543447, "grad_norm": 0.27266764640808105, "learning_rate": 4.639502923760535e-06, "loss": 0.3007, "step": 11940 }, { "epoch": 1.7122168052767421, "grad_norm": 0.2767946422100067, "learning_rate": 4.638670856844456e-06, "loss": 0.3021, "step": 11941 }, { "epoch": 1.7123601950100373, "grad_norm": 0.28268691897392273, "learning_rate": 4.637838799987227e-06, "loss": 0.3153, "step": 11942 }, { "epoch": 1.7125035847433323, "grad_norm": 0.2749710977077484, "learning_rate": 4.63700675321201e-06, "loss": 0.2812, "step": 11943 }, { "epoch": 1.7126469744766275, "grad_norm": 0.27140235900878906, "learning_rate": 4.636174716541965e-06, "loss": 0.3089, "step": 11944 }, { "epoch": 1.7127903642099227, "grad_norm": 0.27637988328933716, "learning_rate": 4.635342690000256e-06, "loss": 0.3016, "step": 11945 }, { "epoch": 1.7129337539432177, "grad_norm": 0.2964145243167877, "learning_rate": 4.6345106736100445e-06, "loss": 0.2997, "step": 11946 }, { "epoch": 1.7130771436765126, "grad_norm": 0.260410338640213, "learning_rate": 4.633678667394495e-06, "loss": 0.3051, "step": 11947 }, { "epoch": 1.7132205334098078, "grad_norm": 0.27293476462364197, "learning_rate": 4.632846671376769e-06, "loss": 0.3173, "step": 11948 }, { "epoch": 1.713363923143103, "grad_norm": 0.30154964327812195, "learning_rate": 4.632014685580025e-06, "loss": 0.3209, "step": 11949 }, { "epoch": 1.713507312876398, "grad_norm": 0.27439385652542114, "learning_rate": 4.631182710027426e-06, "loss": 0.2937, "step": 11950 }, { "epoch": 1.713650702609693, "grad_norm": 0.2786180078983307, "learning_rate": 4.6303507447421345e-06, "loss": 0.3103, "step": 11951 }, { "epoch": 1.7137940923429884, "grad_norm": 0.2953184247016907, "learning_rate": 4.629518789747308e-06, "loss": 0.3094, "step": 11952 }, { "epoch": 1.7139374820762834, "grad_norm": 0.2810578942298889, "learning_rate": 4.628686845066109e-06, "loss": 0.3043, "step": 11953 }, { "epoch": 1.7140808718095784, "grad_norm": 0.27053284645080566, "learning_rate": 4.6278549107216995e-06, "loss": 0.3081, "step": 11954 }, { "epoch": 1.7142242615428736, "grad_norm": 0.29913949966430664, "learning_rate": 4.627022986737235e-06, "loss": 0.2949, "step": 11955 }, { "epoch": 1.7143676512761687, "grad_norm": 0.2779555916786194, "learning_rate": 4.626191073135876e-06, "loss": 0.2834, "step": 11956 }, { "epoch": 1.7145110410094637, "grad_norm": 0.28407034277915955, "learning_rate": 4.625359169940783e-06, "loss": 0.2911, "step": 11957 }, { "epoch": 1.7146544307427587, "grad_norm": 0.2724411189556122, "learning_rate": 4.624527277175114e-06, "loss": 0.2856, "step": 11958 }, { "epoch": 1.714797820476054, "grad_norm": 0.27777597308158875, "learning_rate": 4.623695394862029e-06, "loss": 0.2976, "step": 11959 }, { "epoch": 1.714941210209349, "grad_norm": 0.29181045293807983, "learning_rate": 4.622863523024685e-06, "loss": 0.3203, "step": 11960 }, { "epoch": 1.715084599942644, "grad_norm": 0.2793278694152832, "learning_rate": 4.62203166168624e-06, "loss": 0.2998, "step": 11961 }, { "epoch": 1.7152279896759393, "grad_norm": 0.27967745065689087, "learning_rate": 4.621199810869852e-06, "loss": 0.3125, "step": 11962 }, { "epoch": 1.7153713794092345, "grad_norm": 0.27727752923965454, "learning_rate": 4.620367970598679e-06, "loss": 0.302, "step": 11963 }, { "epoch": 1.7155147691425294, "grad_norm": 0.2758244276046753, "learning_rate": 4.619536140895879e-06, "loss": 0.3199, "step": 11964 }, { "epoch": 1.7156581588758244, "grad_norm": 0.27973803877830505, "learning_rate": 4.618704321784606e-06, "loss": 0.3144, "step": 11965 }, { "epoch": 1.7158015486091196, "grad_norm": 0.2594835162162781, "learning_rate": 4.617872513288018e-06, "loss": 0.2976, "step": 11966 }, { "epoch": 1.7159449383424148, "grad_norm": 0.27311283349990845, "learning_rate": 4.617040715429272e-06, "loss": 0.2875, "step": 11967 }, { "epoch": 1.7160883280757098, "grad_norm": 0.279116153717041, "learning_rate": 4.616208928231522e-06, "loss": 0.2976, "step": 11968 }, { "epoch": 1.7162317178090047, "grad_norm": 0.2984631657600403, "learning_rate": 4.615377151717926e-06, "loss": 0.3041, "step": 11969 }, { "epoch": 1.7163751075423, "grad_norm": 0.28470730781555176, "learning_rate": 4.61454538591164e-06, "loss": 0.3102, "step": 11970 }, { "epoch": 1.7165184972755951, "grad_norm": 0.2606881856918335, "learning_rate": 4.613713630835816e-06, "loss": 0.3201, "step": 11971 }, { "epoch": 1.71666188700889, "grad_norm": 0.2987067699432373, "learning_rate": 4.6128818865136095e-06, "loss": 0.2968, "step": 11972 }, { "epoch": 1.7168052767421853, "grad_norm": 0.26114505529403687, "learning_rate": 4.612050152968176e-06, "loss": 0.3085, "step": 11973 }, { "epoch": 1.7169486664754805, "grad_norm": 0.2681777775287628, "learning_rate": 4.611218430222671e-06, "loss": 0.2933, "step": 11974 }, { "epoch": 1.7170920562087755, "grad_norm": 0.30999451875686646, "learning_rate": 4.610386718300246e-06, "loss": 0.3145, "step": 11975 }, { "epoch": 1.7172354459420704, "grad_norm": 0.26552703976631165, "learning_rate": 4.609555017224055e-06, "loss": 0.3076, "step": 11976 }, { "epoch": 1.7173788356753656, "grad_norm": 0.2724406123161316, "learning_rate": 4.608723327017252e-06, "loss": 0.2927, "step": 11977 }, { "epoch": 1.7175222254086608, "grad_norm": 0.2792024612426758, "learning_rate": 4.607891647702989e-06, "loss": 0.2759, "step": 11978 }, { "epoch": 1.7176656151419558, "grad_norm": 0.2952873408794403, "learning_rate": 4.60705997930442e-06, "loss": 0.3164, "step": 11979 }, { "epoch": 1.7178090048752508, "grad_norm": 0.2747594118118286, "learning_rate": 4.606228321844697e-06, "loss": 0.304, "step": 11980 }, { "epoch": 1.717952394608546, "grad_norm": 0.26343968510627747, "learning_rate": 4.605396675346973e-06, "loss": 0.3015, "step": 11981 }, { "epoch": 1.7180957843418412, "grad_norm": 0.2732545733451843, "learning_rate": 4.604565039834398e-06, "loss": 0.2976, "step": 11982 }, { "epoch": 1.7182391740751362, "grad_norm": 0.2756422758102417, "learning_rate": 4.603733415330123e-06, "loss": 0.3128, "step": 11983 }, { "epoch": 1.7183825638084314, "grad_norm": 0.27998900413513184, "learning_rate": 4.6029018018572995e-06, "loss": 0.2896, "step": 11984 }, { "epoch": 1.7185259535417265, "grad_norm": 0.28702202439308167, "learning_rate": 4.60207019943908e-06, "loss": 0.3033, "step": 11985 }, { "epoch": 1.7186693432750215, "grad_norm": 0.2766452431678772, "learning_rate": 4.6012386080986145e-06, "loss": 0.2916, "step": 11986 }, { "epoch": 1.7188127330083165, "grad_norm": 0.2864442765712738, "learning_rate": 4.600407027859052e-06, "loss": 0.3108, "step": 11987 }, { "epoch": 1.7189561227416117, "grad_norm": 0.26014280319213867, "learning_rate": 4.599575458743543e-06, "loss": 0.3052, "step": 11988 }, { "epoch": 1.7190995124749069, "grad_norm": 0.2934301197528839, "learning_rate": 4.598743900775237e-06, "loss": 0.2875, "step": 11989 }, { "epoch": 1.7192429022082019, "grad_norm": 0.27565667033195496, "learning_rate": 4.597912353977284e-06, "loss": 0.3095, "step": 11990 }, { "epoch": 1.7193862919414968, "grad_norm": 0.28942427039146423, "learning_rate": 4.597080818372834e-06, "loss": 0.292, "step": 11991 }, { "epoch": 1.719529681674792, "grad_norm": 0.2872937321662903, "learning_rate": 4.5962492939850325e-06, "loss": 0.2919, "step": 11992 }, { "epoch": 1.7196730714080872, "grad_norm": 0.26097872853279114, "learning_rate": 4.59541778083703e-06, "loss": 0.3001, "step": 11993 }, { "epoch": 1.7198164611413822, "grad_norm": 0.2876463830471039, "learning_rate": 4.594586278951973e-06, "loss": 0.2812, "step": 11994 }, { "epoch": 1.7199598508746774, "grad_norm": 0.30090734362602234, "learning_rate": 4.593754788353012e-06, "loss": 0.3169, "step": 11995 }, { "epoch": 1.7201032406079726, "grad_norm": 0.2604043185710907, "learning_rate": 4.592923309063291e-06, "loss": 0.2951, "step": 11996 }, { "epoch": 1.7202466303412676, "grad_norm": 0.28083887696266174, "learning_rate": 4.592091841105961e-06, "loss": 0.3229, "step": 11997 }, { "epoch": 1.7203900200745625, "grad_norm": 0.26596206426620483, "learning_rate": 4.591260384504166e-06, "loss": 0.2991, "step": 11998 }, { "epoch": 1.7205334098078577, "grad_norm": 0.2848697006702423, "learning_rate": 4.590428939281052e-06, "loss": 0.2913, "step": 11999 }, { "epoch": 1.720676799541153, "grad_norm": 0.2845052182674408, "learning_rate": 4.589597505459767e-06, "loss": 0.291, "step": 12000 }, { "epoch": 1.720820189274448, "grad_norm": 0.28703147172927856, "learning_rate": 4.588766083063456e-06, "loss": 0.3032, "step": 12001 }, { "epoch": 1.7209635790077429, "grad_norm": 0.3075646758079529, "learning_rate": 4.587934672115266e-06, "loss": 0.2996, "step": 12002 }, { "epoch": 1.7211069687410383, "grad_norm": 0.27657589316368103, "learning_rate": 4.587103272638339e-06, "loss": 0.3082, "step": 12003 }, { "epoch": 1.7212503584743333, "grad_norm": 0.2787970304489136, "learning_rate": 4.5862718846558226e-06, "loss": 0.2927, "step": 12004 }, { "epoch": 1.7213937482076282, "grad_norm": 0.2634420692920685, "learning_rate": 4.5854405081908596e-06, "loss": 0.3055, "step": 12005 }, { "epoch": 1.7215371379409234, "grad_norm": 0.29117757081985474, "learning_rate": 4.584609143266596e-06, "loss": 0.2929, "step": 12006 }, { "epoch": 1.7216805276742186, "grad_norm": 0.27386900782585144, "learning_rate": 4.583777789906174e-06, "loss": 0.3022, "step": 12007 }, { "epoch": 1.7218239174075136, "grad_norm": 0.2847100794315338, "learning_rate": 4.582946448132741e-06, "loss": 0.2919, "step": 12008 }, { "epoch": 1.7219673071408086, "grad_norm": 0.2922246754169464, "learning_rate": 4.5821151179694345e-06, "loss": 0.2953, "step": 12009 }, { "epoch": 1.7221106968741038, "grad_norm": 0.2685607671737671, "learning_rate": 4.581283799439401e-06, "loss": 0.3111, "step": 12010 }, { "epoch": 1.722254086607399, "grad_norm": 0.2578880190849304, "learning_rate": 4.580452492565782e-06, "loss": 0.2778, "step": 12011 }, { "epoch": 1.722397476340694, "grad_norm": 0.2785336971282959, "learning_rate": 4.57962119737172e-06, "loss": 0.28, "step": 12012 }, { "epoch": 1.7225408660739892, "grad_norm": 0.280224472284317, "learning_rate": 4.578789913880359e-06, "loss": 0.3081, "step": 12013 }, { "epoch": 1.7226842558072843, "grad_norm": 0.28650328516960144, "learning_rate": 4.577958642114837e-06, "loss": 0.293, "step": 12014 }, { "epoch": 1.7228276455405793, "grad_norm": 0.27363458275794983, "learning_rate": 4.5771273820982975e-06, "loss": 0.2979, "step": 12015 }, { "epoch": 1.7229710352738743, "grad_norm": 0.2573731243610382, "learning_rate": 4.576296133853881e-06, "loss": 0.2968, "step": 12016 }, { "epoch": 1.7231144250071695, "grad_norm": 0.27970796823501587, "learning_rate": 4.575464897404729e-06, "loss": 0.3032, "step": 12017 }, { "epoch": 1.7232578147404647, "grad_norm": 0.28824859857559204, "learning_rate": 4.5746336727739835e-06, "loss": 0.3074, "step": 12018 }, { "epoch": 1.7234012044737597, "grad_norm": 0.2729308009147644, "learning_rate": 4.573802459984779e-06, "loss": 0.2868, "step": 12019 }, { "epoch": 1.7235445942070546, "grad_norm": 0.26932257413864136, "learning_rate": 4.572971259060259e-06, "loss": 0.3116, "step": 12020 }, { "epoch": 1.7236879839403498, "grad_norm": 0.279153972864151, "learning_rate": 4.572140070023562e-06, "loss": 0.2767, "step": 12021 }, { "epoch": 1.723831373673645, "grad_norm": 0.277279794216156, "learning_rate": 4.571308892897828e-06, "loss": 0.3368, "step": 12022 }, { "epoch": 1.72397476340694, "grad_norm": 0.27041172981262207, "learning_rate": 4.570477727706194e-06, "loss": 0.3092, "step": 12023 }, { "epoch": 1.7241181531402352, "grad_norm": 0.2783508896827698, "learning_rate": 4.569646574471801e-06, "loss": 0.2922, "step": 12024 }, { "epoch": 1.7242615428735304, "grad_norm": 0.27674928307533264, "learning_rate": 4.568815433217784e-06, "loss": 0.3041, "step": 12025 }, { "epoch": 1.7244049326068254, "grad_norm": 0.28187599778175354, "learning_rate": 4.5679843039672825e-06, "loss": 0.2874, "step": 12026 }, { "epoch": 1.7245483223401203, "grad_norm": 0.25450599193573, "learning_rate": 4.567153186743433e-06, "loss": 0.305, "step": 12027 }, { "epoch": 1.7246917120734155, "grad_norm": 0.26317864656448364, "learning_rate": 4.566322081569373e-06, "loss": 0.2947, "step": 12028 }, { "epoch": 1.7248351018067107, "grad_norm": 0.2662152945995331, "learning_rate": 4.565490988468241e-06, "loss": 0.3089, "step": 12029 }, { "epoch": 1.7249784915400057, "grad_norm": 0.2842191159725189, "learning_rate": 4.5646599074631695e-06, "loss": 0.3045, "step": 12030 }, { "epoch": 1.7251218812733007, "grad_norm": 0.28033336997032166, "learning_rate": 4.563828838577295e-06, "loss": 0.2939, "step": 12031 }, { "epoch": 1.7252652710065959, "grad_norm": 0.2835569679737091, "learning_rate": 4.562997781833757e-06, "loss": 0.3278, "step": 12032 }, { "epoch": 1.725408660739891, "grad_norm": 0.27717962861061096, "learning_rate": 4.562166737255688e-06, "loss": 0.3182, "step": 12033 }, { "epoch": 1.725552050473186, "grad_norm": 0.279394268989563, "learning_rate": 4.561335704866223e-06, "loss": 0.2868, "step": 12034 }, { "epoch": 1.7256954402064812, "grad_norm": 0.273445725440979, "learning_rate": 4.560504684688499e-06, "loss": 0.3021, "step": 12035 }, { "epoch": 1.7258388299397764, "grad_norm": 0.26287901401519775, "learning_rate": 4.559673676745648e-06, "loss": 0.2887, "step": 12036 }, { "epoch": 1.7259822196730714, "grad_norm": 0.27518510818481445, "learning_rate": 4.558842681060803e-06, "loss": 0.3036, "step": 12037 }, { "epoch": 1.7261256094063664, "grad_norm": 0.2862704396247864, "learning_rate": 4.558011697657099e-06, "loss": 0.301, "step": 12038 }, { "epoch": 1.7262689991396616, "grad_norm": 0.290486603975296, "learning_rate": 4.55718072655767e-06, "loss": 0.3239, "step": 12039 }, { "epoch": 1.7264123888729568, "grad_norm": 0.28893205523490906, "learning_rate": 4.55634976778565e-06, "loss": 0.3091, "step": 12040 }, { "epoch": 1.7265557786062518, "grad_norm": 0.2710559666156769, "learning_rate": 4.5555188213641685e-06, "loss": 0.3209, "step": 12041 }, { "epoch": 1.7266991683395467, "grad_norm": 0.2767998278141022, "learning_rate": 4.554687887316359e-06, "loss": 0.2938, "step": 12042 }, { "epoch": 1.7268425580728421, "grad_norm": 0.27060946822166443, "learning_rate": 4.553856965665354e-06, "loss": 0.2876, "step": 12043 }, { "epoch": 1.7269859478061371, "grad_norm": 0.2747114896774292, "learning_rate": 4.553026056434285e-06, "loss": 0.2845, "step": 12044 }, { "epoch": 1.727129337539432, "grad_norm": 0.274357408285141, "learning_rate": 4.552195159646285e-06, "loss": 0.3042, "step": 12045 }, { "epoch": 1.7272727272727273, "grad_norm": 0.28239375352859497, "learning_rate": 4.55136427532448e-06, "loss": 0.3182, "step": 12046 }, { "epoch": 1.7274161170060225, "grad_norm": 0.251068651676178, "learning_rate": 4.550533403492004e-06, "loss": 0.3057, "step": 12047 }, { "epoch": 1.7275595067393175, "grad_norm": 0.28453126549720764, "learning_rate": 4.549702544171987e-06, "loss": 0.2752, "step": 12048 }, { "epoch": 1.7277028964726124, "grad_norm": 0.2724812626838684, "learning_rate": 4.548871697387558e-06, "loss": 0.2836, "step": 12049 }, { "epoch": 1.7278462862059076, "grad_norm": 0.26612141728401184, "learning_rate": 4.548040863161848e-06, "loss": 0.3214, "step": 12050 }, { "epoch": 1.7279896759392028, "grad_norm": 0.28746917843818665, "learning_rate": 4.547210041517985e-06, "loss": 0.3056, "step": 12051 }, { "epoch": 1.7281330656724978, "grad_norm": 0.27876654267311096, "learning_rate": 4.546379232479097e-06, "loss": 0.2957, "step": 12052 }, { "epoch": 1.728276455405793, "grad_norm": 0.28936097025871277, "learning_rate": 4.545548436068314e-06, "loss": 0.3027, "step": 12053 }, { "epoch": 1.7284198451390882, "grad_norm": 0.2811194360256195, "learning_rate": 4.544717652308763e-06, "loss": 0.2938, "step": 12054 }, { "epoch": 1.7285632348723832, "grad_norm": 0.2768821716308594, "learning_rate": 4.543886881223572e-06, "loss": 0.3316, "step": 12055 }, { "epoch": 1.7287066246056781, "grad_norm": 0.26677802205085754, "learning_rate": 4.543056122835871e-06, "loss": 0.3004, "step": 12056 }, { "epoch": 1.7288500143389733, "grad_norm": 0.2812485992908478, "learning_rate": 4.542225377168783e-06, "loss": 0.3065, "step": 12057 }, { "epoch": 1.7289934040722685, "grad_norm": 0.27740374207496643, "learning_rate": 4.541394644245435e-06, "loss": 0.2985, "step": 12058 }, { "epoch": 1.7291367938055635, "grad_norm": 0.271603524684906, "learning_rate": 4.540563924088954e-06, "loss": 0.284, "step": 12059 }, { "epoch": 1.7292801835388585, "grad_norm": 0.25970277190208435, "learning_rate": 4.539733216722468e-06, "loss": 0.2785, "step": 12060 }, { "epoch": 1.7294235732721537, "grad_norm": 0.30944445729255676, "learning_rate": 4.538902522169102e-06, "loss": 0.2987, "step": 12061 }, { "epoch": 1.7295669630054489, "grad_norm": 0.29291918873786926, "learning_rate": 4.538071840451982e-06, "loss": 0.3198, "step": 12062 }, { "epoch": 1.7297103527387439, "grad_norm": 0.2768805921077728, "learning_rate": 4.537241171594229e-06, "loss": 0.3106, "step": 12063 }, { "epoch": 1.729853742472039, "grad_norm": 0.29384297132492065, "learning_rate": 4.53641051561897e-06, "loss": 0.3151, "step": 12064 }, { "epoch": 1.7299971322053342, "grad_norm": 0.3065792918205261, "learning_rate": 4.535579872549329e-06, "loss": 0.2918, "step": 12065 }, { "epoch": 1.7301405219386292, "grad_norm": 0.28701093792915344, "learning_rate": 4.53474924240843e-06, "loss": 0.2997, "step": 12066 }, { "epoch": 1.7302839116719242, "grad_norm": 0.2770175039768219, "learning_rate": 4.533918625219398e-06, "loss": 0.3026, "step": 12067 }, { "epoch": 1.7304273014052194, "grad_norm": 0.2786700427532196, "learning_rate": 4.533088021005353e-06, "loss": 0.3063, "step": 12068 }, { "epoch": 1.7305706911385146, "grad_norm": 0.2596506178379059, "learning_rate": 4.532257429789419e-06, "loss": 0.2943, "step": 12069 }, { "epoch": 1.7307140808718096, "grad_norm": 0.2805655300617218, "learning_rate": 4.531426851594719e-06, "loss": 0.311, "step": 12070 }, { "epoch": 1.7308574706051045, "grad_norm": 0.31363558769226074, "learning_rate": 4.530596286444374e-06, "loss": 0.3053, "step": 12071 }, { "epoch": 1.7310008603383997, "grad_norm": 0.281955361366272, "learning_rate": 4.529765734361509e-06, "loss": 0.2813, "step": 12072 }, { "epoch": 1.731144250071695, "grad_norm": 0.2875111699104309, "learning_rate": 4.528935195369241e-06, "loss": 0.3079, "step": 12073 }, { "epoch": 1.73128763980499, "grad_norm": 0.30723246932029724, "learning_rate": 4.528104669490692e-06, "loss": 0.3134, "step": 12074 }, { "epoch": 1.731431029538285, "grad_norm": 0.2786428928375244, "learning_rate": 4.527274156748983e-06, "loss": 0.3214, "step": 12075 }, { "epoch": 1.7315744192715803, "grad_norm": 0.2912532687187195, "learning_rate": 4.526443657167235e-06, "loss": 0.3087, "step": 12076 }, { "epoch": 1.7317178090048753, "grad_norm": 0.25369492173194885, "learning_rate": 4.525613170768566e-06, "loss": 0.3022, "step": 12077 }, { "epoch": 1.7318611987381702, "grad_norm": 0.2888185381889343, "learning_rate": 4.524782697576098e-06, "loss": 0.2956, "step": 12078 }, { "epoch": 1.7320045884714654, "grad_norm": 0.28108909726142883, "learning_rate": 4.523952237612947e-06, "loss": 0.3155, "step": 12079 }, { "epoch": 1.7321479782047606, "grad_norm": 0.2902373969554901, "learning_rate": 4.5231217909022345e-06, "loss": 0.3186, "step": 12080 }, { "epoch": 1.7322913679380556, "grad_norm": 0.28649574518203735, "learning_rate": 4.5222913574670774e-06, "loss": 0.3123, "step": 12081 }, { "epoch": 1.7324347576713506, "grad_norm": 0.282387912273407, "learning_rate": 4.521460937330594e-06, "loss": 0.3016, "step": 12082 }, { "epoch": 1.7325781474046458, "grad_norm": 0.29566115140914917, "learning_rate": 4.520630530515904e-06, "loss": 0.3091, "step": 12083 }, { "epoch": 1.732721537137941, "grad_norm": 0.31223925948143005, "learning_rate": 4.519800137046121e-06, "loss": 0.3096, "step": 12084 }, { "epoch": 1.732864926871236, "grad_norm": 0.26545068621635437, "learning_rate": 4.518969756944363e-06, "loss": 0.2969, "step": 12085 }, { "epoch": 1.7330083166045311, "grad_norm": 0.3141895532608032, "learning_rate": 4.518139390233747e-06, "loss": 0.3038, "step": 12086 }, { "epoch": 1.7331517063378263, "grad_norm": 0.2998794615268707, "learning_rate": 4.517309036937389e-06, "loss": 0.2951, "step": 12087 }, { "epoch": 1.7332950960711213, "grad_norm": 0.3041084110736847, "learning_rate": 4.516478697078405e-06, "loss": 0.3121, "step": 12088 }, { "epoch": 1.7334384858044163, "grad_norm": 0.27284491062164307, "learning_rate": 4.515648370679912e-06, "loss": 0.3215, "step": 12089 }, { "epoch": 1.7335818755377115, "grad_norm": 0.27707621455192566, "learning_rate": 4.514818057765022e-06, "loss": 0.2962, "step": 12090 }, { "epoch": 1.7337252652710067, "grad_norm": 0.28462573885917664, "learning_rate": 4.51398775835685e-06, "loss": 0.3004, "step": 12091 }, { "epoch": 1.7338686550043017, "grad_norm": 0.31109556555747986, "learning_rate": 4.513157472478512e-06, "loss": 0.2725, "step": 12092 }, { "epoch": 1.7340120447375966, "grad_norm": 0.2731233239173889, "learning_rate": 4.512327200153122e-06, "loss": 0.3002, "step": 12093 }, { "epoch": 1.734155434470892, "grad_norm": 0.2892613112926483, "learning_rate": 4.511496941403792e-06, "loss": 0.2928, "step": 12094 }, { "epoch": 1.734298824204187, "grad_norm": 0.26466673612594604, "learning_rate": 4.510666696253635e-06, "loss": 0.2869, "step": 12095 }, { "epoch": 1.734442213937482, "grad_norm": 0.2699374854564667, "learning_rate": 4.509836464725765e-06, "loss": 0.3021, "step": 12096 }, { "epoch": 1.7345856036707772, "grad_norm": 0.29567015171051025, "learning_rate": 4.509006246843293e-06, "loss": 0.3001, "step": 12097 }, { "epoch": 1.7347289934040724, "grad_norm": 0.28738927841186523, "learning_rate": 4.508176042629332e-06, "loss": 0.3079, "step": 12098 }, { "epoch": 1.7348723831373674, "grad_norm": 0.27068811655044556, "learning_rate": 4.507345852106995e-06, "loss": 0.2947, "step": 12099 }, { "epoch": 1.7350157728706623, "grad_norm": 0.275424987077713, "learning_rate": 4.506515675299391e-06, "loss": 0.2994, "step": 12100 }, { "epoch": 1.7351591626039575, "grad_norm": 0.28220099210739136, "learning_rate": 4.505685512229631e-06, "loss": 0.2987, "step": 12101 }, { "epoch": 1.7353025523372527, "grad_norm": 0.2949932813644409, "learning_rate": 4.504855362920826e-06, "loss": 0.3079, "step": 12102 }, { "epoch": 1.7354459420705477, "grad_norm": 0.29557088017463684, "learning_rate": 4.504025227396086e-06, "loss": 0.3235, "step": 12103 }, { "epoch": 1.735589331803843, "grad_norm": 0.2774346172809601, "learning_rate": 4.50319510567852e-06, "loss": 0.3155, "step": 12104 }, { "epoch": 1.735732721537138, "grad_norm": 0.2859446108341217, "learning_rate": 4.502364997791239e-06, "loss": 0.3087, "step": 12105 }, { "epoch": 1.735876111270433, "grad_norm": 0.2600291073322296, "learning_rate": 4.501534903757351e-06, "loss": 0.3114, "step": 12106 }, { "epoch": 1.736019501003728, "grad_norm": 0.30547404289245605, "learning_rate": 4.500704823599964e-06, "loss": 0.2876, "step": 12107 }, { "epoch": 1.7361628907370232, "grad_norm": 0.2878568172454834, "learning_rate": 4.499874757342186e-06, "loss": 0.3084, "step": 12108 }, { "epoch": 1.7363062804703184, "grad_norm": 0.26953092217445374, "learning_rate": 4.499044705007126e-06, "loss": 0.3063, "step": 12109 }, { "epoch": 1.7364496702036134, "grad_norm": 0.2631324231624603, "learning_rate": 4.498214666617893e-06, "loss": 0.2877, "step": 12110 }, { "epoch": 1.7365930599369084, "grad_norm": 0.2784560024738312, "learning_rate": 4.497384642197589e-06, "loss": 0.3128, "step": 12111 }, { "epoch": 1.7367364496702036, "grad_norm": 0.2890625298023224, "learning_rate": 4.496554631769324e-06, "loss": 0.3113, "step": 12112 }, { "epoch": 1.7368798394034988, "grad_norm": 0.2644708454608917, "learning_rate": 4.495724635356204e-06, "loss": 0.3027, "step": 12113 }, { "epoch": 1.7370232291367937, "grad_norm": 0.27730652689933777, "learning_rate": 4.494894652981334e-06, "loss": 0.3183, "step": 12114 }, { "epoch": 1.737166618870089, "grad_norm": 0.2847556471824646, "learning_rate": 4.494064684667819e-06, "loss": 0.3064, "step": 12115 }, { "epoch": 1.7373100086033841, "grad_norm": 0.26671096682548523, "learning_rate": 4.493234730438767e-06, "loss": 0.2909, "step": 12116 }, { "epoch": 1.7374533983366791, "grad_norm": 0.2614745795726776, "learning_rate": 4.492404790317279e-06, "loss": 0.3167, "step": 12117 }, { "epoch": 1.737596788069974, "grad_norm": 0.28829097747802734, "learning_rate": 4.4915748643264615e-06, "loss": 0.3005, "step": 12118 }, { "epoch": 1.7377401778032693, "grad_norm": 0.292625367641449, "learning_rate": 4.490744952489418e-06, "loss": 0.295, "step": 12119 }, { "epoch": 1.7378835675365645, "grad_norm": 0.2869797646999359, "learning_rate": 4.489915054829251e-06, "loss": 0.287, "step": 12120 }, { "epoch": 1.7380269572698595, "grad_norm": 0.27168744802474976, "learning_rate": 4.4890851713690646e-06, "loss": 0.306, "step": 12121 }, { "epoch": 1.7381703470031544, "grad_norm": 0.27295130491256714, "learning_rate": 4.488255302131961e-06, "loss": 0.3005, "step": 12122 }, { "epoch": 1.7383137367364496, "grad_norm": 0.30487290024757385, "learning_rate": 4.487425447141042e-06, "loss": 0.3266, "step": 12123 }, { "epoch": 1.7384571264697448, "grad_norm": 0.27919700741767883, "learning_rate": 4.486595606419411e-06, "loss": 0.3064, "step": 12124 }, { "epoch": 1.7386005162030398, "grad_norm": 0.26194295287132263, "learning_rate": 4.485765779990167e-06, "loss": 0.3046, "step": 12125 }, { "epoch": 1.738743905936335, "grad_norm": 0.29029491543769836, "learning_rate": 4.484935967876416e-06, "loss": 0.3071, "step": 12126 }, { "epoch": 1.7388872956696302, "grad_norm": 0.27357906103134155, "learning_rate": 4.484106170101253e-06, "loss": 0.292, "step": 12127 }, { "epoch": 1.7390306854029252, "grad_norm": 0.24373044073581696, "learning_rate": 4.48327638668778e-06, "loss": 0.2801, "step": 12128 }, { "epoch": 1.7391740751362201, "grad_norm": 0.2733668088912964, "learning_rate": 4.482446617659098e-06, "loss": 0.2982, "step": 12129 }, { "epoch": 1.7393174648695153, "grad_norm": 0.261383593082428, "learning_rate": 4.4816168630383065e-06, "loss": 0.2788, "step": 12130 }, { "epoch": 1.7394608546028105, "grad_norm": 0.26428741216659546, "learning_rate": 4.480787122848503e-06, "loss": 0.2941, "step": 12131 }, { "epoch": 1.7396042443361055, "grad_norm": 0.29658210277557373, "learning_rate": 4.479957397112789e-06, "loss": 0.2858, "step": 12132 }, { "epoch": 1.7397476340694005, "grad_norm": 0.26920562982559204, "learning_rate": 4.47912768585426e-06, "loss": 0.298, "step": 12133 }, { "epoch": 1.739891023802696, "grad_norm": 0.286092072725296, "learning_rate": 4.478297989096015e-06, "loss": 0.3021, "step": 12134 }, { "epoch": 1.7400344135359909, "grad_norm": 0.2965514361858368, "learning_rate": 4.477468306861151e-06, "loss": 0.3114, "step": 12135 }, { "epoch": 1.7401778032692858, "grad_norm": 0.28908389806747437, "learning_rate": 4.4766386391727665e-06, "loss": 0.2989, "step": 12136 }, { "epoch": 1.740321193002581, "grad_norm": 0.2891639173030853, "learning_rate": 4.475808986053959e-06, "loss": 0.3095, "step": 12137 }, { "epoch": 1.7404645827358762, "grad_norm": 0.29467496275901794, "learning_rate": 4.47497934752782e-06, "loss": 0.3006, "step": 12138 }, { "epoch": 1.7406079724691712, "grad_norm": 0.29438677430152893, "learning_rate": 4.474149723617449e-06, "loss": 0.2857, "step": 12139 }, { "epoch": 1.7407513622024662, "grad_norm": 0.2711262106895447, "learning_rate": 4.473320114345941e-06, "loss": 0.3203, "step": 12140 }, { "epoch": 1.7408947519357614, "grad_norm": 0.2654103636741638, "learning_rate": 4.47249051973639e-06, "loss": 0.2871, "step": 12141 }, { "epoch": 1.7410381416690566, "grad_norm": 0.2781127989292145, "learning_rate": 4.471660939811893e-06, "loss": 0.3147, "step": 12142 }, { "epoch": 1.7411815314023515, "grad_norm": 0.30127018690109253, "learning_rate": 4.470831374595542e-06, "loss": 0.2926, "step": 12143 }, { "epoch": 1.7413249211356467, "grad_norm": 0.28260940313339233, "learning_rate": 4.470001824110431e-06, "loss": 0.306, "step": 12144 }, { "epoch": 1.741468310868942, "grad_norm": 0.29824909567832947, "learning_rate": 4.469172288379655e-06, "loss": 0.3042, "step": 12145 }, { "epoch": 1.741611700602237, "grad_norm": 0.2924175560474396, "learning_rate": 4.468342767426306e-06, "loss": 0.3048, "step": 12146 }, { "epoch": 1.7417550903355319, "grad_norm": 0.2807847857475281, "learning_rate": 4.4675132612734745e-06, "loss": 0.3154, "step": 12147 }, { "epoch": 1.741898480068827, "grad_norm": 0.2618063986301422, "learning_rate": 4.466683769944257e-06, "loss": 0.2935, "step": 12148 }, { "epoch": 1.7420418698021223, "grad_norm": 0.26915445923805237, "learning_rate": 4.465854293461742e-06, "loss": 0.2874, "step": 12149 }, { "epoch": 1.7421852595354173, "grad_norm": 0.27770519256591797, "learning_rate": 4.465024831849021e-06, "loss": 0.2844, "step": 12150 }, { "epoch": 1.7423286492687122, "grad_norm": 0.3031696081161499, "learning_rate": 4.464195385129186e-06, "loss": 0.3121, "step": 12151 }, { "epoch": 1.7424720390020074, "grad_norm": 0.2822417914867401, "learning_rate": 4.463365953325328e-06, "loss": 0.2888, "step": 12152 }, { "epoch": 1.7426154287353026, "grad_norm": 0.2535814642906189, "learning_rate": 4.462536536460538e-06, "loss": 0.2877, "step": 12153 }, { "epoch": 1.7427588184685976, "grad_norm": 0.302991658449173, "learning_rate": 4.461707134557902e-06, "loss": 0.319, "step": 12154 }, { "epoch": 1.7429022082018928, "grad_norm": 0.27256882190704346, "learning_rate": 4.460877747640512e-06, "loss": 0.3049, "step": 12155 }, { "epoch": 1.743045597935188, "grad_norm": 0.2725446820259094, "learning_rate": 4.460048375731455e-06, "loss": 0.2798, "step": 12156 }, { "epoch": 1.743188987668483, "grad_norm": 0.29782143235206604, "learning_rate": 4.459219018853821e-06, "loss": 0.3107, "step": 12157 }, { "epoch": 1.743332377401778, "grad_norm": 0.3065538704395294, "learning_rate": 4.458389677030697e-06, "loss": 0.3289, "step": 12158 }, { "epoch": 1.7434757671350731, "grad_norm": 0.2922420799732208, "learning_rate": 4.457560350285174e-06, "loss": 0.3149, "step": 12159 }, { "epoch": 1.7436191568683683, "grad_norm": 0.2703893482685089, "learning_rate": 4.456731038640334e-06, "loss": 0.3084, "step": 12160 }, { "epoch": 1.7437625466016633, "grad_norm": 0.28447461128234863, "learning_rate": 4.455901742119267e-06, "loss": 0.3015, "step": 12161 }, { "epoch": 1.7439059363349583, "grad_norm": 0.25893446803092957, "learning_rate": 4.455072460745058e-06, "loss": 0.2965, "step": 12162 }, { "epoch": 1.7440493260682535, "grad_norm": 0.2680504322052002, "learning_rate": 4.4542431945407935e-06, "loss": 0.2874, "step": 12163 }, { "epoch": 1.7441927158015487, "grad_norm": 0.2988182604312897, "learning_rate": 4.453413943529561e-06, "loss": 0.3186, "step": 12164 }, { "epoch": 1.7443361055348436, "grad_norm": 0.2639349400997162, "learning_rate": 4.452584707734441e-06, "loss": 0.286, "step": 12165 }, { "epoch": 1.7444794952681388, "grad_norm": 0.2843187153339386, "learning_rate": 4.451755487178522e-06, "loss": 0.3175, "step": 12166 }, { "epoch": 1.744622885001434, "grad_norm": 0.3021405339241028, "learning_rate": 4.450926281884886e-06, "loss": 0.327, "step": 12167 }, { "epoch": 1.744766274734729, "grad_norm": 0.2711458206176758, "learning_rate": 4.450097091876617e-06, "loss": 0.2909, "step": 12168 }, { "epoch": 1.744909664468024, "grad_norm": 0.2653805613517761, "learning_rate": 4.4492679171768e-06, "loss": 0.3133, "step": 12169 }, { "epoch": 1.7450530542013192, "grad_norm": 0.2908902168273926, "learning_rate": 4.448438757808518e-06, "loss": 0.3156, "step": 12170 }, { "epoch": 1.7451964439346144, "grad_norm": 0.29852184653282166, "learning_rate": 4.447609613794851e-06, "loss": 0.2991, "step": 12171 }, { "epoch": 1.7453398336679093, "grad_norm": 0.2947823405265808, "learning_rate": 4.446780485158883e-06, "loss": 0.3075, "step": 12172 }, { "epoch": 1.7454832234012043, "grad_norm": 0.28970250487327576, "learning_rate": 4.4459513719236955e-06, "loss": 0.3108, "step": 12173 }, { "epoch": 1.7456266131344995, "grad_norm": 0.26828861236572266, "learning_rate": 4.445122274112369e-06, "loss": 0.2894, "step": 12174 }, { "epoch": 1.7457700028677947, "grad_norm": 0.29608604311943054, "learning_rate": 4.444293191747987e-06, "loss": 0.2945, "step": 12175 }, { "epoch": 1.7459133926010897, "grad_norm": 0.30479276180267334, "learning_rate": 4.443464124853625e-06, "loss": 0.331, "step": 12176 }, { "epoch": 1.7460567823343849, "grad_norm": 0.2924124598503113, "learning_rate": 4.442635073452366e-06, "loss": 0.3089, "step": 12177 }, { "epoch": 1.74620017206768, "grad_norm": 0.26512935757637024, "learning_rate": 4.4418060375672895e-06, "loss": 0.3051, "step": 12178 }, { "epoch": 1.746343561800975, "grad_norm": 0.28185075521469116, "learning_rate": 4.440977017221474e-06, "loss": 0.2936, "step": 12179 }, { "epoch": 1.74648695153427, "grad_norm": 0.2788843512535095, "learning_rate": 4.440148012438001e-06, "loss": 0.2928, "step": 12180 }, { "epoch": 1.7466303412675652, "grad_norm": 0.2654285132884979, "learning_rate": 4.439319023239943e-06, "loss": 0.2864, "step": 12181 }, { "epoch": 1.7467737310008604, "grad_norm": 0.2897180914878845, "learning_rate": 4.438490049650382e-06, "loss": 0.3171, "step": 12182 }, { "epoch": 1.7469171207341554, "grad_norm": 0.27466869354248047, "learning_rate": 4.437661091692395e-06, "loss": 0.2904, "step": 12183 }, { "epoch": 1.7470605104674504, "grad_norm": 0.2789529860019684, "learning_rate": 4.436832149389057e-06, "loss": 0.3083, "step": 12184 }, { "epoch": 1.7472039002007458, "grad_norm": 0.2555057108402252, "learning_rate": 4.436003222763446e-06, "loss": 0.2863, "step": 12185 }, { "epoch": 1.7473472899340408, "grad_norm": 0.2886558473110199, "learning_rate": 4.435174311838637e-06, "loss": 0.2935, "step": 12186 }, { "epoch": 1.7474906796673357, "grad_norm": 0.27323055267333984, "learning_rate": 4.434345416637708e-06, "loss": 0.3136, "step": 12187 }, { "epoch": 1.747634069400631, "grad_norm": 0.2634531557559967, "learning_rate": 4.433516537183731e-06, "loss": 0.3157, "step": 12188 }, { "epoch": 1.7477774591339261, "grad_norm": 0.2732265293598175, "learning_rate": 4.432687673499782e-06, "loss": 0.2731, "step": 12189 }, { "epoch": 1.747920848867221, "grad_norm": 0.268989235162735, "learning_rate": 4.431858825608935e-06, "loss": 0.2894, "step": 12190 }, { "epoch": 1.748064238600516, "grad_norm": 0.28924620151519775, "learning_rate": 4.431029993534268e-06, "loss": 0.3038, "step": 12191 }, { "epoch": 1.7482076283338113, "grad_norm": 0.28961193561553955, "learning_rate": 4.430201177298848e-06, "loss": 0.303, "step": 12192 }, { "epoch": 1.7483510180671065, "grad_norm": 0.2586199939250946, "learning_rate": 4.42937237692575e-06, "loss": 0.298, "step": 12193 }, { "epoch": 1.7484944078004014, "grad_norm": 0.2718830406665802, "learning_rate": 4.428543592438049e-06, "loss": 0.31, "step": 12194 }, { "epoch": 1.7486377975336966, "grad_norm": 0.2916291356086731, "learning_rate": 4.427714823858814e-06, "loss": 0.3028, "step": 12195 }, { "epoch": 1.7487811872669918, "grad_norm": 0.286795973777771, "learning_rate": 4.426886071211119e-06, "loss": 0.3041, "step": 12196 }, { "epoch": 1.7489245770002868, "grad_norm": 0.283015638589859, "learning_rate": 4.426057334518034e-06, "loss": 0.3081, "step": 12197 }, { "epoch": 1.7490679667335818, "grad_norm": 0.2748386859893799, "learning_rate": 4.425228613802629e-06, "loss": 0.317, "step": 12198 }, { "epoch": 1.749211356466877, "grad_norm": 0.28174135088920593, "learning_rate": 4.424399909087975e-06, "loss": 0.3169, "step": 12199 }, { "epoch": 1.7493547462001722, "grad_norm": 0.2708064019680023, "learning_rate": 4.423571220397143e-06, "loss": 0.3085, "step": 12200 }, { "epoch": 1.7494981359334671, "grad_norm": 0.2764677405357361, "learning_rate": 4.422742547753201e-06, "loss": 0.3128, "step": 12201 }, { "epoch": 1.7496415256667621, "grad_norm": 0.2684398591518402, "learning_rate": 4.421913891179221e-06, "loss": 0.2984, "step": 12202 }, { "epoch": 1.7497849154000573, "grad_norm": 0.27515581250190735, "learning_rate": 4.421085250698266e-06, "loss": 0.2996, "step": 12203 }, { "epoch": 1.7499283051333525, "grad_norm": 0.2800312042236328, "learning_rate": 4.420256626333406e-06, "loss": 0.3157, "step": 12204 }, { "epoch": 1.7500716948666475, "grad_norm": 0.25325843691825867, "learning_rate": 4.419428018107712e-06, "loss": 0.3117, "step": 12205 }, { "epoch": 1.7502150845999427, "grad_norm": 0.2623119354248047, "learning_rate": 4.418599426044248e-06, "loss": 0.3117, "step": 12206 }, { "epoch": 1.7503584743332379, "grad_norm": 0.2697813808917999, "learning_rate": 4.417770850166084e-06, "loss": 0.2788, "step": 12207 }, { "epoch": 1.7505018640665329, "grad_norm": 0.2628028690814972, "learning_rate": 4.4169422904962824e-06, "loss": 0.2922, "step": 12208 }, { "epoch": 1.7506452537998278, "grad_norm": 0.26890918612480164, "learning_rate": 4.41611374705791e-06, "loss": 0.2989, "step": 12209 }, { "epoch": 1.750788643533123, "grad_norm": 0.2803889513015747, "learning_rate": 4.415285219874033e-06, "loss": 0.3133, "step": 12210 }, { "epoch": 1.7509320332664182, "grad_norm": 0.2572803497314453, "learning_rate": 4.414456708967715e-06, "loss": 0.2947, "step": 12211 }, { "epoch": 1.7510754229997132, "grad_norm": 0.271535724401474, "learning_rate": 4.4136282143620215e-06, "loss": 0.3167, "step": 12212 }, { "epoch": 1.7512188127330082, "grad_norm": 0.2653484642505646, "learning_rate": 4.412799736080019e-06, "loss": 0.2959, "step": 12213 }, { "epoch": 1.7513622024663034, "grad_norm": 0.29349175095558167, "learning_rate": 4.4119712741447664e-06, "loss": 0.3114, "step": 12214 }, { "epoch": 1.7515055921995986, "grad_norm": 0.2737618386745453, "learning_rate": 4.411142828579329e-06, "loss": 0.2806, "step": 12215 }, { "epoch": 1.7516489819328935, "grad_norm": 0.27199018001556396, "learning_rate": 4.41031439940677e-06, "loss": 0.2932, "step": 12216 }, { "epoch": 1.7517923716661887, "grad_norm": 0.29853227734565735, "learning_rate": 4.409485986650151e-06, "loss": 0.3035, "step": 12217 }, { "epoch": 1.751935761399484, "grad_norm": 0.27048060297966003, "learning_rate": 4.408657590332535e-06, "loss": 0.2882, "step": 12218 }, { "epoch": 1.752079151132779, "grad_norm": 0.2805006802082062, "learning_rate": 4.407829210476981e-06, "loss": 0.3115, "step": 12219 }, { "epoch": 1.7522225408660739, "grad_norm": 0.274870902299881, "learning_rate": 4.40700084710655e-06, "loss": 0.3008, "step": 12220 }, { "epoch": 1.752365930599369, "grad_norm": 0.281525194644928, "learning_rate": 4.406172500244304e-06, "loss": 0.3074, "step": 12221 }, { "epoch": 1.7525093203326643, "grad_norm": 0.2804824411869049, "learning_rate": 4.405344169913301e-06, "loss": 0.2908, "step": 12222 }, { "epoch": 1.7526527100659592, "grad_norm": 0.2669432759284973, "learning_rate": 4.404515856136602e-06, "loss": 0.303, "step": 12223 }, { "epoch": 1.7527960997992542, "grad_norm": 0.27154335379600525, "learning_rate": 4.403687558937267e-06, "loss": 0.2928, "step": 12224 }, { "epoch": 1.7529394895325496, "grad_norm": 0.28976190090179443, "learning_rate": 4.402859278338352e-06, "loss": 0.2998, "step": 12225 }, { "epoch": 1.7530828792658446, "grad_norm": 0.28778526186943054, "learning_rate": 4.402031014362916e-06, "loss": 0.3096, "step": 12226 }, { "epoch": 1.7532262689991396, "grad_norm": 0.2524998188018799, "learning_rate": 4.401202767034016e-06, "loss": 0.2957, "step": 12227 }, { "epoch": 1.7533696587324348, "grad_norm": 0.2722136676311493, "learning_rate": 4.400374536374711e-06, "loss": 0.3131, "step": 12228 }, { "epoch": 1.75351304846573, "grad_norm": 0.2755768597126007, "learning_rate": 4.399546322408058e-06, "loss": 0.2769, "step": 12229 }, { "epoch": 1.753656438199025, "grad_norm": 0.29769834876060486, "learning_rate": 4.39871812515711e-06, "loss": 0.3175, "step": 12230 }, { "epoch": 1.75379982793232, "grad_norm": 0.2715229094028473, "learning_rate": 4.397889944644924e-06, "loss": 0.2997, "step": 12231 }, { "epoch": 1.7539432176656151, "grad_norm": 0.288003146648407, "learning_rate": 4.3970617808945545e-06, "loss": 0.3306, "step": 12232 }, { "epoch": 1.7540866073989103, "grad_norm": 0.30893516540527344, "learning_rate": 4.396233633929059e-06, "loss": 0.3221, "step": 12233 }, { "epoch": 1.7542299971322053, "grad_norm": 0.28224360942840576, "learning_rate": 4.3954055037714925e-06, "loss": 0.3072, "step": 12234 }, { "epoch": 1.7543733868655005, "grad_norm": 0.28022024035453796, "learning_rate": 4.394577390444904e-06, "loss": 0.2964, "step": 12235 }, { "epoch": 1.7545167765987957, "grad_norm": 0.2910664975643158, "learning_rate": 4.3937492939723506e-06, "loss": 0.3071, "step": 12236 }, { "epoch": 1.7546601663320907, "grad_norm": 0.28741219639778137, "learning_rate": 4.392921214376883e-06, "loss": 0.2843, "step": 12237 }, { "epoch": 1.7548035560653856, "grad_norm": 0.2721494138240814, "learning_rate": 4.392093151681556e-06, "loss": 0.3037, "step": 12238 }, { "epoch": 1.7549469457986808, "grad_norm": 0.29390987753868103, "learning_rate": 4.39126510590942e-06, "loss": 0.3014, "step": 12239 }, { "epoch": 1.755090335531976, "grad_norm": 0.27827882766723633, "learning_rate": 4.3904370770835285e-06, "loss": 0.302, "step": 12240 }, { "epoch": 1.755233725265271, "grad_norm": 0.2739843428134918, "learning_rate": 4.38960906522693e-06, "loss": 0.3043, "step": 12241 }, { "epoch": 1.755377114998566, "grad_norm": 0.26033005118370056, "learning_rate": 4.388781070362676e-06, "loss": 0.3171, "step": 12242 }, { "epoch": 1.7555205047318612, "grad_norm": 0.2843165695667267, "learning_rate": 4.387953092513817e-06, "loss": 0.2968, "step": 12243 }, { "epoch": 1.7556638944651564, "grad_norm": 0.2907748222351074, "learning_rate": 4.387125131703402e-06, "loss": 0.3076, "step": 12244 }, { "epoch": 1.7558072841984513, "grad_norm": 0.28146132826805115, "learning_rate": 4.386297187954483e-06, "loss": 0.3101, "step": 12245 }, { "epoch": 1.7559506739317465, "grad_norm": 0.2619401514530182, "learning_rate": 4.385469261290104e-06, "loss": 0.2969, "step": 12246 }, { "epoch": 1.7560940636650417, "grad_norm": 0.2996132969856262, "learning_rate": 4.3846413517333155e-06, "loss": 0.2959, "step": 12247 }, { "epoch": 1.7562374533983367, "grad_norm": 0.2683219909667969, "learning_rate": 4.3838134593071656e-06, "loss": 0.2846, "step": 12248 }, { "epoch": 1.7563808431316317, "grad_norm": 0.2757238447666168, "learning_rate": 4.3829855840347e-06, "loss": 0.3163, "step": 12249 }, { "epoch": 1.7565242328649269, "grad_norm": 0.26038384437561035, "learning_rate": 4.382157725938967e-06, "loss": 0.3075, "step": 12250 }, { "epoch": 1.756667622598222, "grad_norm": 0.28642207384109497, "learning_rate": 4.381329885043014e-06, "loss": 0.2951, "step": 12251 }, { "epoch": 1.756811012331517, "grad_norm": 0.2968921661376953, "learning_rate": 4.380502061369884e-06, "loss": 0.2907, "step": 12252 }, { "epoch": 1.756954402064812, "grad_norm": 0.25474074482917786, "learning_rate": 4.379674254942624e-06, "loss": 0.2835, "step": 12253 }, { "epoch": 1.7570977917981072, "grad_norm": 0.2812435030937195, "learning_rate": 4.378846465784278e-06, "loss": 0.3131, "step": 12254 }, { "epoch": 1.7572411815314024, "grad_norm": 0.2621450424194336, "learning_rate": 4.378018693917891e-06, "loss": 0.2993, "step": 12255 }, { "epoch": 1.7573845712646974, "grad_norm": 0.2451777458190918, "learning_rate": 4.377190939366509e-06, "loss": 0.3022, "step": 12256 }, { "epoch": 1.7575279609979926, "grad_norm": 0.2700693607330322, "learning_rate": 4.376363202153171e-06, "loss": 0.3015, "step": 12257 }, { "epoch": 1.7576713507312878, "grad_norm": 0.2741672992706299, "learning_rate": 4.3755354823009225e-06, "loss": 0.3016, "step": 12258 }, { "epoch": 1.7578147404645827, "grad_norm": 0.2700293958187103, "learning_rate": 4.374707779832805e-06, "loss": 0.2888, "step": 12259 }, { "epoch": 1.7579581301978777, "grad_norm": 0.2736002206802368, "learning_rate": 4.373880094771859e-06, "loss": 0.3094, "step": 12260 }, { "epoch": 1.758101519931173, "grad_norm": 0.27391374111175537, "learning_rate": 4.3730524271411335e-06, "loss": 0.3305, "step": 12261 }, { "epoch": 1.7582449096644681, "grad_norm": 0.28108400106430054, "learning_rate": 4.37222477696366e-06, "loss": 0.2921, "step": 12262 }, { "epoch": 1.758388299397763, "grad_norm": 0.3042670488357544, "learning_rate": 4.3713971442624845e-06, "loss": 0.3102, "step": 12263 }, { "epoch": 1.758531689131058, "grad_norm": 0.2876208424568176, "learning_rate": 4.370569529060644e-06, "loss": 0.3114, "step": 12264 }, { "epoch": 1.7586750788643533, "grad_norm": 0.26136040687561035, "learning_rate": 4.3697419313811805e-06, "loss": 0.2991, "step": 12265 }, { "epoch": 1.7588184685976485, "grad_norm": 0.27231141924858093, "learning_rate": 4.368914351247132e-06, "loss": 0.2942, "step": 12266 }, { "epoch": 1.7589618583309434, "grad_norm": 0.28298839926719666, "learning_rate": 4.3680867886815385e-06, "loss": 0.3026, "step": 12267 }, { "epoch": 1.7591052480642386, "grad_norm": 0.3050190806388855, "learning_rate": 4.367259243707435e-06, "loss": 0.2949, "step": 12268 }, { "epoch": 1.7592486377975338, "grad_norm": 0.2935774326324463, "learning_rate": 4.366431716347862e-06, "loss": 0.3033, "step": 12269 }, { "epoch": 1.7593920275308288, "grad_norm": 0.26641303300857544, "learning_rate": 4.365604206625854e-06, "loss": 0.2917, "step": 12270 }, { "epoch": 1.7595354172641238, "grad_norm": 0.26682811975479126, "learning_rate": 4.364776714564449e-06, "loss": 0.2987, "step": 12271 }, { "epoch": 1.759678806997419, "grad_norm": 0.28771212697029114, "learning_rate": 4.363949240186687e-06, "loss": 0.3101, "step": 12272 }, { "epoch": 1.7598221967307142, "grad_norm": 0.2594192624092102, "learning_rate": 4.363121783515596e-06, "loss": 0.2897, "step": 12273 }, { "epoch": 1.7599655864640091, "grad_norm": 0.28537511825561523, "learning_rate": 4.362294344574215e-06, "loss": 0.2946, "step": 12274 }, { "epoch": 1.760108976197304, "grad_norm": 0.2722339332103729, "learning_rate": 4.361466923385579e-06, "loss": 0.2969, "step": 12275 }, { "epoch": 1.7602523659305995, "grad_norm": 0.2886629104614258, "learning_rate": 4.360639519972722e-06, "loss": 0.3071, "step": 12276 }, { "epoch": 1.7603957556638945, "grad_norm": 0.27306249737739563, "learning_rate": 4.359812134358676e-06, "loss": 0.3334, "step": 12277 }, { "epoch": 1.7605391453971895, "grad_norm": 0.2784336805343628, "learning_rate": 4.358984766566477e-06, "loss": 0.3091, "step": 12278 }, { "epoch": 1.7606825351304847, "grad_norm": 0.28050169348716736, "learning_rate": 4.3581574166191546e-06, "loss": 0.2882, "step": 12279 }, { "epoch": 1.7608259248637799, "grad_norm": 0.26898306608200073, "learning_rate": 4.357330084539743e-06, "loss": 0.2837, "step": 12280 }, { "epoch": 1.7609693145970748, "grad_norm": 0.2773343622684479, "learning_rate": 4.356502770351273e-06, "loss": 0.3034, "step": 12281 }, { "epoch": 1.7611127043303698, "grad_norm": 0.27779167890548706, "learning_rate": 4.355675474076776e-06, "loss": 0.3121, "step": 12282 }, { "epoch": 1.761256094063665, "grad_norm": 0.305864155292511, "learning_rate": 4.354848195739284e-06, "loss": 0.317, "step": 12283 }, { "epoch": 1.7613994837969602, "grad_norm": 0.2566823959350586, "learning_rate": 4.354020935361825e-06, "loss": 0.2897, "step": 12284 }, { "epoch": 1.7615428735302552, "grad_norm": 0.27654361724853516, "learning_rate": 4.353193692967429e-06, "loss": 0.286, "step": 12285 }, { "epoch": 1.7616862632635504, "grad_norm": 0.26843640208244324, "learning_rate": 4.352366468579126e-06, "loss": 0.2946, "step": 12286 }, { "epoch": 1.7618296529968456, "grad_norm": 0.2845594584941864, "learning_rate": 4.351539262219944e-06, "loss": 0.3139, "step": 12287 }, { "epoch": 1.7619730427301405, "grad_norm": 0.28054770827293396, "learning_rate": 4.3507120739129115e-06, "loss": 0.3049, "step": 12288 }, { "epoch": 1.7621164324634355, "grad_norm": 0.2925760746002197, "learning_rate": 4.349884903681056e-06, "loss": 0.2877, "step": 12289 }, { "epoch": 1.7622598221967307, "grad_norm": 0.2819012403488159, "learning_rate": 4.3490577515474045e-06, "loss": 0.3113, "step": 12290 }, { "epoch": 1.762403211930026, "grad_norm": 0.29841962456703186, "learning_rate": 4.348230617534984e-06, "loss": 0.2996, "step": 12291 }, { "epoch": 1.7625466016633209, "grad_norm": 0.2723042368888855, "learning_rate": 4.347403501666821e-06, "loss": 0.2976, "step": 12292 }, { "epoch": 1.7626899913966159, "grad_norm": 0.28985390067100525, "learning_rate": 4.3465764039659395e-06, "loss": 0.3275, "step": 12293 }, { "epoch": 1.762833381129911, "grad_norm": 0.28193187713623047, "learning_rate": 4.345749324455367e-06, "loss": 0.2825, "step": 12294 }, { "epoch": 1.7629767708632063, "grad_norm": 0.2822243571281433, "learning_rate": 4.344922263158126e-06, "loss": 0.2898, "step": 12295 }, { "epoch": 1.7631201605965012, "grad_norm": 0.28381651639938354, "learning_rate": 4.3440952200972416e-06, "loss": 0.2879, "step": 12296 }, { "epoch": 1.7632635503297964, "grad_norm": 0.2745850384235382, "learning_rate": 4.343268195295738e-06, "loss": 0.3107, "step": 12297 }, { "epoch": 1.7634069400630916, "grad_norm": 0.2735815942287445, "learning_rate": 4.342441188776637e-06, "loss": 0.3176, "step": 12298 }, { "epoch": 1.7635503297963866, "grad_norm": 0.2696424722671509, "learning_rate": 4.341614200562963e-06, "loss": 0.2983, "step": 12299 }, { "epoch": 1.7636937195296816, "grad_norm": 0.28854143619537354, "learning_rate": 4.340787230677736e-06, "loss": 0.2991, "step": 12300 }, { "epoch": 1.7638371092629768, "grad_norm": 0.2836384177207947, "learning_rate": 4.339960279143977e-06, "loss": 0.3055, "step": 12301 }, { "epoch": 1.763980498996272, "grad_norm": 0.2931998372077942, "learning_rate": 4.33913334598471e-06, "loss": 0.3181, "step": 12302 }, { "epoch": 1.764123888729567, "grad_norm": 0.28269949555397034, "learning_rate": 4.338306431222954e-06, "loss": 0.3052, "step": 12303 }, { "epoch": 1.764267278462862, "grad_norm": 0.2676745653152466, "learning_rate": 4.337479534881729e-06, "loss": 0.3362, "step": 12304 }, { "epoch": 1.764410668196157, "grad_norm": 0.2883671522140503, "learning_rate": 4.336652656984055e-06, "loss": 0.3145, "step": 12305 }, { "epoch": 1.7645540579294523, "grad_norm": 0.2713683545589447, "learning_rate": 4.33582579755295e-06, "loss": 0.307, "step": 12306 }, { "epoch": 1.7646974476627473, "grad_norm": 0.2790016829967499, "learning_rate": 4.3349989566114325e-06, "loss": 0.3212, "step": 12307 }, { "epoch": 1.7648408373960425, "grad_norm": 0.27114659547805786, "learning_rate": 4.334172134182522e-06, "loss": 0.2939, "step": 12308 }, { "epoch": 1.7649842271293377, "grad_norm": 0.2784179747104645, "learning_rate": 4.333345330289234e-06, "loss": 0.2755, "step": 12309 }, { "epoch": 1.7651276168626326, "grad_norm": 0.29421356320381165, "learning_rate": 4.33251854495459e-06, "loss": 0.3099, "step": 12310 }, { "epoch": 1.7652710065959276, "grad_norm": 0.26336342096328735, "learning_rate": 4.3316917782016e-06, "loss": 0.2905, "step": 12311 }, { "epoch": 1.7654143963292228, "grad_norm": 0.27970585227012634, "learning_rate": 4.3308650300532825e-06, "loss": 0.2942, "step": 12312 }, { "epoch": 1.765557786062518, "grad_norm": 0.2825136184692383, "learning_rate": 4.330038300532654e-06, "loss": 0.2952, "step": 12313 }, { "epoch": 1.765701175795813, "grad_norm": 0.2633165419101715, "learning_rate": 4.329211589662728e-06, "loss": 0.2984, "step": 12314 }, { "epoch": 1.765844565529108, "grad_norm": 0.2597064673900604, "learning_rate": 4.328384897466521e-06, "loss": 0.3089, "step": 12315 }, { "epoch": 1.7659879552624034, "grad_norm": 0.3002750873565674, "learning_rate": 4.327558223967044e-06, "loss": 0.3093, "step": 12316 }, { "epoch": 1.7661313449956983, "grad_norm": 0.30051860213279724, "learning_rate": 4.326731569187311e-06, "loss": 0.3151, "step": 12317 }, { "epoch": 1.7662747347289933, "grad_norm": 0.27919554710388184, "learning_rate": 4.325904933150337e-06, "loss": 0.3088, "step": 12318 }, { "epoch": 1.7664181244622885, "grad_norm": 0.2903788983821869, "learning_rate": 4.325078315879131e-06, "loss": 0.2911, "step": 12319 }, { "epoch": 1.7665615141955837, "grad_norm": 0.25496017932891846, "learning_rate": 4.324251717396707e-06, "loss": 0.2912, "step": 12320 }, { "epoch": 1.7667049039288787, "grad_norm": 0.27762818336486816, "learning_rate": 4.3234251377260775e-06, "loss": 0.3059, "step": 12321 }, { "epoch": 1.7668482936621737, "grad_norm": 0.2873598635196686, "learning_rate": 4.32259857689025e-06, "loss": 0.3, "step": 12322 }, { "epoch": 1.7669916833954689, "grad_norm": 0.2724112272262573, "learning_rate": 4.321772034912236e-06, "loss": 0.3246, "step": 12323 }, { "epoch": 1.767135073128764, "grad_norm": 0.2802342474460602, "learning_rate": 4.320945511815045e-06, "loss": 0.2846, "step": 12324 }, { "epoch": 1.767278462862059, "grad_norm": 0.29641178250312805, "learning_rate": 4.320119007621686e-06, "loss": 0.3188, "step": 12325 }, { "epoch": 1.7674218525953542, "grad_norm": 0.2714662551879883, "learning_rate": 4.319292522355171e-06, "loss": 0.2934, "step": 12326 }, { "epoch": 1.7675652423286494, "grad_norm": 0.27026575803756714, "learning_rate": 4.318466056038503e-06, "loss": 0.3213, "step": 12327 }, { "epoch": 1.7677086320619444, "grad_norm": 0.27350056171417236, "learning_rate": 4.317639608694691e-06, "loss": 0.3208, "step": 12328 }, { "epoch": 1.7678520217952394, "grad_norm": 0.2601173222064972, "learning_rate": 4.316813180346743e-06, "loss": 0.2952, "step": 12329 }, { "epoch": 1.7679954115285346, "grad_norm": 0.2863738536834717, "learning_rate": 4.315986771017664e-06, "loss": 0.2871, "step": 12330 }, { "epoch": 1.7681388012618298, "grad_norm": 0.2723848223686218, "learning_rate": 4.315160380730462e-06, "loss": 0.3027, "step": 12331 }, { "epoch": 1.7682821909951247, "grad_norm": 0.28004077076911926, "learning_rate": 4.314334009508143e-06, "loss": 0.3047, "step": 12332 }, { "epoch": 1.7684255807284197, "grad_norm": 0.28177493810653687, "learning_rate": 4.313507657373709e-06, "loss": 0.307, "step": 12333 }, { "epoch": 1.768568970461715, "grad_norm": 0.28906139731407166, "learning_rate": 4.312681324350166e-06, "loss": 0.3099, "step": 12334 }, { "epoch": 1.76871236019501, "grad_norm": 0.283067911863327, "learning_rate": 4.311855010460518e-06, "loss": 0.3121, "step": 12335 }, { "epoch": 1.768855749928305, "grad_norm": 0.2680053114891052, "learning_rate": 4.3110287157277664e-06, "loss": 0.3013, "step": 12336 }, { "epoch": 1.7689991396616003, "grad_norm": 0.2802181839942932, "learning_rate": 4.3102024401749184e-06, "loss": 0.3063, "step": 12337 }, { "epoch": 1.7691425293948955, "grad_norm": 0.276644229888916, "learning_rate": 4.3093761838249714e-06, "loss": 0.2891, "step": 12338 }, { "epoch": 1.7692859191281904, "grad_norm": 0.30880653858184814, "learning_rate": 4.308549946700929e-06, "loss": 0.3063, "step": 12339 }, { "epoch": 1.7694293088614854, "grad_norm": 0.2729574143886566, "learning_rate": 4.307723728825793e-06, "loss": 0.3255, "step": 12340 }, { "epoch": 1.7695726985947806, "grad_norm": 0.2956109941005707, "learning_rate": 4.306897530222563e-06, "loss": 0.3067, "step": 12341 }, { "epoch": 1.7697160883280758, "grad_norm": 0.2895974814891815, "learning_rate": 4.30607135091424e-06, "loss": 0.3267, "step": 12342 }, { "epoch": 1.7698594780613708, "grad_norm": 0.2812233865261078, "learning_rate": 4.305245190923823e-06, "loss": 0.3051, "step": 12343 }, { "epoch": 1.7700028677946658, "grad_norm": 0.2836620509624481, "learning_rate": 4.304419050274311e-06, "loss": 0.3161, "step": 12344 }, { "epoch": 1.770146257527961, "grad_norm": 0.3041459918022156, "learning_rate": 4.303592928988702e-06, "loss": 0.3053, "step": 12345 }, { "epoch": 1.7702896472612561, "grad_norm": 0.2930929362773895, "learning_rate": 4.302766827089995e-06, "loss": 0.2909, "step": 12346 }, { "epoch": 1.7704330369945511, "grad_norm": 0.28225240111351013, "learning_rate": 4.301940744601187e-06, "loss": 0.2924, "step": 12347 }, { "epoch": 1.7705764267278463, "grad_norm": 0.2681676149368286, "learning_rate": 4.301114681545278e-06, "loss": 0.2914, "step": 12348 }, { "epoch": 1.7707198164611415, "grad_norm": 0.284831166267395, "learning_rate": 4.300288637945256e-06, "loss": 0.2997, "step": 12349 }, { "epoch": 1.7708632061944365, "grad_norm": 0.28792521357536316, "learning_rate": 4.299462613824125e-06, "loss": 0.2835, "step": 12350 }, { "epoch": 1.7710065959277315, "grad_norm": 0.3060659170150757, "learning_rate": 4.298636609204876e-06, "loss": 0.329, "step": 12351 }, { "epoch": 1.7711499856610267, "grad_norm": 0.26713815331459045, "learning_rate": 4.297810624110504e-06, "loss": 0.2954, "step": 12352 }, { "epoch": 1.7712933753943219, "grad_norm": 0.2715955078601837, "learning_rate": 4.296984658564008e-06, "loss": 0.3011, "step": 12353 }, { "epoch": 1.7714367651276168, "grad_norm": 0.2756078839302063, "learning_rate": 4.296158712588375e-06, "loss": 0.2896, "step": 12354 }, { "epoch": 1.7715801548609118, "grad_norm": 0.2824985980987549, "learning_rate": 4.295332786206601e-06, "loss": 0.2977, "step": 12355 }, { "epoch": 1.771723544594207, "grad_norm": 0.2657146751880646, "learning_rate": 4.294506879441677e-06, "loss": 0.3059, "step": 12356 }, { "epoch": 1.7718669343275022, "grad_norm": 0.27387675642967224, "learning_rate": 4.293680992316597e-06, "loss": 0.3134, "step": 12357 }, { "epoch": 1.7720103240607972, "grad_norm": 0.2896668314933777, "learning_rate": 4.2928551248543524e-06, "loss": 0.3057, "step": 12358 }, { "epoch": 1.7721537137940924, "grad_norm": 0.2753485143184662, "learning_rate": 4.292029277077933e-06, "loss": 0.3026, "step": 12359 }, { "epoch": 1.7722971035273876, "grad_norm": 0.2862887680530548, "learning_rate": 4.291203449010329e-06, "loss": 0.3096, "step": 12360 }, { "epoch": 1.7724404932606825, "grad_norm": 0.28484317660331726, "learning_rate": 4.290377640674531e-06, "loss": 0.3131, "step": 12361 }, { "epoch": 1.7725838829939775, "grad_norm": 0.2632678747177124, "learning_rate": 4.2895518520935275e-06, "loss": 0.2939, "step": 12362 }, { "epoch": 1.7727272727272727, "grad_norm": 0.276837557554245, "learning_rate": 4.288726083290308e-06, "loss": 0.2998, "step": 12363 }, { "epoch": 1.772870662460568, "grad_norm": 0.2781852185726166, "learning_rate": 4.287900334287862e-06, "loss": 0.3101, "step": 12364 }, { "epoch": 1.7730140521938629, "grad_norm": 0.26660072803497314, "learning_rate": 4.287074605109173e-06, "loss": 0.2977, "step": 12365 }, { "epoch": 1.7731574419271579, "grad_norm": 0.26364412903785706, "learning_rate": 4.28624889577723e-06, "loss": 0.3034, "step": 12366 }, { "epoch": 1.7733008316604533, "grad_norm": 0.2810065746307373, "learning_rate": 4.285423206315021e-06, "loss": 0.3016, "step": 12367 }, { "epoch": 1.7734442213937482, "grad_norm": 0.2721593677997589, "learning_rate": 4.28459753674553e-06, "loss": 0.3181, "step": 12368 }, { "epoch": 1.7735876111270432, "grad_norm": 0.27277347445487976, "learning_rate": 4.283771887091743e-06, "loss": 0.2901, "step": 12369 }, { "epoch": 1.7737310008603384, "grad_norm": 0.2728355824947357, "learning_rate": 4.2829462573766456e-06, "loss": 0.312, "step": 12370 }, { "epoch": 1.7738743905936336, "grad_norm": 0.2648274898529053, "learning_rate": 4.282120647623221e-06, "loss": 0.297, "step": 12371 }, { "epoch": 1.7740177803269286, "grad_norm": 0.26399511098861694, "learning_rate": 4.281295057854453e-06, "loss": 0.3098, "step": 12372 }, { "epoch": 1.7741611700602236, "grad_norm": 0.27302253246307373, "learning_rate": 4.2804694880933255e-06, "loss": 0.3133, "step": 12373 }, { "epoch": 1.7743045597935188, "grad_norm": 0.2858419418334961, "learning_rate": 4.279643938362819e-06, "loss": 0.315, "step": 12374 }, { "epoch": 1.774447949526814, "grad_norm": 0.28794121742248535, "learning_rate": 4.278818408685921e-06, "loss": 0.2786, "step": 12375 }, { "epoch": 1.774591339260109, "grad_norm": 0.29642441868782043, "learning_rate": 4.277992899085606e-06, "loss": 0.3092, "step": 12376 }, { "epoch": 1.7747347289934041, "grad_norm": 0.26597902178764343, "learning_rate": 4.277167409584858e-06, "loss": 0.3202, "step": 12377 }, { "epoch": 1.7748781187266993, "grad_norm": 0.28329208493232727, "learning_rate": 4.2763419402066575e-06, "loss": 0.3161, "step": 12378 }, { "epoch": 1.7750215084599943, "grad_norm": 0.28634917736053467, "learning_rate": 4.275516490973984e-06, "loss": 0.2982, "step": 12379 }, { "epoch": 1.7751648981932893, "grad_norm": 0.2804948091506958, "learning_rate": 4.27469106190982e-06, "loss": 0.2978, "step": 12380 }, { "epoch": 1.7753082879265845, "grad_norm": 0.27583712339401245, "learning_rate": 4.273865653037138e-06, "loss": 0.315, "step": 12381 }, { "epoch": 1.7754516776598797, "grad_norm": 0.2790280878543854, "learning_rate": 4.27304026437892e-06, "loss": 0.2938, "step": 12382 }, { "epoch": 1.7755950673931746, "grad_norm": 0.27504318952560425, "learning_rate": 4.272214895958142e-06, "loss": 0.3012, "step": 12383 }, { "epoch": 1.7757384571264696, "grad_norm": 0.28355899453163147, "learning_rate": 4.271389547797783e-06, "loss": 0.2909, "step": 12384 }, { "epoch": 1.7758818468597648, "grad_norm": 0.2858531177043915, "learning_rate": 4.270564219920817e-06, "loss": 0.3096, "step": 12385 }, { "epoch": 1.77602523659306, "grad_norm": 0.2890618145465851, "learning_rate": 4.269738912350222e-06, "loss": 0.3124, "step": 12386 }, { "epoch": 1.776168626326355, "grad_norm": 0.2891257703304291, "learning_rate": 4.268913625108972e-06, "loss": 0.3006, "step": 12387 }, { "epoch": 1.7763120160596502, "grad_norm": 0.30912157893180847, "learning_rate": 4.268088358220042e-06, "loss": 0.2966, "step": 12388 }, { "epoch": 1.7764554057929454, "grad_norm": 0.2739367187023163, "learning_rate": 4.267263111706406e-06, "loss": 0.3006, "step": 12389 }, { "epoch": 1.7765987955262403, "grad_norm": 0.2782417833805084, "learning_rate": 4.266437885591038e-06, "loss": 0.3013, "step": 12390 }, { "epoch": 1.7767421852595353, "grad_norm": 0.31829240918159485, "learning_rate": 4.265612679896912e-06, "loss": 0.3046, "step": 12391 }, { "epoch": 1.7768855749928305, "grad_norm": 0.27834081649780273, "learning_rate": 4.264787494646999e-06, "loss": 0.3045, "step": 12392 }, { "epoch": 1.7770289647261257, "grad_norm": 0.27135926485061646, "learning_rate": 4.2639623298642694e-06, "loss": 0.2869, "step": 12393 }, { "epoch": 1.7771723544594207, "grad_norm": 0.255583792924881, "learning_rate": 4.263137185571698e-06, "loss": 0.3173, "step": 12394 }, { "epoch": 1.7773157441927157, "grad_norm": 0.25254711508750916, "learning_rate": 4.262312061792252e-06, "loss": 0.3018, "step": 12395 }, { "epoch": 1.7774591339260108, "grad_norm": 0.26888778805732727, "learning_rate": 4.2614869585489046e-06, "loss": 0.3062, "step": 12396 }, { "epoch": 1.777602523659306, "grad_norm": 0.29217594861984253, "learning_rate": 4.260661875864625e-06, "loss": 0.3225, "step": 12397 }, { "epoch": 1.777745913392601, "grad_norm": 0.26830774545669556, "learning_rate": 4.259836813762381e-06, "loss": 0.3022, "step": 12398 }, { "epoch": 1.7778893031258962, "grad_norm": 0.25857338309288025, "learning_rate": 4.25901177226514e-06, "loss": 0.3068, "step": 12399 }, { "epoch": 1.7780326928591914, "grad_norm": 0.28108522295951843, "learning_rate": 4.258186751395873e-06, "loss": 0.2924, "step": 12400 }, { "epoch": 1.7781760825924864, "grad_norm": 0.2775627076625824, "learning_rate": 4.257361751177545e-06, "loss": 0.2958, "step": 12401 }, { "epoch": 1.7783194723257814, "grad_norm": 0.29064202308654785, "learning_rate": 4.256536771633126e-06, "loss": 0.2993, "step": 12402 }, { "epoch": 1.7784628620590766, "grad_norm": 0.2868717610836029, "learning_rate": 4.255711812785577e-06, "loss": 0.2851, "step": 12403 }, { "epoch": 1.7786062517923718, "grad_norm": 0.26709988713264465, "learning_rate": 4.254886874657866e-06, "loss": 0.288, "step": 12404 }, { "epoch": 1.7787496415256667, "grad_norm": 0.2752986252307892, "learning_rate": 4.254061957272958e-06, "loss": 0.2923, "step": 12405 }, { "epoch": 1.7788930312589617, "grad_norm": 0.27570387721061707, "learning_rate": 4.2532370606538175e-06, "loss": 0.3121, "step": 12406 }, { "epoch": 1.7790364209922571, "grad_norm": 0.3092408776283264, "learning_rate": 4.252412184823411e-06, "loss": 0.2943, "step": 12407 }, { "epoch": 1.779179810725552, "grad_norm": 0.300062894821167, "learning_rate": 4.2515873298046976e-06, "loss": 0.3012, "step": 12408 }, { "epoch": 1.779323200458847, "grad_norm": 0.2847423851490021, "learning_rate": 4.250762495620641e-06, "loss": 0.3095, "step": 12409 }, { "epoch": 1.7794665901921423, "grad_norm": 0.27869564294815063, "learning_rate": 4.249937682294205e-06, "loss": 0.3053, "step": 12410 }, { "epoch": 1.7796099799254375, "grad_norm": 0.27261707186698914, "learning_rate": 4.24911288984835e-06, "loss": 0.2996, "step": 12411 }, { "epoch": 1.7797533696587324, "grad_norm": 0.2848823368549347, "learning_rate": 4.248288118306036e-06, "loss": 0.3052, "step": 12412 }, { "epoch": 1.7798967593920274, "grad_norm": 0.2845960259437561, "learning_rate": 4.247463367690227e-06, "loss": 0.2911, "step": 12413 }, { "epoch": 1.7800401491253226, "grad_norm": 0.2906392216682434, "learning_rate": 4.246638638023879e-06, "loss": 0.2937, "step": 12414 }, { "epoch": 1.7801835388586178, "grad_norm": 0.2627737522125244, "learning_rate": 4.245813929329951e-06, "loss": 0.3011, "step": 12415 }, { "epoch": 1.7803269285919128, "grad_norm": 0.27553123235702515, "learning_rate": 4.244989241631404e-06, "loss": 0.3021, "step": 12416 }, { "epoch": 1.780470318325208, "grad_norm": 0.29333996772766113, "learning_rate": 4.244164574951195e-06, "loss": 0.3054, "step": 12417 }, { "epoch": 1.7806137080585032, "grad_norm": 0.28925761580467224, "learning_rate": 4.243339929312283e-06, "loss": 0.3036, "step": 12418 }, { "epoch": 1.7807570977917981, "grad_norm": 0.29254233837127686, "learning_rate": 4.242515304737622e-06, "loss": 0.3069, "step": 12419 }, { "epoch": 1.7809004875250931, "grad_norm": 0.28460854291915894, "learning_rate": 4.24169070125017e-06, "loss": 0.3075, "step": 12420 }, { "epoch": 1.7810438772583883, "grad_norm": 0.2825569808483124, "learning_rate": 4.2408661188728815e-06, "loss": 0.2747, "step": 12421 }, { "epoch": 1.7811872669916835, "grad_norm": 0.3068403899669647, "learning_rate": 4.240041557628713e-06, "loss": 0.2919, "step": 12422 }, { "epoch": 1.7813306567249785, "grad_norm": 0.298344224691391, "learning_rate": 4.239217017540618e-06, "loss": 0.2945, "step": 12423 }, { "epoch": 1.7814740464582735, "grad_norm": 0.29783493280410767, "learning_rate": 4.2383924986315525e-06, "loss": 0.2963, "step": 12424 }, { "epoch": 1.7816174361915686, "grad_norm": 0.2804144620895386, "learning_rate": 4.237568000924468e-06, "loss": 0.2994, "step": 12425 }, { "epoch": 1.7817608259248638, "grad_norm": 0.26931247115135193, "learning_rate": 4.236743524442316e-06, "loss": 0.314, "step": 12426 }, { "epoch": 1.7819042156581588, "grad_norm": 0.27582067251205444, "learning_rate": 4.235919069208051e-06, "loss": 0.329, "step": 12427 }, { "epoch": 1.782047605391454, "grad_norm": 0.2934539318084717, "learning_rate": 4.2350946352446236e-06, "loss": 0.3032, "step": 12428 }, { "epoch": 1.7821909951247492, "grad_norm": 0.28461578488349915, "learning_rate": 4.234270222574986e-06, "loss": 0.2863, "step": 12429 }, { "epoch": 1.7823343848580442, "grad_norm": 0.26314577460289, "learning_rate": 4.233445831222087e-06, "loss": 0.3042, "step": 12430 }, { "epoch": 1.7824777745913392, "grad_norm": 0.2719612419605255, "learning_rate": 4.232621461208876e-06, "loss": 0.2935, "step": 12431 }, { "epoch": 1.7826211643246344, "grad_norm": 0.275873601436615, "learning_rate": 4.2317971125583035e-06, "loss": 0.2819, "step": 12432 }, { "epoch": 1.7827645540579296, "grad_norm": 0.2827873229980469, "learning_rate": 4.230972785293317e-06, "loss": 0.3269, "step": 12433 }, { "epoch": 1.7829079437912245, "grad_norm": 0.2634034752845764, "learning_rate": 4.230148479436867e-06, "loss": 0.3143, "step": 12434 }, { "epoch": 1.7830513335245195, "grad_norm": 0.2787180244922638, "learning_rate": 4.2293241950118975e-06, "loss": 0.3071, "step": 12435 }, { "epoch": 1.7831947232578147, "grad_norm": 0.2589346468448639, "learning_rate": 4.228499932041357e-06, "loss": 0.3093, "step": 12436 }, { "epoch": 1.78333811299111, "grad_norm": 0.2616075873374939, "learning_rate": 4.227675690548192e-06, "loss": 0.3174, "step": 12437 }, { "epoch": 1.7834815027244049, "grad_norm": 0.27884161472320557, "learning_rate": 4.2268514705553475e-06, "loss": 0.3045, "step": 12438 }, { "epoch": 1.7836248924577, "grad_norm": 0.28186237812042236, "learning_rate": 4.2260272720857685e-06, "loss": 0.2875, "step": 12439 }, { "epoch": 1.7837682821909953, "grad_norm": 0.26180723309516907, "learning_rate": 4.2252030951624005e-06, "loss": 0.2903, "step": 12440 }, { "epoch": 1.7839116719242902, "grad_norm": 0.2707076072692871, "learning_rate": 4.224378939808186e-06, "loss": 0.3079, "step": 12441 }, { "epoch": 1.7840550616575852, "grad_norm": 0.26858314871788025, "learning_rate": 4.223554806046069e-06, "loss": 0.3286, "step": 12442 }, { "epoch": 1.7841984513908804, "grad_norm": 0.2668752074241638, "learning_rate": 4.2227306938989905e-06, "loss": 0.3138, "step": 12443 }, { "epoch": 1.7843418411241756, "grad_norm": 0.2622552514076233, "learning_rate": 4.221906603389895e-06, "loss": 0.2973, "step": 12444 }, { "epoch": 1.7844852308574706, "grad_norm": 0.2831135392189026, "learning_rate": 4.221082534541724e-06, "loss": 0.3105, "step": 12445 }, { "epoch": 1.7846286205907655, "grad_norm": 0.2820178270339966, "learning_rate": 4.220258487377416e-06, "loss": 0.2918, "step": 12446 }, { "epoch": 1.7847720103240607, "grad_norm": 0.29545068740844727, "learning_rate": 4.219434461919911e-06, "loss": 0.2906, "step": 12447 }, { "epoch": 1.784915400057356, "grad_norm": 0.2622465193271637, "learning_rate": 4.2186104581921505e-06, "loss": 0.2951, "step": 12448 }, { "epoch": 1.785058789790651, "grad_norm": 0.27751702070236206, "learning_rate": 4.217786476217073e-06, "loss": 0.3012, "step": 12449 }, { "epoch": 1.785202179523946, "grad_norm": 0.2740273177623749, "learning_rate": 4.216962516017616e-06, "loss": 0.319, "step": 12450 }, { "epoch": 1.7853455692572413, "grad_norm": 0.26685601472854614, "learning_rate": 4.2161385776167194e-06, "loss": 0.3119, "step": 12451 }, { "epoch": 1.7854889589905363, "grad_norm": 0.2790782153606415, "learning_rate": 4.215314661037318e-06, "loss": 0.2967, "step": 12452 }, { "epoch": 1.7856323487238313, "grad_norm": 0.26804959774017334, "learning_rate": 4.21449076630235e-06, "loss": 0.3092, "step": 12453 }, { "epoch": 1.7857757384571264, "grad_norm": 0.29660946130752563, "learning_rate": 4.21366689343475e-06, "loss": 0.3067, "step": 12454 }, { "epoch": 1.7859191281904216, "grad_norm": 0.2761249542236328, "learning_rate": 4.212843042457454e-06, "loss": 0.3, "step": 12455 }, { "epoch": 1.7860625179237166, "grad_norm": 0.2554258704185486, "learning_rate": 4.2120192133934e-06, "loss": 0.3077, "step": 12456 }, { "epoch": 1.7862059076570116, "grad_norm": 0.2885743975639343, "learning_rate": 4.211195406265516e-06, "loss": 0.3317, "step": 12457 }, { "epoch": 1.786349297390307, "grad_norm": 0.2985903024673462, "learning_rate": 4.210371621096739e-06, "loss": 0.3075, "step": 12458 }, { "epoch": 1.786492687123602, "grad_norm": 0.27247878909111023, "learning_rate": 4.20954785791e-06, "loss": 0.2873, "step": 12459 }, { "epoch": 1.786636076856897, "grad_norm": 0.3210727870464325, "learning_rate": 4.208724116728233e-06, "loss": 0.3278, "step": 12460 }, { "epoch": 1.7867794665901922, "grad_norm": 0.2917569875717163, "learning_rate": 4.207900397574372e-06, "loss": 0.3107, "step": 12461 }, { "epoch": 1.7869228563234874, "grad_norm": 0.2801622152328491, "learning_rate": 4.207076700471343e-06, "loss": 0.2892, "step": 12462 }, { "epoch": 1.7870662460567823, "grad_norm": 0.26646721363067627, "learning_rate": 4.20625302544208e-06, "loss": 0.3135, "step": 12463 }, { "epoch": 1.7872096357900773, "grad_norm": 0.28557485342025757, "learning_rate": 4.2054293725095105e-06, "loss": 0.3106, "step": 12464 }, { "epoch": 1.7873530255233725, "grad_norm": 0.2918827533721924, "learning_rate": 4.204605741696566e-06, "loss": 0.2888, "step": 12465 }, { "epoch": 1.7874964152566677, "grad_norm": 0.30384138226509094, "learning_rate": 4.203782133026174e-06, "loss": 0.3161, "step": 12466 }, { "epoch": 1.7876398049899627, "grad_norm": 0.2889045178890228, "learning_rate": 4.202958546521263e-06, "loss": 0.2901, "step": 12467 }, { "epoch": 1.7877831947232579, "grad_norm": 0.25700104236602783, "learning_rate": 4.202134982204759e-06, "loss": 0.2943, "step": 12468 }, { "epoch": 1.787926584456553, "grad_norm": 0.2962987720966339, "learning_rate": 4.201311440099591e-06, "loss": 0.3014, "step": 12469 }, { "epoch": 1.788069974189848, "grad_norm": 0.28436923027038574, "learning_rate": 4.200487920228682e-06, "loss": 0.3058, "step": 12470 }, { "epoch": 1.788213363923143, "grad_norm": 0.262209951877594, "learning_rate": 4.199664422614961e-06, "loss": 0.292, "step": 12471 }, { "epoch": 1.7883567536564382, "grad_norm": 0.268849641084671, "learning_rate": 4.198840947281352e-06, "loss": 0.2986, "step": 12472 }, { "epoch": 1.7885001433897334, "grad_norm": 0.2726248502731323, "learning_rate": 4.198017494250778e-06, "loss": 0.2936, "step": 12473 }, { "epoch": 1.7886435331230284, "grad_norm": 0.2702620029449463, "learning_rate": 4.197194063546162e-06, "loss": 0.2871, "step": 12474 }, { "epoch": 1.7887869228563233, "grad_norm": 0.27243945002555847, "learning_rate": 4.196370655190429e-06, "loss": 0.3145, "step": 12475 }, { "epoch": 1.7889303125896185, "grad_norm": 0.28377947211265564, "learning_rate": 4.195547269206501e-06, "loss": 0.307, "step": 12476 }, { "epoch": 1.7890737023229137, "grad_norm": 0.27153462171554565, "learning_rate": 4.194723905617299e-06, "loss": 0.3098, "step": 12477 }, { "epoch": 1.7892170920562087, "grad_norm": 0.2823118567466736, "learning_rate": 4.193900564445746e-06, "loss": 0.303, "step": 12478 }, { "epoch": 1.789360481789504, "grad_norm": 0.2443789690732956, "learning_rate": 4.19307724571476e-06, "loss": 0.2898, "step": 12479 }, { "epoch": 1.789503871522799, "grad_norm": 0.2510945498943329, "learning_rate": 4.192253949447262e-06, "loss": 0.2809, "step": 12480 }, { "epoch": 1.789647261256094, "grad_norm": 0.28204768896102905, "learning_rate": 4.1914306756661726e-06, "loss": 0.3002, "step": 12481 }, { "epoch": 1.789790650989389, "grad_norm": 0.2622280418872833, "learning_rate": 4.190607424394408e-06, "loss": 0.3134, "step": 12482 }, { "epoch": 1.7899340407226842, "grad_norm": 0.2578722834587097, "learning_rate": 4.189784195654892e-06, "loss": 0.3071, "step": 12483 }, { "epoch": 1.7900774304559794, "grad_norm": 0.2644332945346832, "learning_rate": 4.188960989470533e-06, "loss": 0.2942, "step": 12484 }, { "epoch": 1.7902208201892744, "grad_norm": 0.2628699839115143, "learning_rate": 4.1881378058642535e-06, "loss": 0.2935, "step": 12485 }, { "epoch": 1.7903642099225694, "grad_norm": 0.26112648844718933, "learning_rate": 4.187314644858968e-06, "loss": 0.3028, "step": 12486 }, { "epoch": 1.7905075996558646, "grad_norm": 0.2710828483104706, "learning_rate": 4.1864915064775936e-06, "loss": 0.3204, "step": 12487 }, { "epoch": 1.7906509893891598, "grad_norm": 0.27038195729255676, "learning_rate": 4.185668390743045e-06, "loss": 0.2865, "step": 12488 }, { "epoch": 1.7907943791224548, "grad_norm": 0.26504266262054443, "learning_rate": 4.184845297678234e-06, "loss": 0.3102, "step": 12489 }, { "epoch": 1.79093776885575, "grad_norm": 0.26577579975128174, "learning_rate": 4.184022227306077e-06, "loss": 0.3083, "step": 12490 }, { "epoch": 1.7910811585890452, "grad_norm": 0.2909630239009857, "learning_rate": 4.183199179649485e-06, "loss": 0.3219, "step": 12491 }, { "epoch": 1.7912245483223401, "grad_norm": 0.24592143297195435, "learning_rate": 4.182376154731373e-06, "loss": 0.3079, "step": 12492 }, { "epoch": 1.791367938055635, "grad_norm": 0.28479140996932983, "learning_rate": 4.1815531525746485e-06, "loss": 0.3248, "step": 12493 }, { "epoch": 1.7915113277889303, "grad_norm": 0.2715193033218384, "learning_rate": 4.180730173202228e-06, "loss": 0.3007, "step": 12494 }, { "epoch": 1.7916547175222255, "grad_norm": 0.2681690752506256, "learning_rate": 4.179907216637018e-06, "loss": 0.3012, "step": 12495 }, { "epoch": 1.7917981072555205, "grad_norm": 0.28131139278411865, "learning_rate": 4.17908428290193e-06, "loss": 0.304, "step": 12496 }, { "epoch": 1.7919414969888154, "grad_norm": 0.27668413519859314, "learning_rate": 4.178261372019872e-06, "loss": 0.3185, "step": 12497 }, { "epoch": 1.7920848867221109, "grad_norm": 0.2951943874359131, "learning_rate": 4.1774384840137536e-06, "loss": 0.2964, "step": 12498 }, { "epoch": 1.7922282764554058, "grad_norm": 0.30867740511894226, "learning_rate": 4.176615618906484e-06, "loss": 0.3089, "step": 12499 }, { "epoch": 1.7923716661887008, "grad_norm": 0.2681090235710144, "learning_rate": 4.175792776720968e-06, "loss": 0.2999, "step": 12500 }, { "epoch": 1.792515055921996, "grad_norm": 0.26121771335601807, "learning_rate": 4.174969957480112e-06, "loss": 0.3069, "step": 12501 }, { "epoch": 1.7926584456552912, "grad_norm": 0.28305092453956604, "learning_rate": 4.174147161206824e-06, "loss": 0.3007, "step": 12502 }, { "epoch": 1.7928018353885862, "grad_norm": 0.2633973956108093, "learning_rate": 4.173324387924008e-06, "loss": 0.3036, "step": 12503 }, { "epoch": 1.7929452251218811, "grad_norm": 0.2631953954696655, "learning_rate": 4.17250163765457e-06, "loss": 0.296, "step": 12504 }, { "epoch": 1.7930886148551763, "grad_norm": 0.2933470606803894, "learning_rate": 4.171678910421414e-06, "loss": 0.3067, "step": 12505 }, { "epoch": 1.7932320045884715, "grad_norm": 0.31562110781669617, "learning_rate": 4.170856206247443e-06, "loss": 0.3172, "step": 12506 }, { "epoch": 1.7933753943217665, "grad_norm": 0.27621546387672424, "learning_rate": 4.170033525155559e-06, "loss": 0.313, "step": 12507 }, { "epoch": 1.7935187840550617, "grad_norm": 0.2783682346343994, "learning_rate": 4.169210867168665e-06, "loss": 0.3032, "step": 12508 }, { "epoch": 1.793662173788357, "grad_norm": 0.25639575719833374, "learning_rate": 4.168388232309663e-06, "loss": 0.3035, "step": 12509 }, { "epoch": 1.7938055635216519, "grad_norm": 0.2906784415245056, "learning_rate": 4.167565620601454e-06, "loss": 0.3057, "step": 12510 }, { "epoch": 1.7939489532549469, "grad_norm": 0.2878653407096863, "learning_rate": 4.1667430320669364e-06, "loss": 0.3232, "step": 12511 }, { "epoch": 1.794092342988242, "grad_norm": 0.26493480801582336, "learning_rate": 4.165920466729011e-06, "loss": 0.2897, "step": 12512 }, { "epoch": 1.7942357327215372, "grad_norm": 0.2858506441116333, "learning_rate": 4.165097924610577e-06, "loss": 0.3021, "step": 12513 }, { "epoch": 1.7943791224548322, "grad_norm": 0.2659570276737213, "learning_rate": 4.164275405734532e-06, "loss": 0.3013, "step": 12514 }, { "epoch": 1.7945225121881272, "grad_norm": 0.28167471289634705, "learning_rate": 4.163452910123775e-06, "loss": 0.3111, "step": 12515 }, { "epoch": 1.7946659019214224, "grad_norm": 0.27206987142562866, "learning_rate": 4.162630437801202e-06, "loss": 0.3089, "step": 12516 }, { "epoch": 1.7948092916547176, "grad_norm": 0.2526441812515259, "learning_rate": 4.161807988789709e-06, "loss": 0.2883, "step": 12517 }, { "epoch": 1.7949526813880126, "grad_norm": 0.2697303593158722, "learning_rate": 4.160985563112191e-06, "loss": 0.2955, "step": 12518 }, { "epoch": 1.7950960711213078, "grad_norm": 0.2731567323207855, "learning_rate": 4.160163160791546e-06, "loss": 0.3124, "step": 12519 }, { "epoch": 1.795239460854603, "grad_norm": 0.2703274190425873, "learning_rate": 4.159340781850666e-06, "loss": 0.2944, "step": 12520 }, { "epoch": 1.795382850587898, "grad_norm": 0.26905637979507446, "learning_rate": 4.158518426312448e-06, "loss": 0.3002, "step": 12521 }, { "epoch": 1.795526240321193, "grad_norm": 0.26195159554481506, "learning_rate": 4.15769609419978e-06, "loss": 0.2946, "step": 12522 }, { "epoch": 1.795669630054488, "grad_norm": 0.2859434187412262, "learning_rate": 4.156873785535556e-06, "loss": 0.3022, "step": 12523 }, { "epoch": 1.7958130197877833, "grad_norm": 0.2592528164386749, "learning_rate": 4.15605150034267e-06, "loss": 0.294, "step": 12524 }, { "epoch": 1.7959564095210783, "grad_norm": 0.27863553166389465, "learning_rate": 4.155229238644012e-06, "loss": 0.2989, "step": 12525 }, { "epoch": 1.7960997992543732, "grad_norm": 0.2790659964084625, "learning_rate": 4.1544070004624744e-06, "loss": 0.2972, "step": 12526 }, { "epoch": 1.7962431889876684, "grad_norm": 0.27755436301231384, "learning_rate": 4.153584785820943e-06, "loss": 0.3186, "step": 12527 }, { "epoch": 1.7963865787209636, "grad_norm": 0.25802895426750183, "learning_rate": 4.15276259474231e-06, "loss": 0.3017, "step": 12528 }, { "epoch": 1.7965299684542586, "grad_norm": 0.29594022035598755, "learning_rate": 4.1519404272494624e-06, "loss": 0.3126, "step": 12529 }, { "epoch": 1.7966733581875538, "grad_norm": 0.2798534035682678, "learning_rate": 4.151118283365288e-06, "loss": 0.3092, "step": 12530 }, { "epoch": 1.796816747920849, "grad_norm": 0.2797037363052368, "learning_rate": 4.150296163112676e-06, "loss": 0.3127, "step": 12531 }, { "epoch": 1.796960137654144, "grad_norm": 0.2818979322910309, "learning_rate": 4.149474066514512e-06, "loss": 0.2968, "step": 12532 }, { "epoch": 1.797103527387439, "grad_norm": 0.2823482155799866, "learning_rate": 4.148651993593682e-06, "loss": 0.3014, "step": 12533 }, { "epoch": 1.7972469171207341, "grad_norm": 0.29849082231521606, "learning_rate": 4.147829944373069e-06, "loss": 0.2994, "step": 12534 }, { "epoch": 1.7973903068540293, "grad_norm": 0.30021679401397705, "learning_rate": 4.147007918875561e-06, "loss": 0.3086, "step": 12535 }, { "epoch": 1.7975336965873243, "grad_norm": 0.263035386800766, "learning_rate": 4.1461859171240385e-06, "loss": 0.295, "step": 12536 }, { "epoch": 1.7976770863206193, "grad_norm": 0.28209301829338074, "learning_rate": 4.14536393914139e-06, "loss": 0.2948, "step": 12537 }, { "epoch": 1.7978204760539145, "grad_norm": 0.254803866147995, "learning_rate": 4.1445419849504935e-06, "loss": 0.2959, "step": 12538 }, { "epoch": 1.7979638657872097, "grad_norm": 0.2869901955127716, "learning_rate": 4.143720054574231e-06, "loss": 0.3317, "step": 12539 }, { "epoch": 1.7981072555205047, "grad_norm": 0.28213125467300415, "learning_rate": 4.142898148035485e-06, "loss": 0.2991, "step": 12540 }, { "epoch": 1.7982506452537999, "grad_norm": 0.26390793919563293, "learning_rate": 4.1420762653571365e-06, "loss": 0.3367, "step": 12541 }, { "epoch": 1.798394034987095, "grad_norm": 0.2757679522037506, "learning_rate": 4.141254406562066e-06, "loss": 0.3052, "step": 12542 }, { "epoch": 1.79853742472039, "grad_norm": 0.261086106300354, "learning_rate": 4.1404325716731505e-06, "loss": 0.302, "step": 12543 }, { "epoch": 1.798680814453685, "grad_norm": 0.26440712809562683, "learning_rate": 4.13961076071327e-06, "loss": 0.3222, "step": 12544 }, { "epoch": 1.7988242041869802, "grad_norm": 0.29516759514808655, "learning_rate": 4.138788973705302e-06, "loss": 0.3199, "step": 12545 }, { "epoch": 1.7989675939202754, "grad_norm": 0.2962626516819, "learning_rate": 4.137967210672124e-06, "loss": 0.3079, "step": 12546 }, { "epoch": 1.7991109836535704, "grad_norm": 0.25393393635749817, "learning_rate": 4.137145471636615e-06, "loss": 0.2963, "step": 12547 }, { "epoch": 1.7992543733868653, "grad_norm": 0.26115092635154724, "learning_rate": 4.136323756621649e-06, "loss": 0.2974, "step": 12548 }, { "epoch": 1.7993977631201608, "grad_norm": 0.26714441180229187, "learning_rate": 4.135502065650099e-06, "loss": 0.2948, "step": 12549 }, { "epoch": 1.7995411528534557, "grad_norm": 0.2753821909427643, "learning_rate": 4.134680398744842e-06, "loss": 0.3037, "step": 12550 }, { "epoch": 1.7996845425867507, "grad_norm": 0.26700666546821594, "learning_rate": 4.13385875592875e-06, "loss": 0.3088, "step": 12551 }, { "epoch": 1.799827932320046, "grad_norm": 0.26572030782699585, "learning_rate": 4.1330371372247e-06, "loss": 0.2874, "step": 12552 }, { "epoch": 1.799971322053341, "grad_norm": 0.26775068044662476, "learning_rate": 4.1322155426555635e-06, "loss": 0.2946, "step": 12553 }, { "epoch": 1.800114711786636, "grad_norm": 0.2873550355434418, "learning_rate": 4.1313939722442094e-06, "loss": 0.301, "step": 12554 }, { "epoch": 1.800258101519931, "grad_norm": 0.2714047133922577, "learning_rate": 4.1305724260135115e-06, "loss": 0.3031, "step": 12555 }, { "epoch": 1.8004014912532262, "grad_norm": 0.27136850357055664, "learning_rate": 4.1297509039863395e-06, "loss": 0.3131, "step": 12556 }, { "epoch": 1.8005448809865214, "grad_norm": 0.28226879239082336, "learning_rate": 4.128929406185564e-06, "loss": 0.2822, "step": 12557 }, { "epoch": 1.8006882707198164, "grad_norm": 0.2731815576553345, "learning_rate": 4.128107932634054e-06, "loss": 0.3045, "step": 12558 }, { "epoch": 1.8008316604531116, "grad_norm": 0.2762727737426758, "learning_rate": 4.127286483354679e-06, "loss": 0.3025, "step": 12559 }, { "epoch": 1.8009750501864068, "grad_norm": 0.2783254384994507, "learning_rate": 4.126465058370304e-06, "loss": 0.3127, "step": 12560 }, { "epoch": 1.8011184399197018, "grad_norm": 0.2725997269153595, "learning_rate": 4.125643657703798e-06, "loss": 0.317, "step": 12561 }, { "epoch": 1.8012618296529967, "grad_norm": 0.2736687958240509, "learning_rate": 4.124822281378028e-06, "loss": 0.2929, "step": 12562 }, { "epoch": 1.801405219386292, "grad_norm": 0.2683769166469574, "learning_rate": 4.124000929415859e-06, "loss": 0.2926, "step": 12563 }, { "epoch": 1.8015486091195871, "grad_norm": 0.27396920323371887, "learning_rate": 4.123179601840159e-06, "loss": 0.2933, "step": 12564 }, { "epoch": 1.8016919988528821, "grad_norm": 0.2889441251754761, "learning_rate": 4.122358298673788e-06, "loss": 0.3069, "step": 12565 }, { "epoch": 1.801835388586177, "grad_norm": 0.26448556780815125, "learning_rate": 4.121537019939612e-06, "loss": 0.3093, "step": 12566 }, { "epoch": 1.8019787783194723, "grad_norm": 0.2682490646839142, "learning_rate": 4.120715765660495e-06, "loss": 0.3078, "step": 12567 }, { "epoch": 1.8021221680527675, "grad_norm": 0.27983132004737854, "learning_rate": 4.119894535859297e-06, "loss": 0.3115, "step": 12568 }, { "epoch": 1.8022655577860625, "grad_norm": 0.2812883257865906, "learning_rate": 4.119073330558882e-06, "loss": 0.2976, "step": 12569 }, { "epoch": 1.8024089475193577, "grad_norm": 0.2877756357192993, "learning_rate": 4.11825214978211e-06, "loss": 0.313, "step": 12570 }, { "epoch": 1.8025523372526528, "grad_norm": 0.26164254546165466, "learning_rate": 4.117430993551843e-06, "loss": 0.3013, "step": 12571 }, { "epoch": 1.8026957269859478, "grad_norm": 0.2650739848613739, "learning_rate": 4.116609861890938e-06, "loss": 0.3023, "step": 12572 }, { "epoch": 1.8028391167192428, "grad_norm": 0.28909963369369507, "learning_rate": 4.115788754822256e-06, "loss": 0.2928, "step": 12573 }, { "epoch": 1.802982506452538, "grad_norm": 0.2618793249130249, "learning_rate": 4.114967672368654e-06, "loss": 0.3157, "step": 12574 }, { "epoch": 1.8031258961858332, "grad_norm": 0.2836885452270508, "learning_rate": 4.114146614552994e-06, "loss": 0.2866, "step": 12575 }, { "epoch": 1.8032692859191282, "grad_norm": 0.279080867767334, "learning_rate": 4.113325581398126e-06, "loss": 0.3022, "step": 12576 }, { "epoch": 1.8034126756524231, "grad_norm": 0.2656572461128235, "learning_rate": 4.112504572926912e-06, "loss": 0.3094, "step": 12577 }, { "epoch": 1.8035560653857183, "grad_norm": 0.28600189089775085, "learning_rate": 4.111683589162204e-06, "loss": 0.319, "step": 12578 }, { "epoch": 1.8036994551190135, "grad_norm": 0.26757219433784485, "learning_rate": 4.110862630126858e-06, "loss": 0.3135, "step": 12579 }, { "epoch": 1.8038428448523085, "grad_norm": 0.31136828660964966, "learning_rate": 4.110041695843731e-06, "loss": 0.3194, "step": 12580 }, { "epoch": 1.8039862345856037, "grad_norm": 0.28702786564826965, "learning_rate": 4.109220786335672e-06, "loss": 0.2888, "step": 12581 }, { "epoch": 1.804129624318899, "grad_norm": 0.2637832760810852, "learning_rate": 4.108399901625536e-06, "loss": 0.3136, "step": 12582 }, { "epoch": 1.8042730140521939, "grad_norm": 0.2844628691673279, "learning_rate": 4.107579041736176e-06, "loss": 0.2914, "step": 12583 }, { "epoch": 1.8044164037854888, "grad_norm": 0.2624465525150299, "learning_rate": 4.106758206690442e-06, "loss": 0.2911, "step": 12584 }, { "epoch": 1.804559793518784, "grad_norm": 0.27494001388549805, "learning_rate": 4.105937396511186e-06, "loss": 0.3159, "step": 12585 }, { "epoch": 1.8047031832520792, "grad_norm": 0.28191712498664856, "learning_rate": 4.105116611221258e-06, "loss": 0.3119, "step": 12586 }, { "epoch": 1.8048465729853742, "grad_norm": 0.2860889732837677, "learning_rate": 4.1042958508435065e-06, "loss": 0.3012, "step": 12587 }, { "epoch": 1.8049899627186692, "grad_norm": 0.2819702625274658, "learning_rate": 4.103475115400781e-06, "loss": 0.2972, "step": 12588 }, { "epoch": 1.8051333524519646, "grad_norm": 0.28798598051071167, "learning_rate": 4.102654404915928e-06, "loss": 0.3296, "step": 12589 }, { "epoch": 1.8052767421852596, "grad_norm": 0.2815008759498596, "learning_rate": 4.101833719411797e-06, "loss": 0.3084, "step": 12590 }, { "epoch": 1.8054201319185545, "grad_norm": 0.26812127232551575, "learning_rate": 4.1010130589112354e-06, "loss": 0.2831, "step": 12591 }, { "epoch": 1.8055635216518497, "grad_norm": 0.2616496682167053, "learning_rate": 4.1001924234370845e-06, "loss": 0.2921, "step": 12592 }, { "epoch": 1.805706911385145, "grad_norm": 0.29042962193489075, "learning_rate": 4.099371813012193e-06, "loss": 0.3106, "step": 12593 }, { "epoch": 1.80585030111844, "grad_norm": 0.29924044013023376, "learning_rate": 4.098551227659405e-06, "loss": 0.3134, "step": 12594 }, { "epoch": 1.805993690851735, "grad_norm": 0.26551929116249084, "learning_rate": 4.097730667401564e-06, "loss": 0.3087, "step": 12595 }, { "epoch": 1.80613708058503, "grad_norm": 0.2905448377132416, "learning_rate": 4.096910132261514e-06, "loss": 0.308, "step": 12596 }, { "epoch": 1.8062804703183253, "grad_norm": 0.26898032426834106, "learning_rate": 4.096089622262097e-06, "loss": 0.3038, "step": 12597 }, { "epoch": 1.8064238600516203, "grad_norm": 0.28220176696777344, "learning_rate": 4.095269137426153e-06, "loss": 0.2968, "step": 12598 }, { "epoch": 1.8065672497849155, "grad_norm": 0.28835225105285645, "learning_rate": 4.094448677776525e-06, "loss": 0.3044, "step": 12599 }, { "epoch": 1.8067106395182106, "grad_norm": 0.25749990344047546, "learning_rate": 4.093628243336053e-06, "loss": 0.3059, "step": 12600 }, { "epoch": 1.8068540292515056, "grad_norm": 0.2830556035041809, "learning_rate": 4.092807834127575e-06, "loss": 0.3001, "step": 12601 }, { "epoch": 1.8069974189848006, "grad_norm": 0.27981510758399963, "learning_rate": 4.091987450173936e-06, "loss": 0.3186, "step": 12602 }, { "epoch": 1.8071408087180958, "grad_norm": 0.2812577486038208, "learning_rate": 4.091167091497965e-06, "loss": 0.3282, "step": 12603 }, { "epoch": 1.807284198451391, "grad_norm": 0.2761622965335846, "learning_rate": 4.0903467581225056e-06, "loss": 0.312, "step": 12604 }, { "epoch": 1.807427588184686, "grad_norm": 0.27536436915397644, "learning_rate": 4.089526450070392e-06, "loss": 0.3007, "step": 12605 }, { "epoch": 1.807570977917981, "grad_norm": 0.29128599166870117, "learning_rate": 4.0887061673644626e-06, "loss": 0.2918, "step": 12606 }, { "epoch": 1.8077143676512761, "grad_norm": 0.28282594680786133, "learning_rate": 4.0878859100275525e-06, "loss": 0.3111, "step": 12607 }, { "epoch": 1.8078577573845713, "grad_norm": 0.2763548791408539, "learning_rate": 4.087065678082494e-06, "loss": 0.3165, "step": 12608 }, { "epoch": 1.8080011471178663, "grad_norm": 0.2937743663787842, "learning_rate": 4.0862454715521234e-06, "loss": 0.2769, "step": 12609 }, { "epoch": 1.8081445368511615, "grad_norm": 0.2876146733760834, "learning_rate": 4.085425290459273e-06, "loss": 0.3266, "step": 12610 }, { "epoch": 1.8082879265844567, "grad_norm": 0.2752096354961395, "learning_rate": 4.084605134826775e-06, "loss": 0.277, "step": 12611 }, { "epoch": 1.8084313163177517, "grad_norm": 0.29985538125038147, "learning_rate": 4.083785004677463e-06, "loss": 0.3197, "step": 12612 }, { "epoch": 1.8085747060510466, "grad_norm": 0.26565852761268616, "learning_rate": 4.082964900034167e-06, "loss": 0.2944, "step": 12613 }, { "epoch": 1.8087180957843418, "grad_norm": 0.280055969953537, "learning_rate": 4.0821448209197175e-06, "loss": 0.3128, "step": 12614 }, { "epoch": 1.808861485517637, "grad_norm": 0.2638902962207794, "learning_rate": 4.081324767356943e-06, "loss": 0.3151, "step": 12615 }, { "epoch": 1.809004875250932, "grad_norm": 0.26687994599342346, "learning_rate": 4.080504739368675e-06, "loss": 0.2813, "step": 12616 }, { "epoch": 1.809148264984227, "grad_norm": 0.2847103178501129, "learning_rate": 4.07968473697774e-06, "loss": 0.314, "step": 12617 }, { "epoch": 1.8092916547175222, "grad_norm": 0.29835283756256104, "learning_rate": 4.078864760206968e-06, "loss": 0.3188, "step": 12618 }, { "epoch": 1.8094350444508174, "grad_norm": 0.2688935399055481, "learning_rate": 4.078044809079183e-06, "loss": 0.2813, "step": 12619 }, { "epoch": 1.8095784341841123, "grad_norm": 0.26053905487060547, "learning_rate": 4.077224883617212e-06, "loss": 0.311, "step": 12620 }, { "epoch": 1.8097218239174075, "grad_norm": 0.26944461464881897, "learning_rate": 4.07640498384388e-06, "loss": 0.2976, "step": 12621 }, { "epoch": 1.8098652136507027, "grad_norm": 0.29792940616607666, "learning_rate": 4.0755851097820135e-06, "loss": 0.2964, "step": 12622 }, { "epoch": 1.8100086033839977, "grad_norm": 0.27903032302856445, "learning_rate": 4.074765261454436e-06, "loss": 0.3107, "step": 12623 }, { "epoch": 1.8101519931172927, "grad_norm": 0.27293410897254944, "learning_rate": 4.0739454388839695e-06, "loss": 0.293, "step": 12624 }, { "epoch": 1.8102953828505879, "grad_norm": 0.2661271095275879, "learning_rate": 4.0731256420934375e-06, "loss": 0.3162, "step": 12625 }, { "epoch": 1.810438772583883, "grad_norm": 0.27248573303222656, "learning_rate": 4.072305871105661e-06, "loss": 0.2996, "step": 12626 }, { "epoch": 1.810582162317178, "grad_norm": 0.26523974537849426, "learning_rate": 4.071486125943463e-06, "loss": 0.3096, "step": 12627 }, { "epoch": 1.810725552050473, "grad_norm": 0.2968008518218994, "learning_rate": 4.0706664066296624e-06, "loss": 0.3055, "step": 12628 }, { "epoch": 1.8108689417837682, "grad_norm": 0.2794043719768524, "learning_rate": 4.0698467131870815e-06, "loss": 0.2934, "step": 12629 }, { "epoch": 1.8110123315170634, "grad_norm": 0.27892830967903137, "learning_rate": 4.069027045638536e-06, "loss": 0.3174, "step": 12630 }, { "epoch": 1.8111557212503584, "grad_norm": 0.262215256690979, "learning_rate": 4.0682074040068445e-06, "loss": 0.3079, "step": 12631 }, { "epoch": 1.8112991109836536, "grad_norm": 0.28345128893852234, "learning_rate": 4.067387788314826e-06, "loss": 0.2876, "step": 12632 }, { "epoch": 1.8114425007169488, "grad_norm": 0.27934592962265015, "learning_rate": 4.066568198585297e-06, "loss": 0.3094, "step": 12633 }, { "epoch": 1.8115858904502438, "grad_norm": 0.2832659184932709, "learning_rate": 4.065748634841073e-06, "loss": 0.3024, "step": 12634 }, { "epoch": 1.8117292801835387, "grad_norm": 0.26252925395965576, "learning_rate": 4.06492909710497e-06, "loss": 0.3024, "step": 12635 }, { "epoch": 1.811872669916834, "grad_norm": 0.2741644084453583, "learning_rate": 4.064109585399803e-06, "loss": 0.2906, "step": 12636 }, { "epoch": 1.8120160596501291, "grad_norm": 0.27333852648735046, "learning_rate": 4.063290099748384e-06, "loss": 0.3315, "step": 12637 }, { "epoch": 1.812159449383424, "grad_norm": 0.2563973069190979, "learning_rate": 4.06247064017353e-06, "loss": 0.2972, "step": 12638 }, { "epoch": 1.812302839116719, "grad_norm": 0.2791880965232849, "learning_rate": 4.06165120669805e-06, "loss": 0.2997, "step": 12639 }, { "epoch": 1.8124462288500145, "grad_norm": 0.2728976011276245, "learning_rate": 4.060831799344758e-06, "loss": 0.3183, "step": 12640 }, { "epoch": 1.8125896185833095, "grad_norm": 0.2772565186023712, "learning_rate": 4.060012418136462e-06, "loss": 0.3054, "step": 12641 }, { "epoch": 1.8127330083166044, "grad_norm": 0.256875604391098, "learning_rate": 4.0591930630959756e-06, "loss": 0.2935, "step": 12642 }, { "epoch": 1.8128763980498996, "grad_norm": 0.26299747824668884, "learning_rate": 4.058373734246106e-06, "loss": 0.2949, "step": 12643 }, { "epoch": 1.8130197877831948, "grad_norm": 0.26638785004615784, "learning_rate": 4.057554431609664e-06, "loss": 0.3008, "step": 12644 }, { "epoch": 1.8131631775164898, "grad_norm": 0.27084091305732727, "learning_rate": 4.056735155209459e-06, "loss": 0.3156, "step": 12645 }, { "epoch": 1.8133065672497848, "grad_norm": 0.29225051403045654, "learning_rate": 4.055915905068294e-06, "loss": 0.3048, "step": 12646 }, { "epoch": 1.81344995698308, "grad_norm": 0.27038338780403137, "learning_rate": 4.055096681208978e-06, "loss": 0.3132, "step": 12647 }, { "epoch": 1.8135933467163752, "grad_norm": 0.27139246463775635, "learning_rate": 4.054277483654316e-06, "loss": 0.3056, "step": 12648 }, { "epoch": 1.8137367364496702, "grad_norm": 0.268970251083374, "learning_rate": 4.0534583124271145e-06, "loss": 0.3092, "step": 12649 }, { "epoch": 1.8138801261829653, "grad_norm": 0.2680823504924774, "learning_rate": 4.0526391675501795e-06, "loss": 0.3033, "step": 12650 }, { "epoch": 1.8140235159162605, "grad_norm": 0.3230504095554352, "learning_rate": 4.051820049046311e-06, "loss": 0.2967, "step": 12651 }, { "epoch": 1.8141669056495555, "grad_norm": 0.2629866302013397, "learning_rate": 4.051000956938314e-06, "loss": 0.298, "step": 12652 }, { "epoch": 1.8143102953828505, "grad_norm": 0.2823849320411682, "learning_rate": 4.05018189124899e-06, "loss": 0.2999, "step": 12653 }, { "epoch": 1.8144536851161457, "grad_norm": 0.27862802147865295, "learning_rate": 4.049362852001141e-06, "loss": 0.297, "step": 12654 }, { "epoch": 1.8145970748494409, "grad_norm": 0.28530293703079224, "learning_rate": 4.048543839217569e-06, "loss": 0.3142, "step": 12655 }, { "epoch": 1.8147404645827359, "grad_norm": 0.2831805944442749, "learning_rate": 4.047724852921074e-06, "loss": 0.3126, "step": 12656 }, { "epoch": 1.8148838543160308, "grad_norm": 0.29066258668899536, "learning_rate": 4.046905893134452e-06, "loss": 0.288, "step": 12657 }, { "epoch": 1.815027244049326, "grad_norm": 0.28389978408813477, "learning_rate": 4.046086959880504e-06, "loss": 0.3036, "step": 12658 }, { "epoch": 1.8151706337826212, "grad_norm": 0.2698882818222046, "learning_rate": 4.045268053182028e-06, "loss": 0.2954, "step": 12659 }, { "epoch": 1.8153140235159162, "grad_norm": 0.26495087146759033, "learning_rate": 4.044449173061821e-06, "loss": 0.2937, "step": 12660 }, { "epoch": 1.8154574132492114, "grad_norm": 0.2651308476924896, "learning_rate": 4.04363031954268e-06, "loss": 0.3132, "step": 12661 }, { "epoch": 1.8156008029825066, "grad_norm": 0.28327512741088867, "learning_rate": 4.042811492647398e-06, "loss": 0.3045, "step": 12662 }, { "epoch": 1.8157441927158016, "grad_norm": 0.2749490737915039, "learning_rate": 4.041992692398772e-06, "loss": 0.2916, "step": 12663 }, { "epoch": 1.8158875824490965, "grad_norm": 0.2688998281955719, "learning_rate": 4.041173918819595e-06, "loss": 0.3006, "step": 12664 }, { "epoch": 1.8160309721823917, "grad_norm": 0.29474031925201416, "learning_rate": 4.0403551719326625e-06, "loss": 0.3221, "step": 12665 }, { "epoch": 1.816174361915687, "grad_norm": 0.27430641651153564, "learning_rate": 4.0395364517607646e-06, "loss": 0.3057, "step": 12666 }, { "epoch": 1.816317751648982, "grad_norm": 0.25672343373298645, "learning_rate": 4.038717758326698e-06, "loss": 0.2915, "step": 12667 }, { "epoch": 1.8164611413822769, "grad_norm": 0.26825860142707825, "learning_rate": 4.0378990916532455e-06, "loss": 0.2991, "step": 12668 }, { "epoch": 1.816604531115572, "grad_norm": 0.27902889251708984, "learning_rate": 4.037080451763205e-06, "loss": 0.2978, "step": 12669 }, { "epoch": 1.8167479208488673, "grad_norm": 0.2760777473449707, "learning_rate": 4.0362618386793616e-06, "loss": 0.2876, "step": 12670 }, { "epoch": 1.8168913105821622, "grad_norm": 0.2913076877593994, "learning_rate": 4.035443252424508e-06, "loss": 0.3098, "step": 12671 }, { "epoch": 1.8170347003154574, "grad_norm": 0.2681998908519745, "learning_rate": 4.034624693021431e-06, "loss": 0.2916, "step": 12672 }, { "epoch": 1.8171780900487526, "grad_norm": 0.2612820565700531, "learning_rate": 4.033806160492916e-06, "loss": 0.2968, "step": 12673 }, { "epoch": 1.8173214797820476, "grad_norm": 0.2843099534511566, "learning_rate": 4.0329876548617515e-06, "loss": 0.2938, "step": 12674 }, { "epoch": 1.8174648695153426, "grad_norm": 0.26712489128112793, "learning_rate": 4.032169176150723e-06, "loss": 0.3103, "step": 12675 }, { "epoch": 1.8176082592486378, "grad_norm": 0.2785189151763916, "learning_rate": 4.031350724382615e-06, "loss": 0.3188, "step": 12676 }, { "epoch": 1.817751648981933, "grad_norm": 0.2709949314594269, "learning_rate": 4.030532299580215e-06, "loss": 0.2971, "step": 12677 }, { "epoch": 1.817895038715228, "grad_norm": 0.2600674331188202, "learning_rate": 4.029713901766303e-06, "loss": 0.2902, "step": 12678 }, { "epoch": 1.818038428448523, "grad_norm": 0.27369633316993713, "learning_rate": 4.028895530963664e-06, "loss": 0.3093, "step": 12679 }, { "epoch": 1.8181818181818183, "grad_norm": 0.27884435653686523, "learning_rate": 4.0280771871950784e-06, "loss": 0.3099, "step": 12680 }, { "epoch": 1.8183252079151133, "grad_norm": 0.2888962924480438, "learning_rate": 4.027258870483329e-06, "loss": 0.2992, "step": 12681 }, { "epoch": 1.8184685976484083, "grad_norm": 0.27309292554855347, "learning_rate": 4.026440580851197e-06, "loss": 0.3032, "step": 12682 }, { "epoch": 1.8186119873817035, "grad_norm": 0.28052592277526855, "learning_rate": 4.025622318321464e-06, "loss": 0.3052, "step": 12683 }, { "epoch": 1.8187553771149987, "grad_norm": 0.2957136631011963, "learning_rate": 4.024804082916904e-06, "loss": 0.2998, "step": 12684 }, { "epoch": 1.8188987668482937, "grad_norm": 0.2815306782722473, "learning_rate": 4.023985874660298e-06, "loss": 0.3084, "step": 12685 }, { "epoch": 1.8190421565815886, "grad_norm": 0.27222880721092224, "learning_rate": 4.023167693574424e-06, "loss": 0.2887, "step": 12686 }, { "epoch": 1.8191855463148838, "grad_norm": 0.2801588475704193, "learning_rate": 4.022349539682059e-06, "loss": 0.2834, "step": 12687 }, { "epoch": 1.819328936048179, "grad_norm": 0.2758893072605133, "learning_rate": 4.02153141300598e-06, "loss": 0.3049, "step": 12688 }, { "epoch": 1.819472325781474, "grad_norm": 0.27096012234687805, "learning_rate": 4.02071331356896e-06, "loss": 0.3087, "step": 12689 }, { "epoch": 1.8196157155147692, "grad_norm": 0.2706374526023865, "learning_rate": 4.019895241393776e-06, "loss": 0.3142, "step": 12690 }, { "epoch": 1.8197591052480644, "grad_norm": 0.2717254161834717, "learning_rate": 4.019077196503199e-06, "loss": 0.3049, "step": 12691 }, { "epoch": 1.8199024949813594, "grad_norm": 0.2677290737628937, "learning_rate": 4.018259178920004e-06, "loss": 0.3038, "step": 12692 }, { "epoch": 1.8200458847146543, "grad_norm": 0.2632494270801544, "learning_rate": 4.0174411886669645e-06, "loss": 0.2969, "step": 12693 }, { "epoch": 1.8201892744479495, "grad_norm": 0.2762698233127594, "learning_rate": 4.0166232257668525e-06, "loss": 0.2961, "step": 12694 }, { "epoch": 1.8203326641812447, "grad_norm": 0.2631444036960602, "learning_rate": 4.015805290242435e-06, "loss": 0.2817, "step": 12695 }, { "epoch": 1.8204760539145397, "grad_norm": 0.2620847225189209, "learning_rate": 4.014987382116483e-06, "loss": 0.3088, "step": 12696 }, { "epoch": 1.8206194436478347, "grad_norm": 0.2800970673561096, "learning_rate": 4.014169501411768e-06, "loss": 0.3024, "step": 12697 }, { "epoch": 1.8207628333811299, "grad_norm": 0.2778295874595642, "learning_rate": 4.0133516481510585e-06, "loss": 0.2983, "step": 12698 }, { "epoch": 1.820906223114425, "grad_norm": 0.2647962272167206, "learning_rate": 4.012533822357123e-06, "loss": 0.3058, "step": 12699 }, { "epoch": 1.82104961284772, "grad_norm": 0.274443656206131, "learning_rate": 4.011716024052724e-06, "loss": 0.3087, "step": 12700 }, { "epoch": 1.8211930025810152, "grad_norm": 0.28798800706863403, "learning_rate": 4.0108982532606315e-06, "loss": 0.2999, "step": 12701 }, { "epoch": 1.8213363923143104, "grad_norm": 0.2778298854827881, "learning_rate": 4.01008051000361e-06, "loss": 0.2913, "step": 12702 }, { "epoch": 1.8214797820476054, "grad_norm": 0.28442588448524475, "learning_rate": 4.009262794304424e-06, "loss": 0.3002, "step": 12703 }, { "epoch": 1.8216231717809004, "grad_norm": 0.2781067490577698, "learning_rate": 4.008445106185839e-06, "loss": 0.2907, "step": 12704 }, { "epoch": 1.8217665615141956, "grad_norm": 0.27845174074172974, "learning_rate": 4.007627445670616e-06, "loss": 0.3066, "step": 12705 }, { "epoch": 1.8219099512474908, "grad_norm": 0.26154887676239014, "learning_rate": 4.006809812781518e-06, "loss": 0.3017, "step": 12706 }, { "epoch": 1.8220533409807858, "grad_norm": 0.27151399850845337, "learning_rate": 4.005992207541307e-06, "loss": 0.2954, "step": 12707 }, { "epoch": 1.8221967307140807, "grad_norm": 0.29233068227767944, "learning_rate": 4.005174629972745e-06, "loss": 0.3025, "step": 12708 }, { "epoch": 1.822340120447376, "grad_norm": 0.294008731842041, "learning_rate": 4.00435708009859e-06, "loss": 0.3142, "step": 12709 }, { "epoch": 1.8224835101806711, "grad_norm": 0.28014495968818665, "learning_rate": 4.003539557941604e-06, "loss": 0.2944, "step": 12710 }, { "epoch": 1.822626899913966, "grad_norm": 0.28340840339660645, "learning_rate": 4.002722063524542e-06, "loss": 0.2997, "step": 12711 }, { "epoch": 1.8227702896472613, "grad_norm": 0.25840580463409424, "learning_rate": 4.0019045968701645e-06, "loss": 0.2989, "step": 12712 }, { "epoch": 1.8229136793805565, "grad_norm": 0.2799089848995209, "learning_rate": 4.001087158001226e-06, "loss": 0.3136, "step": 12713 }, { "epoch": 1.8230570691138515, "grad_norm": 0.26791855692863464, "learning_rate": 4.000269746940486e-06, "loss": 0.2985, "step": 12714 }, { "epoch": 1.8232004588471464, "grad_norm": 0.2910268008708954, "learning_rate": 3.999452363710699e-06, "loss": 0.3078, "step": 12715 }, { "epoch": 1.8233438485804416, "grad_norm": 0.271618127822876, "learning_rate": 3.998635008334618e-06, "loss": 0.2779, "step": 12716 }, { "epoch": 1.8234872383137368, "grad_norm": 0.28046658635139465, "learning_rate": 3.997817680834998e-06, "loss": 0.305, "step": 12717 }, { "epoch": 1.8236306280470318, "grad_norm": 0.25779908895492554, "learning_rate": 3.997000381234592e-06, "loss": 0.2946, "step": 12718 }, { "epoch": 1.8237740177803268, "grad_norm": 0.25864356756210327, "learning_rate": 3.996183109556153e-06, "loss": 0.2882, "step": 12719 }, { "epoch": 1.8239174075136222, "grad_norm": 0.29238492250442505, "learning_rate": 3.995365865822432e-06, "loss": 0.3198, "step": 12720 }, { "epoch": 1.8240607972469172, "grad_norm": 0.30032607913017273, "learning_rate": 3.994548650056181e-06, "loss": 0.2985, "step": 12721 }, { "epoch": 1.8242041869802121, "grad_norm": 0.28807878494262695, "learning_rate": 3.993731462280147e-06, "loss": 0.3049, "step": 12722 }, { "epoch": 1.8243475767135073, "grad_norm": 0.26187625527381897, "learning_rate": 3.992914302517081e-06, "loss": 0.291, "step": 12723 }, { "epoch": 1.8244909664468025, "grad_norm": 0.27643057703971863, "learning_rate": 3.992097170789732e-06, "loss": 0.3088, "step": 12724 }, { "epoch": 1.8246343561800975, "grad_norm": 0.27552539110183716, "learning_rate": 3.991280067120847e-06, "loss": 0.2923, "step": 12725 }, { "epoch": 1.8247777459133925, "grad_norm": 0.2698775827884674, "learning_rate": 3.990462991533176e-06, "loss": 0.3, "step": 12726 }, { "epoch": 1.8249211356466877, "grad_norm": 0.2609288990497589, "learning_rate": 3.98964594404946e-06, "loss": 0.302, "step": 12727 }, { "epoch": 1.8250645253799829, "grad_norm": 0.2660011351108551, "learning_rate": 3.988828924692447e-06, "loss": 0.3053, "step": 12728 }, { "epoch": 1.8252079151132778, "grad_norm": 0.2812410891056061, "learning_rate": 3.988011933484881e-06, "loss": 0.2996, "step": 12729 }, { "epoch": 1.8253513048465728, "grad_norm": 0.2744177579879761, "learning_rate": 3.987194970449506e-06, "loss": 0.2977, "step": 12730 }, { "epoch": 1.8254946945798682, "grad_norm": 0.2719379663467407, "learning_rate": 3.9863780356090655e-06, "loss": 0.2899, "step": 12731 }, { "epoch": 1.8256380843131632, "grad_norm": 0.2867906093597412, "learning_rate": 3.985561128986301e-06, "loss": 0.3052, "step": 12732 }, { "epoch": 1.8257814740464582, "grad_norm": 0.27765366435050964, "learning_rate": 3.984744250603954e-06, "loss": 0.303, "step": 12733 }, { "epoch": 1.8259248637797534, "grad_norm": 0.2527640461921692, "learning_rate": 3.983927400484766e-06, "loss": 0.2955, "step": 12734 }, { "epoch": 1.8260682535130486, "grad_norm": 0.27693748474121094, "learning_rate": 3.983110578651475e-06, "loss": 0.3134, "step": 12735 }, { "epoch": 1.8262116432463436, "grad_norm": 0.2773892283439636, "learning_rate": 3.982293785126822e-06, "loss": 0.2882, "step": 12736 }, { "epoch": 1.8263550329796385, "grad_norm": 0.2859739065170288, "learning_rate": 3.981477019933546e-06, "loss": 0.3177, "step": 12737 }, { "epoch": 1.8264984227129337, "grad_norm": 0.27274441719055176, "learning_rate": 3.980660283094381e-06, "loss": 0.2888, "step": 12738 }, { "epoch": 1.826641812446229, "grad_norm": 0.2713424563407898, "learning_rate": 3.9798435746320664e-06, "loss": 0.3165, "step": 12739 }, { "epoch": 1.826785202179524, "grad_norm": 0.2732570469379425, "learning_rate": 3.9790268945693365e-06, "loss": 0.3108, "step": 12740 }, { "epoch": 1.826928591912819, "grad_norm": 0.26519832015037537, "learning_rate": 3.9782102429289275e-06, "loss": 0.2898, "step": 12741 }, { "epoch": 1.8270719816461143, "grad_norm": 0.27932894229888916, "learning_rate": 3.977393619733575e-06, "loss": 0.3073, "step": 12742 }, { "epoch": 1.8272153713794093, "grad_norm": 0.276885986328125, "learning_rate": 3.97657702500601e-06, "loss": 0.2966, "step": 12743 }, { "epoch": 1.8273587611127042, "grad_norm": 0.2646945118904114, "learning_rate": 3.975760458768966e-06, "loss": 0.3081, "step": 12744 }, { "epoch": 1.8275021508459994, "grad_norm": 0.2641642093658447, "learning_rate": 3.974943921045175e-06, "loss": 0.3002, "step": 12745 }, { "epoch": 1.8276455405792946, "grad_norm": 0.28108641505241394, "learning_rate": 3.974127411857369e-06, "loss": 0.3151, "step": 12746 }, { "epoch": 1.8277889303125896, "grad_norm": 0.24970951676368713, "learning_rate": 3.973310931228277e-06, "loss": 0.3002, "step": 12747 }, { "epoch": 1.8279323200458846, "grad_norm": 0.2607645094394684, "learning_rate": 3.972494479180632e-06, "loss": 0.2841, "step": 12748 }, { "epoch": 1.8280757097791798, "grad_norm": 0.2645461857318878, "learning_rate": 3.9716780557371585e-06, "loss": 0.3148, "step": 12749 }, { "epoch": 1.828219099512475, "grad_norm": 0.2544814944267273, "learning_rate": 3.970861660920585e-06, "loss": 0.323, "step": 12750 }, { "epoch": 1.82836248924577, "grad_norm": 0.26561859250068665, "learning_rate": 3.97004529475364e-06, "loss": 0.3131, "step": 12751 }, { "epoch": 1.8285058789790651, "grad_norm": 0.27458012104034424, "learning_rate": 3.969228957259049e-06, "loss": 0.3, "step": 12752 }, { "epoch": 1.8286492687123603, "grad_norm": 0.2838112413883209, "learning_rate": 3.968412648459542e-06, "loss": 0.3041, "step": 12753 }, { "epoch": 1.8287926584456553, "grad_norm": 0.2517629563808441, "learning_rate": 3.967596368377836e-06, "loss": 0.2909, "step": 12754 }, { "epoch": 1.8289360481789503, "grad_norm": 0.26009121537208557, "learning_rate": 3.96678011703666e-06, "loss": 0.2997, "step": 12755 }, { "epoch": 1.8290794379122455, "grad_norm": 0.25551822781562805, "learning_rate": 3.9659638944587364e-06, "loss": 0.3001, "step": 12756 }, { "epoch": 1.8292228276455407, "grad_norm": 0.2826053202152252, "learning_rate": 3.965147700666787e-06, "loss": 0.3262, "step": 12757 }, { "epoch": 1.8293662173788356, "grad_norm": 0.28387755155563354, "learning_rate": 3.964331535683535e-06, "loss": 0.3099, "step": 12758 }, { "epoch": 1.8295096071121306, "grad_norm": 0.27130380272865295, "learning_rate": 3.9635153995316986e-06, "loss": 0.3088, "step": 12759 }, { "epoch": 1.8296529968454258, "grad_norm": 0.27888941764831543, "learning_rate": 3.962699292233999e-06, "loss": 0.3048, "step": 12760 }, { "epoch": 1.829796386578721, "grad_norm": 0.274319589138031, "learning_rate": 3.961883213813156e-06, "loss": 0.3177, "step": 12761 }, { "epoch": 1.829939776312016, "grad_norm": 0.28084778785705566, "learning_rate": 3.961067164291887e-06, "loss": 0.3072, "step": 12762 }, { "epoch": 1.8300831660453112, "grad_norm": 0.2718842923641205, "learning_rate": 3.96025114369291e-06, "loss": 0.2845, "step": 12763 }, { "epoch": 1.8302265557786064, "grad_norm": 0.2726837992668152, "learning_rate": 3.959435152038944e-06, "loss": 0.2959, "step": 12764 }, { "epoch": 1.8303699455119014, "grad_norm": 0.28918546438217163, "learning_rate": 3.9586191893527e-06, "loss": 0.3221, "step": 12765 }, { "epoch": 1.8305133352451963, "grad_norm": 0.28343838453292847, "learning_rate": 3.957803255656896e-06, "loss": 0.3047, "step": 12766 }, { "epoch": 1.8306567249784915, "grad_norm": 0.29218459129333496, "learning_rate": 3.956987350974247e-06, "loss": 0.3177, "step": 12767 }, { "epoch": 1.8308001147117867, "grad_norm": 0.2869034707546234, "learning_rate": 3.956171475327465e-06, "loss": 0.2868, "step": 12768 }, { "epoch": 1.8309435044450817, "grad_norm": 0.28196635842323303, "learning_rate": 3.955355628739264e-06, "loss": 0.3225, "step": 12769 }, { "epoch": 1.8310868941783767, "grad_norm": 0.2817453145980835, "learning_rate": 3.954539811232355e-06, "loss": 0.2877, "step": 12770 }, { "epoch": 1.831230283911672, "grad_norm": 0.28266942501068115, "learning_rate": 3.9537240228294494e-06, "loss": 0.2865, "step": 12771 }, { "epoch": 1.831373673644967, "grad_norm": 0.2919001877307892, "learning_rate": 3.952908263553257e-06, "loss": 0.3137, "step": 12772 }, { "epoch": 1.831517063378262, "grad_norm": 0.2527078092098236, "learning_rate": 3.9520925334264884e-06, "loss": 0.2842, "step": 12773 }, { "epoch": 1.8316604531115572, "grad_norm": 0.26844140887260437, "learning_rate": 3.951276832471851e-06, "loss": 0.2972, "step": 12774 }, { "epoch": 1.8318038428448524, "grad_norm": 0.2597314119338989, "learning_rate": 3.950461160712057e-06, "loss": 0.3135, "step": 12775 }, { "epoch": 1.8319472325781474, "grad_norm": 0.25294217467308044, "learning_rate": 3.949645518169806e-06, "loss": 0.3019, "step": 12776 }, { "epoch": 1.8320906223114424, "grad_norm": 0.270481675863266, "learning_rate": 3.948829904867809e-06, "loss": 0.2925, "step": 12777 }, { "epoch": 1.8322340120447376, "grad_norm": 0.29566827416419983, "learning_rate": 3.9480143208287705e-06, "loss": 0.3038, "step": 12778 }, { "epoch": 1.8323774017780328, "grad_norm": 0.2797568738460541, "learning_rate": 3.9471987660753945e-06, "loss": 0.2939, "step": 12779 }, { "epoch": 1.8325207915113277, "grad_norm": 0.2588193714618683, "learning_rate": 3.946383240630386e-06, "loss": 0.303, "step": 12780 }, { "epoch": 1.832664181244623, "grad_norm": 0.2694578170776367, "learning_rate": 3.945567744516447e-06, "loss": 0.3061, "step": 12781 }, { "epoch": 1.8328075709779181, "grad_norm": 0.2888510525226593, "learning_rate": 3.94475227775628e-06, "loss": 0.3009, "step": 12782 }, { "epoch": 1.832950960711213, "grad_norm": 0.2788538932800293, "learning_rate": 3.9439368403725855e-06, "loss": 0.3004, "step": 12783 }, { "epoch": 1.833094350444508, "grad_norm": 0.27265506982803345, "learning_rate": 3.943121432388066e-06, "loss": 0.2979, "step": 12784 }, { "epoch": 1.8332377401778033, "grad_norm": 0.2874892055988312, "learning_rate": 3.94230605382542e-06, "loss": 0.3159, "step": 12785 }, { "epoch": 1.8333811299110985, "grad_norm": 0.2686675786972046, "learning_rate": 3.941490704707346e-06, "loss": 0.3148, "step": 12786 }, { "epoch": 1.8335245196443934, "grad_norm": 0.26932141184806824, "learning_rate": 3.940675385056543e-06, "loss": 0.3004, "step": 12787 }, { "epoch": 1.8336679093776884, "grad_norm": 0.2742392420768738, "learning_rate": 3.9398600948957065e-06, "loss": 0.2816, "step": 12788 }, { "epoch": 1.8338112991109836, "grad_norm": 0.27488332986831665, "learning_rate": 3.939044834247535e-06, "loss": 0.3057, "step": 12789 }, { "epoch": 1.8339546888442788, "grad_norm": 0.2658735513687134, "learning_rate": 3.938229603134723e-06, "loss": 0.3002, "step": 12790 }, { "epoch": 1.8340980785775738, "grad_norm": 0.2778894007205963, "learning_rate": 3.9374144015799675e-06, "loss": 0.3065, "step": 12791 }, { "epoch": 1.834241468310869, "grad_norm": 0.2716621458530426, "learning_rate": 3.936599229605958e-06, "loss": 0.2914, "step": 12792 }, { "epoch": 1.8343848580441642, "grad_norm": 0.28426602482795715, "learning_rate": 3.935784087235391e-06, "loss": 0.3036, "step": 12793 }, { "epoch": 1.8345282477774592, "grad_norm": 0.30673786997795105, "learning_rate": 3.934968974490958e-06, "loss": 0.3085, "step": 12794 }, { "epoch": 1.8346716375107541, "grad_norm": 0.2698460817337036, "learning_rate": 3.934153891395349e-06, "loss": 0.2912, "step": 12795 }, { "epoch": 1.8348150272440493, "grad_norm": 0.2789500951766968, "learning_rate": 3.933338837971259e-06, "loss": 0.3089, "step": 12796 }, { "epoch": 1.8349584169773445, "grad_norm": 0.28593820333480835, "learning_rate": 3.932523814241373e-06, "loss": 0.3025, "step": 12797 }, { "epoch": 1.8351018067106395, "grad_norm": 0.2899641692638397, "learning_rate": 3.931708820228382e-06, "loss": 0.3007, "step": 12798 }, { "epoch": 1.8352451964439345, "grad_norm": 0.26355019211769104, "learning_rate": 3.930893855954974e-06, "loss": 0.2899, "step": 12799 }, { "epoch": 1.8353885861772297, "grad_norm": 0.3059825599193573, "learning_rate": 3.930078921443837e-06, "loss": 0.3033, "step": 12800 }, { "epoch": 1.8355319759105249, "grad_norm": 0.2822597324848175, "learning_rate": 3.929264016717656e-06, "loss": 0.3158, "step": 12801 }, { "epoch": 1.8356753656438198, "grad_norm": 0.30407530069351196, "learning_rate": 3.92844914179912e-06, "loss": 0.3022, "step": 12802 }, { "epoch": 1.835818755377115, "grad_norm": 0.2815418839454651, "learning_rate": 3.927634296710911e-06, "loss": 0.294, "step": 12803 }, { "epoch": 1.8359621451104102, "grad_norm": 0.2878757417201996, "learning_rate": 3.9268194814757116e-06, "loss": 0.3042, "step": 12804 }, { "epoch": 1.8361055348437052, "grad_norm": 0.27258050441741943, "learning_rate": 3.926004696116208e-06, "loss": 0.3016, "step": 12805 }, { "epoch": 1.8362489245770002, "grad_norm": 0.29212749004364014, "learning_rate": 3.9251899406550805e-06, "loss": 0.3449, "step": 12806 }, { "epoch": 1.8363923143102954, "grad_norm": 0.2807905375957489, "learning_rate": 3.924375215115014e-06, "loss": 0.3064, "step": 12807 }, { "epoch": 1.8365357040435906, "grad_norm": 0.2516346275806427, "learning_rate": 3.9235605195186845e-06, "loss": 0.3007, "step": 12808 }, { "epoch": 1.8366790937768855, "grad_norm": 0.2639324963092804, "learning_rate": 3.9227458538887755e-06, "loss": 0.3109, "step": 12809 }, { "epoch": 1.8368224835101805, "grad_norm": 0.2752632200717926, "learning_rate": 3.921931218247965e-06, "loss": 0.299, "step": 12810 }, { "epoch": 1.836965873243476, "grad_norm": 0.2680888772010803, "learning_rate": 3.92111661261893e-06, "loss": 0.2951, "step": 12811 }, { "epoch": 1.837109262976771, "grad_norm": 0.27478107810020447, "learning_rate": 3.920302037024351e-06, "loss": 0.3021, "step": 12812 }, { "epoch": 1.8372526527100659, "grad_norm": 0.2700086534023285, "learning_rate": 3.9194874914869e-06, "loss": 0.3222, "step": 12813 }, { "epoch": 1.837396042443361, "grad_norm": 0.28607282042503357, "learning_rate": 3.918672976029255e-06, "loss": 0.3099, "step": 12814 }, { "epoch": 1.8375394321766563, "grad_norm": 0.2827237844467163, "learning_rate": 3.917858490674093e-06, "loss": 0.3158, "step": 12815 }, { "epoch": 1.8376828219099512, "grad_norm": 0.27167147397994995, "learning_rate": 3.917044035444084e-06, "loss": 0.3047, "step": 12816 }, { "epoch": 1.8378262116432462, "grad_norm": 0.2815673351287842, "learning_rate": 3.916229610361904e-06, "loss": 0.2973, "step": 12817 }, { "epoch": 1.8379696013765414, "grad_norm": 0.2732485830783844, "learning_rate": 3.9154152154502264e-06, "loss": 0.2851, "step": 12818 }, { "epoch": 1.8381129911098366, "grad_norm": 0.27253052592277527, "learning_rate": 3.914600850731719e-06, "loss": 0.2857, "step": 12819 }, { "epoch": 1.8382563808431316, "grad_norm": 0.27026626467704773, "learning_rate": 3.913786516229053e-06, "loss": 0.302, "step": 12820 }, { "epoch": 1.8383997705764266, "grad_norm": 0.2668236494064331, "learning_rate": 3.912972211964901e-06, "loss": 0.3007, "step": 12821 }, { "epoch": 1.838543160309722, "grad_norm": 0.26902690529823303, "learning_rate": 3.912157937961929e-06, "loss": 0.29, "step": 12822 }, { "epoch": 1.838686550043017, "grad_norm": 0.26925304532051086, "learning_rate": 3.911343694242809e-06, "loss": 0.3008, "step": 12823 }, { "epoch": 1.838829939776312, "grad_norm": 0.2838680148124695, "learning_rate": 3.910529480830204e-06, "loss": 0.3319, "step": 12824 }, { "epoch": 1.8389733295096071, "grad_norm": 0.2577013671398163, "learning_rate": 3.909715297746783e-06, "loss": 0.3006, "step": 12825 }, { "epoch": 1.8391167192429023, "grad_norm": 0.28960901498794556, "learning_rate": 3.908901145015211e-06, "loss": 0.2983, "step": 12826 }, { "epoch": 1.8392601089761973, "grad_norm": 0.26704686880111694, "learning_rate": 3.908087022658153e-06, "loss": 0.3151, "step": 12827 }, { "epoch": 1.8394034987094923, "grad_norm": 0.2823341488838196, "learning_rate": 3.907272930698272e-06, "loss": 0.2921, "step": 12828 }, { "epoch": 1.8395468884427875, "grad_norm": 0.26318421959877014, "learning_rate": 3.906458869158234e-06, "loss": 0.3312, "step": 12829 }, { "epoch": 1.8396902781760827, "grad_norm": 0.25438928604125977, "learning_rate": 3.9056448380606956e-06, "loss": 0.3171, "step": 12830 }, { "epoch": 1.8398336679093776, "grad_norm": 0.2757793664932251, "learning_rate": 3.904830837428323e-06, "loss": 0.2824, "step": 12831 }, { "epoch": 1.8399770576426728, "grad_norm": 0.2829764485359192, "learning_rate": 3.904016867283774e-06, "loss": 0.2959, "step": 12832 }, { "epoch": 1.840120447375968, "grad_norm": 0.26944759488105774, "learning_rate": 3.90320292764971e-06, "loss": 0.3126, "step": 12833 }, { "epoch": 1.840263837109263, "grad_norm": 0.2779187560081482, "learning_rate": 3.902389018548791e-06, "loss": 0.3055, "step": 12834 }, { "epoch": 1.840407226842558, "grad_norm": 0.30206960439682007, "learning_rate": 3.901575140003672e-06, "loss": 0.3033, "step": 12835 }, { "epoch": 1.8405506165758532, "grad_norm": 0.26506707072257996, "learning_rate": 3.900761292037011e-06, "loss": 0.3018, "step": 12836 }, { "epoch": 1.8406940063091484, "grad_norm": 0.2688845098018646, "learning_rate": 3.899947474671465e-06, "loss": 0.2958, "step": 12837 }, { "epoch": 1.8408373960424433, "grad_norm": 0.2511656582355499, "learning_rate": 3.8991336879296885e-06, "loss": 0.2946, "step": 12838 }, { "epoch": 1.8409807857757383, "grad_norm": 0.2722906470298767, "learning_rate": 3.898319931834339e-06, "loss": 0.3229, "step": 12839 }, { "epoch": 1.8411241755090335, "grad_norm": 0.26189160346984863, "learning_rate": 3.897506206408066e-06, "loss": 0.2896, "step": 12840 }, { "epoch": 1.8412675652423287, "grad_norm": 0.26917341351509094, "learning_rate": 3.896692511673523e-06, "loss": 0.2947, "step": 12841 }, { "epoch": 1.8414109549756237, "grad_norm": 0.2668458819389343, "learning_rate": 3.895878847653364e-06, "loss": 0.299, "step": 12842 }, { "epoch": 1.8415543447089189, "grad_norm": 0.2718726694583893, "learning_rate": 3.89506521437024e-06, "loss": 0.3013, "step": 12843 }, { "epoch": 1.841697734442214, "grad_norm": 0.2721412181854248, "learning_rate": 3.894251611846801e-06, "loss": 0.3056, "step": 12844 }, { "epoch": 1.841841124175509, "grad_norm": 0.25598642230033875, "learning_rate": 3.8934380401056985e-06, "loss": 0.3022, "step": 12845 }, { "epoch": 1.841984513908804, "grad_norm": 0.2700249254703522, "learning_rate": 3.892624499169576e-06, "loss": 0.2964, "step": 12846 }, { "epoch": 1.8421279036420992, "grad_norm": 0.2604258358478546, "learning_rate": 3.8918109890610835e-06, "loss": 0.2803, "step": 12847 }, { "epoch": 1.8422712933753944, "grad_norm": 0.2612682282924652, "learning_rate": 3.890997509802869e-06, "loss": 0.294, "step": 12848 }, { "epoch": 1.8424146831086894, "grad_norm": 0.28253549337387085, "learning_rate": 3.890184061417578e-06, "loss": 0.3036, "step": 12849 }, { "epoch": 1.8425580728419844, "grad_norm": 0.2851485013961792, "learning_rate": 3.889370643927856e-06, "loss": 0.2945, "step": 12850 }, { "epoch": 1.8427014625752796, "grad_norm": 0.2671748399734497, "learning_rate": 3.8885572573563465e-06, "loss": 0.297, "step": 12851 }, { "epoch": 1.8428448523085748, "grad_norm": 0.2545957565307617, "learning_rate": 3.887743901725692e-06, "loss": 0.307, "step": 12852 }, { "epoch": 1.8429882420418697, "grad_norm": 0.2800251245498657, "learning_rate": 3.886930577058538e-06, "loss": 0.2779, "step": 12853 }, { "epoch": 1.843131631775165, "grad_norm": 0.28697240352630615, "learning_rate": 3.886117283377523e-06, "loss": 0.2848, "step": 12854 }, { "epoch": 1.8432750215084601, "grad_norm": 0.27073338627815247, "learning_rate": 3.88530402070529e-06, "loss": 0.2985, "step": 12855 }, { "epoch": 1.843418411241755, "grad_norm": 0.2771743834018707, "learning_rate": 3.8844907890644806e-06, "loss": 0.3125, "step": 12856 }, { "epoch": 1.84356180097505, "grad_norm": 0.2623516321182251, "learning_rate": 3.883677588477729e-06, "loss": 0.3313, "step": 12857 }, { "epoch": 1.8437051907083453, "grad_norm": 0.2756291627883911, "learning_rate": 3.882864418967677e-06, "loss": 0.2894, "step": 12858 }, { "epoch": 1.8438485804416405, "grad_norm": 0.26364612579345703, "learning_rate": 3.8820512805569605e-06, "loss": 0.3148, "step": 12859 }, { "epoch": 1.8439919701749354, "grad_norm": 0.2728976607322693, "learning_rate": 3.881238173268216e-06, "loss": 0.3219, "step": 12860 }, { "epoch": 1.8441353599082304, "grad_norm": 0.2731802761554718, "learning_rate": 3.880425097124082e-06, "loss": 0.3068, "step": 12861 }, { "epoch": 1.8442787496415258, "grad_norm": 0.27574729919433594, "learning_rate": 3.87961205214719e-06, "loss": 0.3013, "step": 12862 }, { "epoch": 1.8444221393748208, "grad_norm": 0.24451085925102234, "learning_rate": 3.878799038360175e-06, "loss": 0.3168, "step": 12863 }, { "epoch": 1.8445655291081158, "grad_norm": 0.30158594250679016, "learning_rate": 3.877986055785669e-06, "loss": 0.2944, "step": 12864 }, { "epoch": 1.844708918841411, "grad_norm": 0.29618051648139954, "learning_rate": 3.877173104446306e-06, "loss": 0.3139, "step": 12865 }, { "epoch": 1.8448523085747062, "grad_norm": 0.2819781005382538, "learning_rate": 3.876360184364718e-06, "loss": 0.2907, "step": 12866 }, { "epoch": 1.8449956983080011, "grad_norm": 0.26208600401878357, "learning_rate": 3.8755472955635324e-06, "loss": 0.2773, "step": 12867 }, { "epoch": 1.8451390880412961, "grad_norm": 0.28956642746925354, "learning_rate": 3.874734438065381e-06, "loss": 0.3005, "step": 12868 }, { "epoch": 1.8452824777745913, "grad_norm": 0.2955597937107086, "learning_rate": 3.873921611892889e-06, "loss": 0.3017, "step": 12869 }, { "epoch": 1.8454258675078865, "grad_norm": 0.26462531089782715, "learning_rate": 3.87310881706869e-06, "loss": 0.2961, "step": 12870 }, { "epoch": 1.8455692572411815, "grad_norm": 0.27754467725753784, "learning_rate": 3.872296053615407e-06, "loss": 0.3147, "step": 12871 }, { "epoch": 1.8457126469744767, "grad_norm": 0.2709243595600128, "learning_rate": 3.87148332155567e-06, "loss": 0.2901, "step": 12872 }, { "epoch": 1.8458560367077719, "grad_norm": 0.26750630140304565, "learning_rate": 3.870670620912098e-06, "loss": 0.2902, "step": 12873 }, { "epoch": 1.8459994264410668, "grad_norm": 0.305404394865036, "learning_rate": 3.8698579517073196e-06, "loss": 0.3073, "step": 12874 }, { "epoch": 1.8461428161743618, "grad_norm": 0.2740948796272278, "learning_rate": 3.8690453139639575e-06, "loss": 0.2884, "step": 12875 }, { "epoch": 1.846286205907657, "grad_norm": 0.2671893835067749, "learning_rate": 3.868232707704633e-06, "loss": 0.3106, "step": 12876 }, { "epoch": 1.8464295956409522, "grad_norm": 0.2508755028247833, "learning_rate": 3.867420132951971e-06, "loss": 0.3147, "step": 12877 }, { "epoch": 1.8465729853742472, "grad_norm": 0.24242258071899414, "learning_rate": 3.86660758972859e-06, "loss": 0.2988, "step": 12878 }, { "epoch": 1.8467163751075422, "grad_norm": 0.3044191002845764, "learning_rate": 3.865795078057108e-06, "loss": 0.2999, "step": 12879 }, { "epoch": 1.8468597648408374, "grad_norm": 0.3023441731929779, "learning_rate": 3.864982597960149e-06, "loss": 0.3046, "step": 12880 }, { "epoch": 1.8470031545741326, "grad_norm": 0.27377545833587646, "learning_rate": 3.864170149460326e-06, "loss": 0.2852, "step": 12881 }, { "epoch": 1.8471465443074275, "grad_norm": 0.30771955847740173, "learning_rate": 3.8633577325802605e-06, "loss": 0.3084, "step": 12882 }, { "epoch": 1.8472899340407227, "grad_norm": 0.30304309725761414, "learning_rate": 3.862545347342568e-06, "loss": 0.3003, "step": 12883 }, { "epoch": 1.847433323774018, "grad_norm": 0.2962389290332794, "learning_rate": 3.861732993769862e-06, "loss": 0.2934, "step": 12884 }, { "epoch": 1.847576713507313, "grad_norm": 0.2881491482257843, "learning_rate": 3.860920671884759e-06, "loss": 0.2917, "step": 12885 }, { "epoch": 1.8477201032406079, "grad_norm": 0.2865937352180481, "learning_rate": 3.8601083817098715e-06, "loss": 0.2953, "step": 12886 }, { "epoch": 1.847863492973903, "grad_norm": 0.30001625418663025, "learning_rate": 3.859296123267812e-06, "loss": 0.3154, "step": 12887 }, { "epoch": 1.8480068827071983, "grad_norm": 0.2825426161289215, "learning_rate": 3.858483896581196e-06, "loss": 0.3028, "step": 12888 }, { "epoch": 1.8481502724404932, "grad_norm": 0.30184200406074524, "learning_rate": 3.857671701672632e-06, "loss": 0.2812, "step": 12889 }, { "epoch": 1.8482936621737882, "grad_norm": 0.2779008448123932, "learning_rate": 3.85685953856473e-06, "loss": 0.2869, "step": 12890 }, { "epoch": 1.8484370519070834, "grad_norm": 0.25249940156936646, "learning_rate": 3.8560474072801e-06, "loss": 0.302, "step": 12891 }, { "epoch": 1.8485804416403786, "grad_norm": 0.29665282368659973, "learning_rate": 3.8552353078413504e-06, "loss": 0.3238, "step": 12892 }, { "epoch": 1.8487238313736736, "grad_norm": 0.2855987548828125, "learning_rate": 3.854423240271091e-06, "loss": 0.2928, "step": 12893 }, { "epoch": 1.8488672211069688, "grad_norm": 0.2687976062297821, "learning_rate": 3.853611204591924e-06, "loss": 0.3051, "step": 12894 }, { "epoch": 1.849010610840264, "grad_norm": 0.2784048914909363, "learning_rate": 3.852799200826458e-06, "loss": 0.3206, "step": 12895 }, { "epoch": 1.849154000573559, "grad_norm": 0.28310564160346985, "learning_rate": 3.851987228997297e-06, "loss": 0.3113, "step": 12896 }, { "epoch": 1.849297390306854, "grad_norm": 0.28107500076293945, "learning_rate": 3.851175289127045e-06, "loss": 0.2896, "step": 12897 }, { "epoch": 1.8494407800401491, "grad_norm": 0.2746097445487976, "learning_rate": 3.8503633812383064e-06, "loss": 0.3048, "step": 12898 }, { "epoch": 1.8495841697734443, "grad_norm": 0.28174886107444763, "learning_rate": 3.849551505353685e-06, "loss": 0.3064, "step": 12899 }, { "epoch": 1.8497275595067393, "grad_norm": 0.2544029951095581, "learning_rate": 3.8487396614957776e-06, "loss": 0.2908, "step": 12900 }, { "epoch": 1.8498709492400343, "grad_norm": 0.28295251727104187, "learning_rate": 3.847927849687187e-06, "loss": 0.2983, "step": 12901 }, { "epoch": 1.8500143389733297, "grad_norm": 0.26203861832618713, "learning_rate": 3.847116069950514e-06, "loss": 0.3066, "step": 12902 }, { "epoch": 1.8501577287066246, "grad_norm": 0.28568172454833984, "learning_rate": 3.846304322308355e-06, "loss": 0.309, "step": 12903 }, { "epoch": 1.8503011184399196, "grad_norm": 0.27682897448539734, "learning_rate": 3.84549260678331e-06, "loss": 0.2953, "step": 12904 }, { "epoch": 1.8504445081732148, "grad_norm": 0.2611767053604126, "learning_rate": 3.844680923397974e-06, "loss": 0.3009, "step": 12905 }, { "epoch": 1.85058789790651, "grad_norm": 0.24924412369728088, "learning_rate": 3.8438692721749446e-06, "loss": 0.3, "step": 12906 }, { "epoch": 1.850731287639805, "grad_norm": 0.278709352016449, "learning_rate": 3.843057653136815e-06, "loss": 0.2986, "step": 12907 }, { "epoch": 1.8508746773731, "grad_norm": 0.2835681140422821, "learning_rate": 3.8422460663061815e-06, "loss": 0.2831, "step": 12908 }, { "epoch": 1.8510180671063952, "grad_norm": 0.3067052960395813, "learning_rate": 3.841434511705637e-06, "loss": 0.3216, "step": 12909 }, { "epoch": 1.8511614568396904, "grad_norm": 0.2819592356681824, "learning_rate": 3.840622989357774e-06, "loss": 0.2912, "step": 12910 }, { "epoch": 1.8513048465729853, "grad_norm": 0.2619689106941223, "learning_rate": 3.839811499285183e-06, "loss": 0.2874, "step": 12911 }, { "epoch": 1.8514482363062803, "grad_norm": 0.29930171370506287, "learning_rate": 3.839000041510455e-06, "loss": 0.3126, "step": 12912 }, { "epoch": 1.8515916260395757, "grad_norm": 0.2827581465244293, "learning_rate": 3.838188616056179e-06, "loss": 0.2951, "step": 12913 }, { "epoch": 1.8517350157728707, "grad_norm": 0.26021620631217957, "learning_rate": 3.8373772229449445e-06, "loss": 0.304, "step": 12914 }, { "epoch": 1.8518784055061657, "grad_norm": 0.2859839200973511, "learning_rate": 3.8365658621993414e-06, "loss": 0.3083, "step": 12915 }, { "epoch": 1.8520217952394609, "grad_norm": 0.3000446856021881, "learning_rate": 3.835754533841954e-06, "loss": 0.3134, "step": 12916 }, { "epoch": 1.852165184972756, "grad_norm": 0.2703387439250946, "learning_rate": 3.834943237895369e-06, "loss": 0.3164, "step": 12917 }, { "epoch": 1.852308574706051, "grad_norm": 0.25822529196739197, "learning_rate": 3.834131974382172e-06, "loss": 0.3068, "step": 12918 }, { "epoch": 1.852451964439346, "grad_norm": 0.27388477325439453, "learning_rate": 3.833320743324949e-06, "loss": 0.3068, "step": 12919 }, { "epoch": 1.8525953541726412, "grad_norm": 0.2663496732711792, "learning_rate": 3.832509544746283e-06, "loss": 0.2936, "step": 12920 }, { "epoch": 1.8527387439059364, "grad_norm": 0.26273632049560547, "learning_rate": 3.831698378668752e-06, "loss": 0.3183, "step": 12921 }, { "epoch": 1.8528821336392314, "grad_norm": 0.25260329246520996, "learning_rate": 3.830887245114942e-06, "loss": 0.2982, "step": 12922 }, { "epoch": 1.8530255233725266, "grad_norm": 0.26966792345046997, "learning_rate": 3.830076144107432e-06, "loss": 0.2908, "step": 12923 }, { "epoch": 1.8531689131058218, "grad_norm": 0.27934715151786804, "learning_rate": 3.829265075668803e-06, "loss": 0.3112, "step": 12924 }, { "epoch": 1.8533123028391167, "grad_norm": 0.28530871868133545, "learning_rate": 3.828454039821634e-06, "loss": 0.2826, "step": 12925 }, { "epoch": 1.8534556925724117, "grad_norm": 0.25372564792633057, "learning_rate": 3.827643036588503e-06, "loss": 0.2849, "step": 12926 }, { "epoch": 1.853599082305707, "grad_norm": 0.2683166265487671, "learning_rate": 3.826832065991985e-06, "loss": 0.3094, "step": 12927 }, { "epoch": 1.853742472039002, "grad_norm": 0.2542126774787903, "learning_rate": 3.826021128054658e-06, "loss": 0.2911, "step": 12928 }, { "epoch": 1.853885861772297, "grad_norm": 0.27853959798812866, "learning_rate": 3.825210222799096e-06, "loss": 0.3285, "step": 12929 }, { "epoch": 1.854029251505592, "grad_norm": 0.253963828086853, "learning_rate": 3.824399350247875e-06, "loss": 0.3125, "step": 12930 }, { "epoch": 1.8541726412388873, "grad_norm": 0.2803727388381958, "learning_rate": 3.823588510423568e-06, "loss": 0.3015, "step": 12931 }, { "epoch": 1.8543160309721824, "grad_norm": 0.28061890602111816, "learning_rate": 3.822777703348747e-06, "loss": 0.3237, "step": 12932 }, { "epoch": 1.8544594207054774, "grad_norm": 0.25732168555259705, "learning_rate": 3.8219669290459835e-06, "loss": 0.2765, "step": 12933 }, { "epoch": 1.8546028104387726, "grad_norm": 0.2875150740146637, "learning_rate": 3.8211561875378485e-06, "loss": 0.2739, "step": 12934 }, { "epoch": 1.8547462001720678, "grad_norm": 0.2560631334781647, "learning_rate": 3.820345478846913e-06, "loss": 0.3003, "step": 12935 }, { "epoch": 1.8548895899053628, "grad_norm": 0.26488274335861206, "learning_rate": 3.819534802995743e-06, "loss": 0.2943, "step": 12936 }, { "epoch": 1.8550329796386578, "grad_norm": 0.25817519426345825, "learning_rate": 3.818724160006912e-06, "loss": 0.2782, "step": 12937 }, { "epoch": 1.855176369371953, "grad_norm": 0.27445584535598755, "learning_rate": 3.81791354990298e-06, "loss": 0.3176, "step": 12938 }, { "epoch": 1.8553197591052482, "grad_norm": 0.2597414553165436, "learning_rate": 3.817102972706516e-06, "loss": 0.2962, "step": 12939 }, { "epoch": 1.8554631488385431, "grad_norm": 0.27018043398857117, "learning_rate": 3.8162924284400856e-06, "loss": 0.3014, "step": 12940 }, { "epoch": 1.855606538571838, "grad_norm": 0.2592445909976959, "learning_rate": 3.815481917126254e-06, "loss": 0.2997, "step": 12941 }, { "epoch": 1.8557499283051333, "grad_norm": 0.2947792410850525, "learning_rate": 3.8146714387875844e-06, "loss": 0.286, "step": 12942 }, { "epoch": 1.8558933180384285, "grad_norm": 0.25430482625961304, "learning_rate": 3.8138609934466374e-06, "loss": 0.3108, "step": 12943 }, { "epoch": 1.8560367077717235, "grad_norm": 0.27666768431663513, "learning_rate": 3.813050581125976e-06, "loss": 0.3037, "step": 12944 }, { "epoch": 1.8561800975050187, "grad_norm": 0.28192129731178284, "learning_rate": 3.81224020184816e-06, "loss": 0.2994, "step": 12945 }, { "epoch": 1.8563234872383139, "grad_norm": 0.26377058029174805, "learning_rate": 3.8114298556357504e-06, "loss": 0.2969, "step": 12946 }, { "epoch": 1.8564668769716088, "grad_norm": 0.2523510754108429, "learning_rate": 3.8106195425113067e-06, "loss": 0.3033, "step": 12947 }, { "epoch": 1.8566102667049038, "grad_norm": 0.27508270740509033, "learning_rate": 3.8098092624973835e-06, "loss": 0.2925, "step": 12948 }, { "epoch": 1.856753656438199, "grad_norm": 0.29041436314582825, "learning_rate": 3.80899901561654e-06, "loss": 0.2939, "step": 12949 }, { "epoch": 1.8568970461714942, "grad_norm": 0.2727782726287842, "learning_rate": 3.808188801891331e-06, "loss": 0.2974, "step": 12950 }, { "epoch": 1.8570404359047892, "grad_norm": 0.2781178951263428, "learning_rate": 3.807378621344313e-06, "loss": 0.3251, "step": 12951 }, { "epoch": 1.8571838256380842, "grad_norm": 0.28571537137031555, "learning_rate": 3.806568473998039e-06, "loss": 0.2994, "step": 12952 }, { "epoch": 1.8573272153713796, "grad_norm": 0.28245750069618225, "learning_rate": 3.8057583598750645e-06, "loss": 0.3006, "step": 12953 }, { "epoch": 1.8574706051046745, "grad_norm": 0.30131295323371887, "learning_rate": 3.8049482789979384e-06, "loss": 0.287, "step": 12954 }, { "epoch": 1.8576139948379695, "grad_norm": 0.2836069166660309, "learning_rate": 3.804138231389215e-06, "loss": 0.3044, "step": 12955 }, { "epoch": 1.8577573845712647, "grad_norm": 0.27455979585647583, "learning_rate": 3.8033282170714433e-06, "loss": 0.2935, "step": 12956 }, { "epoch": 1.85790077430456, "grad_norm": 0.2710711658000946, "learning_rate": 3.8025182360671724e-06, "loss": 0.2831, "step": 12957 }, { "epoch": 1.8580441640378549, "grad_norm": 0.28450947999954224, "learning_rate": 3.8017082883989533e-06, "loss": 0.3175, "step": 12958 }, { "epoch": 1.8581875537711499, "grad_norm": 0.27322420477867126, "learning_rate": 3.800898374089331e-06, "loss": 0.3012, "step": 12959 }, { "epoch": 1.858330943504445, "grad_norm": 0.2880735397338867, "learning_rate": 3.800088493160854e-06, "loss": 0.3026, "step": 12960 }, { "epoch": 1.8584743332377403, "grad_norm": 0.2479076087474823, "learning_rate": 3.7992786456360664e-06, "loss": 0.2993, "step": 12961 }, { "epoch": 1.8586177229710352, "grad_norm": 0.2801281213760376, "learning_rate": 3.798468831537515e-06, "loss": 0.3015, "step": 12962 }, { "epoch": 1.8587611127043304, "grad_norm": 0.2756538689136505, "learning_rate": 3.797659050887743e-06, "loss": 0.31, "step": 12963 }, { "epoch": 1.8589045024376256, "grad_norm": 0.2636508047580719, "learning_rate": 3.7968493037092957e-06, "loss": 0.3027, "step": 12964 }, { "epoch": 1.8590478921709206, "grad_norm": 0.26365724205970764, "learning_rate": 3.7960395900247105e-06, "loss": 0.2855, "step": 12965 }, { "epoch": 1.8591912819042156, "grad_norm": 0.2587732970714569, "learning_rate": 3.7952299098565316e-06, "loss": 0.2986, "step": 12966 }, { "epoch": 1.8593346716375108, "grad_norm": 0.28217166662216187, "learning_rate": 3.794420263227298e-06, "loss": 0.3045, "step": 12967 }, { "epoch": 1.859478061370806, "grad_norm": 0.2665632665157318, "learning_rate": 3.7936106501595504e-06, "loss": 0.2899, "step": 12968 }, { "epoch": 1.859621451104101, "grad_norm": 0.27149906754493713, "learning_rate": 3.7928010706758275e-06, "loss": 0.2945, "step": 12969 }, { "epoch": 1.859764840837396, "grad_norm": 0.28337299823760986, "learning_rate": 3.7919915247986644e-06, "loss": 0.3046, "step": 12970 }, { "epoch": 1.859908230570691, "grad_norm": 0.2817922830581665, "learning_rate": 3.7911820125505987e-06, "loss": 0.2911, "step": 12971 }, { "epoch": 1.8600516203039863, "grad_norm": 0.28299587965011597, "learning_rate": 3.790372533954167e-06, "loss": 0.2756, "step": 12972 }, { "epoch": 1.8601950100372813, "grad_norm": 0.26274874806404114, "learning_rate": 3.789563089031903e-06, "loss": 0.3004, "step": 12973 }, { "epoch": 1.8603383997705765, "grad_norm": 0.2605351209640503, "learning_rate": 3.788753677806343e-06, "loss": 0.3079, "step": 12974 }, { "epoch": 1.8604817895038717, "grad_norm": 0.2786867916584015, "learning_rate": 3.7879443003000144e-06, "loss": 0.3103, "step": 12975 }, { "epoch": 1.8606251792371666, "grad_norm": 0.2720785140991211, "learning_rate": 3.787134956535452e-06, "loss": 0.3139, "step": 12976 }, { "epoch": 1.8607685689704616, "grad_norm": 0.2760671079158783, "learning_rate": 3.7863256465351873e-06, "loss": 0.3134, "step": 12977 }, { "epoch": 1.8609119587037568, "grad_norm": 0.27238401770591736, "learning_rate": 3.7855163703217496e-06, "loss": 0.2918, "step": 12978 }, { "epoch": 1.861055348437052, "grad_norm": 0.2749530076980591, "learning_rate": 3.784707127917667e-06, "loss": 0.3102, "step": 12979 }, { "epoch": 1.861198738170347, "grad_norm": 0.284983366727829, "learning_rate": 3.7838979193454705e-06, "loss": 0.2948, "step": 12980 }, { "epoch": 1.861342127903642, "grad_norm": 0.27567335963249207, "learning_rate": 3.783088744627684e-06, "loss": 0.2992, "step": 12981 }, { "epoch": 1.8614855176369371, "grad_norm": 0.301189124584198, "learning_rate": 3.782279603786835e-06, "loss": 0.3046, "step": 12982 }, { "epoch": 1.8616289073702323, "grad_norm": 0.28096985816955566, "learning_rate": 3.7814704968454484e-06, "loss": 0.2957, "step": 12983 }, { "epoch": 1.8617722971035273, "grad_norm": 0.28010228276252747, "learning_rate": 3.780661423826049e-06, "loss": 0.3224, "step": 12984 }, { "epoch": 1.8619156868368225, "grad_norm": 0.2710060775279999, "learning_rate": 3.7798523847511616e-06, "loss": 0.3093, "step": 12985 }, { "epoch": 1.8620590765701177, "grad_norm": 0.2778065800666809, "learning_rate": 3.7790433796433042e-06, "loss": 0.2981, "step": 12986 }, { "epoch": 1.8622024663034127, "grad_norm": 0.2877054214477539, "learning_rate": 3.778234408525001e-06, "loss": 0.2939, "step": 12987 }, { "epoch": 1.8623458560367077, "grad_norm": 0.30088886618614197, "learning_rate": 3.7774254714187737e-06, "loss": 0.307, "step": 12988 }, { "epoch": 1.8624892457700029, "grad_norm": 0.2748807966709137, "learning_rate": 3.77661656834714e-06, "loss": 0.2949, "step": 12989 }, { "epoch": 1.862632635503298, "grad_norm": 0.25538334250450134, "learning_rate": 3.775807699332619e-06, "loss": 0.2758, "step": 12990 }, { "epoch": 1.862776025236593, "grad_norm": 0.28693103790283203, "learning_rate": 3.774998864397731e-06, "loss": 0.309, "step": 12991 }, { "epoch": 1.862919414969888, "grad_norm": 0.28155460953712463, "learning_rate": 3.7741900635649873e-06, "loss": 0.288, "step": 12992 }, { "epoch": 1.8630628047031834, "grad_norm": 0.26552486419677734, "learning_rate": 3.7733812968569073e-06, "loss": 0.3041, "step": 12993 }, { "epoch": 1.8632061944364784, "grad_norm": 0.2606198489665985, "learning_rate": 3.7725725642960047e-06, "loss": 0.3253, "step": 12994 }, { "epoch": 1.8633495841697734, "grad_norm": 0.29404041171073914, "learning_rate": 3.771763865904794e-06, "loss": 0.3171, "step": 12995 }, { "epoch": 1.8634929739030686, "grad_norm": 0.285684734582901, "learning_rate": 3.770955201705789e-06, "loss": 0.2917, "step": 12996 }, { "epoch": 1.8636363636363638, "grad_norm": 0.2685803472995758, "learning_rate": 3.770146571721499e-06, "loss": 0.3099, "step": 12997 }, { "epoch": 1.8637797533696587, "grad_norm": 0.2649354040622711, "learning_rate": 3.7693379759744376e-06, "loss": 0.3024, "step": 12998 }, { "epoch": 1.8639231431029537, "grad_norm": 0.26453277468681335, "learning_rate": 3.768529414487113e-06, "loss": 0.3109, "step": 12999 }, { "epoch": 1.864066532836249, "grad_norm": 0.26899605989456177, "learning_rate": 3.7677208872820353e-06, "loss": 0.296, "step": 13000 }, { "epoch": 1.864209922569544, "grad_norm": 0.2794007658958435, "learning_rate": 3.7669123943817147e-06, "loss": 0.2964, "step": 13001 }, { "epoch": 1.864353312302839, "grad_norm": 0.2770131826400757, "learning_rate": 3.7661039358086537e-06, "loss": 0.3102, "step": 13002 }, { "epoch": 1.8644967020361343, "grad_norm": 0.2912673354148865, "learning_rate": 3.7652955115853617e-06, "loss": 0.3064, "step": 13003 }, { "epoch": 1.8646400917694295, "grad_norm": 0.27482160925865173, "learning_rate": 3.764487121734343e-06, "loss": 0.3055, "step": 13004 }, { "epoch": 1.8647834815027244, "grad_norm": 0.27216407656669617, "learning_rate": 3.763678766278102e-06, "loss": 0.2969, "step": 13005 }, { "epoch": 1.8649268712360194, "grad_norm": 0.29601559042930603, "learning_rate": 3.762870445239143e-06, "loss": 0.3062, "step": 13006 }, { "epoch": 1.8650702609693146, "grad_norm": 0.27865883708000183, "learning_rate": 3.7620621586399686e-06, "loss": 0.3124, "step": 13007 }, { "epoch": 1.8652136507026098, "grad_norm": 0.2828002870082855, "learning_rate": 3.761253906503078e-06, "loss": 0.2934, "step": 13008 }, { "epoch": 1.8653570404359048, "grad_norm": 0.26076018810272217, "learning_rate": 3.760445688850973e-06, "loss": 0.2761, "step": 13009 }, { "epoch": 1.8655004301691998, "grad_norm": 0.2906424105167389, "learning_rate": 3.7596375057061535e-06, "loss": 0.3076, "step": 13010 }, { "epoch": 1.865643819902495, "grad_norm": 0.25083667039871216, "learning_rate": 3.758829357091118e-06, "loss": 0.2961, "step": 13011 }, { "epoch": 1.8657872096357901, "grad_norm": 0.2623943090438843, "learning_rate": 3.758021243028366e-06, "loss": 0.2969, "step": 13012 }, { "epoch": 1.8659305993690851, "grad_norm": 0.2701796591281891, "learning_rate": 3.75721316354039e-06, "loss": 0.3081, "step": 13013 }, { "epoch": 1.8660739891023803, "grad_norm": 0.2912135124206543, "learning_rate": 3.756405118649687e-06, "loss": 0.2921, "step": 13014 }, { "epoch": 1.8662173788356755, "grad_norm": 0.2756759226322174, "learning_rate": 3.755597108378752e-06, "loss": 0.2999, "step": 13015 }, { "epoch": 1.8663607685689705, "grad_norm": 0.2719334661960602, "learning_rate": 3.7547891327500803e-06, "loss": 0.2946, "step": 13016 }, { "epoch": 1.8665041583022655, "grad_norm": 0.2672266662120819, "learning_rate": 3.7539811917861635e-06, "loss": 0.3008, "step": 13017 }, { "epoch": 1.8666475480355607, "grad_norm": 0.2731872498989105, "learning_rate": 3.753173285509496e-06, "loss": 0.2875, "step": 13018 }, { "epoch": 1.8667909377688559, "grad_norm": 0.2652117908000946, "learning_rate": 3.7523654139425635e-06, "loss": 0.3086, "step": 13019 }, { "epoch": 1.8669343275021508, "grad_norm": 0.2693059742450714, "learning_rate": 3.7515575771078594e-06, "loss": 0.293, "step": 13020 }, { "epoch": 1.8670777172354458, "grad_norm": 0.2848558723926544, "learning_rate": 3.7507497750278714e-06, "loss": 0.3059, "step": 13021 }, { "epoch": 1.867221106968741, "grad_norm": 0.2893854081630707, "learning_rate": 3.749942007725088e-06, "loss": 0.2828, "step": 13022 }, { "epoch": 1.8673644967020362, "grad_norm": 0.25039544701576233, "learning_rate": 3.7491342752219978e-06, "loss": 0.308, "step": 13023 }, { "epoch": 1.8675078864353312, "grad_norm": 0.253326952457428, "learning_rate": 3.7483265775410837e-06, "loss": 0.3105, "step": 13024 }, { "epoch": 1.8676512761686264, "grad_norm": 0.2739054262638092, "learning_rate": 3.7475189147048317e-06, "loss": 0.3008, "step": 13025 }, { "epoch": 1.8677946659019216, "grad_norm": 0.2776861786842346, "learning_rate": 3.7467112867357274e-06, "loss": 0.3257, "step": 13026 }, { "epoch": 1.8679380556352165, "grad_norm": 0.2758742868900299, "learning_rate": 3.7459036936562525e-06, "loss": 0.2961, "step": 13027 }, { "epoch": 1.8680814453685115, "grad_norm": 0.25439321994781494, "learning_rate": 3.7450961354888913e-06, "loss": 0.3031, "step": 13028 }, { "epoch": 1.8682248351018067, "grad_norm": 0.24909096956253052, "learning_rate": 3.744288612256122e-06, "loss": 0.2925, "step": 13029 }, { "epoch": 1.868368224835102, "grad_norm": 0.28857871890068054, "learning_rate": 3.743481123980426e-06, "loss": 0.295, "step": 13030 }, { "epoch": 1.8685116145683969, "grad_norm": 0.2919715940952301, "learning_rate": 3.7426736706842825e-06, "loss": 0.2873, "step": 13031 }, { "epoch": 1.8686550043016918, "grad_norm": 0.25375616550445557, "learning_rate": 3.7418662523901693e-06, "loss": 0.2821, "step": 13032 }, { "epoch": 1.868798394034987, "grad_norm": 0.2822031080722809, "learning_rate": 3.7410588691205652e-06, "loss": 0.2865, "step": 13033 }, { "epoch": 1.8689417837682822, "grad_norm": 0.2806793451309204, "learning_rate": 3.7402515208979463e-06, "loss": 0.291, "step": 13034 }, { "epoch": 1.8690851735015772, "grad_norm": 0.27544835209846497, "learning_rate": 3.7394442077447856e-06, "loss": 0.3152, "step": 13035 }, { "epoch": 1.8692285632348724, "grad_norm": 0.29204630851745605, "learning_rate": 3.738636929683559e-06, "loss": 0.2959, "step": 13036 }, { "epoch": 1.8693719529681676, "grad_norm": 0.2733650803565979, "learning_rate": 3.7378296867367403e-06, "loss": 0.2974, "step": 13037 }, { "epoch": 1.8695153427014626, "grad_norm": 0.26013410091400146, "learning_rate": 3.737022478926802e-06, "loss": 0.3117, "step": 13038 }, { "epoch": 1.8696587324347576, "grad_norm": 0.26212450861930847, "learning_rate": 3.7362153062762163e-06, "loss": 0.2839, "step": 13039 }, { "epoch": 1.8698021221680527, "grad_norm": 0.30527132749557495, "learning_rate": 3.7354081688074513e-06, "loss": 0.3141, "step": 13040 }, { "epoch": 1.869945511901348, "grad_norm": 0.2840614318847656, "learning_rate": 3.7346010665429765e-06, "loss": 0.295, "step": 13041 }, { "epoch": 1.870088901634643, "grad_norm": 0.2875860035419464, "learning_rate": 3.7337939995052624e-06, "loss": 0.3149, "step": 13042 }, { "epoch": 1.870232291367938, "grad_norm": 0.2660868465900421, "learning_rate": 3.732986967716774e-06, "loss": 0.3002, "step": 13043 }, { "epoch": 1.8703756811012333, "grad_norm": 0.2637498080730438, "learning_rate": 3.7321799711999806e-06, "loss": 0.2872, "step": 13044 }, { "epoch": 1.8705190708345283, "grad_norm": 0.27056288719177246, "learning_rate": 3.7313730099773488e-06, "loss": 0.3088, "step": 13045 }, { "epoch": 1.8706624605678233, "grad_norm": 0.2559163570404053, "learning_rate": 3.7305660840713385e-06, "loss": 0.3132, "step": 13046 }, { "epoch": 1.8708058503011185, "grad_norm": 0.28467243909835815, "learning_rate": 3.7297591935044153e-06, "loss": 0.3069, "step": 13047 }, { "epoch": 1.8709492400344137, "grad_norm": 0.27178868651390076, "learning_rate": 3.7289523382990428e-06, "loss": 0.2744, "step": 13048 }, { "epoch": 1.8710926297677086, "grad_norm": 0.28817862272262573, "learning_rate": 3.7281455184776818e-06, "loss": 0.2967, "step": 13049 }, { "epoch": 1.8712360195010036, "grad_norm": 0.27677667140960693, "learning_rate": 3.7273387340627943e-06, "loss": 0.2995, "step": 13050 }, { "epoch": 1.8713794092342988, "grad_norm": 0.2844569981098175, "learning_rate": 3.726531985076837e-06, "loss": 0.2912, "step": 13051 }, { "epoch": 1.871522798967594, "grad_norm": 0.27344176173210144, "learning_rate": 3.725725271542271e-06, "loss": 0.3199, "step": 13052 }, { "epoch": 1.871666188700889, "grad_norm": 0.2733686566352844, "learning_rate": 3.7249185934815527e-06, "loss": 0.302, "step": 13053 }, { "epoch": 1.8718095784341842, "grad_norm": 0.28186339139938354, "learning_rate": 3.7241119509171397e-06, "loss": 0.3034, "step": 13054 }, { "epoch": 1.8719529681674794, "grad_norm": 0.266136109828949, "learning_rate": 3.7233053438714893e-06, "loss": 0.2978, "step": 13055 }, { "epoch": 1.8720963579007743, "grad_norm": 0.28214624524116516, "learning_rate": 3.7224987723670525e-06, "loss": 0.3109, "step": 13056 }, { "epoch": 1.8722397476340693, "grad_norm": 0.265906423330307, "learning_rate": 3.7216922364262842e-06, "loss": 0.3139, "step": 13057 }, { "epoch": 1.8723831373673645, "grad_norm": 0.29671990871429443, "learning_rate": 3.7208857360716376e-06, "loss": 0.3189, "step": 13058 }, { "epoch": 1.8725265271006597, "grad_norm": 0.2728126645088196, "learning_rate": 3.7200792713255644e-06, "loss": 0.3122, "step": 13059 }, { "epoch": 1.8726699168339547, "grad_norm": 0.27777424454689026, "learning_rate": 3.7192728422105157e-06, "loss": 0.3027, "step": 13060 }, { "epoch": 1.8728133065672496, "grad_norm": 0.2737504839897156, "learning_rate": 3.7184664487489418e-06, "loss": 0.3137, "step": 13061 }, { "epoch": 1.8729566963005448, "grad_norm": 0.2677340805530548, "learning_rate": 3.7176600909632898e-06, "loss": 0.2945, "step": 13062 }, { "epoch": 1.87310008603384, "grad_norm": 0.26097843050956726, "learning_rate": 3.716853768876008e-06, "loss": 0.304, "step": 13063 }, { "epoch": 1.873243475767135, "grad_norm": 0.29971635341644287, "learning_rate": 3.716047482509544e-06, "loss": 0.3199, "step": 13064 }, { "epoch": 1.8733868655004302, "grad_norm": 0.287982314825058, "learning_rate": 3.715241231886342e-06, "loss": 0.3074, "step": 13065 }, { "epoch": 1.8735302552337254, "grad_norm": 0.2763839662075043, "learning_rate": 3.7144350170288506e-06, "loss": 0.3085, "step": 13066 }, { "epoch": 1.8736736449670204, "grad_norm": 0.28792694211006165, "learning_rate": 3.7136288379595086e-06, "loss": 0.3081, "step": 13067 }, { "epoch": 1.8738170347003154, "grad_norm": 0.29837682843208313, "learning_rate": 3.7128226947007607e-06, "loss": 0.3016, "step": 13068 }, { "epoch": 1.8739604244336105, "grad_norm": 0.28382882475852966, "learning_rate": 3.7120165872750493e-06, "loss": 0.2952, "step": 13069 }, { "epoch": 1.8741038141669057, "grad_norm": 0.2837507128715515, "learning_rate": 3.711210515704814e-06, "loss": 0.3283, "step": 13070 }, { "epoch": 1.8742472039002007, "grad_norm": 0.2562448978424072, "learning_rate": 3.710404480012495e-06, "loss": 0.2955, "step": 13071 }, { "epoch": 1.8743905936334957, "grad_norm": 0.28284409642219543, "learning_rate": 3.7095984802205348e-06, "loss": 0.3105, "step": 13072 }, { "epoch": 1.874533983366791, "grad_norm": 0.2709997892379761, "learning_rate": 3.7087925163513657e-06, "loss": 0.3109, "step": 13073 }, { "epoch": 1.874677373100086, "grad_norm": 0.28317928314208984, "learning_rate": 3.7079865884274263e-06, "loss": 0.3126, "step": 13074 }, { "epoch": 1.874820762833381, "grad_norm": 0.2543564438819885, "learning_rate": 3.7071806964711533e-06, "loss": 0.2841, "step": 13075 }, { "epoch": 1.8749641525666763, "grad_norm": 0.26664406061172485, "learning_rate": 3.7063748405049803e-06, "loss": 0.2898, "step": 13076 }, { "epoch": 1.8751075422999715, "grad_norm": 0.2903241813182831, "learning_rate": 3.7055690205513435e-06, "loss": 0.3157, "step": 13077 }, { "epoch": 1.8752509320332664, "grad_norm": 0.29198190569877625, "learning_rate": 3.7047632366326723e-06, "loss": 0.3102, "step": 13078 }, { "epoch": 1.8753943217665614, "grad_norm": 0.24606363475322723, "learning_rate": 3.703957488771401e-06, "loss": 0.3087, "step": 13079 }, { "epoch": 1.8755377114998566, "grad_norm": 0.26522335410118103, "learning_rate": 3.7031517769899587e-06, "loss": 0.32, "step": 13080 }, { "epoch": 1.8756811012331518, "grad_norm": 0.28885841369628906, "learning_rate": 3.7023461013107763e-06, "loss": 0.2911, "step": 13081 }, { "epoch": 1.8758244909664468, "grad_norm": 0.2745026648044586, "learning_rate": 3.7015404617562846e-06, "loss": 0.2996, "step": 13082 }, { "epoch": 1.8759678806997417, "grad_norm": 0.27319011092185974, "learning_rate": 3.700734858348907e-06, "loss": 0.3085, "step": 13083 }, { "epoch": 1.8761112704330372, "grad_norm": 0.27251023054122925, "learning_rate": 3.699929291111072e-06, "loss": 0.3317, "step": 13084 }, { "epoch": 1.8762546601663321, "grad_norm": 0.2711823582649231, "learning_rate": 3.699123760065205e-06, "loss": 0.3145, "step": 13085 }, { "epoch": 1.876398049899627, "grad_norm": 0.2896854281425476, "learning_rate": 3.6983182652337323e-06, "loss": 0.2805, "step": 13086 }, { "epoch": 1.8765414396329223, "grad_norm": 0.279979407787323, "learning_rate": 3.697512806639077e-06, "loss": 0.3119, "step": 13087 }, { "epoch": 1.8766848293662175, "grad_norm": 0.2652229368686676, "learning_rate": 3.6967073843036615e-06, "loss": 0.3023, "step": 13088 }, { "epoch": 1.8768282190995125, "grad_norm": 0.29138436913490295, "learning_rate": 3.6959019982499076e-06, "loss": 0.3094, "step": 13089 }, { "epoch": 1.8769716088328074, "grad_norm": 0.28111475706100464, "learning_rate": 3.695096648500236e-06, "loss": 0.2969, "step": 13090 }, { "epoch": 1.8771149985661026, "grad_norm": 0.2689124643802643, "learning_rate": 3.694291335077066e-06, "loss": 0.2943, "step": 13091 }, { "epoch": 1.8772583882993978, "grad_norm": 0.25189706683158875, "learning_rate": 3.6934860580028163e-06, "loss": 0.3057, "step": 13092 }, { "epoch": 1.8774017780326928, "grad_norm": 0.2643131911754608, "learning_rate": 3.6926808172999073e-06, "loss": 0.3275, "step": 13093 }, { "epoch": 1.877545167765988, "grad_norm": 0.26657623052597046, "learning_rate": 3.6918756129907514e-06, "loss": 0.2798, "step": 13094 }, { "epoch": 1.8776885574992832, "grad_norm": 0.2863411009311676, "learning_rate": 3.691070445097766e-06, "loss": 0.3182, "step": 13095 }, { "epoch": 1.8778319472325782, "grad_norm": 0.28665000200271606, "learning_rate": 3.690265313643366e-06, "loss": 0.3116, "step": 13096 }, { "epoch": 1.8779753369658732, "grad_norm": 0.2734296917915344, "learning_rate": 3.689460218649965e-06, "loss": 0.3183, "step": 13097 }, { "epoch": 1.8781187266991684, "grad_norm": 0.2668720781803131, "learning_rate": 3.6886551601399755e-06, "loss": 0.2995, "step": 13098 }, { "epoch": 1.8782621164324635, "grad_norm": 0.264621764421463, "learning_rate": 3.68785013813581e-06, "loss": 0.289, "step": 13099 }, { "epoch": 1.8784055061657585, "grad_norm": 0.2724704146385193, "learning_rate": 3.687045152659877e-06, "loss": 0.3093, "step": 13100 }, { "epoch": 1.8785488958990535, "grad_norm": 0.2891067862510681, "learning_rate": 3.6862402037345874e-06, "loss": 0.304, "step": 13101 }, { "epoch": 1.8786922856323487, "grad_norm": 0.2642137408256531, "learning_rate": 3.68543529138235e-06, "loss": 0.3072, "step": 13102 }, { "epoch": 1.8788356753656439, "grad_norm": 0.27063873410224915, "learning_rate": 3.684630415625571e-06, "loss": 0.3094, "step": 13103 }, { "epoch": 1.8789790650989389, "grad_norm": 0.2757955491542816, "learning_rate": 3.683825576486659e-06, "loss": 0.3137, "step": 13104 }, { "epoch": 1.879122454832234, "grad_norm": 0.28715571761131287, "learning_rate": 3.6830207739880186e-06, "loss": 0.3062, "step": 13105 }, { "epoch": 1.8792658445655293, "grad_norm": 0.2757236361503601, "learning_rate": 3.682216008152053e-06, "loss": 0.2979, "step": 13106 }, { "epoch": 1.8794092342988242, "grad_norm": 0.2596120834350586, "learning_rate": 3.6814112790011663e-06, "loss": 0.2845, "step": 13107 }, { "epoch": 1.8795526240321192, "grad_norm": 0.2618503272533417, "learning_rate": 3.680606586557761e-06, "loss": 0.3077, "step": 13108 }, { "epoch": 1.8796960137654144, "grad_norm": 0.2666599154472351, "learning_rate": 3.679801930844242e-06, "loss": 0.2878, "step": 13109 }, { "epoch": 1.8798394034987096, "grad_norm": 0.27996185421943665, "learning_rate": 3.6789973118830034e-06, "loss": 0.3042, "step": 13110 }, { "epoch": 1.8799827932320046, "grad_norm": 0.2774239480495453, "learning_rate": 3.6781927296964483e-06, "loss": 0.2982, "step": 13111 }, { "epoch": 1.8801261829652995, "grad_norm": 0.2858460247516632, "learning_rate": 3.6773881843069737e-06, "loss": 0.2901, "step": 13112 }, { "epoch": 1.8802695726985947, "grad_norm": 0.27968335151672363, "learning_rate": 3.6765836757369774e-06, "loss": 0.3092, "step": 13113 }, { "epoch": 1.88041296243189, "grad_norm": 0.2765089273452759, "learning_rate": 3.6757792040088557e-06, "loss": 0.3001, "step": 13114 }, { "epoch": 1.880556352165185, "grad_norm": 0.26513731479644775, "learning_rate": 3.6749747691450055e-06, "loss": 0.3092, "step": 13115 }, { "epoch": 1.88069974189848, "grad_norm": 0.27239543199539185, "learning_rate": 3.6741703711678177e-06, "loss": 0.2992, "step": 13116 }, { "epoch": 1.8808431316317753, "grad_norm": 0.28316712379455566, "learning_rate": 3.673366010099688e-06, "loss": 0.3005, "step": 13117 }, { "epoch": 1.8809865213650703, "grad_norm": 0.26446133852005005, "learning_rate": 3.6725616859630075e-06, "loss": 0.2911, "step": 13118 }, { "epoch": 1.8811299110983652, "grad_norm": 0.27068227529525757, "learning_rate": 3.671757398780168e-06, "loss": 0.2936, "step": 13119 }, { "epoch": 1.8812733008316604, "grad_norm": 0.29932114481925964, "learning_rate": 3.6709531485735607e-06, "loss": 0.3131, "step": 13120 }, { "epoch": 1.8814166905649556, "grad_norm": 0.2704319357872009, "learning_rate": 3.6701489353655718e-06, "loss": 0.2815, "step": 13121 }, { "epoch": 1.8815600802982506, "grad_norm": 0.2732883393764496, "learning_rate": 3.66934475917859e-06, "loss": 0.3018, "step": 13122 }, { "epoch": 1.8817034700315456, "grad_norm": 0.27554959058761597, "learning_rate": 3.668540620035004e-06, "loss": 0.3296, "step": 13123 }, { "epoch": 1.8818468597648408, "grad_norm": 0.2808811068534851, "learning_rate": 3.6677365179571988e-06, "loss": 0.2956, "step": 13124 }, { "epoch": 1.881990249498136, "grad_norm": 0.2806027829647064, "learning_rate": 3.6669324529675598e-06, "loss": 0.2984, "step": 13125 }, { "epoch": 1.882133639231431, "grad_norm": 0.295544296503067, "learning_rate": 3.6661284250884713e-06, "loss": 0.3002, "step": 13126 }, { "epoch": 1.8822770289647262, "grad_norm": 0.2802657186985016, "learning_rate": 3.665324434342315e-06, "loss": 0.2977, "step": 13127 }, { "epoch": 1.8824204186980213, "grad_norm": 0.2901213467121124, "learning_rate": 3.664520480751473e-06, "loss": 0.2977, "step": 13128 }, { "epoch": 1.8825638084313163, "grad_norm": 0.29463979601860046, "learning_rate": 3.6637165643383267e-06, "loss": 0.3026, "step": 13129 }, { "epoch": 1.8827071981646113, "grad_norm": 0.26090317964553833, "learning_rate": 3.662912685125255e-06, "loss": 0.2812, "step": 13130 }, { "epoch": 1.8828505878979065, "grad_norm": 0.2753140926361084, "learning_rate": 3.6621088431346398e-06, "loss": 0.3017, "step": 13131 }, { "epoch": 1.8829939776312017, "grad_norm": 0.2805033326148987, "learning_rate": 3.6613050383888536e-06, "loss": 0.3054, "step": 13132 }, { "epoch": 1.8831373673644967, "grad_norm": 0.2926293909549713, "learning_rate": 3.660501270910276e-06, "loss": 0.3034, "step": 13133 }, { "epoch": 1.8832807570977916, "grad_norm": 0.2785421311855316, "learning_rate": 3.659697540721283e-06, "loss": 0.3034, "step": 13134 }, { "epoch": 1.883424146831087, "grad_norm": 0.25996989011764526, "learning_rate": 3.6588938478442492e-06, "loss": 0.2823, "step": 13135 }, { "epoch": 1.883567536564382, "grad_norm": 0.25990116596221924, "learning_rate": 3.658090192301549e-06, "loss": 0.3019, "step": 13136 }, { "epoch": 1.883710926297677, "grad_norm": 0.27218931913375854, "learning_rate": 3.6572865741155517e-06, "loss": 0.2977, "step": 13137 }, { "epoch": 1.8838543160309722, "grad_norm": 0.2968498766422272, "learning_rate": 3.656482993308631e-06, "loss": 0.3136, "step": 13138 }, { "epoch": 1.8839977057642674, "grad_norm": 0.27401018142700195, "learning_rate": 3.655679449903157e-06, "loss": 0.2968, "step": 13139 }, { "epoch": 1.8841410954975624, "grad_norm": 0.296407014131546, "learning_rate": 3.6548759439214986e-06, "loss": 0.2802, "step": 13140 }, { "epoch": 1.8842844852308573, "grad_norm": 0.27519533038139343, "learning_rate": 3.654072475386025e-06, "loss": 0.2937, "step": 13141 }, { "epoch": 1.8844278749641525, "grad_norm": 0.28828752040863037, "learning_rate": 3.653269044319104e-06, "loss": 0.2829, "step": 13142 }, { "epoch": 1.8845712646974477, "grad_norm": 0.2677789032459259, "learning_rate": 3.652465650743101e-06, "loss": 0.3028, "step": 13143 }, { "epoch": 1.8847146544307427, "grad_norm": 0.2701530158519745, "learning_rate": 3.651662294680381e-06, "loss": 0.2971, "step": 13144 }, { "epoch": 1.884858044164038, "grad_norm": 0.2701534032821655, "learning_rate": 3.6508589761533086e-06, "loss": 0.2946, "step": 13145 }, { "epoch": 1.885001433897333, "grad_norm": 0.26183655858039856, "learning_rate": 3.6500556951842465e-06, "loss": 0.3046, "step": 13146 }, { "epoch": 1.885144823630628, "grad_norm": 0.2881532907485962, "learning_rate": 3.6492524517955595e-06, "loss": 0.3033, "step": 13147 }, { "epoch": 1.885288213363923, "grad_norm": 0.28259775042533875, "learning_rate": 3.6484492460096043e-06, "loss": 0.2998, "step": 13148 }, { "epoch": 1.8854316030972182, "grad_norm": 0.2708013653755188, "learning_rate": 3.6476460778487433e-06, "loss": 0.296, "step": 13149 }, { "epoch": 1.8855749928305134, "grad_norm": 0.2825169563293457, "learning_rate": 3.6468429473353353e-06, "loss": 0.2764, "step": 13150 }, { "epoch": 1.8857183825638084, "grad_norm": 0.29787901043891907, "learning_rate": 3.6460398544917376e-06, "loss": 0.2914, "step": 13151 }, { "epoch": 1.8858617722971034, "grad_norm": 0.3022303581237793, "learning_rate": 3.6452367993403082e-06, "loss": 0.3116, "step": 13152 }, { "epoch": 1.8860051620303986, "grad_norm": 0.2853562533855438, "learning_rate": 3.6444337819034024e-06, "loss": 0.2953, "step": 13153 }, { "epoch": 1.8861485517636938, "grad_norm": 0.2722521424293518, "learning_rate": 3.643630802203375e-06, "loss": 0.2838, "step": 13154 }, { "epoch": 1.8862919414969888, "grad_norm": 0.27419596910476685, "learning_rate": 3.642827860262579e-06, "loss": 0.3131, "step": 13155 }, { "epoch": 1.886435331230284, "grad_norm": 0.27559787034988403, "learning_rate": 3.642024956103367e-06, "loss": 0.2949, "step": 13156 }, { "epoch": 1.8865787209635791, "grad_norm": 0.2699989378452301, "learning_rate": 3.6412220897480922e-06, "loss": 0.2923, "step": 13157 }, { "epoch": 1.8867221106968741, "grad_norm": 0.2796139419078827, "learning_rate": 3.640419261219106e-06, "loss": 0.3107, "step": 13158 }, { "epoch": 1.886865500430169, "grad_norm": 0.3052347004413605, "learning_rate": 3.6396164705387537e-06, "loss": 0.3009, "step": 13159 }, { "epoch": 1.8870088901634643, "grad_norm": 0.28189778327941895, "learning_rate": 3.638813717729386e-06, "loss": 0.3068, "step": 13160 }, { "epoch": 1.8871522798967595, "grad_norm": 0.2742818593978882, "learning_rate": 3.6380110028133507e-06, "loss": 0.3132, "step": 13161 }, { "epoch": 1.8872956696300545, "grad_norm": 0.2883039712905884, "learning_rate": 3.637208325812994e-06, "loss": 0.3108, "step": 13162 }, { "epoch": 1.8874390593633494, "grad_norm": 0.2574666440486908, "learning_rate": 3.636405686750662e-06, "loss": 0.2933, "step": 13163 }, { "epoch": 1.8875824490966446, "grad_norm": 0.27174508571624756, "learning_rate": 3.6356030856486994e-06, "loss": 0.2846, "step": 13164 }, { "epoch": 1.8877258388299398, "grad_norm": 0.2678707540035248, "learning_rate": 3.6348005225294463e-06, "loss": 0.3021, "step": 13165 }, { "epoch": 1.8878692285632348, "grad_norm": 0.2728806138038635, "learning_rate": 3.633997997415246e-06, "loss": 0.3157, "step": 13166 }, { "epoch": 1.88801261829653, "grad_norm": 0.2762751281261444, "learning_rate": 3.633195510328441e-06, "loss": 0.2871, "step": 13167 }, { "epoch": 1.8881560080298252, "grad_norm": 0.2948501706123352, "learning_rate": 3.63239306129137e-06, "loss": 0.3071, "step": 13168 }, { "epoch": 1.8882993977631202, "grad_norm": 0.28433963656425476, "learning_rate": 3.6315906503263735e-06, "loss": 0.3006, "step": 13169 }, { "epoch": 1.8884427874964151, "grad_norm": 0.2846991717815399, "learning_rate": 3.6307882774557873e-06, "loss": 0.3074, "step": 13170 }, { "epoch": 1.8885861772297103, "grad_norm": 0.26850607991218567, "learning_rate": 3.629985942701949e-06, "loss": 0.2828, "step": 13171 }, { "epoch": 1.8887295669630055, "grad_norm": 0.29057568311691284, "learning_rate": 3.629183646087195e-06, "loss": 0.3064, "step": 13172 }, { "epoch": 1.8888729566963005, "grad_norm": 0.26449573040008545, "learning_rate": 3.628381387633859e-06, "loss": 0.2944, "step": 13173 }, { "epoch": 1.8890163464295955, "grad_norm": 0.2766660153865814, "learning_rate": 3.627579167364278e-06, "loss": 0.3082, "step": 13174 }, { "epoch": 1.889159736162891, "grad_norm": 0.27638503909111023, "learning_rate": 3.6267769853007793e-06, "loss": 0.2788, "step": 13175 }, { "epoch": 1.8893031258961859, "grad_norm": 0.2820926606655121, "learning_rate": 3.625974841465697e-06, "loss": 0.2995, "step": 13176 }, { "epoch": 1.8894465156294808, "grad_norm": 0.25454652309417725, "learning_rate": 3.6251727358813614e-06, "loss": 0.288, "step": 13177 }, { "epoch": 1.889589905362776, "grad_norm": 0.27702102065086365, "learning_rate": 3.6243706685701018e-06, "loss": 0.2887, "step": 13178 }, { "epoch": 1.8897332950960712, "grad_norm": 0.2574651837348938, "learning_rate": 3.623568639554247e-06, "loss": 0.2906, "step": 13179 }, { "epoch": 1.8898766848293662, "grad_norm": 0.26576361060142517, "learning_rate": 3.6227666488561246e-06, "loss": 0.317, "step": 13180 }, { "epoch": 1.8900200745626612, "grad_norm": 0.26499107480049133, "learning_rate": 3.6219646964980592e-06, "loss": 0.2941, "step": 13181 }, { "epoch": 1.8901634642959564, "grad_norm": 0.28938552737236023, "learning_rate": 3.621162782502377e-06, "loss": 0.304, "step": 13182 }, { "epoch": 1.8903068540292516, "grad_norm": 0.2596987783908844, "learning_rate": 3.6203609068914018e-06, "loss": 0.3038, "step": 13183 }, { "epoch": 1.8904502437625466, "grad_norm": 0.26435279846191406, "learning_rate": 3.619559069687457e-06, "loss": 0.2959, "step": 13184 }, { "epoch": 1.8905936334958418, "grad_norm": 0.2714073657989502, "learning_rate": 3.618757270912866e-06, "loss": 0.3098, "step": 13185 }, { "epoch": 1.890737023229137, "grad_norm": 0.27982911467552185, "learning_rate": 3.6179555105899454e-06, "loss": 0.3036, "step": 13186 }, { "epoch": 1.890880412962432, "grad_norm": 0.2808435261249542, "learning_rate": 3.617153788741018e-06, "loss": 0.2986, "step": 13187 }, { "epoch": 1.891023802695727, "grad_norm": 0.28848302364349365, "learning_rate": 3.6163521053883998e-06, "loss": 0.3181, "step": 13188 }, { "epoch": 1.891167192429022, "grad_norm": 0.25988292694091797, "learning_rate": 3.6155504605544127e-06, "loss": 0.2804, "step": 13189 }, { "epoch": 1.8913105821623173, "grad_norm": 0.26925960183143616, "learning_rate": 3.6147488542613705e-06, "loss": 0.2967, "step": 13190 }, { "epoch": 1.8914539718956123, "grad_norm": 0.2772354483604431, "learning_rate": 3.6139472865315916e-06, "loss": 0.3265, "step": 13191 }, { "epoch": 1.8915973616289072, "grad_norm": 0.2675155699253082, "learning_rate": 3.6131457573873863e-06, "loss": 0.3097, "step": 13192 }, { "epoch": 1.8917407513622024, "grad_norm": 0.2758421301841736, "learning_rate": 3.6123442668510695e-06, "loss": 0.2839, "step": 13193 }, { "epoch": 1.8918841410954976, "grad_norm": 0.29602575302124023, "learning_rate": 3.611542814944954e-06, "loss": 0.2942, "step": 13194 }, { "epoch": 1.8920275308287926, "grad_norm": 0.2808050215244293, "learning_rate": 3.61074140169135e-06, "loss": 0.3015, "step": 13195 }, { "epoch": 1.8921709205620878, "grad_norm": 0.24985988438129425, "learning_rate": 3.6099400271125694e-06, "loss": 0.2856, "step": 13196 }, { "epoch": 1.892314310295383, "grad_norm": 0.27164846658706665, "learning_rate": 3.6091386912309196e-06, "loss": 0.306, "step": 13197 }, { "epoch": 1.892457700028678, "grad_norm": 0.2685914933681488, "learning_rate": 3.608337394068709e-06, "loss": 0.3018, "step": 13198 }, { "epoch": 1.892601089761973, "grad_norm": 0.30672723054885864, "learning_rate": 3.607536135648244e-06, "loss": 0.3107, "step": 13199 }, { "epoch": 1.8927444794952681, "grad_norm": 0.2769002318382263, "learning_rate": 3.606734915991832e-06, "loss": 0.3113, "step": 13200 }, { "epoch": 1.8928878692285633, "grad_norm": 0.2727645933628082, "learning_rate": 3.6059337351217777e-06, "loss": 0.3184, "step": 13201 }, { "epoch": 1.8930312589618583, "grad_norm": 0.27743589878082275, "learning_rate": 3.605132593060382e-06, "loss": 0.305, "step": 13202 }, { "epoch": 1.8931746486951533, "grad_norm": 0.2672850489616394, "learning_rate": 3.6043314898299497e-06, "loss": 0.2896, "step": 13203 }, { "epoch": 1.8933180384284485, "grad_norm": 0.301527738571167, "learning_rate": 3.6035304254527815e-06, "loss": 0.312, "step": 13204 }, { "epoch": 1.8934614281617437, "grad_norm": 0.2958478331565857, "learning_rate": 3.602729399951178e-06, "loss": 0.3261, "step": 13205 }, { "epoch": 1.8936048178950387, "grad_norm": 0.26636871695518494, "learning_rate": 3.6019284133474388e-06, "loss": 0.3003, "step": 13206 }, { "epoch": 1.8937482076283338, "grad_norm": 0.29966315627098083, "learning_rate": 3.601127465663862e-06, "loss": 0.3023, "step": 13207 }, { "epoch": 1.893891597361629, "grad_norm": 0.26875534653663635, "learning_rate": 3.600326556922745e-06, "loss": 0.2982, "step": 13208 }, { "epoch": 1.894034987094924, "grad_norm": 0.2757962644100189, "learning_rate": 3.5995256871463822e-06, "loss": 0.2907, "step": 13209 }, { "epoch": 1.894178376828219, "grad_norm": 0.29556119441986084, "learning_rate": 3.598724856357071e-06, "loss": 0.3085, "step": 13210 }, { "epoch": 1.8943217665615142, "grad_norm": 0.26527684926986694, "learning_rate": 3.5979240645771042e-06, "loss": 0.3145, "step": 13211 }, { "epoch": 1.8944651562948094, "grad_norm": 0.283198744058609, "learning_rate": 3.5971233118287762e-06, "loss": 0.2839, "step": 13212 }, { "epoch": 1.8946085460281044, "grad_norm": 0.27301061153411865, "learning_rate": 3.5963225981343752e-06, "loss": 0.32, "step": 13213 }, { "epoch": 1.8947519357613993, "grad_norm": 0.2648354172706604, "learning_rate": 3.5955219235161943e-06, "loss": 0.3026, "step": 13214 }, { "epoch": 1.8948953254946945, "grad_norm": 0.2787317633628845, "learning_rate": 3.5947212879965223e-06, "loss": 0.2949, "step": 13215 }, { "epoch": 1.8950387152279897, "grad_norm": 0.28401777148246765, "learning_rate": 3.5939206915976468e-06, "loss": 0.3129, "step": 13216 }, { "epoch": 1.8951821049612847, "grad_norm": 0.2947387397289276, "learning_rate": 3.593120134341857e-06, "loss": 0.2968, "step": 13217 }, { "epoch": 1.89532549469458, "grad_norm": 0.29083603620529175, "learning_rate": 3.5923196162514408e-06, "loss": 0.3191, "step": 13218 }, { "epoch": 1.895468884427875, "grad_norm": 0.27646514773368835, "learning_rate": 3.591519137348679e-06, "loss": 0.2817, "step": 13219 }, { "epoch": 1.89561227416117, "grad_norm": 0.2611326277256012, "learning_rate": 3.590718697655858e-06, "loss": 0.2833, "step": 13220 }, { "epoch": 1.895755663894465, "grad_norm": 0.2867914140224457, "learning_rate": 3.5899182971952605e-06, "loss": 0.2912, "step": 13221 }, { "epoch": 1.8958990536277602, "grad_norm": 0.30960094928741455, "learning_rate": 3.589117935989168e-06, "loss": 0.3048, "step": 13222 }, { "epoch": 1.8960424433610554, "grad_norm": 0.25860166549682617, "learning_rate": 3.5883176140598634e-06, "loss": 0.2883, "step": 13223 }, { "epoch": 1.8961858330943504, "grad_norm": 0.3005225956439972, "learning_rate": 3.5875173314296243e-06, "loss": 0.3156, "step": 13224 }, { "epoch": 1.8963292228276454, "grad_norm": 0.27347806096076965, "learning_rate": 3.5867170881207293e-06, "loss": 0.3039, "step": 13225 }, { "epoch": 1.8964726125609408, "grad_norm": 0.2919362783432007, "learning_rate": 3.585916884155456e-06, "loss": 0.3156, "step": 13226 }, { "epoch": 1.8966160022942358, "grad_norm": 0.29607897996902466, "learning_rate": 3.585116719556082e-06, "loss": 0.2893, "step": 13227 }, { "epoch": 1.8967593920275307, "grad_norm": 0.26791104674339294, "learning_rate": 3.584316594344884e-06, "loss": 0.2955, "step": 13228 }, { "epoch": 1.896902781760826, "grad_norm": 0.2783357501029968, "learning_rate": 3.583516508544132e-06, "loss": 0.2992, "step": 13229 }, { "epoch": 1.8970461714941211, "grad_norm": 0.27872100472450256, "learning_rate": 3.5827164621761018e-06, "loss": 0.2918, "step": 13230 }, { "epoch": 1.897189561227416, "grad_norm": 0.275392085313797, "learning_rate": 3.5819164552630648e-06, "loss": 0.3154, "step": 13231 }, { "epoch": 1.897332950960711, "grad_norm": 0.28583621978759766, "learning_rate": 3.5811164878272925e-06, "loss": 0.3183, "step": 13232 }, { "epoch": 1.8974763406940063, "grad_norm": 0.2853679656982422, "learning_rate": 3.580316559891054e-06, "loss": 0.3086, "step": 13233 }, { "epoch": 1.8976197304273015, "grad_norm": 0.2746753990650177, "learning_rate": 3.5795166714766206e-06, "loss": 0.3203, "step": 13234 }, { "epoch": 1.8977631201605965, "grad_norm": 0.2849884629249573, "learning_rate": 3.578716822606256e-06, "loss": 0.2902, "step": 13235 }, { "epoch": 1.8979065098938916, "grad_norm": 0.306951642036438, "learning_rate": 3.577917013302229e-06, "loss": 0.3169, "step": 13236 }, { "epoch": 1.8980498996271868, "grad_norm": 0.26557135581970215, "learning_rate": 3.577117243586805e-06, "loss": 0.2976, "step": 13237 }, { "epoch": 1.8981932893604818, "grad_norm": 0.27775925397872925, "learning_rate": 3.576317513482248e-06, "loss": 0.3233, "step": 13238 }, { "epoch": 1.8983366790937768, "grad_norm": 0.2783222496509552, "learning_rate": 3.5755178230108232e-06, "loss": 0.2958, "step": 13239 }, { "epoch": 1.898480068827072, "grad_norm": 0.30716368556022644, "learning_rate": 3.5747181721947893e-06, "loss": 0.3029, "step": 13240 }, { "epoch": 1.8986234585603672, "grad_norm": 0.285942405462265, "learning_rate": 3.5739185610564087e-06, "loss": 0.3166, "step": 13241 }, { "epoch": 1.8987668482936622, "grad_norm": 0.2685311436653137, "learning_rate": 3.5731189896179418e-06, "loss": 0.2785, "step": 13242 }, { "epoch": 1.8989102380269571, "grad_norm": 0.2655504047870636, "learning_rate": 3.5723194579016473e-06, "loss": 0.2928, "step": 13243 }, { "epoch": 1.8990536277602523, "grad_norm": 0.2827690839767456, "learning_rate": 3.5715199659297816e-06, "loss": 0.2958, "step": 13244 }, { "epoch": 1.8991970174935475, "grad_norm": 0.2754054069519043, "learning_rate": 3.570720513724606e-06, "loss": 0.2989, "step": 13245 }, { "epoch": 1.8993404072268425, "grad_norm": 0.27638864517211914, "learning_rate": 3.5699211013083705e-06, "loss": 0.2804, "step": 13246 }, { "epoch": 1.8994837969601377, "grad_norm": 0.2918543815612793, "learning_rate": 3.569121728703332e-06, "loss": 0.3037, "step": 13247 }, { "epoch": 1.899627186693433, "grad_norm": 0.2717483639717102, "learning_rate": 3.5683223959317424e-06, "loss": 0.3192, "step": 13248 }, { "epoch": 1.8997705764267279, "grad_norm": 0.28649377822875977, "learning_rate": 3.5675231030158562e-06, "loss": 0.3004, "step": 13249 }, { "epoch": 1.8999139661600228, "grad_norm": 0.30965283513069153, "learning_rate": 3.5667238499779238e-06, "loss": 0.3145, "step": 13250 }, { "epoch": 1.900057355893318, "grad_norm": 0.280409574508667, "learning_rate": 3.5659246368401936e-06, "loss": 0.2935, "step": 13251 }, { "epoch": 1.9002007456266132, "grad_norm": 0.28018829226493835, "learning_rate": 3.5651254636249156e-06, "loss": 0.3038, "step": 13252 }, { "epoch": 1.9003441353599082, "grad_norm": 0.26283538341522217, "learning_rate": 3.5643263303543375e-06, "loss": 0.2891, "step": 13253 }, { "epoch": 1.9004875250932032, "grad_norm": 0.2749452292919159, "learning_rate": 3.563527237050706e-06, "loss": 0.3228, "step": 13254 }, { "epoch": 1.9006309148264984, "grad_norm": 0.28562262654304504, "learning_rate": 3.562728183736268e-06, "loss": 0.2971, "step": 13255 }, { "epoch": 1.9007743045597936, "grad_norm": 0.2742237150669098, "learning_rate": 3.5619291704332644e-06, "loss": 0.3023, "step": 13256 }, { "epoch": 1.9009176942930885, "grad_norm": 0.26604676246643066, "learning_rate": 3.561130197163941e-06, "loss": 0.2985, "step": 13257 }, { "epoch": 1.9010610840263837, "grad_norm": 0.2964881360530853, "learning_rate": 3.5603312639505396e-06, "loss": 0.3009, "step": 13258 }, { "epoch": 1.901204473759679, "grad_norm": 0.2948698401451111, "learning_rate": 3.559532370815301e-06, "loss": 0.3193, "step": 13259 }, { "epoch": 1.901347863492974, "grad_norm": 0.24652892351150513, "learning_rate": 3.5587335177804645e-06, "loss": 0.2867, "step": 13260 }, { "epoch": 1.9014912532262689, "grad_norm": 0.2879137098789215, "learning_rate": 3.557934704868272e-06, "loss": 0.3047, "step": 13261 }, { "epoch": 1.901634642959564, "grad_norm": 0.2718456983566284, "learning_rate": 3.5571359321009567e-06, "loss": 0.2768, "step": 13262 }, { "epoch": 1.9017780326928593, "grad_norm": 0.2572096586227417, "learning_rate": 3.5563371995007588e-06, "loss": 0.283, "step": 13263 }, { "epoch": 1.9019214224261543, "grad_norm": 0.26786932349205017, "learning_rate": 3.555538507089912e-06, "loss": 0.3003, "step": 13264 }, { "epoch": 1.9020648121594492, "grad_norm": 0.2594016194343567, "learning_rate": 3.554739854890651e-06, "loss": 0.3058, "step": 13265 }, { "epoch": 1.9022082018927446, "grad_norm": 0.28222206234931946, "learning_rate": 3.553941242925212e-06, "loss": 0.2983, "step": 13266 }, { "epoch": 1.9023515916260396, "grad_norm": 0.27579018473625183, "learning_rate": 3.5531426712158213e-06, "loss": 0.2905, "step": 13267 }, { "epoch": 1.9024949813593346, "grad_norm": 0.28200942277908325, "learning_rate": 3.5523441397847138e-06, "loss": 0.2954, "step": 13268 }, { "epoch": 1.9026383710926298, "grad_norm": 0.2707948684692383, "learning_rate": 3.5515456486541176e-06, "loss": 0.3085, "step": 13269 }, { "epoch": 1.902781760825925, "grad_norm": 0.2605675756931305, "learning_rate": 3.5507471978462627e-06, "loss": 0.3093, "step": 13270 }, { "epoch": 1.90292515055922, "grad_norm": 0.3117024600505829, "learning_rate": 3.549948787383377e-06, "loss": 0.3023, "step": 13271 }, { "epoch": 1.903068540292515, "grad_norm": 0.2545416057109833, "learning_rate": 3.5491504172876866e-06, "loss": 0.306, "step": 13272 }, { "epoch": 1.9032119300258101, "grad_norm": 0.2846684455871582, "learning_rate": 3.548352087581417e-06, "loss": 0.279, "step": 13273 }, { "epoch": 1.9033553197591053, "grad_norm": 0.28274813294410706, "learning_rate": 3.5475537982867914e-06, "loss": 0.3005, "step": 13274 }, { "epoch": 1.9034987094924003, "grad_norm": 0.2719014286994934, "learning_rate": 3.5467555494260335e-06, "loss": 0.2949, "step": 13275 }, { "epoch": 1.9036420992256955, "grad_norm": 0.29389095306396484, "learning_rate": 3.545957341021366e-06, "loss": 0.3118, "step": 13276 }, { "epoch": 1.9037854889589907, "grad_norm": 0.24610427021980286, "learning_rate": 3.545159173095011e-06, "loss": 0.3268, "step": 13277 }, { "epoch": 1.9039288786922857, "grad_norm": 0.2826260030269623, "learning_rate": 3.5443610456691848e-06, "loss": 0.3073, "step": 13278 }, { "epoch": 1.9040722684255806, "grad_norm": 0.2803647220134735, "learning_rate": 3.543562958766108e-06, "loss": 0.3006, "step": 13279 }, { "epoch": 1.9042156581588758, "grad_norm": 0.2719300091266632, "learning_rate": 3.5427649124079975e-06, "loss": 0.2869, "step": 13280 }, { "epoch": 1.904359047892171, "grad_norm": 0.2936299741268158, "learning_rate": 3.5419669066170713e-06, "loss": 0.2974, "step": 13281 }, { "epoch": 1.904502437625466, "grad_norm": 0.26152631640434265, "learning_rate": 3.5411689414155446e-06, "loss": 0.2941, "step": 13282 }, { "epoch": 1.904645827358761, "grad_norm": 0.27395015954971313, "learning_rate": 3.5403710168256296e-06, "loss": 0.3146, "step": 13283 }, { "epoch": 1.9047892170920562, "grad_norm": 0.29839667677879333, "learning_rate": 3.5395731328695394e-06, "loss": 0.3016, "step": 13284 }, { "epoch": 1.9049326068253514, "grad_norm": 0.2773171663284302, "learning_rate": 3.5387752895694864e-06, "loss": 0.2995, "step": 13285 }, { "epoch": 1.9050759965586463, "grad_norm": 0.30125942826271057, "learning_rate": 3.5379774869476826e-06, "loss": 0.2972, "step": 13286 }, { "epoch": 1.9052193862919415, "grad_norm": 0.2643798887729645, "learning_rate": 3.537179725026335e-06, "loss": 0.2849, "step": 13287 }, { "epoch": 1.9053627760252367, "grad_norm": 0.290204793214798, "learning_rate": 3.536382003827656e-06, "loss": 0.2872, "step": 13288 }, { "epoch": 1.9055061657585317, "grad_norm": 0.2823438048362732, "learning_rate": 3.5355843233738486e-06, "loss": 0.2892, "step": 13289 }, { "epoch": 1.9056495554918267, "grad_norm": 0.26244544982910156, "learning_rate": 3.5347866836871213e-06, "loss": 0.303, "step": 13290 }, { "epoch": 1.9057929452251219, "grad_norm": 0.28405046463012695, "learning_rate": 3.5339890847896792e-06, "loss": 0.3025, "step": 13291 }, { "epoch": 1.905936334958417, "grad_norm": 0.2938348054885864, "learning_rate": 3.5331915267037254e-06, "loss": 0.3102, "step": 13292 }, { "epoch": 1.906079724691712, "grad_norm": 0.2788131535053253, "learning_rate": 3.532394009451465e-06, "loss": 0.3123, "step": 13293 }, { "epoch": 1.906223114425007, "grad_norm": 0.2674371600151062, "learning_rate": 3.531596533055096e-06, "loss": 0.2715, "step": 13294 }, { "epoch": 1.9063665041583022, "grad_norm": 0.2698511779308319, "learning_rate": 3.5307990975368204e-06, "loss": 0.3073, "step": 13295 }, { "epoch": 1.9065098938915974, "grad_norm": 0.2749478816986084, "learning_rate": 3.530001702918838e-06, "loss": 0.3024, "step": 13296 }, { "epoch": 1.9066532836248924, "grad_norm": 0.2567559480667114, "learning_rate": 3.5292043492233474e-06, "loss": 0.3095, "step": 13297 }, { "epoch": 1.9067966733581876, "grad_norm": 0.2660074234008789, "learning_rate": 3.528407036472545e-06, "loss": 0.3175, "step": 13298 }, { "epoch": 1.9069400630914828, "grad_norm": 0.2826412618160248, "learning_rate": 3.527609764688628e-06, "loss": 0.3028, "step": 13299 }, { "epoch": 1.9070834528247778, "grad_norm": 0.2826445698738098, "learning_rate": 3.5268125338937886e-06, "loss": 0.3042, "step": 13300 }, { "epoch": 1.9072268425580727, "grad_norm": 0.2850267291069031, "learning_rate": 3.526015344110223e-06, "loss": 0.3125, "step": 13301 }, { "epoch": 1.907370232291368, "grad_norm": 0.26302456855773926, "learning_rate": 3.5252181953601217e-06, "loss": 0.3156, "step": 13302 }, { "epoch": 1.9075136220246631, "grad_norm": 0.2732461094856262, "learning_rate": 3.524421087665678e-06, "loss": 0.3067, "step": 13303 }, { "epoch": 1.907657011757958, "grad_norm": 0.2680846154689789, "learning_rate": 3.5236240210490837e-06, "loss": 0.2897, "step": 13304 }, { "epoch": 1.907800401491253, "grad_norm": 0.27644839882850647, "learning_rate": 3.5228269955325213e-06, "loss": 0.3043, "step": 13305 }, { "epoch": 1.9079437912245483, "grad_norm": 0.284822940826416, "learning_rate": 3.5220300111381846e-06, "loss": 0.2918, "step": 13306 }, { "epoch": 1.9080871809578435, "grad_norm": 0.2507565915584564, "learning_rate": 3.521233067888259e-06, "loss": 0.305, "step": 13307 }, { "epoch": 1.9082305706911384, "grad_norm": 0.2811923325061798, "learning_rate": 3.5204361658049303e-06, "loss": 0.3292, "step": 13308 }, { "epoch": 1.9083739604244336, "grad_norm": 0.2776457667350769, "learning_rate": 3.5196393049103842e-06, "loss": 0.2934, "step": 13309 }, { "epoch": 1.9085173501577288, "grad_norm": 0.2590983211994171, "learning_rate": 3.518842485226801e-06, "loss": 0.3023, "step": 13310 }, { "epoch": 1.9086607398910238, "grad_norm": 0.26098889112472534, "learning_rate": 3.518045706776364e-06, "loss": 0.2914, "step": 13311 }, { "epoch": 1.9088041296243188, "grad_norm": 0.2681879699230194, "learning_rate": 3.5172489695812557e-06, "loss": 0.2993, "step": 13312 }, { "epoch": 1.908947519357614, "grad_norm": 0.25961288809776306, "learning_rate": 3.5164522736636547e-06, "loss": 0.2854, "step": 13313 }, { "epoch": 1.9090909090909092, "grad_norm": 0.2855920195579529, "learning_rate": 3.5156556190457403e-06, "loss": 0.2936, "step": 13314 }, { "epoch": 1.9092342988242041, "grad_norm": 0.28800535202026367, "learning_rate": 3.5148590057496913e-06, "loss": 0.313, "step": 13315 }, { "epoch": 1.9093776885574991, "grad_norm": 0.27112269401550293, "learning_rate": 3.5140624337976818e-06, "loss": 0.3031, "step": 13316 }, { "epoch": 1.9095210782907945, "grad_norm": 0.2699843645095825, "learning_rate": 3.5132659032118887e-06, "loss": 0.3028, "step": 13317 }, { "epoch": 1.9096644680240895, "grad_norm": 0.29355543851852417, "learning_rate": 3.5124694140144856e-06, "loss": 0.2922, "step": 13318 }, { "epoch": 1.9098078577573845, "grad_norm": 0.27277225255966187, "learning_rate": 3.511672966227645e-06, "loss": 0.3069, "step": 13319 }, { "epoch": 1.9099512474906797, "grad_norm": 0.248001828789711, "learning_rate": 3.510876559873543e-06, "loss": 0.3044, "step": 13320 }, { "epoch": 1.9100946372239749, "grad_norm": 0.27135539054870605, "learning_rate": 3.5100801949743433e-06, "loss": 0.3119, "step": 13321 }, { "epoch": 1.9102380269572699, "grad_norm": 0.2747238278388977, "learning_rate": 3.5092838715522202e-06, "loss": 0.3033, "step": 13322 }, { "epoch": 1.9103814166905648, "grad_norm": 0.2670648694038391, "learning_rate": 3.508487589629341e-06, "loss": 0.3021, "step": 13323 }, { "epoch": 1.91052480642386, "grad_norm": 0.2658010721206665, "learning_rate": 3.5076913492278724e-06, "loss": 0.2987, "step": 13324 }, { "epoch": 1.9106681961571552, "grad_norm": 0.27205970883369446, "learning_rate": 3.5068951503699813e-06, "loss": 0.2964, "step": 13325 }, { "epoch": 1.9108115858904502, "grad_norm": 0.26641297340393066, "learning_rate": 3.5060989930778333e-06, "loss": 0.3017, "step": 13326 }, { "epoch": 1.9109549756237454, "grad_norm": 0.2809292674064636, "learning_rate": 3.5053028773735897e-06, "loss": 0.3024, "step": 13327 }, { "epoch": 1.9110983653570406, "grad_norm": 0.27025845646858215, "learning_rate": 3.504506803279416e-06, "loss": 0.3095, "step": 13328 }, { "epoch": 1.9112417550903356, "grad_norm": 0.2975200116634369, "learning_rate": 3.503710770817471e-06, "loss": 0.322, "step": 13329 }, { "epoch": 1.9113851448236305, "grad_norm": 0.2737032473087311, "learning_rate": 3.5029147800099173e-06, "loss": 0.3336, "step": 13330 }, { "epoch": 1.9115285345569257, "grad_norm": 0.2646119296550751, "learning_rate": 3.5021188308789155e-06, "loss": 0.2986, "step": 13331 }, { "epoch": 1.911671924290221, "grad_norm": 0.2638711929321289, "learning_rate": 3.5013229234466185e-06, "loss": 0.2754, "step": 13332 }, { "epoch": 1.911815314023516, "grad_norm": 0.2850515842437744, "learning_rate": 3.500527057735185e-06, "loss": 0.3039, "step": 13333 }, { "epoch": 1.9119587037568109, "grad_norm": 0.30659225583076477, "learning_rate": 3.4997312337667726e-06, "loss": 0.2763, "step": 13334 }, { "epoch": 1.912102093490106, "grad_norm": 0.28201526403427124, "learning_rate": 3.4989354515635353e-06, "loss": 0.298, "step": 13335 }, { "epoch": 1.9122454832234013, "grad_norm": 0.2656297981739044, "learning_rate": 3.498139711147628e-06, "loss": 0.3023, "step": 13336 }, { "epoch": 1.9123888729566962, "grad_norm": 0.284009724855423, "learning_rate": 3.4973440125411985e-06, "loss": 0.3069, "step": 13337 }, { "epoch": 1.9125322626899914, "grad_norm": 0.277067095041275, "learning_rate": 3.4965483557663997e-06, "loss": 0.2903, "step": 13338 }, { "epoch": 1.9126756524232866, "grad_norm": 0.27515819668769836, "learning_rate": 3.4957527408453822e-06, "loss": 0.2907, "step": 13339 }, { "epoch": 1.9128190421565816, "grad_norm": 0.24620066583156586, "learning_rate": 3.4949571678002947e-06, "loss": 0.2884, "step": 13340 }, { "epoch": 1.9129624318898766, "grad_norm": 0.2719591557979584, "learning_rate": 3.4941616366532844e-06, "loss": 0.3089, "step": 13341 }, { "epoch": 1.9131058216231718, "grad_norm": 0.2898402512073517, "learning_rate": 3.493366147426499e-06, "loss": 0.3235, "step": 13342 }, { "epoch": 1.913249211356467, "grad_norm": 0.2745426595211029, "learning_rate": 3.4925707001420807e-06, "loss": 0.3143, "step": 13343 }, { "epoch": 1.913392601089762, "grad_norm": 0.27449285984039307, "learning_rate": 3.491775294822176e-06, "loss": 0.28, "step": 13344 }, { "epoch": 1.913535990823057, "grad_norm": 0.26151180267333984, "learning_rate": 3.4909799314889263e-06, "loss": 0.3014, "step": 13345 }, { "epoch": 1.9136793805563521, "grad_norm": 0.30641499161720276, "learning_rate": 3.4901846101644742e-06, "loss": 0.3125, "step": 13346 }, { "epoch": 1.9138227702896473, "grad_norm": 0.2882342040538788, "learning_rate": 3.4893893308709625e-06, "loss": 0.2752, "step": 13347 }, { "epoch": 1.9139661600229423, "grad_norm": 0.27667832374572754, "learning_rate": 3.488594093630526e-06, "loss": 0.3031, "step": 13348 }, { "epoch": 1.9141095497562375, "grad_norm": 0.29062339663505554, "learning_rate": 3.4877988984653043e-06, "loss": 0.3059, "step": 13349 }, { "epoch": 1.9142529394895327, "grad_norm": 0.26793715357780457, "learning_rate": 3.487003745397436e-06, "loss": 0.316, "step": 13350 }, { "epoch": 1.9143963292228277, "grad_norm": 0.2785639762878418, "learning_rate": 3.4862086344490553e-06, "loss": 0.3184, "step": 13351 }, { "epoch": 1.9145397189561226, "grad_norm": 0.25967341661453247, "learning_rate": 3.485413565642297e-06, "loss": 0.2907, "step": 13352 }, { "epoch": 1.9146831086894178, "grad_norm": 0.26704469323158264, "learning_rate": 3.4846185389992975e-06, "loss": 0.2873, "step": 13353 }, { "epoch": 1.914826498422713, "grad_norm": 0.26003095507621765, "learning_rate": 3.483823554542185e-06, "loss": 0.3013, "step": 13354 }, { "epoch": 1.914969888156008, "grad_norm": 0.287658154964447, "learning_rate": 3.483028612293093e-06, "loss": 0.3033, "step": 13355 }, { "epoch": 1.915113277889303, "grad_norm": 0.2974465787410736, "learning_rate": 3.48223371227415e-06, "loss": 0.2927, "step": 13356 }, { "epoch": 1.9152566676225984, "grad_norm": 0.2825602889060974, "learning_rate": 3.481438854507486e-06, "loss": 0.3166, "step": 13357 }, { "epoch": 1.9154000573558934, "grad_norm": 0.28439754247665405, "learning_rate": 3.480644039015231e-06, "loss": 0.2953, "step": 13358 }, { "epoch": 1.9155434470891883, "grad_norm": 0.26130419969558716, "learning_rate": 3.479849265819506e-06, "loss": 0.3196, "step": 13359 }, { "epoch": 1.9156868368224835, "grad_norm": 0.28193849325180054, "learning_rate": 3.4790545349424397e-06, "loss": 0.3192, "step": 13360 }, { "epoch": 1.9158302265557787, "grad_norm": 0.29244866967201233, "learning_rate": 3.4782598464061536e-06, "loss": 0.3084, "step": 13361 }, { "epoch": 1.9159736162890737, "grad_norm": 0.2556775212287903, "learning_rate": 3.477465200232774e-06, "loss": 0.3109, "step": 13362 }, { "epoch": 1.9161170060223687, "grad_norm": 0.2796725332736969, "learning_rate": 3.4766705964444235e-06, "loss": 0.2905, "step": 13363 }, { "epoch": 1.9162603957556639, "grad_norm": 0.29784825444221497, "learning_rate": 3.4758760350632175e-06, "loss": 0.3131, "step": 13364 }, { "epoch": 1.916403785488959, "grad_norm": 0.2764636278152466, "learning_rate": 3.4750815161112784e-06, "loss": 0.3008, "step": 13365 }, { "epoch": 1.916547175222254, "grad_norm": 0.2617996037006378, "learning_rate": 3.4742870396107243e-06, "loss": 0.297, "step": 13366 }, { "epoch": 1.9166905649555492, "grad_norm": 0.26944500207901, "learning_rate": 3.4734926055836722e-06, "loss": 0.3208, "step": 13367 }, { "epoch": 1.9168339546888444, "grad_norm": 0.2805398106575012, "learning_rate": 3.4726982140522377e-06, "loss": 0.3013, "step": 13368 }, { "epoch": 1.9169773444221394, "grad_norm": 0.26621463894844055, "learning_rate": 3.4719038650385363e-06, "loss": 0.2935, "step": 13369 }, { "epoch": 1.9171207341554344, "grad_norm": 0.27089470624923706, "learning_rate": 3.4711095585646794e-06, "loss": 0.3001, "step": 13370 }, { "epoch": 1.9172641238887296, "grad_norm": 0.27242138981819153, "learning_rate": 3.4703152946527806e-06, "loss": 0.3253, "step": 13371 }, { "epoch": 1.9174075136220248, "grad_norm": 0.2805926203727722, "learning_rate": 3.469521073324951e-06, "loss": 0.3097, "step": 13372 }, { "epoch": 1.9175509033553197, "grad_norm": 0.2927752137184143, "learning_rate": 3.4687268946033003e-06, "loss": 0.2918, "step": 13373 }, { "epoch": 1.9176942930886147, "grad_norm": 0.2775646448135376, "learning_rate": 3.46793275850994e-06, "loss": 0.2836, "step": 13374 }, { "epoch": 1.91783768282191, "grad_norm": 0.28330111503601074, "learning_rate": 3.4671386650669725e-06, "loss": 0.286, "step": 13375 }, { "epoch": 1.9179810725552051, "grad_norm": 0.2569740414619446, "learning_rate": 3.4663446142965064e-06, "loss": 0.2938, "step": 13376 }, { "epoch": 1.9181244622885, "grad_norm": 0.25014060735702515, "learning_rate": 3.4655506062206467e-06, "loss": 0.2939, "step": 13377 }, { "epoch": 1.9182678520217953, "grad_norm": 0.26217421889305115, "learning_rate": 3.4647566408614987e-06, "loss": 0.2892, "step": 13378 }, { "epoch": 1.9184112417550905, "grad_norm": 0.27822285890579224, "learning_rate": 3.4639627182411633e-06, "loss": 0.3055, "step": 13379 }, { "epoch": 1.9185546314883855, "grad_norm": 0.2705201506614685, "learning_rate": 3.463168838381744e-06, "loss": 0.3225, "step": 13380 }, { "epoch": 1.9186980212216804, "grad_norm": 0.2918865382671356, "learning_rate": 3.4623750013053396e-06, "loss": 0.2831, "step": 13381 }, { "epoch": 1.9188414109549756, "grad_norm": 0.2831902801990509, "learning_rate": 3.46158120703405e-06, "loss": 0.3167, "step": 13382 }, { "epoch": 1.9189848006882708, "grad_norm": 0.3024769127368927, "learning_rate": 3.4607874555899723e-06, "loss": 0.3307, "step": 13383 }, { "epoch": 1.9191281904215658, "grad_norm": 0.27347683906555176, "learning_rate": 3.459993746995205e-06, "loss": 0.3091, "step": 13384 }, { "epoch": 1.9192715801548608, "grad_norm": 0.2563050091266632, "learning_rate": 3.459200081271844e-06, "loss": 0.3009, "step": 13385 }, { "epoch": 1.919414969888156, "grad_norm": 0.277224600315094, "learning_rate": 3.4584064584419807e-06, "loss": 0.2902, "step": 13386 }, { "epoch": 1.9195583596214512, "grad_norm": 0.27025821805000305, "learning_rate": 3.457612878527711e-06, "loss": 0.2952, "step": 13387 }, { "epoch": 1.9197017493547461, "grad_norm": 0.26577845215797424, "learning_rate": 3.4568193415511252e-06, "loss": 0.3203, "step": 13388 }, { "epoch": 1.9198451390880413, "grad_norm": 0.26221024990081787, "learning_rate": 3.4560258475343155e-06, "loss": 0.3029, "step": 13389 }, { "epoch": 1.9199885288213365, "grad_norm": 0.27138757705688477, "learning_rate": 3.455232396499372e-06, "loss": 0.3072, "step": 13390 }, { "epoch": 1.9201319185546315, "grad_norm": 0.28010597825050354, "learning_rate": 3.454438988468381e-06, "loss": 0.3084, "step": 13391 }, { "epoch": 1.9202753082879265, "grad_norm": 0.27866458892822266, "learning_rate": 3.4536456234634315e-06, "loss": 0.298, "step": 13392 }, { "epoch": 1.9204186980212217, "grad_norm": 0.2680933475494385, "learning_rate": 3.4528523015066084e-06, "loss": 0.2826, "step": 13393 }, { "epoch": 1.9205620877545169, "grad_norm": 0.2907683849334717, "learning_rate": 3.452059022619998e-06, "loss": 0.327, "step": 13394 }, { "epoch": 1.9207054774878118, "grad_norm": 0.26793572306632996, "learning_rate": 3.4512657868256827e-06, "loss": 0.3059, "step": 13395 }, { "epoch": 1.9208488672211068, "grad_norm": 0.2552069425582886, "learning_rate": 3.4504725941457474e-06, "loss": 0.2852, "step": 13396 }, { "epoch": 1.920992256954402, "grad_norm": 0.2560949921607971, "learning_rate": 3.44967944460227e-06, "loss": 0.3037, "step": 13397 }, { "epoch": 1.9211356466876972, "grad_norm": 0.2718600332736969, "learning_rate": 3.4488863382173316e-06, "loss": 0.3126, "step": 13398 }, { "epoch": 1.9212790364209922, "grad_norm": 0.28744715452194214, "learning_rate": 3.448093275013011e-06, "loss": 0.3021, "step": 13399 }, { "epoch": 1.9214224261542874, "grad_norm": 0.27004754543304443, "learning_rate": 3.4473002550113877e-06, "loss": 0.284, "step": 13400 }, { "epoch": 1.9215658158875826, "grad_norm": 0.2751327157020569, "learning_rate": 3.4465072782345386e-06, "loss": 0.3015, "step": 13401 }, { "epoch": 1.9217092056208775, "grad_norm": 0.26623082160949707, "learning_rate": 3.4457143447045347e-06, "loss": 0.2799, "step": 13402 }, { "epoch": 1.9218525953541725, "grad_norm": 0.2890641391277313, "learning_rate": 3.444921454443452e-06, "loss": 0.3093, "step": 13403 }, { "epoch": 1.9219959850874677, "grad_norm": 0.2983357012271881, "learning_rate": 3.4441286074733647e-06, "loss": 0.312, "step": 13404 }, { "epoch": 1.922139374820763, "grad_norm": 0.2764778733253479, "learning_rate": 3.4433358038163433e-06, "loss": 0.2921, "step": 13405 }, { "epoch": 1.9222827645540579, "grad_norm": 0.26786550879478455, "learning_rate": 3.4425430434944585e-06, "loss": 0.2863, "step": 13406 }, { "epoch": 1.9224261542873529, "grad_norm": 0.25791582465171814, "learning_rate": 3.441750326529781e-06, "loss": 0.2945, "step": 13407 }, { "epoch": 1.9225695440206483, "grad_norm": 0.27089837193489075, "learning_rate": 3.4409576529443755e-06, "loss": 0.3138, "step": 13408 }, { "epoch": 1.9227129337539433, "grad_norm": 0.28009694814682007, "learning_rate": 3.4401650227603112e-06, "loss": 0.3015, "step": 13409 }, { "epoch": 1.9228563234872382, "grad_norm": 0.28747501969337463, "learning_rate": 3.439372435999653e-06, "loss": 0.2896, "step": 13410 }, { "epoch": 1.9229997132205334, "grad_norm": 0.2755560874938965, "learning_rate": 3.438579892684466e-06, "loss": 0.2991, "step": 13411 }, { "epoch": 1.9231431029538286, "grad_norm": 0.2770431637763977, "learning_rate": 3.4377873928368143e-06, "loss": 0.3057, "step": 13412 }, { "epoch": 1.9232864926871236, "grad_norm": 0.2742195129394531, "learning_rate": 3.436994936478757e-06, "loss": 0.305, "step": 13413 }, { "epoch": 1.9234298824204186, "grad_norm": 0.26630985736846924, "learning_rate": 3.436202523632356e-06, "loss": 0.2975, "step": 13414 }, { "epoch": 1.9235732721537138, "grad_norm": 0.2565319240093231, "learning_rate": 3.4354101543196706e-06, "loss": 0.2871, "step": 13415 }, { "epoch": 1.923716661887009, "grad_norm": 0.2757585942745209, "learning_rate": 3.4346178285627606e-06, "loss": 0.3027, "step": 13416 }, { "epoch": 1.923860051620304, "grad_norm": 0.28523823618888855, "learning_rate": 3.433825546383683e-06, "loss": 0.2906, "step": 13417 }, { "epoch": 1.9240034413535991, "grad_norm": 0.2680416405200958, "learning_rate": 3.4330333078044918e-06, "loss": 0.3176, "step": 13418 }, { "epoch": 1.9241468310868943, "grad_norm": 0.2767844796180725, "learning_rate": 3.4322411128472426e-06, "loss": 0.3019, "step": 13419 }, { "epoch": 1.9242902208201893, "grad_norm": 0.26930227875709534, "learning_rate": 3.43144896153399e-06, "loss": 0.2953, "step": 13420 }, { "epoch": 1.9244336105534843, "grad_norm": 0.24513021111488342, "learning_rate": 3.4306568538867847e-06, "loss": 0.3007, "step": 13421 }, { "epoch": 1.9245770002867795, "grad_norm": 0.24884022772312164, "learning_rate": 3.429864789927678e-06, "loss": 0.2801, "step": 13422 }, { "epoch": 1.9247203900200747, "grad_norm": 0.26821500062942505, "learning_rate": 3.4290727696787225e-06, "loss": 0.296, "step": 13423 }, { "epoch": 1.9248637797533696, "grad_norm": 0.2735070586204529, "learning_rate": 3.428280793161963e-06, "loss": 0.2936, "step": 13424 }, { "epoch": 1.9250071694866646, "grad_norm": 0.28652167320251465, "learning_rate": 3.427488860399448e-06, "loss": 0.2817, "step": 13425 }, { "epoch": 1.9251505592199598, "grad_norm": 0.26556089520454407, "learning_rate": 3.426696971413225e-06, "loss": 0.2951, "step": 13426 }, { "epoch": 1.925293948953255, "grad_norm": 0.26700329780578613, "learning_rate": 3.4259051262253373e-06, "loss": 0.2969, "step": 13427 }, { "epoch": 1.92543733868655, "grad_norm": 0.24269486963748932, "learning_rate": 3.425113324857832e-06, "loss": 0.2903, "step": 13428 }, { "epoch": 1.9255807284198452, "grad_norm": 0.2822205126285553, "learning_rate": 3.4243215673327467e-06, "loss": 0.296, "step": 13429 }, { "epoch": 1.9257241181531404, "grad_norm": 0.2983405292034149, "learning_rate": 3.423529853672126e-06, "loss": 0.2858, "step": 13430 }, { "epoch": 1.9258675078864353, "grad_norm": 0.27065905928611755, "learning_rate": 3.422738183898008e-06, "loss": 0.2915, "step": 13431 }, { "epoch": 1.9260108976197303, "grad_norm": 0.26509618759155273, "learning_rate": 3.4219465580324337e-06, "loss": 0.2944, "step": 13432 }, { "epoch": 1.9261542873530255, "grad_norm": 0.27397528290748596, "learning_rate": 3.421154976097439e-06, "loss": 0.2844, "step": 13433 }, { "epoch": 1.9262976770863207, "grad_norm": 0.27193892002105713, "learning_rate": 3.420363438115062e-06, "loss": 0.2961, "step": 13434 }, { "epoch": 1.9264410668196157, "grad_norm": 0.28654882311820984, "learning_rate": 3.419571944107336e-06, "loss": 0.3089, "step": 13435 }, { "epoch": 1.9265844565529107, "grad_norm": 0.2870037257671356, "learning_rate": 3.4187804940962966e-06, "loss": 0.3073, "step": 13436 }, { "epoch": 1.9267278462862059, "grad_norm": 0.27780336141586304, "learning_rate": 3.417989088103975e-06, "loss": 0.299, "step": 13437 }, { "epoch": 1.926871236019501, "grad_norm": 0.25970181822776794, "learning_rate": 3.417197726152404e-06, "loss": 0.2782, "step": 13438 }, { "epoch": 1.927014625752796, "grad_norm": 0.2952115535736084, "learning_rate": 3.4164064082636157e-06, "loss": 0.3115, "step": 13439 }, { "epoch": 1.9271580154860912, "grad_norm": 0.272970587015152, "learning_rate": 3.4156151344596344e-06, "loss": 0.3215, "step": 13440 }, { "epoch": 1.9273014052193864, "grad_norm": 0.2645166516304016, "learning_rate": 3.414823904762491e-06, "loss": 0.2833, "step": 13441 }, { "epoch": 1.9274447949526814, "grad_norm": 0.29429659247398376, "learning_rate": 3.414032719194211e-06, "loss": 0.3041, "step": 13442 }, { "epoch": 1.9275881846859764, "grad_norm": 0.2681286036968231, "learning_rate": 3.4132415777768213e-06, "loss": 0.2912, "step": 13443 }, { "epoch": 1.9277315744192716, "grad_norm": 0.2907939553260803, "learning_rate": 3.412450480532346e-06, "loss": 0.3117, "step": 13444 }, { "epoch": 1.9278749641525668, "grad_norm": 0.2915160357952118, "learning_rate": 3.4116594274828064e-06, "loss": 0.3038, "step": 13445 }, { "epoch": 1.9280183538858617, "grad_norm": 0.28596070408821106, "learning_rate": 3.4108684186502242e-06, "loss": 0.2824, "step": 13446 }, { "epoch": 1.9281617436191567, "grad_norm": 0.2801794111728668, "learning_rate": 3.410077454056621e-06, "loss": 0.3059, "step": 13447 }, { "epoch": 1.9283051333524521, "grad_norm": 0.26805952191352844, "learning_rate": 3.409286533724016e-06, "loss": 0.3037, "step": 13448 }, { "epoch": 1.928448523085747, "grad_norm": 0.30241090059280396, "learning_rate": 3.4084956576744266e-06, "loss": 0.3085, "step": 13449 }, { "epoch": 1.928591912819042, "grad_norm": 0.2873515784740448, "learning_rate": 3.4077048259298715e-06, "loss": 0.2772, "step": 13450 }, { "epoch": 1.9287353025523373, "grad_norm": 0.2997976541519165, "learning_rate": 3.4069140385123633e-06, "loss": 0.2978, "step": 13451 }, { "epoch": 1.9288786922856325, "grad_norm": 0.27184194326400757, "learning_rate": 3.406123295443917e-06, "loss": 0.3046, "step": 13452 }, { "epoch": 1.9290220820189274, "grad_norm": 0.2603234648704529, "learning_rate": 3.405332596746547e-06, "loss": 0.3076, "step": 13453 }, { "epoch": 1.9291654717522224, "grad_norm": 0.2724153399467468, "learning_rate": 3.4045419424422644e-06, "loss": 0.3013, "step": 13454 }, { "epoch": 1.9293088614855176, "grad_norm": 0.27766719460487366, "learning_rate": 3.403751332553082e-06, "loss": 0.3024, "step": 13455 }, { "epoch": 1.9294522512188128, "grad_norm": 0.2806171774864197, "learning_rate": 3.4029607671010044e-06, "loss": 0.3062, "step": 13456 }, { "epoch": 1.9295956409521078, "grad_norm": 0.28692111372947693, "learning_rate": 3.402170246108043e-06, "loss": 0.2989, "step": 13457 }, { "epoch": 1.929739030685403, "grad_norm": 0.2537735104560852, "learning_rate": 3.4013797695962036e-06, "loss": 0.2946, "step": 13458 }, { "epoch": 1.9298824204186982, "grad_norm": 0.2793416976928711, "learning_rate": 3.400589337587492e-06, "loss": 0.3091, "step": 13459 }, { "epoch": 1.9300258101519931, "grad_norm": 0.2703608274459839, "learning_rate": 3.3997989501039134e-06, "loss": 0.31, "step": 13460 }, { "epoch": 1.9301691998852881, "grad_norm": 0.28655949234962463, "learning_rate": 3.3990086071674715e-06, "loss": 0.2954, "step": 13461 }, { "epoch": 1.9303125896185833, "grad_norm": 0.2785983383655548, "learning_rate": 3.3982183088001665e-06, "loss": 0.2902, "step": 13462 }, { "epoch": 1.9304559793518785, "grad_norm": 0.278665691614151, "learning_rate": 3.397428055023999e-06, "loss": 0.3097, "step": 13463 }, { "epoch": 1.9305993690851735, "grad_norm": 0.2783457636833191, "learning_rate": 3.3966378458609694e-06, "loss": 0.2977, "step": 13464 }, { "epoch": 1.9307427588184685, "grad_norm": 0.24898923933506012, "learning_rate": 3.395847681333076e-06, "loss": 0.3001, "step": 13465 }, { "epoch": 1.9308861485517637, "grad_norm": 0.26395532488822937, "learning_rate": 3.395057561462317e-06, "loss": 0.2829, "step": 13466 }, { "epoch": 1.9310295382850589, "grad_norm": 0.29875728487968445, "learning_rate": 3.3942674862706853e-06, "loss": 0.3152, "step": 13467 }, { "epoch": 1.9311729280183538, "grad_norm": 0.2832943797111511, "learning_rate": 3.3934774557801765e-06, "loss": 0.308, "step": 13468 }, { "epoch": 1.931316317751649, "grad_norm": 0.2620420455932617, "learning_rate": 3.3926874700127845e-06, "loss": 0.2938, "step": 13469 }, { "epoch": 1.9314597074849442, "grad_norm": 0.275755375623703, "learning_rate": 3.3918975289905e-06, "loss": 0.3037, "step": 13470 }, { "epoch": 1.9316030972182392, "grad_norm": 0.2858802378177643, "learning_rate": 3.3911076327353154e-06, "loss": 0.3078, "step": 13471 }, { "epoch": 1.9317464869515342, "grad_norm": 0.28946444392204285, "learning_rate": 3.390317781269219e-06, "loss": 0.2968, "step": 13472 }, { "epoch": 1.9318898766848294, "grad_norm": 0.3026631772518158, "learning_rate": 3.3895279746141996e-06, "loss": 0.3046, "step": 13473 }, { "epoch": 1.9320332664181246, "grad_norm": 0.28670814633369446, "learning_rate": 3.388738212792243e-06, "loss": 0.2883, "step": 13474 }, { "epoch": 1.9321766561514195, "grad_norm": 0.2511950731277466, "learning_rate": 3.387948495825337e-06, "loss": 0.3063, "step": 13475 }, { "epoch": 1.9323200458847145, "grad_norm": 0.28322210907936096, "learning_rate": 3.3871588237354645e-06, "loss": 0.2911, "step": 13476 }, { "epoch": 1.9324634356180097, "grad_norm": 0.29408955574035645, "learning_rate": 3.3863691965446116e-06, "loss": 0.3139, "step": 13477 }, { "epoch": 1.932606825351305, "grad_norm": 0.27030307054519653, "learning_rate": 3.385579614274756e-06, "loss": 0.2793, "step": 13478 }, { "epoch": 1.9327502150845999, "grad_norm": 0.30434152483940125, "learning_rate": 3.384790076947879e-06, "loss": 0.2948, "step": 13479 }, { "epoch": 1.932893604817895, "grad_norm": 0.2965722382068634, "learning_rate": 3.3840005845859635e-06, "loss": 0.3306, "step": 13480 }, { "epoch": 1.9330369945511903, "grad_norm": 0.2640879452228546, "learning_rate": 3.3832111372109854e-06, "loss": 0.3061, "step": 13481 }, { "epoch": 1.9331803842844852, "grad_norm": 0.2840067744255066, "learning_rate": 3.382421734844924e-06, "loss": 0.307, "step": 13482 }, { "epoch": 1.9333237740177802, "grad_norm": 0.2651004195213318, "learning_rate": 3.3816323775097516e-06, "loss": 0.2885, "step": 13483 }, { "epoch": 1.9334671637510754, "grad_norm": 0.28013530373573303, "learning_rate": 3.3808430652274437e-06, "loss": 0.331, "step": 13484 }, { "epoch": 1.9336105534843706, "grad_norm": 0.27379605174064636, "learning_rate": 3.3800537980199743e-06, "loss": 0.303, "step": 13485 }, { "epoch": 1.9337539432176656, "grad_norm": 0.26111897826194763, "learning_rate": 3.3792645759093156e-06, "loss": 0.3086, "step": 13486 }, { "epoch": 1.9338973329509606, "grad_norm": 0.27651447057724, "learning_rate": 3.3784753989174377e-06, "loss": 0.3099, "step": 13487 }, { "epoch": 1.9340407226842558, "grad_norm": 0.25732743740081787, "learning_rate": 3.3776862670663114e-06, "loss": 0.2903, "step": 13488 }, { "epoch": 1.934184112417551, "grad_norm": 0.24383744597434998, "learning_rate": 3.3768971803779026e-06, "loss": 0.3013, "step": 13489 }, { "epoch": 1.934327502150846, "grad_norm": 0.26185065507888794, "learning_rate": 3.37610813887418e-06, "loss": 0.2984, "step": 13490 }, { "epoch": 1.9344708918841411, "grad_norm": 0.270277738571167, "learning_rate": 3.375319142577109e-06, "loss": 0.3095, "step": 13491 }, { "epoch": 1.9346142816174363, "grad_norm": 0.27136537432670593, "learning_rate": 3.3745301915086526e-06, "loss": 0.2828, "step": 13492 }, { "epoch": 1.9347576713507313, "grad_norm": 0.2800045907497406, "learning_rate": 3.373741285690778e-06, "loss": 0.2982, "step": 13493 }, { "epoch": 1.9349010610840263, "grad_norm": 0.2701006829738617, "learning_rate": 3.3729524251454426e-06, "loss": 0.3101, "step": 13494 }, { "epoch": 1.9350444508173215, "grad_norm": 0.2946971654891968, "learning_rate": 3.3721636098946088e-06, "loss": 0.3073, "step": 13495 }, { "epoch": 1.9351878405506167, "grad_norm": 0.27378591895103455, "learning_rate": 3.3713748399602354e-06, "loss": 0.3018, "step": 13496 }, { "epoch": 1.9353312302839116, "grad_norm": 0.278929203748703, "learning_rate": 3.3705861153642814e-06, "loss": 0.3271, "step": 13497 }, { "epoch": 1.9354746200172066, "grad_norm": 0.26517045497894287, "learning_rate": 3.369797436128705e-06, "loss": 0.3114, "step": 13498 }, { "epoch": 1.935618009750502, "grad_norm": 0.27904894948005676, "learning_rate": 3.3690088022754587e-06, "loss": 0.3012, "step": 13499 }, { "epoch": 1.935761399483797, "grad_norm": 0.2606715261936188, "learning_rate": 3.3682202138264985e-06, "loss": 0.2994, "step": 13500 }, { "epoch": 1.935904789217092, "grad_norm": 0.26156988739967346, "learning_rate": 3.3674316708037776e-06, "loss": 0.2923, "step": 13501 }, { "epoch": 1.9360481789503872, "grad_norm": 0.2835065722465515, "learning_rate": 3.3666431732292472e-06, "loss": 0.3005, "step": 13502 }, { "epoch": 1.9361915686836824, "grad_norm": 0.27879270911216736, "learning_rate": 3.365854721124859e-06, "loss": 0.3113, "step": 13503 }, { "epoch": 1.9363349584169773, "grad_norm": 0.25955456495285034, "learning_rate": 3.3650663145125627e-06, "loss": 0.3012, "step": 13504 }, { "epoch": 1.9364783481502723, "grad_norm": 0.2697140872478485, "learning_rate": 3.364277953414304e-06, "loss": 0.291, "step": 13505 }, { "epoch": 1.9366217378835675, "grad_norm": 0.252849280834198, "learning_rate": 3.3634896378520297e-06, "loss": 0.318, "step": 13506 }, { "epoch": 1.9367651276168627, "grad_norm": 0.27778953313827515, "learning_rate": 3.362701367847687e-06, "loss": 0.2901, "step": 13507 }, { "epoch": 1.9369085173501577, "grad_norm": 0.26532629132270813, "learning_rate": 3.36191314342322e-06, "loss": 0.2999, "step": 13508 }, { "epoch": 1.9370519070834529, "grad_norm": 0.2738869786262512, "learning_rate": 3.361124964600573e-06, "loss": 0.3112, "step": 13509 }, { "epoch": 1.937195296816748, "grad_norm": 0.2542206943035126, "learning_rate": 3.360336831401684e-06, "loss": 0.3026, "step": 13510 }, { "epoch": 1.937338686550043, "grad_norm": 0.2617182433605194, "learning_rate": 3.3595487438484954e-06, "loss": 0.2944, "step": 13511 }, { "epoch": 1.937482076283338, "grad_norm": 0.2609923779964447, "learning_rate": 3.358760701962946e-06, "loss": 0.2802, "step": 13512 }, { "epoch": 1.9376254660166332, "grad_norm": 0.2682812809944153, "learning_rate": 3.357972705766974e-06, "loss": 0.2887, "step": 13513 }, { "epoch": 1.9377688557499284, "grad_norm": 0.28194931149482727, "learning_rate": 3.357184755282516e-06, "loss": 0.2934, "step": 13514 }, { "epoch": 1.9379122454832234, "grad_norm": 0.28751039505004883, "learning_rate": 3.3563968505315085e-06, "loss": 0.2967, "step": 13515 }, { "epoch": 1.9380556352165184, "grad_norm": 0.2754696309566498, "learning_rate": 3.3556089915358837e-06, "loss": 0.2966, "step": 13516 }, { "epoch": 1.9381990249498136, "grad_norm": 0.2800770103931427, "learning_rate": 3.3548211783175743e-06, "loss": 0.2942, "step": 13517 }, { "epoch": 1.9383424146831087, "grad_norm": 0.26413822174072266, "learning_rate": 3.354033410898513e-06, "loss": 0.296, "step": 13518 }, { "epoch": 1.9384858044164037, "grad_norm": 0.28921017050743103, "learning_rate": 3.3532456893006293e-06, "loss": 0.3115, "step": 13519 }, { "epoch": 1.938629194149699, "grad_norm": 0.2524819076061249, "learning_rate": 3.3524580135458544e-06, "loss": 0.306, "step": 13520 }, { "epoch": 1.9387725838829941, "grad_norm": 0.2620484530925751, "learning_rate": 3.351670383656112e-06, "loss": 0.3049, "step": 13521 }, { "epoch": 1.938915973616289, "grad_norm": 0.27213332056999207, "learning_rate": 3.3508827996533313e-06, "loss": 0.3065, "step": 13522 }, { "epoch": 1.939059363349584, "grad_norm": 0.2857891023159027, "learning_rate": 3.350095261559435e-06, "loss": 0.2931, "step": 13523 }, { "epoch": 1.9392027530828793, "grad_norm": 0.2884117364883423, "learning_rate": 3.34930776939635e-06, "loss": 0.2835, "step": 13524 }, { "epoch": 1.9393461428161745, "grad_norm": 0.2583143711090088, "learning_rate": 3.348520323185998e-06, "loss": 0.2927, "step": 13525 }, { "epoch": 1.9394895325494694, "grad_norm": 0.267608106136322, "learning_rate": 3.3477329229502985e-06, "loss": 0.313, "step": 13526 }, { "epoch": 1.9396329222827644, "grad_norm": 0.27491727471351624, "learning_rate": 3.3469455687111733e-06, "loss": 0.3235, "step": 13527 }, { "epoch": 1.9397763120160596, "grad_norm": 0.30357787013053894, "learning_rate": 3.3461582604905405e-06, "loss": 0.3083, "step": 13528 }, { "epoch": 1.9399197017493548, "grad_norm": 0.27910658717155457, "learning_rate": 3.345370998310318e-06, "loss": 0.2844, "step": 13529 }, { "epoch": 1.9400630914826498, "grad_norm": 0.27902013063430786, "learning_rate": 3.3445837821924204e-06, "loss": 0.305, "step": 13530 }, { "epoch": 1.940206481215945, "grad_norm": 0.29095736145973206, "learning_rate": 3.3437966121587666e-06, "loss": 0.3095, "step": 13531 }, { "epoch": 1.9403498709492402, "grad_norm": 0.28066322207450867, "learning_rate": 3.343009488231265e-06, "loss": 0.3025, "step": 13532 }, { "epoch": 1.9404932606825351, "grad_norm": 0.3031577467918396, "learning_rate": 3.342222410431831e-06, "loss": 0.3063, "step": 13533 }, { "epoch": 1.9406366504158301, "grad_norm": 0.28432005643844604, "learning_rate": 3.3414353787823744e-06, "loss": 0.2929, "step": 13534 }, { "epoch": 1.9407800401491253, "grad_norm": 0.28828638792037964, "learning_rate": 3.340648393304805e-06, "loss": 0.2974, "step": 13535 }, { "epoch": 1.9409234298824205, "grad_norm": 0.28645285964012146, "learning_rate": 3.339861454021034e-06, "loss": 0.3117, "step": 13536 }, { "epoch": 1.9410668196157155, "grad_norm": 0.2898566424846649, "learning_rate": 3.339074560952965e-06, "loss": 0.3062, "step": 13537 }, { "epoch": 1.9412102093490105, "grad_norm": 0.2900486886501312, "learning_rate": 3.338287714122505e-06, "loss": 0.3218, "step": 13538 }, { "epoch": 1.9413535990823059, "grad_norm": 0.26532021164894104, "learning_rate": 3.337500913551559e-06, "loss": 0.3009, "step": 13539 }, { "epoch": 1.9414969888156008, "grad_norm": 0.26633670926094055, "learning_rate": 3.33671415926203e-06, "loss": 0.2908, "step": 13540 }, { "epoch": 1.9416403785488958, "grad_norm": 0.280112624168396, "learning_rate": 3.33592745127582e-06, "loss": 0.2931, "step": 13541 }, { "epoch": 1.941783768282191, "grad_norm": 0.26756739616394043, "learning_rate": 3.3351407896148312e-06, "loss": 0.3166, "step": 13542 }, { "epoch": 1.9419271580154862, "grad_norm": 0.2807942032814026, "learning_rate": 3.334354174300961e-06, "loss": 0.3147, "step": 13543 }, { "epoch": 1.9420705477487812, "grad_norm": 0.2551945745944977, "learning_rate": 3.3335676053561077e-06, "loss": 0.3002, "step": 13544 }, { "epoch": 1.9422139374820762, "grad_norm": 0.27121803164482117, "learning_rate": 3.3327810828021693e-06, "loss": 0.3262, "step": 13545 }, { "epoch": 1.9423573272153714, "grad_norm": 0.2738611698150635, "learning_rate": 3.3319946066610408e-06, "loss": 0.2993, "step": 13546 }, { "epoch": 1.9425007169486666, "grad_norm": 0.26439496874809265, "learning_rate": 3.331208176954619e-06, "loss": 0.3024, "step": 13547 }, { "epoch": 1.9426441066819615, "grad_norm": 0.2627159059047699, "learning_rate": 3.3304217937047916e-06, "loss": 0.3028, "step": 13548 }, { "epoch": 1.9427874964152567, "grad_norm": 0.24804829061031342, "learning_rate": 3.3296354569334537e-06, "loss": 0.2753, "step": 13549 }, { "epoch": 1.942930886148552, "grad_norm": 0.2677724361419678, "learning_rate": 3.3288491666624954e-06, "loss": 0.2821, "step": 13550 }, { "epoch": 1.943074275881847, "grad_norm": 0.276157945394516, "learning_rate": 3.328062922913805e-06, "loss": 0.2971, "step": 13551 }, { "epoch": 1.9432176656151419, "grad_norm": 0.28600630164146423, "learning_rate": 3.327276725709272e-06, "loss": 0.3133, "step": 13552 }, { "epoch": 1.943361055348437, "grad_norm": 0.2813032567501068, "learning_rate": 3.326490575070781e-06, "loss": 0.3043, "step": 13553 }, { "epoch": 1.9435044450817323, "grad_norm": 0.2887241840362549, "learning_rate": 3.325704471020218e-06, "loss": 0.3122, "step": 13554 }, { "epoch": 1.9436478348150272, "grad_norm": 0.25552859902381897, "learning_rate": 3.324918413579468e-06, "loss": 0.3084, "step": 13555 }, { "epoch": 1.9437912245483222, "grad_norm": 0.25772756338119507, "learning_rate": 3.3241324027704113e-06, "loss": 0.3104, "step": 13556 }, { "epoch": 1.9439346142816174, "grad_norm": 0.2690074145793915, "learning_rate": 3.3233464386149304e-06, "loss": 0.3167, "step": 13557 }, { "epoch": 1.9440780040149126, "grad_norm": 0.2730536460876465, "learning_rate": 3.3225605211349082e-06, "loss": 0.3107, "step": 13558 }, { "epoch": 1.9442213937482076, "grad_norm": 0.2675207257270813, "learning_rate": 3.3217746503522186e-06, "loss": 0.2962, "step": 13559 }, { "epoch": 1.9443647834815028, "grad_norm": 0.263461172580719, "learning_rate": 3.320988826288741e-06, "loss": 0.2929, "step": 13560 }, { "epoch": 1.944508173214798, "grad_norm": 0.2646991014480591, "learning_rate": 3.3202030489663516e-06, "loss": 0.2951, "step": 13561 }, { "epoch": 1.944651562948093, "grad_norm": 0.26931232213974, "learning_rate": 3.3194173184069255e-06, "loss": 0.3008, "step": 13562 }, { "epoch": 1.944794952681388, "grad_norm": 0.2689995765686035, "learning_rate": 3.3186316346323364e-06, "loss": 0.3053, "step": 13563 }, { "epoch": 1.944938342414683, "grad_norm": 0.26413851976394653, "learning_rate": 3.317845997664456e-06, "loss": 0.2815, "step": 13564 }, { "epoch": 1.9450817321479783, "grad_norm": 0.2814600169658661, "learning_rate": 3.317060407525155e-06, "loss": 0.3099, "step": 13565 }, { "epoch": 1.9452251218812733, "grad_norm": 0.30267196893692017, "learning_rate": 3.3162748642363033e-06, "loss": 0.3088, "step": 13566 }, { "epoch": 1.9453685116145683, "grad_norm": 0.28568223118782043, "learning_rate": 3.31548936781977e-06, "loss": 0.3026, "step": 13567 }, { "epoch": 1.9455119013478634, "grad_norm": 0.285541296005249, "learning_rate": 3.3147039182974206e-06, "loss": 0.3101, "step": 13568 }, { "epoch": 1.9456552910811586, "grad_norm": 0.2776150703430176, "learning_rate": 3.313918515691123e-06, "loss": 0.3114, "step": 13569 }, { "epoch": 1.9457986808144536, "grad_norm": 0.28689277172088623, "learning_rate": 3.313133160022739e-06, "loss": 0.3034, "step": 13570 }, { "epoch": 1.9459420705477488, "grad_norm": 0.2874981462955475, "learning_rate": 3.3123478513141333e-06, "loss": 0.3056, "step": 13571 }, { "epoch": 1.946085460281044, "grad_norm": 0.2611282467842102, "learning_rate": 3.3115625895871674e-06, "loss": 0.3041, "step": 13572 }, { "epoch": 1.946228850014339, "grad_norm": 0.2742082476615906, "learning_rate": 3.3107773748637013e-06, "loss": 0.3054, "step": 13573 }, { "epoch": 1.946372239747634, "grad_norm": 0.2727443277835846, "learning_rate": 3.3099922071655965e-06, "loss": 0.2813, "step": 13574 }, { "epoch": 1.9465156294809292, "grad_norm": 0.2737666964530945, "learning_rate": 3.3092070865147073e-06, "loss": 0.2995, "step": 13575 }, { "epoch": 1.9466590192142244, "grad_norm": 0.2754867374897003, "learning_rate": 3.308422012932892e-06, "loss": 0.3026, "step": 13576 }, { "epoch": 1.9468024089475193, "grad_norm": 0.27358365058898926, "learning_rate": 3.3076369864420055e-06, "loss": 0.3246, "step": 13577 }, { "epoch": 1.9469457986808143, "grad_norm": 0.27623316645622253, "learning_rate": 3.306852007063902e-06, "loss": 0.3218, "step": 13578 }, { "epoch": 1.9470891884141095, "grad_norm": 0.259887158870697, "learning_rate": 3.306067074820435e-06, "loss": 0.2798, "step": 13579 }, { "epoch": 1.9472325781474047, "grad_norm": 0.27141520380973816, "learning_rate": 3.3052821897334543e-06, "loss": 0.2925, "step": 13580 }, { "epoch": 1.9473759678806997, "grad_norm": 0.2805250883102417, "learning_rate": 3.30449735182481e-06, "loss": 0.3128, "step": 13581 }, { "epoch": 1.9475193576139949, "grad_norm": 0.2938458323478699, "learning_rate": 3.3037125611163514e-06, "loss": 0.3152, "step": 13582 }, { "epoch": 1.94766274734729, "grad_norm": 0.2809942066669464, "learning_rate": 3.302927817629925e-06, "loss": 0.3016, "step": 13583 }, { "epoch": 1.947806137080585, "grad_norm": 0.2753413915634155, "learning_rate": 3.302143121387379e-06, "loss": 0.2973, "step": 13584 }, { "epoch": 1.94794952681388, "grad_norm": 0.27521756291389465, "learning_rate": 3.3013584724105573e-06, "loss": 0.3101, "step": 13585 }, { "epoch": 1.9480929165471752, "grad_norm": 0.27801913022994995, "learning_rate": 3.300573870721302e-06, "loss": 0.3034, "step": 13586 }, { "epoch": 1.9482363062804704, "grad_norm": 0.26204267144203186, "learning_rate": 3.299789316341455e-06, "loss": 0.3004, "step": 13587 }, { "epoch": 1.9483796960137654, "grad_norm": 0.2710978090763092, "learning_rate": 3.2990048092928583e-06, "loss": 0.3177, "step": 13588 }, { "epoch": 1.9485230857470603, "grad_norm": 0.2636796832084656, "learning_rate": 3.298220349597351e-06, "loss": 0.3012, "step": 13589 }, { "epoch": 1.9486664754803558, "grad_norm": 0.2885882556438446, "learning_rate": 3.297435937276773e-06, "loss": 0.2888, "step": 13590 }, { "epoch": 1.9488098652136507, "grad_norm": 0.3044581711292267, "learning_rate": 3.2966515723529576e-06, "loss": 0.3126, "step": 13591 }, { "epoch": 1.9489532549469457, "grad_norm": 0.26637065410614014, "learning_rate": 3.295867254847742e-06, "loss": 0.314, "step": 13592 }, { "epoch": 1.949096644680241, "grad_norm": 0.28250956535339355, "learning_rate": 3.2950829847829614e-06, "loss": 0.2963, "step": 13593 }, { "epoch": 1.949240034413536, "grad_norm": 0.2749060392379761, "learning_rate": 3.294298762180447e-06, "loss": 0.2962, "step": 13594 }, { "epoch": 1.949383424146831, "grad_norm": 0.2730352282524109, "learning_rate": 3.293514587062032e-06, "loss": 0.2926, "step": 13595 }, { "epoch": 1.949526813880126, "grad_norm": 0.2942519783973694, "learning_rate": 3.292730459449547e-06, "loss": 0.3017, "step": 13596 }, { "epoch": 1.9496702036134212, "grad_norm": 0.26635175943374634, "learning_rate": 3.2919463793648175e-06, "loss": 0.3116, "step": 13597 }, { "epoch": 1.9498135933467164, "grad_norm": 0.28145718574523926, "learning_rate": 3.291162346829675e-06, "loss": 0.31, "step": 13598 }, { "epoch": 1.9499569830800114, "grad_norm": 0.26607468724250793, "learning_rate": 3.2903783618659426e-06, "loss": 0.321, "step": 13599 }, { "epoch": 1.9501003728133066, "grad_norm": 0.29053178429603577, "learning_rate": 3.2895944244954477e-06, "loss": 0.3132, "step": 13600 }, { "epoch": 1.9502437625466018, "grad_norm": 0.2785927355289459, "learning_rate": 3.2888105347400146e-06, "loss": 0.2818, "step": 13601 }, { "epoch": 1.9503871522798968, "grad_norm": 0.26803624629974365, "learning_rate": 3.2880266926214617e-06, "loss": 0.3235, "step": 13602 }, { "epoch": 1.9505305420131918, "grad_norm": 0.26726973056793213, "learning_rate": 3.2872428981616122e-06, "loss": 0.3161, "step": 13603 }, { "epoch": 1.950673931746487, "grad_norm": 0.29766443371772766, "learning_rate": 3.286459151382286e-06, "loss": 0.2865, "step": 13604 }, { "epoch": 1.9508173214797822, "grad_norm": 0.2699008584022522, "learning_rate": 3.2856754523053008e-06, "loss": 0.2971, "step": 13605 }, { "epoch": 1.9509607112130771, "grad_norm": 0.25571343302726746, "learning_rate": 3.284891800952475e-06, "loss": 0.3175, "step": 13606 }, { "epoch": 1.951104100946372, "grad_norm": 0.2677291929721832, "learning_rate": 3.2841081973456225e-06, "loss": 0.3169, "step": 13607 }, { "epoch": 1.9512474906796673, "grad_norm": 0.2730993628501892, "learning_rate": 3.2833246415065578e-06, "loss": 0.2864, "step": 13608 }, { "epoch": 1.9513908804129625, "grad_norm": 0.26956671476364136, "learning_rate": 3.2825411334570945e-06, "loss": 0.2902, "step": 13609 }, { "epoch": 1.9515342701462575, "grad_norm": 0.2708781361579895, "learning_rate": 3.2817576732190437e-06, "loss": 0.2974, "step": 13610 }, { "epoch": 1.9516776598795527, "grad_norm": 0.26733261346817017, "learning_rate": 3.2809742608142166e-06, "loss": 0.2902, "step": 13611 }, { "epoch": 1.9518210496128479, "grad_norm": 0.2656770348548889, "learning_rate": 3.2801908962644237e-06, "loss": 0.2969, "step": 13612 }, { "epoch": 1.9519644393461428, "grad_norm": 0.258395791053772, "learning_rate": 3.2794075795914682e-06, "loss": 0.2797, "step": 13613 }, { "epoch": 1.9521078290794378, "grad_norm": 0.2510175108909607, "learning_rate": 3.278624310817159e-06, "loss": 0.2804, "step": 13614 }, { "epoch": 1.952251218812733, "grad_norm": 0.2678927481174469, "learning_rate": 3.277841089963302e-06, "loss": 0.2881, "step": 13615 }, { "epoch": 1.9523946085460282, "grad_norm": 0.292307585477829, "learning_rate": 3.2770579170516987e-06, "loss": 0.2947, "step": 13616 }, { "epoch": 1.9525379982793232, "grad_norm": 0.2716456353664398, "learning_rate": 3.2762747921041536e-06, "loss": 0.2858, "step": 13617 }, { "epoch": 1.9526813880126181, "grad_norm": 0.2941673994064331, "learning_rate": 3.275491715142467e-06, "loss": 0.3002, "step": 13618 }, { "epoch": 1.9528247777459133, "grad_norm": 0.277700275182724, "learning_rate": 3.274708686188437e-06, "loss": 0.2865, "step": 13619 }, { "epoch": 1.9529681674792085, "grad_norm": 0.2645511031150818, "learning_rate": 3.273925705263864e-06, "loss": 0.3129, "step": 13620 }, { "epoch": 1.9531115572125035, "grad_norm": 0.2840912938117981, "learning_rate": 3.2731427723905434e-06, "loss": 0.2994, "step": 13621 }, { "epoch": 1.9532549469457987, "grad_norm": 0.2681969106197357, "learning_rate": 3.272359887590272e-06, "loss": 0.3029, "step": 13622 }, { "epoch": 1.953398336679094, "grad_norm": 0.2803575396537781, "learning_rate": 3.2715770508848455e-06, "loss": 0.3009, "step": 13623 }, { "epoch": 1.9535417264123889, "grad_norm": 0.2696347236633301, "learning_rate": 3.2707942622960527e-06, "loss": 0.3059, "step": 13624 }, { "epoch": 1.9536851161456839, "grad_norm": 0.28258901834487915, "learning_rate": 3.270011521845688e-06, "loss": 0.2999, "step": 13625 }, { "epoch": 1.953828505878979, "grad_norm": 0.26103726029396057, "learning_rate": 3.269228829555542e-06, "loss": 0.2983, "step": 13626 }, { "epoch": 1.9539718956122742, "grad_norm": 0.2812935411930084, "learning_rate": 3.268446185447402e-06, "loss": 0.3157, "step": 13627 }, { "epoch": 1.9541152853455692, "grad_norm": 0.2598678469657898, "learning_rate": 3.267663589543059e-06, "loss": 0.313, "step": 13628 }, { "epoch": 1.9542586750788642, "grad_norm": 0.2582203149795532, "learning_rate": 3.266881041864295e-06, "loss": 0.2765, "step": 13629 }, { "epoch": 1.9544020648121596, "grad_norm": 0.27705326676368713, "learning_rate": 3.266098542432897e-06, "loss": 0.3093, "step": 13630 }, { "epoch": 1.9545454545454546, "grad_norm": 0.27773794531822205, "learning_rate": 3.2653160912706477e-06, "loss": 0.299, "step": 13631 }, { "epoch": 1.9546888442787496, "grad_norm": 0.2817066013813019, "learning_rate": 3.2645336883993308e-06, "loss": 0.2949, "step": 13632 }, { "epoch": 1.9548322340120448, "grad_norm": 0.25519853830337524, "learning_rate": 3.2637513338407267e-06, "loss": 0.2962, "step": 13633 }, { "epoch": 1.95497562374534, "grad_norm": 0.2912062108516693, "learning_rate": 3.2629690276166136e-06, "loss": 0.3059, "step": 13634 }, { "epoch": 1.955119013478635, "grad_norm": 0.257208913564682, "learning_rate": 3.262186769748771e-06, "loss": 0.2844, "step": 13635 }, { "epoch": 1.95526240321193, "grad_norm": 0.2739880084991455, "learning_rate": 3.261404560258975e-06, "loss": 0.2864, "step": 13636 }, { "epoch": 1.955405792945225, "grad_norm": 0.25889769196510315, "learning_rate": 3.2606223991690023e-06, "loss": 0.3064, "step": 13637 }, { "epoch": 1.9555491826785203, "grad_norm": 0.28505271673202515, "learning_rate": 3.259840286500625e-06, "loss": 0.2905, "step": 13638 }, { "epoch": 1.9556925724118153, "grad_norm": 0.29807397723197937, "learning_rate": 3.259058222275621e-06, "loss": 0.2907, "step": 13639 }, { "epoch": 1.9558359621451105, "grad_norm": 0.2894923686981201, "learning_rate": 3.2582762065157546e-06, "loss": 0.2917, "step": 13640 }, { "epoch": 1.9559793518784057, "grad_norm": 0.2535782754421234, "learning_rate": 3.2574942392428004e-06, "loss": 0.2954, "step": 13641 }, { "epoch": 1.9561227416117006, "grad_norm": 0.25768527388572693, "learning_rate": 3.2567123204785256e-06, "loss": 0.299, "step": 13642 }, { "epoch": 1.9562661313449956, "grad_norm": 0.25631847977638245, "learning_rate": 3.2559304502446987e-06, "loss": 0.2939, "step": 13643 }, { "epoch": 1.9564095210782908, "grad_norm": 0.27070993185043335, "learning_rate": 3.2551486285630852e-06, "loss": 0.3103, "step": 13644 }, { "epoch": 1.956552910811586, "grad_norm": 0.2922089993953705, "learning_rate": 3.25436685545545e-06, "loss": 0.3211, "step": 13645 }, { "epoch": 1.956696300544881, "grad_norm": 0.2627922296524048, "learning_rate": 3.253585130943555e-06, "loss": 0.2939, "step": 13646 }, { "epoch": 1.956839690278176, "grad_norm": 0.2522720992565155, "learning_rate": 3.252803455049164e-06, "loss": 0.3005, "step": 13647 }, { "epoch": 1.9569830800114711, "grad_norm": 0.2835446894168854, "learning_rate": 3.2520218277940367e-06, "loss": 0.2987, "step": 13648 }, { "epoch": 1.9571264697447663, "grad_norm": 0.28665441274642944, "learning_rate": 3.2512402491999333e-06, "loss": 0.2952, "step": 13649 }, { "epoch": 1.9572698594780613, "grad_norm": 0.29597917199134827, "learning_rate": 3.250458719288614e-06, "loss": 0.2918, "step": 13650 }, { "epoch": 1.9574132492113565, "grad_norm": 0.29380887746810913, "learning_rate": 3.249677238081829e-06, "loss": 0.3003, "step": 13651 }, { "epoch": 1.9575566389446517, "grad_norm": 0.2573593258857727, "learning_rate": 3.2488958056013373e-06, "loss": 0.2939, "step": 13652 }, { "epoch": 1.9577000286779467, "grad_norm": 0.27570050954818726, "learning_rate": 3.248114421868893e-06, "loss": 0.304, "step": 13653 }, { "epoch": 1.9578434184112417, "grad_norm": 0.2725782096385956, "learning_rate": 3.247333086906249e-06, "loss": 0.2938, "step": 13654 }, { "epoch": 1.9579868081445368, "grad_norm": 0.2626263201236725, "learning_rate": 3.2465518007351577e-06, "loss": 0.3011, "step": 13655 }, { "epoch": 1.958130197877832, "grad_norm": 0.28428900241851807, "learning_rate": 3.245770563377365e-06, "loss": 0.2916, "step": 13656 }, { "epoch": 1.958273587611127, "grad_norm": 0.26093822717666626, "learning_rate": 3.2449893748546207e-06, "loss": 0.2876, "step": 13657 }, { "epoch": 1.958416977344422, "grad_norm": 0.2828366458415985, "learning_rate": 3.244208235188673e-06, "loss": 0.3001, "step": 13658 }, { "epoch": 1.9585603670777172, "grad_norm": 0.2829315662384033, "learning_rate": 3.2434271444012665e-06, "loss": 0.3178, "step": 13659 }, { "epoch": 1.9587037568110124, "grad_norm": 0.26947301626205444, "learning_rate": 3.2426461025141476e-06, "loss": 0.3034, "step": 13660 }, { "epoch": 1.9588471465443074, "grad_norm": 0.288700133562088, "learning_rate": 3.241865109549057e-06, "loss": 0.3174, "step": 13661 }, { "epoch": 1.9589905362776026, "grad_norm": 0.2851676940917969, "learning_rate": 3.2410841655277373e-06, "loss": 0.2818, "step": 13662 }, { "epoch": 1.9591339260108978, "grad_norm": 0.28750523924827576, "learning_rate": 3.2403032704719286e-06, "loss": 0.2864, "step": 13663 }, { "epoch": 1.9592773157441927, "grad_norm": 0.28434890508651733, "learning_rate": 3.2395224244033695e-06, "loss": 0.295, "step": 13664 }, { "epoch": 1.9594207054774877, "grad_norm": 0.27421945333480835, "learning_rate": 3.2387416273437987e-06, "loss": 0.3007, "step": 13665 }, { "epoch": 1.959564095210783, "grad_norm": 0.27714070677757263, "learning_rate": 3.2379608793149538e-06, "loss": 0.3202, "step": 13666 }, { "epoch": 1.959707484944078, "grad_norm": 0.3106706142425537, "learning_rate": 3.2371801803385648e-06, "loss": 0.31, "step": 13667 }, { "epoch": 1.959850874677373, "grad_norm": 0.26555347442626953, "learning_rate": 3.2363995304363683e-06, "loss": 0.311, "step": 13668 }, { "epoch": 1.959994264410668, "grad_norm": 0.26501986384391785, "learning_rate": 3.2356189296300955e-06, "loss": 0.299, "step": 13669 }, { "epoch": 1.9601376541439632, "grad_norm": 0.27337753772735596, "learning_rate": 3.2348383779414777e-06, "loss": 0.2997, "step": 13670 }, { "epoch": 1.9602810438772584, "grad_norm": 0.2751489579677582, "learning_rate": 3.234057875392245e-06, "loss": 0.2928, "step": 13671 }, { "epoch": 1.9604244336105534, "grad_norm": 0.2810264229774475, "learning_rate": 3.2332774220041243e-06, "loss": 0.3079, "step": 13672 }, { "epoch": 1.9605678233438486, "grad_norm": 0.2905997633934021, "learning_rate": 3.2324970177988415e-06, "loss": 0.3121, "step": 13673 }, { "epoch": 1.9607112130771438, "grad_norm": 0.2707681655883789, "learning_rate": 3.231716662798123e-06, "loss": 0.2885, "step": 13674 }, { "epoch": 1.9608546028104388, "grad_norm": 0.26477211713790894, "learning_rate": 3.2309363570236927e-06, "loss": 0.3013, "step": 13675 }, { "epoch": 1.9609979925437337, "grad_norm": 0.2846842408180237, "learning_rate": 3.230156100497273e-06, "loss": 0.3171, "step": 13676 }, { "epoch": 1.961141382277029, "grad_norm": 0.2784605026245117, "learning_rate": 3.229375893240587e-06, "loss": 0.2976, "step": 13677 }, { "epoch": 1.9612847720103241, "grad_norm": 0.27514415979385376, "learning_rate": 3.2285957352753507e-06, "loss": 0.2855, "step": 13678 }, { "epoch": 1.9614281617436191, "grad_norm": 0.2871725261211395, "learning_rate": 3.227815626623283e-06, "loss": 0.3088, "step": 13679 }, { "epoch": 1.961571551476914, "grad_norm": 0.2844785749912262, "learning_rate": 3.227035567306102e-06, "loss": 0.2976, "step": 13680 }, { "epoch": 1.9617149412102095, "grad_norm": 0.2795146405696869, "learning_rate": 3.2262555573455246e-06, "loss": 0.3086, "step": 13681 }, { "epoch": 1.9618583309435045, "grad_norm": 0.26394346356391907, "learning_rate": 3.2254755967632655e-06, "loss": 0.3127, "step": 13682 }, { "epoch": 1.9620017206767995, "grad_norm": 0.27005112171173096, "learning_rate": 3.2246956855810342e-06, "loss": 0.3065, "step": 13683 }, { "epoch": 1.9621451104100947, "grad_norm": 0.2578636109828949, "learning_rate": 3.2239158238205438e-06, "loss": 0.3077, "step": 13684 }, { "epoch": 1.9622885001433898, "grad_norm": 0.26368942856788635, "learning_rate": 3.2231360115035056e-06, "loss": 0.3028, "step": 13685 }, { "epoch": 1.9624318898766848, "grad_norm": 0.2665584087371826, "learning_rate": 3.222356248651627e-06, "loss": 0.2971, "step": 13686 }, { "epoch": 1.9625752796099798, "grad_norm": 0.26681485772132874, "learning_rate": 3.221576535286617e-06, "loss": 0.3039, "step": 13687 }, { "epoch": 1.962718669343275, "grad_norm": 0.2837035357952118, "learning_rate": 3.2207968714301783e-06, "loss": 0.2869, "step": 13688 }, { "epoch": 1.9628620590765702, "grad_norm": 0.2709757685661316, "learning_rate": 3.2200172571040194e-06, "loss": 0.3077, "step": 13689 }, { "epoch": 1.9630054488098652, "grad_norm": 0.25538235902786255, "learning_rate": 3.2192376923298414e-06, "loss": 0.3096, "step": 13690 }, { "epoch": 1.9631488385431604, "grad_norm": 0.26828455924987793, "learning_rate": 3.2184581771293466e-06, "loss": 0.3016, "step": 13691 }, { "epoch": 1.9632922282764556, "grad_norm": 0.2757762670516968, "learning_rate": 3.217678711524236e-06, "loss": 0.3055, "step": 13692 }, { "epoch": 1.9634356180097505, "grad_norm": 0.28321152925491333, "learning_rate": 3.21689929553621e-06, "loss": 0.3015, "step": 13693 }, { "epoch": 1.9635790077430455, "grad_norm": 0.2795217037200928, "learning_rate": 3.216119929186963e-06, "loss": 0.3016, "step": 13694 }, { "epoch": 1.9637223974763407, "grad_norm": 0.2733878493309021, "learning_rate": 3.2153406124981925e-06, "loss": 0.2974, "step": 13695 }, { "epoch": 1.963865787209636, "grad_norm": 0.2788996696472168, "learning_rate": 3.2145613454915946e-06, "loss": 0.2868, "step": 13696 }, { "epoch": 1.9640091769429309, "grad_norm": 0.2675491273403168, "learning_rate": 3.2137821281888617e-06, "loss": 0.2998, "step": 13697 }, { "epoch": 1.9641525666762258, "grad_norm": 0.30567798018455505, "learning_rate": 3.213002960611689e-06, "loss": 0.3074, "step": 13698 }, { "epoch": 1.964295956409521, "grad_norm": 0.2683027684688568, "learning_rate": 3.212223842781762e-06, "loss": 0.3077, "step": 13699 }, { "epoch": 1.9644393461428162, "grad_norm": 0.27334967255592346, "learning_rate": 3.2114447747207745e-06, "loss": 0.307, "step": 13700 }, { "epoch": 1.9645827358761112, "grad_norm": 0.2907654345035553, "learning_rate": 3.210665756450412e-06, "loss": 0.2879, "step": 13701 }, { "epoch": 1.9647261256094064, "grad_norm": 0.2846241295337677, "learning_rate": 3.2098867879923635e-06, "loss": 0.2886, "step": 13702 }, { "epoch": 1.9648695153427016, "grad_norm": 0.26547059416770935, "learning_rate": 3.2091078693683123e-06, "loss": 0.3111, "step": 13703 }, { "epoch": 1.9650129050759966, "grad_norm": 0.2920629680156708, "learning_rate": 3.208329000599946e-06, "loss": 0.2986, "step": 13704 }, { "epoch": 1.9651562948092915, "grad_norm": 0.2654135227203369, "learning_rate": 3.2075501817089416e-06, "loss": 0.2924, "step": 13705 }, { "epoch": 1.9652996845425867, "grad_norm": 0.27404358983039856, "learning_rate": 3.2067714127169825e-06, "loss": 0.2919, "step": 13706 }, { "epoch": 1.965443074275882, "grad_norm": 0.28086602687835693, "learning_rate": 3.20599269364575e-06, "loss": 0.3013, "step": 13707 }, { "epoch": 1.965586464009177, "grad_norm": 0.2623041868209839, "learning_rate": 3.2052140245169187e-06, "loss": 0.3078, "step": 13708 }, { "epoch": 1.9657298537424719, "grad_norm": 0.2663659453392029, "learning_rate": 3.204435405352172e-06, "loss": 0.3068, "step": 13709 }, { "epoch": 1.965873243475767, "grad_norm": 0.274815171957016, "learning_rate": 3.203656836173179e-06, "loss": 0.3023, "step": 13710 }, { "epoch": 1.9660166332090623, "grad_norm": 0.2779804468154907, "learning_rate": 3.202878317001617e-06, "loss": 0.3233, "step": 13711 }, { "epoch": 1.9661600229423573, "grad_norm": 0.26888641715049744, "learning_rate": 3.202099847859158e-06, "loss": 0.311, "step": 13712 }, { "epoch": 1.9663034126756525, "grad_norm": 0.2550077736377716, "learning_rate": 3.2013214287674732e-06, "loss": 0.295, "step": 13713 }, { "epoch": 1.9664468024089476, "grad_norm": 0.26368141174316406, "learning_rate": 3.200543059748234e-06, "loss": 0.3057, "step": 13714 }, { "epoch": 1.9665901921422426, "grad_norm": 0.27573221921920776, "learning_rate": 3.199764740823107e-06, "loss": 0.2888, "step": 13715 }, { "epoch": 1.9667335818755376, "grad_norm": 0.27471429109573364, "learning_rate": 3.1989864720137607e-06, "loss": 0.2998, "step": 13716 }, { "epoch": 1.9668769716088328, "grad_norm": 0.30171653628349304, "learning_rate": 3.1982082533418603e-06, "loss": 0.282, "step": 13717 }, { "epoch": 1.967020361342128, "grad_norm": 0.25687915086746216, "learning_rate": 3.1974300848290703e-06, "loss": 0.3096, "step": 13718 }, { "epoch": 1.967163751075423, "grad_norm": 0.27074936032295227, "learning_rate": 3.196651966497054e-06, "loss": 0.3288, "step": 13719 }, { "epoch": 1.967307140808718, "grad_norm": 0.2934134006500244, "learning_rate": 3.1958738983674744e-06, "loss": 0.3046, "step": 13720 }, { "epoch": 1.9674505305420134, "grad_norm": 0.24629101157188416, "learning_rate": 3.195095880461989e-06, "loss": 0.3011, "step": 13721 }, { "epoch": 1.9675939202753083, "grad_norm": 0.25546708703041077, "learning_rate": 3.1943179128022573e-06, "loss": 0.2979, "step": 13722 }, { "epoch": 1.9677373100086033, "grad_norm": 0.27869075536727905, "learning_rate": 3.1935399954099376e-06, "loss": 0.3162, "step": 13723 }, { "epoch": 1.9678806997418985, "grad_norm": 0.27047643065452576, "learning_rate": 3.1927621283066852e-06, "loss": 0.312, "step": 13724 }, { "epoch": 1.9680240894751937, "grad_norm": 0.26315775513648987, "learning_rate": 3.1919843115141562e-06, "loss": 0.3217, "step": 13725 }, { "epoch": 1.9681674792084887, "grad_norm": 0.25838494300842285, "learning_rate": 3.191206545054002e-06, "loss": 0.3047, "step": 13726 }, { "epoch": 1.9683108689417836, "grad_norm": 0.2746683657169342, "learning_rate": 3.1904288289478747e-06, "loss": 0.3112, "step": 13727 }, { "epoch": 1.9684542586750788, "grad_norm": 0.27513590455055237, "learning_rate": 3.1896511632174254e-06, "loss": 0.3141, "step": 13728 }, { "epoch": 1.968597648408374, "grad_norm": 0.2959587872028351, "learning_rate": 3.1888735478843023e-06, "loss": 0.3058, "step": 13729 }, { "epoch": 1.968741038141669, "grad_norm": 0.2764202952384949, "learning_rate": 3.1880959829701547e-06, "loss": 0.305, "step": 13730 }, { "epoch": 1.9688844278749642, "grad_norm": 0.2669822871685028, "learning_rate": 3.1873184684966294e-06, "loss": 0.3116, "step": 13731 }, { "epoch": 1.9690278176082594, "grad_norm": 0.26375001668930054, "learning_rate": 3.1865410044853672e-06, "loss": 0.2956, "step": 13732 }, { "epoch": 1.9691712073415544, "grad_norm": 0.3034263551235199, "learning_rate": 3.1857635909580137e-06, "loss": 0.2877, "step": 13733 }, { "epoch": 1.9693145970748493, "grad_norm": 0.26023969054222107, "learning_rate": 3.184986227936211e-06, "loss": 0.3085, "step": 13734 }, { "epoch": 1.9694579868081445, "grad_norm": 0.2704092860221863, "learning_rate": 3.1842089154416e-06, "loss": 0.3002, "step": 13735 }, { "epoch": 1.9696013765414397, "grad_norm": 0.2830539047718048, "learning_rate": 3.183431653495821e-06, "loss": 0.3308, "step": 13736 }, { "epoch": 1.9697447662747347, "grad_norm": 0.28231269121170044, "learning_rate": 3.1826544421205084e-06, "loss": 0.3211, "step": 13737 }, { "epoch": 1.9698881560080297, "grad_norm": 0.24121998250484467, "learning_rate": 3.1818772813373013e-06, "loss": 0.3088, "step": 13738 }, { "epoch": 1.9700315457413249, "grad_norm": 0.2557092607021332, "learning_rate": 3.1811001711678334e-06, "loss": 0.3002, "step": 13739 }, { "epoch": 1.97017493547462, "grad_norm": 0.26247841119766235, "learning_rate": 3.1803231116337386e-06, "loss": 0.2992, "step": 13740 }, { "epoch": 1.970318325207915, "grad_norm": 0.26130592823028564, "learning_rate": 3.179546102756651e-06, "loss": 0.2986, "step": 13741 }, { "epoch": 1.9704617149412103, "grad_norm": 0.27374163269996643, "learning_rate": 3.178769144558198e-06, "loss": 0.3032, "step": 13742 }, { "epoch": 1.9706051046745054, "grad_norm": 0.26506665349006653, "learning_rate": 3.17799223706001e-06, "loss": 0.2913, "step": 13743 }, { "epoch": 1.9707484944078004, "grad_norm": 0.26782405376434326, "learning_rate": 3.177215380283716e-06, "loss": 0.2918, "step": 13744 }, { "epoch": 1.9708918841410954, "grad_norm": 0.2669987678527832, "learning_rate": 3.1764385742509417e-06, "loss": 0.3066, "step": 13745 }, { "epoch": 1.9710352738743906, "grad_norm": 0.26298055052757263, "learning_rate": 3.175661818983312e-06, "loss": 0.3101, "step": 13746 }, { "epoch": 1.9711786636076858, "grad_norm": 0.275656133890152, "learning_rate": 3.1748851145024535e-06, "loss": 0.3022, "step": 13747 }, { "epoch": 1.9713220533409808, "grad_norm": 0.2756446301937103, "learning_rate": 3.1741084608299835e-06, "loss": 0.2843, "step": 13748 }, { "epoch": 1.9714654430742757, "grad_norm": 0.27266329526901245, "learning_rate": 3.173331857987525e-06, "loss": 0.3152, "step": 13749 }, { "epoch": 1.971608832807571, "grad_norm": 0.2620880901813507, "learning_rate": 3.172555305996698e-06, "loss": 0.3036, "step": 13750 }, { "epoch": 1.9717522225408661, "grad_norm": 0.27918410301208496, "learning_rate": 3.17177880487912e-06, "loss": 0.2935, "step": 13751 }, { "epoch": 1.971895612274161, "grad_norm": 0.2701181173324585, "learning_rate": 3.1710023546564083e-06, "loss": 0.301, "step": 13752 }, { "epoch": 1.9720390020074563, "grad_norm": 0.26610812544822693, "learning_rate": 3.170225955350177e-06, "loss": 0.2945, "step": 13753 }, { "epoch": 1.9721823917407515, "grad_norm": 0.2873814105987549, "learning_rate": 3.16944960698204e-06, "loss": 0.3231, "step": 13754 }, { "epoch": 1.9723257814740465, "grad_norm": 0.2921718657016754, "learning_rate": 3.168673309573611e-06, "loss": 0.3099, "step": 13755 }, { "epoch": 1.9724691712073414, "grad_norm": 0.26019516587257385, "learning_rate": 3.167897063146499e-06, "loss": 0.3053, "step": 13756 }, { "epoch": 1.9726125609406366, "grad_norm": 0.2796887159347534, "learning_rate": 3.167120867722315e-06, "loss": 0.2985, "step": 13757 }, { "epoch": 1.9727559506739318, "grad_norm": 0.26763230562210083, "learning_rate": 3.1663447233226685e-06, "loss": 0.3015, "step": 13758 }, { "epoch": 1.9728993404072268, "grad_norm": 0.2785375714302063, "learning_rate": 3.1655686299691622e-06, "loss": 0.2869, "step": 13759 }, { "epoch": 1.9730427301405218, "grad_norm": 0.2801001965999603, "learning_rate": 3.1647925876834028e-06, "loss": 0.2945, "step": 13760 }, { "epoch": 1.973186119873817, "grad_norm": 0.25217920541763306, "learning_rate": 3.164016596486995e-06, "loss": 0.2907, "step": 13761 }, { "epoch": 1.9733295096071122, "grad_norm": 0.2754765748977661, "learning_rate": 3.163240656401541e-06, "loss": 0.3079, "step": 13762 }, { "epoch": 1.9734728993404071, "grad_norm": 0.27776333689689636, "learning_rate": 3.162464767448643e-06, "loss": 0.2919, "step": 13763 }, { "epoch": 1.9736162890737023, "grad_norm": 0.2562617063522339, "learning_rate": 3.1616889296498976e-06, "loss": 0.2991, "step": 13764 }, { "epoch": 1.9737596788069975, "grad_norm": 0.2811731994152069, "learning_rate": 3.1609131430269047e-06, "loss": 0.2909, "step": 13765 }, { "epoch": 1.9739030685402925, "grad_norm": 0.2657697796821594, "learning_rate": 3.1601374076012616e-06, "loss": 0.2928, "step": 13766 }, { "epoch": 1.9740464582735875, "grad_norm": 0.2627662718296051, "learning_rate": 3.159361723394562e-06, "loss": 0.3204, "step": 13767 }, { "epoch": 1.9741898480068827, "grad_norm": 0.2633216381072998, "learning_rate": 3.158586090428403e-06, "loss": 0.3034, "step": 13768 }, { "epoch": 1.9743332377401779, "grad_norm": 0.27826499938964844, "learning_rate": 3.1578105087243704e-06, "loss": 0.3092, "step": 13769 }, { "epoch": 1.9744766274734729, "grad_norm": 0.2840648591518402, "learning_rate": 3.157034978304062e-06, "loss": 0.3155, "step": 13770 }, { "epoch": 1.9746200172067678, "grad_norm": 0.25832095742225647, "learning_rate": 3.156259499189064e-06, "loss": 0.3187, "step": 13771 }, { "epoch": 1.9747634069400632, "grad_norm": 0.26604941487312317, "learning_rate": 3.1554840714009655e-06, "loss": 0.3017, "step": 13772 }, { "epoch": 1.9749067966733582, "grad_norm": 0.2702319324016571, "learning_rate": 3.154708694961353e-06, "loss": 0.3068, "step": 13773 }, { "epoch": 1.9750501864066532, "grad_norm": 0.27307674288749695, "learning_rate": 3.153933369891814e-06, "loss": 0.2909, "step": 13774 }, { "epoch": 1.9751935761399484, "grad_norm": 0.2919091582298279, "learning_rate": 3.1531580962139285e-06, "loss": 0.3019, "step": 13775 }, { "epoch": 1.9753369658732436, "grad_norm": 0.2847743630409241, "learning_rate": 3.15238287394928e-06, "loss": 0.3153, "step": 13776 }, { "epoch": 1.9754803556065386, "grad_norm": 0.27535709738731384, "learning_rate": 3.1516077031194504e-06, "loss": 0.3033, "step": 13777 }, { "epoch": 1.9756237453398335, "grad_norm": 0.27589792013168335, "learning_rate": 3.1508325837460184e-06, "loss": 0.3114, "step": 13778 }, { "epoch": 1.9757671350731287, "grad_norm": 0.2603153884410858, "learning_rate": 3.150057515850565e-06, "loss": 0.309, "step": 13779 }, { "epoch": 1.975910524806424, "grad_norm": 0.26927468180656433, "learning_rate": 3.149282499454662e-06, "loss": 0.3048, "step": 13780 }, { "epoch": 1.976053914539719, "grad_norm": 0.2772754728794098, "learning_rate": 3.148507534579889e-06, "loss": 0.3142, "step": 13781 }, { "epoch": 1.976197304273014, "grad_norm": 0.26181310415267944, "learning_rate": 3.1477326212478173e-06, "loss": 0.3217, "step": 13782 }, { "epoch": 1.9763406940063093, "grad_norm": 0.3202551305294037, "learning_rate": 3.14695775948002e-06, "loss": 0.3123, "step": 13783 }, { "epoch": 1.9764840837396043, "grad_norm": 0.2865568697452545, "learning_rate": 3.146182949298068e-06, "loss": 0.2991, "step": 13784 }, { "epoch": 1.9766274734728992, "grad_norm": 0.2896750271320343, "learning_rate": 3.1454081907235333e-06, "loss": 0.3196, "step": 13785 }, { "epoch": 1.9767708632061944, "grad_norm": 0.25446945428848267, "learning_rate": 3.14463348377798e-06, "loss": 0.2973, "step": 13786 }, { "epoch": 1.9769142529394896, "grad_norm": 0.2828342318534851, "learning_rate": 3.143858828482975e-06, "loss": 0.2957, "step": 13787 }, { "epoch": 1.9770576426727846, "grad_norm": 0.2902848422527313, "learning_rate": 3.143084224860087e-06, "loss": 0.2986, "step": 13788 }, { "epoch": 1.9772010324060796, "grad_norm": 0.2882746458053589, "learning_rate": 3.142309672930876e-06, "loss": 0.3073, "step": 13789 }, { "epoch": 1.9773444221393748, "grad_norm": 0.27793461084365845, "learning_rate": 3.141535172716908e-06, "loss": 0.2912, "step": 13790 }, { "epoch": 1.97748781187267, "grad_norm": 0.2608479857444763, "learning_rate": 3.1407607242397407e-06, "loss": 0.3101, "step": 13791 }, { "epoch": 1.977631201605965, "grad_norm": 0.2573520839214325, "learning_rate": 3.1399863275209346e-06, "loss": 0.3005, "step": 13792 }, { "epoch": 1.9777745913392601, "grad_norm": 0.2960168123245239, "learning_rate": 3.1392119825820476e-06, "loss": 0.3037, "step": 13793 }, { "epoch": 1.9779179810725553, "grad_norm": 0.2737691104412079, "learning_rate": 3.1384376894446378e-06, "loss": 0.3003, "step": 13794 }, { "epoch": 1.9780613708058503, "grad_norm": 0.2910824120044708, "learning_rate": 3.1376634481302606e-06, "loss": 0.3098, "step": 13795 }, { "epoch": 1.9782047605391453, "grad_norm": 0.2678578794002533, "learning_rate": 3.1368892586604657e-06, "loss": 0.2995, "step": 13796 }, { "epoch": 1.9783481502724405, "grad_norm": 0.27135151624679565, "learning_rate": 3.1361151210568063e-06, "loss": 0.3123, "step": 13797 }, { "epoch": 1.9784915400057357, "grad_norm": 0.2659638524055481, "learning_rate": 3.1353410353408365e-06, "loss": 0.3078, "step": 13798 }, { "epoch": 1.9786349297390307, "grad_norm": 0.26935523748397827, "learning_rate": 3.1345670015341047e-06, "loss": 0.3212, "step": 13799 }, { "epoch": 1.9787783194723256, "grad_norm": 0.29714635014533997, "learning_rate": 3.1337930196581566e-06, "loss": 0.3111, "step": 13800 }, { "epoch": 1.9789217092056208, "grad_norm": 0.2869744896888733, "learning_rate": 3.1330190897345423e-06, "loss": 0.2895, "step": 13801 }, { "epoch": 1.979065098938916, "grad_norm": 0.26563549041748047, "learning_rate": 3.1322452117848036e-06, "loss": 0.2935, "step": 13802 }, { "epoch": 1.979208488672211, "grad_norm": 0.26001012325286865, "learning_rate": 3.1314713858304847e-06, "loss": 0.2899, "step": 13803 }, { "epoch": 1.9793518784055062, "grad_norm": 0.26217415928840637, "learning_rate": 3.1306976118931275e-06, "loss": 0.297, "step": 13804 }, { "epoch": 1.9794952681388014, "grad_norm": 0.27814945578575134, "learning_rate": 3.129923889994273e-06, "loss": 0.313, "step": 13805 }, { "epoch": 1.9796386578720964, "grad_norm": 0.26375052332878113, "learning_rate": 3.1291502201554626e-06, "loss": 0.2929, "step": 13806 }, { "epoch": 1.9797820476053913, "grad_norm": 0.23947754502296448, "learning_rate": 3.128376602398231e-06, "loss": 0.2906, "step": 13807 }, { "epoch": 1.9799254373386865, "grad_norm": 0.2874397039413452, "learning_rate": 3.127603036744115e-06, "loss": 0.2975, "step": 13808 }, { "epoch": 1.9800688270719817, "grad_norm": 0.24522580206394196, "learning_rate": 3.1268295232146508e-06, "loss": 0.3041, "step": 13809 }, { "epoch": 1.9802122168052767, "grad_norm": 0.2512197494506836, "learning_rate": 3.126056061831371e-06, "loss": 0.2915, "step": 13810 }, { "epoch": 1.9803556065385717, "grad_norm": 0.2556228041648865, "learning_rate": 3.125282652615808e-06, "loss": 0.2915, "step": 13811 }, { "epoch": 1.980498996271867, "grad_norm": 0.27401143312454224, "learning_rate": 3.124509295589494e-06, "loss": 0.3039, "step": 13812 }, { "epoch": 1.980642386005162, "grad_norm": 0.2794836461544037, "learning_rate": 3.123735990773954e-06, "loss": 0.319, "step": 13813 }, { "epoch": 1.980785775738457, "grad_norm": 0.276183158159256, "learning_rate": 3.122962738190718e-06, "loss": 0.3041, "step": 13814 }, { "epoch": 1.9809291654717522, "grad_norm": 0.2728665769100189, "learning_rate": 3.1221895378613122e-06, "loss": 0.3258, "step": 13815 }, { "epoch": 1.9810725552050474, "grad_norm": 0.27996280789375305, "learning_rate": 3.1214163898072604e-06, "loss": 0.3111, "step": 13816 }, { "epoch": 1.9812159449383424, "grad_norm": 0.26409196853637695, "learning_rate": 3.1206432940500876e-06, "loss": 0.3158, "step": 13817 }, { "epoch": 1.9813593346716374, "grad_norm": 0.2754180431365967, "learning_rate": 3.1198702506113137e-06, "loss": 0.2874, "step": 13818 }, { "epoch": 1.9815027244049326, "grad_norm": 0.2544354200363159, "learning_rate": 3.11909725951246e-06, "loss": 0.2814, "step": 13819 }, { "epoch": 1.9816461141382278, "grad_norm": 0.27009913325309753, "learning_rate": 3.1183243207750445e-06, "loss": 0.3155, "step": 13820 }, { "epoch": 1.9817895038715228, "grad_norm": 0.28605660796165466, "learning_rate": 3.117551434420586e-06, "loss": 0.293, "step": 13821 }, { "epoch": 1.981932893604818, "grad_norm": 0.2844957113265991, "learning_rate": 3.116778600470602e-06, "loss": 0.2918, "step": 13822 }, { "epoch": 1.9820762833381131, "grad_norm": 0.2846926748752594, "learning_rate": 3.1160058189466026e-06, "loss": 0.3079, "step": 13823 }, { "epoch": 1.9822196730714081, "grad_norm": 0.26084232330322266, "learning_rate": 3.1152330898701026e-06, "loss": 0.2847, "step": 13824 }, { "epoch": 1.982363062804703, "grad_norm": 0.2683614194393158, "learning_rate": 3.114460413262613e-06, "loss": 0.2857, "step": 13825 }, { "epoch": 1.9825064525379983, "grad_norm": 0.26420819759368896, "learning_rate": 3.1136877891456473e-06, "loss": 0.3007, "step": 13826 }, { "epoch": 1.9826498422712935, "grad_norm": 0.26472175121307373, "learning_rate": 3.1129152175407106e-06, "loss": 0.3101, "step": 13827 }, { "epoch": 1.9827932320045885, "grad_norm": 0.2663361728191376, "learning_rate": 3.1121426984693138e-06, "loss": 0.3031, "step": 13828 }, { "epoch": 1.9829366217378834, "grad_norm": 0.29437685012817383, "learning_rate": 3.1113702319529583e-06, "loss": 0.3006, "step": 13829 }, { "epoch": 1.9830800114711786, "grad_norm": 0.285180926322937, "learning_rate": 3.11059781801315e-06, "loss": 0.2903, "step": 13830 }, { "epoch": 1.9832234012044738, "grad_norm": 0.25517895817756653, "learning_rate": 3.1098254566713925e-06, "loss": 0.2871, "step": 13831 }, { "epoch": 1.9833667909377688, "grad_norm": 0.28417783975601196, "learning_rate": 3.1090531479491864e-06, "loss": 0.2861, "step": 13832 }, { "epoch": 1.983510180671064, "grad_norm": 0.2707520127296448, "learning_rate": 3.1082808918680328e-06, "loss": 0.3127, "step": 13833 }, { "epoch": 1.9836535704043592, "grad_norm": 0.25692760944366455, "learning_rate": 3.1075086884494277e-06, "loss": 0.299, "step": 13834 }, { "epoch": 1.9837969601376542, "grad_norm": 0.28568148612976074, "learning_rate": 3.1067365377148705e-06, "loss": 0.2981, "step": 13835 }, { "epoch": 1.9839403498709491, "grad_norm": 0.2653903663158417, "learning_rate": 3.1059644396858547e-06, "loss": 0.2858, "step": 13836 }, { "epoch": 1.9840837396042443, "grad_norm": 0.2722056806087494, "learning_rate": 3.105192394383876e-06, "loss": 0.2989, "step": 13837 }, { "epoch": 1.9842271293375395, "grad_norm": 0.2533811628818512, "learning_rate": 3.1044204018304257e-06, "loss": 0.3159, "step": 13838 }, { "epoch": 1.9843705190708345, "grad_norm": 0.26861336827278137, "learning_rate": 3.103648462046998e-06, "loss": 0.3095, "step": 13839 }, { "epoch": 1.9845139088041295, "grad_norm": 0.2478668987751007, "learning_rate": 3.102876575055078e-06, "loss": 0.2948, "step": 13840 }, { "epoch": 1.9846572985374247, "grad_norm": 0.2842639088630676, "learning_rate": 3.102104740876156e-06, "loss": 0.3152, "step": 13841 }, { "epoch": 1.9848006882707199, "grad_norm": 0.27851298451423645, "learning_rate": 3.101332959531718e-06, "loss": 0.3134, "step": 13842 }, { "epoch": 1.9849440780040148, "grad_norm": 0.2705311179161072, "learning_rate": 3.1005612310432496e-06, "loss": 0.293, "step": 13843 }, { "epoch": 1.98508746773731, "grad_norm": 0.2641081213951111, "learning_rate": 3.099789555432236e-06, "loss": 0.2877, "step": 13844 }, { "epoch": 1.9852308574706052, "grad_norm": 0.25949767231941223, "learning_rate": 3.099017932720157e-06, "loss": 0.2774, "step": 13845 }, { "epoch": 1.9853742472039002, "grad_norm": 0.2740398645401001, "learning_rate": 3.0982463629284943e-06, "loss": 0.2968, "step": 13846 }, { "epoch": 1.9855176369371952, "grad_norm": 0.2705254852771759, "learning_rate": 3.0974748460787275e-06, "loss": 0.3052, "step": 13847 }, { "epoch": 1.9856610266704904, "grad_norm": 0.27166587114334106, "learning_rate": 3.096703382192334e-06, "loss": 0.3116, "step": 13848 }, { "epoch": 1.9858044164037856, "grad_norm": 0.2823379933834076, "learning_rate": 3.0959319712907935e-06, "loss": 0.3087, "step": 13849 }, { "epoch": 1.9859478061370806, "grad_norm": 0.3004896640777588, "learning_rate": 3.095160613395576e-06, "loss": 0.3095, "step": 13850 }, { "epoch": 1.9860911958703755, "grad_norm": 0.28707754611968994, "learning_rate": 3.0943893085281557e-06, "loss": 0.3099, "step": 13851 }, { "epoch": 1.986234585603671, "grad_norm": 0.2801067531108856, "learning_rate": 3.093618056710006e-06, "loss": 0.3127, "step": 13852 }, { "epoch": 1.986377975336966, "grad_norm": 0.27387842535972595, "learning_rate": 3.0928468579625965e-06, "loss": 0.2811, "step": 13853 }, { "epoch": 1.986521365070261, "grad_norm": 0.28151735663414, "learning_rate": 3.0920757123073976e-06, "loss": 0.305, "step": 13854 }, { "epoch": 1.986664754803556, "grad_norm": 0.28009670972824097, "learning_rate": 3.0913046197658773e-06, "loss": 0.3121, "step": 13855 }, { "epoch": 1.9868081445368513, "grad_norm": 0.29202041029930115, "learning_rate": 3.090533580359499e-06, "loss": 0.3157, "step": 13856 }, { "epoch": 1.9869515342701463, "grad_norm": 0.2776140868663788, "learning_rate": 3.0897625941097286e-06, "loss": 0.2965, "step": 13857 }, { "epoch": 1.9870949240034412, "grad_norm": 0.2584928274154663, "learning_rate": 3.088991661038029e-06, "loss": 0.3243, "step": 13858 }, { "epoch": 1.9872383137367364, "grad_norm": 0.2747747004032135, "learning_rate": 3.088220781165861e-06, "loss": 0.3014, "step": 13859 }, { "epoch": 1.9873817034700316, "grad_norm": 0.27643898129463196, "learning_rate": 3.0874499545146873e-06, "loss": 0.3031, "step": 13860 }, { "epoch": 1.9875250932033266, "grad_norm": 0.26572903990745544, "learning_rate": 3.0866791811059636e-06, "loss": 0.304, "step": 13861 }, { "epoch": 1.9876684829366216, "grad_norm": 0.26662084460258484, "learning_rate": 3.0859084609611484e-06, "loss": 0.3009, "step": 13862 }, { "epoch": 1.987811872669917, "grad_norm": 0.25346842408180237, "learning_rate": 3.085137794101697e-06, "loss": 0.3164, "step": 13863 }, { "epoch": 1.987955262403212, "grad_norm": 0.2818847894668579, "learning_rate": 3.0843671805490637e-06, "loss": 0.3081, "step": 13864 }, { "epoch": 1.988098652136507, "grad_norm": 0.269534170627594, "learning_rate": 3.083596620324702e-06, "loss": 0.3016, "step": 13865 }, { "epoch": 1.9882420418698021, "grad_norm": 0.26043254137039185, "learning_rate": 3.0828261134500643e-06, "loss": 0.2993, "step": 13866 }, { "epoch": 1.9883854316030973, "grad_norm": 0.2651481032371521, "learning_rate": 3.0820556599465956e-06, "loss": 0.2924, "step": 13867 }, { "epoch": 1.9885288213363923, "grad_norm": 0.26469218730926514, "learning_rate": 3.081285259835748e-06, "loss": 0.3009, "step": 13868 }, { "epoch": 1.9886722110696873, "grad_norm": 0.29263627529144287, "learning_rate": 3.0805149131389665e-06, "loss": 0.2781, "step": 13869 }, { "epoch": 1.9888156008029825, "grad_norm": 0.27527496218681335, "learning_rate": 3.0797446198776978e-06, "loss": 0.318, "step": 13870 }, { "epoch": 1.9889589905362777, "grad_norm": 0.27195435762405396, "learning_rate": 3.078974380073385e-06, "loss": 0.297, "step": 13871 }, { "epoch": 1.9891023802695726, "grad_norm": 0.26943689584732056, "learning_rate": 3.0782041937474705e-06, "loss": 0.3134, "step": 13872 }, { "epoch": 1.9892457700028678, "grad_norm": 0.2737066447734833, "learning_rate": 3.077434060921394e-06, "loss": 0.3027, "step": 13873 }, { "epoch": 1.989389159736163, "grad_norm": 0.2685994803905487, "learning_rate": 3.076663981616596e-06, "loss": 0.2952, "step": 13874 }, { "epoch": 1.989532549469458, "grad_norm": 0.2970195710659027, "learning_rate": 3.075893955854514e-06, "loss": 0.3006, "step": 13875 }, { "epoch": 1.989675939202753, "grad_norm": 0.2771969139575958, "learning_rate": 3.075123983656587e-06, "loss": 0.3148, "step": 13876 }, { "epoch": 1.9898193289360482, "grad_norm": 0.29414933919906616, "learning_rate": 3.074354065044245e-06, "loss": 0.3033, "step": 13877 }, { "epoch": 1.9899627186693434, "grad_norm": 0.2826137840747833, "learning_rate": 3.0735842000389237e-06, "loss": 0.3236, "step": 13878 }, { "epoch": 1.9901061084026384, "grad_norm": 0.2745234966278076, "learning_rate": 3.0728143886620542e-06, "loss": 0.3033, "step": 13879 }, { "epoch": 1.9902494981359333, "grad_norm": 0.27839142084121704, "learning_rate": 3.0720446309350683e-06, "loss": 0.3041, "step": 13880 }, { "epoch": 1.9903928878692285, "grad_norm": 0.2735622525215149, "learning_rate": 3.071274926879394e-06, "loss": 0.2968, "step": 13881 }, { "epoch": 1.9905362776025237, "grad_norm": 0.27243223786354065, "learning_rate": 3.07050527651646e-06, "loss": 0.2984, "step": 13882 }, { "epoch": 1.9906796673358187, "grad_norm": 0.2700546085834503, "learning_rate": 3.06973567986769e-06, "loss": 0.2961, "step": 13883 }, { "epoch": 1.9908230570691139, "grad_norm": 0.2765374183654785, "learning_rate": 3.068966136954509e-06, "loss": 0.3104, "step": 13884 }, { "epoch": 1.990966446802409, "grad_norm": 0.2715241014957428, "learning_rate": 3.0681966477983403e-06, "loss": 0.3088, "step": 13885 }, { "epoch": 1.991109836535704, "grad_norm": 0.25350913405418396, "learning_rate": 3.067427212420605e-06, "loss": 0.3003, "step": 13886 }, { "epoch": 1.991253226268999, "grad_norm": 0.2689211070537567, "learning_rate": 3.0666578308427253e-06, "loss": 0.2919, "step": 13887 }, { "epoch": 1.9913966160022942, "grad_norm": 0.26000237464904785, "learning_rate": 3.0658885030861154e-06, "loss": 0.3051, "step": 13888 }, { "epoch": 1.9915400057355894, "grad_norm": 0.26109734177589417, "learning_rate": 3.065119229172195e-06, "loss": 0.2953, "step": 13889 }, { "epoch": 1.9916833954688844, "grad_norm": 0.28047847747802734, "learning_rate": 3.064350009122379e-06, "loss": 0.3014, "step": 13890 }, { "epoch": 1.9918267852021794, "grad_norm": 0.260486364364624, "learning_rate": 3.0635808429580806e-06, "loss": 0.292, "step": 13891 }, { "epoch": 1.9919701749354746, "grad_norm": 0.2753509283065796, "learning_rate": 3.0628117307007128e-06, "loss": 0.3189, "step": 13892 }, { "epoch": 1.9921135646687698, "grad_norm": 0.28820905089378357, "learning_rate": 3.0620426723716885e-06, "loss": 0.3099, "step": 13893 }, { "epoch": 1.9922569544020647, "grad_norm": 0.2743584215641022, "learning_rate": 3.061273667992413e-06, "loss": 0.3313, "step": 13894 }, { "epoch": 1.99240034413536, "grad_norm": 0.2623857855796814, "learning_rate": 3.0605047175842967e-06, "loss": 0.2853, "step": 13895 }, { "epoch": 1.9925437338686551, "grad_norm": 0.2619759738445282, "learning_rate": 3.0597358211687454e-06, "loss": 0.2826, "step": 13896 }, { "epoch": 1.99268712360195, "grad_norm": 0.2877287268638611, "learning_rate": 3.058966978767164e-06, "loss": 0.2947, "step": 13897 }, { "epoch": 1.992830513335245, "grad_norm": 0.26620277762413025, "learning_rate": 3.0581981904009572e-06, "loss": 0.2945, "step": 13898 }, { "epoch": 1.9929739030685403, "grad_norm": 0.2591054141521454, "learning_rate": 3.057429456091524e-06, "loss": 0.3099, "step": 13899 }, { "epoch": 1.9931172928018355, "grad_norm": 0.282650351524353, "learning_rate": 3.0566607758602674e-06, "loss": 0.3128, "step": 13900 }, { "epoch": 1.9932606825351304, "grad_norm": 0.27013540267944336, "learning_rate": 3.0558921497285856e-06, "loss": 0.2963, "step": 13901 }, { "epoch": 1.9934040722684254, "grad_norm": 0.2722240686416626, "learning_rate": 3.055123577717875e-06, "loss": 0.3113, "step": 13902 }, { "epoch": 1.9935474620017208, "grad_norm": 0.28581973910331726, "learning_rate": 3.0543550598495343e-06, "loss": 0.2975, "step": 13903 }, { "epoch": 1.9936908517350158, "grad_norm": 0.27797001600265503, "learning_rate": 3.053586596144954e-06, "loss": 0.2842, "step": 13904 }, { "epoch": 1.9938342414683108, "grad_norm": 0.2537083029747009, "learning_rate": 3.052818186625529e-06, "loss": 0.2985, "step": 13905 }, { "epoch": 1.993977631201606, "grad_norm": 0.2767851650714874, "learning_rate": 3.0520498313126502e-06, "loss": 0.3189, "step": 13906 }, { "epoch": 1.9941210209349012, "grad_norm": 0.2624533176422119, "learning_rate": 3.051281530227707e-06, "loss": 0.2952, "step": 13907 }, { "epoch": 1.9942644106681962, "grad_norm": 0.2623003423213959, "learning_rate": 3.0505132833920893e-06, "loss": 0.3275, "step": 13908 }, { "epoch": 1.9944078004014911, "grad_norm": 0.2697847783565521, "learning_rate": 3.0497450908271832e-06, "loss": 0.2879, "step": 13909 }, { "epoch": 1.9945511901347863, "grad_norm": 0.2659883201122284, "learning_rate": 3.0489769525543722e-06, "loss": 0.2928, "step": 13910 }, { "epoch": 1.9946945798680815, "grad_norm": 0.271894633769989, "learning_rate": 3.0482088685950426e-06, "loss": 0.2988, "step": 13911 }, { "epoch": 1.9948379696013765, "grad_norm": 0.28885066509246826, "learning_rate": 3.0474408389705756e-06, "loss": 0.3042, "step": 13912 }, { "epoch": 1.9949813593346717, "grad_norm": 0.26649796962738037, "learning_rate": 3.046672863702351e-06, "loss": 0.2927, "step": 13913 }, { "epoch": 1.9951247490679669, "grad_norm": 0.24819494783878326, "learning_rate": 3.0459049428117513e-06, "loss": 0.3153, "step": 13914 }, { "epoch": 1.9952681388012619, "grad_norm": 0.2656151056289673, "learning_rate": 3.0451370763201506e-06, "loss": 0.3084, "step": 13915 }, { "epoch": 1.9954115285345568, "grad_norm": 0.28339383006095886, "learning_rate": 3.0443692642489264e-06, "loss": 0.3095, "step": 13916 }, { "epoch": 1.995554918267852, "grad_norm": 0.24572321772575378, "learning_rate": 3.043601506619454e-06, "loss": 0.2767, "step": 13917 }, { "epoch": 1.9956983080011472, "grad_norm": 0.2712375819683075, "learning_rate": 3.0428338034531057e-06, "loss": 0.303, "step": 13918 }, { "epoch": 1.9958416977344422, "grad_norm": 0.27988380193710327, "learning_rate": 3.042066154771254e-06, "loss": 0.2932, "step": 13919 }, { "epoch": 1.9959850874677372, "grad_norm": 0.2667945325374603, "learning_rate": 3.0412985605952705e-06, "loss": 0.3001, "step": 13920 }, { "epoch": 1.9961284772010324, "grad_norm": 0.29924532771110535, "learning_rate": 3.04053102094652e-06, "loss": 0.3053, "step": 13921 }, { "epoch": 1.9962718669343276, "grad_norm": 0.25582343339920044, "learning_rate": 3.039763535846372e-06, "loss": 0.281, "step": 13922 }, { "epoch": 1.9964152566676225, "grad_norm": 0.2920742928981781, "learning_rate": 3.038996105316191e-06, "loss": 0.3126, "step": 13923 }, { "epoch": 1.9965586464009177, "grad_norm": 0.2747664749622345, "learning_rate": 3.0382287293773425e-06, "loss": 0.3082, "step": 13924 }, { "epoch": 1.996702036134213, "grad_norm": 0.2911021113395691, "learning_rate": 3.0374614080511895e-06, "loss": 0.2891, "step": 13925 }, { "epoch": 1.996845425867508, "grad_norm": 0.2724820077419281, "learning_rate": 3.03669414135909e-06, "loss": 0.2919, "step": 13926 }, { "epoch": 1.9969888156008029, "grad_norm": 0.26795685291290283, "learning_rate": 3.0359269293224063e-06, "loss": 0.2911, "step": 13927 }, { "epoch": 1.997132205334098, "grad_norm": 0.2890531122684479, "learning_rate": 3.035159771962496e-06, "loss": 0.2948, "step": 13928 }, { "epoch": 1.9972755950673933, "grad_norm": 0.28261134028434753, "learning_rate": 3.034392669300714e-06, "loss": 0.3182, "step": 13929 }, { "epoch": 1.9974189848006882, "grad_norm": 0.2584385573863983, "learning_rate": 3.033625621358419e-06, "loss": 0.2917, "step": 13930 }, { "epoch": 1.9975623745339832, "grad_norm": 0.2832307517528534, "learning_rate": 3.03285862815696e-06, "loss": 0.3114, "step": 13931 }, { "epoch": 1.9977057642672784, "grad_norm": 0.2751474678516388, "learning_rate": 3.0320916897176906e-06, "loss": 0.2888, "step": 13932 }, { "epoch": 1.9978491540005736, "grad_norm": 0.27256155014038086, "learning_rate": 3.031324806061961e-06, "loss": 0.3022, "step": 13933 }, { "epoch": 1.9979925437338686, "grad_norm": 0.2640223801136017, "learning_rate": 3.0305579772111206e-06, "loss": 0.2802, "step": 13934 }, { "epoch": 1.9981359334671638, "grad_norm": 0.27960699796676636, "learning_rate": 3.0297912031865172e-06, "loss": 0.2927, "step": 13935 }, { "epoch": 1.998279323200459, "grad_norm": 0.2754304111003876, "learning_rate": 3.029024484009496e-06, "loss": 0.3097, "step": 13936 }, { "epoch": 1.998422712933754, "grad_norm": 0.262624591588974, "learning_rate": 3.028257819701401e-06, "loss": 0.3013, "step": 13937 }, { "epoch": 1.998566102667049, "grad_norm": 0.27345138788223267, "learning_rate": 3.0274912102835747e-06, "loss": 0.2936, "step": 13938 }, { "epoch": 1.9987094924003441, "grad_norm": 0.273811399936676, "learning_rate": 3.026724655777359e-06, "loss": 0.3, "step": 13939 }, { "epoch": 1.9988528821336393, "grad_norm": 0.27447202801704407, "learning_rate": 3.0259581562040928e-06, "loss": 0.2996, "step": 13940 }, { "epoch": 1.9989962718669343, "grad_norm": 0.26534754037857056, "learning_rate": 3.025191711585118e-06, "loss": 0.3062, "step": 13941 }, { "epoch": 1.9991396616002293, "grad_norm": 0.24827371537685394, "learning_rate": 3.0244253219417647e-06, "loss": 0.3086, "step": 13942 }, { "epoch": 1.9992830513335247, "grad_norm": 0.28603920340538025, "learning_rate": 3.0236589872953726e-06, "loss": 0.318, "step": 13943 }, { "epoch": 1.9994264410668197, "grad_norm": 0.26863589882850647, "learning_rate": 3.0228927076672733e-06, "loss": 0.2843, "step": 13944 }, { "epoch": 1.9995698308001146, "grad_norm": 0.2767474949359894, "learning_rate": 3.0221264830787998e-06, "loss": 0.3075, "step": 13945 }, { "epoch": 1.9997132205334098, "grad_norm": 0.2651631832122803, "learning_rate": 3.0213603135512837e-06, "loss": 0.2805, "step": 13946 }, { "epoch": 1.999856610266705, "grad_norm": 0.30109426379203796, "learning_rate": 3.0205941991060538e-06, "loss": 0.2961, "step": 13947 }, { "epoch": 2.0, "grad_norm": 0.29340311884880066, "learning_rate": 3.0198281397644345e-06, "loss": 0.2893, "step": 13948 }, { "epoch": 2.000143389733295, "grad_norm": 0.27939677238464355, "learning_rate": 3.0190621355477533e-06, "loss": 0.2812, "step": 13949 }, { "epoch": 2.0002867794665904, "grad_norm": 0.2944352626800537, "learning_rate": 3.0182961864773364e-06, "loss": 0.2975, "step": 13950 }, { "epoch": 2.0004301691998854, "grad_norm": 0.2832344174385071, "learning_rate": 3.0175302925745044e-06, "loss": 0.2721, "step": 13951 }, { "epoch": 2.0005735589331803, "grad_norm": 0.2589770555496216, "learning_rate": 3.01676445386058e-06, "loss": 0.2873, "step": 13952 }, { "epoch": 2.0007169486664753, "grad_norm": 0.28359073400497437, "learning_rate": 3.0159986703568817e-06, "loss": 0.2787, "step": 13953 }, { "epoch": 2.0008603383997707, "grad_norm": 0.2748209834098816, "learning_rate": 3.015232942084729e-06, "loss": 0.2719, "step": 13954 }, { "epoch": 2.0010037281330657, "grad_norm": 0.2862526774406433, "learning_rate": 3.0144672690654374e-06, "loss": 0.2947, "step": 13955 }, { "epoch": 2.0011471178663607, "grad_norm": 0.2979702651500702, "learning_rate": 3.0137016513203227e-06, "loss": 0.2857, "step": 13956 }, { "epoch": 2.0012905075996557, "grad_norm": 0.29284289479255676, "learning_rate": 3.012936088870701e-06, "loss": 0.2809, "step": 13957 }, { "epoch": 2.001433897332951, "grad_norm": 0.27015256881713867, "learning_rate": 3.01217058173788e-06, "loss": 0.2833, "step": 13958 }, { "epoch": 2.001577287066246, "grad_norm": 0.26014038920402527, "learning_rate": 3.011405129943171e-06, "loss": 0.299, "step": 13959 }, { "epoch": 2.001720676799541, "grad_norm": 0.2610284388065338, "learning_rate": 3.0106397335078852e-06, "loss": 0.2874, "step": 13960 }, { "epoch": 2.0018640665328364, "grad_norm": 0.2760811150074005, "learning_rate": 3.009874392453329e-06, "loss": 0.2813, "step": 13961 }, { "epoch": 2.0020074562661314, "grad_norm": 0.27203530073165894, "learning_rate": 3.009109106800808e-06, "loss": 0.2799, "step": 13962 }, { "epoch": 2.0021508459994264, "grad_norm": 0.31290769577026367, "learning_rate": 3.008343876571628e-06, "loss": 0.2746, "step": 13963 }, { "epoch": 2.0022942357327214, "grad_norm": 0.28805696964263916, "learning_rate": 3.0075787017870896e-06, "loss": 0.2968, "step": 13964 }, { "epoch": 2.002437625466017, "grad_norm": 0.2780969738960266, "learning_rate": 3.0068135824684953e-06, "loss": 0.2775, "step": 13965 }, { "epoch": 2.0025810151993118, "grad_norm": 0.2670140266418457, "learning_rate": 3.006048518637145e-06, "loss": 0.2776, "step": 13966 }, { "epoch": 2.0027244049326067, "grad_norm": 0.28846147656440735, "learning_rate": 3.0052835103143364e-06, "loss": 0.3016, "step": 13967 }, { "epoch": 2.0028677946659017, "grad_norm": 0.24993593990802765, "learning_rate": 3.0045185575213688e-06, "loss": 0.2716, "step": 13968 }, { "epoch": 2.003011184399197, "grad_norm": 0.292061984539032, "learning_rate": 3.0037536602795327e-06, "loss": 0.271, "step": 13969 }, { "epoch": 2.003154574132492, "grad_norm": 0.2744864523410797, "learning_rate": 3.002988818610124e-06, "loss": 0.2753, "step": 13970 }, { "epoch": 2.003297963865787, "grad_norm": 0.28100964426994324, "learning_rate": 3.002224032534433e-06, "loss": 0.2939, "step": 13971 }, { "epoch": 2.0034413535990825, "grad_norm": 0.2680167853832245, "learning_rate": 3.0014593020737536e-06, "loss": 0.2854, "step": 13972 }, { "epoch": 2.0035847433323775, "grad_norm": 0.2618260681629181, "learning_rate": 3.000694627249372e-06, "loss": 0.2721, "step": 13973 }, { "epoch": 2.0037281330656724, "grad_norm": 0.2921803891658783, "learning_rate": 2.9999300080825792e-06, "loss": 0.271, "step": 13974 }, { "epoch": 2.0038715227989674, "grad_norm": 0.27549973130226135, "learning_rate": 2.999165444594656e-06, "loss": 0.2779, "step": 13975 }, { "epoch": 2.004014912532263, "grad_norm": 0.293617844581604, "learning_rate": 2.9984009368068895e-06, "loss": 0.3048, "step": 13976 }, { "epoch": 2.004158302265558, "grad_norm": 0.2667071521282196, "learning_rate": 2.9976364847405614e-06, "loss": 0.2985, "step": 13977 }, { "epoch": 2.0043016919988528, "grad_norm": 0.2881963551044464, "learning_rate": 2.9968720884169533e-06, "loss": 0.2706, "step": 13978 }, { "epoch": 2.004445081732148, "grad_norm": 0.2569873034954071, "learning_rate": 2.9961077478573462e-06, "loss": 0.2751, "step": 13979 }, { "epoch": 2.004588471465443, "grad_norm": 0.2481982260942459, "learning_rate": 2.9953434630830157e-06, "loss": 0.2823, "step": 13980 }, { "epoch": 2.004731861198738, "grad_norm": 0.2707689106464386, "learning_rate": 2.994579234115239e-06, "loss": 0.2692, "step": 13981 }, { "epoch": 2.004875250932033, "grad_norm": 0.2748081386089325, "learning_rate": 2.993815060975292e-06, "loss": 0.2946, "step": 13982 }, { "epoch": 2.0050186406653285, "grad_norm": 0.2763102650642395, "learning_rate": 2.9930509436844473e-06, "loss": 0.2868, "step": 13983 }, { "epoch": 2.0051620303986235, "grad_norm": 0.26967719197273254, "learning_rate": 2.992286882263977e-06, "loss": 0.289, "step": 13984 }, { "epoch": 2.0053054201319185, "grad_norm": 0.2654525637626648, "learning_rate": 2.991522876735154e-06, "loss": 0.2913, "step": 13985 }, { "epoch": 2.0054488098652135, "grad_norm": 0.2623424530029297, "learning_rate": 2.9907589271192423e-06, "loss": 0.2766, "step": 13986 }, { "epoch": 2.005592199598509, "grad_norm": 0.2647208571434021, "learning_rate": 2.989995033437511e-06, "loss": 0.2867, "step": 13987 }, { "epoch": 2.005735589331804, "grad_norm": 0.2883822023868561, "learning_rate": 2.9892311957112263e-06, "loss": 0.284, "step": 13988 }, { "epoch": 2.005878979065099, "grad_norm": 0.2726749777793884, "learning_rate": 2.988467413961652e-06, "loss": 0.2931, "step": 13989 }, { "epoch": 2.0060223687983942, "grad_norm": 0.27426809072494507, "learning_rate": 2.9877036882100515e-06, "loss": 0.2833, "step": 13990 }, { "epoch": 2.006165758531689, "grad_norm": 0.27789682149887085, "learning_rate": 2.9869400184776832e-06, "loss": 0.276, "step": 13991 }, { "epoch": 2.006309148264984, "grad_norm": 0.27302229404449463, "learning_rate": 2.986176404785809e-06, "loss": 0.2758, "step": 13992 }, { "epoch": 2.006452537998279, "grad_norm": 0.27867594361305237, "learning_rate": 2.9854128471556855e-06, "loss": 0.2795, "step": 13993 }, { "epoch": 2.0065959277315746, "grad_norm": 0.27277445793151855, "learning_rate": 2.984649345608569e-06, "loss": 0.2814, "step": 13994 }, { "epoch": 2.0067393174648696, "grad_norm": 0.2852219045162201, "learning_rate": 2.9838859001657174e-06, "loss": 0.2769, "step": 13995 }, { "epoch": 2.0068827071981645, "grad_norm": 0.27625957131385803, "learning_rate": 2.9831225108483785e-06, "loss": 0.2708, "step": 13996 }, { "epoch": 2.0070260969314595, "grad_norm": 0.27811530232429504, "learning_rate": 2.982359177677806e-06, "loss": 0.2795, "step": 13997 }, { "epoch": 2.007169486664755, "grad_norm": 0.2589243948459625, "learning_rate": 2.9815959006752503e-06, "loss": 0.2922, "step": 13998 }, { "epoch": 2.00731287639805, "grad_norm": 0.2639351785182953, "learning_rate": 2.9808326798619593e-06, "loss": 0.2684, "step": 13999 }, { "epoch": 2.007456266131345, "grad_norm": 0.2735798954963684, "learning_rate": 2.980069515259181e-06, "loss": 0.2727, "step": 14000 }, { "epoch": 2.0075996558646403, "grad_norm": 0.279201477766037, "learning_rate": 2.979306406888163e-06, "loss": 0.2766, "step": 14001 }, { "epoch": 2.0077430455979353, "grad_norm": 0.27575087547302246, "learning_rate": 2.978543354770144e-06, "loss": 0.3004, "step": 14002 }, { "epoch": 2.0078864353312302, "grad_norm": 0.2700839340686798, "learning_rate": 2.9777803589263675e-06, "loss": 0.3023, "step": 14003 }, { "epoch": 2.008029825064525, "grad_norm": 0.25720760226249695, "learning_rate": 2.9770174193780755e-06, "loss": 0.259, "step": 14004 }, { "epoch": 2.0081732147978206, "grad_norm": 0.27785322070121765, "learning_rate": 2.9762545361465067e-06, "loss": 0.2926, "step": 14005 }, { "epoch": 2.0083166045311156, "grad_norm": 0.2567565143108368, "learning_rate": 2.9754917092528995e-06, "loss": 0.2687, "step": 14006 }, { "epoch": 2.0084599942644106, "grad_norm": 0.2890687882900238, "learning_rate": 2.974728938718488e-06, "loss": 0.2648, "step": 14007 }, { "epoch": 2.0086033839977055, "grad_norm": 0.28720521926879883, "learning_rate": 2.9739662245645073e-06, "loss": 0.2788, "step": 14008 }, { "epoch": 2.008746773731001, "grad_norm": 0.26065942645072937, "learning_rate": 2.97320356681219e-06, "loss": 0.2719, "step": 14009 }, { "epoch": 2.008890163464296, "grad_norm": 0.2673683762550354, "learning_rate": 2.972440965482768e-06, "loss": 0.2922, "step": 14010 }, { "epoch": 2.009033553197591, "grad_norm": 0.26585718989372253, "learning_rate": 2.9716784205974703e-06, "loss": 0.2785, "step": 14011 }, { "epoch": 2.0091769429308863, "grad_norm": 0.2733840346336365, "learning_rate": 2.970915932177527e-06, "loss": 0.2773, "step": 14012 }, { "epoch": 2.0093203326641813, "grad_norm": 0.27688345313072205, "learning_rate": 2.9701535002441617e-06, "loss": 0.2721, "step": 14013 }, { "epoch": 2.0094637223974763, "grad_norm": 0.2482430338859558, "learning_rate": 2.9693911248185996e-06, "loss": 0.2847, "step": 14014 }, { "epoch": 2.0096071121307713, "grad_norm": 0.27440789341926575, "learning_rate": 2.9686288059220653e-06, "loss": 0.2918, "step": 14015 }, { "epoch": 2.0097505018640667, "grad_norm": 0.28085678815841675, "learning_rate": 2.9678665435757802e-06, "loss": 0.2822, "step": 14016 }, { "epoch": 2.0098938915973616, "grad_norm": 0.2726622521877289, "learning_rate": 2.9671043378009657e-06, "loss": 0.2912, "step": 14017 }, { "epoch": 2.0100372813306566, "grad_norm": 0.26908159255981445, "learning_rate": 2.966342188618838e-06, "loss": 0.2934, "step": 14018 }, { "epoch": 2.010180671063952, "grad_norm": 0.2877890467643738, "learning_rate": 2.9655800960506156e-06, "loss": 0.2957, "step": 14019 }, { "epoch": 2.010324060797247, "grad_norm": 0.2731461524963379, "learning_rate": 2.964818060117514e-06, "loss": 0.2715, "step": 14020 }, { "epoch": 2.010467450530542, "grad_norm": 0.2906479835510254, "learning_rate": 2.964056080840746e-06, "loss": 0.2719, "step": 14021 }, { "epoch": 2.010610840263837, "grad_norm": 0.28511062264442444, "learning_rate": 2.9632941582415275e-06, "loss": 0.2858, "step": 14022 }, { "epoch": 2.0107542299971324, "grad_norm": 0.2889536917209625, "learning_rate": 2.9625322923410644e-06, "loss": 0.2923, "step": 14023 }, { "epoch": 2.0108976197304274, "grad_norm": 0.2717704772949219, "learning_rate": 2.9617704831605677e-06, "loss": 0.2882, "step": 14024 }, { "epoch": 2.0110410094637223, "grad_norm": 0.2936982810497284, "learning_rate": 2.961008730721246e-06, "loss": 0.2937, "step": 14025 }, { "epoch": 2.0111843991970173, "grad_norm": 0.3058421313762665, "learning_rate": 2.9602470350443046e-06, "loss": 0.2918, "step": 14026 }, { "epoch": 2.0113277889303127, "grad_norm": 0.26411116123199463, "learning_rate": 2.959485396150947e-06, "loss": 0.2933, "step": 14027 }, { "epoch": 2.0114711786636077, "grad_norm": 0.28546077013015747, "learning_rate": 2.9587238140623797e-06, "loss": 0.2782, "step": 14028 }, { "epoch": 2.0116145683969027, "grad_norm": 0.2742670774459839, "learning_rate": 2.9579622887997993e-06, "loss": 0.276, "step": 14029 }, { "epoch": 2.011757958130198, "grad_norm": 0.28354570269584656, "learning_rate": 2.9572008203844084e-06, "loss": 0.2861, "step": 14030 }, { "epoch": 2.011901347863493, "grad_norm": 0.2838078439235687, "learning_rate": 2.956439408837404e-06, "loss": 0.28, "step": 14031 }, { "epoch": 2.012044737596788, "grad_norm": 0.29285624623298645, "learning_rate": 2.9556780541799824e-06, "loss": 0.2849, "step": 14032 }, { "epoch": 2.012188127330083, "grad_norm": 0.27462443709373474, "learning_rate": 2.9549167564333404e-06, "loss": 0.2885, "step": 14033 }, { "epoch": 2.0123315170633784, "grad_norm": 0.2783254086971283, "learning_rate": 2.9541555156186683e-06, "loss": 0.2846, "step": 14034 }, { "epoch": 2.0124749067966734, "grad_norm": 0.2808690071105957, "learning_rate": 2.95339433175716e-06, "loss": 0.2831, "step": 14035 }, { "epoch": 2.0126182965299684, "grad_norm": 0.29452046751976013, "learning_rate": 2.952633204870006e-06, "loss": 0.2697, "step": 14036 }, { "epoch": 2.0127616862632633, "grad_norm": 0.28511372208595276, "learning_rate": 2.951872134978393e-06, "loss": 0.2733, "step": 14037 }, { "epoch": 2.0129050759965588, "grad_norm": 0.2705402076244354, "learning_rate": 2.9511111221035094e-06, "loss": 0.2836, "step": 14038 }, { "epoch": 2.0130484657298537, "grad_norm": 0.31174999475479126, "learning_rate": 2.9503501662665422e-06, "loss": 0.2707, "step": 14039 }, { "epoch": 2.0131918554631487, "grad_norm": 0.30123329162597656, "learning_rate": 2.9495892674886718e-06, "loss": 0.2912, "step": 14040 }, { "epoch": 2.013335245196444, "grad_norm": 0.26635241508483887, "learning_rate": 2.9488284257910827e-06, "loss": 0.286, "step": 14041 }, { "epoch": 2.013478634929739, "grad_norm": 0.2698884606361389, "learning_rate": 2.948067641194954e-06, "loss": 0.2853, "step": 14042 }, { "epoch": 2.013622024663034, "grad_norm": 0.31322312355041504, "learning_rate": 2.947306913721466e-06, "loss": 0.2655, "step": 14043 }, { "epoch": 2.013765414396329, "grad_norm": 0.2625868618488312, "learning_rate": 2.946546243391797e-06, "loss": 0.2707, "step": 14044 }, { "epoch": 2.0139088041296245, "grad_norm": 0.27965158224105835, "learning_rate": 2.9457856302271204e-06, "loss": 0.2801, "step": 14045 }, { "epoch": 2.0140521938629194, "grad_norm": 0.2691933810710907, "learning_rate": 2.945025074248612e-06, "loss": 0.2746, "step": 14046 }, { "epoch": 2.0141955835962144, "grad_norm": 0.27578026056289673, "learning_rate": 2.9442645754774448e-06, "loss": 0.2979, "step": 14047 }, { "epoch": 2.0143389733295094, "grad_norm": 0.2877720296382904, "learning_rate": 2.9435041339347893e-06, "loss": 0.277, "step": 14048 }, { "epoch": 2.014482363062805, "grad_norm": 0.2545176148414612, "learning_rate": 2.9427437496418163e-06, "loss": 0.273, "step": 14049 }, { "epoch": 2.0146257527961, "grad_norm": 0.2532820999622345, "learning_rate": 2.941983422619692e-06, "loss": 0.2791, "step": 14050 }, { "epoch": 2.0147691425293948, "grad_norm": 0.2855339050292969, "learning_rate": 2.941223152889583e-06, "loss": 0.3008, "step": 14051 }, { "epoch": 2.01491253226269, "grad_norm": 0.26701271533966064, "learning_rate": 2.9404629404726536e-06, "loss": 0.292, "step": 14052 }, { "epoch": 2.015055921995985, "grad_norm": 0.2565978765487671, "learning_rate": 2.939702785390069e-06, "loss": 0.2728, "step": 14053 }, { "epoch": 2.01519931172928, "grad_norm": 0.2763436734676361, "learning_rate": 2.938942687662989e-06, "loss": 0.2737, "step": 14054 }, { "epoch": 2.015342701462575, "grad_norm": 0.28979137539863586, "learning_rate": 2.938182647312575e-06, "loss": 0.2912, "step": 14055 }, { "epoch": 2.0154860911958705, "grad_norm": 0.2685098350048065, "learning_rate": 2.9374226643599836e-06, "loss": 0.2893, "step": 14056 }, { "epoch": 2.0156294809291655, "grad_norm": 0.28546786308288574, "learning_rate": 2.9366627388263725e-06, "loss": 0.293, "step": 14057 }, { "epoch": 2.0157728706624605, "grad_norm": 0.27448570728302, "learning_rate": 2.9359028707328963e-06, "loss": 0.2986, "step": 14058 }, { "epoch": 2.0159162603957554, "grad_norm": 0.2748511731624603, "learning_rate": 2.9351430601007092e-06, "loss": 0.291, "step": 14059 }, { "epoch": 2.016059650129051, "grad_norm": 0.2796427011489868, "learning_rate": 2.9343833069509643e-06, "loss": 0.2914, "step": 14060 }, { "epoch": 2.016203039862346, "grad_norm": 0.27328136563301086, "learning_rate": 2.933623611304809e-06, "loss": 0.2766, "step": 14061 }, { "epoch": 2.016346429595641, "grad_norm": 0.2790466248989105, "learning_rate": 2.932863973183394e-06, "loss": 0.271, "step": 14062 }, { "epoch": 2.0164898193289362, "grad_norm": 0.27861374616622925, "learning_rate": 2.932104392607866e-06, "loss": 0.2765, "step": 14063 }, { "epoch": 2.016633209062231, "grad_norm": 0.2859664857387543, "learning_rate": 2.931344869599371e-06, "loss": 0.292, "step": 14064 }, { "epoch": 2.016776598795526, "grad_norm": 0.2756343185901642, "learning_rate": 2.9305854041790525e-06, "loss": 0.2873, "step": 14065 }, { "epoch": 2.016919988528821, "grad_norm": 0.2601189613342285, "learning_rate": 2.9298259963680546e-06, "loss": 0.2726, "step": 14066 }, { "epoch": 2.0170633782621166, "grad_norm": 0.2783838212490082, "learning_rate": 2.929066646187515e-06, "loss": 0.2661, "step": 14067 }, { "epoch": 2.0172067679954115, "grad_norm": 0.27201229333877563, "learning_rate": 2.928307353658574e-06, "loss": 0.2911, "step": 14068 }, { "epoch": 2.0173501577287065, "grad_norm": 0.26134172081947327, "learning_rate": 2.9275481188023695e-06, "loss": 0.2992, "step": 14069 }, { "epoch": 2.017493547462002, "grad_norm": 0.2768664062023163, "learning_rate": 2.9267889416400377e-06, "loss": 0.2769, "step": 14070 }, { "epoch": 2.017636937195297, "grad_norm": 0.2967735528945923, "learning_rate": 2.926029822192713e-06, "loss": 0.2745, "step": 14071 }, { "epoch": 2.017780326928592, "grad_norm": 0.28222715854644775, "learning_rate": 2.9252707604815268e-06, "loss": 0.2927, "step": 14072 }, { "epoch": 2.017923716661887, "grad_norm": 0.280582070350647, "learning_rate": 2.9245117565276117e-06, "loss": 0.2836, "step": 14073 }, { "epoch": 2.0180671063951823, "grad_norm": 0.27143144607543945, "learning_rate": 2.923752810352095e-06, "loss": 0.2745, "step": 14074 }, { "epoch": 2.0182104961284772, "grad_norm": 0.2597213685512543, "learning_rate": 2.9229939219761077e-06, "loss": 0.3047, "step": 14075 }, { "epoch": 2.0183538858617722, "grad_norm": 0.27166056632995605, "learning_rate": 2.9222350914207764e-06, "loss": 0.2864, "step": 14076 }, { "epoch": 2.018497275595067, "grad_norm": 0.28943130373954773, "learning_rate": 2.9214763187072214e-06, "loss": 0.2925, "step": 14077 }, { "epoch": 2.0186406653283626, "grad_norm": 0.28673091530799866, "learning_rate": 2.9207176038565687e-06, "loss": 0.284, "step": 14078 }, { "epoch": 2.0187840550616576, "grad_norm": 0.27444425225257874, "learning_rate": 2.9199589468899393e-06, "loss": 0.2799, "step": 14079 }, { "epoch": 2.0189274447949526, "grad_norm": 0.266890287399292, "learning_rate": 2.9192003478284537e-06, "loss": 0.28, "step": 14080 }, { "epoch": 2.019070834528248, "grad_norm": 0.2796008586883545, "learning_rate": 2.9184418066932284e-06, "loss": 0.2667, "step": 14081 }, { "epoch": 2.019214224261543, "grad_norm": 0.30196425318717957, "learning_rate": 2.9176833235053837e-06, "loss": 0.2766, "step": 14082 }, { "epoch": 2.019357613994838, "grad_norm": 0.26035550236701965, "learning_rate": 2.9169248982860298e-06, "loss": 0.2741, "step": 14083 }, { "epoch": 2.019501003728133, "grad_norm": 0.3229130506515503, "learning_rate": 2.916166531056283e-06, "loss": 0.2728, "step": 14084 }, { "epoch": 2.0196443934614283, "grad_norm": 0.27909740805625916, "learning_rate": 2.915408221837252e-06, "loss": 0.2778, "step": 14085 }, { "epoch": 2.0197877831947233, "grad_norm": 0.2841128706932068, "learning_rate": 2.914649970650052e-06, "loss": 0.2991, "step": 14086 }, { "epoch": 2.0199311729280183, "grad_norm": 0.2971951961517334, "learning_rate": 2.9138917775157904e-06, "loss": 0.2865, "step": 14087 }, { "epoch": 2.0200745626613132, "grad_norm": 0.28096675872802734, "learning_rate": 2.91313364245557e-06, "loss": 0.294, "step": 14088 }, { "epoch": 2.0202179523946087, "grad_norm": 0.27435407042503357, "learning_rate": 2.9123755654905e-06, "loss": 0.2927, "step": 14089 }, { "epoch": 2.0203613421279036, "grad_norm": 0.2792142331600189, "learning_rate": 2.911617546641682e-06, "loss": 0.2758, "step": 14090 }, { "epoch": 2.0205047318611986, "grad_norm": 0.2748234272003174, "learning_rate": 2.91085958593022e-06, "loss": 0.2986, "step": 14091 }, { "epoch": 2.020648121594494, "grad_norm": 0.2706679403781891, "learning_rate": 2.9101016833772123e-06, "loss": 0.2763, "step": 14092 }, { "epoch": 2.020791511327789, "grad_norm": 0.28721001744270325, "learning_rate": 2.9093438390037614e-06, "loss": 0.2695, "step": 14093 }, { "epoch": 2.020934901061084, "grad_norm": 0.2974325716495514, "learning_rate": 2.90858605283096e-06, "loss": 0.3002, "step": 14094 }, { "epoch": 2.021078290794379, "grad_norm": 0.27286049723625183, "learning_rate": 2.9078283248799056e-06, "loss": 0.3004, "step": 14095 }, { "epoch": 2.0212216805276744, "grad_norm": 0.2721204161643982, "learning_rate": 2.9070706551716928e-06, "loss": 0.2568, "step": 14096 }, { "epoch": 2.0213650702609693, "grad_norm": 0.2804585099220276, "learning_rate": 2.9063130437274124e-06, "loss": 0.2869, "step": 14097 }, { "epoch": 2.0215084599942643, "grad_norm": 0.28185510635375977, "learning_rate": 2.9055554905681595e-06, "loss": 0.3064, "step": 14098 }, { "epoch": 2.0216518497275593, "grad_norm": 0.27705174684524536, "learning_rate": 2.9047979957150162e-06, "loss": 0.3019, "step": 14099 }, { "epoch": 2.0217952394608547, "grad_norm": 0.3027016818523407, "learning_rate": 2.904040559189075e-06, "loss": 0.287, "step": 14100 }, { "epoch": 2.0219386291941497, "grad_norm": 0.28974610567092896, "learning_rate": 2.9032831810114202e-06, "loss": 0.2999, "step": 14101 }, { "epoch": 2.0220820189274447, "grad_norm": 0.2791289985179901, "learning_rate": 2.902525861203136e-06, "loss": 0.2922, "step": 14102 }, { "epoch": 2.02222540866074, "grad_norm": 0.26577556133270264, "learning_rate": 2.901768599785305e-06, "loss": 0.2906, "step": 14103 }, { "epoch": 2.022368798394035, "grad_norm": 0.2832737863063812, "learning_rate": 2.9010113967790095e-06, "loss": 0.2755, "step": 14104 }, { "epoch": 2.02251218812733, "grad_norm": 0.2727683484554291, "learning_rate": 2.9002542522053268e-06, "loss": 0.269, "step": 14105 }, { "epoch": 2.022655577860625, "grad_norm": 0.2909970283508301, "learning_rate": 2.8994971660853365e-06, "loss": 0.2872, "step": 14106 }, { "epoch": 2.0227989675939204, "grad_norm": 0.2910420298576355, "learning_rate": 2.8987401384401137e-06, "loss": 0.2677, "step": 14107 }, { "epoch": 2.0229423573272154, "grad_norm": 0.26505881547927856, "learning_rate": 2.897983169290733e-06, "loss": 0.2717, "step": 14108 }, { "epoch": 2.0230857470605104, "grad_norm": 0.27733415365219116, "learning_rate": 2.897226258658269e-06, "loss": 0.3005, "step": 14109 }, { "epoch": 2.023229136793806, "grad_norm": 0.27809420228004456, "learning_rate": 2.89646940656379e-06, "loss": 0.2803, "step": 14110 }, { "epoch": 2.0233725265271008, "grad_norm": 0.2992507219314575, "learning_rate": 2.8957126130283663e-06, "loss": 0.291, "step": 14111 }, { "epoch": 2.0235159162603957, "grad_norm": 0.26763904094696045, "learning_rate": 2.894955878073067e-06, "loss": 0.2767, "step": 14112 }, { "epoch": 2.0236593059936907, "grad_norm": 0.2607983648777008, "learning_rate": 2.894199201718957e-06, "loss": 0.2798, "step": 14113 }, { "epoch": 2.023802695726986, "grad_norm": 0.27787965536117554, "learning_rate": 2.8934425839871048e-06, "loss": 0.2738, "step": 14114 }, { "epoch": 2.023946085460281, "grad_norm": 0.25835949182510376, "learning_rate": 2.8926860248985677e-06, "loss": 0.2764, "step": 14115 }, { "epoch": 2.024089475193576, "grad_norm": 0.2878798842430115, "learning_rate": 2.891929524474411e-06, "loss": 0.2772, "step": 14116 }, { "epoch": 2.024232864926871, "grad_norm": 0.28145599365234375, "learning_rate": 2.8911730827356934e-06, "loss": 0.283, "step": 14117 }, { "epoch": 2.0243762546601665, "grad_norm": 0.3096058666706085, "learning_rate": 2.8904166997034723e-06, "loss": 0.2856, "step": 14118 }, { "epoch": 2.0245196443934614, "grad_norm": 0.2426326870918274, "learning_rate": 2.889660375398806e-06, "loss": 0.2677, "step": 14119 }, { "epoch": 2.0246630341267564, "grad_norm": 0.2791867256164551, "learning_rate": 2.888904109842748e-06, "loss": 0.278, "step": 14120 }, { "epoch": 2.024806423860052, "grad_norm": 0.27609074115753174, "learning_rate": 2.8881479030563524e-06, "loss": 0.3016, "step": 14121 }, { "epoch": 2.024949813593347, "grad_norm": 0.2886514663696289, "learning_rate": 2.8873917550606708e-06, "loss": 0.2849, "step": 14122 }, { "epoch": 2.0250932033266418, "grad_norm": 0.2639271020889282, "learning_rate": 2.8866356658767525e-06, "loss": 0.2628, "step": 14123 }, { "epoch": 2.0252365930599368, "grad_norm": 0.25981950759887695, "learning_rate": 2.8858796355256463e-06, "loss": 0.2715, "step": 14124 }, { "epoch": 2.025379982793232, "grad_norm": 0.26218271255493164, "learning_rate": 2.885123664028401e-06, "loss": 0.2767, "step": 14125 }, { "epoch": 2.025523372526527, "grad_norm": 0.2685973346233368, "learning_rate": 2.8843677514060573e-06, "loss": 0.2802, "step": 14126 }, { "epoch": 2.025666762259822, "grad_norm": 0.28860411047935486, "learning_rate": 2.883611897679661e-06, "loss": 0.2922, "step": 14127 }, { "epoch": 2.025810151993117, "grad_norm": 0.2962353527545929, "learning_rate": 2.882856102870255e-06, "loss": 0.2624, "step": 14128 }, { "epoch": 2.0259535417264125, "grad_norm": 0.29897135496139526, "learning_rate": 2.8821003669988773e-06, "loss": 0.277, "step": 14129 }, { "epoch": 2.0260969314597075, "grad_norm": 0.2610849440097809, "learning_rate": 2.8813446900865693e-06, "loss": 0.2826, "step": 14130 }, { "epoch": 2.0262403211930025, "grad_norm": 0.27562686800956726, "learning_rate": 2.8805890721543643e-06, "loss": 0.2748, "step": 14131 }, { "epoch": 2.026383710926298, "grad_norm": 0.26884180307388306, "learning_rate": 2.8798335132232998e-06, "loss": 0.283, "step": 14132 }, { "epoch": 2.026527100659593, "grad_norm": 0.2599153518676758, "learning_rate": 2.879078013314408e-06, "loss": 0.2803, "step": 14133 }, { "epoch": 2.026670490392888, "grad_norm": 0.27264782786369324, "learning_rate": 2.878322572448722e-06, "loss": 0.2804, "step": 14134 }, { "epoch": 2.026813880126183, "grad_norm": 0.2711849808692932, "learning_rate": 2.8775671906472725e-06, "loss": 0.2796, "step": 14135 }, { "epoch": 2.026957269859478, "grad_norm": 0.2668548822402954, "learning_rate": 2.8768118679310888e-06, "loss": 0.2881, "step": 14136 }, { "epoch": 2.027100659592773, "grad_norm": 0.26679688692092896, "learning_rate": 2.8760566043211945e-06, "loss": 0.2796, "step": 14137 }, { "epoch": 2.027244049326068, "grad_norm": 0.26256194710731506, "learning_rate": 2.875301399838617e-06, "loss": 0.2978, "step": 14138 }, { "epoch": 2.027387439059363, "grad_norm": 0.27098366618156433, "learning_rate": 2.8745462545043806e-06, "loss": 0.27, "step": 14139 }, { "epoch": 2.0275308287926586, "grad_norm": 0.2756800353527069, "learning_rate": 2.873791168339507e-06, "loss": 0.2732, "step": 14140 }, { "epoch": 2.0276742185259535, "grad_norm": 0.29005032777786255, "learning_rate": 2.8730361413650165e-06, "loss": 0.3112, "step": 14141 }, { "epoch": 2.0278176082592485, "grad_norm": 0.2861959636211395, "learning_rate": 2.8722811736019272e-06, "loss": 0.2727, "step": 14142 }, { "epoch": 2.027960997992544, "grad_norm": 0.2858811616897583, "learning_rate": 2.8715262650712577e-06, "loss": 0.2943, "step": 14143 }, { "epoch": 2.028104387725839, "grad_norm": 0.27463120222091675, "learning_rate": 2.8707714157940225e-06, "loss": 0.2798, "step": 14144 }, { "epoch": 2.028247777459134, "grad_norm": 0.2842727303504944, "learning_rate": 2.870016625791236e-06, "loss": 0.2954, "step": 14145 }, { "epoch": 2.028391167192429, "grad_norm": 0.3002273440361023, "learning_rate": 2.86926189508391e-06, "loss": 0.2865, "step": 14146 }, { "epoch": 2.0285345569257243, "grad_norm": 0.2986183166503906, "learning_rate": 2.8685072236930574e-06, "loss": 0.2793, "step": 14147 }, { "epoch": 2.0286779466590192, "grad_norm": 0.30077236890792847, "learning_rate": 2.8677526116396827e-06, "loss": 0.2931, "step": 14148 }, { "epoch": 2.028821336392314, "grad_norm": 0.28820958733558655, "learning_rate": 2.8669980589447943e-06, "loss": 0.281, "step": 14149 }, { "epoch": 2.0289647261256096, "grad_norm": 0.2628823220729828, "learning_rate": 2.8662435656294e-06, "loss": 0.2752, "step": 14150 }, { "epoch": 2.0291081158589046, "grad_norm": 0.2865910530090332, "learning_rate": 2.865489131714502e-06, "loss": 0.2859, "step": 14151 }, { "epoch": 2.0292515055921996, "grad_norm": 0.2900839149951935, "learning_rate": 2.8647347572211053e-06, "loss": 0.2719, "step": 14152 }, { "epoch": 2.0293948953254946, "grad_norm": 0.27943769097328186, "learning_rate": 2.863980442170206e-06, "loss": 0.2703, "step": 14153 }, { "epoch": 2.02953828505879, "grad_norm": 0.2760370373725891, "learning_rate": 2.863226186582806e-06, "loss": 0.2921, "step": 14154 }, { "epoch": 2.029681674792085, "grad_norm": 0.27071142196655273, "learning_rate": 2.8624719904799013e-06, "loss": 0.2852, "step": 14155 }, { "epoch": 2.02982506452538, "grad_norm": 0.25924062728881836, "learning_rate": 2.8617178538824884e-06, "loss": 0.2801, "step": 14156 }, { "epoch": 2.029968454258675, "grad_norm": 0.2802048325538635, "learning_rate": 2.860963776811562e-06, "loss": 0.2758, "step": 14157 }, { "epoch": 2.0301118439919703, "grad_norm": 0.2888210713863373, "learning_rate": 2.8602097592881116e-06, "loss": 0.2822, "step": 14158 }, { "epoch": 2.0302552337252653, "grad_norm": 0.2703086733818054, "learning_rate": 2.859455801333132e-06, "loss": 0.2838, "step": 14159 }, { "epoch": 2.0303986234585603, "grad_norm": 0.2652643322944641, "learning_rate": 2.8587019029676087e-06, "loss": 0.2817, "step": 14160 }, { "epoch": 2.0305420131918557, "grad_norm": 0.2618056833744049, "learning_rate": 2.8579480642125312e-06, "loss": 0.27, "step": 14161 }, { "epoch": 2.0306854029251507, "grad_norm": 0.24916090071201324, "learning_rate": 2.8571942850888845e-06, "loss": 0.2627, "step": 14162 }, { "epoch": 2.0308287926584456, "grad_norm": 0.2721713185310364, "learning_rate": 2.8564405656176543e-06, "loss": 0.282, "step": 14163 }, { "epoch": 2.0309721823917406, "grad_norm": 0.2858218252658844, "learning_rate": 2.85568690581982e-06, "loss": 0.301, "step": 14164 }, { "epoch": 2.031115572125036, "grad_norm": 0.2661788761615753, "learning_rate": 2.8549333057163633e-06, "loss": 0.2843, "step": 14165 }, { "epoch": 2.031258961858331, "grad_norm": 0.2969587743282318, "learning_rate": 2.8541797653282633e-06, "loss": 0.2712, "step": 14166 }, { "epoch": 2.031402351591626, "grad_norm": 0.2985200583934784, "learning_rate": 2.853426284676498e-06, "loss": 0.2801, "step": 14167 }, { "epoch": 2.031545741324921, "grad_norm": 0.2905716300010681, "learning_rate": 2.8526728637820445e-06, "loss": 0.2936, "step": 14168 }, { "epoch": 2.0316891310582164, "grad_norm": 0.28447890281677246, "learning_rate": 2.8519195026658738e-06, "loss": 0.2934, "step": 14169 }, { "epoch": 2.0318325207915113, "grad_norm": 0.2900097966194153, "learning_rate": 2.8511662013489594e-06, "loss": 0.279, "step": 14170 }, { "epoch": 2.0319759105248063, "grad_norm": 0.26957640051841736, "learning_rate": 2.8504129598522726e-06, "loss": 0.2889, "step": 14171 }, { "epoch": 2.0321193002581017, "grad_norm": 0.2822262644767761, "learning_rate": 2.8496597781967827e-06, "loss": 0.2983, "step": 14172 }, { "epoch": 2.0322626899913967, "grad_norm": 0.28282979130744934, "learning_rate": 2.848906656403456e-06, "loss": 0.2919, "step": 14173 }, { "epoch": 2.0324060797246917, "grad_norm": 0.2684563994407654, "learning_rate": 2.848153594493259e-06, "loss": 0.2704, "step": 14174 }, { "epoch": 2.0325494694579866, "grad_norm": 0.2804999053478241, "learning_rate": 2.8474005924871567e-06, "loss": 0.2641, "step": 14175 }, { "epoch": 2.032692859191282, "grad_norm": 0.2956949472427368, "learning_rate": 2.84664765040611e-06, "loss": 0.2942, "step": 14176 }, { "epoch": 2.032836248924577, "grad_norm": 0.2821188271045685, "learning_rate": 2.8458947682710804e-06, "loss": 0.2731, "step": 14177 }, { "epoch": 2.032979638657872, "grad_norm": 0.27382299304008484, "learning_rate": 2.8451419461030267e-06, "loss": 0.2628, "step": 14178 }, { "epoch": 2.033123028391167, "grad_norm": 0.2759177088737488, "learning_rate": 2.844389183922908e-06, "loss": 0.2774, "step": 14179 }, { "epoch": 2.0332664181244624, "grad_norm": 0.2793464660644531, "learning_rate": 2.8436364817516772e-06, "loss": 0.2792, "step": 14180 }, { "epoch": 2.0334098078577574, "grad_norm": 0.28627002239227295, "learning_rate": 2.842883839610289e-06, "loss": 0.282, "step": 14181 }, { "epoch": 2.0335531975910524, "grad_norm": 0.278622031211853, "learning_rate": 2.8421312575196967e-06, "loss": 0.279, "step": 14182 }, { "epoch": 2.0336965873243478, "grad_norm": 0.30403679609298706, "learning_rate": 2.841378735500851e-06, "loss": 0.2895, "step": 14183 }, { "epoch": 2.0338399770576427, "grad_norm": 0.2793707549571991, "learning_rate": 2.8406262735747024e-06, "loss": 0.2761, "step": 14184 }, { "epoch": 2.0339833667909377, "grad_norm": 0.26016297936439514, "learning_rate": 2.839873871762195e-06, "loss": 0.2942, "step": 14185 }, { "epoch": 2.0341267565242327, "grad_norm": 0.2779315710067749, "learning_rate": 2.8391215300842756e-06, "loss": 0.2704, "step": 14186 }, { "epoch": 2.034270146257528, "grad_norm": 0.28408339619636536, "learning_rate": 2.838369248561889e-06, "loss": 0.2864, "step": 14187 }, { "epoch": 2.034413535990823, "grad_norm": 0.2742708921432495, "learning_rate": 2.8376170272159775e-06, "loss": 0.2899, "step": 14188 }, { "epoch": 2.034556925724118, "grad_norm": 0.27651941776275635, "learning_rate": 2.8368648660674816e-06, "loss": 0.2827, "step": 14189 }, { "epoch": 2.034700315457413, "grad_norm": 0.27475258708000183, "learning_rate": 2.836112765137342e-06, "loss": 0.2779, "step": 14190 }, { "epoch": 2.0348437051907085, "grad_norm": 0.27895593643188477, "learning_rate": 2.835360724446492e-06, "loss": 0.2664, "step": 14191 }, { "epoch": 2.0349870949240034, "grad_norm": 0.28395509719848633, "learning_rate": 2.8346087440158703e-06, "loss": 0.2819, "step": 14192 }, { "epoch": 2.0351304846572984, "grad_norm": 0.28015318512916565, "learning_rate": 2.83385682386641e-06, "loss": 0.2768, "step": 14193 }, { "epoch": 2.035273874390594, "grad_norm": 0.25541919469833374, "learning_rate": 2.8331049640190432e-06, "loss": 0.2848, "step": 14194 }, { "epoch": 2.035417264123889, "grad_norm": 0.29986047744750977, "learning_rate": 2.8323531644947012e-06, "loss": 0.2803, "step": 14195 }, { "epoch": 2.0355606538571838, "grad_norm": 0.27026909589767456, "learning_rate": 2.8316014253143133e-06, "loss": 0.2931, "step": 14196 }, { "epoch": 2.0357040435904787, "grad_norm": 0.27852171659469604, "learning_rate": 2.830849746498805e-06, "loss": 0.2643, "step": 14197 }, { "epoch": 2.035847433323774, "grad_norm": 0.26311156153678894, "learning_rate": 2.8300981280691033e-06, "loss": 0.2737, "step": 14198 }, { "epoch": 2.035990823057069, "grad_norm": 0.2793005704879761, "learning_rate": 2.829346570046132e-06, "loss": 0.2839, "step": 14199 }, { "epoch": 2.036134212790364, "grad_norm": 0.29693692922592163, "learning_rate": 2.8285950724508128e-06, "loss": 0.296, "step": 14200 }, { "epoch": 2.0362776025236595, "grad_norm": 0.28001630306243896, "learning_rate": 2.827843635304069e-06, "loss": 0.2803, "step": 14201 }, { "epoch": 2.0364209922569545, "grad_norm": 0.27780067920684814, "learning_rate": 2.8270922586268146e-06, "loss": 0.2842, "step": 14202 }, { "epoch": 2.0365643819902495, "grad_norm": 0.2673146426677704, "learning_rate": 2.826340942439969e-06, "loss": 0.2815, "step": 14203 }, { "epoch": 2.0367077717235444, "grad_norm": 0.2865373194217682, "learning_rate": 2.8255896867644483e-06, "loss": 0.2804, "step": 14204 }, { "epoch": 2.03685116145684, "grad_norm": 0.26326844096183777, "learning_rate": 2.8248384916211657e-06, "loss": 0.2752, "step": 14205 }, { "epoch": 2.036994551190135, "grad_norm": 0.2979150414466858, "learning_rate": 2.8240873570310357e-06, "loss": 0.2803, "step": 14206 }, { "epoch": 2.03713794092343, "grad_norm": 0.285602331161499, "learning_rate": 2.8233362830149634e-06, "loss": 0.2895, "step": 14207 }, { "epoch": 2.037281330656725, "grad_norm": 0.2764008939266205, "learning_rate": 2.8225852695938617e-06, "loss": 0.2715, "step": 14208 }, { "epoch": 2.03742472039002, "grad_norm": 0.29255813360214233, "learning_rate": 2.8218343167886366e-06, "loss": 0.2682, "step": 14209 }, { "epoch": 2.037568110123315, "grad_norm": 0.25623515248298645, "learning_rate": 2.8210834246201933e-06, "loss": 0.2783, "step": 14210 }, { "epoch": 2.03771149985661, "grad_norm": 0.29636284708976746, "learning_rate": 2.8203325931094357e-06, "loss": 0.28, "step": 14211 }, { "epoch": 2.0378548895899056, "grad_norm": 0.28061121702194214, "learning_rate": 2.8195818222772652e-06, "loss": 0.285, "step": 14212 }, { "epoch": 2.0379982793232005, "grad_norm": 0.2812548577785492, "learning_rate": 2.8188311121445832e-06, "loss": 0.2849, "step": 14213 }, { "epoch": 2.0381416690564955, "grad_norm": 0.2736932039260864, "learning_rate": 2.8180804627322878e-06, "loss": 0.2886, "step": 14214 }, { "epoch": 2.0382850587897905, "grad_norm": 0.2660958468914032, "learning_rate": 2.817329874061276e-06, "loss": 0.2813, "step": 14215 }, { "epoch": 2.038428448523086, "grad_norm": 0.28795790672302246, "learning_rate": 2.8165793461524427e-06, "loss": 0.2987, "step": 14216 }, { "epoch": 2.038571838256381, "grad_norm": 0.2660023272037506, "learning_rate": 2.8158288790266837e-06, "loss": 0.2917, "step": 14217 }, { "epoch": 2.038715227989676, "grad_norm": 0.2991487979888916, "learning_rate": 2.815078472704886e-06, "loss": 0.2671, "step": 14218 }, { "epoch": 2.038858617722971, "grad_norm": 0.28269603848457336, "learning_rate": 2.8143281272079436e-06, "loss": 0.2744, "step": 14219 }, { "epoch": 2.0390020074562663, "grad_norm": 0.28904831409454346, "learning_rate": 2.813577842556744e-06, "loss": 0.2763, "step": 14220 }, { "epoch": 2.0391453971895612, "grad_norm": 0.2548820674419403, "learning_rate": 2.812827618772174e-06, "loss": 0.2605, "step": 14221 }, { "epoch": 2.039288786922856, "grad_norm": 0.27908241748809814, "learning_rate": 2.8120774558751195e-06, "loss": 0.2824, "step": 14222 }, { "epoch": 2.0394321766561516, "grad_norm": 0.26067978143692017, "learning_rate": 2.811327353886462e-06, "loss": 0.2819, "step": 14223 }, { "epoch": 2.0395755663894466, "grad_norm": 0.2809767723083496, "learning_rate": 2.810577312827084e-06, "loss": 0.2623, "step": 14224 }, { "epoch": 2.0397189561227416, "grad_norm": 0.25400298833847046, "learning_rate": 2.8098273327178653e-06, "loss": 0.2972, "step": 14225 }, { "epoch": 2.0398623458560365, "grad_norm": 0.2954743504524231, "learning_rate": 2.809077413579685e-06, "loss": 0.2825, "step": 14226 }, { "epoch": 2.040005735589332, "grad_norm": 0.3045474886894226, "learning_rate": 2.8083275554334187e-06, "loss": 0.2815, "step": 14227 }, { "epoch": 2.040149125322627, "grad_norm": 0.28302648663520813, "learning_rate": 2.807577758299944e-06, "loss": 0.2779, "step": 14228 }, { "epoch": 2.040292515055922, "grad_norm": 0.28462648391723633, "learning_rate": 2.8068280222001303e-06, "loss": 0.2723, "step": 14229 }, { "epoch": 2.040435904789217, "grad_norm": 0.26892679929733276, "learning_rate": 2.8060783471548504e-06, "loss": 0.273, "step": 14230 }, { "epoch": 2.0405792945225123, "grad_norm": 0.2557692229747772, "learning_rate": 2.8053287331849725e-06, "loss": 0.2858, "step": 14231 }, { "epoch": 2.0407226842558073, "grad_norm": 0.2764635682106018, "learning_rate": 2.8045791803113685e-06, "loss": 0.275, "step": 14232 }, { "epoch": 2.0408660739891022, "grad_norm": 0.28052252531051636, "learning_rate": 2.8038296885549055e-06, "loss": 0.2777, "step": 14233 }, { "epoch": 2.0410094637223977, "grad_norm": 0.2831832468509674, "learning_rate": 2.803080257936443e-06, "loss": 0.2939, "step": 14234 }, { "epoch": 2.0411528534556926, "grad_norm": 0.2904496192932129, "learning_rate": 2.802330888476847e-06, "loss": 0.2936, "step": 14235 }, { "epoch": 2.0412962431889876, "grad_norm": 0.2800566554069519, "learning_rate": 2.801581580196978e-06, "loss": 0.2653, "step": 14236 }, { "epoch": 2.0414396329222826, "grad_norm": 0.29668548703193665, "learning_rate": 2.8008323331176967e-06, "loss": 0.2666, "step": 14237 }, { "epoch": 2.041583022655578, "grad_norm": 0.2978306710720062, "learning_rate": 2.8000831472598617e-06, "loss": 0.2784, "step": 14238 }, { "epoch": 2.041726412388873, "grad_norm": 0.27190765738487244, "learning_rate": 2.7993340226443257e-06, "loss": 0.2697, "step": 14239 }, { "epoch": 2.041869802122168, "grad_norm": 0.2945743799209595, "learning_rate": 2.7985849592919455e-06, "loss": 0.2861, "step": 14240 }, { "epoch": 2.042013191855463, "grad_norm": 0.27651503682136536, "learning_rate": 2.7978359572235735e-06, "loss": 0.272, "step": 14241 }, { "epoch": 2.0421565815887583, "grad_norm": 0.29909396171569824, "learning_rate": 2.797087016460061e-06, "loss": 0.2764, "step": 14242 }, { "epoch": 2.0422999713220533, "grad_norm": 0.26056182384490967, "learning_rate": 2.7963381370222575e-06, "loss": 0.2861, "step": 14243 }, { "epoch": 2.0424433610553483, "grad_norm": 0.2845945954322815, "learning_rate": 2.795589318931012e-06, "loss": 0.2862, "step": 14244 }, { "epoch": 2.0425867507886437, "grad_norm": 0.2605592906475067, "learning_rate": 2.794840562207167e-06, "loss": 0.2748, "step": 14245 }, { "epoch": 2.0427301405219387, "grad_norm": 0.31339532136917114, "learning_rate": 2.794091866871568e-06, "loss": 0.2833, "step": 14246 }, { "epoch": 2.0428735302552337, "grad_norm": 0.2858162224292755, "learning_rate": 2.7933432329450594e-06, "loss": 0.2671, "step": 14247 }, { "epoch": 2.0430169199885286, "grad_norm": 0.2659970223903656, "learning_rate": 2.79259466044848e-06, "loss": 0.2843, "step": 14248 }, { "epoch": 2.043160309721824, "grad_norm": 0.2663521468639374, "learning_rate": 2.7918461494026694e-06, "loss": 0.2719, "step": 14249 }, { "epoch": 2.043303699455119, "grad_norm": 0.2764125168323517, "learning_rate": 2.7910976998284656e-06, "loss": 0.2642, "step": 14250 }, { "epoch": 2.043447089188414, "grad_norm": 0.2750745713710785, "learning_rate": 2.7903493117467036e-06, "loss": 0.3003, "step": 14251 }, { "epoch": 2.0435904789217094, "grad_norm": 0.28464412689208984, "learning_rate": 2.7896009851782174e-06, "loss": 0.272, "step": 14252 }, { "epoch": 2.0437338686550044, "grad_norm": 0.28214725852012634, "learning_rate": 2.78885272014384e-06, "loss": 0.2835, "step": 14253 }, { "epoch": 2.0438772583882994, "grad_norm": 0.2894681990146637, "learning_rate": 2.7881045166644015e-06, "loss": 0.285, "step": 14254 }, { "epoch": 2.0440206481215943, "grad_norm": 0.29270994663238525, "learning_rate": 2.7873563747607323e-06, "loss": 0.3021, "step": 14255 }, { "epoch": 2.0441640378548898, "grad_norm": 0.28243952989578247, "learning_rate": 2.7866082944536564e-06, "loss": 0.2741, "step": 14256 }, { "epoch": 2.0443074275881847, "grad_norm": 0.2814863622188568, "learning_rate": 2.7858602757640007e-06, "loss": 0.2684, "step": 14257 }, { "epoch": 2.0444508173214797, "grad_norm": 0.28659534454345703, "learning_rate": 2.7851123187125896e-06, "loss": 0.2815, "step": 14258 }, { "epoch": 2.0445942070547747, "grad_norm": 0.27904099225997925, "learning_rate": 2.7843644233202434e-06, "loss": 0.2832, "step": 14259 }, { "epoch": 2.04473759678807, "grad_norm": 0.2745126485824585, "learning_rate": 2.7836165896077854e-06, "loss": 0.2781, "step": 14260 }, { "epoch": 2.044880986521365, "grad_norm": 0.2974378168582916, "learning_rate": 2.7828688175960308e-06, "loss": 0.2858, "step": 14261 }, { "epoch": 2.04502437625466, "grad_norm": 0.27015650272369385, "learning_rate": 2.7821211073057967e-06, "loss": 0.2964, "step": 14262 }, { "epoch": 2.0451677659879555, "grad_norm": 0.2667984664440155, "learning_rate": 2.7813734587579e-06, "loss": 0.2863, "step": 14263 }, { "epoch": 2.0453111557212504, "grad_norm": 0.3021838366985321, "learning_rate": 2.7806258719731532e-06, "loss": 0.2755, "step": 14264 }, { "epoch": 2.0454545454545454, "grad_norm": 0.26798054575920105, "learning_rate": 2.779878346972368e-06, "loss": 0.2821, "step": 14265 }, { "epoch": 2.0455979351878404, "grad_norm": 0.2685127854347229, "learning_rate": 2.779130883776354e-06, "loss": 0.2897, "step": 14266 }, { "epoch": 2.045741324921136, "grad_norm": 0.2618863582611084, "learning_rate": 2.778383482405921e-06, "loss": 0.272, "step": 14267 }, { "epoch": 2.045884714654431, "grad_norm": 0.2981032729148865, "learning_rate": 2.777636142881873e-06, "loss": 0.2802, "step": 14268 }, { "epoch": 2.0460281043877258, "grad_norm": 0.27369314432144165, "learning_rate": 2.7768888652250175e-06, "loss": 0.271, "step": 14269 }, { "epoch": 2.0461714941210207, "grad_norm": 0.25621339678764343, "learning_rate": 2.7761416494561554e-06, "loss": 0.2843, "step": 14270 }, { "epoch": 2.046314883854316, "grad_norm": 0.2705371379852295, "learning_rate": 2.775394495596091e-06, "loss": 0.2729, "step": 14271 }, { "epoch": 2.046458273587611, "grad_norm": 0.2917652130126953, "learning_rate": 2.7746474036656203e-06, "loss": 0.2671, "step": 14272 }, { "epoch": 2.046601663320906, "grad_norm": 0.27704480290412903, "learning_rate": 2.7739003736855417e-06, "loss": 0.2978, "step": 14273 }, { "epoch": 2.0467450530542015, "grad_norm": 0.28338897228240967, "learning_rate": 2.773153405676653e-06, "loss": 0.2865, "step": 14274 }, { "epoch": 2.0468884427874965, "grad_norm": 0.27158617973327637, "learning_rate": 2.772406499659748e-06, "loss": 0.2895, "step": 14275 }, { "epoch": 2.0470318325207915, "grad_norm": 0.28365200757980347, "learning_rate": 2.771659655655621e-06, "loss": 0.293, "step": 14276 }, { "epoch": 2.0471752222540864, "grad_norm": 0.26296061277389526, "learning_rate": 2.7709128736850587e-06, "loss": 0.2778, "step": 14277 }, { "epoch": 2.047318611987382, "grad_norm": 0.28453895449638367, "learning_rate": 2.770166153768854e-06, "loss": 0.2883, "step": 14278 }, { "epoch": 2.047462001720677, "grad_norm": 0.2728336751461029, "learning_rate": 2.769419495927793e-06, "loss": 0.2941, "step": 14279 }, { "epoch": 2.047605391453972, "grad_norm": 0.2928946614265442, "learning_rate": 2.7686729001826616e-06, "loss": 0.2791, "step": 14280 }, { "epoch": 2.0477487811872668, "grad_norm": 0.2934867739677429, "learning_rate": 2.7679263665542454e-06, "loss": 0.2953, "step": 14281 }, { "epoch": 2.047892170920562, "grad_norm": 0.26329556107521057, "learning_rate": 2.7671798950633267e-06, "loss": 0.2847, "step": 14282 }, { "epoch": 2.048035560653857, "grad_norm": 0.2839733362197876, "learning_rate": 2.766433485730683e-06, "loss": 0.2888, "step": 14283 }, { "epoch": 2.048178950387152, "grad_norm": 0.27278459072113037, "learning_rate": 2.7656871385770956e-06, "loss": 0.2887, "step": 14284 }, { "epoch": 2.0483223401204476, "grad_norm": 0.2785126268863678, "learning_rate": 2.764940853623341e-06, "loss": 0.2806, "step": 14285 }, { "epoch": 2.0484657298537425, "grad_norm": 0.2574782073497772, "learning_rate": 2.764194630890195e-06, "loss": 0.2895, "step": 14286 }, { "epoch": 2.0486091195870375, "grad_norm": 0.28710365295410156, "learning_rate": 2.763448470398431e-06, "loss": 0.2829, "step": 14287 }, { "epoch": 2.0487525093203325, "grad_norm": 0.27037712931632996, "learning_rate": 2.7627023721688217e-06, "loss": 0.2993, "step": 14288 }, { "epoch": 2.048895899053628, "grad_norm": 0.2742232084274292, "learning_rate": 2.7619563362221356e-06, "loss": 0.2794, "step": 14289 }, { "epoch": 2.049039288786923, "grad_norm": 0.26539745926856995, "learning_rate": 2.7612103625791436e-06, "loss": 0.2815, "step": 14290 }, { "epoch": 2.049182678520218, "grad_norm": 0.2759624719619751, "learning_rate": 2.7604644512606104e-06, "loss": 0.2672, "step": 14291 }, { "epoch": 2.0493260682535133, "grad_norm": 0.2521445155143738, "learning_rate": 2.7597186022873046e-06, "loss": 0.2848, "step": 14292 }, { "epoch": 2.0494694579868082, "grad_norm": 0.27519741654396057, "learning_rate": 2.758972815679984e-06, "loss": 0.2851, "step": 14293 }, { "epoch": 2.049612847720103, "grad_norm": 0.27781957387924194, "learning_rate": 2.7582270914594127e-06, "loss": 0.2897, "step": 14294 }, { "epoch": 2.049756237453398, "grad_norm": 0.2888595461845398, "learning_rate": 2.7574814296463515e-06, "loss": 0.2703, "step": 14295 }, { "epoch": 2.0498996271866936, "grad_norm": 0.2784944474697113, "learning_rate": 2.7567358302615565e-06, "loss": 0.2724, "step": 14296 }, { "epoch": 2.0500430169199886, "grad_norm": 0.2985919117927551, "learning_rate": 2.7559902933257864e-06, "loss": 0.2903, "step": 14297 }, { "epoch": 2.0501864066532836, "grad_norm": 0.27020788192749023, "learning_rate": 2.7552448188597957e-06, "loss": 0.2833, "step": 14298 }, { "epoch": 2.0503297963865785, "grad_norm": 0.2583458721637726, "learning_rate": 2.7544994068843344e-06, "loss": 0.3019, "step": 14299 }, { "epoch": 2.050473186119874, "grad_norm": 0.2769884765148163, "learning_rate": 2.753754057420156e-06, "loss": 0.3091, "step": 14300 }, { "epoch": 2.050616575853169, "grad_norm": 0.26808440685272217, "learning_rate": 2.7530087704880083e-06, "loss": 0.2785, "step": 14301 }, { "epoch": 2.050759965586464, "grad_norm": 0.26939499378204346, "learning_rate": 2.7522635461086404e-06, "loss": 0.2868, "step": 14302 }, { "epoch": 2.0509033553197593, "grad_norm": 0.2743476331233978, "learning_rate": 2.751518384302798e-06, "loss": 0.2702, "step": 14303 }, { "epoch": 2.0510467450530543, "grad_norm": 0.2739489674568176, "learning_rate": 2.750773285091225e-06, "loss": 0.2884, "step": 14304 }, { "epoch": 2.0511901347863493, "grad_norm": 0.26648208498954773, "learning_rate": 2.750028248494663e-06, "loss": 0.2456, "step": 14305 }, { "epoch": 2.0513335245196442, "grad_norm": 0.26305103302001953, "learning_rate": 2.7492832745338537e-06, "loss": 0.2711, "step": 14306 }, { "epoch": 2.0514769142529397, "grad_norm": 0.25854501128196716, "learning_rate": 2.7485383632295357e-06, "loss": 0.2819, "step": 14307 }, { "epoch": 2.0516203039862346, "grad_norm": 0.2735440731048584, "learning_rate": 2.747793514602447e-06, "loss": 0.2923, "step": 14308 }, { "epoch": 2.0517636937195296, "grad_norm": 0.2654350697994232, "learning_rate": 2.7470487286733237e-06, "loss": 0.2692, "step": 14309 }, { "epoch": 2.0519070834528246, "grad_norm": 0.2757566273212433, "learning_rate": 2.7463040054628966e-06, "loss": 0.2821, "step": 14310 }, { "epoch": 2.05205047318612, "grad_norm": 0.2696864604949951, "learning_rate": 2.745559344991899e-06, "loss": 0.2743, "step": 14311 }, { "epoch": 2.052193862919415, "grad_norm": 0.29146087169647217, "learning_rate": 2.744814747281061e-06, "loss": 0.2723, "step": 14312 }, { "epoch": 2.05233725265271, "grad_norm": 0.2641282379627228, "learning_rate": 2.7440702123511113e-06, "loss": 0.2738, "step": 14313 }, { "epoch": 2.0524806423860054, "grad_norm": 0.26636460423469543, "learning_rate": 2.743325740222779e-06, "loss": 0.292, "step": 14314 }, { "epoch": 2.0526240321193003, "grad_norm": 0.28546497225761414, "learning_rate": 2.742581330916784e-06, "loss": 0.3005, "step": 14315 }, { "epoch": 2.0527674218525953, "grad_norm": 0.2907398045063019, "learning_rate": 2.741836984453853e-06, "loss": 0.2927, "step": 14316 }, { "epoch": 2.0529108115858903, "grad_norm": 0.2637389600276947, "learning_rate": 2.7410927008547063e-06, "loss": 0.2906, "step": 14317 }, { "epoch": 2.0530542013191857, "grad_norm": 0.2697712182998657, "learning_rate": 2.7403484801400636e-06, "loss": 0.2626, "step": 14318 }, { "epoch": 2.0531975910524807, "grad_norm": 0.2623339891433716, "learning_rate": 2.7396043223306455e-06, "loss": 0.266, "step": 14319 }, { "epoch": 2.0533409807857756, "grad_norm": 0.2827199697494507, "learning_rate": 2.7388602274471622e-06, "loss": 0.2911, "step": 14320 }, { "epoch": 2.0534843705190706, "grad_norm": 0.2819969058036804, "learning_rate": 2.738116195510333e-06, "loss": 0.2895, "step": 14321 }, { "epoch": 2.053627760252366, "grad_norm": 0.26300209760665894, "learning_rate": 2.737372226540871e-06, "loss": 0.2795, "step": 14322 }, { "epoch": 2.053771149985661, "grad_norm": 0.26548081636428833, "learning_rate": 2.7366283205594848e-06, "loss": 0.2824, "step": 14323 }, { "epoch": 2.053914539718956, "grad_norm": 0.2754436135292053, "learning_rate": 2.7358844775868844e-06, "loss": 0.2832, "step": 14324 }, { "epoch": 2.0540579294522514, "grad_norm": 0.27959102392196655, "learning_rate": 2.735140697643779e-06, "loss": 0.2713, "step": 14325 }, { "epoch": 2.0542013191855464, "grad_norm": 0.2743178904056549, "learning_rate": 2.7343969807508707e-06, "loss": 0.2898, "step": 14326 }, { "epoch": 2.0543447089188414, "grad_norm": 0.27267053723335266, "learning_rate": 2.7336533269288657e-06, "loss": 0.2855, "step": 14327 }, { "epoch": 2.0544880986521363, "grad_norm": 0.2817407250404358, "learning_rate": 2.732909736198465e-06, "loss": 0.2848, "step": 14328 }, { "epoch": 2.0546314883854317, "grad_norm": 0.28764694929122925, "learning_rate": 2.73216620858037e-06, "loss": 0.2743, "step": 14329 }, { "epoch": 2.0547748781187267, "grad_norm": 0.2675808370113373, "learning_rate": 2.7314227440952825e-06, "loss": 0.2719, "step": 14330 }, { "epoch": 2.0549182678520217, "grad_norm": 0.2585974335670471, "learning_rate": 2.730679342763892e-06, "loss": 0.2639, "step": 14331 }, { "epoch": 2.055061657585317, "grad_norm": 0.28536272048950195, "learning_rate": 2.729936004606899e-06, "loss": 0.2674, "step": 14332 }, { "epoch": 2.055205047318612, "grad_norm": 0.2804446816444397, "learning_rate": 2.729192729644996e-06, "loss": 0.2948, "step": 14333 }, { "epoch": 2.055348437051907, "grad_norm": 0.29989057779312134, "learning_rate": 2.7284495178988734e-06, "loss": 0.3034, "step": 14334 }, { "epoch": 2.055491826785202, "grad_norm": 0.2764212489128113, "learning_rate": 2.7277063693892225e-06, "loss": 0.2724, "step": 14335 }, { "epoch": 2.0556352165184975, "grad_norm": 0.2537233531475067, "learning_rate": 2.726963284136732e-06, "loss": 0.2768, "step": 14336 }, { "epoch": 2.0557786062517924, "grad_norm": 0.28662461042404175, "learning_rate": 2.726220262162086e-06, "loss": 0.2775, "step": 14337 }, { "epoch": 2.0559219959850874, "grad_norm": 0.2861365079879761, "learning_rate": 2.7254773034859694e-06, "loss": 0.2919, "step": 14338 }, { "epoch": 2.0560653857183824, "grad_norm": 0.28990262746810913, "learning_rate": 2.724734408129066e-06, "loss": 0.2943, "step": 14339 }, { "epoch": 2.056208775451678, "grad_norm": 0.27932828664779663, "learning_rate": 2.7239915761120574e-06, "loss": 0.2955, "step": 14340 }, { "epoch": 2.0563521651849728, "grad_norm": 0.2760547995567322, "learning_rate": 2.7232488074556217e-06, "loss": 0.2955, "step": 14341 }, { "epoch": 2.0564955549182677, "grad_norm": 0.2678643465042114, "learning_rate": 2.7225061021804367e-06, "loss": 0.2918, "step": 14342 }, { "epoch": 2.056638944651563, "grad_norm": 0.2711191773414612, "learning_rate": 2.7217634603071786e-06, "loss": 0.2761, "step": 14343 }, { "epoch": 2.056782334384858, "grad_norm": 0.26515743136405945, "learning_rate": 2.721020881856521e-06, "loss": 0.2745, "step": 14344 }, { "epoch": 2.056925724118153, "grad_norm": 0.288103848695755, "learning_rate": 2.7202783668491362e-06, "loss": 0.2677, "step": 14345 }, { "epoch": 2.057069113851448, "grad_norm": 0.2707791328430176, "learning_rate": 2.719535915305697e-06, "loss": 0.2712, "step": 14346 }, { "epoch": 2.0572125035847435, "grad_norm": 0.2738076150417328, "learning_rate": 2.718793527246868e-06, "loss": 0.2658, "step": 14347 }, { "epoch": 2.0573558933180385, "grad_norm": 0.2588362395763397, "learning_rate": 2.718051202693318e-06, "loss": 0.2875, "step": 14348 }, { "epoch": 2.0574992830513334, "grad_norm": 0.2693365514278412, "learning_rate": 2.7173089416657124e-06, "loss": 0.27, "step": 14349 }, { "epoch": 2.0576426727846284, "grad_norm": 0.2609342932701111, "learning_rate": 2.716566744184714e-06, "loss": 0.2658, "step": 14350 }, { "epoch": 2.057786062517924, "grad_norm": 0.26073211431503296, "learning_rate": 2.715824610270985e-06, "loss": 0.2811, "step": 14351 }, { "epoch": 2.057929452251219, "grad_norm": 0.2704618573188782, "learning_rate": 2.7150825399451875e-06, "loss": 0.2715, "step": 14352 }, { "epoch": 2.058072841984514, "grad_norm": 0.28636640310287476, "learning_rate": 2.714340533227975e-06, "loss": 0.2921, "step": 14353 }, { "epoch": 2.058216231717809, "grad_norm": 0.25525182485580444, "learning_rate": 2.713598590140005e-06, "loss": 0.2689, "step": 14354 }, { "epoch": 2.058359621451104, "grad_norm": 0.27201491594314575, "learning_rate": 2.7128567107019343e-06, "loss": 0.2857, "step": 14355 }, { "epoch": 2.058503011184399, "grad_norm": 0.26817211508750916, "learning_rate": 2.7121148949344144e-06, "loss": 0.2732, "step": 14356 }, { "epoch": 2.058646400917694, "grad_norm": 0.2758072018623352, "learning_rate": 2.7113731428580962e-06, "loss": 0.279, "step": 14357 }, { "epoch": 2.0587897906509895, "grad_norm": 0.2690869867801666, "learning_rate": 2.7106314544936295e-06, "loss": 0.2771, "step": 14358 }, { "epoch": 2.0589331803842845, "grad_norm": 0.2776457667350769, "learning_rate": 2.709889829861661e-06, "loss": 0.3038, "step": 14359 }, { "epoch": 2.0590765701175795, "grad_norm": 0.278459370136261, "learning_rate": 2.709148268982837e-06, "loss": 0.2712, "step": 14360 }, { "epoch": 2.0592199598508745, "grad_norm": 0.27707311511039734, "learning_rate": 2.708406771877801e-06, "loss": 0.281, "step": 14361 }, { "epoch": 2.05936334958417, "grad_norm": 0.2692003846168518, "learning_rate": 2.707665338567196e-06, "loss": 0.2845, "step": 14362 }, { "epoch": 2.059506739317465, "grad_norm": 0.272005170583725, "learning_rate": 2.7069239690716627e-06, "loss": 0.2921, "step": 14363 }, { "epoch": 2.05965012905076, "grad_norm": 0.29270297288894653, "learning_rate": 2.706182663411837e-06, "loss": 0.2894, "step": 14364 }, { "epoch": 2.0597935187840553, "grad_norm": 0.2774943709373474, "learning_rate": 2.7054414216083575e-06, "loss": 0.2674, "step": 14365 }, { "epoch": 2.0599369085173502, "grad_norm": 0.2624453604221344, "learning_rate": 2.7047002436818592e-06, "loss": 0.2765, "step": 14366 }, { "epoch": 2.060080298250645, "grad_norm": 0.27984586358070374, "learning_rate": 2.7039591296529753e-06, "loss": 0.2884, "step": 14367 }, { "epoch": 2.06022368798394, "grad_norm": 0.2744254469871521, "learning_rate": 2.703218079542339e-06, "loss": 0.27, "step": 14368 }, { "epoch": 2.0603670777172356, "grad_norm": 0.2751997709274292, "learning_rate": 2.7024770933705767e-06, "loss": 0.2727, "step": 14369 }, { "epoch": 2.0605104674505306, "grad_norm": 0.2703936994075775, "learning_rate": 2.701736171158317e-06, "loss": 0.264, "step": 14370 }, { "epoch": 2.0606538571838255, "grad_norm": 0.25549811124801636, "learning_rate": 2.7009953129261877e-06, "loss": 0.2944, "step": 14371 }, { "epoch": 2.0607972469171205, "grad_norm": 0.2510785758495331, "learning_rate": 2.7002545186948124e-06, "loss": 0.2898, "step": 14372 }, { "epoch": 2.060940636650416, "grad_norm": 0.2767472565174103, "learning_rate": 2.6995137884848156e-06, "loss": 0.276, "step": 14373 }, { "epoch": 2.061084026383711, "grad_norm": 0.2605390250682831, "learning_rate": 2.6987731223168136e-06, "loss": 0.2777, "step": 14374 }, { "epoch": 2.061227416117006, "grad_norm": 0.27254152297973633, "learning_rate": 2.6980325202114284e-06, "loss": 0.2838, "step": 14375 }, { "epoch": 2.0613708058503013, "grad_norm": 0.2642447054386139, "learning_rate": 2.6972919821892745e-06, "loss": 0.2835, "step": 14376 }, { "epoch": 2.0615141955835963, "grad_norm": 0.29788172245025635, "learning_rate": 2.6965515082709714e-06, "loss": 0.2819, "step": 14377 }, { "epoch": 2.0616575853168913, "grad_norm": 0.2657318115234375, "learning_rate": 2.6958110984771314e-06, "loss": 0.2608, "step": 14378 }, { "epoch": 2.0618009750501862, "grad_norm": 0.25762757658958435, "learning_rate": 2.6950707528283678e-06, "loss": 0.2784, "step": 14379 }, { "epoch": 2.0619443647834816, "grad_norm": 0.27966392040252686, "learning_rate": 2.6943304713452865e-06, "loss": 0.2914, "step": 14380 }, { "epoch": 2.0620877545167766, "grad_norm": 0.2733667194843292, "learning_rate": 2.693590254048498e-06, "loss": 0.2688, "step": 14381 }, { "epoch": 2.0622311442500716, "grad_norm": 0.2978080213069916, "learning_rate": 2.6928501009586095e-06, "loss": 0.2727, "step": 14382 }, { "epoch": 2.062374533983367, "grad_norm": 0.2773337662220001, "learning_rate": 2.692110012096225e-06, "loss": 0.3016, "step": 14383 }, { "epoch": 2.062517923716662, "grad_norm": 0.27082738280296326, "learning_rate": 2.6913699874819493e-06, "loss": 0.2833, "step": 14384 }, { "epoch": 2.062661313449957, "grad_norm": 0.2729804515838623, "learning_rate": 2.69063002713638e-06, "loss": 0.2848, "step": 14385 }, { "epoch": 2.062804703183252, "grad_norm": 0.27371615171432495, "learning_rate": 2.6898901310801185e-06, "loss": 0.2785, "step": 14386 }, { "epoch": 2.0629480929165473, "grad_norm": 0.2810249328613281, "learning_rate": 2.6891502993337613e-06, "loss": 0.2961, "step": 14387 }, { "epoch": 2.0630914826498423, "grad_norm": 0.28210416436195374, "learning_rate": 2.6884105319179056e-06, "loss": 0.2596, "step": 14388 }, { "epoch": 2.0632348723831373, "grad_norm": 0.277862548828125, "learning_rate": 2.687670828853145e-06, "loss": 0.3029, "step": 14389 }, { "epoch": 2.0633782621164323, "grad_norm": 0.26390576362609863, "learning_rate": 2.686931190160073e-06, "loss": 0.2781, "step": 14390 }, { "epoch": 2.0635216518497277, "grad_norm": 0.2769523561000824, "learning_rate": 2.686191615859277e-06, "loss": 0.2723, "step": 14391 }, { "epoch": 2.0636650415830227, "grad_norm": 0.2827506959438324, "learning_rate": 2.6854521059713466e-06, "loss": 0.2801, "step": 14392 }, { "epoch": 2.0638084313163176, "grad_norm": 0.28288009762763977, "learning_rate": 2.6847126605168693e-06, "loss": 0.2801, "step": 14393 }, { "epoch": 2.063951821049613, "grad_norm": 0.2779572010040283, "learning_rate": 2.6839732795164303e-06, "loss": 0.2852, "step": 14394 }, { "epoch": 2.064095210782908, "grad_norm": 0.27344629168510437, "learning_rate": 2.6832339629906124e-06, "loss": 0.2658, "step": 14395 }, { "epoch": 2.064238600516203, "grad_norm": 0.2713738977909088, "learning_rate": 2.6824947109599965e-06, "loss": 0.2626, "step": 14396 }, { "epoch": 2.064381990249498, "grad_norm": 0.2750313878059387, "learning_rate": 2.6817555234451633e-06, "loss": 0.3066, "step": 14397 }, { "epoch": 2.0645253799827934, "grad_norm": 0.2589168846607208, "learning_rate": 2.6810164004666906e-06, "loss": 0.278, "step": 14398 }, { "epoch": 2.0646687697160884, "grad_norm": 0.28660136461257935, "learning_rate": 2.6802773420451534e-06, "loss": 0.2632, "step": 14399 }, { "epoch": 2.0648121594493833, "grad_norm": 0.27824896574020386, "learning_rate": 2.6795383482011285e-06, "loss": 0.2809, "step": 14400 }, { "epoch": 2.0649555491826783, "grad_norm": 0.27422812581062317, "learning_rate": 2.6787994189551845e-06, "loss": 0.2875, "step": 14401 }, { "epoch": 2.0650989389159737, "grad_norm": 0.2603357434272766, "learning_rate": 2.6780605543278946e-06, "loss": 0.2662, "step": 14402 }, { "epoch": 2.0652423286492687, "grad_norm": 0.28469863533973694, "learning_rate": 2.6773217543398268e-06, "loss": 0.2733, "step": 14403 }, { "epoch": 2.0653857183825637, "grad_norm": 0.2970627248287201, "learning_rate": 2.6765830190115483e-06, "loss": 0.3045, "step": 14404 }, { "epoch": 2.065529108115859, "grad_norm": 0.2814401090145111, "learning_rate": 2.6758443483636243e-06, "loss": 0.2714, "step": 14405 }, { "epoch": 2.065672497849154, "grad_norm": 0.29473677277565, "learning_rate": 2.6751057424166207e-06, "loss": 0.3114, "step": 14406 }, { "epoch": 2.065815887582449, "grad_norm": 0.27270981669425964, "learning_rate": 2.674367201191095e-06, "loss": 0.2859, "step": 14407 }, { "epoch": 2.065959277315744, "grad_norm": 0.28878161311149597, "learning_rate": 2.673628724707609e-06, "loss": 0.278, "step": 14408 }, { "epoch": 2.0661026670490394, "grad_norm": 0.28232625126838684, "learning_rate": 2.67289031298672e-06, "loss": 0.2767, "step": 14409 }, { "epoch": 2.0662460567823344, "grad_norm": 0.27476373314857483, "learning_rate": 2.6721519660489852e-06, "loss": 0.2767, "step": 14410 }, { "epoch": 2.0663894465156294, "grad_norm": 0.25965598225593567, "learning_rate": 2.671413683914959e-06, "loss": 0.2975, "step": 14411 }, { "epoch": 2.0665328362489244, "grad_norm": 0.2911795973777771, "learning_rate": 2.670675466605194e-06, "loss": 0.2761, "step": 14412 }, { "epoch": 2.06667622598222, "grad_norm": 0.269593209028244, "learning_rate": 2.66993731414024e-06, "loss": 0.2707, "step": 14413 }, { "epoch": 2.0668196157155148, "grad_norm": 0.29399314522743225, "learning_rate": 2.6691992265406473e-06, "loss": 0.3114, "step": 14414 }, { "epoch": 2.0669630054488097, "grad_norm": 0.28431394696235657, "learning_rate": 2.668461203826963e-06, "loss": 0.2995, "step": 14415 }, { "epoch": 2.067106395182105, "grad_norm": 0.2497744858264923, "learning_rate": 2.667723246019732e-06, "loss": 0.2869, "step": 14416 }, { "epoch": 2.0672497849154, "grad_norm": 0.2521645128726959, "learning_rate": 2.6669853531395e-06, "loss": 0.277, "step": 14417 }, { "epoch": 2.067393174648695, "grad_norm": 0.2703823149204254, "learning_rate": 2.6662475252068055e-06, "loss": 0.2982, "step": 14418 }, { "epoch": 2.06753656438199, "grad_norm": 0.2751674950122833, "learning_rate": 2.665509762242189e-06, "loss": 0.2566, "step": 14419 }, { "epoch": 2.0676799541152855, "grad_norm": 0.28819039463996887, "learning_rate": 2.66477206426619e-06, "loss": 0.2652, "step": 14420 }, { "epoch": 2.0678233438485805, "grad_norm": 0.27480173110961914, "learning_rate": 2.6640344312993447e-06, "loss": 0.2841, "step": 14421 }, { "epoch": 2.0679667335818754, "grad_norm": 0.2779506742954254, "learning_rate": 2.6632968633621893e-06, "loss": 0.3076, "step": 14422 }, { "epoch": 2.0681101233151704, "grad_norm": 0.2714466154575348, "learning_rate": 2.6625593604752525e-06, "loss": 0.2659, "step": 14423 }, { "epoch": 2.068253513048466, "grad_norm": 0.2771548628807068, "learning_rate": 2.6618219226590676e-06, "loss": 0.2782, "step": 14424 }, { "epoch": 2.068396902781761, "grad_norm": 0.26181164383888245, "learning_rate": 2.661084549934163e-06, "loss": 0.2766, "step": 14425 }, { "epoch": 2.068540292515056, "grad_norm": 0.2650154232978821, "learning_rate": 2.6603472423210677e-06, "loss": 0.2657, "step": 14426 }, { "epoch": 2.068683682248351, "grad_norm": 0.26848119497299194, "learning_rate": 2.6596099998403074e-06, "loss": 0.2689, "step": 14427 }, { "epoch": 2.068827071981646, "grad_norm": 0.29089874029159546, "learning_rate": 2.658872822512403e-06, "loss": 0.2727, "step": 14428 }, { "epoch": 2.068970461714941, "grad_norm": 0.2658626437187195, "learning_rate": 2.6581357103578775e-06, "loss": 0.3034, "step": 14429 }, { "epoch": 2.069113851448236, "grad_norm": 0.2730611264705658, "learning_rate": 2.6573986633972513e-06, "loss": 0.275, "step": 14430 }, { "epoch": 2.0692572411815315, "grad_norm": 0.2774890661239624, "learning_rate": 2.656661681651043e-06, "loss": 0.2804, "step": 14431 }, { "epoch": 2.0694006309148265, "grad_norm": 0.2807677090167999, "learning_rate": 2.655924765139767e-06, "loss": 0.2576, "step": 14432 }, { "epoch": 2.0695440206481215, "grad_norm": 0.2740477919578552, "learning_rate": 2.6551879138839434e-06, "loss": 0.2541, "step": 14433 }, { "epoch": 2.069687410381417, "grad_norm": 0.2580203115940094, "learning_rate": 2.6544511279040785e-06, "loss": 0.2748, "step": 14434 }, { "epoch": 2.069830800114712, "grad_norm": 0.28028011322021484, "learning_rate": 2.6537144072206868e-06, "loss": 0.2777, "step": 14435 }, { "epoch": 2.069974189848007, "grad_norm": 0.2731165885925293, "learning_rate": 2.6529777518542765e-06, "loss": 0.2593, "step": 14436 }, { "epoch": 2.070117579581302, "grad_norm": 0.28455764055252075, "learning_rate": 2.6522411618253556e-06, "loss": 0.2728, "step": 14437 }, { "epoch": 2.0702609693145972, "grad_norm": 0.28073850274086, "learning_rate": 2.651504637154431e-06, "loss": 0.294, "step": 14438 }, { "epoch": 2.070404359047892, "grad_norm": 0.2916406989097595, "learning_rate": 2.6507681778620027e-06, "loss": 0.2863, "step": 14439 }, { "epoch": 2.070547748781187, "grad_norm": 0.27023041248321533, "learning_rate": 2.650031783968574e-06, "loss": 0.2753, "step": 14440 }, { "epoch": 2.070691138514482, "grad_norm": 0.2704605758190155, "learning_rate": 2.6492954554946464e-06, "loss": 0.2889, "step": 14441 }, { "epoch": 2.0708345282477776, "grad_norm": 0.2730608582496643, "learning_rate": 2.6485591924607166e-06, "loss": 0.2718, "step": 14442 }, { "epoch": 2.0709779179810726, "grad_norm": 0.27518001198768616, "learning_rate": 2.6478229948872823e-06, "loss": 0.2863, "step": 14443 }, { "epoch": 2.0711213077143675, "grad_norm": 0.3583412766456604, "learning_rate": 2.647086862794839e-06, "loss": 0.2931, "step": 14444 }, { "epoch": 2.071264697447663, "grad_norm": 0.26680219173431396, "learning_rate": 2.6463507962038758e-06, "loss": 0.2687, "step": 14445 }, { "epoch": 2.071408087180958, "grad_norm": 0.27211087942123413, "learning_rate": 2.6456147951348863e-06, "loss": 0.2989, "step": 14446 }, { "epoch": 2.071551476914253, "grad_norm": 0.26999911665916443, "learning_rate": 2.644878859608359e-06, "loss": 0.287, "step": 14447 }, { "epoch": 2.071694866647548, "grad_norm": 0.25944453477859497, "learning_rate": 2.6441429896447816e-06, "loss": 0.2828, "step": 14448 }, { "epoch": 2.0718382563808433, "grad_norm": 0.2918558418750763, "learning_rate": 2.6434071852646394e-06, "loss": 0.2978, "step": 14449 }, { "epoch": 2.0719816461141383, "grad_norm": 0.3084771931171417, "learning_rate": 2.642671446488416e-06, "loss": 0.2914, "step": 14450 }, { "epoch": 2.0721250358474332, "grad_norm": 0.2846807539463043, "learning_rate": 2.6419357733365924e-06, "loss": 0.2921, "step": 14451 }, { "epoch": 2.072268425580728, "grad_norm": 0.28611287474632263, "learning_rate": 2.6412001658296504e-06, "loss": 0.3073, "step": 14452 }, { "epoch": 2.0724118153140236, "grad_norm": 0.2827419936656952, "learning_rate": 2.640464623988067e-06, "loss": 0.2776, "step": 14453 }, { "epoch": 2.0725552050473186, "grad_norm": 0.2800166606903076, "learning_rate": 2.639729147832321e-06, "loss": 0.2835, "step": 14454 }, { "epoch": 2.0726985947806136, "grad_norm": 0.27685198187828064, "learning_rate": 2.6389937373828827e-06, "loss": 0.2772, "step": 14455 }, { "epoch": 2.072841984513909, "grad_norm": 0.28677862882614136, "learning_rate": 2.6382583926602267e-06, "loss": 0.2896, "step": 14456 }, { "epoch": 2.072985374247204, "grad_norm": 0.2888938784599304, "learning_rate": 2.6375231136848245e-06, "loss": 0.2731, "step": 14457 }, { "epoch": 2.073128763980499, "grad_norm": 0.26163601875305176, "learning_rate": 2.636787900477144e-06, "loss": 0.2676, "step": 14458 }, { "epoch": 2.073272153713794, "grad_norm": 0.2667017877101898, "learning_rate": 2.6360527530576534e-06, "loss": 0.2793, "step": 14459 }, { "epoch": 2.0734155434470893, "grad_norm": 0.2826390266418457, "learning_rate": 2.63531767144682e-06, "loss": 0.265, "step": 14460 }, { "epoch": 2.0735589331803843, "grad_norm": 0.2816130518913269, "learning_rate": 2.634582655665102e-06, "loss": 0.288, "step": 14461 }, { "epoch": 2.0737023229136793, "grad_norm": 0.303387314081192, "learning_rate": 2.633847705732965e-06, "loss": 0.2729, "step": 14462 }, { "epoch": 2.0738457126469747, "grad_norm": 0.2709418535232544, "learning_rate": 2.633112821670868e-06, "loss": 0.2921, "step": 14463 }, { "epoch": 2.0739891023802697, "grad_norm": 0.2615172863006592, "learning_rate": 2.632378003499269e-06, "loss": 0.2666, "step": 14464 }, { "epoch": 2.0741324921135647, "grad_norm": 0.25378185510635376, "learning_rate": 2.6316432512386247e-06, "loss": 0.2679, "step": 14465 }, { "epoch": 2.0742758818468596, "grad_norm": 0.29699864983558655, "learning_rate": 2.630908564909388e-06, "loss": 0.3124, "step": 14466 }, { "epoch": 2.074419271580155, "grad_norm": 0.27608996629714966, "learning_rate": 2.6301739445320135e-06, "loss": 0.2872, "step": 14467 }, { "epoch": 2.07456266131345, "grad_norm": 0.263095885515213, "learning_rate": 2.6294393901269505e-06, "loss": 0.295, "step": 14468 }, { "epoch": 2.074706051046745, "grad_norm": 0.281313419342041, "learning_rate": 2.6287049017146483e-06, "loss": 0.2858, "step": 14469 }, { "epoch": 2.07484944078004, "grad_norm": 0.2948477566242218, "learning_rate": 2.6279704793155543e-06, "loss": 0.2798, "step": 14470 }, { "epoch": 2.0749928305133354, "grad_norm": 0.2823617160320282, "learning_rate": 2.6272361229501147e-06, "loss": 0.263, "step": 14471 }, { "epoch": 2.0751362202466304, "grad_norm": 0.27538472414016724, "learning_rate": 2.6265018326387696e-06, "loss": 0.2732, "step": 14472 }, { "epoch": 2.0752796099799253, "grad_norm": 0.2892831563949585, "learning_rate": 2.625767608401962e-06, "loss": 0.2656, "step": 14473 }, { "epoch": 2.0754229997132203, "grad_norm": 0.27132076025009155, "learning_rate": 2.6250334502601326e-06, "loss": 0.2834, "step": 14474 }, { "epoch": 2.0755663894465157, "grad_norm": 0.26875630021095276, "learning_rate": 2.6242993582337184e-06, "loss": 0.2736, "step": 14475 }, { "epoch": 2.0757097791798107, "grad_norm": 0.2680800259113312, "learning_rate": 2.6235653323431574e-06, "loss": 0.2802, "step": 14476 }, { "epoch": 2.0758531689131057, "grad_norm": 0.2784351706504822, "learning_rate": 2.6228313726088807e-06, "loss": 0.2897, "step": 14477 }, { "epoch": 2.075996558646401, "grad_norm": 0.27690380811691284, "learning_rate": 2.622097479051321e-06, "loss": 0.3059, "step": 14478 }, { "epoch": 2.076139948379696, "grad_norm": 0.25870266556739807, "learning_rate": 2.621363651690909e-06, "loss": 0.2851, "step": 14479 }, { "epoch": 2.076283338112991, "grad_norm": 0.2608216106891632, "learning_rate": 2.620629890548075e-06, "loss": 0.2836, "step": 14480 }, { "epoch": 2.076426727846286, "grad_norm": 0.2718522250652313, "learning_rate": 2.619896195643246e-06, "loss": 0.3066, "step": 14481 }, { "epoch": 2.0765701175795814, "grad_norm": 0.28080329298973083, "learning_rate": 2.619162566996844e-06, "loss": 0.2815, "step": 14482 }, { "epoch": 2.0767135073128764, "grad_norm": 0.3131330907344818, "learning_rate": 2.6184290046292938e-06, "loss": 0.2978, "step": 14483 }, { "epoch": 2.0768568970461714, "grad_norm": 0.2751534879207611, "learning_rate": 2.6176955085610168e-06, "loss": 0.2671, "step": 14484 }, { "epoch": 2.077000286779467, "grad_norm": 0.298531711101532, "learning_rate": 2.6169620788124316e-06, "loss": 0.2785, "step": 14485 }, { "epoch": 2.0771436765127618, "grad_norm": 0.29573339223861694, "learning_rate": 2.616228715403956e-06, "loss": 0.2853, "step": 14486 }, { "epoch": 2.0772870662460567, "grad_norm": 0.2696872055530548, "learning_rate": 2.6154954183560066e-06, "loss": 0.2901, "step": 14487 }, { "epoch": 2.0774304559793517, "grad_norm": 0.3040351867675781, "learning_rate": 2.6147621876889966e-06, "loss": 0.293, "step": 14488 }, { "epoch": 2.077573845712647, "grad_norm": 0.28104832768440247, "learning_rate": 2.6140290234233384e-06, "loss": 0.2759, "step": 14489 }, { "epoch": 2.077717235445942, "grad_norm": 0.27293431758880615, "learning_rate": 2.6132959255794417e-06, "loss": 0.2771, "step": 14490 }, { "epoch": 2.077860625179237, "grad_norm": 0.27060291171073914, "learning_rate": 2.6125628941777148e-06, "loss": 0.2681, "step": 14491 }, { "epoch": 2.078004014912532, "grad_norm": 0.29546430706977844, "learning_rate": 2.611829929238566e-06, "loss": 0.286, "step": 14492 }, { "epoch": 2.0781474046458275, "grad_norm": 0.2804129421710968, "learning_rate": 2.611097030782397e-06, "loss": 0.2844, "step": 14493 }, { "epoch": 2.0782907943791225, "grad_norm": 0.2709861695766449, "learning_rate": 2.610364198829612e-06, "loss": 0.2838, "step": 14494 }, { "epoch": 2.0784341841124174, "grad_norm": 0.2768558859825134, "learning_rate": 2.6096314334006114e-06, "loss": 0.2723, "step": 14495 }, { "epoch": 2.078577573845713, "grad_norm": 0.29527750611305237, "learning_rate": 2.6088987345157948e-06, "loss": 0.2754, "step": 14496 }, { "epoch": 2.078720963579008, "grad_norm": 0.28709277510643005, "learning_rate": 2.6081661021955582e-06, "loss": 0.2907, "step": 14497 }, { "epoch": 2.078864353312303, "grad_norm": 0.26265743374824524, "learning_rate": 2.607433536460301e-06, "loss": 0.2655, "step": 14498 }, { "epoch": 2.0790077430455978, "grad_norm": 0.2802809178829193, "learning_rate": 2.6067010373304113e-06, "loss": 0.2666, "step": 14499 }, { "epoch": 2.079151132778893, "grad_norm": 0.3112219572067261, "learning_rate": 2.6059686048262833e-06, "loss": 0.3019, "step": 14500 }, { "epoch": 2.079294522512188, "grad_norm": 0.306774377822876, "learning_rate": 2.6052362389683073e-06, "loss": 0.2951, "step": 14501 }, { "epoch": 2.079437912245483, "grad_norm": 0.2660827934741974, "learning_rate": 2.6045039397768696e-06, "loss": 0.2861, "step": 14502 }, { "epoch": 2.079581301978778, "grad_norm": 0.3124072551727295, "learning_rate": 2.603771707272358e-06, "loss": 0.2678, "step": 14503 }, { "epoch": 2.0797246917120735, "grad_norm": 0.29494133591651917, "learning_rate": 2.603039541475155e-06, "loss": 0.2782, "step": 14504 }, { "epoch": 2.0798680814453685, "grad_norm": 0.2959177792072296, "learning_rate": 2.602307442405645e-06, "loss": 0.2844, "step": 14505 }, { "epoch": 2.0800114711786635, "grad_norm": 0.2889140546321869, "learning_rate": 2.6015754100842063e-06, "loss": 0.298, "step": 14506 }, { "epoch": 2.080154860911959, "grad_norm": 0.25985637307167053, "learning_rate": 2.600843444531219e-06, "loss": 0.2838, "step": 14507 }, { "epoch": 2.080298250645254, "grad_norm": 0.29893240332603455, "learning_rate": 2.600111545767062e-06, "loss": 0.29, "step": 14508 }, { "epoch": 2.080441640378549, "grad_norm": 0.27296730875968933, "learning_rate": 2.5993797138121064e-06, "loss": 0.2922, "step": 14509 }, { "epoch": 2.080585030111844, "grad_norm": 0.2609708309173584, "learning_rate": 2.598647948686726e-06, "loss": 0.2959, "step": 14510 }, { "epoch": 2.0807284198451392, "grad_norm": 0.2858049273490906, "learning_rate": 2.597916250411293e-06, "loss": 0.2735, "step": 14511 }, { "epoch": 2.080871809578434, "grad_norm": 0.26705679297447205, "learning_rate": 2.5971846190061767e-06, "loss": 0.2829, "step": 14512 }, { "epoch": 2.081015199311729, "grad_norm": 0.28634539246559143, "learning_rate": 2.596453054491744e-06, "loss": 0.266, "step": 14513 }, { "epoch": 2.0811585890450246, "grad_norm": 0.3019426763057709, "learning_rate": 2.5957215568883625e-06, "loss": 0.3162, "step": 14514 }, { "epoch": 2.0813019787783196, "grad_norm": 0.2764309346675873, "learning_rate": 2.594990126216393e-06, "loss": 0.2773, "step": 14515 }, { "epoch": 2.0814453685116145, "grad_norm": 0.26936569809913635, "learning_rate": 2.5942587624961983e-06, "loss": 0.2578, "step": 14516 }, { "epoch": 2.0815887582449095, "grad_norm": 0.28160810470581055, "learning_rate": 2.5935274657481395e-06, "loss": 0.2712, "step": 14517 }, { "epoch": 2.081732147978205, "grad_norm": 0.2717095911502838, "learning_rate": 2.592796235992574e-06, "loss": 0.2663, "step": 14518 }, { "epoch": 2.0818755377115, "grad_norm": 0.28255966305732727, "learning_rate": 2.5920650732498603e-06, "loss": 0.2911, "step": 14519 }, { "epoch": 2.082018927444795, "grad_norm": 0.28313934803009033, "learning_rate": 2.591333977540349e-06, "loss": 0.2876, "step": 14520 }, { "epoch": 2.08216231717809, "grad_norm": 0.27908992767333984, "learning_rate": 2.590602948884392e-06, "loss": 0.3027, "step": 14521 }, { "epoch": 2.0823057069113853, "grad_norm": 0.2979050874710083, "learning_rate": 2.5898719873023455e-06, "loss": 0.2711, "step": 14522 }, { "epoch": 2.0824490966446803, "grad_norm": 0.2798437178134918, "learning_rate": 2.589141092814554e-06, "loss": 0.2887, "step": 14523 }, { "epoch": 2.0825924863779752, "grad_norm": 0.2832687199115753, "learning_rate": 2.588410265441367e-06, "loss": 0.2841, "step": 14524 }, { "epoch": 2.0827358761112706, "grad_norm": 0.30226242542266846, "learning_rate": 2.5876795052031307e-06, "loss": 0.2757, "step": 14525 }, { "epoch": 2.0828792658445656, "grad_norm": 0.2656897306442261, "learning_rate": 2.5869488121201836e-06, "loss": 0.2925, "step": 14526 }, { "epoch": 2.0830226555778606, "grad_norm": 0.28209787607192993, "learning_rate": 2.58621818621287e-06, "loss": 0.2659, "step": 14527 }, { "epoch": 2.0831660453111556, "grad_norm": 0.2903876006603241, "learning_rate": 2.5854876275015284e-06, "loss": 0.2782, "step": 14528 }, { "epoch": 2.083309435044451, "grad_norm": 0.28448784351348877, "learning_rate": 2.5847571360064973e-06, "loss": 0.2581, "step": 14529 }, { "epoch": 2.083452824777746, "grad_norm": 0.30334845185279846, "learning_rate": 2.5840267117481137e-06, "loss": 0.2977, "step": 14530 }, { "epoch": 2.083596214511041, "grad_norm": 0.2814764082431793, "learning_rate": 2.5832963547467087e-06, "loss": 0.2599, "step": 14531 }, { "epoch": 2.083739604244336, "grad_norm": 0.268387109041214, "learning_rate": 2.5825660650226155e-06, "loss": 0.2786, "step": 14532 }, { "epoch": 2.0838829939776313, "grad_norm": 0.2660163342952728, "learning_rate": 2.5818358425961644e-06, "loss": 0.2609, "step": 14533 }, { "epoch": 2.0840263837109263, "grad_norm": 0.27453339099884033, "learning_rate": 2.5811056874876827e-06, "loss": 0.2831, "step": 14534 }, { "epoch": 2.0841697734442213, "grad_norm": 0.28575193881988525, "learning_rate": 2.5803755997175004e-06, "loss": 0.2768, "step": 14535 }, { "epoch": 2.0843131631775167, "grad_norm": 0.2702955901622772, "learning_rate": 2.579645579305938e-06, "loss": 0.2996, "step": 14536 }, { "epoch": 2.0844565529108117, "grad_norm": 0.2701984941959381, "learning_rate": 2.578915626273318e-06, "loss": 0.2941, "step": 14537 }, { "epoch": 2.0845999426441066, "grad_norm": 0.2893982529640198, "learning_rate": 2.578185740639964e-06, "loss": 0.2675, "step": 14538 }, { "epoch": 2.0847433323774016, "grad_norm": 0.2998686134815216, "learning_rate": 2.577455922426193e-06, "loss": 0.2647, "step": 14539 }, { "epoch": 2.084886722110697, "grad_norm": 0.2749715745449066, "learning_rate": 2.576726171652323e-06, "loss": 0.2853, "step": 14540 }, { "epoch": 2.085030111843992, "grad_norm": 0.27745765447616577, "learning_rate": 2.575996488338668e-06, "loss": 0.2924, "step": 14541 }, { "epoch": 2.085173501577287, "grad_norm": 0.27294856309890747, "learning_rate": 2.5752668725055417e-06, "loss": 0.29, "step": 14542 }, { "epoch": 2.085316891310582, "grad_norm": 0.27696943283081055, "learning_rate": 2.5745373241732563e-06, "loss": 0.2619, "step": 14543 }, { "epoch": 2.0854602810438774, "grad_norm": 0.28840371966362, "learning_rate": 2.5738078433621206e-06, "loss": 0.293, "step": 14544 }, { "epoch": 2.0856036707771723, "grad_norm": 0.26107141375541687, "learning_rate": 2.573078430092443e-06, "loss": 0.3016, "step": 14545 }, { "epoch": 2.0857470605104673, "grad_norm": 0.27817487716674805, "learning_rate": 2.5723490843845293e-06, "loss": 0.2734, "step": 14546 }, { "epoch": 2.0858904502437627, "grad_norm": 0.28190213441848755, "learning_rate": 2.571619806258682e-06, "loss": 0.2991, "step": 14547 }, { "epoch": 2.0860338399770577, "grad_norm": 0.28372350335121155, "learning_rate": 2.5708905957352027e-06, "loss": 0.2801, "step": 14548 }, { "epoch": 2.0861772297103527, "grad_norm": 0.2909190058708191, "learning_rate": 2.5701614528343934e-06, "loss": 0.2699, "step": 14549 }, { "epoch": 2.0863206194436477, "grad_norm": 0.27364423871040344, "learning_rate": 2.5694323775765507e-06, "loss": 0.2621, "step": 14550 }, { "epoch": 2.086464009176943, "grad_norm": 0.26897749304771423, "learning_rate": 2.568703369981972e-06, "loss": 0.2843, "step": 14551 }, { "epoch": 2.086607398910238, "grad_norm": 0.27276211977005005, "learning_rate": 2.567974430070953e-06, "loss": 0.2706, "step": 14552 }, { "epoch": 2.086750788643533, "grad_norm": 0.28064823150634766, "learning_rate": 2.567245557863783e-06, "loss": 0.2842, "step": 14553 }, { "epoch": 2.086894178376828, "grad_norm": 0.2652326822280884, "learning_rate": 2.566516753380755e-06, "loss": 0.2855, "step": 14554 }, { "epoch": 2.0870375681101234, "grad_norm": 0.2971441447734833, "learning_rate": 2.5657880166421566e-06, "loss": 0.2797, "step": 14555 }, { "epoch": 2.0871809578434184, "grad_norm": 0.26015862822532654, "learning_rate": 2.5650593476682758e-06, "loss": 0.2887, "step": 14556 }, { "epoch": 2.0873243475767134, "grad_norm": 0.2878916561603546, "learning_rate": 2.5643307464793966e-06, "loss": 0.3052, "step": 14557 }, { "epoch": 2.087467737310009, "grad_norm": 0.25265923142433167, "learning_rate": 2.5636022130958026e-06, "loss": 0.2828, "step": 14558 }, { "epoch": 2.0876111270433038, "grad_norm": 0.2745272219181061, "learning_rate": 2.562873747537775e-06, "loss": 0.2802, "step": 14559 }, { "epoch": 2.0877545167765987, "grad_norm": 0.26826560497283936, "learning_rate": 2.562145349825594e-06, "loss": 0.2807, "step": 14560 }, { "epoch": 2.0878979065098937, "grad_norm": 0.2510768473148346, "learning_rate": 2.561417019979536e-06, "loss": 0.2893, "step": 14561 }, { "epoch": 2.088041296243189, "grad_norm": 0.2648427188396454, "learning_rate": 2.5606887580198786e-06, "loss": 0.2784, "step": 14562 }, { "epoch": 2.088184685976484, "grad_norm": 0.2748781740665436, "learning_rate": 2.559960563966892e-06, "loss": 0.2662, "step": 14563 }, { "epoch": 2.088328075709779, "grad_norm": 0.28198719024658203, "learning_rate": 2.5592324378408503e-06, "loss": 0.2921, "step": 14564 }, { "epoch": 2.0884714654430745, "grad_norm": 0.27719902992248535, "learning_rate": 2.5585043796620224e-06, "loss": 0.2745, "step": 14565 }, { "epoch": 2.0886148551763695, "grad_norm": 0.2701278030872345, "learning_rate": 2.5577763894506767e-06, "loss": 0.266, "step": 14566 }, { "epoch": 2.0887582449096644, "grad_norm": 0.293374240398407, "learning_rate": 2.55704846722708e-06, "loss": 0.2932, "step": 14567 }, { "epoch": 2.0889016346429594, "grad_norm": 0.2790259122848511, "learning_rate": 2.5563206130114977e-06, "loss": 0.2828, "step": 14568 }, { "epoch": 2.089045024376255, "grad_norm": 0.289730429649353, "learning_rate": 2.5555928268241882e-06, "loss": 0.275, "step": 14569 }, { "epoch": 2.08918841410955, "grad_norm": 0.30071696639060974, "learning_rate": 2.5548651086854148e-06, "loss": 0.2757, "step": 14570 }, { "epoch": 2.089331803842845, "grad_norm": 0.2661493420600891, "learning_rate": 2.554137458615435e-06, "loss": 0.2918, "step": 14571 }, { "epoch": 2.0894751935761398, "grad_norm": 0.264093279838562, "learning_rate": 2.5534098766345063e-06, "loss": 0.2841, "step": 14572 }, { "epoch": 2.089618583309435, "grad_norm": 0.27668190002441406, "learning_rate": 2.5526823627628844e-06, "loss": 0.2866, "step": 14573 }, { "epoch": 2.08976197304273, "grad_norm": 0.26466163992881775, "learning_rate": 2.551954917020819e-06, "loss": 0.2775, "step": 14574 }, { "epoch": 2.089905362776025, "grad_norm": 0.2904236316680908, "learning_rate": 2.5512275394285634e-06, "loss": 0.2984, "step": 14575 }, { "epoch": 2.0900487525093205, "grad_norm": 0.2774265706539154, "learning_rate": 2.5505002300063656e-06, "loss": 0.2908, "step": 14576 }, { "epoch": 2.0901921422426155, "grad_norm": 0.26799893379211426, "learning_rate": 2.5497729887744723e-06, "loss": 0.2832, "step": 14577 }, { "epoch": 2.0903355319759105, "grad_norm": 0.27510160207748413, "learning_rate": 2.5490458157531304e-06, "loss": 0.3255, "step": 14578 }, { "epoch": 2.0904789217092055, "grad_norm": 0.2875758409500122, "learning_rate": 2.548318710962586e-06, "loss": 0.2852, "step": 14579 }, { "epoch": 2.090622311442501, "grad_norm": 0.2630704939365387, "learning_rate": 2.547591674423075e-06, "loss": 0.2804, "step": 14580 }, { "epoch": 2.090765701175796, "grad_norm": 0.2719389498233795, "learning_rate": 2.546864706154839e-06, "loss": 0.2816, "step": 14581 }, { "epoch": 2.090909090909091, "grad_norm": 0.2590269148349762, "learning_rate": 2.546137806178116e-06, "loss": 0.2671, "step": 14582 }, { "epoch": 2.091052480642386, "grad_norm": 0.29332756996154785, "learning_rate": 2.5454109745131415e-06, "loss": 0.2806, "step": 14583 }, { "epoch": 2.091195870375681, "grad_norm": 0.28282463550567627, "learning_rate": 2.5446842111801524e-06, "loss": 0.3018, "step": 14584 }, { "epoch": 2.091339260108976, "grad_norm": 0.27945536375045776, "learning_rate": 2.5439575161993756e-06, "loss": 0.2735, "step": 14585 }, { "epoch": 2.091482649842271, "grad_norm": 0.28499341011047363, "learning_rate": 2.543230889591043e-06, "loss": 0.2849, "step": 14586 }, { "epoch": 2.0916260395755666, "grad_norm": 0.2641936242580414, "learning_rate": 2.5425043313753833e-06, "loss": 0.2586, "step": 14587 }, { "epoch": 2.0917694293088616, "grad_norm": 0.27602460980415344, "learning_rate": 2.541777841572623e-06, "loss": 0.2777, "step": 14588 }, { "epoch": 2.0919128190421565, "grad_norm": 0.28508907556533813, "learning_rate": 2.5410514202029877e-06, "loss": 0.3061, "step": 14589 }, { "epoch": 2.0920562087754515, "grad_norm": 0.2883993983268738, "learning_rate": 2.5403250672866965e-06, "loss": 0.2746, "step": 14590 }, { "epoch": 2.092199598508747, "grad_norm": 0.27033185958862305, "learning_rate": 2.5395987828439716e-06, "loss": 0.2869, "step": 14591 }, { "epoch": 2.092342988242042, "grad_norm": 0.2813417315483093, "learning_rate": 2.538872566895032e-06, "loss": 0.2763, "step": 14592 }, { "epoch": 2.092486377975337, "grad_norm": 0.28636184334754944, "learning_rate": 2.5381464194600946e-06, "loss": 0.2929, "step": 14593 }, { "epoch": 2.092629767708632, "grad_norm": 0.2668319642543793, "learning_rate": 2.5374203405593735e-06, "loss": 0.2723, "step": 14594 }, { "epoch": 2.0927731574419273, "grad_norm": 0.28741511702537537, "learning_rate": 2.536694330213082e-06, "loss": 0.3016, "step": 14595 }, { "epoch": 2.0929165471752222, "grad_norm": 0.26416870951652527, "learning_rate": 2.535968388441431e-06, "loss": 0.2856, "step": 14596 }, { "epoch": 2.093059936908517, "grad_norm": 0.2766434848308563, "learning_rate": 2.5352425152646292e-06, "loss": 0.2901, "step": 14597 }, { "epoch": 2.0932033266418126, "grad_norm": 0.2865271270275116, "learning_rate": 2.5345167107028843e-06, "loss": 0.3001, "step": 14598 }, { "epoch": 2.0933467163751076, "grad_norm": 0.2838568687438965, "learning_rate": 2.533790974776402e-06, "loss": 0.2983, "step": 14599 }, { "epoch": 2.0934901061084026, "grad_norm": 0.28241997957229614, "learning_rate": 2.533065307505388e-06, "loss": 0.2875, "step": 14600 }, { "epoch": 2.0936334958416976, "grad_norm": 0.27176862955093384, "learning_rate": 2.5323397089100377e-06, "loss": 0.2712, "step": 14601 }, { "epoch": 2.093776885574993, "grad_norm": 0.2878339886665344, "learning_rate": 2.5316141790105544e-06, "loss": 0.2902, "step": 14602 }, { "epoch": 2.093920275308288, "grad_norm": 0.25523489713668823, "learning_rate": 2.530888717827135e-06, "loss": 0.2912, "step": 14603 }, { "epoch": 2.094063665041583, "grad_norm": 0.2903726100921631, "learning_rate": 2.530163325379976e-06, "loss": 0.268, "step": 14604 }, { "epoch": 2.094207054774878, "grad_norm": 0.2869507372379303, "learning_rate": 2.5294380016892705e-06, "loss": 0.2854, "step": 14605 }, { "epoch": 2.0943504445081733, "grad_norm": 0.25992077589035034, "learning_rate": 2.5287127467752125e-06, "loss": 0.2847, "step": 14606 }, { "epoch": 2.0944938342414683, "grad_norm": 0.28197571635246277, "learning_rate": 2.5279875606579883e-06, "loss": 0.2786, "step": 14607 }, { "epoch": 2.0946372239747633, "grad_norm": 0.28790169954299927, "learning_rate": 2.5272624433577873e-06, "loss": 0.2466, "step": 14608 }, { "epoch": 2.0947806137080587, "grad_norm": 0.28600990772247314, "learning_rate": 2.5265373948947965e-06, "loss": 0.2644, "step": 14609 }, { "epoch": 2.0949240034413537, "grad_norm": 0.27546244859695435, "learning_rate": 2.5258124152891993e-06, "loss": 0.2733, "step": 14610 }, { "epoch": 2.0950673931746486, "grad_norm": 0.2686503529548645, "learning_rate": 2.525087504561179e-06, "loss": 0.292, "step": 14611 }, { "epoch": 2.0952107829079436, "grad_norm": 0.27144813537597656, "learning_rate": 2.5243626627309147e-06, "loss": 0.2728, "step": 14612 }, { "epoch": 2.095354172641239, "grad_norm": 0.2805846035480499, "learning_rate": 2.5236378898185866e-06, "loss": 0.2524, "step": 14613 }, { "epoch": 2.095497562374534, "grad_norm": 0.27152127027511597, "learning_rate": 2.52291318584437e-06, "loss": 0.266, "step": 14614 }, { "epoch": 2.095640952107829, "grad_norm": 0.27232563495635986, "learning_rate": 2.5221885508284392e-06, "loss": 0.2907, "step": 14615 }, { "epoch": 2.0957843418411244, "grad_norm": 0.2755977511405945, "learning_rate": 2.52146398479097e-06, "loss": 0.2792, "step": 14616 }, { "epoch": 2.0959277315744194, "grad_norm": 0.2669391930103302, "learning_rate": 2.5207394877521285e-06, "loss": 0.2834, "step": 14617 }, { "epoch": 2.0960711213077143, "grad_norm": 0.26858314871788025, "learning_rate": 2.520015059732086e-06, "loss": 0.3022, "step": 14618 }, { "epoch": 2.0962145110410093, "grad_norm": 0.28459101915359497, "learning_rate": 2.51929070075101e-06, "loss": 0.2849, "step": 14619 }, { "epoch": 2.0963579007743047, "grad_norm": 0.29578620195388794, "learning_rate": 2.5185664108290642e-06, "loss": 0.2703, "step": 14620 }, { "epoch": 2.0965012905075997, "grad_norm": 0.29022783041000366, "learning_rate": 2.5178421899864125e-06, "loss": 0.2759, "step": 14621 }, { "epoch": 2.0966446802408947, "grad_norm": 0.2583557963371277, "learning_rate": 2.5171180382432175e-06, "loss": 0.2997, "step": 14622 }, { "epoch": 2.0967880699741896, "grad_norm": 0.276818186044693, "learning_rate": 2.5163939556196355e-06, "loss": 0.2723, "step": 14623 }, { "epoch": 2.096931459707485, "grad_norm": 0.27780336141586304, "learning_rate": 2.515669942135825e-06, "loss": 0.2828, "step": 14624 }, { "epoch": 2.09707484944078, "grad_norm": 0.2827712297439575, "learning_rate": 2.514945997811942e-06, "loss": 0.2875, "step": 14625 }, { "epoch": 2.097218239174075, "grad_norm": 0.27832192182540894, "learning_rate": 2.514222122668139e-06, "loss": 0.2846, "step": 14626 }, { "epoch": 2.0973616289073704, "grad_norm": 0.2654266953468323, "learning_rate": 2.5134983167245706e-06, "loss": 0.2712, "step": 14627 }, { "epoch": 2.0975050186406654, "grad_norm": 0.25453630089759827, "learning_rate": 2.5127745800013824e-06, "loss": 0.2995, "step": 14628 }, { "epoch": 2.0976484083739604, "grad_norm": 0.27056747674942017, "learning_rate": 2.5120509125187233e-06, "loss": 0.2697, "step": 14629 }, { "epoch": 2.0977917981072554, "grad_norm": 0.26525017619132996, "learning_rate": 2.51132731429674e-06, "loss": 0.276, "step": 14630 }, { "epoch": 2.0979351878405508, "grad_norm": 0.26228758692741394, "learning_rate": 2.5106037853555766e-06, "loss": 0.2694, "step": 14631 }, { "epoch": 2.0980785775738457, "grad_norm": 0.25880107283592224, "learning_rate": 2.509880325715373e-06, "loss": 0.2793, "step": 14632 }, { "epoch": 2.0982219673071407, "grad_norm": 0.27955901622772217, "learning_rate": 2.509156935396272e-06, "loss": 0.2753, "step": 14633 }, { "epoch": 2.0983653570404357, "grad_norm": 0.2975037097930908, "learning_rate": 2.508433614418409e-06, "loss": 0.2793, "step": 14634 }, { "epoch": 2.098508746773731, "grad_norm": 0.2686154842376709, "learning_rate": 2.5077103628019223e-06, "loss": 0.2807, "step": 14635 }, { "epoch": 2.098652136507026, "grad_norm": 0.2892096936702728, "learning_rate": 2.506987180566945e-06, "loss": 0.2829, "step": 14636 }, { "epoch": 2.098795526240321, "grad_norm": 0.2720798850059509, "learning_rate": 2.506264067733609e-06, "loss": 0.2821, "step": 14637 }, { "epoch": 2.0989389159736165, "grad_norm": 0.25914809107780457, "learning_rate": 2.505541024322048e-06, "loss": 0.2542, "step": 14638 }, { "epoch": 2.0990823057069115, "grad_norm": 0.27924129366874695, "learning_rate": 2.5048180503523854e-06, "loss": 0.2993, "step": 14639 }, { "epoch": 2.0992256954402064, "grad_norm": 0.27436408400535583, "learning_rate": 2.5040951458447506e-06, "loss": 0.2744, "step": 14640 }, { "epoch": 2.0993690851735014, "grad_norm": 0.27077025175094604, "learning_rate": 2.503372310819267e-06, "loss": 0.2577, "step": 14641 }, { "epoch": 2.099512474906797, "grad_norm": 0.2808566987514496, "learning_rate": 2.5026495452960575e-06, "loss": 0.2947, "step": 14642 }, { "epoch": 2.099655864640092, "grad_norm": 0.2763063907623291, "learning_rate": 2.501926849295245e-06, "loss": 0.2915, "step": 14643 }, { "epoch": 2.0997992543733868, "grad_norm": 0.28970208764076233, "learning_rate": 2.501204222836945e-06, "loss": 0.2819, "step": 14644 }, { "epoch": 2.099942644106682, "grad_norm": 0.28248587250709534, "learning_rate": 2.5004816659412754e-06, "loss": 0.277, "step": 14645 }, { "epoch": 2.100086033839977, "grad_norm": 0.2560103237628937, "learning_rate": 2.4997591786283508e-06, "loss": 0.2959, "step": 14646 }, { "epoch": 2.100229423573272, "grad_norm": 0.2853938937187195, "learning_rate": 2.4990367609182846e-06, "loss": 0.2751, "step": 14647 }, { "epoch": 2.100372813306567, "grad_norm": 0.28160881996154785, "learning_rate": 2.498314412831188e-06, "loss": 0.2885, "step": 14648 }, { "epoch": 2.1005162030398625, "grad_norm": 0.26877719163894653, "learning_rate": 2.49759213438717e-06, "loss": 0.2818, "step": 14649 }, { "epoch": 2.1006595927731575, "grad_norm": 0.2767764627933502, "learning_rate": 2.4968699256063373e-06, "loss": 0.2889, "step": 14650 }, { "epoch": 2.1008029825064525, "grad_norm": 0.24919231235980988, "learning_rate": 2.4961477865087954e-06, "loss": 0.2851, "step": 14651 }, { "epoch": 2.1009463722397475, "grad_norm": 0.2806569039821625, "learning_rate": 2.4954257171146475e-06, "loss": 0.2861, "step": 14652 }, { "epoch": 2.101089761973043, "grad_norm": 0.27882641553878784, "learning_rate": 2.494703717443994e-06, "loss": 0.2796, "step": 14653 }, { "epoch": 2.101233151706338, "grad_norm": 0.2659989595413208, "learning_rate": 2.493981787516938e-06, "loss": 0.2739, "step": 14654 }, { "epoch": 2.101376541439633, "grad_norm": 0.2717256546020508, "learning_rate": 2.4932599273535722e-06, "loss": 0.2738, "step": 14655 }, { "epoch": 2.101519931172928, "grad_norm": 0.2994314134120941, "learning_rate": 2.4925381369739936e-06, "loss": 0.2819, "step": 14656 }, { "epoch": 2.101663320906223, "grad_norm": 0.2944682836532593, "learning_rate": 2.4918164163982968e-06, "loss": 0.3015, "step": 14657 }, { "epoch": 2.101806710639518, "grad_norm": 0.27472400665283203, "learning_rate": 2.4910947656465717e-06, "loss": 0.2941, "step": 14658 }, { "epoch": 2.101950100372813, "grad_norm": 0.26198533177375793, "learning_rate": 2.490373184738909e-06, "loss": 0.2964, "step": 14659 }, { "epoch": 2.1020934901061086, "grad_norm": 0.28219738602638245, "learning_rate": 2.4896516736953984e-06, "loss": 0.2902, "step": 14660 }, { "epoch": 2.1022368798394035, "grad_norm": 0.2639254033565521, "learning_rate": 2.488930232536122e-06, "loss": 0.2773, "step": 14661 }, { "epoch": 2.1023802695726985, "grad_norm": 0.29061856865882874, "learning_rate": 2.488208861281165e-06, "loss": 0.2674, "step": 14662 }, { "epoch": 2.1025236593059935, "grad_norm": 0.2880419194698334, "learning_rate": 2.487487559950609e-06, "loss": 0.2692, "step": 14663 }, { "epoch": 2.102667049039289, "grad_norm": 0.26972874999046326, "learning_rate": 2.486766328564535e-06, "loss": 0.2721, "step": 14664 }, { "epoch": 2.102810438772584, "grad_norm": 0.28780001401901245, "learning_rate": 2.486045167143022e-06, "loss": 0.2733, "step": 14665 }, { "epoch": 2.102953828505879, "grad_norm": 0.2785981297492981, "learning_rate": 2.4853240757061404e-06, "loss": 0.2808, "step": 14666 }, { "epoch": 2.1030972182391743, "grad_norm": 0.26121315360069275, "learning_rate": 2.48460305427397e-06, "loss": 0.2667, "step": 14667 }, { "epoch": 2.1032406079724693, "grad_norm": 0.2749711275100708, "learning_rate": 2.4838821028665818e-06, "loss": 0.3131, "step": 14668 }, { "epoch": 2.1033839977057642, "grad_norm": 0.26899638772010803, "learning_rate": 2.4831612215040448e-06, "loss": 0.2892, "step": 14669 }, { "epoch": 2.103527387439059, "grad_norm": 0.27722665667533875, "learning_rate": 2.48244041020643e-06, "loss": 0.2737, "step": 14670 }, { "epoch": 2.1036707771723546, "grad_norm": 0.2970122694969177, "learning_rate": 2.4817196689937993e-06, "loss": 0.2748, "step": 14671 }, { "epoch": 2.1038141669056496, "grad_norm": 0.2783261239528656, "learning_rate": 2.4809989978862193e-06, "loss": 0.2608, "step": 14672 }, { "epoch": 2.1039575566389446, "grad_norm": 0.27771198749542236, "learning_rate": 2.4802783969037517e-06, "loss": 0.2755, "step": 14673 }, { "epoch": 2.1041009463722395, "grad_norm": 0.276395708322525, "learning_rate": 2.4795578660664584e-06, "loss": 0.275, "step": 14674 }, { "epoch": 2.104244336105535, "grad_norm": 0.27310341596603394, "learning_rate": 2.4788374053943963e-06, "loss": 0.2963, "step": 14675 }, { "epoch": 2.10438772583883, "grad_norm": 0.26147356629371643, "learning_rate": 2.478117014907625e-06, "loss": 0.2843, "step": 14676 }, { "epoch": 2.104531115572125, "grad_norm": 0.26890289783477783, "learning_rate": 2.477396694626194e-06, "loss": 0.2751, "step": 14677 }, { "epoch": 2.1046745053054203, "grad_norm": 0.2741742432117462, "learning_rate": 2.476676444570159e-06, "loss": 0.2858, "step": 14678 }, { "epoch": 2.1048178950387153, "grad_norm": 0.2600882053375244, "learning_rate": 2.4759562647595703e-06, "loss": 0.2576, "step": 14679 }, { "epoch": 2.1049612847720103, "grad_norm": 0.2731885313987732, "learning_rate": 2.475236155214476e-06, "loss": 0.2716, "step": 14680 }, { "epoch": 2.1051046745053053, "grad_norm": 0.29120731353759766, "learning_rate": 2.4745161159549258e-06, "loss": 0.2975, "step": 14681 }, { "epoch": 2.1052480642386007, "grad_norm": 0.2982240617275238, "learning_rate": 2.4737961470009597e-06, "loss": 0.2882, "step": 14682 }, { "epoch": 2.1053914539718956, "grad_norm": 0.2679056227207184, "learning_rate": 2.473076248372623e-06, "loss": 0.2785, "step": 14683 }, { "epoch": 2.1055348437051906, "grad_norm": 0.2684513032436371, "learning_rate": 2.4723564200899563e-06, "loss": 0.2705, "step": 14684 }, { "epoch": 2.1056782334384856, "grad_norm": 0.27005451917648315, "learning_rate": 2.471636662172999e-06, "loss": 0.2716, "step": 14685 }, { "epoch": 2.105821623171781, "grad_norm": 0.2579443156719208, "learning_rate": 2.470916974641787e-06, "loss": 0.2891, "step": 14686 }, { "epoch": 2.105965012905076, "grad_norm": 0.28489619493484497, "learning_rate": 2.470197357516356e-06, "loss": 0.2748, "step": 14687 }, { "epoch": 2.106108402638371, "grad_norm": 0.2686613202095032, "learning_rate": 2.4694778108167392e-06, "loss": 0.2844, "step": 14688 }, { "epoch": 2.1062517923716664, "grad_norm": 0.28526121377944946, "learning_rate": 2.468758334562968e-06, "loss": 0.3007, "step": 14689 }, { "epoch": 2.1063951821049613, "grad_norm": 0.27238962054252625, "learning_rate": 2.4680389287750704e-06, "loss": 0.2831, "step": 14690 }, { "epoch": 2.1065385718382563, "grad_norm": 0.2674783170223236, "learning_rate": 2.467319593473075e-06, "loss": 0.2567, "step": 14691 }, { "epoch": 2.1066819615715513, "grad_norm": 0.2889416217803955, "learning_rate": 2.466600328677007e-06, "loss": 0.2632, "step": 14692 }, { "epoch": 2.1068253513048467, "grad_norm": 0.2865450978279114, "learning_rate": 2.4658811344068875e-06, "loss": 0.278, "step": 14693 }, { "epoch": 2.1069687410381417, "grad_norm": 0.26085346937179565, "learning_rate": 2.4651620106827384e-06, "loss": 0.2846, "step": 14694 }, { "epoch": 2.1071121307714367, "grad_norm": 0.27611130475997925, "learning_rate": 2.4644429575245797e-06, "loss": 0.3003, "step": 14695 }, { "epoch": 2.107255520504732, "grad_norm": 0.2960037887096405, "learning_rate": 2.463723974952429e-06, "loss": 0.2789, "step": 14696 }, { "epoch": 2.107398910238027, "grad_norm": 0.26519736647605896, "learning_rate": 2.463005062986303e-06, "loss": 0.2872, "step": 14697 }, { "epoch": 2.107542299971322, "grad_norm": 0.284824401140213, "learning_rate": 2.4622862216462116e-06, "loss": 0.2677, "step": 14698 }, { "epoch": 2.107685689704617, "grad_norm": 0.2658900320529938, "learning_rate": 2.461567450952168e-06, "loss": 0.2889, "step": 14699 }, { "epoch": 2.1078290794379124, "grad_norm": 0.26869258284568787, "learning_rate": 2.460848750924182e-06, "loss": 0.281, "step": 14700 }, { "epoch": 2.1079724691712074, "grad_norm": 0.2722095549106598, "learning_rate": 2.4601301215822605e-06, "loss": 0.2869, "step": 14701 }, { "epoch": 2.1081158589045024, "grad_norm": 0.271731972694397, "learning_rate": 2.4594115629464093e-06, "loss": 0.2939, "step": 14702 }, { "epoch": 2.1082592486377973, "grad_norm": 0.28684672713279724, "learning_rate": 2.4586930750366323e-06, "loss": 0.2697, "step": 14703 }, { "epoch": 2.1084026383710928, "grad_norm": 0.27736949920654297, "learning_rate": 2.457974657872931e-06, "loss": 0.2777, "step": 14704 }, { "epoch": 2.1085460281043877, "grad_norm": 0.2823498547077179, "learning_rate": 2.4572563114753047e-06, "loss": 0.2689, "step": 14705 }, { "epoch": 2.1086894178376827, "grad_norm": 0.27715370059013367, "learning_rate": 2.456538035863751e-06, "loss": 0.2848, "step": 14706 }, { "epoch": 2.108832807570978, "grad_norm": 0.29056060314178467, "learning_rate": 2.455819831058265e-06, "loss": 0.2722, "step": 14707 }, { "epoch": 2.108976197304273, "grad_norm": 0.2632509171962738, "learning_rate": 2.455101697078845e-06, "loss": 0.2814, "step": 14708 }, { "epoch": 2.109119587037568, "grad_norm": 0.2659444510936737, "learning_rate": 2.454383633945476e-06, "loss": 0.2951, "step": 14709 }, { "epoch": 2.109262976770863, "grad_norm": 0.2843572199344635, "learning_rate": 2.4536656416781508e-06, "loss": 0.2904, "step": 14710 }, { "epoch": 2.1094063665041585, "grad_norm": 0.2580637037754059, "learning_rate": 2.4529477202968572e-06, "loss": 0.2746, "step": 14711 }, { "epoch": 2.1095497562374534, "grad_norm": 0.2557590901851654, "learning_rate": 2.4522298698215807e-06, "loss": 0.2988, "step": 14712 }, { "epoch": 2.1096931459707484, "grad_norm": 0.27867481112480164, "learning_rate": 2.4515120902723056e-06, "loss": 0.2924, "step": 14713 }, { "epoch": 2.1098365357040434, "grad_norm": 0.2663123905658722, "learning_rate": 2.4507943816690156e-06, "loss": 0.2758, "step": 14714 }, { "epoch": 2.109979925437339, "grad_norm": 0.2713906764984131, "learning_rate": 2.450076744031686e-06, "loss": 0.271, "step": 14715 }, { "epoch": 2.110123315170634, "grad_norm": 0.2876513600349426, "learning_rate": 2.449359177380299e-06, "loss": 0.2668, "step": 14716 }, { "epoch": 2.1102667049039288, "grad_norm": 0.2921755015850067, "learning_rate": 2.4486416817348274e-06, "loss": 0.2849, "step": 14717 }, { "epoch": 2.110410094637224, "grad_norm": 0.27571117877960205, "learning_rate": 2.4479242571152474e-06, "loss": 0.2932, "step": 14718 }, { "epoch": 2.110553484370519, "grad_norm": 0.2806794047355652, "learning_rate": 2.447206903541532e-06, "loss": 0.276, "step": 14719 }, { "epoch": 2.110696874103814, "grad_norm": 0.2654176950454712, "learning_rate": 2.446489621033647e-06, "loss": 0.2832, "step": 14720 }, { "epoch": 2.110840263837109, "grad_norm": 0.27875834703445435, "learning_rate": 2.445772409611564e-06, "loss": 0.2937, "step": 14721 }, { "epoch": 2.1109836535704045, "grad_norm": 0.2913077771663666, "learning_rate": 2.445055269295247e-06, "loss": 0.2804, "step": 14722 }, { "epoch": 2.1111270433036995, "grad_norm": 0.2821854054927826, "learning_rate": 2.4443382001046595e-06, "loss": 0.293, "step": 14723 }, { "epoch": 2.1112704330369945, "grad_norm": 0.2809799015522003, "learning_rate": 2.443621202059769e-06, "loss": 0.2733, "step": 14724 }, { "epoch": 2.1114138227702894, "grad_norm": 0.27612680196762085, "learning_rate": 2.4429042751805297e-06, "loss": 0.271, "step": 14725 }, { "epoch": 2.111557212503585, "grad_norm": 0.26116809248924255, "learning_rate": 2.4421874194869015e-06, "loss": 0.2942, "step": 14726 }, { "epoch": 2.11170060223688, "grad_norm": 0.28266045451164246, "learning_rate": 2.4414706349988414e-06, "loss": 0.2878, "step": 14727 }, { "epoch": 2.111843991970175, "grad_norm": 0.26846373081207275, "learning_rate": 2.4407539217363037e-06, "loss": 0.3004, "step": 14728 }, { "epoch": 2.1119873817034702, "grad_norm": 0.27157676219940186, "learning_rate": 2.440037279719239e-06, "loss": 0.2831, "step": 14729 }, { "epoch": 2.112130771436765, "grad_norm": 0.28611695766448975, "learning_rate": 2.4393207089676007e-06, "loss": 0.2785, "step": 14730 }, { "epoch": 2.11227416117006, "grad_norm": 0.29164692759513855, "learning_rate": 2.4386042095013327e-06, "loss": 0.2834, "step": 14731 }, { "epoch": 2.112417550903355, "grad_norm": 0.27095067501068115, "learning_rate": 2.4378877813403836e-06, "loss": 0.2719, "step": 14732 }, { "epoch": 2.1125609406366506, "grad_norm": 0.27719181776046753, "learning_rate": 2.437171424504697e-06, "loss": 0.2794, "step": 14733 }, { "epoch": 2.1127043303699455, "grad_norm": 0.2844369113445282, "learning_rate": 2.436455139014216e-06, "loss": 0.2622, "step": 14734 }, { "epoch": 2.1128477201032405, "grad_norm": 0.27906835079193115, "learning_rate": 2.4357389248888823e-06, "loss": 0.2791, "step": 14735 }, { "epoch": 2.1129911098365355, "grad_norm": 0.2739616930484772, "learning_rate": 2.4350227821486304e-06, "loss": 0.2963, "step": 14736 }, { "epoch": 2.113134499569831, "grad_norm": 0.2935791611671448, "learning_rate": 2.4343067108133987e-06, "loss": 0.2878, "step": 14737 }, { "epoch": 2.113277889303126, "grad_norm": 0.28637275099754333, "learning_rate": 2.4335907109031216e-06, "loss": 0.2608, "step": 14738 }, { "epoch": 2.113421279036421, "grad_norm": 0.2793984115123749, "learning_rate": 2.4328747824377308e-06, "loss": 0.2973, "step": 14739 }, { "epoch": 2.1135646687697163, "grad_norm": 0.2872292399406433, "learning_rate": 2.432158925437157e-06, "loss": 0.2886, "step": 14740 }, { "epoch": 2.1137080585030112, "grad_norm": 0.29470306634902954, "learning_rate": 2.4314431399213288e-06, "loss": 0.2802, "step": 14741 }, { "epoch": 2.113851448236306, "grad_norm": 0.28811171650886536, "learning_rate": 2.430727425910172e-06, "loss": 0.2789, "step": 14742 }, { "epoch": 2.113994837969601, "grad_norm": 0.2822772264480591, "learning_rate": 2.430011783423612e-06, "loss": 0.2903, "step": 14743 }, { "epoch": 2.1141382277028966, "grad_norm": 0.2908514440059662, "learning_rate": 2.4292962124815704e-06, "loss": 0.2802, "step": 14744 }, { "epoch": 2.1142816174361916, "grad_norm": 0.28043973445892334, "learning_rate": 2.428580713103968e-06, "loss": 0.2618, "step": 14745 }, { "epoch": 2.1144250071694866, "grad_norm": 0.2747686207294464, "learning_rate": 2.427865285310724e-06, "loss": 0.2919, "step": 14746 }, { "epoch": 2.114568396902782, "grad_norm": 0.27049288153648376, "learning_rate": 2.4271499291217527e-06, "loss": 0.2868, "step": 14747 }, { "epoch": 2.114711786636077, "grad_norm": 0.2614326477050781, "learning_rate": 2.4264346445569695e-06, "loss": 0.2769, "step": 14748 }, { "epoch": 2.114855176369372, "grad_norm": 0.26794394850730896, "learning_rate": 2.425719431636287e-06, "loss": 0.2962, "step": 14749 }, { "epoch": 2.114998566102667, "grad_norm": 0.26500797271728516, "learning_rate": 2.425004290379615e-06, "loss": 0.2879, "step": 14750 }, { "epoch": 2.1151419558359623, "grad_norm": 0.27015218138694763, "learning_rate": 2.4242892208068646e-06, "loss": 0.2913, "step": 14751 }, { "epoch": 2.1152853455692573, "grad_norm": 0.2765408754348755, "learning_rate": 2.4235742229379385e-06, "loss": 0.2981, "step": 14752 }, { "epoch": 2.1154287353025523, "grad_norm": 0.2759801149368286, "learning_rate": 2.4228592967927422e-06, "loss": 0.262, "step": 14753 }, { "epoch": 2.1155721250358472, "grad_norm": 0.27828094363212585, "learning_rate": 2.4221444423911794e-06, "loss": 0.2833, "step": 14754 }, { "epoch": 2.1157155147691427, "grad_norm": 0.26422354578971863, "learning_rate": 2.4214296597531494e-06, "loss": 0.2759, "step": 14755 }, { "epoch": 2.1158589045024376, "grad_norm": 0.27772340178489685, "learning_rate": 2.420714948898551e-06, "loss": 0.2789, "step": 14756 }, { "epoch": 2.1160022942357326, "grad_norm": 0.26871049404144287, "learning_rate": 2.4200003098472806e-06, "loss": 0.2618, "step": 14757 }, { "epoch": 2.116145683969028, "grad_norm": 0.26283055543899536, "learning_rate": 2.4192857426192336e-06, "loss": 0.284, "step": 14758 }, { "epoch": 2.116289073702323, "grad_norm": 0.27046334743499756, "learning_rate": 2.4185712472343008e-06, "loss": 0.2835, "step": 14759 }, { "epoch": 2.116432463435618, "grad_norm": 0.2978654205799103, "learning_rate": 2.417856823712374e-06, "loss": 0.2972, "step": 14760 }, { "epoch": 2.116575853168913, "grad_norm": 0.2713073790073395, "learning_rate": 2.4171424720733404e-06, "loss": 0.2798, "step": 14761 }, { "epoch": 2.1167192429022084, "grad_norm": 0.30412039160728455, "learning_rate": 2.4164281923370904e-06, "loss": 0.2907, "step": 14762 }, { "epoch": 2.1168626326355033, "grad_norm": 0.30495813488960266, "learning_rate": 2.4157139845235023e-06, "loss": 0.2794, "step": 14763 }, { "epoch": 2.1170060223687983, "grad_norm": 0.2658679783344269, "learning_rate": 2.414999848652462e-06, "loss": 0.2879, "step": 14764 }, { "epoch": 2.1171494121020933, "grad_norm": 0.2682899534702301, "learning_rate": 2.414285784743849e-06, "loss": 0.2911, "step": 14765 }, { "epoch": 2.1172928018353887, "grad_norm": 0.2831772267818451, "learning_rate": 2.4135717928175425e-06, "loss": 0.2731, "step": 14766 }, { "epoch": 2.1174361915686837, "grad_norm": 0.26429569721221924, "learning_rate": 2.412857872893418e-06, "loss": 0.2958, "step": 14767 }, { "epoch": 2.1175795813019787, "grad_norm": 0.2708892226219177, "learning_rate": 2.4121440249913536e-06, "loss": 0.2627, "step": 14768 }, { "epoch": 2.117722971035274, "grad_norm": 0.3052862584590912, "learning_rate": 2.411430249131216e-06, "loss": 0.2734, "step": 14769 }, { "epoch": 2.117866360768569, "grad_norm": 0.281310498714447, "learning_rate": 2.4107165453328783e-06, "loss": 0.2646, "step": 14770 }, { "epoch": 2.118009750501864, "grad_norm": 0.27346816658973694, "learning_rate": 2.410002913616209e-06, "loss": 0.2842, "step": 14771 }, { "epoch": 2.118153140235159, "grad_norm": 0.2697531282901764, "learning_rate": 2.409289354001075e-06, "loss": 0.2763, "step": 14772 }, { "epoch": 2.1182965299684544, "grad_norm": 0.27355772256851196, "learning_rate": 2.408575866507341e-06, "loss": 0.2873, "step": 14773 }, { "epoch": 2.1184399197017494, "grad_norm": 0.2729785144329071, "learning_rate": 2.407862451154867e-06, "loss": 0.2861, "step": 14774 }, { "epoch": 2.1185833094350444, "grad_norm": 0.26621097326278687, "learning_rate": 2.407149107963515e-06, "loss": 0.2839, "step": 14775 }, { "epoch": 2.1187266991683393, "grad_norm": 0.27419328689575195, "learning_rate": 2.4064358369531437e-06, "loss": 0.2823, "step": 14776 }, { "epoch": 2.1188700889016348, "grad_norm": 0.27185261249542236, "learning_rate": 2.4057226381436084e-06, "loss": 0.2901, "step": 14777 }, { "epoch": 2.1190134786349297, "grad_norm": 0.2709982991218567, "learning_rate": 2.4050095115547644e-06, "loss": 0.2714, "step": 14778 }, { "epoch": 2.1191568683682247, "grad_norm": 0.2648300528526306, "learning_rate": 2.4042964572064636e-06, "loss": 0.2929, "step": 14779 }, { "epoch": 2.11930025810152, "grad_norm": 0.27807819843292236, "learning_rate": 2.4035834751185567e-06, "loss": 0.2682, "step": 14780 }, { "epoch": 2.119443647834815, "grad_norm": 0.2795445919036865, "learning_rate": 2.4028705653108918e-06, "loss": 0.2985, "step": 14781 }, { "epoch": 2.11958703756811, "grad_norm": 0.287754088640213, "learning_rate": 2.402157727803315e-06, "loss": 0.2782, "step": 14782 }, { "epoch": 2.119730427301405, "grad_norm": 0.27755221724510193, "learning_rate": 2.4014449626156712e-06, "loss": 0.2971, "step": 14783 }, { "epoch": 2.1198738170347005, "grad_norm": 0.2801445722579956, "learning_rate": 2.4007322697678042e-06, "loss": 0.271, "step": 14784 }, { "epoch": 2.1200172067679954, "grad_norm": 0.2890595495700836, "learning_rate": 2.4000196492795504e-06, "loss": 0.2804, "step": 14785 }, { "epoch": 2.1201605965012904, "grad_norm": 0.297775000333786, "learning_rate": 2.39930710117075e-06, "loss": 0.2807, "step": 14786 }, { "epoch": 2.1203039862345854, "grad_norm": 0.2844022810459137, "learning_rate": 2.398594625461239e-06, "loss": 0.2941, "step": 14787 }, { "epoch": 2.120447375967881, "grad_norm": 0.2588433623313904, "learning_rate": 2.397882222170852e-06, "loss": 0.2635, "step": 14788 }, { "epoch": 2.1205907657011758, "grad_norm": 0.2845669090747833, "learning_rate": 2.397169891319423e-06, "loss": 0.2733, "step": 14789 }, { "epoch": 2.1207341554344707, "grad_norm": 0.2917141616344452, "learning_rate": 2.3964576329267784e-06, "loss": 0.2945, "step": 14790 }, { "epoch": 2.120877545167766, "grad_norm": 0.2792814373970032, "learning_rate": 2.395745447012749e-06, "loss": 0.2778, "step": 14791 }, { "epoch": 2.121020934901061, "grad_norm": 0.2694900929927826, "learning_rate": 2.39503333359716e-06, "loss": 0.2694, "step": 14792 }, { "epoch": 2.121164324634356, "grad_norm": 0.27665889263153076, "learning_rate": 2.394321292699835e-06, "loss": 0.2887, "step": 14793 }, { "epoch": 2.121307714367651, "grad_norm": 0.25923946499824524, "learning_rate": 2.3936093243405977e-06, "loss": 0.2918, "step": 14794 }, { "epoch": 2.1214511041009465, "grad_norm": 0.2827438712120056, "learning_rate": 2.392897428539267e-06, "loss": 0.2762, "step": 14795 }, { "epoch": 2.1215944938342415, "grad_norm": 0.30731430649757385, "learning_rate": 2.3921856053156616e-06, "loss": 0.3081, "step": 14796 }, { "epoch": 2.1217378835675365, "grad_norm": 0.2658998668193817, "learning_rate": 2.3914738546895976e-06, "loss": 0.2882, "step": 14797 }, { "epoch": 2.121881273300832, "grad_norm": 0.2865254580974579, "learning_rate": 2.3907621766808888e-06, "loss": 0.2827, "step": 14798 }, { "epoch": 2.122024663034127, "grad_norm": 0.2679305374622345, "learning_rate": 2.3900505713093468e-06, "loss": 0.2889, "step": 14799 }, { "epoch": 2.122168052767422, "grad_norm": 0.27533701062202454, "learning_rate": 2.389339038594784e-06, "loss": 0.2803, "step": 14800 }, { "epoch": 2.122311442500717, "grad_norm": 0.28402698040008545, "learning_rate": 2.3886275785570052e-06, "loss": 0.2977, "step": 14801 }, { "epoch": 2.122454832234012, "grad_norm": 0.27887973189353943, "learning_rate": 2.387916191215817e-06, "loss": 0.2618, "step": 14802 }, { "epoch": 2.122598221967307, "grad_norm": 0.2657504379749298, "learning_rate": 2.3872048765910237e-06, "loss": 0.2651, "step": 14803 }, { "epoch": 2.122741611700602, "grad_norm": 0.27481210231781006, "learning_rate": 2.3864936347024274e-06, "loss": 0.2814, "step": 14804 }, { "epoch": 2.122885001433897, "grad_norm": 0.2916565537452698, "learning_rate": 2.3857824655698286e-06, "loss": 0.271, "step": 14805 }, { "epoch": 2.1230283911671926, "grad_norm": 0.27245816588401794, "learning_rate": 2.3850713692130263e-06, "loss": 0.2957, "step": 14806 }, { "epoch": 2.1231717809004875, "grad_norm": 0.28346362709999084, "learning_rate": 2.3843603456518122e-06, "loss": 0.2895, "step": 14807 }, { "epoch": 2.1233151706337825, "grad_norm": 0.2727716863155365, "learning_rate": 2.3836493949059832e-06, "loss": 0.2765, "step": 14808 }, { "epoch": 2.123458560367078, "grad_norm": 0.28184062242507935, "learning_rate": 2.3829385169953294e-06, "loss": 0.2793, "step": 14809 }, { "epoch": 2.123601950100373, "grad_norm": 0.27662259340286255, "learning_rate": 2.382227711939642e-06, "loss": 0.282, "step": 14810 }, { "epoch": 2.123745339833668, "grad_norm": 0.3089098036289215, "learning_rate": 2.38151697975871e-06, "loss": 0.2824, "step": 14811 }, { "epoch": 2.123888729566963, "grad_norm": 0.27832695841789246, "learning_rate": 2.3808063204723142e-06, "loss": 0.2665, "step": 14812 }, { "epoch": 2.1240321193002583, "grad_norm": 0.2826739549636841, "learning_rate": 2.3800957341002424e-06, "loss": 0.2695, "step": 14813 }, { "epoch": 2.1241755090335532, "grad_norm": 0.2723598778247833, "learning_rate": 2.3793852206622754e-06, "loss": 0.2857, "step": 14814 }, { "epoch": 2.124318898766848, "grad_norm": 0.2652181088924408, "learning_rate": 2.378674780178193e-06, "loss": 0.2804, "step": 14815 }, { "epoch": 2.124462288500143, "grad_norm": 0.40826982259750366, "learning_rate": 2.3779644126677737e-06, "loss": 0.2879, "step": 14816 }, { "epoch": 2.1246056782334386, "grad_norm": 0.28615137934684753, "learning_rate": 2.3772541181507895e-06, "loss": 0.3021, "step": 14817 }, { "epoch": 2.1247490679667336, "grad_norm": 0.27346673607826233, "learning_rate": 2.376543896647017e-06, "loss": 0.27, "step": 14818 }, { "epoch": 2.1248924577000285, "grad_norm": 0.27857983112335205, "learning_rate": 2.3758337481762267e-06, "loss": 0.2766, "step": 14819 }, { "epoch": 2.125035847433324, "grad_norm": 0.27421295642852783, "learning_rate": 2.375123672758188e-06, "loss": 0.283, "step": 14820 }, { "epoch": 2.125179237166619, "grad_norm": 0.2869506776332855, "learning_rate": 2.374413670412668e-06, "loss": 0.2929, "step": 14821 }, { "epoch": 2.125322626899914, "grad_norm": 0.2861037850379944, "learning_rate": 2.373703741159434e-06, "loss": 0.3071, "step": 14822 }, { "epoch": 2.125466016633209, "grad_norm": 0.4115258455276489, "learning_rate": 2.372993885018247e-06, "loss": 0.2848, "step": 14823 }, { "epoch": 2.1256094063665043, "grad_norm": 0.26139166951179504, "learning_rate": 2.3722841020088687e-06, "loss": 0.2784, "step": 14824 }, { "epoch": 2.1257527960997993, "grad_norm": 0.2720594108104706, "learning_rate": 2.371574392151059e-06, "loss": 0.2885, "step": 14825 }, { "epoch": 2.1258961858330943, "grad_norm": 0.2795441746711731, "learning_rate": 2.3708647554645753e-06, "loss": 0.2673, "step": 14826 }, { "epoch": 2.1260395755663897, "grad_norm": 0.28892672061920166, "learning_rate": 2.3701551919691734e-06, "loss": 0.2933, "step": 14827 }, { "epoch": 2.1261829652996846, "grad_norm": 0.28858083486557007, "learning_rate": 2.3694457016846044e-06, "loss": 0.2809, "step": 14828 }, { "epoch": 2.1263263550329796, "grad_norm": 0.2590523064136505, "learning_rate": 2.36873628463062e-06, "loss": 0.2893, "step": 14829 }, { "epoch": 2.1264697447662746, "grad_norm": 0.2794933319091797, "learning_rate": 2.36802694082697e-06, "loss": 0.2834, "step": 14830 }, { "epoch": 2.12661313449957, "grad_norm": 0.2820284068584442, "learning_rate": 2.3673176702934013e-06, "loss": 0.3119, "step": 14831 }, { "epoch": 2.126756524232865, "grad_norm": 0.28475379943847656, "learning_rate": 2.3666084730496586e-06, "loss": 0.2856, "step": 14832 }, { "epoch": 2.12689991396616, "grad_norm": 0.27902233600616455, "learning_rate": 2.365899349115485e-06, "loss": 0.294, "step": 14833 }, { "epoch": 2.127043303699455, "grad_norm": 0.29245519638061523, "learning_rate": 2.3651902985106224e-06, "loss": 0.3096, "step": 14834 }, { "epoch": 2.1271866934327504, "grad_norm": 0.27645283937454224, "learning_rate": 2.3644813212548077e-06, "loss": 0.2923, "step": 14835 }, { "epoch": 2.1273300831660453, "grad_norm": 0.2744741141796112, "learning_rate": 2.3637724173677795e-06, "loss": 0.2735, "step": 14836 }, { "epoch": 2.1274734728993403, "grad_norm": 0.28561943769454956, "learning_rate": 2.363063586869272e-06, "loss": 0.2642, "step": 14837 }, { "epoch": 2.1276168626326353, "grad_norm": 0.27569466829299927, "learning_rate": 2.3623548297790196e-06, "loss": 0.2845, "step": 14838 }, { "epoch": 2.1277602523659307, "grad_norm": 0.28559744358062744, "learning_rate": 2.361646146116749e-06, "loss": 0.31, "step": 14839 }, { "epoch": 2.1279036420992257, "grad_norm": 0.3086552023887634, "learning_rate": 2.3609375359021913e-06, "loss": 0.2704, "step": 14840 }, { "epoch": 2.1280470318325206, "grad_norm": 0.2850794792175293, "learning_rate": 2.3602289991550732e-06, "loss": 0.2888, "step": 14841 }, { "epoch": 2.128190421565816, "grad_norm": 0.2859083414077759, "learning_rate": 2.359520535895119e-06, "loss": 0.2846, "step": 14842 }, { "epoch": 2.128333811299111, "grad_norm": 0.2663607895374298, "learning_rate": 2.3588121461420525e-06, "loss": 0.2817, "step": 14843 }, { "epoch": 2.128477201032406, "grad_norm": 0.26712825894355774, "learning_rate": 2.3581038299155915e-06, "loss": 0.2738, "step": 14844 }, { "epoch": 2.128620590765701, "grad_norm": 0.2863178551197052, "learning_rate": 2.3573955872354564e-06, "loss": 0.2823, "step": 14845 }, { "epoch": 2.1287639804989964, "grad_norm": 0.264274924993515, "learning_rate": 2.356687418121363e-06, "loss": 0.2776, "step": 14846 }, { "epoch": 2.1289073702322914, "grad_norm": 0.2820545732975006, "learning_rate": 2.355979322593025e-06, "loss": 0.2768, "step": 14847 }, { "epoch": 2.1290507599655863, "grad_norm": 0.27945366501808167, "learning_rate": 2.3552713006701557e-06, "loss": 0.2895, "step": 14848 }, { "epoch": 2.1291941496988818, "grad_norm": 0.2570236921310425, "learning_rate": 2.3545633523724653e-06, "loss": 0.2942, "step": 14849 }, { "epoch": 2.1293375394321767, "grad_norm": 0.2850980758666992, "learning_rate": 2.3538554777196614e-06, "loss": 0.2957, "step": 14850 }, { "epoch": 2.1294809291654717, "grad_norm": 0.2731406092643738, "learning_rate": 2.3531476767314503e-06, "loss": 0.2791, "step": 14851 }, { "epoch": 2.1296243188987667, "grad_norm": 0.25276267528533936, "learning_rate": 2.352439949427537e-06, "loss": 0.2864, "step": 14852 }, { "epoch": 2.129767708632062, "grad_norm": 0.28135940432548523, "learning_rate": 2.351732295827622e-06, "loss": 0.2912, "step": 14853 }, { "epoch": 2.129911098365357, "grad_norm": 0.2637842893600464, "learning_rate": 2.351024715951408e-06, "loss": 0.2943, "step": 14854 }, { "epoch": 2.130054488098652, "grad_norm": 0.27065983414649963, "learning_rate": 2.35031720981859e-06, "loss": 0.2615, "step": 14855 }, { "epoch": 2.130197877831947, "grad_norm": 0.2711428105831146, "learning_rate": 2.349609777448864e-06, "loss": 0.2857, "step": 14856 }, { "epoch": 2.1303412675652424, "grad_norm": 0.28359532356262207, "learning_rate": 2.3489024188619252e-06, "loss": 0.2942, "step": 14857 }, { "epoch": 2.1304846572985374, "grad_norm": 0.2852613031864166, "learning_rate": 2.348195134077465e-06, "loss": 0.2865, "step": 14858 }, { "epoch": 2.1306280470318324, "grad_norm": 0.2670895755290985, "learning_rate": 2.3474879231151727e-06, "loss": 0.27, "step": 14859 }, { "epoch": 2.130771436765128, "grad_norm": 0.26103559136390686, "learning_rate": 2.3467807859947383e-06, "loss": 0.2713, "step": 14860 }, { "epoch": 2.130914826498423, "grad_norm": 0.29431483149528503, "learning_rate": 2.3460737227358434e-06, "loss": 0.2684, "step": 14861 }, { "epoch": 2.1310582162317178, "grad_norm": 0.29283976554870605, "learning_rate": 2.3453667333581738e-06, "loss": 0.29, "step": 14862 }, { "epoch": 2.1312016059650127, "grad_norm": 0.2669470012187958, "learning_rate": 2.3446598178814106e-06, "loss": 0.2803, "step": 14863 }, { "epoch": 2.131344995698308, "grad_norm": 0.2764682471752167, "learning_rate": 2.343952976325233e-06, "loss": 0.2818, "step": 14864 }, { "epoch": 2.131488385431603, "grad_norm": 0.27341270446777344, "learning_rate": 2.3432462087093212e-06, "loss": 0.2832, "step": 14865 }, { "epoch": 2.131631775164898, "grad_norm": 0.2751372158527374, "learning_rate": 2.3425395150533465e-06, "loss": 0.2817, "step": 14866 }, { "epoch": 2.131775164898193, "grad_norm": 0.2767936587333679, "learning_rate": 2.341832895376983e-06, "loss": 0.2763, "step": 14867 }, { "epoch": 2.1319185546314885, "grad_norm": 0.30071401596069336, "learning_rate": 2.341126349699901e-06, "loss": 0.2786, "step": 14868 }, { "epoch": 2.1320619443647835, "grad_norm": 0.26223230361938477, "learning_rate": 2.3404198780417735e-06, "loss": 0.2676, "step": 14869 }, { "epoch": 2.1322053340980784, "grad_norm": 0.25928354263305664, "learning_rate": 2.339713480422267e-06, "loss": 0.2893, "step": 14870 }, { "epoch": 2.132348723831374, "grad_norm": 0.2696389853954315, "learning_rate": 2.339007156861043e-06, "loss": 0.2613, "step": 14871 }, { "epoch": 2.132492113564669, "grad_norm": 0.2742818593978882, "learning_rate": 2.3383009073777667e-06, "loss": 0.3028, "step": 14872 }, { "epoch": 2.132635503297964, "grad_norm": 0.2779621481895447, "learning_rate": 2.3375947319920983e-06, "loss": 0.2876, "step": 14873 }, { "epoch": 2.132778893031259, "grad_norm": 0.27482354640960693, "learning_rate": 2.336888630723697e-06, "loss": 0.2841, "step": 14874 }, { "epoch": 2.132922282764554, "grad_norm": 0.2915154993534088, "learning_rate": 2.3361826035922203e-06, "loss": 0.2953, "step": 14875 }, { "epoch": 2.133065672497849, "grad_norm": 0.27343568205833435, "learning_rate": 2.3354766506173232e-06, "loss": 0.2774, "step": 14876 }, { "epoch": 2.133209062231144, "grad_norm": 0.2821274995803833, "learning_rate": 2.3347707718186557e-06, "loss": 0.2941, "step": 14877 }, { "epoch": 2.1333524519644396, "grad_norm": 0.25928255915641785, "learning_rate": 2.3340649672158705e-06, "loss": 0.2641, "step": 14878 }, { "epoch": 2.1334958416977345, "grad_norm": 0.2895418107509613, "learning_rate": 2.333359236828615e-06, "loss": 0.2774, "step": 14879 }, { "epoch": 2.1336392314310295, "grad_norm": 0.2814124822616577, "learning_rate": 2.3326535806765365e-06, "loss": 0.2643, "step": 14880 }, { "epoch": 2.1337826211643245, "grad_norm": 0.2843765616416931, "learning_rate": 2.3319479987792817e-06, "loss": 0.27, "step": 14881 }, { "epoch": 2.13392601089762, "grad_norm": 0.2813150882720947, "learning_rate": 2.3312424911564875e-06, "loss": 0.2727, "step": 14882 }, { "epoch": 2.134069400630915, "grad_norm": 0.305385023355484, "learning_rate": 2.3305370578277973e-06, "loss": 0.2958, "step": 14883 }, { "epoch": 2.13421279036421, "grad_norm": 0.30660340189933777, "learning_rate": 2.3298316988128494e-06, "loss": 0.2872, "step": 14884 }, { "epoch": 2.134356180097505, "grad_norm": 0.2586281895637512, "learning_rate": 2.3291264141312793e-06, "loss": 0.2775, "step": 14885 }, { "epoch": 2.1344995698308002, "grad_norm": 0.27602094411849976, "learning_rate": 2.328421203802721e-06, "loss": 0.2758, "step": 14886 }, { "epoch": 2.134642959564095, "grad_norm": 0.2913700044155121, "learning_rate": 2.327716067846807e-06, "loss": 0.2786, "step": 14887 }, { "epoch": 2.13478634929739, "grad_norm": 0.26552072167396545, "learning_rate": 2.3270110062831667e-06, "loss": 0.2885, "step": 14888 }, { "epoch": 2.134929739030685, "grad_norm": 0.28698161244392395, "learning_rate": 2.3263060191314286e-06, "loss": 0.2788, "step": 14889 }, { "epoch": 2.1350731287639806, "grad_norm": 0.2972336411476135, "learning_rate": 2.3256011064112178e-06, "loss": 0.2637, "step": 14890 }, { "epoch": 2.1352165184972756, "grad_norm": 0.27498582005500793, "learning_rate": 2.324896268142158e-06, "loss": 0.27, "step": 14891 }, { "epoch": 2.1353599082305705, "grad_norm": 0.27940768003463745, "learning_rate": 2.3241915043438727e-06, "loss": 0.2831, "step": 14892 }, { "epoch": 2.135503297963866, "grad_norm": 0.2775631844997406, "learning_rate": 2.3234868150359786e-06, "loss": 0.2811, "step": 14893 }, { "epoch": 2.135646687697161, "grad_norm": 0.28023913502693176, "learning_rate": 2.322782200238094e-06, "loss": 0.2654, "step": 14894 }, { "epoch": 2.135790077430456, "grad_norm": 0.29316434264183044, "learning_rate": 2.3220776599698342e-06, "loss": 0.2998, "step": 14895 }, { "epoch": 2.135933467163751, "grad_norm": 0.29013916850090027, "learning_rate": 2.321373194250813e-06, "loss": 0.272, "step": 14896 }, { "epoch": 2.1360768568970463, "grad_norm": 0.264376163482666, "learning_rate": 2.320668803100644e-06, "loss": 0.2939, "step": 14897 }, { "epoch": 2.1362202466303413, "grad_norm": 0.2687801718711853, "learning_rate": 2.319964486538932e-06, "loss": 0.2823, "step": 14898 }, { "epoch": 2.1363636363636362, "grad_norm": 0.2773663401603699, "learning_rate": 2.319260244585286e-06, "loss": 0.287, "step": 14899 }, { "epoch": 2.1365070260969317, "grad_norm": 0.27065885066986084, "learning_rate": 2.3185560772593106e-06, "loss": 0.3191, "step": 14900 }, { "epoch": 2.1366504158302266, "grad_norm": 0.27606815099716187, "learning_rate": 2.317851984580609e-06, "loss": 0.3072, "step": 14901 }, { "epoch": 2.1367938055635216, "grad_norm": 0.2814736068248749, "learning_rate": 2.317147966568783e-06, "loss": 0.2779, "step": 14902 }, { "epoch": 2.1369371952968166, "grad_norm": 0.2646755874156952, "learning_rate": 2.3164440232434306e-06, "loss": 0.2796, "step": 14903 }, { "epoch": 2.137080585030112, "grad_norm": 0.2626490890979767, "learning_rate": 2.3157401546241475e-06, "loss": 0.3042, "step": 14904 }, { "epoch": 2.137223974763407, "grad_norm": 0.27823182940483093, "learning_rate": 2.3150363607305304e-06, "loss": 0.2963, "step": 14905 }, { "epoch": 2.137367364496702, "grad_norm": 0.3070492744445801, "learning_rate": 2.3143326415821706e-06, "loss": 0.2964, "step": 14906 }, { "epoch": 2.1375107542299974, "grad_norm": 0.2810390889644623, "learning_rate": 2.3136289971986583e-06, "loss": 0.287, "step": 14907 }, { "epoch": 2.1376541439632923, "grad_norm": 0.28975796699523926, "learning_rate": 2.3129254275995845e-06, "loss": 0.2641, "step": 14908 }, { "epoch": 2.1377975336965873, "grad_norm": 0.271418035030365, "learning_rate": 2.312221932804531e-06, "loss": 0.2634, "step": 14909 }, { "epoch": 2.1379409234298823, "grad_norm": 0.28031474351882935, "learning_rate": 2.311518512833085e-06, "loss": 0.2755, "step": 14910 }, { "epoch": 2.1380843131631777, "grad_norm": 0.26712319254875183, "learning_rate": 2.3108151677048276e-06, "loss": 0.2879, "step": 14911 }, { "epoch": 2.1382277028964727, "grad_norm": 0.304239958524704, "learning_rate": 2.310111897439339e-06, "loss": 0.2991, "step": 14912 }, { "epoch": 2.1383710926297677, "grad_norm": 0.26982581615448, "learning_rate": 2.309408702056197e-06, "loss": 0.2811, "step": 14913 }, { "epoch": 2.1385144823630626, "grad_norm": 0.25921908020973206, "learning_rate": 2.30870558157498e-06, "loss": 0.2836, "step": 14914 }, { "epoch": 2.138657872096358, "grad_norm": 0.2616525888442993, "learning_rate": 2.3080025360152585e-06, "loss": 0.2975, "step": 14915 }, { "epoch": 2.138801261829653, "grad_norm": 0.3054240942001343, "learning_rate": 2.3072995653966045e-06, "loss": 0.2819, "step": 14916 }, { "epoch": 2.138944651562948, "grad_norm": 0.27703070640563965, "learning_rate": 2.3065966697385888e-06, "loss": 0.2824, "step": 14917 }, { "epoch": 2.139088041296243, "grad_norm": 0.28226587176322937, "learning_rate": 2.305893849060779e-06, "loss": 0.2901, "step": 14918 }, { "epoch": 2.1392314310295384, "grad_norm": 0.26810216903686523, "learning_rate": 2.305191103382741e-06, "loss": 0.2794, "step": 14919 }, { "epoch": 2.1393748207628334, "grad_norm": 0.24546973407268524, "learning_rate": 2.3044884327240362e-06, "loss": 0.2965, "step": 14920 }, { "epoch": 2.1395182104961283, "grad_norm": 0.28199800848960876, "learning_rate": 2.3037858371042266e-06, "loss": 0.2777, "step": 14921 }, { "epoch": 2.1396616002294238, "grad_norm": 0.2807081341743469, "learning_rate": 2.3030833165428724e-06, "loss": 0.2655, "step": 14922 }, { "epoch": 2.1398049899627187, "grad_norm": 0.31159737706184387, "learning_rate": 2.302380871059529e-06, "loss": 0.2727, "step": 14923 }, { "epoch": 2.1399483796960137, "grad_norm": 0.2677486836910248, "learning_rate": 2.301678500673753e-06, "loss": 0.2888, "step": 14924 }, { "epoch": 2.1400917694293087, "grad_norm": 0.27420416474342346, "learning_rate": 2.3009762054050965e-06, "loss": 0.2816, "step": 14925 }, { "epoch": 2.140235159162604, "grad_norm": 0.27561256289482117, "learning_rate": 2.300273985273111e-06, "loss": 0.2725, "step": 14926 }, { "epoch": 2.140378548895899, "grad_norm": 0.30361849069595337, "learning_rate": 2.2995718402973445e-06, "loss": 0.2709, "step": 14927 }, { "epoch": 2.140521938629194, "grad_norm": 0.29443371295928955, "learning_rate": 2.298869770497344e-06, "loss": 0.2905, "step": 14928 }, { "epoch": 2.1406653283624895, "grad_norm": 0.3127148151397705, "learning_rate": 2.298167775892654e-06, "loss": 0.3053, "step": 14929 }, { "epoch": 2.1408087180957844, "grad_norm": 0.2671416997909546, "learning_rate": 2.2974658565028185e-06, "loss": 0.269, "step": 14930 }, { "epoch": 2.1409521078290794, "grad_norm": 0.2684893310070038, "learning_rate": 2.2967640123473746e-06, "loss": 0.2649, "step": 14931 }, { "epoch": 2.1410954975623744, "grad_norm": 0.2834298312664032, "learning_rate": 2.2960622434458623e-06, "loss": 0.2926, "step": 14932 }, { "epoch": 2.14123888729567, "grad_norm": 0.2898268401622772, "learning_rate": 2.295360549817818e-06, "loss": 0.292, "step": 14933 }, { "epoch": 2.1413822770289648, "grad_norm": 0.27910587191581726, "learning_rate": 2.294658931482775e-06, "loss": 0.2911, "step": 14934 }, { "epoch": 2.1415256667622597, "grad_norm": 0.28290000557899475, "learning_rate": 2.293957388460267e-06, "loss": 0.3046, "step": 14935 }, { "epoch": 2.1416690564955547, "grad_norm": 0.2814615070819855, "learning_rate": 2.293255920769822e-06, "loss": 0.269, "step": 14936 }, { "epoch": 2.14181244622885, "grad_norm": 0.29701945185661316, "learning_rate": 2.292554528430968e-06, "loss": 0.296, "step": 14937 }, { "epoch": 2.141955835962145, "grad_norm": 0.27934500575065613, "learning_rate": 2.2918532114632307e-06, "loss": 0.2941, "step": 14938 }, { "epoch": 2.14209922569544, "grad_norm": 0.3022213280200958, "learning_rate": 2.2911519698861344e-06, "loss": 0.2906, "step": 14939 }, { "epoch": 2.1422426154287355, "grad_norm": 0.29257869720458984, "learning_rate": 2.2904508037192007e-06, "loss": 0.2742, "step": 14940 }, { "epoch": 2.1423860051620305, "grad_norm": 0.2682081162929535, "learning_rate": 2.289749712981948e-06, "loss": 0.2742, "step": 14941 }, { "epoch": 2.1425293948953255, "grad_norm": 0.27280697226524353, "learning_rate": 2.289048697693894e-06, "loss": 0.2688, "step": 14942 }, { "epoch": 2.1426727846286204, "grad_norm": 0.29224589467048645, "learning_rate": 2.2883477578745544e-06, "loss": 0.2798, "step": 14943 }, { "epoch": 2.142816174361916, "grad_norm": 0.27503177523612976, "learning_rate": 2.287646893543442e-06, "loss": 0.2823, "step": 14944 }, { "epoch": 2.142959564095211, "grad_norm": 0.2704939842224121, "learning_rate": 2.2869461047200674e-06, "loss": 0.2724, "step": 14945 }, { "epoch": 2.143102953828506, "grad_norm": 0.2906951606273651, "learning_rate": 2.286245391423942e-06, "loss": 0.2799, "step": 14946 }, { "epoch": 2.1432463435618008, "grad_norm": 0.26532137393951416, "learning_rate": 2.2855447536745686e-06, "loss": 0.277, "step": 14947 }, { "epoch": 2.143389733295096, "grad_norm": 0.2574708163738251, "learning_rate": 2.284844191491453e-06, "loss": 0.2845, "step": 14948 }, { "epoch": 2.143533123028391, "grad_norm": 0.2917749881744385, "learning_rate": 2.284143704894099e-06, "loss": 0.3032, "step": 14949 }, { "epoch": 2.143676512761686, "grad_norm": 0.2794148921966553, "learning_rate": 2.283443293902006e-06, "loss": 0.288, "step": 14950 }, { "epoch": 2.1438199024949816, "grad_norm": 0.2752021253108978, "learning_rate": 2.282742958534675e-06, "loss": 0.287, "step": 14951 }, { "epoch": 2.1439632922282765, "grad_norm": 0.2573353946208954, "learning_rate": 2.2820426988115986e-06, "loss": 0.2863, "step": 14952 }, { "epoch": 2.1441066819615715, "grad_norm": 0.26210644841194153, "learning_rate": 2.2813425147522715e-06, "loss": 0.2928, "step": 14953 }, { "epoch": 2.1442500716948665, "grad_norm": 0.2877013683319092, "learning_rate": 2.2806424063761874e-06, "loss": 0.2916, "step": 14954 }, { "epoch": 2.144393461428162, "grad_norm": 0.27917611598968506, "learning_rate": 2.279942373702835e-06, "loss": 0.2873, "step": 14955 }, { "epoch": 2.144536851161457, "grad_norm": 0.27589911222457886, "learning_rate": 2.2792424167517035e-06, "loss": 0.2853, "step": 14956 }, { "epoch": 2.144680240894752, "grad_norm": 0.28180086612701416, "learning_rate": 2.2785425355422774e-06, "loss": 0.2695, "step": 14957 }, { "epoch": 2.1448236306280473, "grad_norm": 0.27653267979621887, "learning_rate": 2.277842730094041e-06, "loss": 0.2787, "step": 14958 }, { "epoch": 2.1449670203613422, "grad_norm": 0.2668703496456146, "learning_rate": 2.277143000426475e-06, "loss": 0.2808, "step": 14959 }, { "epoch": 2.145110410094637, "grad_norm": 0.28393346071243286, "learning_rate": 2.2764433465590598e-06, "loss": 0.2867, "step": 14960 }, { "epoch": 2.145253799827932, "grad_norm": 0.2748531103134155, "learning_rate": 2.275743768511272e-06, "loss": 0.3071, "step": 14961 }, { "epoch": 2.1453971895612276, "grad_norm": 0.29662278294563293, "learning_rate": 2.275044266302589e-06, "loss": 0.2591, "step": 14962 }, { "epoch": 2.1455405792945226, "grad_norm": 0.2730312645435333, "learning_rate": 2.2743448399524798e-06, "loss": 0.2914, "step": 14963 }, { "epoch": 2.1456839690278176, "grad_norm": 0.27066516876220703, "learning_rate": 2.273645489480417e-06, "loss": 0.2916, "step": 14964 }, { "epoch": 2.1458273587611125, "grad_norm": 0.2928362488746643, "learning_rate": 2.272946214905871e-06, "loss": 0.2807, "step": 14965 }, { "epoch": 2.145970748494408, "grad_norm": 0.2777216136455536, "learning_rate": 2.2722470162483063e-06, "loss": 0.2865, "step": 14966 }, { "epoch": 2.146114138227703, "grad_norm": 0.2565169632434845, "learning_rate": 2.271547893527189e-06, "loss": 0.2716, "step": 14967 }, { "epoch": 2.146257527960998, "grad_norm": 0.2696375250816345, "learning_rate": 2.270848846761983e-06, "loss": 0.2861, "step": 14968 }, { "epoch": 2.146400917694293, "grad_norm": 0.28016015887260437, "learning_rate": 2.270149875972145e-06, "loss": 0.2834, "step": 14969 }, { "epoch": 2.1465443074275883, "grad_norm": 0.2850779592990875, "learning_rate": 2.2694509811771353e-06, "loss": 0.2991, "step": 14970 }, { "epoch": 2.1466876971608833, "grad_norm": 0.26628169417381287, "learning_rate": 2.26875216239641e-06, "loss": 0.293, "step": 14971 }, { "epoch": 2.1468310868941782, "grad_norm": 0.2739250361919403, "learning_rate": 2.268053419649423e-06, "loss": 0.298, "step": 14972 }, { "epoch": 2.1469744766274736, "grad_norm": 0.2620849907398224, "learning_rate": 2.267354752955628e-06, "loss": 0.279, "step": 14973 }, { "epoch": 2.1471178663607686, "grad_norm": 0.2731008231639862, "learning_rate": 2.266656162334471e-06, "loss": 0.2763, "step": 14974 }, { "epoch": 2.1472612560940636, "grad_norm": 0.2699071764945984, "learning_rate": 2.2659576478054023e-06, "loss": 0.3031, "step": 14975 }, { "epoch": 2.1474046458273586, "grad_norm": 0.2770184874534607, "learning_rate": 2.265259209387867e-06, "loss": 0.2615, "step": 14976 }, { "epoch": 2.147548035560654, "grad_norm": 0.27450883388519287, "learning_rate": 2.264560847101308e-06, "loss": 0.2916, "step": 14977 }, { "epoch": 2.147691425293949, "grad_norm": 0.28191980719566345, "learning_rate": 2.263862560965167e-06, "loss": 0.2724, "step": 14978 }, { "epoch": 2.147834815027244, "grad_norm": 0.27271702885627747, "learning_rate": 2.2631643509988838e-06, "loss": 0.3074, "step": 14979 }, { "epoch": 2.1479782047605394, "grad_norm": 0.27639007568359375, "learning_rate": 2.2624662172218948e-06, "loss": 0.2787, "step": 14980 }, { "epoch": 2.1481215944938343, "grad_norm": 0.29250368475914, "learning_rate": 2.2617681596536357e-06, "loss": 0.2829, "step": 14981 }, { "epoch": 2.1482649842271293, "grad_norm": 0.2758156657218933, "learning_rate": 2.261070178313538e-06, "loss": 0.2848, "step": 14982 }, { "epoch": 2.1484083739604243, "grad_norm": 0.2940891683101654, "learning_rate": 2.2603722732210338e-06, "loss": 0.2802, "step": 14983 }, { "epoch": 2.1485517636937197, "grad_norm": 0.28174448013305664, "learning_rate": 2.2596744443955527e-06, "loss": 0.269, "step": 14984 }, { "epoch": 2.1486951534270147, "grad_norm": 0.2903096079826355, "learning_rate": 2.258976691856518e-06, "loss": 0.2904, "step": 14985 }, { "epoch": 2.1488385431603096, "grad_norm": 0.2750124931335449, "learning_rate": 2.258279015623356e-06, "loss": 0.2791, "step": 14986 }, { "epoch": 2.1489819328936046, "grad_norm": 0.27204573154449463, "learning_rate": 2.2575814157154885e-06, "loss": 0.2657, "step": 14987 }, { "epoch": 2.1491253226269, "grad_norm": 0.27675992250442505, "learning_rate": 2.2568838921523354e-06, "loss": 0.2846, "step": 14988 }, { "epoch": 2.149268712360195, "grad_norm": 0.27174273133277893, "learning_rate": 2.256186444953317e-06, "loss": 0.2749, "step": 14989 }, { "epoch": 2.14941210209349, "grad_norm": 0.25273433327674866, "learning_rate": 2.2554890741378455e-06, "loss": 0.2772, "step": 14990 }, { "epoch": 2.1495554918267854, "grad_norm": 0.2878783941268921, "learning_rate": 2.2547917797253367e-06, "loss": 0.2753, "step": 14991 }, { "epoch": 2.1496988815600804, "grad_norm": 0.2663082182407379, "learning_rate": 2.254094561735201e-06, "loss": 0.2861, "step": 14992 }, { "epoch": 2.1498422712933754, "grad_norm": 0.26953747868537903, "learning_rate": 2.253397420186849e-06, "loss": 0.2976, "step": 14993 }, { "epoch": 2.1499856610266703, "grad_norm": 0.2823110818862915, "learning_rate": 2.252700355099688e-06, "loss": 0.289, "step": 14994 }, { "epoch": 2.1501290507599657, "grad_norm": 0.2749547064304352, "learning_rate": 2.252003366493123e-06, "loss": 0.2855, "step": 14995 }, { "epoch": 2.1502724404932607, "grad_norm": 0.2749454081058502, "learning_rate": 2.251306454386557e-06, "loss": 0.2642, "step": 14996 }, { "epoch": 2.1504158302265557, "grad_norm": 0.27467238903045654, "learning_rate": 2.2506096187993908e-06, "loss": 0.2786, "step": 14997 }, { "epoch": 2.1505592199598507, "grad_norm": 0.33204978704452515, "learning_rate": 2.2499128597510235e-06, "loss": 0.2772, "step": 14998 }, { "epoch": 2.150702609693146, "grad_norm": 0.2773992717266083, "learning_rate": 2.2492161772608515e-06, "loss": 0.289, "step": 14999 }, { "epoch": 2.150845999426441, "grad_norm": 0.2961278557777405, "learning_rate": 2.248519571348272e-06, "loss": 0.2754, "step": 15000 }, { "epoch": 2.150989389159736, "grad_norm": 0.2648200988769531, "learning_rate": 2.2478230420326737e-06, "loss": 0.2556, "step": 15001 }, { "epoch": 2.1511327788930314, "grad_norm": 0.2822825312614441, "learning_rate": 2.247126589333447e-06, "loss": 0.2806, "step": 15002 }, { "epoch": 2.1512761686263264, "grad_norm": 0.2923571765422821, "learning_rate": 2.246430213269982e-06, "loss": 0.2826, "step": 15003 }, { "epoch": 2.1514195583596214, "grad_norm": 0.27515414357185364, "learning_rate": 2.245733913861664e-06, "loss": 0.2668, "step": 15004 }, { "epoch": 2.1515629480929164, "grad_norm": 0.2870337665081024, "learning_rate": 2.2450376911278792e-06, "loss": 0.2977, "step": 15005 }, { "epoch": 2.151706337826212, "grad_norm": 0.2759285271167755, "learning_rate": 2.244341545088005e-06, "loss": 0.2695, "step": 15006 }, { "epoch": 2.1518497275595068, "grad_norm": 0.2724173069000244, "learning_rate": 2.243645475761423e-06, "loss": 0.2625, "step": 15007 }, { "epoch": 2.1519931172928017, "grad_norm": 0.2753107249736786, "learning_rate": 2.2429494831675115e-06, "loss": 0.2635, "step": 15008 }, { "epoch": 2.152136507026097, "grad_norm": 0.2866571247577667, "learning_rate": 2.2422535673256447e-06, "loss": 0.283, "step": 15009 }, { "epoch": 2.152279896759392, "grad_norm": 0.2856100797653198, "learning_rate": 2.2415577282551964e-06, "loss": 0.2915, "step": 15010 }, { "epoch": 2.152423286492687, "grad_norm": 0.2727241516113281, "learning_rate": 2.2408619659755393e-06, "loss": 0.2655, "step": 15011 }, { "epoch": 2.152566676225982, "grad_norm": 0.2826828360557556, "learning_rate": 2.2401662805060393e-06, "loss": 0.2905, "step": 15012 }, { "epoch": 2.1527100659592775, "grad_norm": 0.2686605751514435, "learning_rate": 2.2394706718660623e-06, "loss": 0.2774, "step": 15013 }, { "epoch": 2.1528534556925725, "grad_norm": 0.2825703024864197, "learning_rate": 2.238775140074978e-06, "loss": 0.2699, "step": 15014 }, { "epoch": 2.1529968454258674, "grad_norm": 0.26701244711875916, "learning_rate": 2.238079685152146e-06, "loss": 0.2917, "step": 15015 }, { "epoch": 2.1531402351591624, "grad_norm": 0.2623187303543091, "learning_rate": 2.237384307116928e-06, "loss": 0.3032, "step": 15016 }, { "epoch": 2.153283624892458, "grad_norm": 0.28352299332618713, "learning_rate": 2.2366890059886797e-06, "loss": 0.2762, "step": 15017 }, { "epoch": 2.153427014625753, "grad_norm": 0.27478793263435364, "learning_rate": 2.2359937817867594e-06, "loss": 0.2897, "step": 15018 }, { "epoch": 2.153570404359048, "grad_norm": 0.27007973194122314, "learning_rate": 2.2352986345305195e-06, "loss": 0.2769, "step": 15019 }, { "epoch": 2.1537137940923428, "grad_norm": 0.2709185481071472, "learning_rate": 2.234603564239313e-06, "loss": 0.2869, "step": 15020 }, { "epoch": 2.153857183825638, "grad_norm": 0.2707670331001282, "learning_rate": 2.23390857093249e-06, "loss": 0.2915, "step": 15021 }, { "epoch": 2.154000573558933, "grad_norm": 0.313109427690506, "learning_rate": 2.2332136546293983e-06, "loss": 0.2869, "step": 15022 }, { "epoch": 2.154143963292228, "grad_norm": 0.253151535987854, "learning_rate": 2.2325188153493803e-06, "loss": 0.266, "step": 15023 }, { "epoch": 2.1542873530255235, "grad_norm": 0.27332451939582825, "learning_rate": 2.231824053111782e-06, "loss": 0.2796, "step": 15024 }, { "epoch": 2.1544307427588185, "grad_norm": 0.2816116213798523, "learning_rate": 2.2311293679359435e-06, "loss": 0.2881, "step": 15025 }, { "epoch": 2.1545741324921135, "grad_norm": 0.2725905776023865, "learning_rate": 2.230434759841204e-06, "loss": 0.272, "step": 15026 }, { "epoch": 2.1547175222254085, "grad_norm": 0.2759375274181366, "learning_rate": 2.2297402288469023e-06, "loss": 0.2958, "step": 15027 }, { "epoch": 2.154860911958704, "grad_norm": 0.25849649310112, "learning_rate": 2.2290457749723692e-06, "loss": 0.2888, "step": 15028 }, { "epoch": 2.155004301691999, "grad_norm": 0.28420698642730713, "learning_rate": 2.22835139823694e-06, "loss": 0.2796, "step": 15029 }, { "epoch": 2.155147691425294, "grad_norm": 0.26038193702697754, "learning_rate": 2.227657098659943e-06, "loss": 0.2972, "step": 15030 }, { "epoch": 2.1552910811585893, "grad_norm": 0.26461344957351685, "learning_rate": 2.2269628762607087e-06, "loss": 0.2678, "step": 15031 }, { "epoch": 2.1554344708918842, "grad_norm": 0.28009307384490967, "learning_rate": 2.2262687310585617e-06, "loss": 0.2738, "step": 15032 }, { "epoch": 2.155577860625179, "grad_norm": 0.27934226393699646, "learning_rate": 2.225574663072827e-06, "loss": 0.2868, "step": 15033 }, { "epoch": 2.155721250358474, "grad_norm": 0.2867262363433838, "learning_rate": 2.224880672322825e-06, "loss": 0.2745, "step": 15034 }, { "epoch": 2.1558646400917696, "grad_norm": 0.2660866975784302, "learning_rate": 2.2241867588278763e-06, "loss": 0.277, "step": 15035 }, { "epoch": 2.1560080298250646, "grad_norm": 0.2878865599632263, "learning_rate": 2.223492922607299e-06, "loss": 0.2829, "step": 15036 }, { "epoch": 2.1561514195583595, "grad_norm": 0.2689167559146881, "learning_rate": 2.222799163680407e-06, "loss": 0.2982, "step": 15037 }, { "epoch": 2.1562948092916545, "grad_norm": 0.2680145800113678, "learning_rate": 2.2221054820665163e-06, "loss": 0.2738, "step": 15038 }, { "epoch": 2.15643819902495, "grad_norm": 0.2854951024055481, "learning_rate": 2.221411877784934e-06, "loss": 0.2663, "step": 15039 }, { "epoch": 2.156581588758245, "grad_norm": 0.2556709051132202, "learning_rate": 2.22071835085497e-06, "loss": 0.276, "step": 15040 }, { "epoch": 2.15672497849154, "grad_norm": 0.2770397365093231, "learning_rate": 2.2200249012959325e-06, "loss": 0.2824, "step": 15041 }, { "epoch": 2.1568683682248353, "grad_norm": 0.2816038429737091, "learning_rate": 2.219331529127125e-06, "loss": 0.2888, "step": 15042 }, { "epoch": 2.1570117579581303, "grad_norm": 0.2828330397605896, "learning_rate": 2.2186382343678527e-06, "loss": 0.259, "step": 15043 }, { "epoch": 2.1571551476914252, "grad_norm": 0.28493428230285645, "learning_rate": 2.217945017037411e-06, "loss": 0.2669, "step": 15044 }, { "epoch": 2.15729853742472, "grad_norm": 0.29562684893608093, "learning_rate": 2.2172518771551004e-06, "loss": 0.2941, "step": 15045 }, { "epoch": 2.1574419271580156, "grad_norm": 0.29131025075912476, "learning_rate": 2.2165588147402174e-06, "loss": 0.2849, "step": 15046 }, { "epoch": 2.1575853168913106, "grad_norm": 0.2773287296295166, "learning_rate": 2.2158658298120546e-06, "loss": 0.2846, "step": 15047 }, { "epoch": 2.1577287066246056, "grad_norm": 0.28912898898124695, "learning_rate": 2.2151729223899043e-06, "loss": 0.2747, "step": 15048 }, { "epoch": 2.1578720963579006, "grad_norm": 0.3110640347003937, "learning_rate": 2.2144800924930565e-06, "loss": 0.2661, "step": 15049 }, { "epoch": 2.158015486091196, "grad_norm": 0.2704242765903473, "learning_rate": 2.2137873401407977e-06, "loss": 0.2688, "step": 15050 }, { "epoch": 2.158158875824491, "grad_norm": 0.2902178168296814, "learning_rate": 2.213094665352413e-06, "loss": 0.3063, "step": 15051 }, { "epoch": 2.158302265557786, "grad_norm": 0.276410311460495, "learning_rate": 2.2124020681471864e-06, "loss": 0.2831, "step": 15052 }, { "epoch": 2.1584456552910813, "grad_norm": 0.27861759066581726, "learning_rate": 2.211709548544398e-06, "loss": 0.267, "step": 15053 }, { "epoch": 2.1585890450243763, "grad_norm": 0.29113495349884033, "learning_rate": 2.2110171065633283e-06, "loss": 0.2877, "step": 15054 }, { "epoch": 2.1587324347576713, "grad_norm": 0.26084285974502563, "learning_rate": 2.2103247422232504e-06, "loss": 0.2937, "step": 15055 }, { "epoch": 2.1588758244909663, "grad_norm": 0.289097398519516, "learning_rate": 2.20963245554344e-06, "loss": 0.2864, "step": 15056 }, { "epoch": 2.1590192142242617, "grad_norm": 0.26125192642211914, "learning_rate": 2.20894024654317e-06, "loss": 0.2725, "step": 15057 }, { "epoch": 2.1591626039575567, "grad_norm": 0.2692597210407257, "learning_rate": 2.2082481152417094e-06, "loss": 0.2779, "step": 15058 }, { "epoch": 2.1593059936908516, "grad_norm": 0.2887188196182251, "learning_rate": 2.2075560616583293e-06, "loss": 0.2871, "step": 15059 }, { "epoch": 2.159449383424147, "grad_norm": 0.292525976896286, "learning_rate": 2.2068640858122907e-06, "loss": 0.2874, "step": 15060 }, { "epoch": 2.159592773157442, "grad_norm": 0.2843877077102661, "learning_rate": 2.2061721877228598e-06, "loss": 0.2743, "step": 15061 }, { "epoch": 2.159736162890737, "grad_norm": 0.2696754038333893, "learning_rate": 2.205480367409297e-06, "loss": 0.2817, "step": 15062 }, { "epoch": 2.159879552624032, "grad_norm": 0.2732265591621399, "learning_rate": 2.2047886248908617e-06, "loss": 0.2926, "step": 15063 }, { "epoch": 2.1600229423573274, "grad_norm": 0.27232086658477783, "learning_rate": 2.2040969601868116e-06, "loss": 0.2763, "step": 15064 }, { "epoch": 2.1601663320906224, "grad_norm": 0.28119784593582153, "learning_rate": 2.2034053733164035e-06, "loss": 0.2829, "step": 15065 }, { "epoch": 2.1603097218239173, "grad_norm": 0.26717016100883484, "learning_rate": 2.2027138642988853e-06, "loss": 0.2792, "step": 15066 }, { "epoch": 2.1604531115572123, "grad_norm": 0.2788797616958618, "learning_rate": 2.2020224331535105e-06, "loss": 0.296, "step": 15067 }, { "epoch": 2.1605965012905077, "grad_norm": 0.27709054946899414, "learning_rate": 2.2013310798995267e-06, "loss": 0.2679, "step": 15068 }, { "epoch": 2.1607398910238027, "grad_norm": 0.2825821340084076, "learning_rate": 2.2006398045561793e-06, "loss": 0.2779, "step": 15069 }, { "epoch": 2.1608832807570977, "grad_norm": 0.2726225256919861, "learning_rate": 2.1999486071427167e-06, "loss": 0.2808, "step": 15070 }, { "epoch": 2.1610266704903927, "grad_norm": 0.27375340461730957, "learning_rate": 2.1992574876783755e-06, "loss": 0.2575, "step": 15071 }, { "epoch": 2.161170060223688, "grad_norm": 0.2733892798423767, "learning_rate": 2.198566446182398e-06, "loss": 0.285, "step": 15072 }, { "epoch": 2.161313449956983, "grad_norm": 0.2810990512371063, "learning_rate": 2.1978754826740213e-06, "loss": 0.2625, "step": 15073 }, { "epoch": 2.161456839690278, "grad_norm": 0.2852660119533539, "learning_rate": 2.1971845971724803e-06, "loss": 0.2796, "step": 15074 }, { "epoch": 2.1616002294235734, "grad_norm": 0.28405097126960754, "learning_rate": 2.196493789697009e-06, "loss": 0.2795, "step": 15075 }, { "epoch": 2.1617436191568684, "grad_norm": 0.2737131416797638, "learning_rate": 2.1958030602668394e-06, "loss": 0.2669, "step": 15076 }, { "epoch": 2.1618870088901634, "grad_norm": 0.26481595635414124, "learning_rate": 2.195112408901197e-06, "loss": 0.2877, "step": 15077 }, { "epoch": 2.1620303986234584, "grad_norm": 0.2852844297885895, "learning_rate": 2.194421835619311e-06, "loss": 0.2909, "step": 15078 }, { "epoch": 2.162173788356754, "grad_norm": 0.28435131907463074, "learning_rate": 2.1937313404404047e-06, "loss": 0.2886, "step": 15079 }, { "epoch": 2.1623171780900488, "grad_norm": 0.27905550599098206, "learning_rate": 2.193040923383701e-06, "loss": 0.2598, "step": 15080 }, { "epoch": 2.1624605678233437, "grad_norm": 0.27996644377708435, "learning_rate": 2.192350584468423e-06, "loss": 0.2951, "step": 15081 }, { "epoch": 2.162603957556639, "grad_norm": 0.2939704656600952, "learning_rate": 2.191660323713783e-06, "loss": 0.2666, "step": 15082 }, { "epoch": 2.162747347289934, "grad_norm": 0.2705312669277191, "learning_rate": 2.1909701411389992e-06, "loss": 0.2985, "step": 15083 }, { "epoch": 2.162890737023229, "grad_norm": 0.29017430543899536, "learning_rate": 2.1902800367632864e-06, "loss": 0.2847, "step": 15084 }, { "epoch": 2.163034126756524, "grad_norm": 0.2786964476108551, "learning_rate": 2.189590010605855e-06, "loss": 0.2869, "step": 15085 }, { "epoch": 2.1631775164898195, "grad_norm": 0.2805679738521576, "learning_rate": 2.188900062685914e-06, "loss": 0.2949, "step": 15086 }, { "epoch": 2.1633209062231145, "grad_norm": 0.26856887340545654, "learning_rate": 2.1882101930226714e-06, "loss": 0.2745, "step": 15087 }, { "epoch": 2.1634642959564094, "grad_norm": 0.24491463601589203, "learning_rate": 2.187520401635332e-06, "loss": 0.2907, "step": 15088 }, { "epoch": 2.163607685689705, "grad_norm": 0.27197888493537903, "learning_rate": 2.1868306885430975e-06, "loss": 0.288, "step": 15089 }, { "epoch": 2.163751075423, "grad_norm": 0.2740711271762848, "learning_rate": 2.1861410537651696e-06, "loss": 0.2587, "step": 15090 }, { "epoch": 2.163894465156295, "grad_norm": 0.26065629720687866, "learning_rate": 2.1854514973207458e-06, "loss": 0.2765, "step": 15091 }, { "epoch": 2.1640378548895898, "grad_norm": 0.27551648020744324, "learning_rate": 2.1847620192290252e-06, "loss": 0.2981, "step": 15092 }, { "epoch": 2.164181244622885, "grad_norm": 0.2805739939212799, "learning_rate": 2.184072619509197e-06, "loss": 0.2925, "step": 15093 }, { "epoch": 2.16432463435618, "grad_norm": 0.2743876278400421, "learning_rate": 2.183383298180456e-06, "loss": 0.2818, "step": 15094 }, { "epoch": 2.164468024089475, "grad_norm": 0.26744017004966736, "learning_rate": 2.1826940552619903e-06, "loss": 0.294, "step": 15095 }, { "epoch": 2.16461141382277, "grad_norm": 0.2651614248752594, "learning_rate": 2.182004890772989e-06, "loss": 0.2798, "step": 15096 }, { "epoch": 2.1647548035560655, "grad_norm": 0.26162296533584595, "learning_rate": 2.181315804732638e-06, "loss": 0.2556, "step": 15097 }, { "epoch": 2.1648981932893605, "grad_norm": 0.2815299332141876, "learning_rate": 2.1806267971601176e-06, "loss": 0.2913, "step": 15098 }, { "epoch": 2.1650415830226555, "grad_norm": 0.26199546456336975, "learning_rate": 2.17993786807461e-06, "loss": 0.2918, "step": 15099 }, { "epoch": 2.1651849727559505, "grad_norm": 0.26585039496421814, "learning_rate": 2.179249017495294e-06, "loss": 0.272, "step": 15100 }, { "epoch": 2.165328362489246, "grad_norm": 0.2772896885871887, "learning_rate": 2.1785602454413464e-06, "loss": 0.2652, "step": 15101 }, { "epoch": 2.165471752222541, "grad_norm": 0.2820235788822174, "learning_rate": 2.177871551931941e-06, "loss": 0.2688, "step": 15102 }, { "epoch": 2.165615141955836, "grad_norm": 0.26635441184043884, "learning_rate": 2.177182936986251e-06, "loss": 0.2564, "step": 15103 }, { "epoch": 2.1657585316891312, "grad_norm": 0.2702934443950653, "learning_rate": 2.176494400623445e-06, "loss": 0.2788, "step": 15104 }, { "epoch": 2.165901921422426, "grad_norm": 0.26608169078826904, "learning_rate": 2.175805942862692e-06, "loss": 0.2673, "step": 15105 }, { "epoch": 2.166045311155721, "grad_norm": 0.2766936421394348, "learning_rate": 2.175117563723157e-06, "loss": 0.2923, "step": 15106 }, { "epoch": 2.166188700889016, "grad_norm": 0.2853583097457886, "learning_rate": 2.1744292632240036e-06, "loss": 0.3084, "step": 15107 }, { "epoch": 2.1663320906223116, "grad_norm": 0.27753746509552, "learning_rate": 2.1737410413843947e-06, "loss": 0.2989, "step": 15108 }, { "epoch": 2.1664754803556066, "grad_norm": 0.2819943130016327, "learning_rate": 2.1730528982234863e-06, "loss": 0.2736, "step": 15109 }, { "epoch": 2.1666188700889015, "grad_norm": 0.27807527780532837, "learning_rate": 2.1723648337604356e-06, "loss": 0.2885, "step": 15110 }, { "epoch": 2.166762259822197, "grad_norm": 0.283844530582428, "learning_rate": 2.171676848014399e-06, "loss": 0.273, "step": 15111 }, { "epoch": 2.166905649555492, "grad_norm": 0.27999347448349, "learning_rate": 2.1709889410045275e-06, "loss": 0.2867, "step": 15112 }, { "epoch": 2.167049039288787, "grad_norm": 0.25957363843917847, "learning_rate": 2.1703011127499744e-06, "loss": 0.2972, "step": 15113 }, { "epoch": 2.167192429022082, "grad_norm": 0.2810249328613281, "learning_rate": 2.1696133632698836e-06, "loss": 0.2885, "step": 15114 }, { "epoch": 2.1673358187553773, "grad_norm": 0.2686099112033844, "learning_rate": 2.1689256925834025e-06, "loss": 0.2876, "step": 15115 }, { "epoch": 2.1674792084886723, "grad_norm": 0.27177077531814575, "learning_rate": 2.1682381007096755e-06, "loss": 0.2741, "step": 15116 }, { "epoch": 2.1676225982219672, "grad_norm": 0.28659510612487793, "learning_rate": 2.1675505876678434e-06, "loss": 0.2654, "step": 15117 }, { "epoch": 2.167765987955262, "grad_norm": 0.2836141586303711, "learning_rate": 2.1668631534770456e-06, "loss": 0.2803, "step": 15118 }, { "epoch": 2.1679093776885576, "grad_norm": 0.2729630470275879, "learning_rate": 2.1661757981564218e-06, "loss": 0.2873, "step": 15119 }, { "epoch": 2.1680527674218526, "grad_norm": 0.29136475920677185, "learning_rate": 2.1654885217251026e-06, "loss": 0.2928, "step": 15120 }, { "epoch": 2.1681961571551476, "grad_norm": 0.2648475170135498, "learning_rate": 2.1648013242022225e-06, "loss": 0.2883, "step": 15121 }, { "epoch": 2.168339546888443, "grad_norm": 0.2664426565170288, "learning_rate": 2.164114205606912e-06, "loss": 0.2818, "step": 15122 }, { "epoch": 2.168482936621738, "grad_norm": 0.27020692825317383, "learning_rate": 2.1634271659583e-06, "loss": 0.2983, "step": 15123 }, { "epoch": 2.168626326355033, "grad_norm": 0.2765491306781769, "learning_rate": 2.162740205275512e-06, "loss": 0.2861, "step": 15124 }, { "epoch": 2.168769716088328, "grad_norm": 0.25353193283081055, "learning_rate": 2.162053323577672e-06, "loss": 0.2939, "step": 15125 }, { "epoch": 2.1689131058216233, "grad_norm": 0.2658320665359497, "learning_rate": 2.1613665208839025e-06, "loss": 0.2752, "step": 15126 }, { "epoch": 2.1690564955549183, "grad_norm": 0.27013328671455383, "learning_rate": 2.160679797213322e-06, "loss": 0.2818, "step": 15127 }, { "epoch": 2.1691998852882133, "grad_norm": 0.2971060574054718, "learning_rate": 2.159993152585048e-06, "loss": 0.257, "step": 15128 }, { "epoch": 2.1693432750215083, "grad_norm": 0.2885401248931885, "learning_rate": 2.159306587018196e-06, "loss": 0.2786, "step": 15129 }, { "epoch": 2.1694866647548037, "grad_norm": 0.2898959815502167, "learning_rate": 2.158620100531881e-06, "loss": 0.2708, "step": 15130 }, { "epoch": 2.1696300544880986, "grad_norm": 0.2609538733959198, "learning_rate": 2.1579336931452092e-06, "loss": 0.2777, "step": 15131 }, { "epoch": 2.1697734442213936, "grad_norm": 0.28899070620536804, "learning_rate": 2.1572473648772925e-06, "loss": 0.2825, "step": 15132 }, { "epoch": 2.169916833954689, "grad_norm": 0.26977813243865967, "learning_rate": 2.156561115747235e-06, "loss": 0.2798, "step": 15133 }, { "epoch": 2.170060223687984, "grad_norm": 0.2834266126155853, "learning_rate": 2.1558749457741424e-06, "loss": 0.2784, "step": 15134 }, { "epoch": 2.170203613421279, "grad_norm": 0.291041761636734, "learning_rate": 2.1551888549771183e-06, "loss": 0.2995, "step": 15135 }, { "epoch": 2.170347003154574, "grad_norm": 0.2869545519351959, "learning_rate": 2.1545028433752586e-06, "loss": 0.2615, "step": 15136 }, { "epoch": 2.1704903928878694, "grad_norm": 0.2966249883174896, "learning_rate": 2.153816910987662e-06, "loss": 0.2685, "step": 15137 }, { "epoch": 2.1706337826211644, "grad_norm": 0.2935033142566681, "learning_rate": 2.1531310578334246e-06, "loss": 0.2902, "step": 15138 }, { "epoch": 2.1707771723544593, "grad_norm": 0.2810303568840027, "learning_rate": 2.1524452839316396e-06, "loss": 0.3007, "step": 15139 }, { "epoch": 2.1709205620877547, "grad_norm": 0.28246068954467773, "learning_rate": 2.151759589301397e-06, "loss": 0.2919, "step": 15140 }, { "epoch": 2.1710639518210497, "grad_norm": 0.2647559940814972, "learning_rate": 2.151073973961786e-06, "loss": 0.2606, "step": 15141 }, { "epoch": 2.1712073415543447, "grad_norm": 0.27897992730140686, "learning_rate": 2.1503884379318934e-06, "loss": 0.2816, "step": 15142 }, { "epoch": 2.1713507312876397, "grad_norm": 0.2764872908592224, "learning_rate": 2.149702981230803e-06, "loss": 0.2762, "step": 15143 }, { "epoch": 2.171494121020935, "grad_norm": 0.28297480940818787, "learning_rate": 2.1490176038775974e-06, "loss": 0.3012, "step": 15144 }, { "epoch": 2.17163751075423, "grad_norm": 0.286230206489563, "learning_rate": 2.1483323058913557e-06, "loss": 0.2816, "step": 15145 }, { "epoch": 2.171780900487525, "grad_norm": 0.26044604182243347, "learning_rate": 2.1476470872911576e-06, "loss": 0.2981, "step": 15146 }, { "epoch": 2.17192429022082, "grad_norm": 0.2844223082065582, "learning_rate": 2.146961948096075e-06, "loss": 0.2882, "step": 15147 }, { "epoch": 2.1720676799541154, "grad_norm": 0.28769969940185547, "learning_rate": 2.1462768883251837e-06, "loss": 0.274, "step": 15148 }, { "epoch": 2.1722110696874104, "grad_norm": 0.3019407391548157, "learning_rate": 2.1455919079975535e-06, "loss": 0.2841, "step": 15149 }, { "epoch": 2.1723544594207054, "grad_norm": 0.2770855724811554, "learning_rate": 2.1449070071322537e-06, "loss": 0.2871, "step": 15150 }, { "epoch": 2.1724978491540003, "grad_norm": 0.2651974856853485, "learning_rate": 2.1442221857483524e-06, "loss": 0.2697, "step": 15151 }, { "epoch": 2.1726412388872958, "grad_norm": 0.264060914516449, "learning_rate": 2.1435374438649107e-06, "loss": 0.2657, "step": 15152 }, { "epoch": 2.1727846286205907, "grad_norm": 0.27481207251548767, "learning_rate": 2.1428527815009933e-06, "loss": 0.2705, "step": 15153 }, { "epoch": 2.1729280183538857, "grad_norm": 0.28030654788017273, "learning_rate": 2.1421681986756588e-06, "loss": 0.2725, "step": 15154 }, { "epoch": 2.173071408087181, "grad_norm": 0.28516608476638794, "learning_rate": 2.141483695407965e-06, "loss": 0.2788, "step": 15155 }, { "epoch": 2.173214797820476, "grad_norm": 0.30104729533195496, "learning_rate": 2.1407992717169684e-06, "loss": 0.2822, "step": 15156 }, { "epoch": 2.173358187553771, "grad_norm": 0.2878495156764984, "learning_rate": 2.1401149276217236e-06, "loss": 0.2818, "step": 15157 }, { "epoch": 2.173501577287066, "grad_norm": 0.2673434019088745, "learning_rate": 2.139430663141278e-06, "loss": 0.2819, "step": 15158 }, { "epoch": 2.1736449670203615, "grad_norm": 0.25602850317955017, "learning_rate": 2.1387464782946804e-06, "loss": 0.281, "step": 15159 }, { "epoch": 2.1737883567536564, "grad_norm": 0.26362162828445435, "learning_rate": 2.1380623731009813e-06, "loss": 0.2785, "step": 15160 }, { "epoch": 2.1739317464869514, "grad_norm": 0.27364298701286316, "learning_rate": 2.1373783475792236e-06, "loss": 0.2799, "step": 15161 }, { "epoch": 2.174075136220247, "grad_norm": 0.30668288469314575, "learning_rate": 2.1366944017484503e-06, "loss": 0.2842, "step": 15162 }, { "epoch": 2.174218525953542, "grad_norm": 0.28868821263313293, "learning_rate": 2.136010535627699e-06, "loss": 0.2929, "step": 15163 }, { "epoch": 2.174361915686837, "grad_norm": 0.26235252618789673, "learning_rate": 2.1353267492360084e-06, "loss": 0.2783, "step": 15164 }, { "epoch": 2.1745053054201318, "grad_norm": 0.28836730122566223, "learning_rate": 2.1346430425924148e-06, "loss": 0.2823, "step": 15165 }, { "epoch": 2.174648695153427, "grad_norm": 0.276123583316803, "learning_rate": 2.133959415715951e-06, "loss": 0.3146, "step": 15166 }, { "epoch": 2.174792084886722, "grad_norm": 0.28399932384490967, "learning_rate": 2.13327586862565e-06, "loss": 0.2828, "step": 15167 }, { "epoch": 2.174935474620017, "grad_norm": 0.2596176266670227, "learning_rate": 2.1325924013405373e-06, "loss": 0.2657, "step": 15168 }, { "epoch": 2.175078864353312, "grad_norm": 0.280569851398468, "learning_rate": 2.131909013879641e-06, "loss": 0.2619, "step": 15169 }, { "epoch": 2.1752222540866075, "grad_norm": 0.27415627241134644, "learning_rate": 2.1312257062619863e-06, "loss": 0.2717, "step": 15170 }, { "epoch": 2.1753656438199025, "grad_norm": 0.2626740634441376, "learning_rate": 2.1305424785065942e-06, "loss": 0.2933, "step": 15171 }, { "epoch": 2.1755090335531975, "grad_norm": 0.26979830861091614, "learning_rate": 2.129859330632486e-06, "loss": 0.283, "step": 15172 }, { "epoch": 2.175652423286493, "grad_norm": 0.26807165145874023, "learning_rate": 2.1291762626586803e-06, "loss": 0.2608, "step": 15173 }, { "epoch": 2.175795813019788, "grad_norm": 0.2761724591255188, "learning_rate": 2.12849327460419e-06, "loss": 0.2804, "step": 15174 }, { "epoch": 2.175939202753083, "grad_norm": 0.2821192145347595, "learning_rate": 2.1278103664880294e-06, "loss": 0.2875, "step": 15175 }, { "epoch": 2.176082592486378, "grad_norm": 0.25915345549583435, "learning_rate": 2.12712753832921e-06, "loss": 0.28, "step": 15176 }, { "epoch": 2.1762259822196732, "grad_norm": 0.2705345153808594, "learning_rate": 2.1264447901467406e-06, "loss": 0.2997, "step": 15177 }, { "epoch": 2.176369371952968, "grad_norm": 0.2764226496219635, "learning_rate": 2.125762121959628e-06, "loss": 0.2785, "step": 15178 }, { "epoch": 2.176512761686263, "grad_norm": 0.29796624183654785, "learning_rate": 2.1250795337868766e-06, "loss": 0.2812, "step": 15179 }, { "epoch": 2.176656151419558, "grad_norm": 0.2774115800857544, "learning_rate": 2.124397025647488e-06, "loss": 0.2887, "step": 15180 }, { "epoch": 2.1767995411528536, "grad_norm": 0.2633560597896576, "learning_rate": 2.1237145975604635e-06, "loss": 0.2836, "step": 15181 }, { "epoch": 2.1769429308861485, "grad_norm": 0.2754638195037842, "learning_rate": 2.1230322495447996e-06, "loss": 0.2736, "step": 15182 }, { "epoch": 2.1770863206194435, "grad_norm": 0.27076205611228943, "learning_rate": 2.122349981619492e-06, "loss": 0.2779, "step": 15183 }, { "epoch": 2.177229710352739, "grad_norm": 0.27759870886802673, "learning_rate": 2.121667793803537e-06, "loss": 0.3086, "step": 15184 }, { "epoch": 2.177373100086034, "grad_norm": 0.2856605648994446, "learning_rate": 2.12098568611592e-06, "loss": 0.3021, "step": 15185 }, { "epoch": 2.177516489819329, "grad_norm": 0.277943879365921, "learning_rate": 2.120303658575633e-06, "loss": 0.2844, "step": 15186 }, { "epoch": 2.177659879552624, "grad_norm": 0.275680273771286, "learning_rate": 2.1196217112016625e-06, "loss": 0.2694, "step": 15187 }, { "epoch": 2.1778032692859193, "grad_norm": 0.3026510179042816, "learning_rate": 2.1189398440129933e-06, "loss": 0.2785, "step": 15188 }, { "epoch": 2.1779466590192142, "grad_norm": 0.28286227583885193, "learning_rate": 2.118258057028608e-06, "loss": 0.2901, "step": 15189 }, { "epoch": 2.1780900487525092, "grad_norm": 0.28112852573394775, "learning_rate": 2.117576350267484e-06, "loss": 0.2833, "step": 15190 }, { "epoch": 2.1782334384858046, "grad_norm": 0.2829703390598297, "learning_rate": 2.1168947237485997e-06, "loss": 0.2716, "step": 15191 }, { "epoch": 2.1783768282190996, "grad_norm": 0.2759682238101959, "learning_rate": 2.1162131774909323e-06, "loss": 0.2801, "step": 15192 }, { "epoch": 2.1785202179523946, "grad_norm": 0.28420937061309814, "learning_rate": 2.1155317115134534e-06, "loss": 0.3002, "step": 15193 }, { "epoch": 2.1786636076856896, "grad_norm": 0.2677525281906128, "learning_rate": 2.1148503258351342e-06, "loss": 0.2733, "step": 15194 }, { "epoch": 2.178806997418985, "grad_norm": 0.2799367308616638, "learning_rate": 2.1141690204749444e-06, "loss": 0.287, "step": 15195 }, { "epoch": 2.17895038715228, "grad_norm": 0.2851523458957672, "learning_rate": 2.1134877954518494e-06, "loss": 0.2936, "step": 15196 }, { "epoch": 2.179093776885575, "grad_norm": 0.2843724489212036, "learning_rate": 2.112806650784814e-06, "loss": 0.301, "step": 15197 }, { "epoch": 2.17923716661887, "grad_norm": 0.2630918622016907, "learning_rate": 2.1121255864928003e-06, "loss": 0.2904, "step": 15198 }, { "epoch": 2.1793805563521653, "grad_norm": 0.274358332157135, "learning_rate": 2.1114446025947675e-06, "loss": 0.281, "step": 15199 }, { "epoch": 2.1795239460854603, "grad_norm": 0.26734021306037903, "learning_rate": 2.110763699109676e-06, "loss": 0.2717, "step": 15200 }, { "epoch": 2.1796673358187553, "grad_norm": 0.2849523723125458, "learning_rate": 2.1100828760564764e-06, "loss": 0.2862, "step": 15201 }, { "epoch": 2.1798107255520502, "grad_norm": 0.2744559943675995, "learning_rate": 2.1094021334541237e-06, "loss": 0.2713, "step": 15202 }, { "epoch": 2.1799541152853457, "grad_norm": 0.2959249019622803, "learning_rate": 2.108721471321569e-06, "loss": 0.2892, "step": 15203 }, { "epoch": 2.1800975050186406, "grad_norm": 0.2657800316810608, "learning_rate": 2.1080408896777614e-06, "loss": 0.2797, "step": 15204 }, { "epoch": 2.1802408947519356, "grad_norm": 0.2937087416648865, "learning_rate": 2.107360388541648e-06, "loss": 0.2793, "step": 15205 }, { "epoch": 2.180384284485231, "grad_norm": 0.27388256788253784, "learning_rate": 2.1066799679321703e-06, "loss": 0.2931, "step": 15206 }, { "epoch": 2.180527674218526, "grad_norm": 0.2959498465061188, "learning_rate": 2.105999627868272e-06, "loss": 0.309, "step": 15207 }, { "epoch": 2.180671063951821, "grad_norm": 0.262723445892334, "learning_rate": 2.1053193683688917e-06, "loss": 0.2976, "step": 15208 }, { "epoch": 2.180814453685116, "grad_norm": 0.3019240200519562, "learning_rate": 2.1046391894529673e-06, "loss": 0.2838, "step": 15209 }, { "epoch": 2.1809578434184114, "grad_norm": 0.2738766074180603, "learning_rate": 2.1039590911394344e-06, "loss": 0.2816, "step": 15210 }, { "epoch": 2.1811012331517063, "grad_norm": 0.2610863745212555, "learning_rate": 2.1032790734472262e-06, "loss": 0.2858, "step": 15211 }, { "epoch": 2.1812446228850013, "grad_norm": 0.2538219392299652, "learning_rate": 2.102599136395272e-06, "loss": 0.2933, "step": 15212 }, { "epoch": 2.1813880126182967, "grad_norm": 0.28828418254852295, "learning_rate": 2.1019192800025002e-06, "loss": 0.3195, "step": 15213 }, { "epoch": 2.1815314023515917, "grad_norm": 0.26119181513786316, "learning_rate": 2.1012395042878375e-06, "loss": 0.2818, "step": 15214 }, { "epoch": 2.1816747920848867, "grad_norm": 0.26799002289772034, "learning_rate": 2.100559809270206e-06, "loss": 0.2607, "step": 15215 }, { "epoch": 2.1818181818181817, "grad_norm": 0.2789333164691925, "learning_rate": 2.0998801949685334e-06, "loss": 0.2647, "step": 15216 }, { "epoch": 2.181961571551477, "grad_norm": 0.2685316205024719, "learning_rate": 2.099200661401733e-06, "loss": 0.2696, "step": 15217 }, { "epoch": 2.182104961284772, "grad_norm": 0.27259284257888794, "learning_rate": 2.0985212085887236e-06, "loss": 0.2817, "step": 15218 }, { "epoch": 2.182248351018067, "grad_norm": 0.28199440240859985, "learning_rate": 2.0978418365484206e-06, "loss": 0.2722, "step": 15219 }, { "epoch": 2.182391740751362, "grad_norm": 0.2746238112449646, "learning_rate": 2.097162545299736e-06, "loss": 0.2776, "step": 15220 }, { "epoch": 2.1825351304846574, "grad_norm": 0.28385570645332336, "learning_rate": 2.0964833348615833e-06, "loss": 0.2937, "step": 15221 }, { "epoch": 2.1826785202179524, "grad_norm": 0.27477124333381653, "learning_rate": 2.095804205252866e-06, "loss": 0.2875, "step": 15222 }, { "epoch": 2.1828219099512474, "grad_norm": 0.25380024313926697, "learning_rate": 2.0951251564924923e-06, "loss": 0.2683, "step": 15223 }, { "epoch": 2.182965299684543, "grad_norm": 0.27758100628852844, "learning_rate": 2.0944461885993657e-06, "loss": 0.2669, "step": 15224 }, { "epoch": 2.1831086894178378, "grad_norm": 0.30167222023010254, "learning_rate": 2.0937673015923878e-06, "loss": 0.2917, "step": 15225 }, { "epoch": 2.1832520791511327, "grad_norm": 0.2653573453426361, "learning_rate": 2.093088495490457e-06, "loss": 0.2737, "step": 15226 }, { "epoch": 2.1833954688844277, "grad_norm": 0.26423096656799316, "learning_rate": 2.092409770312473e-06, "loss": 0.2668, "step": 15227 }, { "epoch": 2.183538858617723, "grad_norm": 0.2715412676334381, "learning_rate": 2.0917311260773263e-06, "loss": 0.2937, "step": 15228 }, { "epoch": 2.183682248351018, "grad_norm": 0.2789446711540222, "learning_rate": 2.091052562803911e-06, "loss": 0.2854, "step": 15229 }, { "epoch": 2.183825638084313, "grad_norm": 0.26674848794937134, "learning_rate": 2.0903740805111182e-06, "loss": 0.2839, "step": 15230 }, { "epoch": 2.183969027817608, "grad_norm": 0.2825891375541687, "learning_rate": 2.089695679217835e-06, "loss": 0.272, "step": 15231 }, { "epoch": 2.1841124175509035, "grad_norm": 0.2632359564304352, "learning_rate": 2.0890173589429475e-06, "loss": 0.2691, "step": 15232 }, { "epoch": 2.1842558072841984, "grad_norm": 0.29100412130355835, "learning_rate": 2.088339119705338e-06, "loss": 0.2924, "step": 15233 }, { "epoch": 2.1843991970174934, "grad_norm": 0.27919816970825195, "learning_rate": 2.087660961523889e-06, "loss": 0.2695, "step": 15234 }, { "epoch": 2.184542586750789, "grad_norm": 0.2801348567008972, "learning_rate": 2.0869828844174787e-06, "loss": 0.2642, "step": 15235 }, { "epoch": 2.184685976484084, "grad_norm": 0.28532320261001587, "learning_rate": 2.086304888404984e-06, "loss": 0.3021, "step": 15236 }, { "epoch": 2.1848293662173788, "grad_norm": 0.276326060295105, "learning_rate": 2.0856269735052788e-06, "loss": 0.2832, "step": 15237 }, { "epoch": 2.1849727559506738, "grad_norm": 0.270907461643219, "learning_rate": 2.0849491397372374e-06, "loss": 0.3021, "step": 15238 }, { "epoch": 2.185116145683969, "grad_norm": 0.2707526385784149, "learning_rate": 2.084271387119726e-06, "loss": 0.2733, "step": 15239 }, { "epoch": 2.185259535417264, "grad_norm": 0.28272590041160583, "learning_rate": 2.083593715671614e-06, "loss": 0.2752, "step": 15240 }, { "epoch": 2.185402925150559, "grad_norm": 0.2762306034564972, "learning_rate": 2.082916125411767e-06, "loss": 0.3079, "step": 15241 }, { "epoch": 2.1855463148838545, "grad_norm": 0.27787330746650696, "learning_rate": 2.0822386163590467e-06, "loss": 0.287, "step": 15242 }, { "epoch": 2.1856897046171495, "grad_norm": 0.27733951807022095, "learning_rate": 2.0815611885323178e-06, "loss": 0.2934, "step": 15243 }, { "epoch": 2.1858330943504445, "grad_norm": 0.3022250533103943, "learning_rate": 2.080883841950434e-06, "loss": 0.3021, "step": 15244 }, { "epoch": 2.1859764840837395, "grad_norm": 0.28067898750305176, "learning_rate": 2.080206576632253e-06, "loss": 0.2708, "step": 15245 }, { "epoch": 2.186119873817035, "grad_norm": 0.2617053985595703, "learning_rate": 2.07952939259663e-06, "loss": 0.283, "step": 15246 }, { "epoch": 2.18626326355033, "grad_norm": 0.2697545886039734, "learning_rate": 2.0788522898624165e-06, "loss": 0.2853, "step": 15247 }, { "epoch": 2.186406653283625, "grad_norm": 0.2716647982597351, "learning_rate": 2.0781752684484607e-06, "loss": 0.2784, "step": 15248 }, { "epoch": 2.18655004301692, "grad_norm": 0.28592029213905334, "learning_rate": 2.0774983283736117e-06, "loss": 0.2945, "step": 15249 }, { "epoch": 2.186693432750215, "grad_norm": 0.26624539494514465, "learning_rate": 2.076821469656713e-06, "loss": 0.2757, "step": 15250 }, { "epoch": 2.18683682248351, "grad_norm": 0.2596072256565094, "learning_rate": 2.0761446923166085e-06, "loss": 0.2847, "step": 15251 }, { "epoch": 2.186980212216805, "grad_norm": 0.2836114764213562, "learning_rate": 2.0754679963721374e-06, "loss": 0.2694, "step": 15252 }, { "epoch": 2.1871236019501, "grad_norm": 0.2660558223724365, "learning_rate": 2.0747913818421388e-06, "loss": 0.2977, "step": 15253 }, { "epoch": 2.1872669916833956, "grad_norm": 0.2701374590396881, "learning_rate": 2.07411484874545e-06, "loss": 0.2906, "step": 15254 }, { "epoch": 2.1874103814166905, "grad_norm": 0.2854791581630707, "learning_rate": 2.0734383971009013e-06, "loss": 0.2904, "step": 15255 }, { "epoch": 2.1875537711499855, "grad_norm": 0.27721238136291504, "learning_rate": 2.0727620269273257e-06, "loss": 0.2992, "step": 15256 }, { "epoch": 2.187697160883281, "grad_norm": 0.27143970131874084, "learning_rate": 2.0720857382435522e-06, "loss": 0.2629, "step": 15257 }, { "epoch": 2.187840550616576, "grad_norm": 0.27977418899536133, "learning_rate": 2.071409531068408e-06, "loss": 0.2947, "step": 15258 }, { "epoch": 2.187983940349871, "grad_norm": 0.2804434895515442, "learning_rate": 2.070733405420719e-06, "loss": 0.2792, "step": 15259 }, { "epoch": 2.188127330083166, "grad_norm": 0.26688241958618164, "learning_rate": 2.0700573613193044e-06, "loss": 0.282, "step": 15260 }, { "epoch": 2.1882707198164613, "grad_norm": 0.26416268944740295, "learning_rate": 2.069381398782986e-06, "loss": 0.2714, "step": 15261 }, { "epoch": 2.1884141095497562, "grad_norm": 0.2863828241825104, "learning_rate": 2.0687055178305804e-06, "loss": 0.2729, "step": 15262 }, { "epoch": 2.188557499283051, "grad_norm": 0.266678124666214, "learning_rate": 2.0680297184809046e-06, "loss": 0.2828, "step": 15263 }, { "epoch": 2.1887008890163466, "grad_norm": 0.2713547348976135, "learning_rate": 2.0673540007527716e-06, "loss": 0.2832, "step": 15264 }, { "epoch": 2.1888442787496416, "grad_norm": 0.286929190158844, "learning_rate": 2.0666783646649928e-06, "loss": 0.2805, "step": 15265 }, { "epoch": 2.1889876684829366, "grad_norm": 0.294116735458374, "learning_rate": 2.0660028102363747e-06, "loss": 0.2874, "step": 15266 }, { "epoch": 2.1891310582162316, "grad_norm": 0.28095772862434387, "learning_rate": 2.065327337485725e-06, "loss": 0.2942, "step": 15267 }, { "epoch": 2.189274447949527, "grad_norm": 0.2654753625392914, "learning_rate": 2.064651946431848e-06, "loss": 0.2793, "step": 15268 }, { "epoch": 2.189417837682822, "grad_norm": 0.26188433170318604, "learning_rate": 2.063976637093546e-06, "loss": 0.2895, "step": 15269 }, { "epoch": 2.189561227416117, "grad_norm": 0.28386062383651733, "learning_rate": 2.063301409489617e-06, "loss": 0.2871, "step": 15270 }, { "epoch": 2.1897046171494123, "grad_norm": 0.2748781442642212, "learning_rate": 2.0626262636388594e-06, "loss": 0.2885, "step": 15271 }, { "epoch": 2.1898480068827073, "grad_norm": 0.2694520354270935, "learning_rate": 2.061951199560069e-06, "loss": 0.2807, "step": 15272 }, { "epoch": 2.1899913966160023, "grad_norm": 0.2899956703186035, "learning_rate": 2.061276217272036e-06, "loss": 0.2652, "step": 15273 }, { "epoch": 2.1901347863492973, "grad_norm": 0.2638397514820099, "learning_rate": 2.060601316793554e-06, "loss": 0.267, "step": 15274 }, { "epoch": 2.1902781760825927, "grad_norm": 0.2792268991470337, "learning_rate": 2.059926498143411e-06, "loss": 0.2941, "step": 15275 }, { "epoch": 2.1904215658158877, "grad_norm": 0.2751989960670471, "learning_rate": 2.0592517613403897e-06, "loss": 0.2788, "step": 15276 }, { "epoch": 2.1905649555491826, "grad_norm": 0.2607067823410034, "learning_rate": 2.0585771064032756e-06, "loss": 0.2733, "step": 15277 }, { "epoch": 2.1907083452824776, "grad_norm": 0.2916031777858734, "learning_rate": 2.05790253335085e-06, "loss": 0.2876, "step": 15278 }, { "epoch": 2.190851735015773, "grad_norm": 0.2989314794540405, "learning_rate": 2.0572280422018927e-06, "loss": 0.2878, "step": 15279 }, { "epoch": 2.190995124749068, "grad_norm": 0.2921896278858185, "learning_rate": 2.0565536329751796e-06, "loss": 0.2981, "step": 15280 }, { "epoch": 2.191138514482363, "grad_norm": 0.27475491166114807, "learning_rate": 2.0558793056894864e-06, "loss": 0.2894, "step": 15281 }, { "epoch": 2.191281904215658, "grad_norm": 0.29182323813438416, "learning_rate": 2.0552050603635838e-06, "loss": 0.2959, "step": 15282 }, { "epoch": 2.1914252939489534, "grad_norm": 0.270033597946167, "learning_rate": 2.054530897016242e-06, "loss": 0.2837, "step": 15283 }, { "epoch": 2.1915686836822483, "grad_norm": 0.27849239110946655, "learning_rate": 2.0538568156662288e-06, "loss": 0.2773, "step": 15284 }, { "epoch": 2.1917120734155433, "grad_norm": 0.2728039622306824, "learning_rate": 2.0531828163323105e-06, "loss": 0.2821, "step": 15285 }, { "epoch": 2.1918554631488387, "grad_norm": 0.2606741487979889, "learning_rate": 2.0525088990332486e-06, "loss": 0.2948, "step": 15286 }, { "epoch": 2.1919988528821337, "grad_norm": 0.27383747696876526, "learning_rate": 2.0518350637878052e-06, "loss": 0.3009, "step": 15287 }, { "epoch": 2.1921422426154287, "grad_norm": 0.2713768482208252, "learning_rate": 2.051161310614739e-06, "loss": 0.2713, "step": 15288 }, { "epoch": 2.1922856323487236, "grad_norm": 0.291625052690506, "learning_rate": 2.0504876395328047e-06, "loss": 0.3007, "step": 15289 }, { "epoch": 2.192429022082019, "grad_norm": 0.2743871808052063, "learning_rate": 2.0498140505607585e-06, "loss": 0.2899, "step": 15290 }, { "epoch": 2.192572411815314, "grad_norm": 0.28026190400123596, "learning_rate": 2.04914054371735e-06, "loss": 0.2688, "step": 15291 }, { "epoch": 2.192715801548609, "grad_norm": 0.2755705416202545, "learning_rate": 2.048467119021332e-06, "loss": 0.278, "step": 15292 }, { "epoch": 2.1928591912819044, "grad_norm": 0.27408885955810547, "learning_rate": 2.047793776491447e-06, "loss": 0.2978, "step": 15293 }, { "epoch": 2.1930025810151994, "grad_norm": 0.28390008211135864, "learning_rate": 2.047120516146442e-06, "loss": 0.2592, "step": 15294 }, { "epoch": 2.1931459707484944, "grad_norm": 0.2840160131454468, "learning_rate": 2.0464473380050593e-06, "loss": 0.2867, "step": 15295 }, { "epoch": 2.1932893604817894, "grad_norm": 0.2798948585987091, "learning_rate": 2.0457742420860394e-06, "loss": 0.2653, "step": 15296 }, { "epoch": 2.1934327502150848, "grad_norm": 0.27733513712882996, "learning_rate": 2.0451012284081216e-06, "loss": 0.281, "step": 15297 }, { "epoch": 2.1935761399483797, "grad_norm": 0.29786938428878784, "learning_rate": 2.044428296990039e-06, "loss": 0.2889, "step": 15298 }, { "epoch": 2.1937195296816747, "grad_norm": 0.2930350601673126, "learning_rate": 2.043755447850525e-06, "loss": 0.289, "step": 15299 }, { "epoch": 2.1938629194149697, "grad_norm": 0.26833847165107727, "learning_rate": 2.0430826810083125e-06, "loss": 0.2758, "step": 15300 }, { "epoch": 2.194006309148265, "grad_norm": 0.27934935688972473, "learning_rate": 2.04240999648213e-06, "loss": 0.2867, "step": 15301 }, { "epoch": 2.19414969888156, "grad_norm": 0.2803035080432892, "learning_rate": 2.041737394290705e-06, "loss": 0.2828, "step": 15302 }, { "epoch": 2.194293088614855, "grad_norm": 0.2660321295261383, "learning_rate": 2.041064874452758e-06, "loss": 0.2807, "step": 15303 }, { "epoch": 2.1944364783481505, "grad_norm": 0.26241379976272583, "learning_rate": 2.0403924369870116e-06, "loss": 0.2722, "step": 15304 }, { "epoch": 2.1945798680814455, "grad_norm": 0.2798965573310852, "learning_rate": 2.0397200819121893e-06, "loss": 0.2873, "step": 15305 }, { "epoch": 2.1947232578147404, "grad_norm": 0.27603834867477417, "learning_rate": 2.039047809247006e-06, "loss": 0.3064, "step": 15306 }, { "epoch": 2.1948666475480354, "grad_norm": 0.29302966594696045, "learning_rate": 2.0383756190101767e-06, "loss": 0.2893, "step": 15307 }, { "epoch": 2.195010037281331, "grad_norm": 0.2605198621749878, "learning_rate": 2.037703511220416e-06, "loss": 0.2858, "step": 15308 }, { "epoch": 2.195153427014626, "grad_norm": 0.2904021441936493, "learning_rate": 2.0370314858964305e-06, "loss": 0.2901, "step": 15309 }, { "epoch": 2.1952968167479208, "grad_norm": 0.2691994905471802, "learning_rate": 2.0363595430569316e-06, "loss": 0.2903, "step": 15310 }, { "epoch": 2.1954402064812157, "grad_norm": 0.29148730635643005, "learning_rate": 2.0356876827206233e-06, "loss": 0.296, "step": 15311 }, { "epoch": 2.195583596214511, "grad_norm": 0.26467078924179077, "learning_rate": 2.0350159049062106e-06, "loss": 0.2849, "step": 15312 }, { "epoch": 2.195726985947806, "grad_norm": 0.2732304632663727, "learning_rate": 2.0343442096323956e-06, "loss": 0.3034, "step": 15313 }, { "epoch": 2.195870375681101, "grad_norm": 0.27123987674713135, "learning_rate": 2.033672596917874e-06, "loss": 0.2785, "step": 15314 }, { "epoch": 2.1960137654143965, "grad_norm": 0.2536970376968384, "learning_rate": 2.033001066781344e-06, "loss": 0.2783, "step": 15315 }, { "epoch": 2.1961571551476915, "grad_norm": 0.2842646539211273, "learning_rate": 2.0323296192415005e-06, "loss": 0.2831, "step": 15316 }, { "epoch": 2.1963005448809865, "grad_norm": 0.27207887172698975, "learning_rate": 2.031658254317035e-06, "loss": 0.2803, "step": 15317 }, { "epoch": 2.1964439346142814, "grad_norm": 0.2788197100162506, "learning_rate": 2.0309869720266374e-06, "loss": 0.2845, "step": 15318 }, { "epoch": 2.196587324347577, "grad_norm": 0.28398364782333374, "learning_rate": 2.0303157723889977e-06, "loss": 0.2688, "step": 15319 }, { "epoch": 2.196730714080872, "grad_norm": 0.28703489899635315, "learning_rate": 2.029644655422796e-06, "loss": 0.2825, "step": 15320 }, { "epoch": 2.196874103814167, "grad_norm": 0.2680131793022156, "learning_rate": 2.0289736211467187e-06, "loss": 0.2943, "step": 15321 }, { "epoch": 2.1970174935474622, "grad_norm": 0.285459041595459, "learning_rate": 2.028302669579445e-06, "loss": 0.2708, "step": 15322 }, { "epoch": 2.197160883280757, "grad_norm": 0.29195937514305115, "learning_rate": 2.0276318007396538e-06, "loss": 0.2809, "step": 15323 }, { "epoch": 2.197304273014052, "grad_norm": 0.27079030871391296, "learning_rate": 2.026961014646021e-06, "loss": 0.2991, "step": 15324 }, { "epoch": 2.197447662747347, "grad_norm": 0.25965696573257446, "learning_rate": 2.02629031131722e-06, "loss": 0.2864, "step": 15325 }, { "epoch": 2.1975910524806426, "grad_norm": 0.2669088840484619, "learning_rate": 2.025619690771922e-06, "loss": 0.2766, "step": 15326 }, { "epoch": 2.1977344422139375, "grad_norm": 0.2784617841243744, "learning_rate": 2.024949153028797e-06, "loss": 0.2818, "step": 15327 }, { "epoch": 2.1978778319472325, "grad_norm": 0.26298972964286804, "learning_rate": 2.024278698106511e-06, "loss": 0.2696, "step": 15328 }, { "epoch": 2.1980212216805275, "grad_norm": 0.28083309531211853, "learning_rate": 2.02360832602373e-06, "loss": 0.2685, "step": 15329 }, { "epoch": 2.198164611413823, "grad_norm": 0.2747686803340912, "learning_rate": 2.022938036799113e-06, "loss": 0.2872, "step": 15330 }, { "epoch": 2.198308001147118, "grad_norm": 0.27198439836502075, "learning_rate": 2.0222678304513217e-06, "loss": 0.2834, "step": 15331 }, { "epoch": 2.198451390880413, "grad_norm": 0.2660009264945984, "learning_rate": 2.021597706999013e-06, "loss": 0.2776, "step": 15332 }, { "epoch": 2.198594780613708, "grad_norm": 0.28420671820640564, "learning_rate": 2.020927666460843e-06, "loss": 0.2914, "step": 15333 }, { "epoch": 2.1987381703470033, "grad_norm": 0.2802938222885132, "learning_rate": 2.0202577088554637e-06, "loss": 0.2796, "step": 15334 }, { "epoch": 2.1988815600802982, "grad_norm": 0.26716065406799316, "learning_rate": 2.019587834201528e-06, "loss": 0.296, "step": 15335 }, { "epoch": 2.199024949813593, "grad_norm": 0.2729973793029785, "learning_rate": 2.01891804251768e-06, "loss": 0.2707, "step": 15336 }, { "epoch": 2.1991683395468886, "grad_norm": 0.26764512062072754, "learning_rate": 2.0182483338225684e-06, "loss": 0.2865, "step": 15337 }, { "epoch": 2.1993117292801836, "grad_norm": 0.278046578168869, "learning_rate": 2.0175787081348364e-06, "loss": 0.2834, "step": 15338 }, { "epoch": 2.1994551190134786, "grad_norm": 0.28340959548950195, "learning_rate": 2.0169091654731253e-06, "loss": 0.2687, "step": 15339 }, { "epoch": 2.1995985087467735, "grad_norm": 0.2815527319908142, "learning_rate": 2.016239705856074e-06, "loss": 0.2724, "step": 15340 }, { "epoch": 2.199741898480069, "grad_norm": 0.2791328430175781, "learning_rate": 2.0155703293023197e-06, "loss": 0.2787, "step": 15341 }, { "epoch": 2.199885288213364, "grad_norm": 0.26407963037490845, "learning_rate": 2.0149010358304968e-06, "loss": 0.2693, "step": 15342 }, { "epoch": 2.200028677946659, "grad_norm": 0.280978798866272, "learning_rate": 2.0142318254592365e-06, "loss": 0.3057, "step": 15343 }, { "epoch": 2.2001720676799543, "grad_norm": 0.2919079661369324, "learning_rate": 2.0135626982071695e-06, "loss": 0.2881, "step": 15344 }, { "epoch": 2.2003154574132493, "grad_norm": 0.2581195831298828, "learning_rate": 2.0128936540929224e-06, "loss": 0.2755, "step": 15345 }, { "epoch": 2.2004588471465443, "grad_norm": 0.29627031087875366, "learning_rate": 2.0122246931351234e-06, "loss": 0.2888, "step": 15346 }, { "epoch": 2.2006022368798392, "grad_norm": 0.2704452574253082, "learning_rate": 2.0115558153523905e-06, "loss": 0.2932, "step": 15347 }, { "epoch": 2.2007456266131347, "grad_norm": 0.26217252016067505, "learning_rate": 2.0108870207633474e-06, "loss": 0.2696, "step": 15348 }, { "epoch": 2.2008890163464296, "grad_norm": 0.27481809258461, "learning_rate": 2.0102183093866113e-06, "loss": 0.291, "step": 15349 }, { "epoch": 2.2010324060797246, "grad_norm": 0.27873694896698, "learning_rate": 2.0095496812407976e-06, "loss": 0.2739, "step": 15350 }, { "epoch": 2.2011757958130196, "grad_norm": 0.27481457591056824, "learning_rate": 2.008881136344523e-06, "loss": 0.2812, "step": 15351 }, { "epoch": 2.201319185546315, "grad_norm": 0.27558404207229614, "learning_rate": 2.0082126747163948e-06, "loss": 0.2931, "step": 15352 }, { "epoch": 2.20146257527961, "grad_norm": 0.2616458833217621, "learning_rate": 2.007544296375023e-06, "loss": 0.2968, "step": 15353 }, { "epoch": 2.201605965012905, "grad_norm": 0.28701600432395935, "learning_rate": 2.0068760013390155e-06, "loss": 0.3089, "step": 15354 }, { "epoch": 2.2017493547462004, "grad_norm": 0.2822836935520172, "learning_rate": 2.0062077896269748e-06, "loss": 0.283, "step": 15355 }, { "epoch": 2.2018927444794953, "grad_norm": 0.26270392537117004, "learning_rate": 2.0055396612575064e-06, "loss": 0.27, "step": 15356 }, { "epoch": 2.2020361342127903, "grad_norm": 0.2835598289966583, "learning_rate": 2.0048716162492056e-06, "loss": 0.2916, "step": 15357 }, { "epoch": 2.2021795239460853, "grad_norm": 0.2773594856262207, "learning_rate": 2.0042036546206716e-06, "loss": 0.2886, "step": 15358 }, { "epoch": 2.2023229136793807, "grad_norm": 0.2607223391532898, "learning_rate": 2.0035357763904993e-06, "loss": 0.2811, "step": 15359 }, { "epoch": 2.2024663034126757, "grad_norm": 0.2821847200393677, "learning_rate": 2.00286798157728e-06, "loss": 0.2848, "step": 15360 }, { "epoch": 2.2026096931459707, "grad_norm": 0.26960715651512146, "learning_rate": 2.0022002701996067e-06, "loss": 0.2753, "step": 15361 }, { "epoch": 2.2027530828792656, "grad_norm": 0.26988211274147034, "learning_rate": 2.001532642276069e-06, "loss": 0.2827, "step": 15362 }, { "epoch": 2.202896472612561, "grad_norm": 0.27505332231521606, "learning_rate": 2.000865097825248e-06, "loss": 0.2898, "step": 15363 }, { "epoch": 2.203039862345856, "grad_norm": 0.2702901363372803, "learning_rate": 2.0001976368657293e-06, "loss": 0.2916, "step": 15364 }, { "epoch": 2.203183252079151, "grad_norm": 0.2571544945240021, "learning_rate": 1.9995302594160938e-06, "loss": 0.2717, "step": 15365 }, { "epoch": 2.2033266418124464, "grad_norm": 0.27804285287857056, "learning_rate": 1.99886296549492e-06, "loss": 0.2799, "step": 15366 }, { "epoch": 2.2034700315457414, "grad_norm": 0.2788774371147156, "learning_rate": 1.9981957551207866e-06, "loss": 0.2705, "step": 15367 }, { "epoch": 2.2036134212790364, "grad_norm": 0.2773163616657257, "learning_rate": 1.9975286283122642e-06, "loss": 0.2987, "step": 15368 }, { "epoch": 2.2037568110123313, "grad_norm": 0.26191428303718567, "learning_rate": 1.996861585087926e-06, "loss": 0.2832, "step": 15369 }, { "epoch": 2.2039002007456268, "grad_norm": 0.26940542459487915, "learning_rate": 1.996194625466341e-06, "loss": 0.2855, "step": 15370 }, { "epoch": 2.2040435904789217, "grad_norm": 0.29375508427619934, "learning_rate": 1.9955277494660773e-06, "loss": 0.2825, "step": 15371 }, { "epoch": 2.2041869802122167, "grad_norm": 0.27928227186203003, "learning_rate": 1.9948609571057e-06, "loss": 0.2868, "step": 15372 }, { "epoch": 2.204330369945512, "grad_norm": 0.2848093509674072, "learning_rate": 1.994194248403772e-06, "loss": 0.2633, "step": 15373 }, { "epoch": 2.204473759678807, "grad_norm": 0.26445817947387695, "learning_rate": 1.9935276233788503e-06, "loss": 0.2567, "step": 15374 }, { "epoch": 2.204617149412102, "grad_norm": 0.2590903341770172, "learning_rate": 1.9928610820494948e-06, "loss": 0.2792, "step": 15375 }, { "epoch": 2.204760539145397, "grad_norm": 0.26429444551467896, "learning_rate": 1.9921946244342612e-06, "loss": 0.2737, "step": 15376 }, { "epoch": 2.2049039288786925, "grad_norm": 0.28902241587638855, "learning_rate": 1.9915282505517023e-06, "loss": 0.271, "step": 15377 }, { "epoch": 2.2050473186119874, "grad_norm": 0.2718484103679657, "learning_rate": 1.9908619604203693e-06, "loss": 0.298, "step": 15378 }, { "epoch": 2.2051907083452824, "grad_norm": 0.29496487975120544, "learning_rate": 1.99019575405881e-06, "loss": 0.2899, "step": 15379 }, { "epoch": 2.2053340980785774, "grad_norm": 0.2957319915294647, "learning_rate": 1.989529631485571e-06, "loss": 0.2971, "step": 15380 }, { "epoch": 2.205477487811873, "grad_norm": 0.2619832158088684, "learning_rate": 1.9888635927191964e-06, "loss": 0.2717, "step": 15381 }, { "epoch": 2.205620877545168, "grad_norm": 0.2742781341075897, "learning_rate": 1.988197637778227e-06, "loss": 0.2806, "step": 15382 }, { "epoch": 2.2057642672784628, "grad_norm": 0.25642314553260803, "learning_rate": 1.9875317666812045e-06, "loss": 0.2856, "step": 15383 }, { "epoch": 2.2059076570117577, "grad_norm": 0.27357304096221924, "learning_rate": 1.9868659794466616e-06, "loss": 0.2554, "step": 15384 }, { "epoch": 2.206051046745053, "grad_norm": 0.278475821018219, "learning_rate": 1.986200276093135e-06, "loss": 0.2789, "step": 15385 }, { "epoch": 2.206194436478348, "grad_norm": 0.25875282287597656, "learning_rate": 1.985534656639156e-06, "loss": 0.2806, "step": 15386 }, { "epoch": 2.206337826211643, "grad_norm": 0.28550460934638977, "learning_rate": 1.9848691211032554e-06, "loss": 0.2786, "step": 15387 }, { "epoch": 2.2064812159449385, "grad_norm": 0.27158012986183167, "learning_rate": 1.98420366950396e-06, "loss": 0.2762, "step": 15388 }, { "epoch": 2.2066246056782335, "grad_norm": 0.2936292290687561, "learning_rate": 1.9835383018597974e-06, "loss": 0.2717, "step": 15389 }, { "epoch": 2.2067679954115285, "grad_norm": 0.26805517077445984, "learning_rate": 1.9828730181892867e-06, "loss": 0.2929, "step": 15390 }, { "epoch": 2.2069113851448234, "grad_norm": 0.30001530051231384, "learning_rate": 1.9822078185109502e-06, "loss": 0.2901, "step": 15391 }, { "epoch": 2.207054774878119, "grad_norm": 0.2767181992530823, "learning_rate": 1.9815427028433053e-06, "loss": 0.2879, "step": 15392 }, { "epoch": 2.207198164611414, "grad_norm": 0.27721869945526123, "learning_rate": 1.9808776712048684e-06, "loss": 0.2794, "step": 15393 }, { "epoch": 2.207341554344709, "grad_norm": 0.25863760709762573, "learning_rate": 1.980212723614153e-06, "loss": 0.2768, "step": 15394 }, { "epoch": 2.207484944078004, "grad_norm": 0.2687791585922241, "learning_rate": 1.9795478600896693e-06, "loss": 0.282, "step": 15395 }, { "epoch": 2.207628333811299, "grad_norm": 0.2646936774253845, "learning_rate": 1.978883080649928e-06, "loss": 0.272, "step": 15396 }, { "epoch": 2.207771723544594, "grad_norm": 0.2778870761394501, "learning_rate": 1.9782183853134336e-06, "loss": 0.2843, "step": 15397 }, { "epoch": 2.207915113277889, "grad_norm": 0.26717710494995117, "learning_rate": 1.977553774098691e-06, "loss": 0.2565, "step": 15398 }, { "epoch": 2.2080585030111846, "grad_norm": 0.2656046748161316, "learning_rate": 1.9768892470242024e-06, "loss": 0.2747, "step": 15399 }, { "epoch": 2.2082018927444795, "grad_norm": 0.28216129541397095, "learning_rate": 1.976224804108468e-06, "loss": 0.2934, "step": 15400 }, { "epoch": 2.2083452824777745, "grad_norm": 0.26227661967277527, "learning_rate": 1.9755604453699825e-06, "loss": 0.2834, "step": 15401 }, { "epoch": 2.2084886722110695, "grad_norm": 0.278389036655426, "learning_rate": 1.9748961708272414e-06, "loss": 0.2721, "step": 15402 }, { "epoch": 2.208632061944365, "grad_norm": 0.27555349469184875, "learning_rate": 1.974231980498737e-06, "loss": 0.2918, "step": 15403 }, { "epoch": 2.20877545167766, "grad_norm": 0.2628471553325653, "learning_rate": 1.97356787440296e-06, "loss": 0.2805, "step": 15404 }, { "epoch": 2.208918841410955, "grad_norm": 0.2617151141166687, "learning_rate": 1.9729038525584e-06, "loss": 0.2732, "step": 15405 }, { "epoch": 2.2090622311442503, "grad_norm": 0.2708963453769684, "learning_rate": 1.972239914983538e-06, "loss": 0.2869, "step": 15406 }, { "epoch": 2.2092056208775452, "grad_norm": 0.28612104058265686, "learning_rate": 1.9715760616968593e-06, "loss": 0.2781, "step": 15407 }, { "epoch": 2.20934901061084, "grad_norm": 0.2738327980041504, "learning_rate": 1.9709122927168446e-06, "loss": 0.2819, "step": 15408 }, { "epoch": 2.209492400344135, "grad_norm": 0.2904842495918274, "learning_rate": 1.9702486080619722e-06, "loss": 0.2884, "step": 15409 }, { "epoch": 2.2096357900774306, "grad_norm": 0.2712312936782837, "learning_rate": 1.9695850077507195e-06, "loss": 0.2763, "step": 15410 }, { "epoch": 2.2097791798107256, "grad_norm": 0.2697753310203552, "learning_rate": 1.9689214918015566e-06, "loss": 0.2727, "step": 15411 }, { "epoch": 2.2099225695440206, "grad_norm": 0.26584213972091675, "learning_rate": 1.968258060232957e-06, "loss": 0.2715, "step": 15412 }, { "epoch": 2.2100659592773155, "grad_norm": 0.27517232298851013, "learning_rate": 1.9675947130633888e-06, "loss": 0.2833, "step": 15413 }, { "epoch": 2.210209349010611, "grad_norm": 0.2628076374530792, "learning_rate": 1.9669314503113195e-06, "loss": 0.2685, "step": 15414 }, { "epoch": 2.210352738743906, "grad_norm": 0.2741786241531372, "learning_rate": 1.9662682719952132e-06, "loss": 0.2916, "step": 15415 }, { "epoch": 2.210496128477201, "grad_norm": 0.2748537063598633, "learning_rate": 1.965605178133531e-06, "loss": 0.2843, "step": 15416 }, { "epoch": 2.2106395182104963, "grad_norm": 0.29362523555755615, "learning_rate": 1.964942168744733e-06, "loss": 0.2909, "step": 15417 }, { "epoch": 2.2107829079437913, "grad_norm": 0.24910125136375427, "learning_rate": 1.964279243847276e-06, "loss": 0.2769, "step": 15418 }, { "epoch": 2.2109262976770863, "grad_norm": 0.2668837904930115, "learning_rate": 1.963616403459615e-06, "loss": 0.2749, "step": 15419 }, { "epoch": 2.2110696874103812, "grad_norm": 0.29308345913887024, "learning_rate": 1.962953647600203e-06, "loss": 0.2737, "step": 15420 }, { "epoch": 2.2112130771436767, "grad_norm": 0.27252596616744995, "learning_rate": 1.9622909762874907e-06, "loss": 0.2904, "step": 15421 }, { "epoch": 2.2113564668769716, "grad_norm": 0.28232577443122864, "learning_rate": 1.9616283895399233e-06, "loss": 0.2761, "step": 15422 }, { "epoch": 2.2114998566102666, "grad_norm": 0.27332454919815063, "learning_rate": 1.960965887375948e-06, "loss": 0.2776, "step": 15423 }, { "epoch": 2.211643246343562, "grad_norm": 0.2633780837059021, "learning_rate": 1.960303469814007e-06, "loss": 0.2716, "step": 15424 }, { "epoch": 2.211786636076857, "grad_norm": 0.27232271432876587, "learning_rate": 1.9596411368725417e-06, "loss": 0.2912, "step": 15425 }, { "epoch": 2.211930025810152, "grad_norm": 0.2849941849708557, "learning_rate": 1.95897888856999e-06, "loss": 0.2842, "step": 15426 }, { "epoch": 2.212073415543447, "grad_norm": 0.28122586011886597, "learning_rate": 1.95831672492479e-06, "loss": 0.2984, "step": 15427 }, { "epoch": 2.2122168052767424, "grad_norm": 0.27788686752319336, "learning_rate": 1.957654645955372e-06, "loss": 0.2878, "step": 15428 }, { "epoch": 2.2123601950100373, "grad_norm": 0.2525656819343567, "learning_rate": 1.956992651680168e-06, "loss": 0.2847, "step": 15429 }, { "epoch": 2.2125035847433323, "grad_norm": 0.2525494694709778, "learning_rate": 1.956330742117608e-06, "loss": 0.2858, "step": 15430 }, { "epoch": 2.2126469744766273, "grad_norm": 0.2626310884952545, "learning_rate": 1.9556689172861175e-06, "loss": 0.2606, "step": 15431 }, { "epoch": 2.2127903642099227, "grad_norm": 0.2840140759944916, "learning_rate": 1.955007177204122e-06, "loss": 0.3002, "step": 15432 }, { "epoch": 2.2129337539432177, "grad_norm": 0.2927526533603668, "learning_rate": 1.9543455218900427e-06, "loss": 0.2742, "step": 15433 }, { "epoch": 2.2130771436765126, "grad_norm": 0.2824375629425049, "learning_rate": 1.9536839513622984e-06, "loss": 0.2711, "step": 15434 }, { "epoch": 2.2132205334098076, "grad_norm": 0.2872316241264343, "learning_rate": 1.9530224656393073e-06, "loss": 0.2944, "step": 15435 }, { "epoch": 2.213363923143103, "grad_norm": 0.2808710038661957, "learning_rate": 1.952361064739483e-06, "loss": 0.2862, "step": 15436 }, { "epoch": 2.213507312876398, "grad_norm": 0.2915230989456177, "learning_rate": 1.9516997486812405e-06, "loss": 0.2989, "step": 15437 }, { "epoch": 2.213650702609693, "grad_norm": 0.27413830161094666, "learning_rate": 1.951038517482986e-06, "loss": 0.2862, "step": 15438 }, { "epoch": 2.2137940923429884, "grad_norm": 0.2760787606239319, "learning_rate": 1.950377371163129e-06, "loss": 0.2689, "step": 15439 }, { "epoch": 2.2139374820762834, "grad_norm": 0.2647106945514679, "learning_rate": 1.949716309740075e-06, "loss": 0.2782, "step": 15440 }, { "epoch": 2.2140808718095784, "grad_norm": 0.29961085319519043, "learning_rate": 1.9490553332322265e-06, "loss": 0.2921, "step": 15441 }, { "epoch": 2.2142242615428733, "grad_norm": 0.2733738422393799, "learning_rate": 1.948394441657985e-06, "loss": 0.2912, "step": 15442 }, { "epoch": 2.2143676512761687, "grad_norm": 0.2947736382484436, "learning_rate": 1.947733635035749e-06, "loss": 0.3012, "step": 15443 }, { "epoch": 2.2145110410094637, "grad_norm": 0.28463631868362427, "learning_rate": 1.9470729133839116e-06, "loss": 0.2727, "step": 15444 }, { "epoch": 2.2146544307427587, "grad_norm": 0.2812053859233856, "learning_rate": 1.946412276720868e-06, "loss": 0.2705, "step": 15445 }, { "epoch": 2.214797820476054, "grad_norm": 0.2825659215450287, "learning_rate": 1.94575172506501e-06, "loss": 0.2742, "step": 15446 }, { "epoch": 2.214941210209349, "grad_norm": 0.2807465195655823, "learning_rate": 1.945091258434725e-06, "loss": 0.2861, "step": 15447 }, { "epoch": 2.215084599942644, "grad_norm": 0.2767361104488373, "learning_rate": 1.9444308768484015e-06, "loss": 0.2729, "step": 15448 }, { "epoch": 2.215227989675939, "grad_norm": 0.28012168407440186, "learning_rate": 1.9437705803244195e-06, "loss": 0.2885, "step": 15449 }, { "epoch": 2.2153713794092345, "grad_norm": 0.2887907922267914, "learning_rate": 1.943110368881164e-06, "loss": 0.2723, "step": 15450 }, { "epoch": 2.2155147691425294, "grad_norm": 0.2745788097381592, "learning_rate": 1.942450242537014e-06, "loss": 0.2698, "step": 15451 }, { "epoch": 2.2156581588758244, "grad_norm": 0.2820829153060913, "learning_rate": 1.941790201310346e-06, "loss": 0.2648, "step": 15452 }, { "epoch": 2.21580154860912, "grad_norm": 0.2694944143295288, "learning_rate": 1.9411302452195343e-06, "loss": 0.2786, "step": 15453 }, { "epoch": 2.215944938342415, "grad_norm": 0.2779299020767212, "learning_rate": 1.9404703742829524e-06, "loss": 0.2877, "step": 15454 }, { "epoch": 2.2160883280757098, "grad_norm": 0.2788049578666687, "learning_rate": 1.9398105885189673e-06, "loss": 0.282, "step": 15455 }, { "epoch": 2.2162317178090047, "grad_norm": 0.27001217007637024, "learning_rate": 1.939150887945948e-06, "loss": 0.2744, "step": 15456 }, { "epoch": 2.2163751075423, "grad_norm": 0.2942190170288086, "learning_rate": 1.9384912725822596e-06, "loss": 0.2936, "step": 15457 }, { "epoch": 2.216518497275595, "grad_norm": 0.2681851387023926, "learning_rate": 1.9378317424462644e-06, "loss": 0.3042, "step": 15458 }, { "epoch": 2.21666188700889, "grad_norm": 0.2736603915691376, "learning_rate": 1.937172297556325e-06, "loss": 0.2904, "step": 15459 }, { "epoch": 2.216805276742185, "grad_norm": 0.2697143852710724, "learning_rate": 1.9365129379307954e-06, "loss": 0.2674, "step": 15460 }, { "epoch": 2.2169486664754805, "grad_norm": 0.2668646275997162, "learning_rate": 1.9358536635880337e-06, "loss": 0.2933, "step": 15461 }, { "epoch": 2.2170920562087755, "grad_norm": 0.27826759219169617, "learning_rate": 1.935194474546392e-06, "loss": 0.2965, "step": 15462 }, { "epoch": 2.2172354459420704, "grad_norm": 0.27281638979911804, "learning_rate": 1.9345353708242216e-06, "loss": 0.2669, "step": 15463 }, { "epoch": 2.2173788356753654, "grad_norm": 0.2754305899143219, "learning_rate": 1.933876352439873e-06, "loss": 0.2643, "step": 15464 }, { "epoch": 2.217522225408661, "grad_norm": 0.2765026092529297, "learning_rate": 1.933217419411688e-06, "loss": 0.2871, "step": 15465 }, { "epoch": 2.217665615141956, "grad_norm": 0.28041189908981323, "learning_rate": 1.9325585717580124e-06, "loss": 0.2909, "step": 15466 }, { "epoch": 2.217809004875251, "grad_norm": 0.25032469630241394, "learning_rate": 1.9318998094971873e-06, "loss": 0.2814, "step": 15467 }, { "epoch": 2.217952394608546, "grad_norm": 0.286927193403244, "learning_rate": 1.931241132647553e-06, "loss": 0.2733, "step": 15468 }, { "epoch": 2.218095784341841, "grad_norm": 0.27179256081581116, "learning_rate": 1.930582541227444e-06, "loss": 0.2863, "step": 15469 }, { "epoch": 2.218239174075136, "grad_norm": 0.2556409537792206, "learning_rate": 1.929924035255195e-06, "loss": 0.2888, "step": 15470 }, { "epoch": 2.218382563808431, "grad_norm": 0.2837446331977844, "learning_rate": 1.929265614749139e-06, "loss": 0.2894, "step": 15471 }, { "epoch": 2.2185259535417265, "grad_norm": 0.29316282272338867, "learning_rate": 1.9286072797276046e-06, "loss": 0.2847, "step": 15472 }, { "epoch": 2.2186693432750215, "grad_norm": 0.27608397603034973, "learning_rate": 1.927949030208918e-06, "loss": 0.2736, "step": 15473 }, { "epoch": 2.2188127330083165, "grad_norm": 0.2927865982055664, "learning_rate": 1.9272908662114057e-06, "loss": 0.2802, "step": 15474 }, { "epoch": 2.218956122741612, "grad_norm": 0.28239303827285767, "learning_rate": 1.9266327877533904e-06, "loss": 0.2897, "step": 15475 }, { "epoch": 2.219099512474907, "grad_norm": 0.2616725564002991, "learning_rate": 1.925974794853189e-06, "loss": 0.2708, "step": 15476 }, { "epoch": 2.219242902208202, "grad_norm": 0.27451592683792114, "learning_rate": 1.92531688752912e-06, "loss": 0.2689, "step": 15477 }, { "epoch": 2.219386291941497, "grad_norm": 0.29593512415885925, "learning_rate": 1.9246590657994996e-06, "loss": 0.2729, "step": 15478 }, { "epoch": 2.2195296816747923, "grad_norm": 0.27630487084388733, "learning_rate": 1.9240013296826397e-06, "loss": 0.2744, "step": 15479 }, { "epoch": 2.2196730714080872, "grad_norm": 0.2863471508026123, "learning_rate": 1.9233436791968513e-06, "loss": 0.2745, "step": 15480 }, { "epoch": 2.219816461141382, "grad_norm": 0.26524618268013, "learning_rate": 1.9226861143604443e-06, "loss": 0.2812, "step": 15481 }, { "epoch": 2.219959850874677, "grad_norm": 0.2874990701675415, "learning_rate": 1.9220286351917193e-06, "loss": 0.2854, "step": 15482 }, { "epoch": 2.2201032406079726, "grad_norm": 0.28187239170074463, "learning_rate": 1.921371241708983e-06, "loss": 0.2976, "step": 15483 }, { "epoch": 2.2202466303412676, "grad_norm": 0.31304168701171875, "learning_rate": 1.9207139339305353e-06, "loss": 0.2774, "step": 15484 }, { "epoch": 2.2203900200745625, "grad_norm": 0.27024373412132263, "learning_rate": 1.920056711874675e-06, "loss": 0.2751, "step": 15485 }, { "epoch": 2.220533409807858, "grad_norm": 0.28097808361053467, "learning_rate": 1.9193995755596974e-06, "loss": 0.2732, "step": 15486 }, { "epoch": 2.220676799541153, "grad_norm": 0.2770324945449829, "learning_rate": 1.918742525003897e-06, "loss": 0.2838, "step": 15487 }, { "epoch": 2.220820189274448, "grad_norm": 0.29526445269584656, "learning_rate": 1.9180855602255653e-06, "loss": 0.2933, "step": 15488 }, { "epoch": 2.220963579007743, "grad_norm": 0.28159642219543457, "learning_rate": 1.9174286812429903e-06, "loss": 0.3004, "step": 15489 }, { "epoch": 2.2211069687410383, "grad_norm": 0.2707802653312683, "learning_rate": 1.916771888074459e-06, "loss": 0.2719, "step": 15490 }, { "epoch": 2.2212503584743333, "grad_norm": 0.2604416310787201, "learning_rate": 1.9161151807382576e-06, "loss": 0.2858, "step": 15491 }, { "epoch": 2.2213937482076282, "grad_norm": 0.27093711495399475, "learning_rate": 1.915458559252664e-06, "loss": 0.2844, "step": 15492 }, { "epoch": 2.2215371379409232, "grad_norm": 0.27674588561058044, "learning_rate": 1.9148020236359584e-06, "loss": 0.3059, "step": 15493 }, { "epoch": 2.2216805276742186, "grad_norm": 0.25855207443237305, "learning_rate": 1.914145573906419e-06, "loss": 0.2886, "step": 15494 }, { "epoch": 2.2218239174075136, "grad_norm": 0.28062883019447327, "learning_rate": 1.9134892100823198e-06, "loss": 0.2782, "step": 15495 }, { "epoch": 2.2219673071408086, "grad_norm": 0.2676522135734558, "learning_rate": 1.9128329321819327e-06, "loss": 0.2827, "step": 15496 }, { "epoch": 2.222110696874104, "grad_norm": 0.27418678998947144, "learning_rate": 1.9121767402235303e-06, "loss": 0.294, "step": 15497 }, { "epoch": 2.222254086607399, "grad_norm": 0.25497692823410034, "learning_rate": 1.9115206342253755e-06, "loss": 0.2827, "step": 15498 }, { "epoch": 2.222397476340694, "grad_norm": 0.2835875451564789, "learning_rate": 1.9108646142057354e-06, "loss": 0.2874, "step": 15499 }, { "epoch": 2.222540866073989, "grad_norm": 0.27955198287963867, "learning_rate": 1.910208680182872e-06, "loss": 0.2767, "step": 15500 }, { "epoch": 2.2226842558072843, "grad_norm": 0.28041791915893555, "learning_rate": 1.909552832175046e-06, "loss": 0.2919, "step": 15501 }, { "epoch": 2.2228276455405793, "grad_norm": 0.2785671651363373, "learning_rate": 1.908897070200517e-06, "loss": 0.2903, "step": 15502 }, { "epoch": 2.2229710352738743, "grad_norm": 0.2670559585094452, "learning_rate": 1.908241394277537e-06, "loss": 0.2736, "step": 15503 }, { "epoch": 2.2231144250071697, "grad_norm": 0.27385032176971436, "learning_rate": 1.9075858044243605e-06, "loss": 0.2797, "step": 15504 }, { "epoch": 2.2232578147404647, "grad_norm": 0.2625648081302643, "learning_rate": 1.906930300659236e-06, "loss": 0.2926, "step": 15505 }, { "epoch": 2.2234012044737597, "grad_norm": 0.2656099498271942, "learning_rate": 1.9062748830004162e-06, "loss": 0.2605, "step": 15506 }, { "epoch": 2.2235445942070546, "grad_norm": 0.27435627579689026, "learning_rate": 1.9056195514661446e-06, "loss": 0.2753, "step": 15507 }, { "epoch": 2.22368798394035, "grad_norm": 0.29514163732528687, "learning_rate": 1.9049643060746658e-06, "loss": 0.2736, "step": 15508 }, { "epoch": 2.223831373673645, "grad_norm": 0.2548290491104126, "learning_rate": 1.9043091468442187e-06, "loss": 0.2753, "step": 15509 }, { "epoch": 2.22397476340694, "grad_norm": 0.3055661618709564, "learning_rate": 1.9036540737930425e-06, "loss": 0.2795, "step": 15510 }, { "epoch": 2.224118153140235, "grad_norm": 0.2641296982765198, "learning_rate": 1.9029990869393738e-06, "loss": 0.2962, "step": 15511 }, { "epoch": 2.2242615428735304, "grad_norm": 0.2813078761100769, "learning_rate": 1.9023441863014463e-06, "loss": 0.2751, "step": 15512 }, { "epoch": 2.2244049326068254, "grad_norm": 0.27437007427215576, "learning_rate": 1.901689371897494e-06, "loss": 0.2632, "step": 15513 }, { "epoch": 2.2245483223401203, "grad_norm": 0.271371066570282, "learning_rate": 1.901034643745741e-06, "loss": 0.2678, "step": 15514 }, { "epoch": 2.2246917120734153, "grad_norm": 0.29057714343070984, "learning_rate": 1.9003800018644164e-06, "loss": 0.3031, "step": 15515 }, { "epoch": 2.2248351018067107, "grad_norm": 0.28033649921417236, "learning_rate": 1.8997254462717447e-06, "loss": 0.2932, "step": 15516 }, { "epoch": 2.2249784915400057, "grad_norm": 0.26872900128364563, "learning_rate": 1.8990709769859477e-06, "loss": 0.278, "step": 15517 }, { "epoch": 2.2251218812733007, "grad_norm": 0.2670436203479767, "learning_rate": 1.8984165940252458e-06, "loss": 0.2812, "step": 15518 }, { "epoch": 2.225265271006596, "grad_norm": 0.26067814230918884, "learning_rate": 1.8977622974078536e-06, "loss": 0.2892, "step": 15519 }, { "epoch": 2.225408660739891, "grad_norm": 0.25524410605430603, "learning_rate": 1.8971080871519865e-06, "loss": 0.2798, "step": 15520 }, { "epoch": 2.225552050473186, "grad_norm": 0.2788526117801666, "learning_rate": 1.8964539632758572e-06, "loss": 0.2684, "step": 15521 }, { "epoch": 2.225695440206481, "grad_norm": 0.2670222520828247, "learning_rate": 1.8957999257976757e-06, "loss": 0.297, "step": 15522 }, { "epoch": 2.2258388299397764, "grad_norm": 0.28124964237213135, "learning_rate": 1.8951459747356487e-06, "loss": 0.2937, "step": 15523 }, { "epoch": 2.2259822196730714, "grad_norm": 0.2832702100276947, "learning_rate": 1.8944921101079817e-06, "loss": 0.29, "step": 15524 }, { "epoch": 2.2261256094063664, "grad_norm": 0.2811752259731293, "learning_rate": 1.8938383319328774e-06, "loss": 0.2797, "step": 15525 }, { "epoch": 2.226268999139662, "grad_norm": 0.28462323546409607, "learning_rate": 1.8931846402285353e-06, "loss": 0.2884, "step": 15526 }, { "epoch": 2.226412388872957, "grad_norm": 0.2608664333820343, "learning_rate": 1.8925310350131543e-06, "loss": 0.2926, "step": 15527 }, { "epoch": 2.2265557786062518, "grad_norm": 0.2649473547935486, "learning_rate": 1.8918775163049285e-06, "loss": 0.2905, "step": 15528 }, { "epoch": 2.2266991683395467, "grad_norm": 0.2897500693798065, "learning_rate": 1.8912240841220535e-06, "loss": 0.2758, "step": 15529 }, { "epoch": 2.226842558072842, "grad_norm": 0.3005574941635132, "learning_rate": 1.890570738482716e-06, "loss": 0.2933, "step": 15530 }, { "epoch": 2.226985947806137, "grad_norm": 0.2661631405353546, "learning_rate": 1.889917479405106e-06, "loss": 0.2839, "step": 15531 }, { "epoch": 2.227129337539432, "grad_norm": 0.2662310004234314, "learning_rate": 1.889264306907409e-06, "loss": 0.2929, "step": 15532 }, { "epoch": 2.227272727272727, "grad_norm": 0.2744949162006378, "learning_rate": 1.888611221007809e-06, "loss": 0.2885, "step": 15533 }, { "epoch": 2.2274161170060225, "grad_norm": 0.28838038444519043, "learning_rate": 1.8879582217244856e-06, "loss": 0.286, "step": 15534 }, { "epoch": 2.2275595067393175, "grad_norm": 0.27768778800964355, "learning_rate": 1.8873053090756206e-06, "loss": 0.2813, "step": 15535 }, { "epoch": 2.2277028964726124, "grad_norm": 0.2831990122795105, "learning_rate": 1.8866524830793854e-06, "loss": 0.2754, "step": 15536 }, { "epoch": 2.227846286205908, "grad_norm": 0.2945375144481659, "learning_rate": 1.8859997437539557e-06, "loss": 0.2946, "step": 15537 }, { "epoch": 2.227989675939203, "grad_norm": 0.2747674286365509, "learning_rate": 1.8853470911175032e-06, "loss": 0.296, "step": 15538 }, { "epoch": 2.228133065672498, "grad_norm": 0.2697402238845825, "learning_rate": 1.8846945251881966e-06, "loss": 0.2815, "step": 15539 }, { "epoch": 2.2282764554057928, "grad_norm": 0.2826692759990692, "learning_rate": 1.884042045984202e-06, "loss": 0.2775, "step": 15540 }, { "epoch": 2.228419845139088, "grad_norm": 0.26844510436058044, "learning_rate": 1.8833896535236835e-06, "loss": 0.2778, "step": 15541 }, { "epoch": 2.228563234872383, "grad_norm": 0.2677091658115387, "learning_rate": 1.8827373478248028e-06, "loss": 0.2719, "step": 15542 }, { "epoch": 2.228706624605678, "grad_norm": 0.2795974016189575, "learning_rate": 1.882085128905719e-06, "loss": 0.2869, "step": 15543 }, { "epoch": 2.228850014338973, "grad_norm": 0.2718711495399475, "learning_rate": 1.881432996784589e-06, "loss": 0.3058, "step": 15544 }, { "epoch": 2.2289934040722685, "grad_norm": 0.2791638672351837, "learning_rate": 1.8807809514795694e-06, "loss": 0.282, "step": 15545 }, { "epoch": 2.2291367938055635, "grad_norm": 0.2881219983100891, "learning_rate": 1.8801289930088073e-06, "loss": 0.2908, "step": 15546 }, { "epoch": 2.2292801835388585, "grad_norm": 0.2802797257900238, "learning_rate": 1.8794771213904556e-06, "loss": 0.2926, "step": 15547 }, { "epoch": 2.229423573272154, "grad_norm": 0.28293049335479736, "learning_rate": 1.8788253366426601e-06, "loss": 0.282, "step": 15548 }, { "epoch": 2.229566963005449, "grad_norm": 0.2725931704044342, "learning_rate": 1.8781736387835658e-06, "loss": 0.2795, "step": 15549 }, { "epoch": 2.229710352738744, "grad_norm": 0.28336095809936523, "learning_rate": 1.8775220278313156e-06, "loss": 0.2954, "step": 15550 }, { "epoch": 2.229853742472039, "grad_norm": 0.2917473614215851, "learning_rate": 1.8768705038040503e-06, "loss": 0.2841, "step": 15551 }, { "epoch": 2.2299971322053342, "grad_norm": 0.2770662009716034, "learning_rate": 1.8762190667199037e-06, "loss": 0.2775, "step": 15552 }, { "epoch": 2.230140521938629, "grad_norm": 0.2678062617778778, "learning_rate": 1.8755677165970137e-06, "loss": 0.2909, "step": 15553 }, { "epoch": 2.230283911671924, "grad_norm": 0.28601863980293274, "learning_rate": 1.874916453453512e-06, "loss": 0.2898, "step": 15554 }, { "epoch": 2.2304273014052196, "grad_norm": 0.2844833731651306, "learning_rate": 1.8742652773075288e-06, "loss": 0.2849, "step": 15555 }, { "epoch": 2.2305706911385146, "grad_norm": 0.2689322829246521, "learning_rate": 1.8736141881771942e-06, "loss": 0.2786, "step": 15556 }, { "epoch": 2.2307140808718096, "grad_norm": 0.2647896111011505, "learning_rate": 1.872963186080629e-06, "loss": 0.2669, "step": 15557 }, { "epoch": 2.2308574706051045, "grad_norm": 0.27086710929870605, "learning_rate": 1.8723122710359587e-06, "loss": 0.2753, "step": 15558 }, { "epoch": 2.2310008603384, "grad_norm": 0.2844945192337036, "learning_rate": 1.8716614430613034e-06, "loss": 0.2666, "step": 15559 }, { "epoch": 2.231144250071695, "grad_norm": 0.28691914677619934, "learning_rate": 1.8710107021747815e-06, "loss": 0.2808, "step": 15560 }, { "epoch": 2.23128763980499, "grad_norm": 0.26754987239837646, "learning_rate": 1.8703600483945084e-06, "loss": 0.3025, "step": 15561 }, { "epoch": 2.231431029538285, "grad_norm": 0.2622285485267639, "learning_rate": 1.869709481738597e-06, "loss": 0.2866, "step": 15562 }, { "epoch": 2.2315744192715803, "grad_norm": 0.2688300311565399, "learning_rate": 1.869059002225158e-06, "loss": 0.2644, "step": 15563 }, { "epoch": 2.2317178090048753, "grad_norm": 0.2639588713645935, "learning_rate": 1.8684086098723003e-06, "loss": 0.2904, "step": 15564 }, { "epoch": 2.2318611987381702, "grad_norm": 0.3157672882080078, "learning_rate": 1.8677583046981301e-06, "loss": 0.3006, "step": 15565 }, { "epoch": 2.232004588471465, "grad_norm": 0.2798140347003937, "learning_rate": 1.8671080867207503e-06, "loss": 0.2869, "step": 15566 }, { "epoch": 2.2321479782047606, "grad_norm": 0.2822119891643524, "learning_rate": 1.8664579559582635e-06, "loss": 0.2837, "step": 15567 }, { "epoch": 2.2322913679380556, "grad_norm": 0.2767685651779175, "learning_rate": 1.8658079124287648e-06, "loss": 0.2905, "step": 15568 }, { "epoch": 2.2324347576713506, "grad_norm": 0.27756816148757935, "learning_rate": 1.865157956150353e-06, "loss": 0.2871, "step": 15569 }, { "epoch": 2.232578147404646, "grad_norm": 0.28637340664863586, "learning_rate": 1.8645080871411215e-06, "loss": 0.2932, "step": 15570 }, { "epoch": 2.232721537137941, "grad_norm": 0.26239854097366333, "learning_rate": 1.8638583054191618e-06, "loss": 0.2931, "step": 15571 }, { "epoch": 2.232864926871236, "grad_norm": 0.27425941824913025, "learning_rate": 1.8632086110025638e-06, "loss": 0.2905, "step": 15572 }, { "epoch": 2.233008316604531, "grad_norm": 0.26714348793029785, "learning_rate": 1.862559003909411e-06, "loss": 0.2929, "step": 15573 }, { "epoch": 2.2331517063378263, "grad_norm": 0.29152917861938477, "learning_rate": 1.8619094841577895e-06, "loss": 0.292, "step": 15574 }, { "epoch": 2.2332950960711213, "grad_norm": 0.27841004729270935, "learning_rate": 1.8612600517657808e-06, "loss": 0.2859, "step": 15575 }, { "epoch": 2.2334384858044163, "grad_norm": 0.2883641719818115, "learning_rate": 1.860610706751464e-06, "loss": 0.2888, "step": 15576 }, { "epoch": 2.2335818755377117, "grad_norm": 0.25833195447921753, "learning_rate": 1.8599614491329154e-06, "loss": 0.2733, "step": 15577 }, { "epoch": 2.2337252652710067, "grad_norm": 0.2538650333881378, "learning_rate": 1.8593122789282103e-06, "loss": 0.2712, "step": 15578 }, { "epoch": 2.2338686550043017, "grad_norm": 0.2823438346385956, "learning_rate": 1.8586631961554197e-06, "loss": 0.2901, "step": 15579 }, { "epoch": 2.2340120447375966, "grad_norm": 0.29926809668540955, "learning_rate": 1.8580142008326136e-06, "loss": 0.2797, "step": 15580 }, { "epoch": 2.234155434470892, "grad_norm": 0.26589998602867126, "learning_rate": 1.8573652929778585e-06, "loss": 0.2774, "step": 15581 }, { "epoch": 2.234298824204187, "grad_norm": 0.2896008789539337, "learning_rate": 1.8567164726092202e-06, "loss": 0.2736, "step": 15582 }, { "epoch": 2.234442213937482, "grad_norm": 0.29405677318573, "learning_rate": 1.8560677397447608e-06, "loss": 0.2789, "step": 15583 }, { "epoch": 2.234585603670777, "grad_norm": 0.27550530433654785, "learning_rate": 1.855419094402538e-06, "loss": 0.2751, "step": 15584 }, { "epoch": 2.2347289934040724, "grad_norm": 0.26680317521095276, "learning_rate": 1.8547705366006102e-06, "loss": 0.2709, "step": 15585 }, { "epoch": 2.2348723831373674, "grad_norm": 0.26837560534477234, "learning_rate": 1.8541220663570326e-06, "loss": 0.2672, "step": 15586 }, { "epoch": 2.2350157728706623, "grad_norm": 0.2584037184715271, "learning_rate": 1.8534736836898576e-06, "loss": 0.271, "step": 15587 }, { "epoch": 2.2351591626039577, "grad_norm": 0.2948841154575348, "learning_rate": 1.8528253886171343e-06, "loss": 0.2723, "step": 15588 }, { "epoch": 2.2353025523372527, "grad_norm": 0.2565709352493286, "learning_rate": 1.8521771811569134e-06, "loss": 0.2614, "step": 15589 }, { "epoch": 2.2354459420705477, "grad_norm": 0.3052370250225067, "learning_rate": 1.8515290613272351e-06, "loss": 0.2968, "step": 15590 }, { "epoch": 2.2355893318038427, "grad_norm": 0.2832071781158447, "learning_rate": 1.8508810291461448e-06, "loss": 0.2774, "step": 15591 }, { "epoch": 2.235732721537138, "grad_norm": 0.2834053635597229, "learning_rate": 1.8502330846316825e-06, "loss": 0.287, "step": 15592 }, { "epoch": 2.235876111270433, "grad_norm": 0.2788005769252777, "learning_rate": 1.8495852278018856e-06, "loss": 0.2906, "step": 15593 }, { "epoch": 2.236019501003728, "grad_norm": 0.2761489450931549, "learning_rate": 1.84893745867479e-06, "loss": 0.2763, "step": 15594 }, { "epoch": 2.236162890737023, "grad_norm": 0.25694724917411804, "learning_rate": 1.8482897772684284e-06, "loss": 0.2794, "step": 15595 }, { "epoch": 2.2363062804703184, "grad_norm": 0.3147697448730469, "learning_rate": 1.8476421836008307e-06, "loss": 0.2934, "step": 15596 }, { "epoch": 2.2364496702036134, "grad_norm": 0.26482701301574707, "learning_rate": 1.8469946776900256e-06, "loss": 0.2988, "step": 15597 }, { "epoch": 2.2365930599369084, "grad_norm": 0.28165239095687866, "learning_rate": 1.8463472595540383e-06, "loss": 0.2497, "step": 15598 }, { "epoch": 2.236736449670204, "grad_norm": 0.2813873887062073, "learning_rate": 1.845699929210894e-06, "loss": 0.2749, "step": 15599 }, { "epoch": 2.2368798394034988, "grad_norm": 0.26576468348503113, "learning_rate": 1.84505268667861e-06, "loss": 0.269, "step": 15600 }, { "epoch": 2.2370232291367937, "grad_norm": 0.26987117528915405, "learning_rate": 1.8444055319752057e-06, "loss": 0.2781, "step": 15601 }, { "epoch": 2.2371666188700887, "grad_norm": 0.27493518590927124, "learning_rate": 1.8437584651186969e-06, "loss": 0.2833, "step": 15602 }, { "epoch": 2.237310008603384, "grad_norm": 0.2624921202659607, "learning_rate": 1.8431114861270976e-06, "loss": 0.28, "step": 15603 }, { "epoch": 2.237453398336679, "grad_norm": 0.28657039999961853, "learning_rate": 1.8424645950184178e-06, "loss": 0.2781, "step": 15604 }, { "epoch": 2.237596788069974, "grad_norm": 0.263474702835083, "learning_rate": 1.841817791810669e-06, "loss": 0.2694, "step": 15605 }, { "epoch": 2.2377401778032695, "grad_norm": 0.286297082901001, "learning_rate": 1.841171076521852e-06, "loss": 0.294, "step": 15606 }, { "epoch": 2.2378835675365645, "grad_norm": 0.2795441150665283, "learning_rate": 1.8405244491699742e-06, "loss": 0.2668, "step": 15607 }, { "epoch": 2.2380269572698595, "grad_norm": 0.2647377848625183, "learning_rate": 1.8398779097730346e-06, "loss": 0.2656, "step": 15608 }, { "epoch": 2.2381703470031544, "grad_norm": 0.26776933670043945, "learning_rate": 1.8392314583490334e-06, "loss": 0.2772, "step": 15609 }, { "epoch": 2.23831373673645, "grad_norm": 0.28119298815727234, "learning_rate": 1.8385850949159672e-06, "loss": 0.2946, "step": 15610 }, { "epoch": 2.238457126469745, "grad_norm": 0.2572159171104431, "learning_rate": 1.8379388194918279e-06, "loss": 0.2627, "step": 15611 }, { "epoch": 2.23860051620304, "grad_norm": 0.26979243755340576, "learning_rate": 1.8372926320946071e-06, "loss": 0.2796, "step": 15612 }, { "epoch": 2.2387439059363348, "grad_norm": 0.269121378660202, "learning_rate": 1.836646532742295e-06, "loss": 0.2871, "step": 15613 }, { "epoch": 2.23888729566963, "grad_norm": 0.29582956433296204, "learning_rate": 1.8360005214528765e-06, "loss": 0.2748, "step": 15614 }, { "epoch": 2.239030685402925, "grad_norm": 0.2682875692844391, "learning_rate": 1.8353545982443367e-06, "loss": 0.2724, "step": 15615 }, { "epoch": 2.23917407513622, "grad_norm": 0.2676381468772888, "learning_rate": 1.8347087631346565e-06, "loss": 0.2653, "step": 15616 }, { "epoch": 2.2393174648695156, "grad_norm": 0.24887289106845856, "learning_rate": 1.8340630161418154e-06, "loss": 0.2688, "step": 15617 }, { "epoch": 2.2394608546028105, "grad_norm": 0.2875766456127167, "learning_rate": 1.83341735728379e-06, "loss": 0.2813, "step": 15618 }, { "epoch": 2.2396042443361055, "grad_norm": 0.278793066740036, "learning_rate": 1.8327717865785538e-06, "loss": 0.3105, "step": 15619 }, { "epoch": 2.2397476340694005, "grad_norm": 0.2762376368045807, "learning_rate": 1.8321263040440795e-06, "loss": 0.2829, "step": 15620 }, { "epoch": 2.239891023802696, "grad_norm": 0.26971444487571716, "learning_rate": 1.8314809096983372e-06, "loss": 0.2902, "step": 15621 }, { "epoch": 2.240034413535991, "grad_norm": 0.28901752829551697, "learning_rate": 1.8308356035592905e-06, "loss": 0.2846, "step": 15622 }, { "epoch": 2.240177803269286, "grad_norm": 0.2664164900779724, "learning_rate": 1.8301903856449055e-06, "loss": 0.2754, "step": 15623 }, { "epoch": 2.240321193002581, "grad_norm": 0.2764822542667389, "learning_rate": 1.829545255973144e-06, "loss": 0.2843, "step": 15624 }, { "epoch": 2.2404645827358762, "grad_norm": 0.2807343304157257, "learning_rate": 1.828900214561966e-06, "loss": 0.2814, "step": 15625 }, { "epoch": 2.240607972469171, "grad_norm": 0.279548317193985, "learning_rate": 1.8282552614293275e-06, "loss": 0.287, "step": 15626 }, { "epoch": 2.240751362202466, "grad_norm": 0.281572550535202, "learning_rate": 1.8276103965931851e-06, "loss": 0.2867, "step": 15627 }, { "epoch": 2.2408947519357616, "grad_norm": 0.29207611083984375, "learning_rate": 1.826965620071488e-06, "loss": 0.2874, "step": 15628 }, { "epoch": 2.2410381416690566, "grad_norm": 0.2802518308162689, "learning_rate": 1.8263209318821867e-06, "loss": 0.2822, "step": 15629 }, { "epoch": 2.2411815314023515, "grad_norm": 0.29693537950515747, "learning_rate": 1.8256763320432286e-06, "loss": 0.2912, "step": 15630 }, { "epoch": 2.2413249211356465, "grad_norm": 0.2652004063129425, "learning_rate": 1.8250318205725586e-06, "loss": 0.2712, "step": 15631 }, { "epoch": 2.241468310868942, "grad_norm": 0.26506710052490234, "learning_rate": 1.8243873974881182e-06, "loss": 0.2718, "step": 15632 }, { "epoch": 2.241611700602237, "grad_norm": 0.282240092754364, "learning_rate": 1.8237430628078479e-06, "loss": 0.2949, "step": 15633 }, { "epoch": 2.241755090335532, "grad_norm": 0.284711629152298, "learning_rate": 1.823098816549685e-06, "loss": 0.2875, "step": 15634 }, { "epoch": 2.2418984800688273, "grad_norm": 0.26360154151916504, "learning_rate": 1.822454658731564e-06, "loss": 0.314, "step": 15635 }, { "epoch": 2.2420418698021223, "grad_norm": 0.26134464144706726, "learning_rate": 1.8218105893714172e-06, "loss": 0.2828, "step": 15636 }, { "epoch": 2.2421852595354173, "grad_norm": 0.2859596610069275, "learning_rate": 1.821166608487176e-06, "loss": 0.2697, "step": 15637 }, { "epoch": 2.2423286492687122, "grad_norm": 0.27953487634658813, "learning_rate": 1.8205227160967653e-06, "loss": 0.2794, "step": 15638 }, { "epoch": 2.2424720390020076, "grad_norm": 0.2857118844985962, "learning_rate": 1.819878912218111e-06, "loss": 0.2743, "step": 15639 }, { "epoch": 2.2426154287353026, "grad_norm": 0.27362513542175293, "learning_rate": 1.8192351968691357e-06, "loss": 0.2796, "step": 15640 }, { "epoch": 2.2427588184685976, "grad_norm": 0.28569117188453674, "learning_rate": 1.8185915700677598e-06, "loss": 0.2667, "step": 15641 }, { "epoch": 2.2429022082018926, "grad_norm": 0.27098608016967773, "learning_rate": 1.8179480318319003e-06, "loss": 0.276, "step": 15642 }, { "epoch": 2.243045597935188, "grad_norm": 0.25137218832969666, "learning_rate": 1.817304582179475e-06, "loss": 0.2864, "step": 15643 }, { "epoch": 2.243188987668483, "grad_norm": 0.27642369270324707, "learning_rate": 1.816661221128392e-06, "loss": 0.2781, "step": 15644 }, { "epoch": 2.243332377401778, "grad_norm": 0.28301766514778137, "learning_rate": 1.816017948696564e-06, "loss": 0.2887, "step": 15645 }, { "epoch": 2.243475767135073, "grad_norm": 0.2956179976463318, "learning_rate": 1.8153747649018982e-06, "loss": 0.2989, "step": 15646 }, { "epoch": 2.2436191568683683, "grad_norm": 0.2775847017765045, "learning_rate": 1.8147316697623002e-06, "loss": 0.2999, "step": 15647 }, { "epoch": 2.2437625466016633, "grad_norm": 0.2989232838153839, "learning_rate": 1.8140886632956744e-06, "loss": 0.2891, "step": 15648 }, { "epoch": 2.2439059363349583, "grad_norm": 0.27760541439056396, "learning_rate": 1.8134457455199178e-06, "loss": 0.2878, "step": 15649 }, { "epoch": 2.2440493260682537, "grad_norm": 0.2766817510128021, "learning_rate": 1.8128029164529293e-06, "loss": 0.2664, "step": 15650 }, { "epoch": 2.2441927158015487, "grad_norm": 0.27366361021995544, "learning_rate": 1.8121601761126034e-06, "loss": 0.2958, "step": 15651 }, { "epoch": 2.2443361055348436, "grad_norm": 0.2818664312362671, "learning_rate": 1.811517524516836e-06, "loss": 0.2643, "step": 15652 }, { "epoch": 2.2444794952681386, "grad_norm": 0.2951253354549408, "learning_rate": 1.8108749616835164e-06, "loss": 0.2772, "step": 15653 }, { "epoch": 2.244622885001434, "grad_norm": 0.28757810592651367, "learning_rate": 1.8102324876305333e-06, "loss": 0.2721, "step": 15654 }, { "epoch": 2.244766274734729, "grad_norm": 0.3123611509799957, "learning_rate": 1.809590102375769e-06, "loss": 0.2853, "step": 15655 }, { "epoch": 2.244909664468024, "grad_norm": 0.25832054018974304, "learning_rate": 1.808947805937109e-06, "loss": 0.2694, "step": 15656 }, { "epoch": 2.2450530542013194, "grad_norm": 0.27836453914642334, "learning_rate": 1.8083055983324332e-06, "loss": 0.2793, "step": 15657 }, { "epoch": 2.2451964439346144, "grad_norm": 0.27906328439712524, "learning_rate": 1.8076634795796194e-06, "loss": 0.2799, "step": 15658 }, { "epoch": 2.2453398336679093, "grad_norm": 0.26259344816207886, "learning_rate": 1.8070214496965461e-06, "loss": 0.2706, "step": 15659 }, { "epoch": 2.2454832234012043, "grad_norm": 0.28875693678855896, "learning_rate": 1.8063795087010815e-06, "loss": 0.2838, "step": 15660 }, { "epoch": 2.2456266131344997, "grad_norm": 0.2619156837463379, "learning_rate": 1.805737656611099e-06, "loss": 0.2673, "step": 15661 }, { "epoch": 2.2457700028677947, "grad_norm": 0.2607075870037079, "learning_rate": 1.8050958934444663e-06, "loss": 0.2694, "step": 15662 }, { "epoch": 2.2459133926010897, "grad_norm": 0.2847054600715637, "learning_rate": 1.8044542192190496e-06, "loss": 0.2743, "step": 15663 }, { "epoch": 2.2460567823343847, "grad_norm": 0.30537763237953186, "learning_rate": 1.803812633952713e-06, "loss": 0.2762, "step": 15664 }, { "epoch": 2.24620017206768, "grad_norm": 0.27063247561454773, "learning_rate": 1.8031711376633142e-06, "loss": 0.2855, "step": 15665 }, { "epoch": 2.246343561800975, "grad_norm": 0.28722888231277466, "learning_rate": 1.8025297303687138e-06, "loss": 0.2767, "step": 15666 }, { "epoch": 2.24648695153427, "grad_norm": 0.280264288187027, "learning_rate": 1.8018884120867668e-06, "loss": 0.2921, "step": 15667 }, { "epoch": 2.2466303412675654, "grad_norm": 0.2804717421531677, "learning_rate": 1.8012471828353268e-06, "loss": 0.2847, "step": 15668 }, { "epoch": 2.2467737310008604, "grad_norm": 0.2884141504764557, "learning_rate": 1.8006060426322447e-06, "loss": 0.2877, "step": 15669 }, { "epoch": 2.2469171207341554, "grad_norm": 0.2784998118877411, "learning_rate": 1.7999649914953687e-06, "loss": 0.2635, "step": 15670 }, { "epoch": 2.2470605104674504, "grad_norm": 0.2708815932273865, "learning_rate": 1.7993240294425445e-06, "loss": 0.2871, "step": 15671 }, { "epoch": 2.247203900200746, "grad_norm": 0.28058546781539917, "learning_rate": 1.7986831564916164e-06, "loss": 0.2963, "step": 15672 }, { "epoch": 2.2473472899340408, "grad_norm": 0.2764632999897003, "learning_rate": 1.7980423726604241e-06, "loss": 0.2667, "step": 15673 }, { "epoch": 2.2474906796673357, "grad_norm": 0.26091182231903076, "learning_rate": 1.7974016779668068e-06, "loss": 0.2694, "step": 15674 }, { "epoch": 2.2476340694006307, "grad_norm": 0.27195969223976135, "learning_rate": 1.7967610724286022e-06, "loss": 0.2795, "step": 15675 }, { "epoch": 2.247777459133926, "grad_norm": 0.2755105495452881, "learning_rate": 1.7961205560636396e-06, "loss": 0.2945, "step": 15676 }, { "epoch": 2.247920848867221, "grad_norm": 0.2685329020023346, "learning_rate": 1.7954801288897528e-06, "loss": 0.2639, "step": 15677 }, { "epoch": 2.248064238600516, "grad_norm": 0.2764206826686859, "learning_rate": 1.7948397909247695e-06, "loss": 0.2573, "step": 15678 }, { "epoch": 2.2482076283338115, "grad_norm": 0.2720259130001068, "learning_rate": 1.794199542186516e-06, "loss": 0.2849, "step": 15679 }, { "epoch": 2.2483510180671065, "grad_norm": 0.2998289167881012, "learning_rate": 1.793559382692816e-06, "loss": 0.2851, "step": 15680 }, { "epoch": 2.2484944078004014, "grad_norm": 0.2714463770389557, "learning_rate": 1.792919312461492e-06, "loss": 0.2816, "step": 15681 }, { "epoch": 2.2486377975336964, "grad_norm": 0.2772369980812073, "learning_rate": 1.7922793315103588e-06, "loss": 0.275, "step": 15682 }, { "epoch": 2.248781187266992, "grad_norm": 0.2857534885406494, "learning_rate": 1.7916394398572346e-06, "loss": 0.2964, "step": 15683 }, { "epoch": 2.248924577000287, "grad_norm": 0.271573543548584, "learning_rate": 1.7909996375199335e-06, "loss": 0.276, "step": 15684 }, { "epoch": 2.249067966733582, "grad_norm": 0.27217087149620056, "learning_rate": 1.7903599245162657e-06, "loss": 0.3039, "step": 15685 }, { "epoch": 2.249211356466877, "grad_norm": 0.26244038343429565, "learning_rate": 1.7897203008640407e-06, "loss": 0.2943, "step": 15686 }, { "epoch": 2.249354746200172, "grad_norm": 0.27206116914749146, "learning_rate": 1.7890807665810638e-06, "loss": 0.2895, "step": 15687 }, { "epoch": 2.249498135933467, "grad_norm": 0.25709715485572815, "learning_rate": 1.7884413216851393e-06, "loss": 0.2693, "step": 15688 }, { "epoch": 2.249641525666762, "grad_norm": 0.286051481962204, "learning_rate": 1.7878019661940677e-06, "loss": 0.2753, "step": 15689 }, { "epoch": 2.2497849154000575, "grad_norm": 0.2646695673465729, "learning_rate": 1.7871627001256487e-06, "loss": 0.2839, "step": 15690 }, { "epoch": 2.2499283051333525, "grad_norm": 0.2457009106874466, "learning_rate": 1.7865235234976791e-06, "loss": 0.2782, "step": 15691 }, { "epoch": 2.2500716948666475, "grad_norm": 0.27220675349235535, "learning_rate": 1.7858844363279498e-06, "loss": 0.298, "step": 15692 }, { "epoch": 2.2502150845999425, "grad_norm": 0.28854963183403015, "learning_rate": 1.7852454386342533e-06, "loss": 0.302, "step": 15693 }, { "epoch": 2.250358474333238, "grad_norm": 0.29091572761535645, "learning_rate": 1.784606530434379e-06, "loss": 0.2829, "step": 15694 }, { "epoch": 2.250501864066533, "grad_norm": 0.2933465838432312, "learning_rate": 1.7839677117461124e-06, "loss": 0.2964, "step": 15695 }, { "epoch": 2.250645253799828, "grad_norm": 0.2676372230052948, "learning_rate": 1.7833289825872374e-06, "loss": 0.283, "step": 15696 }, { "epoch": 2.250788643533123, "grad_norm": 0.2818697690963745, "learning_rate": 1.7826903429755372e-06, "loss": 0.2716, "step": 15697 }, { "epoch": 2.250932033266418, "grad_norm": 0.2808983027935028, "learning_rate": 1.7820517929287872e-06, "loss": 0.2892, "step": 15698 }, { "epoch": 2.251075422999713, "grad_norm": 0.310558021068573, "learning_rate": 1.7814133324647647e-06, "loss": 0.3049, "step": 15699 }, { "epoch": 2.251218812733008, "grad_norm": 0.27965056896209717, "learning_rate": 1.7807749616012444e-06, "loss": 0.2927, "step": 15700 }, { "epoch": 2.2513622024663036, "grad_norm": 0.27496451139450073, "learning_rate": 1.7801366803559971e-06, "loss": 0.2991, "step": 15701 }, { "epoch": 2.2515055921995986, "grad_norm": 0.26550716161727905, "learning_rate": 1.779498488746793e-06, "loss": 0.28, "step": 15702 }, { "epoch": 2.2516489819328935, "grad_norm": 0.277187705039978, "learning_rate": 1.778860386791395e-06, "loss": 0.2798, "step": 15703 }, { "epoch": 2.2517923716661885, "grad_norm": 0.2736639678478241, "learning_rate": 1.7782223745075695e-06, "loss": 0.2875, "step": 15704 }, { "epoch": 2.251935761399484, "grad_norm": 0.2713592052459717, "learning_rate": 1.7775844519130763e-06, "loss": 0.2656, "step": 15705 }, { "epoch": 2.252079151132779, "grad_norm": 0.26288044452667236, "learning_rate": 1.7769466190256752e-06, "loss": 0.2641, "step": 15706 }, { "epoch": 2.252222540866074, "grad_norm": 0.26784655451774597, "learning_rate": 1.7763088758631208e-06, "loss": 0.2603, "step": 15707 }, { "epoch": 2.2523659305993693, "grad_norm": 0.2787811756134033, "learning_rate": 1.7756712224431717e-06, "loss": 0.287, "step": 15708 }, { "epoch": 2.2525093203326643, "grad_norm": 0.27361589670181274, "learning_rate": 1.7750336587835737e-06, "loss": 0.2824, "step": 15709 }, { "epoch": 2.2526527100659592, "grad_norm": 0.27072465419769287, "learning_rate": 1.774396184902078e-06, "loss": 0.2797, "step": 15710 }, { "epoch": 2.252796099799254, "grad_norm": 0.28841617703437805, "learning_rate": 1.7737588008164308e-06, "loss": 0.2836, "step": 15711 }, { "epoch": 2.2529394895325496, "grad_norm": 0.2690582871437073, "learning_rate": 1.7731215065443753e-06, "loss": 0.2977, "step": 15712 }, { "epoch": 2.2530828792658446, "grad_norm": 0.2652103304862976, "learning_rate": 1.7724843021036548e-06, "loss": 0.2753, "step": 15713 }, { "epoch": 2.2532262689991396, "grad_norm": 0.2807949483394623, "learning_rate": 1.7718471875120053e-06, "loss": 0.2775, "step": 15714 }, { "epoch": 2.253369658732435, "grad_norm": 0.297638863325119, "learning_rate": 1.7712101627871641e-06, "loss": 0.2863, "step": 15715 }, { "epoch": 2.25351304846573, "grad_norm": 0.2871641516685486, "learning_rate": 1.7705732279468651e-06, "loss": 0.2667, "step": 15716 }, { "epoch": 2.253656438199025, "grad_norm": 0.28959938883781433, "learning_rate": 1.7699363830088396e-06, "loss": 0.2952, "step": 15717 }, { "epoch": 2.25379982793232, "grad_norm": 0.2879672944545746, "learning_rate": 1.769299627990818e-06, "loss": 0.2723, "step": 15718 }, { "epoch": 2.253943217665615, "grad_norm": 0.28366002440452576, "learning_rate": 1.7686629629105234e-06, "loss": 0.2671, "step": 15719 }, { "epoch": 2.2540866073989103, "grad_norm": 0.2816503643989563, "learning_rate": 1.7680263877856812e-06, "loss": 0.2778, "step": 15720 }, { "epoch": 2.2542299971322053, "grad_norm": 0.27964356541633606, "learning_rate": 1.7673899026340124e-06, "loss": 0.2681, "step": 15721 }, { "epoch": 2.2543733868655003, "grad_norm": 0.27096688747406006, "learning_rate": 1.7667535074732367e-06, "loss": 0.294, "step": 15722 }, { "epoch": 2.2545167765987957, "grad_norm": 0.27483224868774414, "learning_rate": 1.7661172023210694e-06, "loss": 0.2914, "step": 15723 }, { "epoch": 2.2546601663320907, "grad_norm": 0.28984469175338745, "learning_rate": 1.7654809871952244e-06, "loss": 0.2729, "step": 15724 }, { "epoch": 2.2548035560653856, "grad_norm": 0.28438279032707214, "learning_rate": 1.764844862113413e-06, "loss": 0.2611, "step": 15725 }, { "epoch": 2.2549469457986806, "grad_norm": 0.2769162952899933, "learning_rate": 1.7642088270933444e-06, "loss": 0.2744, "step": 15726 }, { "epoch": 2.255090335531976, "grad_norm": 0.2896764576435089, "learning_rate": 1.7635728821527243e-06, "loss": 0.2768, "step": 15727 }, { "epoch": 2.255233725265271, "grad_norm": 0.286172479391098, "learning_rate": 1.762937027309256e-06, "loss": 0.268, "step": 15728 }, { "epoch": 2.255377114998566, "grad_norm": 0.27256181836128235, "learning_rate": 1.762301262580644e-06, "loss": 0.275, "step": 15729 }, { "epoch": 2.2555205047318614, "grad_norm": 0.2713143229484558, "learning_rate": 1.7616655879845824e-06, "loss": 0.2734, "step": 15730 }, { "epoch": 2.2556638944651564, "grad_norm": 0.27827972173690796, "learning_rate": 1.761030003538769e-06, "loss": 0.2912, "step": 15731 }, { "epoch": 2.2558072841984513, "grad_norm": 0.2530607283115387, "learning_rate": 1.7603945092608976e-06, "loss": 0.2792, "step": 15732 }, { "epoch": 2.2559506739317463, "grad_norm": 0.2680990397930145, "learning_rate": 1.7597591051686602e-06, "loss": 0.2955, "step": 15733 }, { "epoch": 2.2560940636650417, "grad_norm": 0.2808174192905426, "learning_rate": 1.759123791279745e-06, "loss": 0.2825, "step": 15734 }, { "epoch": 2.2562374533983367, "grad_norm": 0.2834058403968811, "learning_rate": 1.7584885676118391e-06, "loss": 0.2778, "step": 15735 }, { "epoch": 2.2563808431316317, "grad_norm": 0.2777475416660309, "learning_rate": 1.7578534341826237e-06, "loss": 0.2827, "step": 15736 }, { "epoch": 2.256524232864927, "grad_norm": 0.27601662278175354, "learning_rate": 1.7572183910097811e-06, "loss": 0.2862, "step": 15737 }, { "epoch": 2.256667622598222, "grad_norm": 0.30301007628440857, "learning_rate": 1.7565834381109909e-06, "loss": 0.2865, "step": 15738 }, { "epoch": 2.256811012331517, "grad_norm": 0.28091955184936523, "learning_rate": 1.755948575503928e-06, "loss": 0.2655, "step": 15739 }, { "epoch": 2.256954402064812, "grad_norm": 0.29215991497039795, "learning_rate": 1.755313803206266e-06, "loss": 0.2811, "step": 15740 }, { "epoch": 2.2570977917981074, "grad_norm": 0.2567484378814697, "learning_rate": 1.7546791212356773e-06, "loss": 0.2916, "step": 15741 }, { "epoch": 2.2572411815314024, "grad_norm": 0.26590996980667114, "learning_rate": 1.7540445296098291e-06, "loss": 0.2838, "step": 15742 }, { "epoch": 2.2573845712646974, "grad_norm": 0.2783011496067047, "learning_rate": 1.7534100283463884e-06, "loss": 0.2785, "step": 15743 }, { "epoch": 2.2575279609979924, "grad_norm": 0.2689270079135895, "learning_rate": 1.7527756174630178e-06, "loss": 0.2844, "step": 15744 }, { "epoch": 2.2576713507312878, "grad_norm": 0.26621001958847046, "learning_rate": 1.752141296977381e-06, "loss": 0.2652, "step": 15745 }, { "epoch": 2.2578147404645827, "grad_norm": 0.27144524455070496, "learning_rate": 1.7515070669071321e-06, "loss": 0.2563, "step": 15746 }, { "epoch": 2.2579581301978777, "grad_norm": 0.2735458016395569, "learning_rate": 1.75087292726993e-06, "loss": 0.3113, "step": 15747 }, { "epoch": 2.2581015199311727, "grad_norm": 0.2746265232563019, "learning_rate": 1.7502388780834272e-06, "loss": 0.2874, "step": 15748 }, { "epoch": 2.258244909664468, "grad_norm": 0.28067606687545776, "learning_rate": 1.7496049193652748e-06, "loss": 0.2904, "step": 15749 }, { "epoch": 2.258388299397763, "grad_norm": 0.2671841084957123, "learning_rate": 1.7489710511331214e-06, "loss": 0.2779, "step": 15750 }, { "epoch": 2.258531689131058, "grad_norm": 0.26388365030288696, "learning_rate": 1.748337273404615e-06, "loss": 0.2793, "step": 15751 }, { "epoch": 2.2586750788643535, "grad_norm": 0.28192102909088135, "learning_rate": 1.7477035861973952e-06, "loss": 0.2964, "step": 15752 }, { "epoch": 2.2588184685976485, "grad_norm": 0.27471739053726196, "learning_rate": 1.7470699895291043e-06, "loss": 0.2943, "step": 15753 }, { "epoch": 2.2589618583309434, "grad_norm": 0.29578834772109985, "learning_rate": 1.7464364834173808e-06, "loss": 0.2875, "step": 15754 }, { "epoch": 2.2591052480642384, "grad_norm": 0.282047837972641, "learning_rate": 1.745803067879861e-06, "loss": 0.2754, "step": 15755 }, { "epoch": 2.259248637797534, "grad_norm": 0.2772606611251831, "learning_rate": 1.7451697429341797e-06, "loss": 0.2858, "step": 15756 }, { "epoch": 2.259392027530829, "grad_norm": 0.2670080065727234, "learning_rate": 1.7445365085979638e-06, "loss": 0.2745, "step": 15757 }, { "epoch": 2.2595354172641238, "grad_norm": 0.29477259516716003, "learning_rate": 1.7439033648888436e-06, "loss": 0.2766, "step": 15758 }, { "epoch": 2.259678806997419, "grad_norm": 0.2951103150844574, "learning_rate": 1.7432703118244454e-06, "loss": 0.2955, "step": 15759 }, { "epoch": 2.259822196730714, "grad_norm": 0.24947547912597656, "learning_rate": 1.742637349422392e-06, "loss": 0.2935, "step": 15760 }, { "epoch": 2.259965586464009, "grad_norm": 0.26626715064048767, "learning_rate": 1.7420044777003036e-06, "loss": 0.2871, "step": 15761 }, { "epoch": 2.260108976197304, "grad_norm": 0.27173280715942383, "learning_rate": 1.7413716966757987e-06, "loss": 0.2777, "step": 15762 }, { "epoch": 2.2602523659305995, "grad_norm": 0.25572720170021057, "learning_rate": 1.7407390063664935e-06, "loss": 0.284, "step": 15763 }, { "epoch": 2.2603957556638945, "grad_norm": 0.27390772104263306, "learning_rate": 1.7401064067900003e-06, "loss": 0.2895, "step": 15764 }, { "epoch": 2.2605391453971895, "grad_norm": 0.27745088934898376, "learning_rate": 1.7394738979639304e-06, "loss": 0.2818, "step": 15765 }, { "epoch": 2.260682535130485, "grad_norm": 0.27635079622268677, "learning_rate": 1.7388414799058911e-06, "loss": 0.2833, "step": 15766 }, { "epoch": 2.26082592486378, "grad_norm": 0.2673623263835907, "learning_rate": 1.7382091526334905e-06, "loss": 0.3071, "step": 15767 }, { "epoch": 2.260969314597075, "grad_norm": 0.27697572112083435, "learning_rate": 1.737576916164328e-06, "loss": 0.2906, "step": 15768 }, { "epoch": 2.26111270433037, "grad_norm": 0.2832857072353363, "learning_rate": 1.7369447705160053e-06, "loss": 0.279, "step": 15769 }, { "epoch": 2.2612560940636652, "grad_norm": 0.26226410269737244, "learning_rate": 1.7363127157061204e-06, "loss": 0.284, "step": 15770 }, { "epoch": 2.26139948379696, "grad_norm": 0.2668338418006897, "learning_rate": 1.7356807517522696e-06, "loss": 0.2621, "step": 15771 }, { "epoch": 2.261542873530255, "grad_norm": 0.2713228464126587, "learning_rate": 1.7350488786720465e-06, "loss": 0.2772, "step": 15772 }, { "epoch": 2.26168626326355, "grad_norm": 0.2593654990196228, "learning_rate": 1.7344170964830388e-06, "loss": 0.2798, "step": 15773 }, { "epoch": 2.2618296529968456, "grad_norm": 0.29081785678863525, "learning_rate": 1.7337854052028353e-06, "loss": 0.2761, "step": 15774 }, { "epoch": 2.2619730427301405, "grad_norm": 0.2878377437591553, "learning_rate": 1.7331538048490221e-06, "loss": 0.2649, "step": 15775 }, { "epoch": 2.2621164324634355, "grad_norm": 0.288987934589386, "learning_rate": 1.7325222954391813e-06, "loss": 0.2937, "step": 15776 }, { "epoch": 2.2622598221967305, "grad_norm": 0.276068776845932, "learning_rate": 1.7318908769908937e-06, "loss": 0.28, "step": 15777 }, { "epoch": 2.262403211930026, "grad_norm": 0.2661852240562439, "learning_rate": 1.7312595495217367e-06, "loss": 0.2775, "step": 15778 }, { "epoch": 2.262546601663321, "grad_norm": 0.27697697281837463, "learning_rate": 1.7306283130492858e-06, "loss": 0.3033, "step": 15779 }, { "epoch": 2.262689991396616, "grad_norm": 0.27802079916000366, "learning_rate": 1.7299971675911131e-06, "loss": 0.2784, "step": 15780 }, { "epoch": 2.2628333811299113, "grad_norm": 0.270977646112442, "learning_rate": 1.7293661131647887e-06, "loss": 0.2706, "step": 15781 }, { "epoch": 2.2629767708632063, "grad_norm": 0.27905017137527466, "learning_rate": 1.7287351497878812e-06, "loss": 0.2729, "step": 15782 }, { "epoch": 2.2631201605965012, "grad_norm": 0.276652455329895, "learning_rate": 1.7281042774779565e-06, "loss": 0.2953, "step": 15783 }, { "epoch": 2.263263550329796, "grad_norm": 0.2757580578327179, "learning_rate": 1.7274734962525742e-06, "loss": 0.2956, "step": 15784 }, { "epoch": 2.2634069400630916, "grad_norm": 0.2763148248195648, "learning_rate": 1.7268428061292953e-06, "loss": 0.2537, "step": 15785 }, { "epoch": 2.2635503297963866, "grad_norm": 0.29704928398132324, "learning_rate": 1.7262122071256777e-06, "loss": 0.3132, "step": 15786 }, { "epoch": 2.2636937195296816, "grad_norm": 0.27352216839790344, "learning_rate": 1.7255816992592762e-06, "loss": 0.2888, "step": 15787 }, { "epoch": 2.263837109262977, "grad_norm": 0.28919702768325806, "learning_rate": 1.7249512825476427e-06, "loss": 0.2615, "step": 15788 }, { "epoch": 2.263980498996272, "grad_norm": 0.2655531167984009, "learning_rate": 1.7243209570083302e-06, "loss": 0.275, "step": 15789 }, { "epoch": 2.264123888729567, "grad_norm": 0.2793438732624054, "learning_rate": 1.7236907226588812e-06, "loss": 0.2891, "step": 15790 }, { "epoch": 2.264267278462862, "grad_norm": 0.25898557901382446, "learning_rate": 1.723060579516842e-06, "loss": 0.2795, "step": 15791 }, { "epoch": 2.2644106681961573, "grad_norm": 0.28012749552726746, "learning_rate": 1.722430527599756e-06, "loss": 0.2803, "step": 15792 }, { "epoch": 2.2645540579294523, "grad_norm": 0.28294530510902405, "learning_rate": 1.7218005669251625e-06, "loss": 0.2885, "step": 15793 }, { "epoch": 2.2646974476627473, "grad_norm": 0.2581925690174103, "learning_rate": 1.7211706975105995e-06, "loss": 0.2474, "step": 15794 }, { "epoch": 2.2648408373960423, "grad_norm": 0.2549414336681366, "learning_rate": 1.7205409193735989e-06, "loss": 0.2775, "step": 15795 }, { "epoch": 2.2649842271293377, "grad_norm": 0.2824383080005646, "learning_rate": 1.719911232531693e-06, "loss": 0.2941, "step": 15796 }, { "epoch": 2.2651276168626326, "grad_norm": 0.2685598134994507, "learning_rate": 1.7192816370024147e-06, "loss": 0.2743, "step": 15797 }, { "epoch": 2.2652710065959276, "grad_norm": 0.2684311866760254, "learning_rate": 1.7186521328032879e-06, "loss": 0.2811, "step": 15798 }, { "epoch": 2.2654143963292226, "grad_norm": 0.2851613461971283, "learning_rate": 1.7180227199518402e-06, "loss": 0.2791, "step": 15799 }, { "epoch": 2.265557786062518, "grad_norm": 0.26385700702667236, "learning_rate": 1.71739339846559e-06, "loss": 0.2805, "step": 15800 }, { "epoch": 2.265701175795813, "grad_norm": 0.26422008872032166, "learning_rate": 1.716764168362058e-06, "loss": 0.2992, "step": 15801 }, { "epoch": 2.265844565529108, "grad_norm": 0.2794232964515686, "learning_rate": 1.7161350296587603e-06, "loss": 0.2863, "step": 15802 }, { "epoch": 2.2659879552624034, "grad_norm": 0.27376410365104675, "learning_rate": 1.7155059823732123e-06, "loss": 0.283, "step": 15803 }, { "epoch": 2.2661313449956983, "grad_norm": 0.2906390428543091, "learning_rate": 1.7148770265229253e-06, "loss": 0.2863, "step": 15804 }, { "epoch": 2.2662747347289933, "grad_norm": 0.27096471190452576, "learning_rate": 1.7142481621254098e-06, "loss": 0.3039, "step": 15805 }, { "epoch": 2.2664181244622883, "grad_norm": 0.26034072041511536, "learning_rate": 1.7136193891981694e-06, "loss": 0.2846, "step": 15806 }, { "epoch": 2.2665615141955837, "grad_norm": 0.28873980045318604, "learning_rate": 1.7129907077587105e-06, "loss": 0.2692, "step": 15807 }, { "epoch": 2.2667049039288787, "grad_norm": 0.2542930543422699, "learning_rate": 1.712362117824533e-06, "loss": 0.2732, "step": 15808 }, { "epoch": 2.2668482936621737, "grad_norm": 0.26778650283813477, "learning_rate": 1.7117336194131373e-06, "loss": 0.2865, "step": 15809 }, { "epoch": 2.266991683395469, "grad_norm": 0.28224241733551025, "learning_rate": 1.7111052125420207e-06, "loss": 0.2974, "step": 15810 }, { "epoch": 2.267135073128764, "grad_norm": 0.2766115963459015, "learning_rate": 1.7104768972286745e-06, "loss": 0.3103, "step": 15811 }, { "epoch": 2.267278462862059, "grad_norm": 0.26053860783576965, "learning_rate": 1.709848673490591e-06, "loss": 0.2916, "step": 15812 }, { "epoch": 2.267421852595354, "grad_norm": 0.2787383496761322, "learning_rate": 1.7092205413452588e-06, "loss": 0.2559, "step": 15813 }, { "epoch": 2.2675652423286494, "grad_norm": 0.2777206599712372, "learning_rate": 1.7085925008101656e-06, "loss": 0.2728, "step": 15814 }, { "epoch": 2.2677086320619444, "grad_norm": 0.28171253204345703, "learning_rate": 1.7079645519027932e-06, "loss": 0.281, "step": 15815 }, { "epoch": 2.2678520217952394, "grad_norm": 0.275850385427475, "learning_rate": 1.707336694640624e-06, "loss": 0.2868, "step": 15816 }, { "epoch": 2.267995411528535, "grad_norm": 0.25931671261787415, "learning_rate": 1.706708929041136e-06, "loss": 0.2704, "step": 15817 }, { "epoch": 2.2681388012618298, "grad_norm": 0.27234962582588196, "learning_rate": 1.7060812551218054e-06, "loss": 0.2894, "step": 15818 }, { "epoch": 2.2682821909951247, "grad_norm": 0.2839270234107971, "learning_rate": 1.7054536729001054e-06, "loss": 0.2746, "step": 15819 }, { "epoch": 2.2684255807284197, "grad_norm": 0.27168887853622437, "learning_rate": 1.7048261823935074e-06, "loss": 0.2828, "step": 15820 }, { "epoch": 2.268568970461715, "grad_norm": 0.27371105551719666, "learning_rate": 1.7041987836194812e-06, "loss": 0.2874, "step": 15821 }, { "epoch": 2.26871236019501, "grad_norm": 0.2677798569202423, "learning_rate": 1.703571476595489e-06, "loss": 0.286, "step": 15822 }, { "epoch": 2.268855749928305, "grad_norm": 0.28121593594551086, "learning_rate": 1.7029442613389967e-06, "loss": 0.2844, "step": 15823 }, { "epoch": 2.2689991396616, "grad_norm": 0.2752704322338104, "learning_rate": 1.702317137867464e-06, "loss": 0.2889, "step": 15824 }, { "epoch": 2.2691425293948955, "grad_norm": 0.2712661027908325, "learning_rate": 1.7016901061983493e-06, "loss": 0.2782, "step": 15825 }, { "epoch": 2.2692859191281904, "grad_norm": 0.2756229043006897, "learning_rate": 1.70106316634911e-06, "loss": 0.2753, "step": 15826 }, { "epoch": 2.2694293088614854, "grad_norm": 0.28535646200180054, "learning_rate": 1.7004363183371959e-06, "loss": 0.2896, "step": 15827 }, { "epoch": 2.2695726985947804, "grad_norm": 0.33017072081565857, "learning_rate": 1.6998095621800586e-06, "loss": 0.2783, "step": 15828 }, { "epoch": 2.269716088328076, "grad_norm": 0.26012739539146423, "learning_rate": 1.6991828978951468e-06, "loss": 0.2864, "step": 15829 }, { "epoch": 2.269859478061371, "grad_norm": 0.2713833451271057, "learning_rate": 1.6985563254999055e-06, "loss": 0.2826, "step": 15830 }, { "epoch": 2.2700028677946658, "grad_norm": 0.30472150444984436, "learning_rate": 1.6979298450117776e-06, "loss": 0.2846, "step": 15831 }, { "epoch": 2.270146257527961, "grad_norm": 0.2809695601463318, "learning_rate": 1.6973034564482033e-06, "loss": 0.2814, "step": 15832 }, { "epoch": 2.270289647261256, "grad_norm": 0.26277437806129456, "learning_rate": 1.6966771598266201e-06, "loss": 0.2843, "step": 15833 }, { "epoch": 2.270433036994551, "grad_norm": 0.26879480481147766, "learning_rate": 1.6960509551644633e-06, "loss": 0.2858, "step": 15834 }, { "epoch": 2.270576426727846, "grad_norm": 0.2707369923591614, "learning_rate": 1.695424842479166e-06, "loss": 0.2875, "step": 15835 }, { "epoch": 2.2707198164611415, "grad_norm": 0.27648991346359253, "learning_rate": 1.6947988217881573e-06, "loss": 0.2762, "step": 15836 }, { "epoch": 2.2708632061944365, "grad_norm": 0.27361541986465454, "learning_rate": 1.6941728931088664e-06, "loss": 0.3003, "step": 15837 }, { "epoch": 2.2710065959277315, "grad_norm": 0.2665000855922699, "learning_rate": 1.6935470564587153e-06, "loss": 0.2697, "step": 15838 }, { "epoch": 2.271149985661027, "grad_norm": 0.26481691002845764, "learning_rate": 1.692921311855128e-06, "loss": 0.2763, "step": 15839 }, { "epoch": 2.271293375394322, "grad_norm": 0.30134451389312744, "learning_rate": 1.6922956593155243e-06, "loss": 0.2834, "step": 15840 }, { "epoch": 2.271436765127617, "grad_norm": 0.2585769593715668, "learning_rate": 1.691670098857321e-06, "loss": 0.2852, "step": 15841 }, { "epoch": 2.271580154860912, "grad_norm": 0.2841658294200897, "learning_rate": 1.6910446304979333e-06, "loss": 0.2777, "step": 15842 }, { "epoch": 2.2717235445942072, "grad_norm": 0.28068628907203674, "learning_rate": 1.6904192542547743e-06, "loss": 0.2905, "step": 15843 }, { "epoch": 2.271866934327502, "grad_norm": 0.27779272198677063, "learning_rate": 1.6897939701452503e-06, "loss": 0.279, "step": 15844 }, { "epoch": 2.272010324060797, "grad_norm": 0.27460023760795593, "learning_rate": 1.6891687781867704e-06, "loss": 0.28, "step": 15845 }, { "epoch": 2.2721537137940926, "grad_norm": 0.27880024909973145, "learning_rate": 1.688543678396739e-06, "loss": 0.2785, "step": 15846 }, { "epoch": 2.2722971035273876, "grad_norm": 0.2712228000164032, "learning_rate": 1.687918670792557e-06, "loss": 0.2826, "step": 15847 }, { "epoch": 2.2724404932606825, "grad_norm": 0.27316033840179443, "learning_rate": 1.6872937553916268e-06, "loss": 0.2851, "step": 15848 }, { "epoch": 2.2725838829939775, "grad_norm": 0.2884780764579773, "learning_rate": 1.68666893221134e-06, "loss": 0.2738, "step": 15849 }, { "epoch": 2.2727272727272725, "grad_norm": 0.2841353118419647, "learning_rate": 1.6860442012690937e-06, "loss": 0.2788, "step": 15850 }, { "epoch": 2.272870662460568, "grad_norm": 0.2702103853225708, "learning_rate": 1.6854195625822788e-06, "loss": 0.2661, "step": 15851 }, { "epoch": 2.273014052193863, "grad_norm": 0.2611599564552307, "learning_rate": 1.6847950161682835e-06, "loss": 0.2798, "step": 15852 }, { "epoch": 2.273157441927158, "grad_norm": 0.27950775623321533, "learning_rate": 1.6841705620444977e-06, "loss": 0.2897, "step": 15853 }, { "epoch": 2.2733008316604533, "grad_norm": 0.28634393215179443, "learning_rate": 1.6835462002283016e-06, "loss": 0.2814, "step": 15854 }, { "epoch": 2.2734442213937482, "grad_norm": 0.28616729378700256, "learning_rate": 1.6829219307370776e-06, "loss": 0.2769, "step": 15855 }, { "epoch": 2.273587611127043, "grad_norm": 0.27121105790138245, "learning_rate": 1.682297753588204e-06, "loss": 0.2886, "step": 15856 }, { "epoch": 2.273731000860338, "grad_norm": 0.2730848491191864, "learning_rate": 1.6816736687990575e-06, "loss": 0.2811, "step": 15857 }, { "epoch": 2.2738743905936336, "grad_norm": 0.26354503631591797, "learning_rate": 1.6810496763870115e-06, "loss": 0.2794, "step": 15858 }, { "epoch": 2.2740177803269286, "grad_norm": 0.2810295820236206, "learning_rate": 1.6804257763694393e-06, "loss": 0.2854, "step": 15859 }, { "epoch": 2.2741611700602236, "grad_norm": 0.26389652490615845, "learning_rate": 1.6798019687637045e-06, "loss": 0.274, "step": 15860 }, { "epoch": 2.274304559793519, "grad_norm": 0.2799338698387146, "learning_rate": 1.6791782535871759e-06, "loss": 0.2989, "step": 15861 }, { "epoch": 2.274447949526814, "grad_norm": 0.2582043707370758, "learning_rate": 1.6785546308572165e-06, "loss": 0.2892, "step": 15862 }, { "epoch": 2.274591339260109, "grad_norm": 0.2665391266345978, "learning_rate": 1.6779311005911864e-06, "loss": 0.294, "step": 15863 }, { "epoch": 2.274734728993404, "grad_norm": 0.25821247696876526, "learning_rate": 1.6773076628064462e-06, "loss": 0.2855, "step": 15864 }, { "epoch": 2.2748781187266993, "grad_norm": 0.26808488368988037, "learning_rate": 1.6766843175203473e-06, "loss": 0.2747, "step": 15865 }, { "epoch": 2.2750215084599943, "grad_norm": 0.2771892845630646, "learning_rate": 1.6760610647502452e-06, "loss": 0.2753, "step": 15866 }, { "epoch": 2.2751648981932893, "grad_norm": 0.29079121351242065, "learning_rate": 1.67543790451349e-06, "loss": 0.2855, "step": 15867 }, { "epoch": 2.2753082879265847, "grad_norm": 0.2678767442703247, "learning_rate": 1.674814836827429e-06, "loss": 0.2944, "step": 15868 }, { "epoch": 2.2754516776598797, "grad_norm": 0.28062206506729126, "learning_rate": 1.6741918617094083e-06, "loss": 0.2895, "step": 15869 }, { "epoch": 2.2755950673931746, "grad_norm": 0.2750469744205475, "learning_rate": 1.6735689791767696e-06, "loss": 0.2815, "step": 15870 }, { "epoch": 2.2757384571264696, "grad_norm": 0.2697911560535431, "learning_rate": 1.6729461892468534e-06, "loss": 0.2669, "step": 15871 }, { "epoch": 2.275881846859765, "grad_norm": 0.301025927066803, "learning_rate": 1.6723234919369978e-06, "loss": 0.2691, "step": 15872 }, { "epoch": 2.27602523659306, "grad_norm": 0.25820234417915344, "learning_rate": 1.6717008872645375e-06, "loss": 0.2671, "step": 15873 }, { "epoch": 2.276168626326355, "grad_norm": 0.2742551565170288, "learning_rate": 1.671078375246804e-06, "loss": 0.285, "step": 15874 }, { "epoch": 2.27631201605965, "grad_norm": 0.2796693444252014, "learning_rate": 1.6704559559011297e-06, "loss": 0.2883, "step": 15875 }, { "epoch": 2.2764554057929454, "grad_norm": 0.2787068784236908, "learning_rate": 1.669833629244838e-06, "loss": 0.2905, "step": 15876 }, { "epoch": 2.2765987955262403, "grad_norm": 0.27549615502357483, "learning_rate": 1.669211395295255e-06, "loss": 0.2835, "step": 15877 }, { "epoch": 2.2767421852595353, "grad_norm": 0.28992924094200134, "learning_rate": 1.6685892540697036e-06, "loss": 0.2822, "step": 15878 }, { "epoch": 2.2768855749928303, "grad_norm": 0.2631838917732239, "learning_rate": 1.6679672055855023e-06, "loss": 0.2792, "step": 15879 }, { "epoch": 2.2770289647261257, "grad_norm": 0.25814205408096313, "learning_rate": 1.6673452498599701e-06, "loss": 0.2828, "step": 15880 }, { "epoch": 2.2771723544594207, "grad_norm": 0.269435316324234, "learning_rate": 1.6667233869104183e-06, "loss": 0.268, "step": 15881 }, { "epoch": 2.2773157441927157, "grad_norm": 0.2659202814102173, "learning_rate": 1.6661016167541593e-06, "loss": 0.2788, "step": 15882 }, { "epoch": 2.277459133926011, "grad_norm": 0.2837490439414978, "learning_rate": 1.665479939408503e-06, "loss": 0.2861, "step": 15883 }, { "epoch": 2.277602523659306, "grad_norm": 0.2756569981575012, "learning_rate": 1.664858354890756e-06, "loss": 0.2891, "step": 15884 }, { "epoch": 2.277745913392601, "grad_norm": 0.2671923041343689, "learning_rate": 1.6642368632182216e-06, "loss": 0.2806, "step": 15885 }, { "epoch": 2.277889303125896, "grad_norm": 0.25389721989631653, "learning_rate": 1.663615464408202e-06, "loss": 0.2769, "step": 15886 }, { "epoch": 2.2780326928591914, "grad_norm": 0.2904200255870819, "learning_rate": 1.662994158477995e-06, "loss": 0.2996, "step": 15887 }, { "epoch": 2.2781760825924864, "grad_norm": 0.27242279052734375, "learning_rate": 1.6623729454448979e-06, "loss": 0.286, "step": 15888 }, { "epoch": 2.2783194723257814, "grad_norm": 0.27456167340278625, "learning_rate": 1.6617518253262034e-06, "loss": 0.2685, "step": 15889 }, { "epoch": 2.2784628620590768, "grad_norm": 0.26398757100105286, "learning_rate": 1.6611307981392026e-06, "loss": 0.2706, "step": 15890 }, { "epoch": 2.2786062517923718, "grad_norm": 0.26662665605545044, "learning_rate": 1.6605098639011869e-06, "loss": 0.2669, "step": 15891 }, { "epoch": 2.2787496415256667, "grad_norm": 0.25648975372314453, "learning_rate": 1.6598890226294367e-06, "loss": 0.272, "step": 15892 }, { "epoch": 2.2788930312589617, "grad_norm": 0.28400692343711853, "learning_rate": 1.6592682743412387e-06, "loss": 0.2711, "step": 15893 }, { "epoch": 2.279036420992257, "grad_norm": 0.26800498366355896, "learning_rate": 1.6586476190538731e-06, "loss": 0.2852, "step": 15894 }, { "epoch": 2.279179810725552, "grad_norm": 0.2667850852012634, "learning_rate": 1.6580270567846174e-06, "loss": 0.2784, "step": 15895 }, { "epoch": 2.279323200458847, "grad_norm": 0.2980619966983795, "learning_rate": 1.6574065875507479e-06, "loss": 0.2902, "step": 15896 }, { "epoch": 2.2794665901921425, "grad_norm": 0.29158762097358704, "learning_rate": 1.6567862113695383e-06, "loss": 0.2828, "step": 15897 }, { "epoch": 2.2796099799254375, "grad_norm": 0.2539096474647522, "learning_rate": 1.6561659282582565e-06, "loss": 0.2786, "step": 15898 }, { "epoch": 2.2797533696587324, "grad_norm": 0.2748478949069977, "learning_rate": 1.6555457382341711e-06, "loss": 0.2591, "step": 15899 }, { "epoch": 2.2798967593920274, "grad_norm": 0.2942037582397461, "learning_rate": 1.6549256413145476e-06, "loss": 0.2911, "step": 15900 }, { "epoch": 2.2800401491253224, "grad_norm": 0.27222079038619995, "learning_rate": 1.6543056375166488e-06, "loss": 0.2656, "step": 15901 }, { "epoch": 2.280183538858618, "grad_norm": 0.2642134428024292, "learning_rate": 1.653685726857736e-06, "loss": 0.2875, "step": 15902 }, { "epoch": 2.2803269285919128, "grad_norm": 0.3011690378189087, "learning_rate": 1.653065909355064e-06, "loss": 0.2856, "step": 15903 }, { "epoch": 2.2804703183252077, "grad_norm": 0.2891407310962677, "learning_rate": 1.6524461850258876e-06, "loss": 0.3031, "step": 15904 }, { "epoch": 2.280613708058503, "grad_norm": 0.29000797867774963, "learning_rate": 1.65182655388746e-06, "loss": 0.3229, "step": 15905 }, { "epoch": 2.280757097791798, "grad_norm": 0.2808435559272766, "learning_rate": 1.6512070159570309e-06, "loss": 0.2802, "step": 15906 }, { "epoch": 2.280900487525093, "grad_norm": 0.267723023891449, "learning_rate": 1.650587571251847e-06, "loss": 0.2915, "step": 15907 }, { "epoch": 2.281043877258388, "grad_norm": 0.2578825056552887, "learning_rate": 1.6499682197891532e-06, "loss": 0.2723, "step": 15908 }, { "epoch": 2.2811872669916835, "grad_norm": 0.27979904413223267, "learning_rate": 1.64934896158619e-06, "loss": 0.2735, "step": 15909 }, { "epoch": 2.2813306567249785, "grad_norm": 0.2744722068309784, "learning_rate": 1.6487297966601979e-06, "loss": 0.2783, "step": 15910 }, { "epoch": 2.2814740464582735, "grad_norm": 0.2736952006816864, "learning_rate": 1.6481107250284135e-06, "loss": 0.2787, "step": 15911 }, { "epoch": 2.281617436191569, "grad_norm": 0.2824798822402954, "learning_rate": 1.6474917467080698e-06, "loss": 0.2721, "step": 15912 }, { "epoch": 2.281760825924864, "grad_norm": 0.2585921287536621, "learning_rate": 1.6468728617164003e-06, "loss": 0.266, "step": 15913 }, { "epoch": 2.281904215658159, "grad_norm": 0.27908119559288025, "learning_rate": 1.646254070070631e-06, "loss": 0.2951, "step": 15914 }, { "epoch": 2.282047605391454, "grad_norm": 0.2795200049877167, "learning_rate": 1.6456353717879887e-06, "loss": 0.2776, "step": 15915 }, { "epoch": 2.282190995124749, "grad_norm": 0.2863902747631073, "learning_rate": 1.645016766885698e-06, "loss": 0.2512, "step": 15916 }, { "epoch": 2.282334384858044, "grad_norm": 0.2680417597293854, "learning_rate": 1.6443982553809802e-06, "loss": 0.2917, "step": 15917 }, { "epoch": 2.282477774591339, "grad_norm": 0.2738342583179474, "learning_rate": 1.6437798372910541e-06, "loss": 0.2836, "step": 15918 }, { "epoch": 2.2826211643246346, "grad_norm": 0.2735961675643921, "learning_rate": 1.643161512633133e-06, "loss": 0.2826, "step": 15919 }, { "epoch": 2.2827645540579296, "grad_norm": 0.296565979719162, "learning_rate": 1.6425432814244314e-06, "loss": 0.2952, "step": 15920 }, { "epoch": 2.2829079437912245, "grad_norm": 0.2769438624382019, "learning_rate": 1.6419251436821605e-06, "loss": 0.2838, "step": 15921 }, { "epoch": 2.2830513335245195, "grad_norm": 0.281594842672348, "learning_rate": 1.6413070994235276e-06, "loss": 0.2639, "step": 15922 }, { "epoch": 2.283194723257815, "grad_norm": 0.2815891206264496, "learning_rate": 1.6406891486657383e-06, "loss": 0.2867, "step": 15923 }, { "epoch": 2.28333811299111, "grad_norm": 0.27912282943725586, "learning_rate": 1.6400712914259959e-06, "loss": 0.2775, "step": 15924 }, { "epoch": 2.283481502724405, "grad_norm": 0.2836027443408966, "learning_rate": 1.6394535277215001e-06, "loss": 0.2446, "step": 15925 }, { "epoch": 2.2836248924577, "grad_norm": 0.2828578054904938, "learning_rate": 1.6388358575694484e-06, "loss": 0.3133, "step": 15926 }, { "epoch": 2.2837682821909953, "grad_norm": 0.2680123746395111, "learning_rate": 1.6382182809870361e-06, "loss": 0.2765, "step": 15927 }, { "epoch": 2.2839116719242902, "grad_norm": 0.26400190591812134, "learning_rate": 1.6376007979914555e-06, "loss": 0.2951, "step": 15928 }, { "epoch": 2.284055061657585, "grad_norm": 0.2646219730377197, "learning_rate": 1.636983408599898e-06, "loss": 0.2854, "step": 15929 }, { "epoch": 2.28419845139088, "grad_norm": 0.2670300304889679, "learning_rate": 1.6363661128295472e-06, "loss": 0.3092, "step": 15930 }, { "epoch": 2.2843418411241756, "grad_norm": 0.27078142762184143, "learning_rate": 1.63574891069759e-06, "loss": 0.2835, "step": 15931 }, { "epoch": 2.2844852308574706, "grad_norm": 0.2841905951499939, "learning_rate": 1.6351318022212075e-06, "loss": 0.2842, "step": 15932 }, { "epoch": 2.2846286205907655, "grad_norm": 0.2928292453289032, "learning_rate": 1.6345147874175798e-06, "loss": 0.2689, "step": 15933 }, { "epoch": 2.284772010324061, "grad_norm": 0.2583334743976593, "learning_rate": 1.6338978663038852e-06, "loss": 0.2694, "step": 15934 }, { "epoch": 2.284915400057356, "grad_norm": 0.27951836585998535, "learning_rate": 1.6332810388972941e-06, "loss": 0.2982, "step": 15935 }, { "epoch": 2.285058789790651, "grad_norm": 0.2565602660179138, "learning_rate": 1.6326643052149798e-06, "loss": 0.2878, "step": 15936 }, { "epoch": 2.285202179523946, "grad_norm": 0.2859475612640381, "learning_rate": 1.6320476652741113e-06, "loss": 0.2826, "step": 15937 }, { "epoch": 2.2853455692572413, "grad_norm": 0.2696382999420166, "learning_rate": 1.6314311190918553e-06, "loss": 0.2828, "step": 15938 }, { "epoch": 2.2854889589905363, "grad_norm": 0.2891370952129364, "learning_rate": 1.6308146666853748e-06, "loss": 0.2751, "step": 15939 }, { "epoch": 2.2856323487238313, "grad_norm": 0.28161144256591797, "learning_rate": 1.6301983080718325e-06, "loss": 0.2789, "step": 15940 }, { "epoch": 2.2857757384571267, "grad_norm": 0.29669424891471863, "learning_rate": 1.6295820432683828e-06, "loss": 0.2803, "step": 15941 }, { "epoch": 2.2859191281904216, "grad_norm": 0.28505662083625793, "learning_rate": 1.6289658722921858e-06, "loss": 0.2737, "step": 15942 }, { "epoch": 2.2860625179237166, "grad_norm": 0.284982830286026, "learning_rate": 1.6283497951603934e-06, "loss": 0.2829, "step": 15943 }, { "epoch": 2.2862059076570116, "grad_norm": 0.2542119324207306, "learning_rate": 1.6277338118901559e-06, "loss": 0.266, "step": 15944 }, { "epoch": 2.286349297390307, "grad_norm": 0.279413104057312, "learning_rate": 1.6271179224986234e-06, "loss": 0.284, "step": 15945 }, { "epoch": 2.286492687123602, "grad_norm": 0.2641865611076355, "learning_rate": 1.6265021270029375e-06, "loss": 0.2871, "step": 15946 }, { "epoch": 2.286636076856897, "grad_norm": 0.271432489156723, "learning_rate": 1.6258864254202433e-06, "loss": 0.2786, "step": 15947 }, { "epoch": 2.2867794665901924, "grad_norm": 0.275264710187912, "learning_rate": 1.6252708177676806e-06, "loss": 0.2526, "step": 15948 }, { "epoch": 2.2869228563234874, "grad_norm": 0.2860961854457855, "learning_rate": 1.6246553040623869e-06, "loss": 0.2688, "step": 15949 }, { "epoch": 2.2870662460567823, "grad_norm": 0.28325507044792175, "learning_rate": 1.6240398843214978e-06, "loss": 0.2903, "step": 15950 }, { "epoch": 2.2872096357900773, "grad_norm": 0.2712536156177521, "learning_rate": 1.623424558562146e-06, "loss": 0.2859, "step": 15951 }, { "epoch": 2.2873530255233727, "grad_norm": 0.2753163278102875, "learning_rate": 1.6228093268014595e-06, "loss": 0.2825, "step": 15952 }, { "epoch": 2.2874964152566677, "grad_norm": 0.2876656949520111, "learning_rate": 1.6221941890565656e-06, "loss": 0.3055, "step": 15953 }, { "epoch": 2.2876398049899627, "grad_norm": 0.2651551067829132, "learning_rate": 1.6215791453445894e-06, "loss": 0.2577, "step": 15954 }, { "epoch": 2.2877831947232576, "grad_norm": 0.2540189027786255, "learning_rate": 1.6209641956826533e-06, "loss": 0.2722, "step": 15955 }, { "epoch": 2.287926584456553, "grad_norm": 0.26960113644599915, "learning_rate": 1.6203493400878774e-06, "loss": 0.2937, "step": 15956 }, { "epoch": 2.288069974189848, "grad_norm": 0.2744259834289551, "learning_rate": 1.6197345785773756e-06, "loss": 0.2836, "step": 15957 }, { "epoch": 2.288213363923143, "grad_norm": 0.26817086338996887, "learning_rate": 1.6191199111682627e-06, "loss": 0.2718, "step": 15958 }, { "epoch": 2.288356753656438, "grad_norm": 0.2628229856491089, "learning_rate": 1.618505337877651e-06, "loss": 0.2778, "step": 15959 }, { "epoch": 2.2885001433897334, "grad_norm": 0.2733226418495178, "learning_rate": 1.6178908587226494e-06, "loss": 0.2843, "step": 15960 }, { "epoch": 2.2886435331230284, "grad_norm": 0.28672611713409424, "learning_rate": 1.6172764737203633e-06, "loss": 0.2638, "step": 15961 }, { "epoch": 2.2887869228563233, "grad_norm": 0.26604026556015015, "learning_rate": 1.6166621828878965e-06, "loss": 0.2704, "step": 15962 }, { "epoch": 2.2889303125896188, "grad_norm": 0.2856563627719879, "learning_rate": 1.61604798624235e-06, "loss": 0.2798, "step": 15963 }, { "epoch": 2.2890737023229137, "grad_norm": 0.2920713424682617, "learning_rate": 1.6154338838008227e-06, "loss": 0.2823, "step": 15964 }, { "epoch": 2.2892170920562087, "grad_norm": 0.2652086913585663, "learning_rate": 1.614819875580409e-06, "loss": 0.2862, "step": 15965 }, { "epoch": 2.2893604817895037, "grad_norm": 0.2868072986602783, "learning_rate": 1.6142059615982025e-06, "loss": 0.278, "step": 15966 }, { "epoch": 2.289503871522799, "grad_norm": 0.282830148935318, "learning_rate": 1.6135921418712959e-06, "loss": 0.2941, "step": 15967 }, { "epoch": 2.289647261256094, "grad_norm": 0.2757398188114166, "learning_rate": 1.6129784164167723e-06, "loss": 0.2688, "step": 15968 }, { "epoch": 2.289790650989389, "grad_norm": 0.26540783047676086, "learning_rate": 1.6123647852517198e-06, "loss": 0.2962, "step": 15969 }, { "epoch": 2.2899340407226845, "grad_norm": 0.29721304774284363, "learning_rate": 1.6117512483932208e-06, "loss": 0.2702, "step": 15970 }, { "epoch": 2.2900774304559794, "grad_norm": 0.264392614364624, "learning_rate": 1.611137805858355e-06, "loss": 0.2873, "step": 15971 }, { "epoch": 2.2902208201892744, "grad_norm": 0.28001028299331665, "learning_rate": 1.6105244576642004e-06, "loss": 0.2681, "step": 15972 }, { "epoch": 2.2903642099225694, "grad_norm": 0.28574138879776, "learning_rate": 1.6099112038278302e-06, "loss": 0.2944, "step": 15973 }, { "epoch": 2.290507599655865, "grad_norm": 0.29010358452796936, "learning_rate": 1.6092980443663164e-06, "loss": 0.2757, "step": 15974 }, { "epoch": 2.29065098938916, "grad_norm": 0.2808097302913666, "learning_rate": 1.6086849792967296e-06, "loss": 0.3015, "step": 15975 }, { "epoch": 2.2907943791224548, "grad_norm": 0.27750781178474426, "learning_rate": 1.6080720086361361e-06, "loss": 0.2846, "step": 15976 }, { "epoch": 2.2909377688557497, "grad_norm": 0.27345994114875793, "learning_rate": 1.6074591324016004e-06, "loss": 0.2953, "step": 15977 }, { "epoch": 2.291081158589045, "grad_norm": 0.2824264168739319, "learning_rate": 1.6068463506101833e-06, "loss": 0.2882, "step": 15978 }, { "epoch": 2.29122454832234, "grad_norm": 0.2896766662597656, "learning_rate": 1.6062336632789445e-06, "loss": 0.2936, "step": 15979 }, { "epoch": 2.291367938055635, "grad_norm": 0.26147177815437317, "learning_rate": 1.6056210704249398e-06, "loss": 0.2832, "step": 15980 }, { "epoch": 2.29151132778893, "grad_norm": 0.2638866603374481, "learning_rate": 1.605008572065223e-06, "loss": 0.2785, "step": 15981 }, { "epoch": 2.2916547175222255, "grad_norm": 0.266916424036026, "learning_rate": 1.6043961682168452e-06, "loss": 0.2722, "step": 15982 }, { "epoch": 2.2917981072555205, "grad_norm": 0.2755494713783264, "learning_rate": 1.6037838588968557e-06, "loss": 0.2716, "step": 15983 }, { "epoch": 2.2919414969888154, "grad_norm": 0.26264843344688416, "learning_rate": 1.6031716441222982e-06, "loss": 0.2837, "step": 15984 }, { "epoch": 2.292084886722111, "grad_norm": 0.2713884711265564, "learning_rate": 1.6025595239102165e-06, "loss": 0.2853, "step": 15985 }, { "epoch": 2.292228276455406, "grad_norm": 0.28156062960624695, "learning_rate": 1.6019474982776517e-06, "loss": 0.2796, "step": 15986 }, { "epoch": 2.292371666188701, "grad_norm": 0.2749059796333313, "learning_rate": 1.6013355672416413e-06, "loss": 0.2819, "step": 15987 }, { "epoch": 2.292515055921996, "grad_norm": 0.2767476439476013, "learning_rate": 1.600723730819222e-06, "loss": 0.2866, "step": 15988 }, { "epoch": 2.292658445655291, "grad_norm": 0.2691134214401245, "learning_rate": 1.6001119890274237e-06, "loss": 0.2818, "step": 15989 }, { "epoch": 2.292801835388586, "grad_norm": 0.26491579413414, "learning_rate": 1.5995003418832772e-06, "loss": 0.2877, "step": 15990 }, { "epoch": 2.292945225121881, "grad_norm": 0.29030176997184753, "learning_rate": 1.5988887894038102e-06, "loss": 0.2743, "step": 15991 }, { "epoch": 2.2930886148551766, "grad_norm": 0.2688451409339905, "learning_rate": 1.5982773316060475e-06, "loss": 0.2672, "step": 15992 }, { "epoch": 2.2932320045884715, "grad_norm": 0.27785149216651917, "learning_rate": 1.5976659685070112e-06, "loss": 0.2854, "step": 15993 }, { "epoch": 2.2933753943217665, "grad_norm": 0.28903618454933167, "learning_rate": 1.5970547001237224e-06, "loss": 0.2889, "step": 15994 }, { "epoch": 2.2935187840550615, "grad_norm": 0.2786373794078827, "learning_rate": 1.5964435264731943e-06, "loss": 0.2703, "step": 15995 }, { "epoch": 2.293662173788357, "grad_norm": 0.2774244546890259, "learning_rate": 1.5958324475724423e-06, "loss": 0.2816, "step": 15996 }, { "epoch": 2.293805563521652, "grad_norm": 0.25363782048225403, "learning_rate": 1.5952214634384772e-06, "loss": 0.2841, "step": 15997 }, { "epoch": 2.293948953254947, "grad_norm": 0.2815702259540558, "learning_rate": 1.5946105740883106e-06, "loss": 0.2745, "step": 15998 }, { "epoch": 2.2940923429882423, "grad_norm": 0.29623159766197205, "learning_rate": 1.5939997795389483e-06, "loss": 0.3033, "step": 15999 }, { "epoch": 2.2942357327215372, "grad_norm": 0.28347331285476685, "learning_rate": 1.593389079807392e-06, "loss": 0.2801, "step": 16000 }, { "epoch": 2.294379122454832, "grad_norm": 0.2687966525554657, "learning_rate": 1.5927784749106429e-06, "loss": 0.2671, "step": 16001 }, { "epoch": 2.294522512188127, "grad_norm": 0.2743277847766876, "learning_rate": 1.5921679648656997e-06, "loss": 0.2804, "step": 16002 }, { "epoch": 2.2946659019214226, "grad_norm": 0.2693708837032318, "learning_rate": 1.591557549689558e-06, "loss": 0.2816, "step": 16003 }, { "epoch": 2.2948092916547176, "grad_norm": 0.2830987274646759, "learning_rate": 1.5909472293992113e-06, "loss": 0.2877, "step": 16004 }, { "epoch": 2.2949526813880126, "grad_norm": 0.293212890625, "learning_rate": 1.5903370040116512e-06, "loss": 0.2802, "step": 16005 }, { "epoch": 2.2950960711213075, "grad_norm": 0.2751302719116211, "learning_rate": 1.589726873543862e-06, "loss": 0.2692, "step": 16006 }, { "epoch": 2.295239460854603, "grad_norm": 0.2686130106449127, "learning_rate": 1.5891168380128307e-06, "loss": 0.262, "step": 16007 }, { "epoch": 2.295382850587898, "grad_norm": 0.27029213309288025, "learning_rate": 1.5885068974355395e-06, "loss": 0.2906, "step": 16008 }, { "epoch": 2.295526240321193, "grad_norm": 0.2721913754940033, "learning_rate": 1.5878970518289688e-06, "loss": 0.2763, "step": 16009 }, { "epoch": 2.295669630054488, "grad_norm": 0.2760290205478668, "learning_rate": 1.587287301210097e-06, "loss": 0.2678, "step": 16010 }, { "epoch": 2.2958130197877833, "grad_norm": 0.27628135681152344, "learning_rate": 1.5866776455958954e-06, "loss": 0.2814, "step": 16011 }, { "epoch": 2.2959564095210783, "grad_norm": 0.2995634078979492, "learning_rate": 1.5860680850033372e-06, "loss": 0.2871, "step": 16012 }, { "epoch": 2.2960997992543732, "grad_norm": 0.29296770691871643, "learning_rate": 1.585458619449392e-06, "loss": 0.2916, "step": 16013 }, { "epoch": 2.2962431889876687, "grad_norm": 0.2728198170661926, "learning_rate": 1.5848492489510263e-06, "loss": 0.2693, "step": 16014 }, { "epoch": 2.2963865787209636, "grad_norm": 0.2785836458206177, "learning_rate": 1.584239973525204e-06, "loss": 0.3004, "step": 16015 }, { "epoch": 2.2965299684542586, "grad_norm": 0.25188496708869934, "learning_rate": 1.5836307931888866e-06, "loss": 0.2631, "step": 16016 }, { "epoch": 2.2966733581875536, "grad_norm": 0.2609885334968567, "learning_rate": 1.5830217079590327e-06, "loss": 0.288, "step": 16017 }, { "epoch": 2.296816747920849, "grad_norm": 0.2651975154876709, "learning_rate": 1.5824127178525977e-06, "loss": 0.284, "step": 16018 }, { "epoch": 2.296960137654144, "grad_norm": 0.28611621260643005, "learning_rate": 1.5818038228865357e-06, "loss": 0.2858, "step": 16019 }, { "epoch": 2.297103527387439, "grad_norm": 0.2550414204597473, "learning_rate": 1.5811950230777966e-06, "loss": 0.2635, "step": 16020 }, { "epoch": 2.2972469171207344, "grad_norm": 0.28768694400787354, "learning_rate": 1.5805863184433312e-06, "loss": 0.2785, "step": 16021 }, { "epoch": 2.2973903068540293, "grad_norm": 0.2639598846435547, "learning_rate": 1.5799777090000807e-06, "loss": 0.284, "step": 16022 }, { "epoch": 2.2975336965873243, "grad_norm": 0.27237507700920105, "learning_rate": 1.5793691947649897e-06, "loss": 0.2966, "step": 16023 }, { "epoch": 2.2976770863206193, "grad_norm": 0.27304044365882874, "learning_rate": 1.578760775754999e-06, "loss": 0.2687, "step": 16024 }, { "epoch": 2.2978204760539147, "grad_norm": 0.28980422019958496, "learning_rate": 1.578152451987045e-06, "loss": 0.2687, "step": 16025 }, { "epoch": 2.2979638657872097, "grad_norm": 0.2757693827152252, "learning_rate": 1.5775442234780647e-06, "loss": 0.2638, "step": 16026 }, { "epoch": 2.2981072555205047, "grad_norm": 0.286590039730072, "learning_rate": 1.5769360902449864e-06, "loss": 0.2904, "step": 16027 }, { "epoch": 2.2982506452538, "grad_norm": 0.2562280297279358, "learning_rate": 1.5763280523047419e-06, "loss": 0.26, "step": 16028 }, { "epoch": 2.298394034987095, "grad_norm": 0.2666354775428772, "learning_rate": 1.5757201096742581e-06, "loss": 0.2724, "step": 16029 }, { "epoch": 2.29853742472039, "grad_norm": 0.2679380774497986, "learning_rate": 1.575112262370459e-06, "loss": 0.2928, "step": 16030 }, { "epoch": 2.298680814453685, "grad_norm": 0.27580803632736206, "learning_rate": 1.5745045104102657e-06, "loss": 0.2618, "step": 16031 }, { "epoch": 2.29882420418698, "grad_norm": 0.2751731276512146, "learning_rate": 1.5738968538105976e-06, "loss": 0.2675, "step": 16032 }, { "epoch": 2.2989675939202754, "grad_norm": 0.2765854299068451, "learning_rate": 1.5732892925883708e-06, "loss": 0.2871, "step": 16033 }, { "epoch": 2.2991109836535704, "grad_norm": 0.27710846066474915, "learning_rate": 1.5726818267604987e-06, "loss": 0.2645, "step": 16034 }, { "epoch": 2.2992543733868653, "grad_norm": 0.28125980496406555, "learning_rate": 1.5720744563438929e-06, "loss": 0.2914, "step": 16035 }, { "epoch": 2.2993977631201608, "grad_norm": 0.27342191338539124, "learning_rate": 1.5714671813554605e-06, "loss": 0.2819, "step": 16036 }, { "epoch": 2.2995411528534557, "grad_norm": 0.278262197971344, "learning_rate": 1.5708600018121095e-06, "loss": 0.264, "step": 16037 }, { "epoch": 2.2996845425867507, "grad_norm": 0.27256321907043457, "learning_rate": 1.5702529177307392e-06, "loss": 0.2783, "step": 16038 }, { "epoch": 2.2998279323200457, "grad_norm": 0.25315046310424805, "learning_rate": 1.5696459291282518e-06, "loss": 0.2766, "step": 16039 }, { "epoch": 2.299971322053341, "grad_norm": 0.27503079175949097, "learning_rate": 1.5690390360215452e-06, "loss": 0.2814, "step": 16040 }, { "epoch": 2.300114711786636, "grad_norm": 0.27315592765808105, "learning_rate": 1.5684322384275136e-06, "loss": 0.2723, "step": 16041 }, { "epoch": 2.300258101519931, "grad_norm": 0.2559513449668884, "learning_rate": 1.5678255363630512e-06, "loss": 0.2801, "step": 16042 }, { "epoch": 2.3004014912532265, "grad_norm": 0.2559855580329895, "learning_rate": 1.5672189298450451e-06, "loss": 0.2736, "step": 16043 }, { "epoch": 2.3005448809865214, "grad_norm": 0.29439136385917664, "learning_rate": 1.5666124188903831e-06, "loss": 0.2939, "step": 16044 }, { "epoch": 2.3006882707198164, "grad_norm": 0.2649913728237152, "learning_rate": 1.5660060035159502e-06, "loss": 0.2745, "step": 16045 }, { "epoch": 2.3008316604531114, "grad_norm": 0.2751180827617645, "learning_rate": 1.565399683738627e-06, "loss": 0.2832, "step": 16046 }, { "epoch": 2.300975050186407, "grad_norm": 0.29204294085502625, "learning_rate": 1.564793459575294e-06, "loss": 0.2993, "step": 16047 }, { "epoch": 2.3011184399197018, "grad_norm": 0.26664167642593384, "learning_rate": 1.564187331042828e-06, "loss": 0.2726, "step": 16048 }, { "epoch": 2.3012618296529967, "grad_norm": 0.26806458830833435, "learning_rate": 1.5635812981580994e-06, "loss": 0.297, "step": 16049 }, { "epoch": 2.301405219386292, "grad_norm": 0.2811361849308014, "learning_rate": 1.5629753609379821e-06, "loss": 0.2706, "step": 16050 }, { "epoch": 2.301548609119587, "grad_norm": 0.2842871844768524, "learning_rate": 1.5623695193993431e-06, "loss": 0.2773, "step": 16051 }, { "epoch": 2.301691998852882, "grad_norm": 0.2773159146308899, "learning_rate": 1.5617637735590485e-06, "loss": 0.2596, "step": 16052 }, { "epoch": 2.301835388586177, "grad_norm": 0.2756746709346771, "learning_rate": 1.5611581234339619e-06, "loss": 0.2828, "step": 16053 }, { "epoch": 2.3019787783194725, "grad_norm": 0.27286967635154724, "learning_rate": 1.560552569040943e-06, "loss": 0.2837, "step": 16054 }, { "epoch": 2.3021221680527675, "grad_norm": 0.2764778435230255, "learning_rate": 1.5599471103968494e-06, "loss": 0.2823, "step": 16055 }, { "epoch": 2.3022655577860625, "grad_norm": 0.26427897810935974, "learning_rate": 1.5593417475185368e-06, "loss": 0.2747, "step": 16056 }, { "epoch": 2.3024089475193574, "grad_norm": 0.27685612440109253, "learning_rate": 1.5587364804228573e-06, "loss": 0.2665, "step": 16057 }, { "epoch": 2.302552337252653, "grad_norm": 0.2768869400024414, "learning_rate": 1.5581313091266604e-06, "loss": 0.2751, "step": 16058 }, { "epoch": 2.302695726985948, "grad_norm": 0.2918972373008728, "learning_rate": 1.5575262336467945e-06, "loss": 0.2776, "step": 16059 }, { "epoch": 2.302839116719243, "grad_norm": 0.2920622229576111, "learning_rate": 1.5569212540001015e-06, "loss": 0.2745, "step": 16060 }, { "epoch": 2.3029825064525378, "grad_norm": 0.2695295512676239, "learning_rate": 1.5563163702034246e-06, "loss": 0.2678, "step": 16061 }, { "epoch": 2.303125896185833, "grad_norm": 0.2795342206954956, "learning_rate": 1.5557115822736019e-06, "loss": 0.3014, "step": 16062 }, { "epoch": 2.303269285919128, "grad_norm": 0.2762601673603058, "learning_rate": 1.5551068902274706e-06, "loss": 0.2632, "step": 16063 }, { "epoch": 2.303412675652423, "grad_norm": 0.2932548522949219, "learning_rate": 1.5545022940818665e-06, "loss": 0.2736, "step": 16064 }, { "epoch": 2.3035560653857186, "grad_norm": 0.26755291223526, "learning_rate": 1.5538977938536159e-06, "loss": 0.3022, "step": 16065 }, { "epoch": 2.3036994551190135, "grad_norm": 0.2950375974178314, "learning_rate": 1.5532933895595502e-06, "loss": 0.2687, "step": 16066 }, { "epoch": 2.3038428448523085, "grad_norm": 0.2583381235599518, "learning_rate": 1.5526890812164947e-06, "loss": 0.2736, "step": 16067 }, { "epoch": 2.3039862345856035, "grad_norm": 0.2581648826599121, "learning_rate": 1.5520848688412715e-06, "loss": 0.269, "step": 16068 }, { "epoch": 2.304129624318899, "grad_norm": 0.25709185004234314, "learning_rate": 1.551480752450702e-06, "loss": 0.2835, "step": 16069 }, { "epoch": 2.304273014052194, "grad_norm": 0.27900633215904236, "learning_rate": 1.5508767320616036e-06, "loss": 0.2887, "step": 16070 }, { "epoch": 2.304416403785489, "grad_norm": 0.2873438000679016, "learning_rate": 1.5502728076907913e-06, "loss": 0.2527, "step": 16071 }, { "epoch": 2.3045597935187843, "grad_norm": 0.27180078625679016, "learning_rate": 1.5496689793550773e-06, "loss": 0.2677, "step": 16072 }, { "epoch": 2.3047031832520792, "grad_norm": 0.2707682251930237, "learning_rate": 1.5490652470712714e-06, "loss": 0.2934, "step": 16073 }, { "epoch": 2.304846572985374, "grad_norm": 0.2729596197605133, "learning_rate": 1.54846161085618e-06, "loss": 0.2833, "step": 16074 }, { "epoch": 2.304989962718669, "grad_norm": 0.2647123336791992, "learning_rate": 1.54785807072661e-06, "loss": 0.2954, "step": 16075 }, { "epoch": 2.3051333524519646, "grad_norm": 0.2789974510669708, "learning_rate": 1.5472546266993593e-06, "loss": 0.2803, "step": 16076 }, { "epoch": 2.3052767421852596, "grad_norm": 0.28069210052490234, "learning_rate": 1.5466512787912285e-06, "loss": 0.2886, "step": 16077 }, { "epoch": 2.3054201319185545, "grad_norm": 0.28147462010383606, "learning_rate": 1.5460480270190137e-06, "loss": 0.2761, "step": 16078 }, { "epoch": 2.30556352165185, "grad_norm": 0.26157498359680176, "learning_rate": 1.5454448713995085e-06, "loss": 0.2841, "step": 16079 }, { "epoch": 2.305706911385145, "grad_norm": 0.2710428833961487, "learning_rate": 1.5448418119495057e-06, "loss": 0.27, "step": 16080 }, { "epoch": 2.30585030111844, "grad_norm": 0.273284912109375, "learning_rate": 1.5442388486857906e-06, "loss": 0.2955, "step": 16081 }, { "epoch": 2.305993690851735, "grad_norm": 0.28065672516822815, "learning_rate": 1.5436359816251494e-06, "loss": 0.2786, "step": 16082 }, { "epoch": 2.30613708058503, "grad_norm": 0.29414060711860657, "learning_rate": 1.543033210784366e-06, "loss": 0.2927, "step": 16083 }, { "epoch": 2.3062804703183253, "grad_norm": 0.2724486291408539, "learning_rate": 1.5424305361802199e-06, "loss": 0.2864, "step": 16084 }, { "epoch": 2.3064238600516203, "grad_norm": 0.2865172326564789, "learning_rate": 1.5418279578294893e-06, "loss": 0.283, "step": 16085 }, { "epoch": 2.3065672497849152, "grad_norm": 0.27630677819252014, "learning_rate": 1.5412254757489482e-06, "loss": 0.2749, "step": 16086 }, { "epoch": 2.3067106395182106, "grad_norm": 0.2676531970500946, "learning_rate": 1.5406230899553699e-06, "loss": 0.2814, "step": 16087 }, { "epoch": 2.3068540292515056, "grad_norm": 0.2825198173522949, "learning_rate": 1.5400208004655227e-06, "loss": 0.2898, "step": 16088 }, { "epoch": 2.3069974189848006, "grad_norm": 0.2891598045825958, "learning_rate": 1.5394186072961743e-06, "loss": 0.2746, "step": 16089 }, { "epoch": 2.3071408087180956, "grad_norm": 0.28206151723861694, "learning_rate": 1.5388165104640885e-06, "loss": 0.2961, "step": 16090 }, { "epoch": 2.307284198451391, "grad_norm": 0.2895515561103821, "learning_rate": 1.538214509986028e-06, "loss": 0.2847, "step": 16091 }, { "epoch": 2.307427588184686, "grad_norm": 0.28846848011016846, "learning_rate": 1.5376126058787488e-06, "loss": 0.2826, "step": 16092 }, { "epoch": 2.307570977917981, "grad_norm": 0.2660902440547943, "learning_rate": 1.5370107981590087e-06, "loss": 0.2816, "step": 16093 }, { "epoch": 2.3077143676512764, "grad_norm": 0.268562376499176, "learning_rate": 1.5364090868435611e-06, "loss": 0.2761, "step": 16094 }, { "epoch": 2.3078577573845713, "grad_norm": 0.2672036290168762, "learning_rate": 1.5358074719491562e-06, "loss": 0.291, "step": 16095 }, { "epoch": 2.3080011471178663, "grad_norm": 0.26966598629951477, "learning_rate": 1.535205953492544e-06, "loss": 0.2844, "step": 16096 }, { "epoch": 2.3081445368511613, "grad_norm": 0.2833053767681122, "learning_rate": 1.534604531490466e-06, "loss": 0.2731, "step": 16097 }, { "epoch": 2.3082879265844567, "grad_norm": 0.2667677402496338, "learning_rate": 1.5340032059596681e-06, "loss": 0.2716, "step": 16098 }, { "epoch": 2.3084313163177517, "grad_norm": 0.2575417459011078, "learning_rate": 1.533401976916889e-06, "loss": 0.2682, "step": 16099 }, { "epoch": 2.3085747060510466, "grad_norm": 0.28470680117607117, "learning_rate": 1.532800844378866e-06, "loss": 0.2842, "step": 16100 }, { "epoch": 2.308718095784342, "grad_norm": 0.278067409992218, "learning_rate": 1.532199808362334e-06, "loss": 0.2812, "step": 16101 }, { "epoch": 2.308861485517637, "grad_norm": 0.27510955929756165, "learning_rate": 1.531598868884026e-06, "loss": 0.2874, "step": 16102 }, { "epoch": 2.309004875250932, "grad_norm": 0.2866281270980835, "learning_rate": 1.5309980259606694e-06, "loss": 0.2794, "step": 16103 }, { "epoch": 2.309148264984227, "grad_norm": 0.26260918378829956, "learning_rate": 1.530397279608991e-06, "loss": 0.2713, "step": 16104 }, { "epoch": 2.3092916547175224, "grad_norm": 0.28945600986480713, "learning_rate": 1.529796629845715e-06, "loss": 0.2564, "step": 16105 }, { "epoch": 2.3094350444508174, "grad_norm": 0.2759048640727997, "learning_rate": 1.5291960766875623e-06, "loss": 0.2822, "step": 16106 }, { "epoch": 2.3095784341841123, "grad_norm": 0.26483985781669617, "learning_rate": 1.5285956201512526e-06, "loss": 0.2755, "step": 16107 }, { "epoch": 2.3097218239174073, "grad_norm": 0.2561725676059723, "learning_rate": 1.5279952602535002e-06, "loss": 0.2647, "step": 16108 }, { "epoch": 2.3098652136507027, "grad_norm": 0.26477423310279846, "learning_rate": 1.5273949970110192e-06, "loss": 0.2905, "step": 16109 }, { "epoch": 2.3100086033839977, "grad_norm": 0.29678282141685486, "learning_rate": 1.5267948304405194e-06, "loss": 0.2795, "step": 16110 }, { "epoch": 2.3101519931172927, "grad_norm": 0.28067079186439514, "learning_rate": 1.5261947605587086e-06, "loss": 0.2856, "step": 16111 }, { "epoch": 2.3102953828505877, "grad_norm": 0.27398696541786194, "learning_rate": 1.525594787382292e-06, "loss": 0.2814, "step": 16112 }, { "epoch": 2.310438772583883, "grad_norm": 0.2722437381744385, "learning_rate": 1.524994910927974e-06, "loss": 0.2746, "step": 16113 }, { "epoch": 2.310582162317178, "grad_norm": 0.27426788210868835, "learning_rate": 1.5243951312124494e-06, "loss": 0.2845, "step": 16114 }, { "epoch": 2.310725552050473, "grad_norm": 0.27791035175323486, "learning_rate": 1.5237954482524187e-06, "loss": 0.2833, "step": 16115 }, { "epoch": 2.3108689417837684, "grad_norm": 0.2751106917858124, "learning_rate": 1.5231958620645754e-06, "loss": 0.2949, "step": 16116 }, { "epoch": 2.3110123315170634, "grad_norm": 0.2712244391441345, "learning_rate": 1.5225963726656106e-06, "loss": 0.2795, "step": 16117 }, { "epoch": 2.3111557212503584, "grad_norm": 0.271562784910202, "learning_rate": 1.5219969800722151e-06, "loss": 0.2799, "step": 16118 }, { "epoch": 2.3112991109836534, "grad_norm": 0.2622414529323578, "learning_rate": 1.5213976843010726e-06, "loss": 0.2924, "step": 16119 }, { "epoch": 2.311442500716949, "grad_norm": 0.2514861226081848, "learning_rate": 1.5207984853688668e-06, "loss": 0.2948, "step": 16120 }, { "epoch": 2.3115858904502438, "grad_norm": 0.2596287131309509, "learning_rate": 1.520199383292279e-06, "loss": 0.2738, "step": 16121 }, { "epoch": 2.3117292801835387, "grad_norm": 0.2554210424423218, "learning_rate": 1.5196003780879876e-06, "loss": 0.2756, "step": 16122 }, { "epoch": 2.311872669916834, "grad_norm": 0.270251989364624, "learning_rate": 1.519001469772668e-06, "loss": 0.2625, "step": 16123 }, { "epoch": 2.312016059650129, "grad_norm": 0.2863355278968811, "learning_rate": 1.5184026583629928e-06, "loss": 0.2803, "step": 16124 }, { "epoch": 2.312159449383424, "grad_norm": 0.28580352663993835, "learning_rate": 1.517803943875632e-06, "loss": 0.2791, "step": 16125 }, { "epoch": 2.312302839116719, "grad_norm": 0.270944744348526, "learning_rate": 1.5172053263272523e-06, "loss": 0.2751, "step": 16126 }, { "epoch": 2.3124462288500145, "grad_norm": 0.27006620168685913, "learning_rate": 1.516606805734519e-06, "loss": 0.2964, "step": 16127 }, { "epoch": 2.3125896185833095, "grad_norm": 0.30829715728759766, "learning_rate": 1.5160083821140935e-06, "loss": 0.2887, "step": 16128 }, { "epoch": 2.3127330083166044, "grad_norm": 0.2833797037601471, "learning_rate": 1.5154100554826373e-06, "loss": 0.2994, "step": 16129 }, { "epoch": 2.3128763980499, "grad_norm": 0.26938456296920776, "learning_rate": 1.514811825856803e-06, "loss": 0.2791, "step": 16130 }, { "epoch": 2.313019787783195, "grad_norm": 0.290700227022171, "learning_rate": 1.5142136932532464e-06, "loss": 0.2771, "step": 16131 }, { "epoch": 2.31316317751649, "grad_norm": 0.26788341999053955, "learning_rate": 1.5136156576886185e-06, "loss": 0.2663, "step": 16132 }, { "epoch": 2.313306567249785, "grad_norm": 0.28800633549690247, "learning_rate": 1.5130177191795676e-06, "loss": 0.2731, "step": 16133 }, { "epoch": 2.31344995698308, "grad_norm": 0.2863621115684509, "learning_rate": 1.5124198777427407e-06, "loss": 0.2758, "step": 16134 }, { "epoch": 2.313593346716375, "grad_norm": 0.26963964104652405, "learning_rate": 1.511822133394778e-06, "loss": 0.2871, "step": 16135 }, { "epoch": 2.31373673644967, "grad_norm": 0.28410211205482483, "learning_rate": 1.5112244861523212e-06, "loss": 0.2703, "step": 16136 }, { "epoch": 2.313880126182965, "grad_norm": 0.27419817447662354, "learning_rate": 1.510626936032008e-06, "loss": 0.3044, "step": 16137 }, { "epoch": 2.3140235159162605, "grad_norm": 0.2848987877368927, "learning_rate": 1.5100294830504735e-06, "loss": 0.2658, "step": 16138 }, { "epoch": 2.3141669056495555, "grad_norm": 0.2881658375263214, "learning_rate": 1.5094321272243495e-06, "loss": 0.2856, "step": 16139 }, { "epoch": 2.3143102953828505, "grad_norm": 0.27785399556159973, "learning_rate": 1.5088348685702676e-06, "loss": 0.2732, "step": 16140 }, { "epoch": 2.3144536851161455, "grad_norm": 0.27995386719703674, "learning_rate": 1.5082377071048503e-06, "loss": 0.2619, "step": 16141 }, { "epoch": 2.314597074849441, "grad_norm": 0.2945365011692047, "learning_rate": 1.5076406428447237e-06, "loss": 0.2648, "step": 16142 }, { "epoch": 2.314740464582736, "grad_norm": 0.26495346426963806, "learning_rate": 1.5070436758065088e-06, "loss": 0.2825, "step": 16143 }, { "epoch": 2.314883854316031, "grad_norm": 0.270832896232605, "learning_rate": 1.5064468060068254e-06, "loss": 0.2964, "step": 16144 }, { "epoch": 2.3150272440493262, "grad_norm": 0.2710391581058502, "learning_rate": 1.505850033462291e-06, "loss": 0.2864, "step": 16145 }, { "epoch": 2.3151706337826212, "grad_norm": 0.27465271949768066, "learning_rate": 1.5052533581895145e-06, "loss": 0.2876, "step": 16146 }, { "epoch": 2.315314023515916, "grad_norm": 0.2766377627849579, "learning_rate": 1.5046567802051093e-06, "loss": 0.2671, "step": 16147 }, { "epoch": 2.315457413249211, "grad_norm": 0.2798292338848114, "learning_rate": 1.504060299525682e-06, "loss": 0.293, "step": 16148 }, { "epoch": 2.3156008029825066, "grad_norm": 0.2705625295639038, "learning_rate": 1.5034639161678384e-06, "loss": 0.2768, "step": 16149 }, { "epoch": 2.3157441927158016, "grad_norm": 0.2654151916503906, "learning_rate": 1.502867630148182e-06, "loss": 0.2917, "step": 16150 }, { "epoch": 2.3158875824490965, "grad_norm": 0.2835780680179596, "learning_rate": 1.5022714414833094e-06, "loss": 0.2965, "step": 16151 }, { "epoch": 2.316030972182392, "grad_norm": 0.27175217866897583, "learning_rate": 1.5016753501898202e-06, "loss": 0.2894, "step": 16152 }, { "epoch": 2.316174361915687, "grad_norm": 0.27210065722465515, "learning_rate": 1.5010793562843074e-06, "loss": 0.2809, "step": 16153 }, { "epoch": 2.316317751648982, "grad_norm": 0.2741928696632385, "learning_rate": 1.5004834597833629e-06, "loss": 0.2943, "step": 16154 }, { "epoch": 2.316461141382277, "grad_norm": 0.2553321123123169, "learning_rate": 1.4998876607035756e-06, "loss": 0.2684, "step": 16155 }, { "epoch": 2.3166045311155723, "grad_norm": 0.2900979816913605, "learning_rate": 1.4992919590615329e-06, "loss": 0.2962, "step": 16156 }, { "epoch": 2.3167479208488673, "grad_norm": 0.2612946033477783, "learning_rate": 1.498696354873816e-06, "loss": 0.2685, "step": 16157 }, { "epoch": 2.3168913105821622, "grad_norm": 0.29692190885543823, "learning_rate": 1.4981008481570065e-06, "loss": 0.2925, "step": 16158 }, { "epoch": 2.3170347003154577, "grad_norm": 0.27309322357177734, "learning_rate": 1.4975054389276822e-06, "loss": 0.2635, "step": 16159 }, { "epoch": 2.3171780900487526, "grad_norm": 0.27393487095832825, "learning_rate": 1.4969101272024184e-06, "loss": 0.2857, "step": 16160 }, { "epoch": 2.3173214797820476, "grad_norm": 0.2704678475856781, "learning_rate": 1.4963149129977884e-06, "loss": 0.2753, "step": 16161 }, { "epoch": 2.3174648695153426, "grad_norm": 0.27995380759239197, "learning_rate": 1.495719796330361e-06, "loss": 0.2759, "step": 16162 }, { "epoch": 2.3176082592486376, "grad_norm": 0.272324800491333, "learning_rate": 1.4951247772167044e-06, "loss": 0.2669, "step": 16163 }, { "epoch": 2.317751648981933, "grad_norm": 0.305825799703598, "learning_rate": 1.4945298556733823e-06, "loss": 0.2946, "step": 16164 }, { "epoch": 2.317895038715228, "grad_norm": 0.25893232226371765, "learning_rate": 1.4939350317169566e-06, "loss": 0.252, "step": 16165 }, { "epoch": 2.318038428448523, "grad_norm": 0.28161871433258057, "learning_rate": 1.4933403053639867e-06, "loss": 0.2922, "step": 16166 }, { "epoch": 2.3181818181818183, "grad_norm": 0.27427899837493896, "learning_rate": 1.4927456766310294e-06, "loss": 0.2918, "step": 16167 }, { "epoch": 2.3183252079151133, "grad_norm": 0.26831957697868347, "learning_rate": 1.492151145534636e-06, "loss": 0.2578, "step": 16168 }, { "epoch": 2.3184685976484083, "grad_norm": 0.2662809491157532, "learning_rate": 1.4915567120913581e-06, "loss": 0.2979, "step": 16169 }, { "epoch": 2.3186119873817033, "grad_norm": 0.2775609791278839, "learning_rate": 1.490962376317745e-06, "loss": 0.2882, "step": 16170 }, { "epoch": 2.3187553771149987, "grad_norm": 0.27309808135032654, "learning_rate": 1.4903681382303415e-06, "loss": 0.2738, "step": 16171 }, { "epoch": 2.3188987668482937, "grad_norm": 0.2733897864818573, "learning_rate": 1.4897739978456916e-06, "loss": 0.276, "step": 16172 }, { "epoch": 2.3190421565815886, "grad_norm": 0.2887718677520752, "learning_rate": 1.4891799551803321e-06, "loss": 0.2802, "step": 16173 }, { "epoch": 2.319185546314884, "grad_norm": 0.28251439332962036, "learning_rate": 1.4885860102508021e-06, "loss": 0.2778, "step": 16174 }, { "epoch": 2.319328936048179, "grad_norm": 0.28613927960395813, "learning_rate": 1.487992163073636e-06, "loss": 0.2742, "step": 16175 }, { "epoch": 2.319472325781474, "grad_norm": 0.2657339870929718, "learning_rate": 1.4873984136653651e-06, "loss": 0.2774, "step": 16176 }, { "epoch": 2.319615715514769, "grad_norm": 0.27267470955848694, "learning_rate": 1.4868047620425196e-06, "loss": 0.2859, "step": 16177 }, { "epoch": 2.3197591052480644, "grad_norm": 0.24285587668418884, "learning_rate": 1.4862112082216252e-06, "loss": 0.2687, "step": 16178 }, { "epoch": 2.3199024949813594, "grad_norm": 0.2595980167388916, "learning_rate": 1.4856177522192056e-06, "loss": 0.2712, "step": 16179 }, { "epoch": 2.3200458847146543, "grad_norm": 0.27232083678245544, "learning_rate": 1.4850243940517812e-06, "loss": 0.2704, "step": 16180 }, { "epoch": 2.3201892744479498, "grad_norm": 0.2814542353153229, "learning_rate": 1.4844311337358708e-06, "loss": 0.2654, "step": 16181 }, { "epoch": 2.3203326641812447, "grad_norm": 0.2575214207172394, "learning_rate": 1.4838379712879896e-06, "loss": 0.2886, "step": 16182 }, { "epoch": 2.3204760539145397, "grad_norm": 0.27700355648994446, "learning_rate": 1.4832449067246525e-06, "loss": 0.286, "step": 16183 }, { "epoch": 2.3206194436478347, "grad_norm": 0.27328255772590637, "learning_rate": 1.4826519400623657e-06, "loss": 0.2673, "step": 16184 }, { "epoch": 2.32076283338113, "grad_norm": 0.2812561094760895, "learning_rate": 1.4820590713176374e-06, "loss": 0.2938, "step": 16185 }, { "epoch": 2.320906223114425, "grad_norm": 0.29277652502059937, "learning_rate": 1.481466300506974e-06, "loss": 0.307, "step": 16186 }, { "epoch": 2.32104961284772, "grad_norm": 0.27795523405075073, "learning_rate": 1.4808736276468755e-06, "loss": 0.2709, "step": 16187 }, { "epoch": 2.321193002581015, "grad_norm": 0.2814495861530304, "learning_rate": 1.4802810527538436e-06, "loss": 0.2831, "step": 16188 }, { "epoch": 2.3213363923143104, "grad_norm": 0.2724767029285431, "learning_rate": 1.4796885758443713e-06, "loss": 0.28, "step": 16189 }, { "epoch": 2.3214797820476054, "grad_norm": 0.2883996069431305, "learning_rate": 1.4790961969349542e-06, "loss": 0.2824, "step": 16190 }, { "epoch": 2.3216231717809004, "grad_norm": 0.2796645164489746, "learning_rate": 1.4785039160420822e-06, "loss": 0.2806, "step": 16191 }, { "epoch": 2.3217665615141954, "grad_norm": 0.26500657200813293, "learning_rate": 1.4779117331822446e-06, "loss": 0.2936, "step": 16192 }, { "epoch": 2.3219099512474908, "grad_norm": 0.2668549716472626, "learning_rate": 1.477319648371926e-06, "loss": 0.2909, "step": 16193 }, { "epoch": 2.3220533409807858, "grad_norm": 0.2745973765850067, "learning_rate": 1.476727661627611e-06, "loss": 0.2789, "step": 16194 }, { "epoch": 2.3221967307140807, "grad_norm": 0.2794622480869293, "learning_rate": 1.4761357729657767e-06, "loss": 0.2862, "step": 16195 }, { "epoch": 2.322340120447376, "grad_norm": 0.2879396677017212, "learning_rate": 1.4755439824029016e-06, "loss": 0.2864, "step": 16196 }, { "epoch": 2.322483510180671, "grad_norm": 0.275468111038208, "learning_rate": 1.474952289955461e-06, "loss": 0.291, "step": 16197 }, { "epoch": 2.322626899913966, "grad_norm": 0.26457998156547546, "learning_rate": 1.4743606956399254e-06, "loss": 0.2788, "step": 16198 }, { "epoch": 2.322770289647261, "grad_norm": 0.2684290111064911, "learning_rate": 1.4737691994727648e-06, "loss": 0.2839, "step": 16199 }, { "epoch": 2.3229136793805565, "grad_norm": 0.28615817427635193, "learning_rate": 1.4731778014704457e-06, "loss": 0.2939, "step": 16200 }, { "epoch": 2.3230570691138515, "grad_norm": 0.27374377846717834, "learning_rate": 1.4725865016494307e-06, "loss": 0.2785, "step": 16201 }, { "epoch": 2.3232004588471464, "grad_norm": 0.26424264907836914, "learning_rate": 1.4719953000261817e-06, "loss": 0.2806, "step": 16202 }, { "epoch": 2.323343848580442, "grad_norm": 0.2640242576599121, "learning_rate": 1.4714041966171567e-06, "loss": 0.2798, "step": 16203 }, { "epoch": 2.323487238313737, "grad_norm": 0.26958003640174866, "learning_rate": 1.4708131914388118e-06, "loss": 0.2937, "step": 16204 }, { "epoch": 2.323630628047032, "grad_norm": 0.2655643820762634, "learning_rate": 1.4702222845075975e-06, "loss": 0.2744, "step": 16205 }, { "epoch": 2.3237740177803268, "grad_norm": 0.29461780190467834, "learning_rate": 1.469631475839965e-06, "loss": 0.2778, "step": 16206 }, { "epoch": 2.323917407513622, "grad_norm": 0.27592796087265015, "learning_rate": 1.4690407654523613e-06, "loss": 0.2937, "step": 16207 }, { "epoch": 2.324060797246917, "grad_norm": 0.2535076141357422, "learning_rate": 1.468450153361231e-06, "loss": 0.2926, "step": 16208 }, { "epoch": 2.324204186980212, "grad_norm": 0.28944069147109985, "learning_rate": 1.4678596395830164e-06, "loss": 0.2737, "step": 16209 }, { "epoch": 2.3243475767135076, "grad_norm": 0.2639159560203552, "learning_rate": 1.4672692241341574e-06, "loss": 0.2802, "step": 16210 }, { "epoch": 2.3244909664468025, "grad_norm": 0.2664530575275421, "learning_rate": 1.466678907031087e-06, "loss": 0.2777, "step": 16211 }, { "epoch": 2.3246343561800975, "grad_norm": 0.2716902494430542, "learning_rate": 1.4660886882902408e-06, "loss": 0.2719, "step": 16212 }, { "epoch": 2.3247777459133925, "grad_norm": 0.27049726247787476, "learning_rate": 1.4654985679280493e-06, "loss": 0.2924, "step": 16213 }, { "epoch": 2.3249211356466875, "grad_norm": 0.2752832770347595, "learning_rate": 1.4649085459609402e-06, "loss": 0.2761, "step": 16214 }, { "epoch": 2.325064525379983, "grad_norm": 0.2982228994369507, "learning_rate": 1.4643186224053397e-06, "loss": 0.277, "step": 16215 }, { "epoch": 2.325207915113278, "grad_norm": 0.28321704268455505, "learning_rate": 1.46372879727767e-06, "loss": 0.2635, "step": 16216 }, { "epoch": 2.325351304846573, "grad_norm": 0.2929808795452118, "learning_rate": 1.46313907059435e-06, "loss": 0.268, "step": 16217 }, { "epoch": 2.3254946945798682, "grad_norm": 0.276183545589447, "learning_rate": 1.4625494423717979e-06, "loss": 0.2734, "step": 16218 }, { "epoch": 2.325638084313163, "grad_norm": 0.2602872848510742, "learning_rate": 1.4619599126264279e-06, "loss": 0.2908, "step": 16219 }, { "epoch": 2.325781474046458, "grad_norm": 0.27922162413597107, "learning_rate": 1.4613704813746516e-06, "loss": 0.2955, "step": 16220 }, { "epoch": 2.325924863779753, "grad_norm": 0.26585254073143005, "learning_rate": 1.4607811486328787e-06, "loss": 0.2776, "step": 16221 }, { "epoch": 2.3260682535130486, "grad_norm": 0.2628755569458008, "learning_rate": 1.460191914417513e-06, "loss": 0.2831, "step": 16222 }, { "epoch": 2.3262116432463436, "grad_norm": 0.2690536677837372, "learning_rate": 1.459602778744959e-06, "loss": 0.2706, "step": 16223 }, { "epoch": 2.3263550329796385, "grad_norm": 0.27393004298210144, "learning_rate": 1.459013741631618e-06, "loss": 0.2946, "step": 16224 }, { "epoch": 2.326498422712934, "grad_norm": 0.27115315198898315, "learning_rate": 1.458424803093887e-06, "loss": 0.293, "step": 16225 }, { "epoch": 2.326641812446229, "grad_norm": 0.2767401933670044, "learning_rate": 1.4578359631481632e-06, "loss": 0.2874, "step": 16226 }, { "epoch": 2.326785202179524, "grad_norm": 0.27132412791252136, "learning_rate": 1.4572472218108353e-06, "loss": 0.269, "step": 16227 }, { "epoch": 2.326928591912819, "grad_norm": 0.288542240858078, "learning_rate": 1.4566585790982952e-06, "loss": 0.2846, "step": 16228 }, { "epoch": 2.3270719816461143, "grad_norm": 0.27780526876449585, "learning_rate": 1.4560700350269297e-06, "loss": 0.2635, "step": 16229 }, { "epoch": 2.3272153713794093, "grad_norm": 0.2646896541118622, "learning_rate": 1.455481589613123e-06, "loss": 0.2763, "step": 16230 }, { "epoch": 2.3273587611127042, "grad_norm": 0.2721984386444092, "learning_rate": 1.4548932428732576e-06, "loss": 0.2646, "step": 16231 }, { "epoch": 2.3275021508459997, "grad_norm": 0.2721317708492279, "learning_rate": 1.454304994823708e-06, "loss": 0.2968, "step": 16232 }, { "epoch": 2.3276455405792946, "grad_norm": 0.2823401987552643, "learning_rate": 1.4537168454808548e-06, "loss": 0.2716, "step": 16233 }, { "epoch": 2.3277889303125896, "grad_norm": 0.25514769554138184, "learning_rate": 1.4531287948610689e-06, "loss": 0.2744, "step": 16234 }, { "epoch": 2.3279323200458846, "grad_norm": 0.2852766513824463, "learning_rate": 1.4525408429807214e-06, "loss": 0.2766, "step": 16235 }, { "epoch": 2.32807570977918, "grad_norm": 0.26271721720695496, "learning_rate": 1.4519529898561801e-06, "loss": 0.2708, "step": 16236 }, { "epoch": 2.328219099512475, "grad_norm": 0.2761736512184143, "learning_rate": 1.451365235503811e-06, "loss": 0.2739, "step": 16237 }, { "epoch": 2.32836248924577, "grad_norm": 0.27142101526260376, "learning_rate": 1.4507775799399732e-06, "loss": 0.2672, "step": 16238 }, { "epoch": 2.328505878979065, "grad_norm": 0.27425482869148254, "learning_rate": 1.450190023181028e-06, "loss": 0.2747, "step": 16239 }, { "epoch": 2.3286492687123603, "grad_norm": 0.268348753452301, "learning_rate": 1.4496025652433322e-06, "loss": 0.2802, "step": 16240 }, { "epoch": 2.3287926584456553, "grad_norm": 0.27316945791244507, "learning_rate": 1.449015206143239e-06, "loss": 0.2672, "step": 16241 }, { "epoch": 2.3289360481789503, "grad_norm": 0.2886018753051758, "learning_rate": 1.4484279458971017e-06, "loss": 0.2658, "step": 16242 }, { "epoch": 2.3290794379122453, "grad_norm": 0.2647758424282074, "learning_rate": 1.4478407845212656e-06, "loss": 0.2662, "step": 16243 }, { "epoch": 2.3292228276455407, "grad_norm": 0.2453915923833847, "learning_rate": 1.4472537220320781e-06, "loss": 0.2806, "step": 16244 }, { "epoch": 2.3293662173788356, "grad_norm": 0.2589021921157837, "learning_rate": 1.4466667584458815e-06, "loss": 0.2803, "step": 16245 }, { "epoch": 2.3295096071121306, "grad_norm": 0.2539064884185791, "learning_rate": 1.446079893779016e-06, "loss": 0.2922, "step": 16246 }, { "epoch": 2.329652996845426, "grad_norm": 0.28214386105537415, "learning_rate": 1.4454931280478197e-06, "loss": 0.2661, "step": 16247 }, { "epoch": 2.329796386578721, "grad_norm": 0.2693588435649872, "learning_rate": 1.4449064612686282e-06, "loss": 0.2669, "step": 16248 }, { "epoch": 2.329939776312016, "grad_norm": 0.2630290389060974, "learning_rate": 1.4443198934577706e-06, "loss": 0.2776, "step": 16249 }, { "epoch": 2.330083166045311, "grad_norm": 0.3058275580406189, "learning_rate": 1.4437334246315776e-06, "loss": 0.2843, "step": 16250 }, { "epoch": 2.3302265557786064, "grad_norm": 0.27159401774406433, "learning_rate": 1.4431470548063754e-06, "loss": 0.2857, "step": 16251 }, { "epoch": 2.3303699455119014, "grad_norm": 0.28040990233421326, "learning_rate": 1.4425607839984878e-06, "loss": 0.2768, "step": 16252 }, { "epoch": 2.3305133352451963, "grad_norm": 0.27494123578071594, "learning_rate": 1.4419746122242356e-06, "loss": 0.2763, "step": 16253 }, { "epoch": 2.3306567249784917, "grad_norm": 0.2766663730144501, "learning_rate": 1.4413885394999372e-06, "loss": 0.29, "step": 16254 }, { "epoch": 2.3308001147117867, "grad_norm": 0.2732710838317871, "learning_rate": 1.4408025658419073e-06, "loss": 0.2913, "step": 16255 }, { "epoch": 2.3309435044450817, "grad_norm": 0.26263147592544556, "learning_rate": 1.4402166912664589e-06, "loss": 0.2864, "step": 16256 }, { "epoch": 2.3310868941783767, "grad_norm": 0.28390562534332275, "learning_rate": 1.4396309157899018e-06, "loss": 0.2916, "step": 16257 }, { "epoch": 2.331230283911672, "grad_norm": 0.25993219017982483, "learning_rate": 1.4390452394285449e-06, "loss": 0.275, "step": 16258 }, { "epoch": 2.331373673644967, "grad_norm": 0.28387728333473206, "learning_rate": 1.438459662198689e-06, "loss": 0.2826, "step": 16259 }, { "epoch": 2.331517063378262, "grad_norm": 0.2642388939857483, "learning_rate": 1.4378741841166377e-06, "loss": 0.2758, "step": 16260 }, { "epoch": 2.3316604531115575, "grad_norm": 0.2889520525932312, "learning_rate": 1.4372888051986895e-06, "loss": 0.2719, "step": 16261 }, { "epoch": 2.3318038428448524, "grad_norm": 0.27898120880126953, "learning_rate": 1.43670352546114e-06, "loss": 0.2773, "step": 16262 }, { "epoch": 2.3319472325781474, "grad_norm": 0.27442705631256104, "learning_rate": 1.436118344920283e-06, "loss": 0.2767, "step": 16263 }, { "epoch": 2.3320906223114424, "grad_norm": 0.259199857711792, "learning_rate": 1.4355332635924114e-06, "loss": 0.2696, "step": 16264 }, { "epoch": 2.3322340120447373, "grad_norm": 0.27802395820617676, "learning_rate": 1.434948281493808e-06, "loss": 0.2797, "step": 16265 }, { "epoch": 2.3323774017780328, "grad_norm": 0.2680080831050873, "learning_rate": 1.4343633986407613e-06, "loss": 0.2872, "step": 16266 }, { "epoch": 2.3325207915113277, "grad_norm": 0.2888089418411255, "learning_rate": 1.433778615049552e-06, "loss": 0.2956, "step": 16267 }, { "epoch": 2.3326641812446227, "grad_norm": 0.25464335083961487, "learning_rate": 1.4331939307364605e-06, "loss": 0.2699, "step": 16268 }, { "epoch": 2.332807570977918, "grad_norm": 0.27952975034713745, "learning_rate": 1.4326093457177632e-06, "loss": 0.2662, "step": 16269 }, { "epoch": 2.332950960711213, "grad_norm": 0.2663708031177521, "learning_rate": 1.4320248600097337e-06, "loss": 0.2943, "step": 16270 }, { "epoch": 2.333094350444508, "grad_norm": 0.29321834444999695, "learning_rate": 1.4314404736286436e-06, "loss": 0.2871, "step": 16271 }, { "epoch": 2.333237740177803, "grad_norm": 0.2693978548049927, "learning_rate": 1.4308561865907617e-06, "loss": 0.2782, "step": 16272 }, { "epoch": 2.3333811299110985, "grad_norm": 0.27246448397636414, "learning_rate": 1.4302719989123525e-06, "loss": 0.2846, "step": 16273 }, { "epoch": 2.3335245196443934, "grad_norm": 0.27928823232650757, "learning_rate": 1.42968791060968e-06, "loss": 0.2829, "step": 16274 }, { "epoch": 2.3336679093776884, "grad_norm": 0.27876266837120056, "learning_rate": 1.4291039216990054e-06, "loss": 0.2875, "step": 16275 }, { "epoch": 2.333811299110984, "grad_norm": 0.2649765610694885, "learning_rate": 1.428520032196583e-06, "loss": 0.2742, "step": 16276 }, { "epoch": 2.333954688844279, "grad_norm": 0.27305370569229126, "learning_rate": 1.4279362421186688e-06, "loss": 0.2761, "step": 16277 }, { "epoch": 2.334098078577574, "grad_norm": 0.26292145252227783, "learning_rate": 1.4273525514815152e-06, "loss": 0.2656, "step": 16278 }, { "epoch": 2.3342414683108688, "grad_norm": 0.2669651508331299, "learning_rate": 1.4267689603013702e-06, "loss": 0.2467, "step": 16279 }, { "epoch": 2.334384858044164, "grad_norm": 0.25459903478622437, "learning_rate": 1.4261854685944827e-06, "loss": 0.2747, "step": 16280 }, { "epoch": 2.334528247777459, "grad_norm": 0.2581961750984192, "learning_rate": 1.4256020763770922e-06, "loss": 0.2729, "step": 16281 }, { "epoch": 2.334671637510754, "grad_norm": 0.26350417733192444, "learning_rate": 1.4250187836654418e-06, "loss": 0.2888, "step": 16282 }, { "epoch": 2.3348150272440495, "grad_norm": 0.2865633964538574, "learning_rate": 1.4244355904757689e-06, "loss": 0.2891, "step": 16283 }, { "epoch": 2.3349584169773445, "grad_norm": 0.26526084542274475, "learning_rate": 1.4238524968243088e-06, "loss": 0.2968, "step": 16284 }, { "epoch": 2.3351018067106395, "grad_norm": 0.26960355043411255, "learning_rate": 1.4232695027272958e-06, "loss": 0.2711, "step": 16285 }, { "epoch": 2.3352451964439345, "grad_norm": 0.2805197834968567, "learning_rate": 1.4226866082009565e-06, "loss": 0.2764, "step": 16286 }, { "epoch": 2.33538858617723, "grad_norm": 0.2748042941093445, "learning_rate": 1.422103813261519e-06, "loss": 0.2712, "step": 16287 }, { "epoch": 2.335531975910525, "grad_norm": 0.27794596552848816, "learning_rate": 1.4215211179252053e-06, "loss": 0.2731, "step": 16288 }, { "epoch": 2.33567536564382, "grad_norm": 0.29451486468315125, "learning_rate": 1.4209385222082411e-06, "loss": 0.2696, "step": 16289 }, { "epoch": 2.335818755377115, "grad_norm": 0.2708883285522461, "learning_rate": 1.420356026126843e-06, "loss": 0.269, "step": 16290 }, { "epoch": 2.3359621451104102, "grad_norm": 0.2726970314979553, "learning_rate": 1.4197736296972274e-06, "loss": 0.2928, "step": 16291 }, { "epoch": 2.336105534843705, "grad_norm": 0.2750295400619507, "learning_rate": 1.4191913329356055e-06, "loss": 0.2905, "step": 16292 }, { "epoch": 2.336248924577, "grad_norm": 0.2695609927177429, "learning_rate": 1.4186091358581889e-06, "loss": 0.2892, "step": 16293 }, { "epoch": 2.336392314310295, "grad_norm": 0.26541978120803833, "learning_rate": 1.4180270384811844e-06, "loss": 0.2889, "step": 16294 }, { "epoch": 2.3365357040435906, "grad_norm": 0.26618608832359314, "learning_rate": 1.4174450408207968e-06, "loss": 0.2957, "step": 16295 }, { "epoch": 2.3366790937768855, "grad_norm": 0.2816403806209564, "learning_rate": 1.41686314289323e-06, "loss": 0.2741, "step": 16296 }, { "epoch": 2.3368224835101805, "grad_norm": 0.26603981852531433, "learning_rate": 1.41628134471468e-06, "loss": 0.2744, "step": 16297 }, { "epoch": 2.336965873243476, "grad_norm": 0.28584060072898865, "learning_rate": 1.4156996463013445e-06, "loss": 0.2984, "step": 16298 }, { "epoch": 2.337109262976771, "grad_norm": 0.2804533839225769, "learning_rate": 1.415118047669417e-06, "loss": 0.2665, "step": 16299 }, { "epoch": 2.337252652710066, "grad_norm": 0.2694946229457855, "learning_rate": 1.4145365488350887e-06, "loss": 0.2869, "step": 16300 }, { "epoch": 2.337396042443361, "grad_norm": 0.27502813935279846, "learning_rate": 1.413955149814547e-06, "loss": 0.2736, "step": 16301 }, { "epoch": 2.3375394321766563, "grad_norm": 0.26626327633857727, "learning_rate": 1.4133738506239796e-06, "loss": 0.2777, "step": 16302 }, { "epoch": 2.3376828219099512, "grad_norm": 0.2831478714942932, "learning_rate": 1.4127926512795653e-06, "loss": 0.2797, "step": 16303 }, { "epoch": 2.337826211643246, "grad_norm": 0.2747756540775299, "learning_rate": 1.412211551797485e-06, "loss": 0.2753, "step": 16304 }, { "epoch": 2.3379696013765416, "grad_norm": 0.2862269878387451, "learning_rate": 1.4116305521939166e-06, "loss": 0.267, "step": 16305 }, { "epoch": 2.3381129911098366, "grad_norm": 0.260719358921051, "learning_rate": 1.4110496524850331e-06, "loss": 0.2736, "step": 16306 }, { "epoch": 2.3382563808431316, "grad_norm": 0.2732934057712555, "learning_rate": 1.410468852687007e-06, "loss": 0.2804, "step": 16307 }, { "epoch": 2.3383997705764266, "grad_norm": 0.289597749710083, "learning_rate": 1.4098881528160058e-06, "loss": 0.2808, "step": 16308 }, { "epoch": 2.338543160309722, "grad_norm": 0.2907901406288147, "learning_rate": 1.409307552888196e-06, "loss": 0.2781, "step": 16309 }, { "epoch": 2.338686550043017, "grad_norm": 0.27904272079467773, "learning_rate": 1.4087270529197406e-06, "loss": 0.2692, "step": 16310 }, { "epoch": 2.338829939776312, "grad_norm": 0.2806587815284729, "learning_rate": 1.4081466529267995e-06, "loss": 0.2929, "step": 16311 }, { "epoch": 2.3389733295096073, "grad_norm": 0.3060486614704132, "learning_rate": 1.4075663529255317e-06, "loss": 0.2718, "step": 16312 }, { "epoch": 2.3391167192429023, "grad_norm": 0.26804783940315247, "learning_rate": 1.4069861529320894e-06, "loss": 0.2906, "step": 16313 }, { "epoch": 2.3392601089761973, "grad_norm": 0.27676472067832947, "learning_rate": 1.406406052962625e-06, "loss": 0.2751, "step": 16314 }, { "epoch": 2.3394034987094923, "grad_norm": 0.28737273812294006, "learning_rate": 1.4058260530332885e-06, "loss": 0.297, "step": 16315 }, { "epoch": 2.3395468884427877, "grad_norm": 0.2610411047935486, "learning_rate": 1.405246153160225e-06, "loss": 0.2745, "step": 16316 }, { "epoch": 2.3396902781760827, "grad_norm": 0.25652623176574707, "learning_rate": 1.4046663533595796e-06, "loss": 0.2803, "step": 16317 }, { "epoch": 2.3398336679093776, "grad_norm": 0.28981903195381165, "learning_rate": 1.404086653647494e-06, "loss": 0.2792, "step": 16318 }, { "epoch": 2.3399770576426726, "grad_norm": 0.27550745010375977, "learning_rate": 1.4035070540401019e-06, "loss": 0.2856, "step": 16319 }, { "epoch": 2.340120447375968, "grad_norm": 0.27551013231277466, "learning_rate": 1.4029275545535421e-06, "loss": 0.2755, "step": 16320 }, { "epoch": 2.340263837109263, "grad_norm": 0.2599582076072693, "learning_rate": 1.4023481552039448e-06, "loss": 0.2846, "step": 16321 }, { "epoch": 2.340407226842558, "grad_norm": 0.2667364478111267, "learning_rate": 1.4017688560074411e-06, "loss": 0.2553, "step": 16322 }, { "epoch": 2.340550616575853, "grad_norm": 0.290701299905777, "learning_rate": 1.4011896569801575e-06, "loss": 0.3015, "step": 16323 }, { "epoch": 2.3406940063091484, "grad_norm": 0.26733553409576416, "learning_rate": 1.400610558138218e-06, "loss": 0.2818, "step": 16324 }, { "epoch": 2.3408373960424433, "grad_norm": 0.270829975605011, "learning_rate": 1.4000315594977432e-06, "loss": 0.2897, "step": 16325 }, { "epoch": 2.3409807857757383, "grad_norm": 0.2795179486274719, "learning_rate": 1.3994526610748521e-06, "loss": 0.2762, "step": 16326 }, { "epoch": 2.3411241755090337, "grad_norm": 0.2554607689380646, "learning_rate": 1.39887386288566e-06, "loss": 0.2771, "step": 16327 }, { "epoch": 2.3412675652423287, "grad_norm": 0.2709166407585144, "learning_rate": 1.3982951649462805e-06, "loss": 0.2918, "step": 16328 }, { "epoch": 2.3414109549756237, "grad_norm": 0.28682035207748413, "learning_rate": 1.3977165672728243e-06, "loss": 0.287, "step": 16329 }, { "epoch": 2.3415543447089187, "grad_norm": 0.25923478603363037, "learning_rate": 1.3971380698813963e-06, "loss": 0.2727, "step": 16330 }, { "epoch": 2.341697734442214, "grad_norm": 0.27359870076179504, "learning_rate": 1.396559672788102e-06, "loss": 0.271, "step": 16331 }, { "epoch": 2.341841124175509, "grad_norm": 0.27752742171287537, "learning_rate": 1.395981376009043e-06, "loss": 0.2871, "step": 16332 }, { "epoch": 2.341984513908804, "grad_norm": 0.2764907479286194, "learning_rate": 1.395403179560319e-06, "loss": 0.2787, "step": 16333 }, { "epoch": 2.3421279036420994, "grad_norm": 0.27559787034988403, "learning_rate": 1.3948250834580267e-06, "loss": 0.2805, "step": 16334 }, { "epoch": 2.3422712933753944, "grad_norm": 0.26972416043281555, "learning_rate": 1.3942470877182568e-06, "loss": 0.2809, "step": 16335 }, { "epoch": 2.3424146831086894, "grad_norm": 0.3150515556335449, "learning_rate": 1.3936691923571016e-06, "loss": 0.281, "step": 16336 }, { "epoch": 2.3425580728419844, "grad_norm": 0.27572202682495117, "learning_rate": 1.3930913973906485e-06, "loss": 0.2924, "step": 16337 }, { "epoch": 2.34270146257528, "grad_norm": 0.25042441487312317, "learning_rate": 1.3925137028349822e-06, "loss": 0.2855, "step": 16338 }, { "epoch": 2.3428448523085748, "grad_norm": 0.2780173420906067, "learning_rate": 1.3919361087061867e-06, "loss": 0.2949, "step": 16339 }, { "epoch": 2.3429882420418697, "grad_norm": 0.28917092084884644, "learning_rate": 1.391358615020338e-06, "loss": 0.2759, "step": 16340 }, { "epoch": 2.343131631775165, "grad_norm": 0.27827975153923035, "learning_rate": 1.390781221793514e-06, "loss": 0.2951, "step": 16341 }, { "epoch": 2.34327502150846, "grad_norm": 0.26253175735473633, "learning_rate": 1.3902039290417896e-06, "loss": 0.2569, "step": 16342 }, { "epoch": 2.343418411241755, "grad_norm": 0.26958590745925903, "learning_rate": 1.3896267367812344e-06, "loss": 0.2745, "step": 16343 }, { "epoch": 2.34356180097505, "grad_norm": 0.28165745735168457, "learning_rate": 1.3890496450279156e-06, "loss": 0.2783, "step": 16344 }, { "epoch": 2.343705190708345, "grad_norm": 0.2610563039779663, "learning_rate": 1.3884726537979033e-06, "loss": 0.2842, "step": 16345 }, { "epoch": 2.3438485804416405, "grad_norm": 0.2689966857433319, "learning_rate": 1.3878957631072548e-06, "loss": 0.2848, "step": 16346 }, { "epoch": 2.3439919701749354, "grad_norm": 0.27890700101852417, "learning_rate": 1.3873189729720322e-06, "loss": 0.2816, "step": 16347 }, { "epoch": 2.3441353599082304, "grad_norm": 0.2893869876861572, "learning_rate": 1.3867422834082917e-06, "loss": 0.2875, "step": 16348 }, { "epoch": 2.344278749641526, "grad_norm": 0.2878156900405884, "learning_rate": 1.3861656944320878e-06, "loss": 0.2958, "step": 16349 }, { "epoch": 2.344422139374821, "grad_norm": 0.29130470752716064, "learning_rate": 1.3855892060594733e-06, "loss": 0.2791, "step": 16350 }, { "epoch": 2.3445655291081158, "grad_norm": 0.2944248616695404, "learning_rate": 1.3850128183064942e-06, "loss": 0.2809, "step": 16351 }, { "epoch": 2.3447089188414107, "grad_norm": 0.28272688388824463, "learning_rate": 1.3844365311891972e-06, "loss": 0.3016, "step": 16352 }, { "epoch": 2.344852308574706, "grad_norm": 0.2679407298564911, "learning_rate": 1.3838603447236255e-06, "loss": 0.2885, "step": 16353 }, { "epoch": 2.344995698308001, "grad_norm": 0.2939906418323517, "learning_rate": 1.3832842589258189e-06, "loss": 0.2793, "step": 16354 }, { "epoch": 2.345139088041296, "grad_norm": 0.2908352017402649, "learning_rate": 1.3827082738118148e-06, "loss": 0.2798, "step": 16355 }, { "epoch": 2.3452824777745915, "grad_norm": 0.2894577980041504, "learning_rate": 1.3821323893976495e-06, "loss": 0.2653, "step": 16356 }, { "epoch": 2.3454258675078865, "grad_norm": 0.29365676641464233, "learning_rate": 1.3815566056993524e-06, "loss": 0.2853, "step": 16357 }, { "epoch": 2.3455692572411815, "grad_norm": 0.2784363925457001, "learning_rate": 1.3809809227329525e-06, "loss": 0.2844, "step": 16358 }, { "epoch": 2.3457126469744765, "grad_norm": 0.2682340145111084, "learning_rate": 1.3804053405144768e-06, "loss": 0.2849, "step": 16359 }, { "epoch": 2.345856036707772, "grad_norm": 0.26912614703178406, "learning_rate": 1.379829859059948e-06, "loss": 0.2937, "step": 16360 }, { "epoch": 2.345999426441067, "grad_norm": 0.2805274426937103, "learning_rate": 1.3792544783853878e-06, "loss": 0.2813, "step": 16361 }, { "epoch": 2.346142816174362, "grad_norm": 0.2639305591583252, "learning_rate": 1.3786791985068126e-06, "loss": 0.2786, "step": 16362 }, { "epoch": 2.3462862059076572, "grad_norm": 0.2593790590763092, "learning_rate": 1.3781040194402378e-06, "loss": 0.2846, "step": 16363 }, { "epoch": 2.346429595640952, "grad_norm": 0.265705943107605, "learning_rate": 1.3775289412016762e-06, "loss": 0.2787, "step": 16364 }, { "epoch": 2.346572985374247, "grad_norm": 0.2789872884750366, "learning_rate": 1.376953963807136e-06, "loss": 0.2997, "step": 16365 }, { "epoch": 2.346716375107542, "grad_norm": 0.28363850712776184, "learning_rate": 1.3763790872726257e-06, "loss": 0.2803, "step": 16366 }, { "epoch": 2.3468597648408376, "grad_norm": 0.27498510479927063, "learning_rate": 1.3758043116141462e-06, "loss": 0.3, "step": 16367 }, { "epoch": 2.3470031545741326, "grad_norm": 0.2871544361114502, "learning_rate": 1.3752296368476993e-06, "loss": 0.2748, "step": 16368 }, { "epoch": 2.3471465443074275, "grad_norm": 0.2838138937950134, "learning_rate": 1.3746550629892834e-06, "loss": 0.2791, "step": 16369 }, { "epoch": 2.3472899340407225, "grad_norm": 0.2622626721858978, "learning_rate": 1.3740805900548943e-06, "loss": 0.2921, "step": 16370 }, { "epoch": 2.347433323774018, "grad_norm": 0.2690763771533966, "learning_rate": 1.373506218060523e-06, "loss": 0.2781, "step": 16371 }, { "epoch": 2.347576713507313, "grad_norm": 0.2724727690219879, "learning_rate": 1.372931947022162e-06, "loss": 0.2778, "step": 16372 }, { "epoch": 2.347720103240608, "grad_norm": 0.2750568389892578, "learning_rate": 1.3723577769557944e-06, "loss": 0.2897, "step": 16373 }, { "epoch": 2.347863492973903, "grad_norm": 0.2832760214805603, "learning_rate": 1.371783707877406e-06, "loss": 0.2801, "step": 16374 }, { "epoch": 2.3480068827071983, "grad_norm": 0.2764328122138977, "learning_rate": 1.3712097398029783e-06, "loss": 0.2913, "step": 16375 }, { "epoch": 2.3481502724404932, "grad_norm": 0.2831994295120239, "learning_rate": 1.3706358727484887e-06, "loss": 0.2855, "step": 16376 }, { "epoch": 2.348293662173788, "grad_norm": 0.26684340834617615, "learning_rate": 1.3700621067299135e-06, "loss": 0.2924, "step": 16377 }, { "epoch": 2.3484370519070836, "grad_norm": 0.27193716168403625, "learning_rate": 1.3694884417632259e-06, "loss": 0.2794, "step": 16378 }, { "epoch": 2.3485804416403786, "grad_norm": 0.27118444442749023, "learning_rate": 1.3689148778643946e-06, "loss": 0.3007, "step": 16379 }, { "epoch": 2.3487238313736736, "grad_norm": 0.28294652700424194, "learning_rate": 1.3683414150493878e-06, "loss": 0.2734, "step": 16380 }, { "epoch": 2.3488672211069686, "grad_norm": 0.27880027890205383, "learning_rate": 1.3677680533341696e-06, "loss": 0.2725, "step": 16381 }, { "epoch": 2.349010610840264, "grad_norm": 0.2563149631023407, "learning_rate": 1.3671947927347013e-06, "loss": 0.2571, "step": 16382 }, { "epoch": 2.349154000573559, "grad_norm": 0.2694992125034332, "learning_rate": 1.366621633266943e-06, "loss": 0.2856, "step": 16383 }, { "epoch": 2.349297390306854, "grad_norm": 0.2968001961708069, "learning_rate": 1.3660485749468477e-06, "loss": 0.2831, "step": 16384 }, { "epoch": 2.3494407800401493, "grad_norm": 0.26403626799583435, "learning_rate": 1.3654756177903705e-06, "loss": 0.2624, "step": 16385 }, { "epoch": 2.3495841697734443, "grad_norm": 0.27477210760116577, "learning_rate": 1.3649027618134608e-06, "loss": 0.2789, "step": 16386 }, { "epoch": 2.3497275595067393, "grad_norm": 0.3039310574531555, "learning_rate": 1.3643300070320665e-06, "loss": 0.2761, "step": 16387 }, { "epoch": 2.3498709492400343, "grad_norm": 0.26992350816726685, "learning_rate": 1.3637573534621335e-06, "loss": 0.2768, "step": 16388 }, { "epoch": 2.3500143389733297, "grad_norm": 0.2667110860347748, "learning_rate": 1.3631848011196004e-06, "loss": 0.2735, "step": 16389 }, { "epoch": 2.3501577287066246, "grad_norm": 0.2575920820236206, "learning_rate": 1.362612350020408e-06, "loss": 0.2857, "step": 16390 }, { "epoch": 2.3503011184399196, "grad_norm": 0.2679300010204315, "learning_rate": 1.3620400001804929e-06, "loss": 0.2801, "step": 16391 }, { "epoch": 2.350444508173215, "grad_norm": 0.28238821029663086, "learning_rate": 1.3614677516157877e-06, "loss": 0.2847, "step": 16392 }, { "epoch": 2.35058789790651, "grad_norm": 0.2810402810573578, "learning_rate": 1.360895604342225e-06, "loss": 0.2766, "step": 16393 }, { "epoch": 2.350731287639805, "grad_norm": 0.256661593914032, "learning_rate": 1.3603235583757284e-06, "loss": 0.2768, "step": 16394 }, { "epoch": 2.3508746773731, "grad_norm": 0.2652093470096588, "learning_rate": 1.359751613732226e-06, "loss": 0.2694, "step": 16395 }, { "epoch": 2.351018067106395, "grad_norm": 0.27310460805892944, "learning_rate": 1.3591797704276383e-06, "loss": 0.2858, "step": 16396 }, { "epoch": 2.3511614568396904, "grad_norm": 0.26891541481018066, "learning_rate": 1.3586080284778853e-06, "loss": 0.2857, "step": 16397 }, { "epoch": 2.3513048465729853, "grad_norm": 0.27017781138420105, "learning_rate": 1.3580363878988834e-06, "loss": 0.2903, "step": 16398 }, { "epoch": 2.3514482363062803, "grad_norm": 0.29740485548973083, "learning_rate": 1.357464848706546e-06, "loss": 0.2809, "step": 16399 }, { "epoch": 2.3515916260395757, "grad_norm": 0.2769845426082611, "learning_rate": 1.3568934109167837e-06, "loss": 0.2712, "step": 16400 }, { "epoch": 2.3517350157728707, "grad_norm": 0.28048497438430786, "learning_rate": 1.3563220745455052e-06, "loss": 0.2863, "step": 16401 }, { "epoch": 2.3518784055061657, "grad_norm": 0.2703050971031189, "learning_rate": 1.355750839608615e-06, "loss": 0.2776, "step": 16402 }, { "epoch": 2.3520217952394606, "grad_norm": 0.2602159380912781, "learning_rate": 1.3551797061220151e-06, "loss": 0.283, "step": 16403 }, { "epoch": 2.352165184972756, "grad_norm": 0.28271809220314026, "learning_rate": 1.3546086741016074e-06, "loss": 0.2603, "step": 16404 }, { "epoch": 2.352308574706051, "grad_norm": 0.2661699056625366, "learning_rate": 1.3540377435632846e-06, "loss": 0.2813, "step": 16405 }, { "epoch": 2.352451964439346, "grad_norm": 0.2630980312824249, "learning_rate": 1.3534669145229428e-06, "loss": 0.2546, "step": 16406 }, { "epoch": 2.3525953541726414, "grad_norm": 0.2754247486591339, "learning_rate": 1.3528961869964724e-06, "loss": 0.2698, "step": 16407 }, { "epoch": 2.3527387439059364, "grad_norm": 0.26490285992622375, "learning_rate": 1.352325560999762e-06, "loss": 0.2748, "step": 16408 }, { "epoch": 2.3528821336392314, "grad_norm": 0.27717125415802, "learning_rate": 1.351755036548697e-06, "loss": 0.2774, "step": 16409 }, { "epoch": 2.3530255233725264, "grad_norm": 0.2708047032356262, "learning_rate": 1.351184613659161e-06, "loss": 0.2937, "step": 16410 }, { "epoch": 2.3531689131058218, "grad_norm": 0.2753632962703705, "learning_rate": 1.350614292347031e-06, "loss": 0.27, "step": 16411 }, { "epoch": 2.3533123028391167, "grad_norm": 0.2729107439517975, "learning_rate": 1.350044072628185e-06, "loss": 0.2824, "step": 16412 }, { "epoch": 2.3534556925724117, "grad_norm": 0.27361929416656494, "learning_rate": 1.3494739545184976e-06, "loss": 0.2928, "step": 16413 }, { "epoch": 2.353599082305707, "grad_norm": 0.26357993483543396, "learning_rate": 1.3489039380338398e-06, "loss": 0.2902, "step": 16414 }, { "epoch": 2.353742472039002, "grad_norm": 0.2683350145816803, "learning_rate": 1.3483340231900798e-06, "loss": 0.2775, "step": 16415 }, { "epoch": 2.353885861772297, "grad_norm": 0.2582593560218811, "learning_rate": 1.3477642100030836e-06, "loss": 0.2839, "step": 16416 }, { "epoch": 2.354029251505592, "grad_norm": 0.28667715191841125, "learning_rate": 1.3471944984887136e-06, "loss": 0.266, "step": 16417 }, { "epoch": 2.3541726412388875, "grad_norm": 0.2702709436416626, "learning_rate": 1.3466248886628291e-06, "loss": 0.2637, "step": 16418 }, { "epoch": 2.3543160309721824, "grad_norm": 0.26133474707603455, "learning_rate": 1.3460553805412884e-06, "loss": 0.2915, "step": 16419 }, { "epoch": 2.3544594207054774, "grad_norm": 0.27644869685173035, "learning_rate": 1.3454859741399445e-06, "loss": 0.292, "step": 16420 }, { "epoch": 2.3546028104387724, "grad_norm": 0.28245964646339417, "learning_rate": 1.3449166694746513e-06, "loss": 0.2657, "step": 16421 }, { "epoch": 2.354746200172068, "grad_norm": 0.2750246226787567, "learning_rate": 1.3443474665612538e-06, "loss": 0.2719, "step": 16422 }, { "epoch": 2.354889589905363, "grad_norm": 0.2724056541919708, "learning_rate": 1.3437783654155994e-06, "loss": 0.2896, "step": 16423 }, { "epoch": 2.3550329796386578, "grad_norm": 0.2591932415962219, "learning_rate": 1.3432093660535312e-06, "loss": 0.2695, "step": 16424 }, { "epoch": 2.3551763693719527, "grad_norm": 0.26877906918525696, "learning_rate": 1.342640468490889e-06, "loss": 0.2952, "step": 16425 }, { "epoch": 2.355319759105248, "grad_norm": 0.2959631681442261, "learning_rate": 1.3420716727435113e-06, "loss": 0.2951, "step": 16426 }, { "epoch": 2.355463148838543, "grad_norm": 0.26337093114852905, "learning_rate": 1.34150297882723e-06, "loss": 0.2847, "step": 16427 }, { "epoch": 2.355606538571838, "grad_norm": 0.2685135304927826, "learning_rate": 1.3409343867578778e-06, "loss": 0.2882, "step": 16428 }, { "epoch": 2.3557499283051335, "grad_norm": 0.25268617272377014, "learning_rate": 1.3403658965512834e-06, "loss": 0.2864, "step": 16429 }, { "epoch": 2.3558933180384285, "grad_norm": 0.2701241672039032, "learning_rate": 1.3397975082232734e-06, "loss": 0.2999, "step": 16430 }, { "epoch": 2.3560367077717235, "grad_norm": 0.2798317074775696, "learning_rate": 1.3392292217896713e-06, "loss": 0.293, "step": 16431 }, { "epoch": 2.3561800975050184, "grad_norm": 0.2662425637245178, "learning_rate": 1.338661037266295e-06, "loss": 0.3041, "step": 16432 }, { "epoch": 2.356323487238314, "grad_norm": 0.26554152369499207, "learning_rate": 1.3380929546689619e-06, "loss": 0.2866, "step": 16433 }, { "epoch": 2.356466876971609, "grad_norm": 0.2824923098087311, "learning_rate": 1.3375249740134893e-06, "loss": 0.2957, "step": 16434 }, { "epoch": 2.356610266704904, "grad_norm": 0.26897934079170227, "learning_rate": 1.3369570953156875e-06, "loss": 0.2892, "step": 16435 }, { "epoch": 2.3567536564381992, "grad_norm": 0.2781849801540375, "learning_rate": 1.3363893185913652e-06, "loss": 0.2763, "step": 16436 }, { "epoch": 2.356897046171494, "grad_norm": 0.27797725796699524, "learning_rate": 1.3358216438563304e-06, "loss": 0.273, "step": 16437 }, { "epoch": 2.357040435904789, "grad_norm": 0.27280744910240173, "learning_rate": 1.335254071126383e-06, "loss": 0.2715, "step": 16438 }, { "epoch": 2.357183825638084, "grad_norm": 0.27298837900161743, "learning_rate": 1.3346866004173242e-06, "loss": 0.2822, "step": 16439 }, { "epoch": 2.3573272153713796, "grad_norm": 0.2748989462852478, "learning_rate": 1.3341192317449524e-06, "loss": 0.3039, "step": 16440 }, { "epoch": 2.3574706051046745, "grad_norm": 0.2778742015361786, "learning_rate": 1.3335519651250623e-06, "loss": 0.2927, "step": 16441 }, { "epoch": 2.3576139948379695, "grad_norm": 0.25539106130599976, "learning_rate": 1.332984800573447e-06, "loss": 0.2744, "step": 16442 }, { "epoch": 2.357757384571265, "grad_norm": 0.2752004861831665, "learning_rate": 1.3324177381058922e-06, "loss": 0.2713, "step": 16443 }, { "epoch": 2.35790077430456, "grad_norm": 0.27325528860092163, "learning_rate": 1.3318507777381856e-06, "loss": 0.2717, "step": 16444 }, { "epoch": 2.358044164037855, "grad_norm": 0.29286324977874756, "learning_rate": 1.331283919486111e-06, "loss": 0.2884, "step": 16445 }, { "epoch": 2.35818755377115, "grad_norm": 0.25912782549858093, "learning_rate": 1.330717163365448e-06, "loss": 0.2898, "step": 16446 }, { "epoch": 2.358330943504445, "grad_norm": 0.2864707112312317, "learning_rate": 1.3301505093919752e-06, "loss": 0.2802, "step": 16447 }, { "epoch": 2.3584743332377403, "grad_norm": 0.28474748134613037, "learning_rate": 1.3295839575814685e-06, "loss": 0.2618, "step": 16448 }, { "epoch": 2.3586177229710352, "grad_norm": 0.27631884813308716, "learning_rate": 1.329017507949697e-06, "loss": 0.2853, "step": 16449 }, { "epoch": 2.35876111270433, "grad_norm": 0.27633702754974365, "learning_rate": 1.3284511605124305e-06, "loss": 0.2749, "step": 16450 }, { "epoch": 2.3589045024376256, "grad_norm": 0.27456316351890564, "learning_rate": 1.3278849152854362e-06, "loss": 0.2788, "step": 16451 }, { "epoch": 2.3590478921709206, "grad_norm": 0.30529630184173584, "learning_rate": 1.3273187722844765e-06, "loss": 0.2883, "step": 16452 }, { "epoch": 2.3591912819042156, "grad_norm": 0.3076287508010864, "learning_rate": 1.3267527315253132e-06, "loss": 0.2861, "step": 16453 }, { "epoch": 2.3593346716375105, "grad_norm": 0.26587632298469543, "learning_rate": 1.3261867930237027e-06, "loss": 0.2815, "step": 16454 }, { "epoch": 2.359478061370806, "grad_norm": 0.27140626311302185, "learning_rate": 1.3256209567954008e-06, "loss": 0.2612, "step": 16455 }, { "epoch": 2.359621451104101, "grad_norm": 0.2816394567489624, "learning_rate": 1.325055222856159e-06, "loss": 0.2922, "step": 16456 }, { "epoch": 2.359764840837396, "grad_norm": 0.29757940769195557, "learning_rate": 1.324489591221727e-06, "loss": 0.2807, "step": 16457 }, { "epoch": 2.3599082305706913, "grad_norm": 0.2813064157962799, "learning_rate": 1.3239240619078525e-06, "loss": 0.2822, "step": 16458 }, { "epoch": 2.3600516203039863, "grad_norm": 0.27468180656433105, "learning_rate": 1.3233586349302757e-06, "loss": 0.2745, "step": 16459 }, { "epoch": 2.3601950100372813, "grad_norm": 0.2737017571926117, "learning_rate": 1.3227933103047385e-06, "loss": 0.2887, "step": 16460 }, { "epoch": 2.3603383997705762, "grad_norm": 0.31427299976348877, "learning_rate": 1.322228088046979e-06, "loss": 0.2874, "step": 16461 }, { "epoch": 2.3604817895038717, "grad_norm": 0.2722386121749878, "learning_rate": 1.3216629681727317e-06, "loss": 0.2612, "step": 16462 }, { "epoch": 2.3606251792371666, "grad_norm": 0.28598400950431824, "learning_rate": 1.3210979506977295e-06, "loss": 0.2639, "step": 16463 }, { "epoch": 2.3607685689704616, "grad_norm": 0.2662205100059509, "learning_rate": 1.3205330356377023e-06, "loss": 0.2905, "step": 16464 }, { "epoch": 2.360911958703757, "grad_norm": 0.27165794372558594, "learning_rate": 1.3199682230083743e-06, "loss": 0.2795, "step": 16465 }, { "epoch": 2.361055348437052, "grad_norm": 0.2896343469619751, "learning_rate": 1.3194035128254695e-06, "loss": 0.2674, "step": 16466 }, { "epoch": 2.361198738170347, "grad_norm": 0.2785177528858185, "learning_rate": 1.3188389051047095e-06, "loss": 0.2873, "step": 16467 }, { "epoch": 2.361342127903642, "grad_norm": 0.279610812664032, "learning_rate": 1.3182743998618115e-06, "loss": 0.2823, "step": 16468 }, { "epoch": 2.3614855176369374, "grad_norm": 0.28663012385368347, "learning_rate": 1.3177099971124907e-06, "loss": 0.2791, "step": 16469 }, { "epoch": 2.3616289073702323, "grad_norm": 0.26382049918174744, "learning_rate": 1.3171456968724588e-06, "loss": 0.2814, "step": 16470 }, { "epoch": 2.3617722971035273, "grad_norm": 0.2761213183403015, "learning_rate": 1.316581499157426e-06, "loss": 0.273, "step": 16471 }, { "epoch": 2.3619156868368223, "grad_norm": 0.28844112157821655, "learning_rate": 1.3160174039830975e-06, "loss": 0.2738, "step": 16472 }, { "epoch": 2.3620590765701177, "grad_norm": 0.25542861223220825, "learning_rate": 1.315453411365178e-06, "loss": 0.2804, "step": 16473 }, { "epoch": 2.3622024663034127, "grad_norm": 0.25989580154418945, "learning_rate": 1.3148895213193675e-06, "loss": 0.3016, "step": 16474 }, { "epoch": 2.3623458560367077, "grad_norm": 0.266579806804657, "learning_rate": 1.3143257338613653e-06, "loss": 0.2758, "step": 16475 }, { "epoch": 2.3624892457700026, "grad_norm": 0.28268685936927795, "learning_rate": 1.3137620490068636e-06, "loss": 0.2873, "step": 16476 }, { "epoch": 2.362632635503298, "grad_norm": 0.25977209210395813, "learning_rate": 1.3131984667715559e-06, "loss": 0.2891, "step": 16477 }, { "epoch": 2.362776025236593, "grad_norm": 0.2768896520137787, "learning_rate": 1.3126349871711313e-06, "loss": 0.2833, "step": 16478 }, { "epoch": 2.362919414969888, "grad_norm": 0.2814129889011383, "learning_rate": 1.3120716102212766e-06, "loss": 0.2733, "step": 16479 }, { "epoch": 2.3630628047031834, "grad_norm": 0.27703920006752014, "learning_rate": 1.3115083359376768e-06, "loss": 0.3064, "step": 16480 }, { "epoch": 2.3632061944364784, "grad_norm": 0.2902873456478119, "learning_rate": 1.3109451643360099e-06, "loss": 0.2871, "step": 16481 }, { "epoch": 2.3633495841697734, "grad_norm": 0.2668858766555786, "learning_rate": 1.3103820954319541e-06, "loss": 0.2914, "step": 16482 }, { "epoch": 2.3634929739030683, "grad_norm": 0.26697278022766113, "learning_rate": 1.3098191292411854e-06, "loss": 0.2776, "step": 16483 }, { "epoch": 2.3636363636363638, "grad_norm": 0.2772578299045563, "learning_rate": 1.3092562657793755e-06, "loss": 0.2818, "step": 16484 }, { "epoch": 2.3637797533696587, "grad_norm": 0.28303369879722595, "learning_rate": 1.3086935050621952e-06, "loss": 0.2811, "step": 16485 }, { "epoch": 2.3639231431029537, "grad_norm": 0.25592902302742004, "learning_rate": 1.3081308471053078e-06, "loss": 0.2849, "step": 16486 }, { "epoch": 2.364066532836249, "grad_norm": 0.27243438363075256, "learning_rate": 1.3075682919243788e-06, "loss": 0.2933, "step": 16487 }, { "epoch": 2.364209922569544, "grad_norm": 0.2703000605106354, "learning_rate": 1.3070058395350682e-06, "loss": 0.2808, "step": 16488 }, { "epoch": 2.364353312302839, "grad_norm": 0.27354225516319275, "learning_rate": 1.3064434899530326e-06, "loss": 0.2891, "step": 16489 }, { "epoch": 2.364496702036134, "grad_norm": 0.26993393898010254, "learning_rate": 1.3058812431939304e-06, "loss": 0.2752, "step": 16490 }, { "epoch": 2.3646400917694295, "grad_norm": 0.2753278613090515, "learning_rate": 1.3053190992734127e-06, "loss": 0.2764, "step": 16491 }, { "epoch": 2.3647834815027244, "grad_norm": 0.28535667061805725, "learning_rate": 1.3047570582071267e-06, "loss": 0.2977, "step": 16492 }, { "epoch": 2.3649268712360194, "grad_norm": 0.26465263962745667, "learning_rate": 1.30419512001072e-06, "loss": 0.2786, "step": 16493 }, { "epoch": 2.365070260969315, "grad_norm": 0.2643249034881592, "learning_rate": 1.3036332846998357e-06, "loss": 0.2732, "step": 16494 }, { "epoch": 2.36521365070261, "grad_norm": 0.2782583236694336, "learning_rate": 1.3030715522901143e-06, "loss": 0.27, "step": 16495 }, { "epoch": 2.365357040435905, "grad_norm": 0.2656029164791107, "learning_rate": 1.302509922797196e-06, "loss": 0.2681, "step": 16496 }, { "epoch": 2.3655004301691998, "grad_norm": 0.27373504638671875, "learning_rate": 1.3019483962367118e-06, "loss": 0.2891, "step": 16497 }, { "epoch": 2.365643819902495, "grad_norm": 0.29501572251319885, "learning_rate": 1.3013869726242962e-06, "loss": 0.2897, "step": 16498 }, { "epoch": 2.36578720963579, "grad_norm": 0.2609526216983795, "learning_rate": 1.3008256519755775e-06, "loss": 0.2731, "step": 16499 }, { "epoch": 2.365930599369085, "grad_norm": 0.2806423008441925, "learning_rate": 1.3002644343061815e-06, "loss": 0.304, "step": 16500 }, { "epoch": 2.36607398910238, "grad_norm": 0.2631125748157501, "learning_rate": 1.2997033196317332e-06, "loss": 0.2858, "step": 16501 }, { "epoch": 2.3662173788356755, "grad_norm": 0.25287488102912903, "learning_rate": 1.2991423079678533e-06, "loss": 0.265, "step": 16502 }, { "epoch": 2.3663607685689705, "grad_norm": 0.2679084539413452, "learning_rate": 1.2985813993301577e-06, "loss": 0.2803, "step": 16503 }, { "epoch": 2.3665041583022655, "grad_norm": 0.2651456892490387, "learning_rate": 1.2980205937342617e-06, "loss": 0.285, "step": 16504 }, { "epoch": 2.3666475480355604, "grad_norm": 0.2781986594200134, "learning_rate": 1.2974598911957776e-06, "loss": 0.2838, "step": 16505 }, { "epoch": 2.366790937768856, "grad_norm": 0.2601288855075836, "learning_rate": 1.2968992917303147e-06, "loss": 0.2747, "step": 16506 }, { "epoch": 2.366934327502151, "grad_norm": 0.2816157937049866, "learning_rate": 1.296338795353479e-06, "loss": 0.2637, "step": 16507 }, { "epoch": 2.367077717235446, "grad_norm": 0.27660661935806274, "learning_rate": 1.2957784020808738e-06, "loss": 0.2676, "step": 16508 }, { "epoch": 2.367221106968741, "grad_norm": 0.27232423424720764, "learning_rate": 1.2952181119281004e-06, "loss": 0.2773, "step": 16509 }, { "epoch": 2.367364496702036, "grad_norm": 0.2789285182952881, "learning_rate": 1.294657924910755e-06, "loss": 0.2751, "step": 16510 }, { "epoch": 2.367507886435331, "grad_norm": 0.27132827043533325, "learning_rate": 1.2940978410444332e-06, "loss": 0.2792, "step": 16511 }, { "epoch": 2.367651276168626, "grad_norm": 0.2706957757472992, "learning_rate": 1.2935378603447285e-06, "loss": 0.2686, "step": 16512 }, { "epoch": 2.3677946659019216, "grad_norm": 0.2836093604564667, "learning_rate": 1.2929779828272265e-06, "loss": 0.2654, "step": 16513 }, { "epoch": 2.3679380556352165, "grad_norm": 0.2804381847381592, "learning_rate": 1.2924182085075153e-06, "loss": 0.2932, "step": 16514 }, { "epoch": 2.3680814453685115, "grad_norm": 0.26584458351135254, "learning_rate": 1.2918585374011778e-06, "loss": 0.2914, "step": 16515 }, { "epoch": 2.368224835101807, "grad_norm": 0.2852959632873535, "learning_rate": 1.2912989695237944e-06, "loss": 0.2965, "step": 16516 }, { "epoch": 2.368368224835102, "grad_norm": 0.27753761410713196, "learning_rate": 1.2907395048909422e-06, "loss": 0.2927, "step": 16517 }, { "epoch": 2.368511614568397, "grad_norm": 0.2694056034088135, "learning_rate": 1.2901801435181983e-06, "loss": 0.289, "step": 16518 }, { "epoch": 2.368655004301692, "grad_norm": 0.2911851108074188, "learning_rate": 1.2896208854211301e-06, "loss": 0.2833, "step": 16519 }, { "epoch": 2.3687983940349873, "grad_norm": 0.2674497365951538, "learning_rate": 1.2890617306153097e-06, "loss": 0.3026, "step": 16520 }, { "epoch": 2.3689417837682822, "grad_norm": 0.2645195424556732, "learning_rate": 1.288502679116302e-06, "loss": 0.2812, "step": 16521 }, { "epoch": 2.369085173501577, "grad_norm": 0.2712460458278656, "learning_rate": 1.2879437309396702e-06, "loss": 0.2644, "step": 16522 }, { "epoch": 2.3692285632348726, "grad_norm": 0.2951548099517822, "learning_rate": 1.2873848861009748e-06, "loss": 0.2766, "step": 16523 }, { "epoch": 2.3693719529681676, "grad_norm": 0.2741716206073761, "learning_rate": 1.2868261446157731e-06, "loss": 0.2709, "step": 16524 }, { "epoch": 2.3695153427014626, "grad_norm": 0.2899153232574463, "learning_rate": 1.2862675064996194e-06, "loss": 0.2606, "step": 16525 }, { "epoch": 2.3696587324347576, "grad_norm": 0.2847764790058136, "learning_rate": 1.285708971768066e-06, "loss": 0.2769, "step": 16526 }, { "epoch": 2.3698021221680525, "grad_norm": 0.2589833736419678, "learning_rate": 1.285150540436661e-06, "loss": 0.2956, "step": 16527 }, { "epoch": 2.369945511901348, "grad_norm": 0.2728312313556671, "learning_rate": 1.2845922125209497e-06, "loss": 0.2886, "step": 16528 }, { "epoch": 2.370088901634643, "grad_norm": 0.28740444779396057, "learning_rate": 1.2840339880364783e-06, "loss": 0.2678, "step": 16529 }, { "epoch": 2.370232291367938, "grad_norm": 0.26733696460723877, "learning_rate": 1.283475866998783e-06, "loss": 0.2623, "step": 16530 }, { "epoch": 2.3703756811012333, "grad_norm": 0.2785557210445404, "learning_rate": 1.2829178494234018e-06, "loss": 0.2719, "step": 16531 }, { "epoch": 2.3705190708345283, "grad_norm": 0.27261197566986084, "learning_rate": 1.2823599353258697e-06, "loss": 0.2914, "step": 16532 }, { "epoch": 2.3706624605678233, "grad_norm": 0.2690759301185608, "learning_rate": 1.281802124721719e-06, "loss": 0.2876, "step": 16533 }, { "epoch": 2.3708058503011182, "grad_norm": 0.2696482837200165, "learning_rate": 1.2812444176264783e-06, "loss": 0.2786, "step": 16534 }, { "epoch": 2.3709492400344137, "grad_norm": 0.2779867351055145, "learning_rate": 1.2806868140556711e-06, "loss": 0.2749, "step": 16535 }, { "epoch": 2.3710926297677086, "grad_norm": 0.273176908493042, "learning_rate": 1.2801293140248217e-06, "loss": 0.275, "step": 16536 }, { "epoch": 2.3712360195010036, "grad_norm": 0.2680237889289856, "learning_rate": 1.2795719175494502e-06, "loss": 0.2864, "step": 16537 }, { "epoch": 2.371379409234299, "grad_norm": 0.28207337856292725, "learning_rate": 1.279014624645073e-06, "loss": 0.2985, "step": 16538 }, { "epoch": 2.371522798967594, "grad_norm": 0.27920129895210266, "learning_rate": 1.2784574353272067e-06, "loss": 0.2729, "step": 16539 }, { "epoch": 2.371666188700889, "grad_norm": 0.28958722949028015, "learning_rate": 1.2779003496113585e-06, "loss": 0.2772, "step": 16540 }, { "epoch": 2.371809578434184, "grad_norm": 0.2701221704483032, "learning_rate": 1.2773433675130393e-06, "loss": 0.2825, "step": 16541 }, { "epoch": 2.3719529681674794, "grad_norm": 0.2601030170917511, "learning_rate": 1.2767864890477545e-06, "loss": 0.2788, "step": 16542 }, { "epoch": 2.3720963579007743, "grad_norm": 0.2832806408405304, "learning_rate": 1.276229714231006e-06, "loss": 0.2717, "step": 16543 }, { "epoch": 2.3722397476340693, "grad_norm": 0.30086466670036316, "learning_rate": 1.2756730430782943e-06, "loss": 0.2806, "step": 16544 }, { "epoch": 2.3723831373673647, "grad_norm": 0.2777857780456543, "learning_rate": 1.2751164756051155e-06, "loss": 0.268, "step": 16545 }, { "epoch": 2.3725265271006597, "grad_norm": 0.2926784157752991, "learning_rate": 1.2745600118269646e-06, "loss": 0.2703, "step": 16546 }, { "epoch": 2.3726699168339547, "grad_norm": 0.27004578709602356, "learning_rate": 1.2740036517593318e-06, "loss": 0.2952, "step": 16547 }, { "epoch": 2.3728133065672496, "grad_norm": 0.26412513852119446, "learning_rate": 1.2734473954177057e-06, "loss": 0.2873, "step": 16548 }, { "epoch": 2.372956696300545, "grad_norm": 0.2663252055644989, "learning_rate": 1.2728912428175722e-06, "loss": 0.2962, "step": 16549 }, { "epoch": 2.37310008603384, "grad_norm": 0.2800375819206238, "learning_rate": 1.272335193974414e-06, "loss": 0.2859, "step": 16550 }, { "epoch": 2.373243475767135, "grad_norm": 0.26621121168136597, "learning_rate": 1.2717792489037085e-06, "loss": 0.2869, "step": 16551 }, { "epoch": 2.37338686550043, "grad_norm": 0.2616465985774994, "learning_rate": 1.2712234076209335e-06, "loss": 0.2823, "step": 16552 }, { "epoch": 2.3735302552337254, "grad_norm": 0.2848285734653473, "learning_rate": 1.2706676701415631e-06, "loss": 0.2516, "step": 16553 }, { "epoch": 2.3736736449670204, "grad_norm": 0.2754361629486084, "learning_rate": 1.2701120364810677e-06, "loss": 0.2678, "step": 16554 }, { "epoch": 2.3738170347003154, "grad_norm": 0.26457929611206055, "learning_rate": 1.269556506654916e-06, "loss": 0.2616, "step": 16555 }, { "epoch": 2.3739604244336103, "grad_norm": 0.2782074809074402, "learning_rate": 1.2690010806785735e-06, "loss": 0.2669, "step": 16556 }, { "epoch": 2.3741038141669057, "grad_norm": 0.2639082372188568, "learning_rate": 1.2684457585675008e-06, "loss": 0.2723, "step": 16557 }, { "epoch": 2.3742472039002007, "grad_norm": 0.2706489861011505, "learning_rate": 1.2678905403371578e-06, "loss": 0.27, "step": 16558 }, { "epoch": 2.3743905936334957, "grad_norm": 0.2746860682964325, "learning_rate": 1.2673354260030007e-06, "loss": 0.2658, "step": 16559 }, { "epoch": 2.374533983366791, "grad_norm": 0.2691013514995575, "learning_rate": 1.2667804155804836e-06, "loss": 0.2721, "step": 16560 }, { "epoch": 2.374677373100086, "grad_norm": 0.26956042647361755, "learning_rate": 1.2662255090850573e-06, "loss": 0.2698, "step": 16561 }, { "epoch": 2.374820762833381, "grad_norm": 0.2739047706127167, "learning_rate": 1.2656707065321688e-06, "loss": 0.2919, "step": 16562 }, { "epoch": 2.374964152566676, "grad_norm": 0.28645145893096924, "learning_rate": 1.2651160079372632e-06, "loss": 0.2778, "step": 16563 }, { "epoch": 2.3751075422999715, "grad_norm": 0.262360155582428, "learning_rate": 1.264561413315783e-06, "loss": 0.2994, "step": 16564 }, { "epoch": 2.3752509320332664, "grad_norm": 0.28119486570358276, "learning_rate": 1.264006922683167e-06, "loss": 0.2743, "step": 16565 }, { "epoch": 2.3753943217665614, "grad_norm": 0.2680036425590515, "learning_rate": 1.2634525360548522e-06, "loss": 0.2794, "step": 16566 }, { "epoch": 2.375537711499857, "grad_norm": 0.26155325770378113, "learning_rate": 1.2628982534462696e-06, "loss": 0.2822, "step": 16567 }, { "epoch": 2.375681101233152, "grad_norm": 0.2824972867965698, "learning_rate": 1.2623440748728511e-06, "loss": 0.2858, "step": 16568 }, { "epoch": 2.3758244909664468, "grad_norm": 0.2780006527900696, "learning_rate": 1.261790000350024e-06, "loss": 0.2777, "step": 16569 }, { "epoch": 2.3759678806997417, "grad_norm": 0.28908467292785645, "learning_rate": 1.2612360298932126e-06, "loss": 0.2741, "step": 16570 }, { "epoch": 2.376111270433037, "grad_norm": 0.301044225692749, "learning_rate": 1.260682163517839e-06, "loss": 0.2956, "step": 16571 }, { "epoch": 2.376254660166332, "grad_norm": 0.26393088698387146, "learning_rate": 1.2601284012393232e-06, "loss": 0.2862, "step": 16572 }, { "epoch": 2.376398049899627, "grad_norm": 0.2670386731624603, "learning_rate": 1.2595747430730781e-06, "loss": 0.2834, "step": 16573 }, { "epoch": 2.3765414396329225, "grad_norm": 0.2716423571109772, "learning_rate": 1.259021189034519e-06, "loss": 0.2774, "step": 16574 }, { "epoch": 2.3766848293662175, "grad_norm": 0.2828661799430847, "learning_rate": 1.2584677391390543e-06, "loss": 0.3064, "step": 16575 }, { "epoch": 2.3768282190995125, "grad_norm": 0.27092981338500977, "learning_rate": 1.2579143934020928e-06, "loss": 0.2725, "step": 16576 }, { "epoch": 2.3769716088328074, "grad_norm": 0.2702680826187134, "learning_rate": 1.2573611518390393e-06, "loss": 0.279, "step": 16577 }, { "epoch": 2.3771149985661024, "grad_norm": 0.27679067850112915, "learning_rate": 1.2568080144652918e-06, "loss": 0.2624, "step": 16578 }, { "epoch": 2.377258388299398, "grad_norm": 0.27686184644699097, "learning_rate": 1.2562549812962515e-06, "loss": 0.2776, "step": 16579 }, { "epoch": 2.377401778032693, "grad_norm": 0.2680324912071228, "learning_rate": 1.2557020523473146e-06, "loss": 0.2774, "step": 16580 }, { "epoch": 2.377545167765988, "grad_norm": 0.27780961990356445, "learning_rate": 1.2551492276338723e-06, "loss": 0.2784, "step": 16581 }, { "epoch": 2.377688557499283, "grad_norm": 0.2756621241569519, "learning_rate": 1.2545965071713145e-06, "loss": 0.2826, "step": 16582 }, { "epoch": 2.377831947232578, "grad_norm": 0.29064932465553284, "learning_rate": 1.2540438909750302e-06, "loss": 0.2848, "step": 16583 }, { "epoch": 2.377975336965873, "grad_norm": 0.2891003489494324, "learning_rate": 1.2534913790604003e-06, "loss": 0.2698, "step": 16584 }, { "epoch": 2.378118726699168, "grad_norm": 0.28326016664505005, "learning_rate": 1.252938971442807e-06, "loss": 0.2883, "step": 16585 }, { "epoch": 2.3782621164324635, "grad_norm": 0.2719571888446808, "learning_rate": 1.252386668137629e-06, "loss": 0.2806, "step": 16586 }, { "epoch": 2.3784055061657585, "grad_norm": 0.27116453647613525, "learning_rate": 1.2518344691602409e-06, "loss": 0.286, "step": 16587 }, { "epoch": 2.3785488958990535, "grad_norm": 0.2634568214416504, "learning_rate": 1.2512823745260166e-06, "loss": 0.2835, "step": 16588 }, { "epoch": 2.378692285632349, "grad_norm": 0.28641584515571594, "learning_rate": 1.2507303842503237e-06, "loss": 0.2833, "step": 16589 }, { "epoch": 2.378835675365644, "grad_norm": 0.28588366508483887, "learning_rate": 1.2501784983485287e-06, "loss": 0.2876, "step": 16590 }, { "epoch": 2.378979065098939, "grad_norm": 0.2648952305316925, "learning_rate": 1.2496267168359965e-06, "loss": 0.2764, "step": 16591 }, { "epoch": 2.379122454832234, "grad_norm": 0.2725205719470978, "learning_rate": 1.249075039728087e-06, "loss": 0.2751, "step": 16592 }, { "epoch": 2.3792658445655293, "grad_norm": 0.272274911403656, "learning_rate": 1.2485234670401603e-06, "loss": 0.2542, "step": 16593 }, { "epoch": 2.3794092342988242, "grad_norm": 0.27309879660606384, "learning_rate": 1.2479719987875677e-06, "loss": 0.278, "step": 16594 }, { "epoch": 2.379552624032119, "grad_norm": 0.2664056420326233, "learning_rate": 1.2474206349856626e-06, "loss": 0.2845, "step": 16595 }, { "epoch": 2.3796960137654146, "grad_norm": 0.27441349625587463, "learning_rate": 1.2468693756497952e-06, "loss": 0.2606, "step": 16596 }, { "epoch": 2.3798394034987096, "grad_norm": 0.2857763469219208, "learning_rate": 1.2463182207953106e-06, "loss": 0.2637, "step": 16597 }, { "epoch": 2.3799827932320046, "grad_norm": 0.27218854427337646, "learning_rate": 1.2457671704375524e-06, "loss": 0.3015, "step": 16598 }, { "epoch": 2.3801261829652995, "grad_norm": 0.2552609443664551, "learning_rate": 1.2452162245918615e-06, "loss": 0.3011, "step": 16599 }, { "epoch": 2.380269572698595, "grad_norm": 0.2757749855518341, "learning_rate": 1.2446653832735745e-06, "loss": 0.2854, "step": 16600 }, { "epoch": 2.38041296243189, "grad_norm": 0.2721192538738251, "learning_rate": 1.2441146464980264e-06, "loss": 0.291, "step": 16601 }, { "epoch": 2.380556352165185, "grad_norm": 0.26730912923812866, "learning_rate": 1.2435640142805493e-06, "loss": 0.2797, "step": 16602 }, { "epoch": 2.38069974189848, "grad_norm": 0.2955242991447449, "learning_rate": 1.2430134866364714e-06, "loss": 0.2722, "step": 16603 }, { "epoch": 2.3808431316317753, "grad_norm": 0.29502683877944946, "learning_rate": 1.2424630635811202e-06, "loss": 0.2934, "step": 16604 }, { "epoch": 2.3809865213650703, "grad_norm": 0.27207520604133606, "learning_rate": 1.2419127451298157e-06, "loss": 0.2772, "step": 16605 }, { "epoch": 2.3811299110983652, "grad_norm": 0.28738248348236084, "learning_rate": 1.2413625312978788e-06, "loss": 0.2701, "step": 16606 }, { "epoch": 2.3812733008316602, "grad_norm": 0.28018298745155334, "learning_rate": 1.240812422100628e-06, "loss": 0.2937, "step": 16607 }, { "epoch": 2.3814166905649556, "grad_norm": 0.27858293056488037, "learning_rate": 1.240262417553376e-06, "loss": 0.2748, "step": 16608 }, { "epoch": 2.3815600802982506, "grad_norm": 0.27367347478866577, "learning_rate": 1.2397125176714353e-06, "loss": 0.2677, "step": 16609 }, { "epoch": 2.3817034700315456, "grad_norm": 0.2866431772708893, "learning_rate": 1.2391627224701147e-06, "loss": 0.2684, "step": 16610 }, { "epoch": 2.381846859764841, "grad_norm": 0.2843715250492096, "learning_rate": 1.2386130319647172e-06, "loss": 0.284, "step": 16611 }, { "epoch": 2.381990249498136, "grad_norm": 0.26916825771331787, "learning_rate": 1.2380634461705471e-06, "loss": 0.2874, "step": 16612 }, { "epoch": 2.382133639231431, "grad_norm": 0.27239298820495605, "learning_rate": 1.2375139651029039e-06, "loss": 0.2787, "step": 16613 }, { "epoch": 2.382277028964726, "grad_norm": 0.2870551645755768, "learning_rate": 1.2369645887770837e-06, "loss": 0.3066, "step": 16614 }, { "epoch": 2.3824204186980213, "grad_norm": 0.2915690541267395, "learning_rate": 1.2364153172083804e-06, "loss": 0.2783, "step": 16615 }, { "epoch": 2.3825638084313163, "grad_norm": 0.267689973115921, "learning_rate": 1.2358661504120855e-06, "loss": 0.278, "step": 16616 }, { "epoch": 2.3827071981646113, "grad_norm": 0.2898937463760376, "learning_rate": 1.2353170884034871e-06, "loss": 0.2766, "step": 16617 }, { "epoch": 2.3828505878979067, "grad_norm": 0.2587593197822571, "learning_rate": 1.2347681311978694e-06, "loss": 0.292, "step": 16618 }, { "epoch": 2.3829939776312017, "grad_norm": 0.26632043719291687, "learning_rate": 1.234219278810515e-06, "loss": 0.2891, "step": 16619 }, { "epoch": 2.3831373673644967, "grad_norm": 0.2740492522716522, "learning_rate": 1.2336705312567044e-06, "loss": 0.274, "step": 16620 }, { "epoch": 2.3832807570977916, "grad_norm": 0.29195013642311096, "learning_rate": 1.233121888551711e-06, "loss": 0.2975, "step": 16621 }, { "epoch": 2.383424146831087, "grad_norm": 0.2736786901950836, "learning_rate": 1.23257335071081e-06, "loss": 0.2621, "step": 16622 }, { "epoch": 2.383567536564382, "grad_norm": 0.25613123178482056, "learning_rate": 1.232024917749271e-06, "loss": 0.2849, "step": 16623 }, { "epoch": 2.383710926297677, "grad_norm": 0.2686079740524292, "learning_rate": 1.2314765896823626e-06, "loss": 0.2763, "step": 16624 }, { "epoch": 2.3838543160309724, "grad_norm": 0.2860800623893738, "learning_rate": 1.2309283665253486e-06, "loss": 0.2774, "step": 16625 }, { "epoch": 2.3839977057642674, "grad_norm": 0.2879040241241455, "learning_rate": 1.2303802482934923e-06, "loss": 0.2953, "step": 16626 }, { "epoch": 2.3841410954975624, "grad_norm": 0.2632448971271515, "learning_rate": 1.2298322350020498e-06, "loss": 0.275, "step": 16627 }, { "epoch": 2.3842844852308573, "grad_norm": 0.28086382150650024, "learning_rate": 1.2292843266662786e-06, "loss": 0.2862, "step": 16628 }, { "epoch": 2.3844278749641528, "grad_norm": 0.29549863934516907, "learning_rate": 1.2287365233014309e-06, "loss": 0.2703, "step": 16629 }, { "epoch": 2.3845712646974477, "grad_norm": 0.2703265845775604, "learning_rate": 1.228188824922757e-06, "loss": 0.2806, "step": 16630 }, { "epoch": 2.3847146544307427, "grad_norm": 0.26823508739471436, "learning_rate": 1.227641231545506e-06, "loss": 0.2918, "step": 16631 }, { "epoch": 2.3848580441640377, "grad_norm": 0.3037734031677246, "learning_rate": 1.2270937431849184e-06, "loss": 0.2855, "step": 16632 }, { "epoch": 2.385001433897333, "grad_norm": 0.2785499691963196, "learning_rate": 1.2265463598562372e-06, "loss": 0.2827, "step": 16633 }, { "epoch": 2.385144823630628, "grad_norm": 0.2659006714820862, "learning_rate": 1.2259990815747004e-06, "loss": 0.2967, "step": 16634 }, { "epoch": 2.385288213363923, "grad_norm": 0.27313902974128723, "learning_rate": 1.2254519083555427e-06, "loss": 0.2523, "step": 16635 }, { "epoch": 2.385431603097218, "grad_norm": 0.26638951897621155, "learning_rate": 1.2249048402139985e-06, "loss": 0.2923, "step": 16636 }, { "epoch": 2.3855749928305134, "grad_norm": 0.267593115568161, "learning_rate": 1.2243578771652981e-06, "loss": 0.3036, "step": 16637 }, { "epoch": 2.3857183825638084, "grad_norm": 0.2810761630535126, "learning_rate": 1.223811019224665e-06, "loss": 0.2781, "step": 16638 }, { "epoch": 2.3858617722971034, "grad_norm": 0.2824682891368866, "learning_rate": 1.2232642664073241e-06, "loss": 0.2956, "step": 16639 }, { "epoch": 2.386005162030399, "grad_norm": 0.33295172452926636, "learning_rate": 1.2227176187284961e-06, "loss": 0.279, "step": 16640 }, { "epoch": 2.386148551763694, "grad_norm": 0.27660951018333435, "learning_rate": 1.2221710762033995e-06, "loss": 0.3038, "step": 16641 }, { "epoch": 2.3862919414969888, "grad_norm": 0.27462977170944214, "learning_rate": 1.2216246388472503e-06, "loss": 0.2636, "step": 16642 }, { "epoch": 2.3864353312302837, "grad_norm": 0.2851315140724182, "learning_rate": 1.221078306675257e-06, "loss": 0.2857, "step": 16643 }, { "epoch": 2.386578720963579, "grad_norm": 0.27411630749702454, "learning_rate": 1.2205320797026304e-06, "loss": 0.2862, "step": 16644 }, { "epoch": 2.386722110696874, "grad_norm": 0.2588344216346741, "learning_rate": 1.2199859579445772e-06, "loss": 0.277, "step": 16645 }, { "epoch": 2.386865500430169, "grad_norm": 0.26216569542884827, "learning_rate": 1.2194399414163005e-06, "loss": 0.2746, "step": 16646 }, { "epoch": 2.3870088901634645, "grad_norm": 0.30591917037963867, "learning_rate": 1.218894030133001e-06, "loss": 0.2842, "step": 16647 }, { "epoch": 2.3871522798967595, "grad_norm": 0.2761591970920563, "learning_rate": 1.2183482241098742e-06, "loss": 0.26, "step": 16648 }, { "epoch": 2.3872956696300545, "grad_norm": 0.2638307511806488, "learning_rate": 1.2178025233621154e-06, "loss": 0.2918, "step": 16649 }, { "epoch": 2.3874390593633494, "grad_norm": 0.27680936455726624, "learning_rate": 1.2172569279049167e-06, "loss": 0.2734, "step": 16650 }, { "epoch": 2.387582449096645, "grad_norm": 0.25588497519493103, "learning_rate": 1.2167114377534655e-06, "loss": 0.2988, "step": 16651 }, { "epoch": 2.38772583882994, "grad_norm": 0.26806747913360596, "learning_rate": 1.2161660529229485e-06, "loss": 0.2767, "step": 16652 }, { "epoch": 2.387869228563235, "grad_norm": 0.28860655426979065, "learning_rate": 1.215620773428548e-06, "loss": 0.2931, "step": 16653 }, { "epoch": 2.3880126182965298, "grad_norm": 0.2635938823223114, "learning_rate": 1.2150755992854435e-06, "loss": 0.2808, "step": 16654 }, { "epoch": 2.388156008029825, "grad_norm": 0.26954329013824463, "learning_rate": 1.2145305305088118e-06, "loss": 0.2862, "step": 16655 }, { "epoch": 2.38829939776312, "grad_norm": 0.27667900919914246, "learning_rate": 1.2139855671138272e-06, "loss": 0.278, "step": 16656 }, { "epoch": 2.388442787496415, "grad_norm": 0.26358044147491455, "learning_rate": 1.2134407091156607e-06, "loss": 0.2653, "step": 16657 }, { "epoch": 2.38858617722971, "grad_norm": 0.27612513303756714, "learning_rate": 1.2128959565294812e-06, "loss": 0.2729, "step": 16658 }, { "epoch": 2.3887295669630055, "grad_norm": 0.27090224623680115, "learning_rate": 1.2123513093704515e-06, "loss": 0.2782, "step": 16659 }, { "epoch": 2.3888729566963005, "grad_norm": 0.2560124695301056, "learning_rate": 1.2118067676537344e-06, "loss": 0.2766, "step": 16660 }, { "epoch": 2.3890163464295955, "grad_norm": 0.2658321261405945, "learning_rate": 1.21126233139449e-06, "loss": 0.2951, "step": 16661 }, { "epoch": 2.389159736162891, "grad_norm": 0.28235676884651184, "learning_rate": 1.210718000607874e-06, "loss": 0.277, "step": 16662 }, { "epoch": 2.389303125896186, "grad_norm": 0.26316720247268677, "learning_rate": 1.2101737753090393e-06, "loss": 0.2635, "step": 16663 }, { "epoch": 2.389446515629481, "grad_norm": 0.2772999703884125, "learning_rate": 1.2096296555131386e-06, "loss": 0.2965, "step": 16664 }, { "epoch": 2.389589905362776, "grad_norm": 0.25494807958602905, "learning_rate": 1.2090856412353164e-06, "loss": 0.2824, "step": 16665 }, { "epoch": 2.3897332950960712, "grad_norm": 0.2623266577720642, "learning_rate": 1.2085417324907183e-06, "loss": 0.2914, "step": 16666 }, { "epoch": 2.389876684829366, "grad_norm": 0.2856162488460541, "learning_rate": 1.207997929294486e-06, "loss": 0.2852, "step": 16667 }, { "epoch": 2.390020074562661, "grad_norm": 0.272309809923172, "learning_rate": 1.2074542316617582e-06, "loss": 0.2692, "step": 16668 }, { "epoch": 2.3901634642959566, "grad_norm": 0.2688281834125519, "learning_rate": 1.2069106396076707e-06, "loss": 0.2875, "step": 16669 }, { "epoch": 2.3903068540292516, "grad_norm": 0.2764608561992645, "learning_rate": 1.206367153147356e-06, "loss": 0.2842, "step": 16670 }, { "epoch": 2.3904502437625466, "grad_norm": 0.2627934515476227, "learning_rate": 1.2058237722959442e-06, "loss": 0.2784, "step": 16671 }, { "epoch": 2.3905936334958415, "grad_norm": 0.2628084719181061, "learning_rate": 1.2052804970685616e-06, "loss": 0.2852, "step": 16672 }, { "epoch": 2.390737023229137, "grad_norm": 0.26791343092918396, "learning_rate": 1.2047373274803332e-06, "loss": 0.2842, "step": 16673 }, { "epoch": 2.390880412962432, "grad_norm": 0.2907997965812683, "learning_rate": 1.2041942635463804e-06, "loss": 0.2925, "step": 16674 }, { "epoch": 2.391023802695727, "grad_norm": 0.26473337411880493, "learning_rate": 1.2036513052818187e-06, "loss": 0.292, "step": 16675 }, { "epoch": 2.3911671924290223, "grad_norm": 0.27242377400398254, "learning_rate": 1.203108452701765e-06, "loss": 0.2828, "step": 16676 }, { "epoch": 2.3913105821623173, "grad_norm": 0.26400241255760193, "learning_rate": 1.2025657058213314e-06, "loss": 0.2785, "step": 16677 }, { "epoch": 2.3914539718956123, "grad_norm": 0.2578360438346863, "learning_rate": 1.202023064655627e-06, "loss": 0.277, "step": 16678 }, { "epoch": 2.3915973616289072, "grad_norm": 0.27190518379211426, "learning_rate": 1.2014805292197579e-06, "loss": 0.284, "step": 16679 }, { "epoch": 2.3917407513622027, "grad_norm": 0.26832252740859985, "learning_rate": 1.200938099528829e-06, "loss": 0.3049, "step": 16680 }, { "epoch": 2.3918841410954976, "grad_norm": 0.25029096007347107, "learning_rate": 1.200395775597938e-06, "loss": 0.2851, "step": 16681 }, { "epoch": 2.3920275308287926, "grad_norm": 0.27572140097618103, "learning_rate": 1.1998535574421837e-06, "loss": 0.3077, "step": 16682 }, { "epoch": 2.3921709205620876, "grad_norm": 0.2894400656223297, "learning_rate": 1.1993114450766607e-06, "loss": 0.2977, "step": 16683 }, { "epoch": 2.392314310295383, "grad_norm": 0.28059977293014526, "learning_rate": 1.1987694385164605e-06, "loss": 0.2728, "step": 16684 }, { "epoch": 2.392457700028678, "grad_norm": 0.258976548910141, "learning_rate": 1.1982275377766734e-06, "loss": 0.286, "step": 16685 }, { "epoch": 2.392601089761973, "grad_norm": 0.28785380721092224, "learning_rate": 1.197685742872382e-06, "loss": 0.274, "step": 16686 }, { "epoch": 2.392744479495268, "grad_norm": 0.285912424325943, "learning_rate": 1.1971440538186702e-06, "loss": 0.2731, "step": 16687 }, { "epoch": 2.3928878692285633, "grad_norm": 0.26119574904441833, "learning_rate": 1.196602470630618e-06, "loss": 0.2803, "step": 16688 }, { "epoch": 2.3930312589618583, "grad_norm": 0.28692227602005005, "learning_rate": 1.1960609933233025e-06, "loss": 0.2772, "step": 16689 }, { "epoch": 2.3931746486951533, "grad_norm": 0.2779251039028168, "learning_rate": 1.1955196219117977e-06, "loss": 0.2879, "step": 16690 }, { "epoch": 2.3933180384284487, "grad_norm": 0.28826066851615906, "learning_rate": 1.1949783564111733e-06, "loss": 0.2893, "step": 16691 }, { "epoch": 2.3934614281617437, "grad_norm": 0.2752123177051544, "learning_rate": 1.1944371968364988e-06, "loss": 0.2873, "step": 16692 }, { "epoch": 2.3936048178950387, "grad_norm": 0.2735508978366852, "learning_rate": 1.193896143202839e-06, "loss": 0.3046, "step": 16693 }, { "epoch": 2.3937482076283336, "grad_norm": 0.26683279871940613, "learning_rate": 1.1933551955252554e-06, "loss": 0.2912, "step": 16694 }, { "epoch": 2.393891597361629, "grad_norm": 0.25932547450065613, "learning_rate": 1.1928143538188076e-06, "loss": 0.2755, "step": 16695 }, { "epoch": 2.394034987094924, "grad_norm": 0.28964895009994507, "learning_rate": 1.1922736180985523e-06, "loss": 0.2998, "step": 16696 }, { "epoch": 2.394178376828219, "grad_norm": 0.28329265117645264, "learning_rate": 1.1917329883795415e-06, "loss": 0.289, "step": 16697 }, { "epoch": 2.3943217665615144, "grad_norm": 0.2747417390346527, "learning_rate": 1.1911924646768258e-06, "loss": 0.2612, "step": 16698 }, { "epoch": 2.3944651562948094, "grad_norm": 0.267752081155777, "learning_rate": 1.1906520470054529e-06, "loss": 0.2729, "step": 16699 }, { "epoch": 2.3946085460281044, "grad_norm": 0.27939289808273315, "learning_rate": 1.190111735380467e-06, "loss": 0.277, "step": 16700 }, { "epoch": 2.3947519357613993, "grad_norm": 0.2710534334182739, "learning_rate": 1.1895715298169114e-06, "loss": 0.2698, "step": 16701 }, { "epoch": 2.3948953254946947, "grad_norm": 0.2725379168987274, "learning_rate": 1.1890314303298211e-06, "loss": 0.2977, "step": 16702 }, { "epoch": 2.3950387152279897, "grad_norm": 0.24989451467990875, "learning_rate": 1.1884914369342337e-06, "loss": 0.2842, "step": 16703 }, { "epoch": 2.3951821049612847, "grad_norm": 0.2681390643119812, "learning_rate": 1.187951549645181e-06, "loss": 0.2719, "step": 16704 }, { "epoch": 2.39532549469458, "grad_norm": 0.2834169268608093, "learning_rate": 1.1874117684776937e-06, "loss": 0.2994, "step": 16705 }, { "epoch": 2.395468884427875, "grad_norm": 0.274888813495636, "learning_rate": 1.1868720934467975e-06, "loss": 0.2667, "step": 16706 }, { "epoch": 2.39561227416117, "grad_norm": 0.26438406109809875, "learning_rate": 1.1863325245675172e-06, "loss": 0.2805, "step": 16707 }, { "epoch": 2.395755663894465, "grad_norm": 0.2642524242401123, "learning_rate": 1.1857930618548725e-06, "loss": 0.2642, "step": 16708 }, { "epoch": 2.39589905362776, "grad_norm": 0.28045496344566345, "learning_rate": 1.185253705323881e-06, "loss": 0.2985, "step": 16709 }, { "epoch": 2.3960424433610554, "grad_norm": 0.2767944931983948, "learning_rate": 1.184714454989559e-06, "loss": 0.2899, "step": 16710 }, { "epoch": 2.3961858330943504, "grad_norm": 0.2897087335586548, "learning_rate": 1.1841753108669173e-06, "loss": 0.2777, "step": 16711 }, { "epoch": 2.3963292228276454, "grad_norm": 0.276065468788147, "learning_rate": 1.1836362729709666e-06, "loss": 0.2688, "step": 16712 }, { "epoch": 2.396472612560941, "grad_norm": 0.2770257890224457, "learning_rate": 1.1830973413167096e-06, "loss": 0.2767, "step": 16713 }, { "epoch": 2.3966160022942358, "grad_norm": 0.3025895953178406, "learning_rate": 1.1825585159191515e-06, "loss": 0.2949, "step": 16714 }, { "epoch": 2.3967593920275307, "grad_norm": 0.2742312550544739, "learning_rate": 1.1820197967932918e-06, "loss": 0.2686, "step": 16715 }, { "epoch": 2.3969027817608257, "grad_norm": 0.26886090636253357, "learning_rate": 1.1814811839541278e-06, "loss": 0.298, "step": 16716 }, { "epoch": 2.397046171494121, "grad_norm": 0.2658839523792267, "learning_rate": 1.1809426774166537e-06, "loss": 0.2865, "step": 16717 }, { "epoch": 2.397189561227416, "grad_norm": 0.2839847505092621, "learning_rate": 1.1804042771958623e-06, "loss": 0.2835, "step": 16718 }, { "epoch": 2.397332950960711, "grad_norm": 0.2762209177017212, "learning_rate": 1.1798659833067382e-06, "loss": 0.2735, "step": 16719 }, { "epoch": 2.3974763406940065, "grad_norm": 0.2902509570121765, "learning_rate": 1.1793277957642696e-06, "loss": 0.3008, "step": 16720 }, { "epoch": 2.3976197304273015, "grad_norm": 0.25850194692611694, "learning_rate": 1.1787897145834371e-06, "loss": 0.2871, "step": 16721 }, { "epoch": 2.3977631201605965, "grad_norm": 0.2684374153614044, "learning_rate": 1.178251739779221e-06, "loss": 0.2986, "step": 16722 }, { "epoch": 2.3979065098938914, "grad_norm": 0.261598140001297, "learning_rate": 1.1777138713665986e-06, "loss": 0.2785, "step": 16723 }, { "epoch": 2.398049899627187, "grad_norm": 0.2820523679256439, "learning_rate": 1.17717610936054e-06, "loss": 0.2892, "step": 16724 }, { "epoch": 2.398193289360482, "grad_norm": 0.2658481001853943, "learning_rate": 1.176638453776019e-06, "loss": 0.2769, "step": 16725 }, { "epoch": 2.398336679093777, "grad_norm": 0.24646323919296265, "learning_rate": 1.1761009046280025e-06, "loss": 0.2581, "step": 16726 }, { "epoch": 2.398480068827072, "grad_norm": 0.2799528241157532, "learning_rate": 1.1755634619314537e-06, "loss": 0.3084, "step": 16727 }, { "epoch": 2.398623458560367, "grad_norm": 0.2602333724498749, "learning_rate": 1.1750261257013373e-06, "loss": 0.2782, "step": 16728 }, { "epoch": 2.398766848293662, "grad_norm": 0.27851343154907227, "learning_rate": 1.1744888959526073e-06, "loss": 0.2622, "step": 16729 }, { "epoch": 2.398910238026957, "grad_norm": 0.2829034924507141, "learning_rate": 1.1739517727002225e-06, "loss": 0.2884, "step": 16730 }, { "epoch": 2.3990536277602525, "grad_norm": 0.2948135435581207, "learning_rate": 1.1734147559591347e-06, "loss": 0.2797, "step": 16731 }, { "epoch": 2.3991970174935475, "grad_norm": 0.2708529233932495, "learning_rate": 1.1728778457442936e-06, "loss": 0.2846, "step": 16732 }, { "epoch": 2.3993404072268425, "grad_norm": 0.30688610672950745, "learning_rate": 1.172341042070646e-06, "loss": 0.2796, "step": 16733 }, { "epoch": 2.3994837969601375, "grad_norm": 0.26290571689605713, "learning_rate": 1.171804344953137e-06, "loss": 0.2831, "step": 16734 }, { "epoch": 2.399627186693433, "grad_norm": 0.257548451423645, "learning_rate": 1.1712677544067046e-06, "loss": 0.2701, "step": 16735 }, { "epoch": 2.399770576426728, "grad_norm": 0.27779561281204224, "learning_rate": 1.1707312704462887e-06, "loss": 0.2878, "step": 16736 }, { "epoch": 2.399913966160023, "grad_norm": 0.2679117023944855, "learning_rate": 1.1701948930868235e-06, "loss": 0.2643, "step": 16737 }, { "epoch": 2.400057355893318, "grad_norm": 0.275828093290329, "learning_rate": 1.169658622343241e-06, "loss": 0.2764, "step": 16738 }, { "epoch": 2.4002007456266132, "grad_norm": 0.25731611251831055, "learning_rate": 1.1691224582304723e-06, "loss": 0.2776, "step": 16739 }, { "epoch": 2.400344135359908, "grad_norm": 0.2758069932460785, "learning_rate": 1.1685864007634396e-06, "loss": 0.2746, "step": 16740 }, { "epoch": 2.400487525093203, "grad_norm": 0.2631203830242157, "learning_rate": 1.1680504499570672e-06, "loss": 0.2752, "step": 16741 }, { "epoch": 2.4006309148264986, "grad_norm": 0.28129446506500244, "learning_rate": 1.167514605826276e-06, "loss": 0.2602, "step": 16742 }, { "epoch": 2.4007743045597936, "grad_norm": 0.2949689030647278, "learning_rate": 1.1669788683859828e-06, "loss": 0.2655, "step": 16743 }, { "epoch": 2.4009176942930885, "grad_norm": 0.278559148311615, "learning_rate": 1.1664432376511015e-06, "loss": 0.284, "step": 16744 }, { "epoch": 2.4010610840263835, "grad_norm": 0.2855384349822998, "learning_rate": 1.1659077136365438e-06, "loss": 0.2778, "step": 16745 }, { "epoch": 2.401204473759679, "grad_norm": 0.2908572852611542, "learning_rate": 1.1653722963572166e-06, "loss": 0.2895, "step": 16746 }, { "epoch": 2.401347863492974, "grad_norm": 0.2842949330806732, "learning_rate": 1.1648369858280267e-06, "loss": 0.2771, "step": 16747 }, { "epoch": 2.401491253226269, "grad_norm": 0.2716433107852936, "learning_rate": 1.1643017820638753e-06, "loss": 0.2739, "step": 16748 }, { "epoch": 2.4016346429595643, "grad_norm": 0.2685062885284424, "learning_rate": 1.1637666850796615e-06, "loss": 0.2731, "step": 16749 }, { "epoch": 2.4017780326928593, "grad_norm": 0.2703174948692322, "learning_rate": 1.1632316948902832e-06, "loss": 0.2786, "step": 16750 }, { "epoch": 2.4019214224261543, "grad_norm": 0.27594834566116333, "learning_rate": 1.1626968115106318e-06, "loss": 0.2919, "step": 16751 }, { "epoch": 2.4020648121594492, "grad_norm": 0.279633492231369, "learning_rate": 1.1621620349555978e-06, "loss": 0.2933, "step": 16752 }, { "epoch": 2.4022082018927446, "grad_norm": 0.29658421874046326, "learning_rate": 1.1616273652400695e-06, "loss": 0.305, "step": 16753 }, { "epoch": 2.4023515916260396, "grad_norm": 0.28206077218055725, "learning_rate": 1.1610928023789303e-06, "loss": 0.2768, "step": 16754 }, { "epoch": 2.4024949813593346, "grad_norm": 0.2599683403968811, "learning_rate": 1.1605583463870634e-06, "loss": 0.2781, "step": 16755 }, { "epoch": 2.40263837109263, "grad_norm": 0.2694186866283417, "learning_rate": 1.1600239972793454e-06, "loss": 0.2833, "step": 16756 }, { "epoch": 2.402781760825925, "grad_norm": 0.27067431807518005, "learning_rate": 1.159489755070652e-06, "loss": 0.2765, "step": 16757 }, { "epoch": 2.40292515055922, "grad_norm": 0.2781684994697571, "learning_rate": 1.1589556197758556e-06, "loss": 0.2768, "step": 16758 }, { "epoch": 2.403068540292515, "grad_norm": 0.2810301184654236, "learning_rate": 1.1584215914098262e-06, "loss": 0.2823, "step": 16759 }, { "epoch": 2.40321193002581, "grad_norm": 0.27073416113853455, "learning_rate": 1.1578876699874304e-06, "loss": 0.2696, "step": 16760 }, { "epoch": 2.4033553197591053, "grad_norm": 0.27354803681373596, "learning_rate": 1.1573538555235315e-06, "loss": 0.2856, "step": 16761 }, { "epoch": 2.4034987094924003, "grad_norm": 0.29363900423049927, "learning_rate": 1.1568201480329906e-06, "loss": 0.2974, "step": 16762 }, { "epoch": 2.4036420992256953, "grad_norm": 0.27603888511657715, "learning_rate": 1.1562865475306645e-06, "loss": 0.3013, "step": 16763 }, { "epoch": 2.4037854889589907, "grad_norm": 0.2683188319206238, "learning_rate": 1.1557530540314077e-06, "loss": 0.2802, "step": 16764 }, { "epoch": 2.4039288786922857, "grad_norm": 0.2646462023258209, "learning_rate": 1.1552196675500732e-06, "loss": 0.2747, "step": 16765 }, { "epoch": 2.4040722684255806, "grad_norm": 0.2880774438381195, "learning_rate": 1.1546863881015096e-06, "loss": 0.2953, "step": 16766 }, { "epoch": 2.4042156581588756, "grad_norm": 0.2797292172908783, "learning_rate": 1.1541532157005603e-06, "loss": 0.2786, "step": 16767 }, { "epoch": 2.404359047892171, "grad_norm": 0.2701910734176636, "learning_rate": 1.1536201503620698e-06, "loss": 0.2992, "step": 16768 }, { "epoch": 2.404502437625466, "grad_norm": 0.304606556892395, "learning_rate": 1.1530871921008773e-06, "loss": 0.2715, "step": 16769 }, { "epoch": 2.404645827358761, "grad_norm": 0.2698928415775299, "learning_rate": 1.1525543409318203e-06, "loss": 0.267, "step": 16770 }, { "epoch": 2.4047892170920564, "grad_norm": 0.2909920811653137, "learning_rate": 1.1520215968697312e-06, "loss": 0.2718, "step": 16771 }, { "epoch": 2.4049326068253514, "grad_norm": 0.2667100131511688, "learning_rate": 1.1514889599294433e-06, "loss": 0.2899, "step": 16772 }, { "epoch": 2.4050759965586463, "grad_norm": 0.3033098578453064, "learning_rate": 1.1509564301257808e-06, "loss": 0.2674, "step": 16773 }, { "epoch": 2.4052193862919413, "grad_norm": 0.28630414605140686, "learning_rate": 1.1504240074735706e-06, "loss": 0.2768, "step": 16774 }, { "epoch": 2.4053627760252367, "grad_norm": 0.2665379047393799, "learning_rate": 1.1498916919876345e-06, "loss": 0.2987, "step": 16775 }, { "epoch": 2.4055061657585317, "grad_norm": 0.27307769656181335, "learning_rate": 1.149359483682791e-06, "loss": 0.2664, "step": 16776 }, { "epoch": 2.4056495554918267, "grad_norm": 0.25514066219329834, "learning_rate": 1.1488273825738571e-06, "loss": 0.272, "step": 16777 }, { "epoch": 2.405792945225122, "grad_norm": 0.276010662317276, "learning_rate": 1.1482953886756437e-06, "loss": 0.2774, "step": 16778 }, { "epoch": 2.405936334958417, "grad_norm": 0.252909392118454, "learning_rate": 1.1477635020029614e-06, "loss": 0.2647, "step": 16779 }, { "epoch": 2.406079724691712, "grad_norm": 0.2719218134880066, "learning_rate": 1.1472317225706165e-06, "loss": 0.2723, "step": 16780 }, { "epoch": 2.406223114425007, "grad_norm": 0.2765905559062958, "learning_rate": 1.1467000503934146e-06, "loss": 0.2815, "step": 16781 }, { "epoch": 2.4063665041583024, "grad_norm": 0.2819790840148926, "learning_rate": 1.1461684854861576e-06, "loss": 0.2859, "step": 16782 }, { "epoch": 2.4065098938915974, "grad_norm": 0.2566244602203369, "learning_rate": 1.1456370278636397e-06, "loss": 0.277, "step": 16783 }, { "epoch": 2.4066532836248924, "grad_norm": 0.2790950536727905, "learning_rate": 1.1451056775406582e-06, "loss": 0.2827, "step": 16784 }, { "epoch": 2.4067966733581874, "grad_norm": 0.2610010802745819, "learning_rate": 1.1445744345320047e-06, "loss": 0.2947, "step": 16785 }, { "epoch": 2.406940063091483, "grad_norm": 0.2657743990421295, "learning_rate": 1.144043298852468e-06, "loss": 0.2919, "step": 16786 }, { "epoch": 2.4070834528247778, "grad_norm": 0.2898116409778595, "learning_rate": 1.1435122705168345e-06, "loss": 0.2707, "step": 16787 }, { "epoch": 2.4072268425580727, "grad_norm": 0.2889156937599182, "learning_rate": 1.1429813495398883e-06, "loss": 0.2759, "step": 16788 }, { "epoch": 2.4073702322913677, "grad_norm": 0.26430532336235046, "learning_rate": 1.1424505359364069e-06, "loss": 0.2682, "step": 16789 }, { "epoch": 2.407513622024663, "grad_norm": 0.26185229420661926, "learning_rate": 1.1419198297211686e-06, "loss": 0.3051, "step": 16790 }, { "epoch": 2.407657011757958, "grad_norm": 0.28939422965049744, "learning_rate": 1.141389230908947e-06, "loss": 0.2735, "step": 16791 }, { "epoch": 2.407800401491253, "grad_norm": 0.2779415547847748, "learning_rate": 1.1408587395145137e-06, "loss": 0.2783, "step": 16792 }, { "epoch": 2.4079437912245485, "grad_norm": 0.26652058959007263, "learning_rate": 1.1403283555526383e-06, "loss": 0.2915, "step": 16793 }, { "epoch": 2.4080871809578435, "grad_norm": 0.2854967713356018, "learning_rate": 1.1397980790380824e-06, "loss": 0.2821, "step": 16794 }, { "epoch": 2.4082305706911384, "grad_norm": 0.27175968885421753, "learning_rate": 1.1392679099856103e-06, "loss": 0.2782, "step": 16795 }, { "epoch": 2.4083739604244334, "grad_norm": 0.2780255675315857, "learning_rate": 1.1387378484099803e-06, "loss": 0.2792, "step": 16796 }, { "epoch": 2.408517350157729, "grad_norm": 0.2743391692638397, "learning_rate": 1.1382078943259489e-06, "loss": 0.2783, "step": 16797 }, { "epoch": 2.408660739891024, "grad_norm": 0.3005966246128082, "learning_rate": 1.1376780477482696e-06, "loss": 0.2629, "step": 16798 }, { "epoch": 2.408804129624319, "grad_norm": 0.2752080261707306, "learning_rate": 1.1371483086916918e-06, "loss": 0.2853, "step": 16799 }, { "epoch": 2.408947519357614, "grad_norm": 0.265627384185791, "learning_rate": 1.1366186771709625e-06, "loss": 0.274, "step": 16800 }, { "epoch": 2.409090909090909, "grad_norm": 0.2866620719432831, "learning_rate": 1.1360891532008266e-06, "loss": 0.2657, "step": 16801 }, { "epoch": 2.409234298824204, "grad_norm": 0.28257885575294495, "learning_rate": 1.1355597367960247e-06, "loss": 0.2819, "step": 16802 }, { "epoch": 2.409377688557499, "grad_norm": 0.2513890266418457, "learning_rate": 1.1350304279712953e-06, "loss": 0.2902, "step": 16803 }, { "epoch": 2.4095210782907945, "grad_norm": 0.27357062697410583, "learning_rate": 1.1345012267413746e-06, "loss": 0.2878, "step": 16804 }, { "epoch": 2.4096644680240895, "grad_norm": 0.29020988941192627, "learning_rate": 1.1339721331209924e-06, "loss": 0.2752, "step": 16805 }, { "epoch": 2.4098078577573845, "grad_norm": 0.2692256271839142, "learning_rate": 1.133443147124879e-06, "loss": 0.2732, "step": 16806 }, { "epoch": 2.40995124749068, "grad_norm": 0.2763682007789612, "learning_rate": 1.1329142687677603e-06, "loss": 0.2892, "step": 16807 }, { "epoch": 2.410094637223975, "grad_norm": 0.2737116515636444, "learning_rate": 1.1323854980643595e-06, "loss": 0.2641, "step": 16808 }, { "epoch": 2.41023802695727, "grad_norm": 0.2677277624607086, "learning_rate": 1.1318568350293985e-06, "loss": 0.2858, "step": 16809 }, { "epoch": 2.410381416690565, "grad_norm": 0.2906891405582428, "learning_rate": 1.1313282796775915e-06, "loss": 0.2916, "step": 16810 }, { "epoch": 2.4105248064238602, "grad_norm": 0.2566615045070648, "learning_rate": 1.130799832023654e-06, "loss": 0.279, "step": 16811 }, { "epoch": 2.410668196157155, "grad_norm": 0.2908230721950531, "learning_rate": 1.1302714920822972e-06, "loss": 0.2856, "step": 16812 }, { "epoch": 2.41081158589045, "grad_norm": 0.28223440051078796, "learning_rate": 1.129743259868229e-06, "loss": 0.2967, "step": 16813 }, { "epoch": 2.410954975623745, "grad_norm": 0.25543320178985596, "learning_rate": 1.1292151353961555e-06, "loss": 0.2764, "step": 16814 }, { "epoch": 2.4110983653570406, "grad_norm": 0.27038806676864624, "learning_rate": 1.128687118680778e-06, "loss": 0.2668, "step": 16815 }, { "epoch": 2.4112417550903356, "grad_norm": 0.26377949118614197, "learning_rate": 1.128159209736796e-06, "loss": 0.283, "step": 16816 }, { "epoch": 2.4113851448236305, "grad_norm": 0.2837519645690918, "learning_rate": 1.1276314085789053e-06, "loss": 0.287, "step": 16817 }, { "epoch": 2.4115285345569255, "grad_norm": 0.30455467104911804, "learning_rate": 1.1271037152217996e-06, "loss": 0.2888, "step": 16818 }, { "epoch": 2.411671924290221, "grad_norm": 0.24566450715065002, "learning_rate": 1.1265761296801686e-06, "loss": 0.2813, "step": 16819 }, { "epoch": 2.411815314023516, "grad_norm": 0.2736491858959198, "learning_rate": 1.1260486519687015e-06, "loss": 0.2755, "step": 16820 }, { "epoch": 2.411958703756811, "grad_norm": 0.2584967017173767, "learning_rate": 1.1255212821020789e-06, "loss": 0.2897, "step": 16821 }, { "epoch": 2.4121020934901063, "grad_norm": 0.2704310715198517, "learning_rate": 1.1249940200949839e-06, "loss": 0.2762, "step": 16822 }, { "epoch": 2.4122454832234013, "grad_norm": 0.27454209327697754, "learning_rate": 1.1244668659620945e-06, "loss": 0.263, "step": 16823 }, { "epoch": 2.4123888729566962, "grad_norm": 0.2916644513607025, "learning_rate": 1.1239398197180862e-06, "loss": 0.2703, "step": 16824 }, { "epoch": 2.412532262689991, "grad_norm": 0.2877090573310852, "learning_rate": 1.12341288137763e-06, "loss": 0.2935, "step": 16825 }, { "epoch": 2.4126756524232866, "grad_norm": 0.2831893265247345, "learning_rate": 1.122886050955398e-06, "loss": 0.2733, "step": 16826 }, { "epoch": 2.4128190421565816, "grad_norm": 0.26818016171455383, "learning_rate": 1.1223593284660529e-06, "loss": 0.2842, "step": 16827 }, { "epoch": 2.4129624318898766, "grad_norm": 0.28009337186813354, "learning_rate": 1.1218327139242586e-06, "loss": 0.2762, "step": 16828 }, { "epoch": 2.413105821623172, "grad_norm": 0.25489917397499084, "learning_rate": 1.1213062073446763e-06, "loss": 0.271, "step": 16829 }, { "epoch": 2.413249211356467, "grad_norm": 0.27321600914001465, "learning_rate": 1.1207798087419618e-06, "loss": 0.2833, "step": 16830 }, { "epoch": 2.413392601089762, "grad_norm": 0.26341304183006287, "learning_rate": 1.1202535181307722e-06, "loss": 0.3098, "step": 16831 }, { "epoch": 2.413535990823057, "grad_norm": 0.26662883162498474, "learning_rate": 1.1197273355257555e-06, "loss": 0.2771, "step": 16832 }, { "epoch": 2.4136793805563523, "grad_norm": 0.2948091924190521, "learning_rate": 1.1192012609415598e-06, "loss": 0.2867, "step": 16833 }, { "epoch": 2.4138227702896473, "grad_norm": 0.2958199977874756, "learning_rate": 1.118675294392832e-06, "loss": 0.2677, "step": 16834 }, { "epoch": 2.4139661600229423, "grad_norm": 0.27528467774391174, "learning_rate": 1.118149435894213e-06, "loss": 0.2638, "step": 16835 }, { "epoch": 2.4141095497562373, "grad_norm": 0.27660539746284485, "learning_rate": 1.1176236854603422e-06, "loss": 0.2818, "step": 16836 }, { "epoch": 2.4142529394895327, "grad_norm": 0.24337345361709595, "learning_rate": 1.1170980431058559e-06, "loss": 0.2732, "step": 16837 }, { "epoch": 2.4143963292228277, "grad_norm": 0.28968334197998047, "learning_rate": 1.1165725088453872e-06, "loss": 0.2952, "step": 16838 }, { "epoch": 2.4145397189561226, "grad_norm": 0.28396856784820557, "learning_rate": 1.116047082693566e-06, "loss": 0.301, "step": 16839 }, { "epoch": 2.4146831086894176, "grad_norm": 0.2767668664455414, "learning_rate": 1.115521764665019e-06, "loss": 0.2851, "step": 16840 }, { "epoch": 2.414826498422713, "grad_norm": 0.28562918305397034, "learning_rate": 1.1149965547743714e-06, "loss": 0.2939, "step": 16841 }, { "epoch": 2.414969888156008, "grad_norm": 0.27875250577926636, "learning_rate": 1.1144714530362443e-06, "loss": 0.2689, "step": 16842 }, { "epoch": 2.415113277889303, "grad_norm": 0.2545015215873718, "learning_rate": 1.1139464594652533e-06, "loss": 0.2763, "step": 16843 }, { "epoch": 2.4152566676225984, "grad_norm": 0.29184678196907043, "learning_rate": 1.1134215740760152e-06, "loss": 0.2783, "step": 16844 }, { "epoch": 2.4154000573558934, "grad_norm": 0.2695866525173187, "learning_rate": 1.1128967968831422e-06, "loss": 0.2785, "step": 16845 }, { "epoch": 2.4155434470891883, "grad_norm": 0.2855058014392853, "learning_rate": 1.1123721279012424e-06, "loss": 0.2754, "step": 16846 }, { "epoch": 2.4156868368224833, "grad_norm": 0.25092634558677673, "learning_rate": 1.1118475671449241e-06, "loss": 0.2654, "step": 16847 }, { "epoch": 2.4158302265557787, "grad_norm": 0.27731913328170776, "learning_rate": 1.1113231146287867e-06, "loss": 0.2979, "step": 16848 }, { "epoch": 2.4159736162890737, "grad_norm": 0.27840501070022583, "learning_rate": 1.1107987703674317e-06, "loss": 0.2873, "step": 16849 }, { "epoch": 2.4161170060223687, "grad_norm": 0.27533388137817383, "learning_rate": 1.1102745343754568e-06, "loss": 0.2888, "step": 16850 }, { "epoch": 2.416260395755664, "grad_norm": 0.26129940152168274, "learning_rate": 1.1097504066674548e-06, "loss": 0.2904, "step": 16851 }, { "epoch": 2.416403785488959, "grad_norm": 0.27996084094047546, "learning_rate": 1.1092263872580173e-06, "loss": 0.292, "step": 16852 }, { "epoch": 2.416547175222254, "grad_norm": 0.2719103693962097, "learning_rate": 1.108702476161732e-06, "loss": 0.2811, "step": 16853 }, { "epoch": 2.416690564955549, "grad_norm": 0.27715808153152466, "learning_rate": 1.1081786733931843e-06, "loss": 0.271, "step": 16854 }, { "epoch": 2.4168339546888444, "grad_norm": 0.27726301550865173, "learning_rate": 1.107654978966955e-06, "loss": 0.2833, "step": 16855 }, { "epoch": 2.4169773444221394, "grad_norm": 0.27714717388153076, "learning_rate": 1.107131392897624e-06, "loss": 0.2655, "step": 16856 }, { "epoch": 2.4171207341554344, "grad_norm": 0.2720699608325958, "learning_rate": 1.106607915199766e-06, "loss": 0.2691, "step": 16857 }, { "epoch": 2.41726412388873, "grad_norm": 0.2530585527420044, "learning_rate": 1.1060845458879565e-06, "loss": 0.2628, "step": 16858 }, { "epoch": 2.4174075136220248, "grad_norm": 0.27403777837753296, "learning_rate": 1.1055612849767616e-06, "loss": 0.2681, "step": 16859 }, { "epoch": 2.4175509033553197, "grad_norm": 0.28213948011398315, "learning_rate": 1.1050381324807501e-06, "loss": 0.2835, "step": 16860 }, { "epoch": 2.4176942930886147, "grad_norm": 0.256848007440567, "learning_rate": 1.104515088414485e-06, "loss": 0.2801, "step": 16861 }, { "epoch": 2.41783768282191, "grad_norm": 0.29889199137687683, "learning_rate": 1.103992152792528e-06, "loss": 0.2836, "step": 16862 }, { "epoch": 2.417981072555205, "grad_norm": 0.28636667132377625, "learning_rate": 1.103469325629437e-06, "loss": 0.3012, "step": 16863 }, { "epoch": 2.4181244622885, "grad_norm": 0.2694484293460846, "learning_rate": 1.1029466069397648e-06, "loss": 0.2753, "step": 16864 }, { "epoch": 2.418267852021795, "grad_norm": 0.28603601455688477, "learning_rate": 1.1024239967380645e-06, "loss": 0.2742, "step": 16865 }, { "epoch": 2.4184112417550905, "grad_norm": 0.27591824531555176, "learning_rate": 1.1019014950388845e-06, "loss": 0.2636, "step": 16866 }, { "epoch": 2.4185546314883855, "grad_norm": 0.28988775610923767, "learning_rate": 1.1013791018567705e-06, "loss": 0.2706, "step": 16867 }, { "epoch": 2.4186980212216804, "grad_norm": 0.24131958186626434, "learning_rate": 1.100856817206265e-06, "loss": 0.2929, "step": 16868 }, { "epoch": 2.4188414109549754, "grad_norm": 0.29230451583862305, "learning_rate": 1.1003346411019072e-06, "loss": 0.2752, "step": 16869 }, { "epoch": 2.418984800688271, "grad_norm": 0.2701079249382019, "learning_rate": 1.099812573558235e-06, "loss": 0.2763, "step": 16870 }, { "epoch": 2.419128190421566, "grad_norm": 0.2758597433567047, "learning_rate": 1.0992906145897808e-06, "loss": 0.2976, "step": 16871 }, { "epoch": 2.4192715801548608, "grad_norm": 0.2565661370754242, "learning_rate": 1.0987687642110757e-06, "loss": 0.2704, "step": 16872 }, { "epoch": 2.419414969888156, "grad_norm": 0.2797495424747467, "learning_rate": 1.0982470224366466e-06, "loss": 0.283, "step": 16873 }, { "epoch": 2.419558359621451, "grad_norm": 0.2898758053779602, "learning_rate": 1.0977253892810197e-06, "loss": 0.277, "step": 16874 }, { "epoch": 2.419701749354746, "grad_norm": 0.2886584997177124, "learning_rate": 1.0972038647587142e-06, "loss": 0.2933, "step": 16875 }, { "epoch": 2.419845139088041, "grad_norm": 0.2807684540748596, "learning_rate": 1.0966824488842493e-06, "loss": 0.2705, "step": 16876 }, { "epoch": 2.4199885288213365, "grad_norm": 0.2781473398208618, "learning_rate": 1.0961611416721407e-06, "loss": 0.2858, "step": 16877 }, { "epoch": 2.4201319185546315, "grad_norm": 0.2815185487270355, "learning_rate": 1.0956399431369003e-06, "loss": 0.2905, "step": 16878 }, { "epoch": 2.4202753082879265, "grad_norm": 0.2864536643028259, "learning_rate": 1.0951188532930378e-06, "loss": 0.2678, "step": 16879 }, { "epoch": 2.420418698021222, "grad_norm": 0.2703660726547241, "learning_rate": 1.0945978721550614e-06, "loss": 0.2613, "step": 16880 }, { "epoch": 2.420562087754517, "grad_norm": 0.2827255427837372, "learning_rate": 1.094076999737471e-06, "loss": 0.2731, "step": 16881 }, { "epoch": 2.420705477487812, "grad_norm": 0.26952412724494934, "learning_rate": 1.0935562360547692e-06, "loss": 0.2861, "step": 16882 }, { "epoch": 2.420848867221107, "grad_norm": 0.27441585063934326, "learning_rate": 1.0930355811214516e-06, "loss": 0.2906, "step": 16883 }, { "epoch": 2.4209922569544022, "grad_norm": 0.28372859954833984, "learning_rate": 1.0925150349520136e-06, "loss": 0.2764, "step": 16884 }, { "epoch": 2.421135646687697, "grad_norm": 0.264163613319397, "learning_rate": 1.091994597560948e-06, "loss": 0.28, "step": 16885 }, { "epoch": 2.421279036420992, "grad_norm": 0.28187796473503113, "learning_rate": 1.0914742689627394e-06, "loss": 0.2638, "step": 16886 }, { "epoch": 2.4214224261542876, "grad_norm": 0.27034705877304077, "learning_rate": 1.0909540491718745e-06, "loss": 0.2603, "step": 16887 }, { "epoch": 2.4215658158875826, "grad_norm": 0.26560255885124207, "learning_rate": 1.090433938202836e-06, "loss": 0.2898, "step": 16888 }, { "epoch": 2.4217092056208775, "grad_norm": 0.2647673189640045, "learning_rate": 1.0899139360701029e-06, "loss": 0.2822, "step": 16889 }, { "epoch": 2.4218525953541725, "grad_norm": 0.2775924801826477, "learning_rate": 1.0893940427881505e-06, "loss": 0.2625, "step": 16890 }, { "epoch": 2.4219959850874675, "grad_norm": 0.2830142378807068, "learning_rate": 1.0888742583714524e-06, "loss": 0.3143, "step": 16891 }, { "epoch": 2.422139374820763, "grad_norm": 0.28386417031288147, "learning_rate": 1.0883545828344783e-06, "loss": 0.3027, "step": 16892 }, { "epoch": 2.422282764554058, "grad_norm": 0.29181569814682007, "learning_rate": 1.0878350161916956e-06, "loss": 0.268, "step": 16893 }, { "epoch": 2.422426154287353, "grad_norm": 0.2599848210811615, "learning_rate": 1.0873155584575685e-06, "loss": 0.2701, "step": 16894 }, { "epoch": 2.4225695440206483, "grad_norm": 0.27834352850914, "learning_rate": 1.0867962096465568e-06, "loss": 0.2736, "step": 16895 }, { "epoch": 2.4227129337539433, "grad_norm": 0.27029019594192505, "learning_rate": 1.08627696977312e-06, "loss": 0.2832, "step": 16896 }, { "epoch": 2.4228563234872382, "grad_norm": 0.27478906512260437, "learning_rate": 1.0857578388517115e-06, "loss": 0.2934, "step": 16897 }, { "epoch": 2.422999713220533, "grad_norm": 0.28534626960754395, "learning_rate": 1.0852388168967832e-06, "loss": 0.2935, "step": 16898 }, { "epoch": 2.4231431029538286, "grad_norm": 0.2671879529953003, "learning_rate": 1.084719903922784e-06, "loss": 0.2641, "step": 16899 }, { "epoch": 2.4232864926871236, "grad_norm": 0.29220443964004517, "learning_rate": 1.0842010999441599e-06, "loss": 0.2793, "step": 16900 }, { "epoch": 2.4234298824204186, "grad_norm": 0.2704564332962036, "learning_rate": 1.0836824049753546e-06, "loss": 0.3106, "step": 16901 }, { "epoch": 2.423573272153714, "grad_norm": 0.27100956439971924, "learning_rate": 1.0831638190308057e-06, "loss": 0.2718, "step": 16902 }, { "epoch": 2.423716661887009, "grad_norm": 0.27870413661003113, "learning_rate": 1.082645342124951e-06, "loss": 0.2922, "step": 16903 }, { "epoch": 2.423860051620304, "grad_norm": 0.27806857228279114, "learning_rate": 1.082126974272224e-06, "loss": 0.2731, "step": 16904 }, { "epoch": 2.424003441353599, "grad_norm": 0.2656290531158447, "learning_rate": 1.0816087154870553e-06, "loss": 0.28, "step": 16905 }, { "epoch": 2.4241468310868943, "grad_norm": 0.2748897969722748, "learning_rate": 1.0810905657838717e-06, "loss": 0.283, "step": 16906 }, { "epoch": 2.4242902208201893, "grad_norm": 0.2882576882839203, "learning_rate": 1.0805725251770988e-06, "loss": 0.2954, "step": 16907 }, { "epoch": 2.4244336105534843, "grad_norm": 0.3041004240512848, "learning_rate": 1.0800545936811574e-06, "loss": 0.2892, "step": 16908 }, { "epoch": 2.4245770002867797, "grad_norm": 0.28181371092796326, "learning_rate": 1.0795367713104665e-06, "loss": 0.2897, "step": 16909 }, { "epoch": 2.4247203900200747, "grad_norm": 0.2662647068500519, "learning_rate": 1.0790190580794407e-06, "loss": 0.2842, "step": 16910 }, { "epoch": 2.4248637797533696, "grad_norm": 0.29050418734550476, "learning_rate": 1.078501454002493e-06, "loss": 0.3092, "step": 16911 }, { "epoch": 2.4250071694866646, "grad_norm": 0.2719659209251404, "learning_rate": 1.0779839590940334e-06, "loss": 0.2728, "step": 16912 }, { "epoch": 2.42515055921996, "grad_norm": 0.26929283142089844, "learning_rate": 1.0774665733684664e-06, "loss": 0.2754, "step": 16913 }, { "epoch": 2.425293948953255, "grad_norm": 0.270169734954834, "learning_rate": 1.0769492968401956e-06, "loss": 0.2834, "step": 16914 }, { "epoch": 2.42543733868655, "grad_norm": 0.29232925176620483, "learning_rate": 1.0764321295236213e-06, "loss": 0.294, "step": 16915 }, { "epoch": 2.425580728419845, "grad_norm": 0.2853270471096039, "learning_rate": 1.0759150714331412e-06, "loss": 0.2819, "step": 16916 }, { "epoch": 2.4257241181531404, "grad_norm": 0.286668986082077, "learning_rate": 1.0753981225831505e-06, "loss": 0.297, "step": 16917 }, { "epoch": 2.4258675078864353, "grad_norm": 0.3012402355670929, "learning_rate": 1.0748812829880378e-06, "loss": 0.2776, "step": 16918 }, { "epoch": 2.4260108976197303, "grad_norm": 0.2825976610183716, "learning_rate": 1.0743645526621921e-06, "loss": 0.2797, "step": 16919 }, { "epoch": 2.4261542873530253, "grad_norm": 0.2625141441822052, "learning_rate": 1.0738479316199984e-06, "loss": 0.2812, "step": 16920 }, { "epoch": 2.4262976770863207, "grad_norm": 0.26311686635017395, "learning_rate": 1.0733314198758387e-06, "loss": 0.2872, "step": 16921 }, { "epoch": 2.4264410668196157, "grad_norm": 0.28699204325675964, "learning_rate": 1.072815017444092e-06, "loss": 0.268, "step": 16922 }, { "epoch": 2.4265844565529107, "grad_norm": 0.24754196405410767, "learning_rate": 1.0722987243391358e-06, "loss": 0.2617, "step": 16923 }, { "epoch": 2.426727846286206, "grad_norm": 0.2678649425506592, "learning_rate": 1.0717825405753391e-06, "loss": 0.2775, "step": 16924 }, { "epoch": 2.426871236019501, "grad_norm": 0.25829339027404785, "learning_rate": 1.071266466167073e-06, "loss": 0.2839, "step": 16925 }, { "epoch": 2.427014625752796, "grad_norm": 0.2717263102531433, "learning_rate": 1.0707505011287057e-06, "loss": 0.2798, "step": 16926 }, { "epoch": 2.427158015486091, "grad_norm": 0.28409215807914734, "learning_rate": 1.0702346454746003e-06, "loss": 0.2888, "step": 16927 }, { "epoch": 2.4273014052193864, "grad_norm": 0.2679605185985565, "learning_rate": 1.0697188992191193e-06, "loss": 0.2752, "step": 16928 }, { "epoch": 2.4274447949526814, "grad_norm": 0.2668163776397705, "learning_rate": 1.0692032623766163e-06, "loss": 0.2935, "step": 16929 }, { "epoch": 2.4275881846859764, "grad_norm": 0.2686143219470978, "learning_rate": 1.0686877349614478e-06, "loss": 0.2677, "step": 16930 }, { "epoch": 2.427731574419272, "grad_norm": 0.259733647108078, "learning_rate": 1.0681723169879654e-06, "loss": 0.2691, "step": 16931 }, { "epoch": 2.4278749641525668, "grad_norm": 0.2742239832878113, "learning_rate": 1.0676570084705173e-06, "loss": 0.2718, "step": 16932 }, { "epoch": 2.4280183538858617, "grad_norm": 0.2691670060157776, "learning_rate": 1.0671418094234488e-06, "loss": 0.2725, "step": 16933 }, { "epoch": 2.4281617436191567, "grad_norm": 0.2632358968257904, "learning_rate": 1.0666267198611042e-06, "loss": 0.2748, "step": 16934 }, { "epoch": 2.428305133352452, "grad_norm": 0.2820151746273041, "learning_rate": 1.0661117397978195e-06, "loss": 0.279, "step": 16935 }, { "epoch": 2.428448523085747, "grad_norm": 0.25767654180526733, "learning_rate": 1.0655968692479325e-06, "loss": 0.2798, "step": 16936 }, { "epoch": 2.428591912819042, "grad_norm": 0.26769503951072693, "learning_rate": 1.065082108225776e-06, "loss": 0.2946, "step": 16937 }, { "epoch": 2.4287353025523375, "grad_norm": 0.262288898229599, "learning_rate": 1.0645674567456815e-06, "loss": 0.2852, "step": 16938 }, { "epoch": 2.4288786922856325, "grad_norm": 0.2690523564815521, "learning_rate": 1.0640529148219758e-06, "loss": 0.2612, "step": 16939 }, { "epoch": 2.4290220820189274, "grad_norm": 0.25160422921180725, "learning_rate": 1.0635384824689815e-06, "loss": 0.293, "step": 16940 }, { "epoch": 2.4291654717522224, "grad_norm": 0.2652023136615753, "learning_rate": 1.0630241597010198e-06, "loss": 0.3016, "step": 16941 }, { "epoch": 2.4293088614855174, "grad_norm": 0.2531493604183197, "learning_rate": 1.06250994653241e-06, "loss": 0.2695, "step": 16942 }, { "epoch": 2.429452251218813, "grad_norm": 0.2627871334552765, "learning_rate": 1.0619958429774658e-06, "loss": 0.2953, "step": 16943 }, { "epoch": 2.429595640952108, "grad_norm": 0.281283438205719, "learning_rate": 1.0614818490504997e-06, "loss": 0.2798, "step": 16944 }, { "epoch": 2.4297390306854028, "grad_norm": 0.2789616584777832, "learning_rate": 1.0609679647658206e-06, "loss": 0.258, "step": 16945 }, { "epoch": 2.429882420418698, "grad_norm": 0.26768654584884644, "learning_rate": 1.0604541901377336e-06, "loss": 0.2845, "step": 16946 }, { "epoch": 2.430025810151993, "grad_norm": 0.2841426432132721, "learning_rate": 1.0599405251805416e-06, "loss": 0.2804, "step": 16947 }, { "epoch": 2.430169199885288, "grad_norm": 0.2819914221763611, "learning_rate": 1.0594269699085452e-06, "loss": 0.2644, "step": 16948 }, { "epoch": 2.430312589618583, "grad_norm": 0.2702043354511261, "learning_rate": 1.0589135243360399e-06, "loss": 0.28, "step": 16949 }, { "epoch": 2.4304559793518785, "grad_norm": 0.26703187823295593, "learning_rate": 1.0584001884773204e-06, "loss": 0.2706, "step": 16950 }, { "epoch": 2.4305993690851735, "grad_norm": 0.2732352614402771, "learning_rate": 1.0578869623466753e-06, "loss": 0.2894, "step": 16951 }, { "epoch": 2.4307427588184685, "grad_norm": 0.28077226877212524, "learning_rate": 1.0573738459583931e-06, "loss": 0.2847, "step": 16952 }, { "epoch": 2.430886148551764, "grad_norm": 0.27394363284111023, "learning_rate": 1.0568608393267581e-06, "loss": 0.3161, "step": 16953 }, { "epoch": 2.431029538285059, "grad_norm": 0.28587237000465393, "learning_rate": 1.0563479424660521e-06, "loss": 0.2869, "step": 16954 }, { "epoch": 2.431172928018354, "grad_norm": 0.26892614364624023, "learning_rate": 1.0558351553905538e-06, "loss": 0.2807, "step": 16955 }, { "epoch": 2.431316317751649, "grad_norm": 0.2646847069263458, "learning_rate": 1.055322478114536e-06, "loss": 0.2964, "step": 16956 }, { "epoch": 2.431459707484944, "grad_norm": 0.28485697507858276, "learning_rate": 1.054809910652273e-06, "loss": 0.2713, "step": 16957 }, { "epoch": 2.431603097218239, "grad_norm": 0.2631223201751709, "learning_rate": 1.0542974530180327e-06, "loss": 0.2609, "step": 16958 }, { "epoch": 2.431746486951534, "grad_norm": 0.2643178701400757, "learning_rate": 1.0537851052260816e-06, "loss": 0.2734, "step": 16959 }, { "epoch": 2.4318898766848296, "grad_norm": 0.2741139829158783, "learning_rate": 1.0532728672906834e-06, "loss": 0.2811, "step": 16960 }, { "epoch": 2.4320332664181246, "grad_norm": 0.28081896901130676, "learning_rate": 1.052760739226097e-06, "loss": 0.2714, "step": 16961 }, { "epoch": 2.4321766561514195, "grad_norm": 0.27454420924186707, "learning_rate": 1.0522487210465793e-06, "loss": 0.2786, "step": 16962 }, { "epoch": 2.4323200458847145, "grad_norm": 0.27040785551071167, "learning_rate": 1.0517368127663846e-06, "loss": 0.2779, "step": 16963 }, { "epoch": 2.43246343561801, "grad_norm": 0.2782193124294281, "learning_rate": 1.0512250143997639e-06, "loss": 0.2897, "step": 16964 }, { "epoch": 2.432606825351305, "grad_norm": 0.2793956398963928, "learning_rate": 1.0507133259609637e-06, "loss": 0.2734, "step": 16965 }, { "epoch": 2.4327502150846, "grad_norm": 0.2948145270347595, "learning_rate": 1.0502017474642312e-06, "loss": 0.2895, "step": 16966 }, { "epoch": 2.432893604817895, "grad_norm": 0.29291608929634094, "learning_rate": 1.049690278923805e-06, "loss": 0.2944, "step": 16967 }, { "epoch": 2.4330369945511903, "grad_norm": 0.27043622732162476, "learning_rate": 1.0491789203539243e-06, "loss": 0.2897, "step": 16968 }, { "epoch": 2.4331803842844852, "grad_norm": 0.27095648646354675, "learning_rate": 1.0486676717688249e-06, "loss": 0.2773, "step": 16969 }, { "epoch": 2.43332377401778, "grad_norm": 0.2660464942455292, "learning_rate": 1.0481565331827393e-06, "loss": 0.2612, "step": 16970 }, { "epoch": 2.433467163751075, "grad_norm": 0.27990081906318665, "learning_rate": 1.0476455046098983e-06, "loss": 0.2882, "step": 16971 }, { "epoch": 2.4336105534843706, "grad_norm": 0.256708025932312, "learning_rate": 1.0471345860645255e-06, "loss": 0.2947, "step": 16972 }, { "epoch": 2.4337539432176656, "grad_norm": 0.2889617383480072, "learning_rate": 1.0466237775608445e-06, "loss": 0.267, "step": 16973 }, { "epoch": 2.4338973329509606, "grad_norm": 0.2738781273365021, "learning_rate": 1.0461130791130763e-06, "loss": 0.2844, "step": 16974 }, { "epoch": 2.434040722684256, "grad_norm": 0.27696388959884644, "learning_rate": 1.0456024907354378e-06, "loss": 0.2704, "step": 16975 }, { "epoch": 2.434184112417551, "grad_norm": 0.2574365735054016, "learning_rate": 1.0450920124421431e-06, "loss": 0.2922, "step": 16976 }, { "epoch": 2.434327502150846, "grad_norm": 0.2561745047569275, "learning_rate": 1.0445816442474044e-06, "loss": 0.2699, "step": 16977 }, { "epoch": 2.434470891884141, "grad_norm": 0.2714192271232605, "learning_rate": 1.0440713861654266e-06, "loss": 0.2593, "step": 16978 }, { "epoch": 2.4346142816174363, "grad_norm": 0.2850162386894226, "learning_rate": 1.043561238210416e-06, "loss": 0.2799, "step": 16979 }, { "epoch": 2.4347576713507313, "grad_norm": 0.2768383324146271, "learning_rate": 1.0430512003965747e-06, "loss": 0.2722, "step": 16980 }, { "epoch": 2.4349010610840263, "grad_norm": 0.25447651743888855, "learning_rate": 1.0425412727380996e-06, "loss": 0.2646, "step": 16981 }, { "epoch": 2.4350444508173217, "grad_norm": 0.26301419734954834, "learning_rate": 1.0420314552491901e-06, "loss": 0.2842, "step": 16982 }, { "epoch": 2.4351878405506167, "grad_norm": 0.2508382797241211, "learning_rate": 1.0415217479440353e-06, "loss": 0.2752, "step": 16983 }, { "epoch": 2.4353312302839116, "grad_norm": 0.26472994685173035, "learning_rate": 1.0410121508368259e-06, "loss": 0.2798, "step": 16984 }, { "epoch": 2.4354746200172066, "grad_norm": 0.26243487000465393, "learning_rate": 1.0405026639417482e-06, "loss": 0.2837, "step": 16985 }, { "epoch": 2.435618009750502, "grad_norm": 0.2772742211818695, "learning_rate": 1.039993287272985e-06, "loss": 0.2848, "step": 16986 }, { "epoch": 2.435761399483797, "grad_norm": 0.261052668094635, "learning_rate": 1.0394840208447176e-06, "loss": 0.2862, "step": 16987 }, { "epoch": 2.435904789217092, "grad_norm": 0.27267077565193176, "learning_rate": 1.0389748646711235e-06, "loss": 0.2748, "step": 16988 }, { "epoch": 2.4360481789503874, "grad_norm": 0.26687806844711304, "learning_rate": 1.0384658187663748e-06, "loss": 0.3004, "step": 16989 }, { "epoch": 2.4361915686836824, "grad_norm": 0.2559608221054077, "learning_rate": 1.037956883144644e-06, "loss": 0.2667, "step": 16990 }, { "epoch": 2.4363349584169773, "grad_norm": 0.2702636420726776, "learning_rate": 1.0374480578200985e-06, "loss": 0.2885, "step": 16991 }, { "epoch": 2.4364783481502723, "grad_norm": 0.2936350107192993, "learning_rate": 1.0369393428069037e-06, "loss": 0.2805, "step": 16992 }, { "epoch": 2.4366217378835677, "grad_norm": 0.2519626319408417, "learning_rate": 1.0364307381192223e-06, "loss": 0.292, "step": 16993 }, { "epoch": 2.4367651276168627, "grad_norm": 0.26417312026023865, "learning_rate": 1.0359222437712112e-06, "loss": 0.2679, "step": 16994 }, { "epoch": 2.4369085173501577, "grad_norm": 0.2616124749183655, "learning_rate": 1.0354138597770264e-06, "loss": 0.2909, "step": 16995 }, { "epoch": 2.4370519070834527, "grad_norm": 0.2660977840423584, "learning_rate": 1.0349055861508218e-06, "loss": 0.2864, "step": 16996 }, { "epoch": 2.437195296816748, "grad_norm": 0.2579175531864166, "learning_rate": 1.0343974229067456e-06, "loss": 0.2812, "step": 16997 }, { "epoch": 2.437338686550043, "grad_norm": 0.29194116592407227, "learning_rate": 1.0338893700589448e-06, "loss": 0.286, "step": 16998 }, { "epoch": 2.437482076283338, "grad_norm": 0.2663504481315613, "learning_rate": 1.0333814276215631e-06, "loss": 0.2602, "step": 16999 }, { "epoch": 2.437625466016633, "grad_norm": 0.268621563911438, "learning_rate": 1.0328735956087411e-06, "loss": 0.2763, "step": 17000 }, { "epoch": 2.4377688557499284, "grad_norm": 0.29233595728874207, "learning_rate": 1.0323658740346148e-06, "loss": 0.2727, "step": 17001 }, { "epoch": 2.4379122454832234, "grad_norm": 0.28187355399131775, "learning_rate": 1.03185826291332e-06, "loss": 0.2866, "step": 17002 }, { "epoch": 2.4380556352165184, "grad_norm": 0.2587447464466095, "learning_rate": 1.0313507622589864e-06, "loss": 0.2726, "step": 17003 }, { "epoch": 2.4381990249498138, "grad_norm": 0.2728140354156494, "learning_rate": 1.0308433720857442e-06, "loss": 0.2957, "step": 17004 }, { "epoch": 2.4383424146831087, "grad_norm": 0.2721283733844757, "learning_rate": 1.0303360924077155e-06, "loss": 0.2778, "step": 17005 }, { "epoch": 2.4384858044164037, "grad_norm": 0.2660357654094696, "learning_rate": 1.0298289232390234e-06, "loss": 0.2907, "step": 17006 }, { "epoch": 2.4386291941496987, "grad_norm": 0.2668243646621704, "learning_rate": 1.0293218645937874e-06, "loss": 0.3097, "step": 17007 }, { "epoch": 2.438772583882994, "grad_norm": 0.24912910163402557, "learning_rate": 1.0288149164861223e-06, "loss": 0.2836, "step": 17008 }, { "epoch": 2.438915973616289, "grad_norm": 0.28538021445274353, "learning_rate": 1.028308078930142e-06, "loss": 0.2835, "step": 17009 }, { "epoch": 2.439059363349584, "grad_norm": 0.26644036173820496, "learning_rate": 1.0278013519399544e-06, "loss": 0.2753, "step": 17010 }, { "epoch": 2.4392027530828795, "grad_norm": 0.3020894229412079, "learning_rate": 1.0272947355296664e-06, "loss": 0.2781, "step": 17011 }, { "epoch": 2.4393461428161745, "grad_norm": 0.25183001160621643, "learning_rate": 1.026788229713382e-06, "loss": 0.2757, "step": 17012 }, { "epoch": 2.4394895325494694, "grad_norm": 0.2639676332473755, "learning_rate": 1.0262818345052011e-06, "loss": 0.2714, "step": 17013 }, { "epoch": 2.4396329222827644, "grad_norm": 0.29164305329322815, "learning_rate": 1.0257755499192212e-06, "loss": 0.3083, "step": 17014 }, { "epoch": 2.43977631201606, "grad_norm": 0.26029521226882935, "learning_rate": 1.025269375969537e-06, "loss": 0.2731, "step": 17015 }, { "epoch": 2.439919701749355, "grad_norm": 0.2881318926811218, "learning_rate": 1.0247633126702388e-06, "loss": 0.28, "step": 17016 }, { "epoch": 2.4400630914826498, "grad_norm": 0.29177823662757874, "learning_rate": 1.0242573600354144e-06, "loss": 0.2737, "step": 17017 }, { "epoch": 2.4402064812159447, "grad_norm": 0.2714393734931946, "learning_rate": 1.02375151807915e-06, "loss": 0.2912, "step": 17018 }, { "epoch": 2.44034987094924, "grad_norm": 0.2685631215572357, "learning_rate": 1.0232457868155266e-06, "loss": 0.2881, "step": 17019 }, { "epoch": 2.440493260682535, "grad_norm": 0.2853418290615082, "learning_rate": 1.022740166258624e-06, "loss": 0.2905, "step": 17020 }, { "epoch": 2.44063665041583, "grad_norm": 0.2794409394264221, "learning_rate": 1.022234656422516e-06, "loss": 0.2774, "step": 17021 }, { "epoch": 2.440780040149125, "grad_norm": 0.28624778985977173, "learning_rate": 1.0217292573212762e-06, "loss": 0.2928, "step": 17022 }, { "epoch": 2.4409234298824205, "grad_norm": 0.28931230306625366, "learning_rate": 1.0212239689689741e-06, "loss": 0.2725, "step": 17023 }, { "epoch": 2.4410668196157155, "grad_norm": 0.2738966643810272, "learning_rate": 1.0207187913796763e-06, "loss": 0.2628, "step": 17024 }, { "epoch": 2.4412102093490105, "grad_norm": 0.28333258628845215, "learning_rate": 1.020213724567447e-06, "loss": 0.2995, "step": 17025 }, { "epoch": 2.441353599082306, "grad_norm": 0.28751423954963684, "learning_rate": 1.0197087685463447e-06, "loss": 0.2899, "step": 17026 }, { "epoch": 2.441496988815601, "grad_norm": 0.256279319524765, "learning_rate": 1.0192039233304274e-06, "loss": 0.2725, "step": 17027 }, { "epoch": 2.441640378548896, "grad_norm": 0.25964149832725525, "learning_rate": 1.018699188933749e-06, "loss": 0.2855, "step": 17028 }, { "epoch": 2.441783768282191, "grad_norm": 0.26996538043022156, "learning_rate": 1.0181945653703613e-06, "loss": 0.2823, "step": 17029 }, { "epoch": 2.441927158015486, "grad_norm": 0.28037065267562866, "learning_rate": 1.017690052654311e-06, "loss": 0.269, "step": 17030 }, { "epoch": 2.442070547748781, "grad_norm": 0.2659149765968323, "learning_rate": 1.0171856507996453e-06, "loss": 0.3033, "step": 17031 }, { "epoch": 2.442213937482076, "grad_norm": 0.27155861258506775, "learning_rate": 1.0166813598204035e-06, "loss": 0.2656, "step": 17032 }, { "epoch": 2.4423573272153716, "grad_norm": 0.2591562271118164, "learning_rate": 1.0161771797306246e-06, "loss": 0.283, "step": 17033 }, { "epoch": 2.4425007169486666, "grad_norm": 0.2759060859680176, "learning_rate": 1.0156731105443446e-06, "loss": 0.2831, "step": 17034 }, { "epoch": 2.4426441066819615, "grad_norm": 0.28505948185920715, "learning_rate": 1.015169152275597e-06, "loss": 0.2701, "step": 17035 }, { "epoch": 2.4427874964152565, "grad_norm": 0.2789469361305237, "learning_rate": 1.0146653049384097e-06, "loss": 0.2781, "step": 17036 }, { "epoch": 2.442930886148552, "grad_norm": 0.26057320833206177, "learning_rate": 1.0141615685468098e-06, "loss": 0.2687, "step": 17037 }, { "epoch": 2.443074275881847, "grad_norm": 0.28647857904434204, "learning_rate": 1.0136579431148203e-06, "loss": 0.2829, "step": 17038 }, { "epoch": 2.443217665615142, "grad_norm": 0.27194079756736755, "learning_rate": 1.0131544286564621e-06, "loss": 0.2639, "step": 17039 }, { "epoch": 2.4433610553484373, "grad_norm": 0.27665379643440247, "learning_rate": 1.0126510251857513e-06, "loss": 0.2742, "step": 17040 }, { "epoch": 2.4435044450817323, "grad_norm": 0.26930803060531616, "learning_rate": 1.0121477327167023e-06, "loss": 0.273, "step": 17041 }, { "epoch": 2.4436478348150272, "grad_norm": 0.27478307485580444, "learning_rate": 1.0116445512633272e-06, "loss": 0.2712, "step": 17042 }, { "epoch": 2.443791224548322, "grad_norm": 0.2741760015487671, "learning_rate": 1.0111414808396314e-06, "loss": 0.2926, "step": 17043 }, { "epoch": 2.4439346142816176, "grad_norm": 0.263481467962265, "learning_rate": 1.0106385214596204e-06, "loss": 0.2751, "step": 17044 }, { "epoch": 2.4440780040149126, "grad_norm": 0.24688854813575745, "learning_rate": 1.0101356731372964e-06, "loss": 0.2928, "step": 17045 }, { "epoch": 2.4442213937482076, "grad_norm": 0.27654197812080383, "learning_rate": 1.009632935886658e-06, "loss": 0.2854, "step": 17046 }, { "epoch": 2.4443647834815025, "grad_norm": 0.25480958819389343, "learning_rate": 1.0091303097217014e-06, "loss": 0.288, "step": 17047 }, { "epoch": 2.444508173214798, "grad_norm": 0.24610932171344757, "learning_rate": 1.0086277946564166e-06, "loss": 0.2719, "step": 17048 }, { "epoch": 2.444651562948093, "grad_norm": 0.27112454175949097, "learning_rate": 1.0081253907047938e-06, "loss": 0.2632, "step": 17049 }, { "epoch": 2.444794952681388, "grad_norm": 0.26403969526290894, "learning_rate": 1.00762309788082e-06, "loss": 0.2928, "step": 17050 }, { "epoch": 2.444938342414683, "grad_norm": 0.2999196946620941, "learning_rate": 1.0071209161984773e-06, "loss": 0.288, "step": 17051 }, { "epoch": 2.4450817321479783, "grad_norm": 0.27749577164649963, "learning_rate": 1.0066188456717464e-06, "loss": 0.2952, "step": 17052 }, { "epoch": 2.4452251218812733, "grad_norm": 0.25948333740234375, "learning_rate": 1.0061168863146038e-06, "loss": 0.2604, "step": 17053 }, { "epoch": 2.4453685116145683, "grad_norm": 0.2712482511997223, "learning_rate": 1.0056150381410228e-06, "loss": 0.2823, "step": 17054 }, { "epoch": 2.4455119013478637, "grad_norm": 0.26177918910980225, "learning_rate": 1.0051133011649755e-06, "loss": 0.2692, "step": 17055 }, { "epoch": 2.4456552910811586, "grad_norm": 0.2709351181983948, "learning_rate": 1.0046116754004282e-06, "loss": 0.2619, "step": 17056 }, { "epoch": 2.4457986808144536, "grad_norm": 0.2916593551635742, "learning_rate": 1.0041101608613457e-06, "loss": 0.2859, "step": 17057 }, { "epoch": 2.4459420705477486, "grad_norm": 0.28233763575553894, "learning_rate": 1.003608757561691e-06, "loss": 0.2801, "step": 17058 }, { "epoch": 2.446085460281044, "grad_norm": 0.2820802628993988, "learning_rate": 1.0031074655154194e-06, "loss": 0.2822, "step": 17059 }, { "epoch": 2.446228850014339, "grad_norm": 0.26710641384124756, "learning_rate": 1.0026062847364876e-06, "loss": 0.285, "step": 17060 }, { "epoch": 2.446372239747634, "grad_norm": 0.27836087346076965, "learning_rate": 1.0021052152388477e-06, "loss": 0.2723, "step": 17061 }, { "epoch": 2.4465156294809294, "grad_norm": 0.2717452645301819, "learning_rate": 1.0016042570364486e-06, "loss": 0.2822, "step": 17062 }, { "epoch": 2.4466590192142244, "grad_norm": 0.25632244348526, "learning_rate": 1.0011034101432383e-06, "loss": 0.2963, "step": 17063 }, { "epoch": 2.4468024089475193, "grad_norm": 0.2810530960559845, "learning_rate": 1.0006026745731556e-06, "loss": 0.2772, "step": 17064 }, { "epoch": 2.4469457986808143, "grad_norm": 0.2756761610507965, "learning_rate": 1.0001020503401422e-06, "loss": 0.3027, "step": 17065 }, { "epoch": 2.4470891884141097, "grad_norm": 0.2654971480369568, "learning_rate": 9.996015374581348e-07, "loss": 0.2802, "step": 17066 }, { "epoch": 2.4472325781474047, "grad_norm": 0.26261255145072937, "learning_rate": 9.991011359410668e-07, "loss": 0.2839, "step": 17067 }, { "epoch": 2.4473759678806997, "grad_norm": 0.28183308243751526, "learning_rate": 9.98600845802869e-07, "loss": 0.2905, "step": 17068 }, { "epoch": 2.447519357613995, "grad_norm": 0.2631075978279114, "learning_rate": 9.981006670574695e-07, "loss": 0.2868, "step": 17069 }, { "epoch": 2.44766274734729, "grad_norm": 0.269278347492218, "learning_rate": 9.976005997187887e-07, "loss": 0.2777, "step": 17070 }, { "epoch": 2.447806137080585, "grad_norm": 0.26991260051727295, "learning_rate": 9.971006438007513e-07, "loss": 0.2743, "step": 17071 }, { "epoch": 2.44794952681388, "grad_norm": 0.2697541415691376, "learning_rate": 9.96600799317275e-07, "loss": 0.2726, "step": 17072 }, { "epoch": 2.448092916547175, "grad_norm": 0.28284960985183716, "learning_rate": 9.961010662822734e-07, "loss": 0.2882, "step": 17073 }, { "epoch": 2.4482363062804704, "grad_norm": 0.2655734121799469, "learning_rate": 9.956014447096607e-07, "loss": 0.2662, "step": 17074 }, { "epoch": 2.4483796960137654, "grad_norm": 0.2681213915348053, "learning_rate": 9.951019346133427e-07, "loss": 0.2738, "step": 17075 }, { "epoch": 2.4485230857470603, "grad_norm": 0.272633820772171, "learning_rate": 9.946025360072259e-07, "loss": 0.275, "step": 17076 }, { "epoch": 2.4486664754803558, "grad_norm": 0.2666719853878021, "learning_rate": 9.941032489052127e-07, "loss": 0.2774, "step": 17077 }, { "epoch": 2.4488098652136507, "grad_norm": 0.2721862196922302, "learning_rate": 9.936040733212033e-07, "loss": 0.2762, "step": 17078 }, { "epoch": 2.4489532549469457, "grad_norm": 0.2839010953903198, "learning_rate": 9.931050092690947e-07, "loss": 0.2804, "step": 17079 }, { "epoch": 2.4490966446802407, "grad_norm": 0.2938324511051178, "learning_rate": 9.926060567627777e-07, "loss": 0.2915, "step": 17080 }, { "epoch": 2.449240034413536, "grad_norm": 0.2725674510002136, "learning_rate": 9.921072158161432e-07, "loss": 0.2931, "step": 17081 }, { "epoch": 2.449383424146831, "grad_norm": 0.26304784417152405, "learning_rate": 9.916084864430792e-07, "loss": 0.2739, "step": 17082 }, { "epoch": 2.449526813880126, "grad_norm": 0.2917264401912689, "learning_rate": 9.911098686574682e-07, "loss": 0.2982, "step": 17083 }, { "epoch": 2.4496702036134215, "grad_norm": 0.2690448760986328, "learning_rate": 9.906113624731916e-07, "loss": 0.2835, "step": 17084 }, { "epoch": 2.4498135933467164, "grad_norm": 0.26268470287323, "learning_rate": 9.901129679041289e-07, "loss": 0.2618, "step": 17085 }, { "epoch": 2.4499569830800114, "grad_norm": 0.2610850930213928, "learning_rate": 9.896146849641513e-07, "loss": 0.2746, "step": 17086 }, { "epoch": 2.4501003728133064, "grad_norm": 0.2630974352359772, "learning_rate": 9.891165136671315e-07, "loss": 0.2989, "step": 17087 }, { "epoch": 2.450243762546602, "grad_norm": 0.2800029516220093, "learning_rate": 9.886184540269384e-07, "loss": 0.2776, "step": 17088 }, { "epoch": 2.450387152279897, "grad_norm": 0.2671317756175995, "learning_rate": 9.881205060574366e-07, "loss": 0.2616, "step": 17089 }, { "epoch": 2.4505305420131918, "grad_norm": 0.2647775411605835, "learning_rate": 9.876226697724888e-07, "loss": 0.2869, "step": 17090 }, { "epoch": 2.450673931746487, "grad_norm": 0.2803693413734436, "learning_rate": 9.871249451859532e-07, "loss": 0.2759, "step": 17091 }, { "epoch": 2.450817321479782, "grad_norm": 0.2757967710494995, "learning_rate": 9.866273323116866e-07, "loss": 0.2865, "step": 17092 }, { "epoch": 2.450960711213077, "grad_norm": 0.2687993347644806, "learning_rate": 9.861298311635414e-07, "loss": 0.2553, "step": 17093 }, { "epoch": 2.451104100946372, "grad_norm": 0.2766888439655304, "learning_rate": 9.85632441755367e-07, "loss": 0.2946, "step": 17094 }, { "epoch": 2.4512474906796675, "grad_norm": 0.284700870513916, "learning_rate": 9.8513516410101e-07, "loss": 0.2891, "step": 17095 }, { "epoch": 2.4513908804129625, "grad_norm": 0.28389835357666016, "learning_rate": 9.846379982143157e-07, "loss": 0.2762, "step": 17096 }, { "epoch": 2.4515342701462575, "grad_norm": 0.28972354531288147, "learning_rate": 9.84140944109121e-07, "loss": 0.2779, "step": 17097 }, { "epoch": 2.4516776598795524, "grad_norm": 0.2869870066642761, "learning_rate": 9.836440017992654e-07, "loss": 0.2603, "step": 17098 }, { "epoch": 2.451821049612848, "grad_norm": 0.280843585729599, "learning_rate": 9.83147171298582e-07, "loss": 0.2898, "step": 17099 }, { "epoch": 2.451964439346143, "grad_norm": 0.26115772128105164, "learning_rate": 9.826504526209024e-07, "loss": 0.2734, "step": 17100 }, { "epoch": 2.452107829079438, "grad_norm": 0.2795184552669525, "learning_rate": 9.821538457800556e-07, "loss": 0.2985, "step": 17101 }, { "epoch": 2.452251218812733, "grad_norm": 0.2658481299877167, "learning_rate": 9.81657350789864e-07, "loss": 0.2742, "step": 17102 }, { "epoch": 2.452394608546028, "grad_norm": 0.28216126561164856, "learning_rate": 9.811609676641503e-07, "loss": 0.2711, "step": 17103 }, { "epoch": 2.452537998279323, "grad_norm": 0.29358839988708496, "learning_rate": 9.806646964167333e-07, "loss": 0.2894, "step": 17104 }, { "epoch": 2.452681388012618, "grad_norm": 0.27840861678123474, "learning_rate": 9.801685370614284e-07, "loss": 0.2822, "step": 17105 }, { "epoch": 2.4528247777459136, "grad_norm": 0.2740229666233063, "learning_rate": 9.796724896120473e-07, "loss": 0.2967, "step": 17106 }, { "epoch": 2.4529681674792085, "grad_norm": 0.2626824676990509, "learning_rate": 9.791765540823995e-07, "loss": 0.2676, "step": 17107 }, { "epoch": 2.4531115572125035, "grad_norm": 0.28399014472961426, "learning_rate": 9.786807304862916e-07, "loss": 0.2669, "step": 17108 }, { "epoch": 2.4532549469457985, "grad_norm": 0.28125354647636414, "learning_rate": 9.781850188375264e-07, "loss": 0.2972, "step": 17109 }, { "epoch": 2.453398336679094, "grad_norm": 0.2991318106651306, "learning_rate": 9.77689419149903e-07, "loss": 0.274, "step": 17110 }, { "epoch": 2.453541726412389, "grad_norm": 0.2579072415828705, "learning_rate": 9.771939314372192e-07, "loss": 0.2885, "step": 17111 }, { "epoch": 2.453685116145684, "grad_norm": 0.27920204401016235, "learning_rate": 9.766985557132696e-07, "loss": 0.2923, "step": 17112 }, { "epoch": 2.4538285058789793, "grad_norm": 0.26282063126564026, "learning_rate": 9.762032919918413e-07, "loss": 0.2779, "step": 17113 }, { "epoch": 2.4539718956122742, "grad_norm": 0.26933377981185913, "learning_rate": 9.757081402867242e-07, "loss": 0.2711, "step": 17114 }, { "epoch": 2.454115285345569, "grad_norm": 0.2723315954208374, "learning_rate": 9.752131006117017e-07, "loss": 0.2874, "step": 17115 }, { "epoch": 2.454258675078864, "grad_norm": 0.2916134297847748, "learning_rate": 9.74718172980555e-07, "loss": 0.2818, "step": 17116 }, { "epoch": 2.4544020648121596, "grad_norm": 0.25632065534591675, "learning_rate": 9.742233574070635e-07, "loss": 0.2886, "step": 17117 }, { "epoch": 2.4545454545454546, "grad_norm": 0.2710495591163635, "learning_rate": 9.73728653905e-07, "loss": 0.276, "step": 17118 }, { "epoch": 2.4546888442787496, "grad_norm": 0.26323965191841125, "learning_rate": 9.73234062488137e-07, "loss": 0.3064, "step": 17119 }, { "epoch": 2.454832234012045, "grad_norm": 0.2545980215072632, "learning_rate": 9.727395831702436e-07, "loss": 0.2723, "step": 17120 }, { "epoch": 2.45497562374534, "grad_norm": 0.26563215255737305, "learning_rate": 9.722452159650852e-07, "loss": 0.2934, "step": 17121 }, { "epoch": 2.455119013478635, "grad_norm": 0.2580123841762543, "learning_rate": 9.71750960886424e-07, "loss": 0.2699, "step": 17122 }, { "epoch": 2.45526240321193, "grad_norm": 0.25045543909072876, "learning_rate": 9.712568179480203e-07, "loss": 0.2755, "step": 17123 }, { "epoch": 2.455405792945225, "grad_norm": 0.2646133303642273, "learning_rate": 9.707627871636289e-07, "loss": 0.2565, "step": 17124 }, { "epoch": 2.4555491826785203, "grad_norm": 0.2677997052669525, "learning_rate": 9.702688685470024e-07, "loss": 0.263, "step": 17125 }, { "epoch": 2.4556925724118153, "grad_norm": 0.26707157492637634, "learning_rate": 9.697750621118924e-07, "loss": 0.2847, "step": 17126 }, { "epoch": 2.4558359621451102, "grad_norm": 0.2662630081176758, "learning_rate": 9.692813678720435e-07, "loss": 0.2763, "step": 17127 }, { "epoch": 2.4559793518784057, "grad_norm": 0.25951480865478516, "learning_rate": 9.687877858412037e-07, "loss": 0.2726, "step": 17128 }, { "epoch": 2.4561227416117006, "grad_norm": 0.27681007981300354, "learning_rate": 9.682943160331093e-07, "loss": 0.2618, "step": 17129 }, { "epoch": 2.4562661313449956, "grad_norm": 0.2553827464580536, "learning_rate": 9.678009584614995e-07, "loss": 0.2892, "step": 17130 }, { "epoch": 2.4564095210782906, "grad_norm": 0.2702310085296631, "learning_rate": 9.67307713140108e-07, "loss": 0.2775, "step": 17131 }, { "epoch": 2.456552910811586, "grad_norm": 0.2830609977245331, "learning_rate": 9.668145800826668e-07, "loss": 0.296, "step": 17132 }, { "epoch": 2.456696300544881, "grad_norm": 0.2800072431564331, "learning_rate": 9.66321559302904e-07, "loss": 0.2804, "step": 17133 }, { "epoch": 2.456839690278176, "grad_norm": 0.2574842572212219, "learning_rate": 9.658286508145432e-07, "loss": 0.2762, "step": 17134 }, { "epoch": 2.4569830800114714, "grad_norm": 0.27241382002830505, "learning_rate": 9.653358546313073e-07, "loss": 0.2859, "step": 17135 }, { "epoch": 2.4571264697447663, "grad_norm": 0.29122042655944824, "learning_rate": 9.648431707669142e-07, "loss": 0.2579, "step": 17136 }, { "epoch": 2.4572698594780613, "grad_norm": 0.2768210172653198, "learning_rate": 9.643505992350805e-07, "loss": 0.2972, "step": 17137 }, { "epoch": 2.4574132492113563, "grad_norm": 0.2813914120197296, "learning_rate": 9.638581400495177e-07, "loss": 0.2843, "step": 17138 }, { "epoch": 2.4575566389446517, "grad_norm": 0.27384719252586365, "learning_rate": 9.633657932239372e-07, "loss": 0.2928, "step": 17139 }, { "epoch": 2.4577000286779467, "grad_norm": 0.2879297733306885, "learning_rate": 9.628735587720417e-07, "loss": 0.2664, "step": 17140 }, { "epoch": 2.4578434184112417, "grad_norm": 0.26549580693244934, "learning_rate": 9.623814367075362e-07, "loss": 0.2832, "step": 17141 }, { "epoch": 2.457986808144537, "grad_norm": 0.28644660115242004, "learning_rate": 9.618894270441208e-07, "loss": 0.2815, "step": 17142 }, { "epoch": 2.458130197877832, "grad_norm": 0.2846398651599884, "learning_rate": 9.61397529795492e-07, "loss": 0.2842, "step": 17143 }, { "epoch": 2.458273587611127, "grad_norm": 0.29190629720687866, "learning_rate": 9.609057449753435e-07, "loss": 0.2918, "step": 17144 }, { "epoch": 2.458416977344422, "grad_norm": 0.29095733165740967, "learning_rate": 9.604140725973655e-07, "loss": 0.2867, "step": 17145 }, { "epoch": 2.4585603670777174, "grad_norm": 0.25944092869758606, "learning_rate": 9.599225126752454e-07, "loss": 0.2824, "step": 17146 }, { "epoch": 2.4587037568110124, "grad_norm": 0.2857953608036041, "learning_rate": 9.59431065222668e-07, "loss": 0.2731, "step": 17147 }, { "epoch": 2.4588471465443074, "grad_norm": 0.2675286531448364, "learning_rate": 9.589397302533143e-07, "loss": 0.2819, "step": 17148 }, { "epoch": 2.4589905362776023, "grad_norm": 0.26329389214515686, "learning_rate": 9.58448507780862e-07, "loss": 0.284, "step": 17149 }, { "epoch": 2.4591339260108978, "grad_norm": 0.2772362530231476, "learning_rate": 9.579573978189876e-07, "loss": 0.2657, "step": 17150 }, { "epoch": 2.4592773157441927, "grad_norm": 0.3053354322910309, "learning_rate": 9.574664003813599e-07, "loss": 0.2994, "step": 17151 }, { "epoch": 2.4594207054774877, "grad_norm": 0.27154114842414856, "learning_rate": 9.569755154816495e-07, "loss": 0.298, "step": 17152 }, { "epoch": 2.4595640952107827, "grad_norm": 0.28207141160964966, "learning_rate": 9.56484743133521e-07, "loss": 0.2879, "step": 17153 }, { "epoch": 2.459707484944078, "grad_norm": 0.2693260908126831, "learning_rate": 9.559940833506376e-07, "loss": 0.2797, "step": 17154 }, { "epoch": 2.459850874677373, "grad_norm": 0.26973381638526917, "learning_rate": 9.555035361466592e-07, "loss": 0.2922, "step": 17155 }, { "epoch": 2.459994264410668, "grad_norm": 0.25753894448280334, "learning_rate": 9.550131015352392e-07, "loss": 0.2813, "step": 17156 }, { "epoch": 2.4601376541439635, "grad_norm": 0.2883181869983673, "learning_rate": 9.545227795300322e-07, "loss": 0.2697, "step": 17157 }, { "epoch": 2.4602810438772584, "grad_norm": 0.27857157588005066, "learning_rate": 9.54032570144688e-07, "loss": 0.2776, "step": 17158 }, { "epoch": 2.4604244336105534, "grad_norm": 0.26900288462638855, "learning_rate": 9.535424733928528e-07, "loss": 0.2858, "step": 17159 }, { "epoch": 2.4605678233438484, "grad_norm": 0.27790501713752747, "learning_rate": 9.530524892881709e-07, "loss": 0.2763, "step": 17160 }, { "epoch": 2.460711213077144, "grad_norm": 0.27287352085113525, "learning_rate": 9.52562617844282e-07, "loss": 0.2725, "step": 17161 }, { "epoch": 2.4608546028104388, "grad_norm": 0.2752114236354828, "learning_rate": 9.52072859074824e-07, "loss": 0.3014, "step": 17162 }, { "epoch": 2.4609979925437337, "grad_norm": 0.2692005932331085, "learning_rate": 9.515832129934311e-07, "loss": 0.2799, "step": 17163 }, { "epoch": 2.461141382277029, "grad_norm": 0.2776808738708496, "learning_rate": 9.510936796137332e-07, "loss": 0.2725, "step": 17164 }, { "epoch": 2.461284772010324, "grad_norm": 0.284166157245636, "learning_rate": 9.506042589493591e-07, "loss": 0.2931, "step": 17165 }, { "epoch": 2.461428161743619, "grad_norm": 0.26514285802841187, "learning_rate": 9.501149510139351e-07, "loss": 0.2583, "step": 17166 }, { "epoch": 2.461571551476914, "grad_norm": 0.2694348692893982, "learning_rate": 9.496257558210791e-07, "loss": 0.291, "step": 17167 }, { "epoch": 2.4617149412102095, "grad_norm": 0.28177008032798767, "learning_rate": 9.491366733844115e-07, "loss": 0.2733, "step": 17168 }, { "epoch": 2.4618583309435045, "grad_norm": 0.2652280628681183, "learning_rate": 9.486477037175473e-07, "loss": 0.2807, "step": 17169 }, { "epoch": 2.4620017206767995, "grad_norm": 0.26670923829078674, "learning_rate": 9.481588468340997e-07, "loss": 0.2833, "step": 17170 }, { "epoch": 2.462145110410095, "grad_norm": 0.26866811513900757, "learning_rate": 9.476701027476776e-07, "loss": 0.2671, "step": 17171 }, { "epoch": 2.46228850014339, "grad_norm": 0.28530704975128174, "learning_rate": 9.471814714718846e-07, "loss": 0.275, "step": 17172 }, { "epoch": 2.462431889876685, "grad_norm": 0.2932378053665161, "learning_rate": 9.466929530203256e-07, "loss": 0.2667, "step": 17173 }, { "epoch": 2.46257527960998, "grad_norm": 0.26470640301704407, "learning_rate": 9.462045474065996e-07, "loss": 0.2545, "step": 17174 }, { "epoch": 2.462718669343275, "grad_norm": 0.277990460395813, "learning_rate": 9.457162546443027e-07, "loss": 0.2956, "step": 17175 }, { "epoch": 2.46286205907657, "grad_norm": 0.2661646902561188, "learning_rate": 9.452280747470288e-07, "loss": 0.282, "step": 17176 }, { "epoch": 2.463005448809865, "grad_norm": 0.27411946654319763, "learning_rate": 9.447400077283691e-07, "loss": 0.2846, "step": 17177 }, { "epoch": 2.46314883854316, "grad_norm": 0.28414690494537354, "learning_rate": 9.442520536019084e-07, "loss": 0.2843, "step": 17178 }, { "epoch": 2.4632922282764556, "grad_norm": 0.2826526463031769, "learning_rate": 9.437642123812313e-07, "loss": 0.2782, "step": 17179 }, { "epoch": 2.4634356180097505, "grad_norm": 0.2822800278663635, "learning_rate": 9.432764840799191e-07, "loss": 0.2915, "step": 17180 }, { "epoch": 2.4635790077430455, "grad_norm": 0.26234036684036255, "learning_rate": 9.427888687115488e-07, "loss": 0.2973, "step": 17181 }, { "epoch": 2.4637223974763405, "grad_norm": 0.26692596077919006, "learning_rate": 9.423013662896957e-07, "loss": 0.2606, "step": 17182 }, { "epoch": 2.463865787209636, "grad_norm": 0.269027978181839, "learning_rate": 9.418139768279306e-07, "loss": 0.2715, "step": 17183 }, { "epoch": 2.464009176942931, "grad_norm": 0.25893425941467285, "learning_rate": 9.413267003398219e-07, "loss": 0.2826, "step": 17184 }, { "epoch": 2.464152566676226, "grad_norm": 0.26474496722221375, "learning_rate": 9.408395368389339e-07, "loss": 0.2817, "step": 17185 }, { "epoch": 2.4642959564095213, "grad_norm": 0.2656356990337372, "learning_rate": 9.403524863388291e-07, "loss": 0.2752, "step": 17186 }, { "epoch": 2.4644393461428162, "grad_norm": 0.25861167907714844, "learning_rate": 9.398655488530678e-07, "loss": 0.273, "step": 17187 }, { "epoch": 2.464582735876111, "grad_norm": 0.2664620578289032, "learning_rate": 9.393787243952023e-07, "loss": 0.2742, "step": 17188 }, { "epoch": 2.464726125609406, "grad_norm": 0.26902347803115845, "learning_rate": 9.388920129787871e-07, "loss": 0.287, "step": 17189 }, { "epoch": 2.4648695153427016, "grad_norm": 0.2619074881076813, "learning_rate": 9.38405414617371e-07, "loss": 0.2824, "step": 17190 }, { "epoch": 2.4650129050759966, "grad_norm": 0.2798500061035156, "learning_rate": 9.379189293245e-07, "loss": 0.28, "step": 17191 }, { "epoch": 2.4651562948092915, "grad_norm": 0.2732082009315491, "learning_rate": 9.374325571137177e-07, "loss": 0.2876, "step": 17192 }, { "epoch": 2.465299684542587, "grad_norm": 0.2754988968372345, "learning_rate": 9.369462979985649e-07, "loss": 0.2676, "step": 17193 }, { "epoch": 2.465443074275882, "grad_norm": 0.2756264805793762, "learning_rate": 9.364601519925754e-07, "loss": 0.2928, "step": 17194 }, { "epoch": 2.465586464009177, "grad_norm": 0.273945152759552, "learning_rate": 9.359741191092847e-07, "loss": 0.2858, "step": 17195 }, { "epoch": 2.465729853742472, "grad_norm": 0.2740696668624878, "learning_rate": 9.354881993622228e-07, "loss": 0.2837, "step": 17196 }, { "epoch": 2.4658732434757673, "grad_norm": 0.27159929275512695, "learning_rate": 9.350023927649171e-07, "loss": 0.2748, "step": 17197 }, { "epoch": 2.4660166332090623, "grad_norm": 0.2617041766643524, "learning_rate": 9.345166993308913e-07, "loss": 0.2675, "step": 17198 }, { "epoch": 2.4661600229423573, "grad_norm": 0.28332385420799255, "learning_rate": 9.340311190736668e-07, "loss": 0.2851, "step": 17199 }, { "epoch": 2.4663034126756522, "grad_norm": 0.2817283868789673, "learning_rate": 9.335456520067615e-07, "loss": 0.2738, "step": 17200 }, { "epoch": 2.4664468024089476, "grad_norm": 0.2661806344985962, "learning_rate": 9.330602981436898e-07, "loss": 0.2696, "step": 17201 }, { "epoch": 2.4665901921422426, "grad_norm": 0.270221084356308, "learning_rate": 9.325750574979631e-07, "loss": 0.2855, "step": 17202 }, { "epoch": 2.4667335818755376, "grad_norm": 0.26129770278930664, "learning_rate": 9.320899300830905e-07, "loss": 0.2748, "step": 17203 }, { "epoch": 2.4668769716088326, "grad_norm": 0.2660316526889801, "learning_rate": 9.316049159125772e-07, "loss": 0.2728, "step": 17204 }, { "epoch": 2.467020361342128, "grad_norm": 0.2528309226036072, "learning_rate": 9.311200149999239e-07, "loss": 0.27, "step": 17205 }, { "epoch": 2.467163751075423, "grad_norm": 0.28730979561805725, "learning_rate": 9.3063522735863e-07, "loss": 0.2775, "step": 17206 }, { "epoch": 2.467307140808718, "grad_norm": 0.26349106431007385, "learning_rate": 9.301505530021915e-07, "loss": 0.2804, "step": 17207 }, { "epoch": 2.4674505305420134, "grad_norm": 0.2599713206291199, "learning_rate": 9.296659919441015e-07, "loss": 0.2631, "step": 17208 }, { "epoch": 2.4675939202753083, "grad_norm": 0.261260449886322, "learning_rate": 9.291815441978496e-07, "loss": 0.2826, "step": 17209 }, { "epoch": 2.4677373100086033, "grad_norm": 0.25528451800346375, "learning_rate": 9.286972097769204e-07, "loss": 0.2787, "step": 17210 }, { "epoch": 2.4678806997418983, "grad_norm": 0.2762739658355713, "learning_rate": 9.282129886947977e-07, "loss": 0.2857, "step": 17211 }, { "epoch": 2.4680240894751937, "grad_norm": 0.2805118262767792, "learning_rate": 9.277288809649626e-07, "loss": 0.2754, "step": 17212 }, { "epoch": 2.4681674792084887, "grad_norm": 0.26373907923698425, "learning_rate": 9.272448866008904e-07, "loss": 0.2973, "step": 17213 }, { "epoch": 2.4683108689417836, "grad_norm": 0.2712341547012329, "learning_rate": 9.26761005616057e-07, "loss": 0.2809, "step": 17214 }, { "epoch": 2.468454258675079, "grad_norm": 0.27594316005706787, "learning_rate": 9.262772380239299e-07, "loss": 0.2769, "step": 17215 }, { "epoch": 2.468597648408374, "grad_norm": 0.2697714865207672, "learning_rate": 9.257935838379772e-07, "loss": 0.2776, "step": 17216 }, { "epoch": 2.468741038141669, "grad_norm": 0.27821144461631775, "learning_rate": 9.253100430716643e-07, "loss": 0.2872, "step": 17217 }, { "epoch": 2.468884427874964, "grad_norm": 0.2640848457813263, "learning_rate": 9.248266157384528e-07, "loss": 0.282, "step": 17218 }, { "epoch": 2.4690278176082594, "grad_norm": 0.26065754890441895, "learning_rate": 9.243433018517989e-07, "loss": 0.2791, "step": 17219 }, { "epoch": 2.4691712073415544, "grad_norm": 0.2755809426307678, "learning_rate": 9.238601014251591e-07, "loss": 0.2693, "step": 17220 }, { "epoch": 2.4693145970748493, "grad_norm": 0.27242934703826904, "learning_rate": 9.233770144719828e-07, "loss": 0.2746, "step": 17221 }, { "epoch": 2.4694579868081448, "grad_norm": 0.2660197913646698, "learning_rate": 9.228940410057191e-07, "loss": 0.2866, "step": 17222 }, { "epoch": 2.4696013765414397, "grad_norm": 0.2746866047382355, "learning_rate": 9.22411181039814e-07, "loss": 0.2924, "step": 17223 }, { "epoch": 2.4697447662747347, "grad_norm": 0.2754678428173065, "learning_rate": 9.219284345877089e-07, "loss": 0.2793, "step": 17224 }, { "epoch": 2.4698881560080297, "grad_norm": 0.2647949457168579, "learning_rate": 9.214458016628441e-07, "loss": 0.2679, "step": 17225 }, { "epoch": 2.470031545741325, "grad_norm": 0.2588789463043213, "learning_rate": 9.209632822786529e-07, "loss": 0.2752, "step": 17226 }, { "epoch": 2.47017493547462, "grad_norm": 0.2738129794597626, "learning_rate": 9.20480876448569e-07, "loss": 0.2678, "step": 17227 }, { "epoch": 2.470318325207915, "grad_norm": 0.2814016044139862, "learning_rate": 9.19998584186022e-07, "loss": 0.295, "step": 17228 }, { "epoch": 2.47046171494121, "grad_norm": 0.27398964762687683, "learning_rate": 9.195164055044387e-07, "loss": 0.2776, "step": 17229 }, { "epoch": 2.4706051046745054, "grad_norm": 0.2665664553642273, "learning_rate": 9.190343404172414e-07, "loss": 0.2645, "step": 17230 }, { "epoch": 2.4707484944078004, "grad_norm": 0.2595365345478058, "learning_rate": 9.185523889378517e-07, "loss": 0.2615, "step": 17231 }, { "epoch": 2.4708918841410954, "grad_norm": 0.25839659571647644, "learning_rate": 9.180705510796839e-07, "loss": 0.2902, "step": 17232 }, { "epoch": 2.4710352738743904, "grad_norm": 0.2872255742549896, "learning_rate": 9.175888268561528e-07, "loss": 0.2918, "step": 17233 }, { "epoch": 2.471178663607686, "grad_norm": 0.25561192631721497, "learning_rate": 9.171072162806688e-07, "loss": 0.278, "step": 17234 }, { "epoch": 2.4713220533409808, "grad_norm": 0.2758803963661194, "learning_rate": 9.166257193666389e-07, "loss": 0.2755, "step": 17235 }, { "epoch": 2.4714654430742757, "grad_norm": 0.27671289443969727, "learning_rate": 9.16144336127468e-07, "loss": 0.2612, "step": 17236 }, { "epoch": 2.471608832807571, "grad_norm": 0.28927838802337646, "learning_rate": 9.156630665765564e-07, "loss": 0.2914, "step": 17237 }, { "epoch": 2.471752222540866, "grad_norm": 0.29005691409111023, "learning_rate": 9.151819107273019e-07, "loss": 0.2971, "step": 17238 }, { "epoch": 2.471895612274161, "grad_norm": 0.27074816823005676, "learning_rate": 9.147008685930998e-07, "loss": 0.2841, "step": 17239 }, { "epoch": 2.472039002007456, "grad_norm": 0.27270272374153137, "learning_rate": 9.142199401873408e-07, "loss": 0.2883, "step": 17240 }, { "epoch": 2.4721823917407515, "grad_norm": 0.2769874334335327, "learning_rate": 9.13739125523414e-07, "loss": 0.2753, "step": 17241 }, { "epoch": 2.4723257814740465, "grad_norm": 0.264172226190567, "learning_rate": 9.132584246147052e-07, "loss": 0.2782, "step": 17242 }, { "epoch": 2.4724691712073414, "grad_norm": 0.2746313214302063, "learning_rate": 9.127778374745938e-07, "loss": 0.2863, "step": 17243 }, { "epoch": 2.472612560940637, "grad_norm": 0.278982937335968, "learning_rate": 9.1229736411646e-07, "loss": 0.2684, "step": 17244 }, { "epoch": 2.472755950673932, "grad_norm": 0.2607458233833313, "learning_rate": 9.118170045536801e-07, "loss": 0.2934, "step": 17245 }, { "epoch": 2.472899340407227, "grad_norm": 0.29685065150260925, "learning_rate": 9.113367587996253e-07, "loss": 0.2862, "step": 17246 }, { "epoch": 2.473042730140522, "grad_norm": 0.2784724831581116, "learning_rate": 9.108566268676672e-07, "loss": 0.2913, "step": 17247 }, { "epoch": 2.473186119873817, "grad_norm": 0.2712395191192627, "learning_rate": 9.10376608771169e-07, "loss": 0.2871, "step": 17248 }, { "epoch": 2.473329509607112, "grad_norm": 0.2849612534046173, "learning_rate": 9.098967045234952e-07, "loss": 0.2793, "step": 17249 }, { "epoch": 2.473472899340407, "grad_norm": 0.26952412724494934, "learning_rate": 9.094169141380049e-07, "loss": 0.2693, "step": 17250 }, { "epoch": 2.4736162890737026, "grad_norm": 0.28925707936286926, "learning_rate": 9.089372376280558e-07, "loss": 0.2882, "step": 17251 }, { "epoch": 2.4737596788069975, "grad_norm": 0.2672143876552582, "learning_rate": 9.084576750070002e-07, "loss": 0.2767, "step": 17252 }, { "epoch": 2.4739030685402925, "grad_norm": 0.2825127840042114, "learning_rate": 9.079782262881892e-07, "loss": 0.2666, "step": 17253 }, { "epoch": 2.4740464582735875, "grad_norm": 0.26626864075660706, "learning_rate": 9.074988914849698e-07, "loss": 0.285, "step": 17254 }, { "epoch": 2.4741898480068825, "grad_norm": 0.27100518345832825, "learning_rate": 9.070196706106859e-07, "loss": 0.293, "step": 17255 }, { "epoch": 2.474333237740178, "grad_norm": 0.26973098516464233, "learning_rate": 9.065405636786778e-07, "loss": 0.2732, "step": 17256 }, { "epoch": 2.474476627473473, "grad_norm": 0.29809051752090454, "learning_rate": 9.06061570702284e-07, "loss": 0.2864, "step": 17257 }, { "epoch": 2.474620017206768, "grad_norm": 0.2807309329509735, "learning_rate": 9.055826916948391e-07, "loss": 0.2756, "step": 17258 }, { "epoch": 2.4747634069400632, "grad_norm": 0.2687409818172455, "learning_rate": 9.051039266696726e-07, "loss": 0.3048, "step": 17259 }, { "epoch": 2.4749067966733582, "grad_norm": 0.2600811719894409, "learning_rate": 9.046252756401141e-07, "loss": 0.2895, "step": 17260 }, { "epoch": 2.475050186406653, "grad_norm": 0.2778419554233551, "learning_rate": 9.041467386194875e-07, "loss": 0.2837, "step": 17261 }, { "epoch": 2.475193576139948, "grad_norm": 0.2803914546966553, "learning_rate": 9.036683156211157e-07, "loss": 0.2679, "step": 17262 }, { "epoch": 2.4753369658732436, "grad_norm": 0.2679755687713623, "learning_rate": 9.031900066583171e-07, "loss": 0.2778, "step": 17263 }, { "epoch": 2.4754803556065386, "grad_norm": 0.27555927634239197, "learning_rate": 9.027118117444061e-07, "loss": 0.2779, "step": 17264 }, { "epoch": 2.4756237453398335, "grad_norm": 0.24901048839092255, "learning_rate": 9.022337308926949e-07, "loss": 0.2759, "step": 17265 }, { "epoch": 2.475767135073129, "grad_norm": 0.2526859939098358, "learning_rate": 9.017557641164931e-07, "loss": 0.275, "step": 17266 }, { "epoch": 2.475910524806424, "grad_norm": 0.2575969994068146, "learning_rate": 9.012779114291071e-07, "loss": 0.2855, "step": 17267 }, { "epoch": 2.476053914539719, "grad_norm": 0.2782914638519287, "learning_rate": 9.008001728438387e-07, "loss": 0.2918, "step": 17268 }, { "epoch": 2.476197304273014, "grad_norm": 0.28271484375, "learning_rate": 9.003225483739891e-07, "loss": 0.2861, "step": 17269 }, { "epoch": 2.4763406940063093, "grad_norm": 0.26249417662620544, "learning_rate": 8.998450380328516e-07, "loss": 0.2674, "step": 17270 }, { "epoch": 2.4764840837396043, "grad_norm": 0.26416197419166565, "learning_rate": 8.993676418337216e-07, "loss": 0.2727, "step": 17271 }, { "epoch": 2.4766274734728992, "grad_norm": 0.267686128616333, "learning_rate": 8.988903597898868e-07, "loss": 0.2906, "step": 17272 }, { "epoch": 2.4767708632061947, "grad_norm": 0.270698219537735, "learning_rate": 8.984131919146371e-07, "loss": 0.2773, "step": 17273 }, { "epoch": 2.4769142529394896, "grad_norm": 0.2630748748779297, "learning_rate": 8.979361382212559e-07, "loss": 0.2832, "step": 17274 }, { "epoch": 2.4770576426727846, "grad_norm": 0.26817065477371216, "learning_rate": 8.974591987230214e-07, "loss": 0.2815, "step": 17275 }, { "epoch": 2.4772010324060796, "grad_norm": 0.26191240549087524, "learning_rate": 8.969823734332117e-07, "loss": 0.2719, "step": 17276 }, { "epoch": 2.477344422139375, "grad_norm": 0.28291404247283936, "learning_rate": 8.965056623651014e-07, "loss": 0.2662, "step": 17277 }, { "epoch": 2.47748781187267, "grad_norm": 0.2733970582485199, "learning_rate": 8.960290655319609e-07, "loss": 0.2853, "step": 17278 }, { "epoch": 2.477631201605965, "grad_norm": 0.2737099230289459, "learning_rate": 8.955525829470596e-07, "loss": 0.2671, "step": 17279 }, { "epoch": 2.47777459133926, "grad_norm": 0.27834054827690125, "learning_rate": 8.950762146236592e-07, "loss": 0.2933, "step": 17280 }, { "epoch": 2.4779179810725553, "grad_norm": 0.26306572556495667, "learning_rate": 8.945999605750222e-07, "loss": 0.2735, "step": 17281 }, { "epoch": 2.4780613708058503, "grad_norm": 0.27179059386253357, "learning_rate": 8.941238208144071e-07, "loss": 0.2774, "step": 17282 }, { "epoch": 2.4782047605391453, "grad_norm": 0.25861263275146484, "learning_rate": 8.936477953550688e-07, "loss": 0.2604, "step": 17283 }, { "epoch": 2.4783481502724403, "grad_norm": 0.2852877974510193, "learning_rate": 8.931718842102588e-07, "loss": 0.2931, "step": 17284 }, { "epoch": 2.4784915400057357, "grad_norm": 0.28801751136779785, "learning_rate": 8.926960873932278e-07, "loss": 0.2668, "step": 17285 }, { "epoch": 2.4786349297390307, "grad_norm": 0.2541063725948334, "learning_rate": 8.922204049172184e-07, "loss": 0.2759, "step": 17286 }, { "epoch": 2.4787783194723256, "grad_norm": 0.26167890429496765, "learning_rate": 8.917448367954734e-07, "loss": 0.2923, "step": 17287 }, { "epoch": 2.478921709205621, "grad_norm": 0.2812225818634033, "learning_rate": 8.912693830412328e-07, "loss": 0.2813, "step": 17288 }, { "epoch": 2.479065098938916, "grad_norm": 0.2661999762058258, "learning_rate": 8.907940436677321e-07, "loss": 0.2871, "step": 17289 }, { "epoch": 2.479208488672211, "grad_norm": 0.27409178018569946, "learning_rate": 8.903188186882045e-07, "loss": 0.2842, "step": 17290 }, { "epoch": 2.479351878405506, "grad_norm": 0.2922925353050232, "learning_rate": 8.898437081158784e-07, "loss": 0.2916, "step": 17291 }, { "epoch": 2.4794952681388014, "grad_norm": 0.271455317735672, "learning_rate": 8.893687119639815e-07, "loss": 0.2639, "step": 17292 }, { "epoch": 2.4796386578720964, "grad_norm": 0.27188652753829956, "learning_rate": 8.888938302457356e-07, "loss": 0.3058, "step": 17293 }, { "epoch": 2.4797820476053913, "grad_norm": 0.26267093420028687, "learning_rate": 8.884190629743622e-07, "loss": 0.2689, "step": 17294 }, { "epoch": 2.4799254373386868, "grad_norm": 0.28593602776527405, "learning_rate": 8.879444101630769e-07, "loss": 0.2792, "step": 17295 }, { "epoch": 2.4800688270719817, "grad_norm": 0.28289124369621277, "learning_rate": 8.874698718250952e-07, "loss": 0.2648, "step": 17296 }, { "epoch": 2.4802122168052767, "grad_norm": 0.2651815414428711, "learning_rate": 8.869954479736243e-07, "loss": 0.2806, "step": 17297 }, { "epoch": 2.4803556065385717, "grad_norm": 0.2751825153827667, "learning_rate": 8.865211386218731e-07, "loss": 0.2692, "step": 17298 }, { "epoch": 2.480498996271867, "grad_norm": 0.30000850558280945, "learning_rate": 8.86046943783046e-07, "loss": 0.2693, "step": 17299 }, { "epoch": 2.480642386005162, "grad_norm": 0.29068219661712646, "learning_rate": 8.855728634703437e-07, "loss": 0.2848, "step": 17300 }, { "epoch": 2.480785775738457, "grad_norm": 0.274423211812973, "learning_rate": 8.850988976969649e-07, "loss": 0.2931, "step": 17301 }, { "epoch": 2.4809291654717525, "grad_norm": 0.2630007863044739, "learning_rate": 8.846250464761014e-07, "loss": 0.2862, "step": 17302 }, { "epoch": 2.4810725552050474, "grad_norm": 0.25987252593040466, "learning_rate": 8.841513098209458e-07, "loss": 0.2596, "step": 17303 }, { "epoch": 2.4812159449383424, "grad_norm": 0.2747786045074463, "learning_rate": 8.836776877446868e-07, "loss": 0.2912, "step": 17304 }, { "epoch": 2.4813593346716374, "grad_norm": 0.26829829812049866, "learning_rate": 8.832041802605085e-07, "loss": 0.2869, "step": 17305 }, { "epoch": 2.4815027244049324, "grad_norm": 0.27233514189720154, "learning_rate": 8.827307873815932e-07, "loss": 0.2768, "step": 17306 }, { "epoch": 2.4816461141382278, "grad_norm": 0.2874075770378113, "learning_rate": 8.822575091211194e-07, "loss": 0.279, "step": 17307 }, { "epoch": 2.4817895038715228, "grad_norm": 0.26810869574546814, "learning_rate": 8.81784345492262e-07, "loss": 0.2851, "step": 17308 }, { "epoch": 2.4819328936048177, "grad_norm": 0.260720431804657, "learning_rate": 8.813112965081932e-07, "loss": 0.2825, "step": 17309 }, { "epoch": 2.482076283338113, "grad_norm": 0.27661606669425964, "learning_rate": 8.808383621820827e-07, "loss": 0.2766, "step": 17310 }, { "epoch": 2.482219673071408, "grad_norm": 0.27015405893325806, "learning_rate": 8.803655425270957e-07, "loss": 0.2832, "step": 17311 }, { "epoch": 2.482363062804703, "grad_norm": 0.2676735520362854, "learning_rate": 8.798928375563959e-07, "loss": 0.2705, "step": 17312 }, { "epoch": 2.482506452537998, "grad_norm": 0.2781173884868622, "learning_rate": 8.794202472831403e-07, "loss": 0.3038, "step": 17313 }, { "epoch": 2.4826498422712935, "grad_norm": 0.27205991744995117, "learning_rate": 8.789477717204864e-07, "loss": 0.2718, "step": 17314 }, { "epoch": 2.4827932320045885, "grad_norm": 0.27965837717056274, "learning_rate": 8.784754108815874e-07, "loss": 0.2848, "step": 17315 }, { "epoch": 2.4829366217378834, "grad_norm": 0.2693590819835663, "learning_rate": 8.780031647795922e-07, "loss": 0.2825, "step": 17316 }, { "epoch": 2.483080011471179, "grad_norm": 0.27073827385902405, "learning_rate": 8.775310334276499e-07, "loss": 0.2787, "step": 17317 }, { "epoch": 2.483223401204474, "grad_norm": 0.27933695912361145, "learning_rate": 8.770590168389009e-07, "loss": 0.2686, "step": 17318 }, { "epoch": 2.483366790937769, "grad_norm": 0.27322331070899963, "learning_rate": 8.765871150264866e-07, "loss": 0.2964, "step": 17319 }, { "epoch": 2.4835101806710638, "grad_norm": 0.28484052419662476, "learning_rate": 8.761153280035433e-07, "loss": 0.2989, "step": 17320 }, { "epoch": 2.483653570404359, "grad_norm": 0.2705839276313782, "learning_rate": 8.756436557832066e-07, "loss": 0.2827, "step": 17321 }, { "epoch": 2.483796960137654, "grad_norm": 0.28093594312667847, "learning_rate": 8.751720983786055e-07, "loss": 0.3107, "step": 17322 }, { "epoch": 2.483940349870949, "grad_norm": 0.2806980311870575, "learning_rate": 8.747006558028692e-07, "loss": 0.2891, "step": 17323 }, { "epoch": 2.4840837396042446, "grad_norm": 0.27940621972084045, "learning_rate": 8.742293280691194e-07, "loss": 0.2945, "step": 17324 }, { "epoch": 2.4842271293375395, "grad_norm": 0.2644280791282654, "learning_rate": 8.73758115190479e-07, "loss": 0.2691, "step": 17325 }, { "epoch": 2.4843705190708345, "grad_norm": 0.25141048431396484, "learning_rate": 8.73287017180065e-07, "loss": 0.2733, "step": 17326 }, { "epoch": 2.4845139088041295, "grad_norm": 0.26210817694664, "learning_rate": 8.728160340509922e-07, "loss": 0.2753, "step": 17327 }, { "epoch": 2.484657298537425, "grad_norm": 0.2921537160873413, "learning_rate": 8.723451658163723e-07, "loss": 0.2901, "step": 17328 }, { "epoch": 2.48480068827072, "grad_norm": 0.2542608976364136, "learning_rate": 8.718744124893136e-07, "loss": 0.2884, "step": 17329 }, { "epoch": 2.484944078004015, "grad_norm": 0.2700762450695038, "learning_rate": 8.714037740829212e-07, "loss": 0.2834, "step": 17330 }, { "epoch": 2.48508746773731, "grad_norm": 0.2762964963912964, "learning_rate": 8.709332506102964e-07, "loss": 0.3025, "step": 17331 }, { "epoch": 2.4852308574706052, "grad_norm": 0.26964691281318665, "learning_rate": 8.704628420845385e-07, "loss": 0.2682, "step": 17332 }, { "epoch": 2.4853742472039, "grad_norm": 0.2762758731842041, "learning_rate": 8.699925485187433e-07, "loss": 0.2707, "step": 17333 }, { "epoch": 2.485517636937195, "grad_norm": 0.26588135957717896, "learning_rate": 8.69522369926002e-07, "loss": 0.2965, "step": 17334 }, { "epoch": 2.48566102667049, "grad_norm": 0.28124526143074036, "learning_rate": 8.690523063194034e-07, "loss": 0.2726, "step": 17335 }, { "epoch": 2.4858044164037856, "grad_norm": 0.27869531512260437, "learning_rate": 8.685823577120345e-07, "loss": 0.2839, "step": 17336 }, { "epoch": 2.4859478061370806, "grad_norm": 0.29054009914398193, "learning_rate": 8.681125241169768e-07, "loss": 0.2954, "step": 17337 }, { "epoch": 2.4860911958703755, "grad_norm": 0.2960636019706726, "learning_rate": 8.676428055473108e-07, "loss": 0.2718, "step": 17338 }, { "epoch": 2.486234585603671, "grad_norm": 0.2610247731208801, "learning_rate": 8.671732020161139e-07, "loss": 0.2911, "step": 17339 }, { "epoch": 2.486377975336966, "grad_norm": 0.2653716504573822, "learning_rate": 8.667037135364558e-07, "loss": 0.2832, "step": 17340 }, { "epoch": 2.486521365070261, "grad_norm": 0.2675389051437378, "learning_rate": 8.662343401214085e-07, "loss": 0.2691, "step": 17341 }, { "epoch": 2.486664754803556, "grad_norm": 0.26194655895233154, "learning_rate": 8.657650817840379e-07, "loss": 0.2816, "step": 17342 }, { "epoch": 2.4868081445368513, "grad_norm": 0.26377418637275696, "learning_rate": 8.652959385374077e-07, "loss": 0.2873, "step": 17343 }, { "epoch": 2.4869515342701463, "grad_norm": 0.29905959963798523, "learning_rate": 8.648269103945789e-07, "loss": 0.2749, "step": 17344 }, { "epoch": 2.4870949240034412, "grad_norm": 0.281450092792511, "learning_rate": 8.643579973686073e-07, "loss": 0.2924, "step": 17345 }, { "epoch": 2.4872383137367367, "grad_norm": 0.2767638862133026, "learning_rate": 8.638891994725474e-07, "loss": 0.2727, "step": 17346 }, { "epoch": 2.4873817034700316, "grad_norm": 0.27872180938720703, "learning_rate": 8.634205167194493e-07, "loss": 0.2885, "step": 17347 }, { "epoch": 2.4875250932033266, "grad_norm": 0.26208657026290894, "learning_rate": 8.629519491223609e-07, "loss": 0.2911, "step": 17348 }, { "epoch": 2.4876684829366216, "grad_norm": 0.26794329285621643, "learning_rate": 8.624834966943268e-07, "loss": 0.2688, "step": 17349 }, { "epoch": 2.487811872669917, "grad_norm": 0.2604277431964874, "learning_rate": 8.620151594483883e-07, "loss": 0.2831, "step": 17350 }, { "epoch": 2.487955262403212, "grad_norm": 0.2777998149394989, "learning_rate": 8.61546937397581e-07, "loss": 0.2844, "step": 17351 }, { "epoch": 2.488098652136507, "grad_norm": 0.27479079365730286, "learning_rate": 8.610788305549406e-07, "loss": 0.3008, "step": 17352 }, { "epoch": 2.4882420418698024, "grad_norm": 0.2771228551864624, "learning_rate": 8.606108389334994e-07, "loss": 0.3145, "step": 17353 }, { "epoch": 2.4883854316030973, "grad_norm": 0.26168638467788696, "learning_rate": 8.601429625462843e-07, "loss": 0.2855, "step": 17354 }, { "epoch": 2.4885288213363923, "grad_norm": 0.2699836492538452, "learning_rate": 8.596752014063226e-07, "loss": 0.2785, "step": 17355 }, { "epoch": 2.4886722110696873, "grad_norm": 0.2643275856971741, "learning_rate": 8.592075555266327e-07, "loss": 0.2725, "step": 17356 }, { "epoch": 2.4888156008029827, "grad_norm": 0.27686065435409546, "learning_rate": 8.587400249202343e-07, "loss": 0.2754, "step": 17357 }, { "epoch": 2.4889589905362777, "grad_norm": 0.26511090993881226, "learning_rate": 8.582726096001437e-07, "loss": 0.281, "step": 17358 }, { "epoch": 2.4891023802695726, "grad_norm": 0.2491900473833084, "learning_rate": 8.578053095793726e-07, "loss": 0.2677, "step": 17359 }, { "epoch": 2.4892457700028676, "grad_norm": 0.26305457949638367, "learning_rate": 8.573381248709305e-07, "loss": 0.2815, "step": 17360 }, { "epoch": 2.489389159736163, "grad_norm": 0.2678532302379608, "learning_rate": 8.568710554878207e-07, "loss": 0.279, "step": 17361 }, { "epoch": 2.489532549469458, "grad_norm": 0.27280670404434204, "learning_rate": 8.56404101443048e-07, "loss": 0.2703, "step": 17362 }, { "epoch": 2.489675939202753, "grad_norm": 0.26051297783851624, "learning_rate": 8.55937262749611e-07, "loss": 0.291, "step": 17363 }, { "epoch": 2.489819328936048, "grad_norm": 0.28373292088508606, "learning_rate": 8.55470539420506e-07, "loss": 0.2929, "step": 17364 }, { "epoch": 2.4899627186693434, "grad_norm": 0.2667713463306427, "learning_rate": 8.550039314687258e-07, "loss": 0.2731, "step": 17365 }, { "epoch": 2.4901061084026384, "grad_norm": 0.2748473584651947, "learning_rate": 8.545374389072613e-07, "loss": 0.2724, "step": 17366 }, { "epoch": 2.4902494981359333, "grad_norm": 0.27180948853492737, "learning_rate": 8.540710617490961e-07, "loss": 0.29, "step": 17367 }, { "epoch": 2.4903928878692287, "grad_norm": 0.28347307443618774, "learning_rate": 8.53604800007215e-07, "loss": 0.2957, "step": 17368 }, { "epoch": 2.4905362776025237, "grad_norm": 0.2823455035686493, "learning_rate": 8.531386536945979e-07, "loss": 0.2767, "step": 17369 }, { "epoch": 2.4906796673358187, "grad_norm": 0.25109341740608215, "learning_rate": 8.526726228242216e-07, "loss": 0.2861, "step": 17370 }, { "epoch": 2.4908230570691137, "grad_norm": 0.2764136791229248, "learning_rate": 8.522067074090612e-07, "loss": 0.3001, "step": 17371 }, { "epoch": 2.490966446802409, "grad_norm": 0.25383010506629944, "learning_rate": 8.517409074620842e-07, "loss": 0.2832, "step": 17372 }, { "epoch": 2.491109836535704, "grad_norm": 0.2685762345790863, "learning_rate": 8.51275222996259e-07, "loss": 0.2785, "step": 17373 }, { "epoch": 2.491253226268999, "grad_norm": 0.2677266001701355, "learning_rate": 8.508096540245497e-07, "loss": 0.2841, "step": 17374 }, { "epoch": 2.4913966160022945, "grad_norm": 0.2728860080242157, "learning_rate": 8.503442005599166e-07, "loss": 0.2684, "step": 17375 }, { "epoch": 2.4915400057355894, "grad_norm": 0.26795634627342224, "learning_rate": 8.498788626153182e-07, "loss": 0.2841, "step": 17376 }, { "epoch": 2.4916833954688844, "grad_norm": 0.2924855053424835, "learning_rate": 8.49413640203709e-07, "loss": 0.2996, "step": 17377 }, { "epoch": 2.4918267852021794, "grad_norm": 0.25970879197120667, "learning_rate": 8.489485333380382e-07, "loss": 0.2712, "step": 17378 }, { "epoch": 2.491970174935475, "grad_norm": 0.28060194849967957, "learning_rate": 8.484835420312548e-07, "loss": 0.2686, "step": 17379 }, { "epoch": 2.4921135646687698, "grad_norm": 0.26204976439476013, "learning_rate": 8.480186662963031e-07, "loss": 0.2624, "step": 17380 }, { "epoch": 2.4922569544020647, "grad_norm": 0.28172242641448975, "learning_rate": 8.475539061461252e-07, "loss": 0.2949, "step": 17381 }, { "epoch": 2.49240034413536, "grad_norm": 0.2740919589996338, "learning_rate": 8.470892615936588e-07, "loss": 0.2829, "step": 17382 }, { "epoch": 2.492543733868655, "grad_norm": 0.28044241666793823, "learning_rate": 8.466247326518384e-07, "loss": 0.2818, "step": 17383 }, { "epoch": 2.49268712360195, "grad_norm": 0.282574862241745, "learning_rate": 8.461603193335971e-07, "loss": 0.2824, "step": 17384 }, { "epoch": 2.492830513335245, "grad_norm": 0.26195603609085083, "learning_rate": 8.456960216518622e-07, "loss": 0.2936, "step": 17385 }, { "epoch": 2.49297390306854, "grad_norm": 0.262132465839386, "learning_rate": 8.452318396195597e-07, "loss": 0.3123, "step": 17386 }, { "epoch": 2.4931172928018355, "grad_norm": 0.2827681303024292, "learning_rate": 8.447677732496129e-07, "loss": 0.2768, "step": 17387 }, { "epoch": 2.4932606825351304, "grad_norm": 0.2879921495914459, "learning_rate": 8.443038225549383e-07, "loss": 0.2838, "step": 17388 }, { "epoch": 2.4934040722684254, "grad_norm": 0.2514417767524719, "learning_rate": 8.438399875484521e-07, "loss": 0.2763, "step": 17389 }, { "epoch": 2.493547462001721, "grad_norm": 0.26341572403907776, "learning_rate": 8.433762682430674e-07, "loss": 0.289, "step": 17390 }, { "epoch": 2.493690851735016, "grad_norm": 0.26911258697509766, "learning_rate": 8.429126646516933e-07, "loss": 0.285, "step": 17391 }, { "epoch": 2.493834241468311, "grad_norm": 0.2615707516670227, "learning_rate": 8.424491767872362e-07, "loss": 0.2713, "step": 17392 }, { "epoch": 2.4939776312016058, "grad_norm": 0.2645258903503418, "learning_rate": 8.419858046625995e-07, "loss": 0.2611, "step": 17393 }, { "epoch": 2.494121020934901, "grad_norm": 0.2845172882080078, "learning_rate": 8.415225482906803e-07, "loss": 0.2888, "step": 17394 }, { "epoch": 2.494264410668196, "grad_norm": 0.2511334717273712, "learning_rate": 8.41059407684377e-07, "loss": 0.2845, "step": 17395 }, { "epoch": 2.494407800401491, "grad_norm": 0.2907858192920685, "learning_rate": 8.405963828565816e-07, "loss": 0.2941, "step": 17396 }, { "epoch": 2.4945511901347865, "grad_norm": 0.2619715631008148, "learning_rate": 8.40133473820185e-07, "loss": 0.262, "step": 17397 }, { "epoch": 2.4946945798680815, "grad_norm": 0.26032334566116333, "learning_rate": 8.396706805880728e-07, "loss": 0.2862, "step": 17398 }, { "epoch": 2.4948379696013765, "grad_norm": 0.28064993023872375, "learning_rate": 8.392080031731298e-07, "loss": 0.2831, "step": 17399 }, { "epoch": 2.4949813593346715, "grad_norm": 0.26587092876434326, "learning_rate": 8.387454415882345e-07, "loss": 0.2776, "step": 17400 }, { "epoch": 2.495124749067967, "grad_norm": 0.2687458097934723, "learning_rate": 8.382829958462657e-07, "loss": 0.2812, "step": 17401 }, { "epoch": 2.495268138801262, "grad_norm": 0.2742874026298523, "learning_rate": 8.378206659600962e-07, "loss": 0.2794, "step": 17402 }, { "epoch": 2.495411528534557, "grad_norm": 0.25751447677612305, "learning_rate": 8.37358451942597e-07, "loss": 0.3049, "step": 17403 }, { "epoch": 2.4955549182678523, "grad_norm": 0.2872285544872284, "learning_rate": 8.36896353806636e-07, "loss": 0.2808, "step": 17404 }, { "epoch": 2.4956983080011472, "grad_norm": 0.2623577117919922, "learning_rate": 8.364343715650753e-07, "loss": 0.2759, "step": 17405 }, { "epoch": 2.495841697734442, "grad_norm": 0.25605666637420654, "learning_rate": 8.359725052307771e-07, "loss": 0.284, "step": 17406 }, { "epoch": 2.495985087467737, "grad_norm": 0.2792198956012726, "learning_rate": 8.355107548165986e-07, "loss": 0.266, "step": 17407 }, { "epoch": 2.4961284772010326, "grad_norm": 0.29304060339927673, "learning_rate": 8.350491203353944e-07, "loss": 0.2758, "step": 17408 }, { "epoch": 2.4962718669343276, "grad_norm": 0.2629040479660034, "learning_rate": 8.345876018000176e-07, "loss": 0.2914, "step": 17409 }, { "epoch": 2.4964152566676225, "grad_norm": 0.286602258682251, "learning_rate": 8.341261992233123e-07, "loss": 0.2707, "step": 17410 }, { "epoch": 2.4965586464009175, "grad_norm": 0.27024945616722107, "learning_rate": 8.336649126181263e-07, "loss": 0.3017, "step": 17411 }, { "epoch": 2.496702036134213, "grad_norm": 0.2562786340713501, "learning_rate": 8.332037419972994e-07, "loss": 0.2886, "step": 17412 }, { "epoch": 2.496845425867508, "grad_norm": 0.27313414216041565, "learning_rate": 8.327426873736705e-07, "loss": 0.2875, "step": 17413 }, { "epoch": 2.496988815600803, "grad_norm": 0.2823520600795746, "learning_rate": 8.322817487600765e-07, "loss": 0.2972, "step": 17414 }, { "epoch": 2.497132205334098, "grad_norm": 0.2718965411186218, "learning_rate": 8.31820926169346e-07, "loss": 0.3067, "step": 17415 }, { "epoch": 2.4972755950673933, "grad_norm": 0.2765142619609833, "learning_rate": 8.31360219614309e-07, "loss": 0.2723, "step": 17416 }, { "epoch": 2.4974189848006882, "grad_norm": 0.2718026340007782, "learning_rate": 8.308996291077898e-07, "loss": 0.2696, "step": 17417 }, { "epoch": 2.497562374533983, "grad_norm": 0.2663307189941406, "learning_rate": 8.304391546626129e-07, "loss": 0.2846, "step": 17418 }, { "epoch": 2.4977057642672786, "grad_norm": 0.2832356095314026, "learning_rate": 8.299787962915956e-07, "loss": 0.2862, "step": 17419 }, { "epoch": 2.4978491540005736, "grad_norm": 0.26383399963378906, "learning_rate": 8.295185540075557e-07, "loss": 0.2664, "step": 17420 }, { "epoch": 2.4979925437338686, "grad_norm": 0.28936150670051575, "learning_rate": 8.290584278233022e-07, "loss": 0.2859, "step": 17421 }, { "epoch": 2.4981359334671636, "grad_norm": 0.25042280554771423, "learning_rate": 8.28598417751646e-07, "loss": 0.2652, "step": 17422 }, { "epoch": 2.498279323200459, "grad_norm": 0.3089815378189087, "learning_rate": 8.28138523805393e-07, "loss": 0.2809, "step": 17423 }, { "epoch": 2.498422712933754, "grad_norm": 0.2559347152709961, "learning_rate": 8.276787459973463e-07, "loss": 0.2733, "step": 17424 }, { "epoch": 2.498566102667049, "grad_norm": 0.27265533804893494, "learning_rate": 8.27219084340306e-07, "loss": 0.2704, "step": 17425 }, { "epoch": 2.4987094924003443, "grad_norm": 0.2606692314147949, "learning_rate": 8.267595388470661e-07, "loss": 0.2793, "step": 17426 }, { "epoch": 2.4988528821336393, "grad_norm": 0.27334529161453247, "learning_rate": 8.263001095304212e-07, "loss": 0.2936, "step": 17427 }, { "epoch": 2.4989962718669343, "grad_norm": 0.2964775860309601, "learning_rate": 8.258407964031612e-07, "loss": 0.2714, "step": 17428 }, { "epoch": 2.4991396616002293, "grad_norm": 0.26553604006767273, "learning_rate": 8.253815994780718e-07, "loss": 0.2766, "step": 17429 }, { "epoch": 2.4992830513335247, "grad_norm": 0.25200551748275757, "learning_rate": 8.249225187679372e-07, "loss": 0.2938, "step": 17430 }, { "epoch": 2.4994264410668197, "grad_norm": 0.27801764011383057, "learning_rate": 8.244635542855383e-07, "loss": 0.282, "step": 17431 }, { "epoch": 2.4995698308001146, "grad_norm": 0.26206347346305847, "learning_rate": 8.240047060436501e-07, "loss": 0.3024, "step": 17432 }, { "epoch": 2.49971322053341, "grad_norm": 0.2818707823753357, "learning_rate": 8.235459740550467e-07, "loss": 0.2755, "step": 17433 }, { "epoch": 2.499856610266705, "grad_norm": 0.29132938385009766, "learning_rate": 8.230873583324983e-07, "loss": 0.2833, "step": 17434 }, { "epoch": 2.5, "grad_norm": 0.2518065273761749, "learning_rate": 8.226288588887732e-07, "loss": 0.2808, "step": 17435 }, { "epoch": 2.500143389733295, "grad_norm": 0.2758678197860718, "learning_rate": 8.221704757366345e-07, "loss": 0.2738, "step": 17436 }, { "epoch": 2.50028677946659, "grad_norm": 0.261810302734375, "learning_rate": 8.21712208888843e-07, "loss": 0.29, "step": 17437 }, { "epoch": 2.5004301691998854, "grad_norm": 0.2753077745437622, "learning_rate": 8.212540583581563e-07, "loss": 0.2832, "step": 17438 }, { "epoch": 2.5005735589331803, "grad_norm": 0.28200069069862366, "learning_rate": 8.207960241573281e-07, "loss": 0.2811, "step": 17439 }, { "epoch": 2.5007169486664753, "grad_norm": 0.27445468306541443, "learning_rate": 8.203381062991101e-07, "loss": 0.2955, "step": 17440 }, { "epoch": 2.5008603383997707, "grad_norm": 0.2710411548614502, "learning_rate": 8.198803047962511e-07, "loss": 0.2919, "step": 17441 }, { "epoch": 2.5010037281330657, "grad_norm": 0.26685354113578796, "learning_rate": 8.19422619661493e-07, "loss": 0.2897, "step": 17442 }, { "epoch": 2.5011471178663607, "grad_norm": 0.27305251359939575, "learning_rate": 8.189650509075775e-07, "loss": 0.3034, "step": 17443 }, { "epoch": 2.5012905075996557, "grad_norm": 0.2772948145866394, "learning_rate": 8.185075985472441e-07, "loss": 0.2938, "step": 17444 }, { "epoch": 2.501433897332951, "grad_norm": 0.28030019998550415, "learning_rate": 8.180502625932262e-07, "loss": 0.2966, "step": 17445 }, { "epoch": 2.501577287066246, "grad_norm": 0.28173601627349854, "learning_rate": 8.175930430582568e-07, "loss": 0.2841, "step": 17446 }, { "epoch": 2.501720676799541, "grad_norm": 0.2805814743041992, "learning_rate": 8.171359399550638e-07, "loss": 0.2804, "step": 17447 }, { "epoch": 2.5018640665328364, "grad_norm": 0.2716620862483978, "learning_rate": 8.166789532963709e-07, "loss": 0.2783, "step": 17448 }, { "epoch": 2.5020074562661314, "grad_norm": 0.28457584977149963, "learning_rate": 8.162220830949008e-07, "loss": 0.285, "step": 17449 }, { "epoch": 2.5021508459994264, "grad_norm": 0.2674603760242462, "learning_rate": 8.157653293633716e-07, "loss": 0.2588, "step": 17450 }, { "epoch": 2.5022942357327214, "grad_norm": 0.2843928337097168, "learning_rate": 8.153086921144998e-07, "loss": 0.2977, "step": 17451 }, { "epoch": 2.502437625466017, "grad_norm": 0.2744307219982147, "learning_rate": 8.148521713609964e-07, "loss": 0.2813, "step": 17452 }, { "epoch": 2.5025810151993118, "grad_norm": 0.2604086399078369, "learning_rate": 8.143957671155705e-07, "loss": 0.2926, "step": 17453 }, { "epoch": 2.5027244049326067, "grad_norm": 0.26622986793518066, "learning_rate": 8.139394793909277e-07, "loss": 0.2802, "step": 17454 }, { "epoch": 2.502867794665902, "grad_norm": 0.27261218428611755, "learning_rate": 8.134833081997706e-07, "loss": 0.2743, "step": 17455 }, { "epoch": 2.503011184399197, "grad_norm": 0.2667188048362732, "learning_rate": 8.130272535547979e-07, "loss": 0.2829, "step": 17456 }, { "epoch": 2.503154574132492, "grad_norm": 0.28301361203193665, "learning_rate": 8.12571315468706e-07, "loss": 0.2709, "step": 17457 }, { "epoch": 2.503297963865787, "grad_norm": 0.2655820846557617, "learning_rate": 8.121154939541881e-07, "loss": 0.2846, "step": 17458 }, { "epoch": 2.503441353599082, "grad_norm": 0.27186480164527893, "learning_rate": 8.11659789023932e-07, "loss": 0.2856, "step": 17459 }, { "epoch": 2.5035847433323775, "grad_norm": 0.2754139304161072, "learning_rate": 8.11204200690624e-07, "loss": 0.3062, "step": 17460 }, { "epoch": 2.5037281330656724, "grad_norm": 0.2743273973464966, "learning_rate": 8.107487289669475e-07, "loss": 0.2687, "step": 17461 }, { "epoch": 2.503871522798968, "grad_norm": 0.28376197814941406, "learning_rate": 8.102933738655821e-07, "loss": 0.29, "step": 17462 }, { "epoch": 2.504014912532263, "grad_norm": 0.2783510684967041, "learning_rate": 8.098381353992058e-07, "loss": 0.2815, "step": 17463 }, { "epoch": 2.504158302265558, "grad_norm": 0.2735987901687622, "learning_rate": 8.093830135804887e-07, "loss": 0.2741, "step": 17464 }, { "epoch": 2.5043016919988528, "grad_norm": 0.2696879208087921, "learning_rate": 8.089280084221019e-07, "loss": 0.2599, "step": 17465 }, { "epoch": 2.5044450817321477, "grad_norm": 0.2734542787075043, "learning_rate": 8.084731199367124e-07, "loss": 0.2865, "step": 17466 }, { "epoch": 2.504588471465443, "grad_norm": 0.27109137177467346, "learning_rate": 8.080183481369835e-07, "loss": 0.2701, "step": 17467 }, { "epoch": 2.504731861198738, "grad_norm": 0.2794357240200043, "learning_rate": 8.075636930355768e-07, "loss": 0.2889, "step": 17468 }, { "epoch": 2.504875250932033, "grad_norm": 0.2769365608692169, "learning_rate": 8.07109154645146e-07, "loss": 0.2715, "step": 17469 }, { "epoch": 2.5050186406653285, "grad_norm": 0.2694598436355591, "learning_rate": 8.066547329783469e-07, "loss": 0.2532, "step": 17470 }, { "epoch": 2.5051620303986235, "grad_norm": 0.2736717462539673, "learning_rate": 8.062004280478292e-07, "loss": 0.2879, "step": 17471 }, { "epoch": 2.5053054201319185, "grad_norm": 0.269206166267395, "learning_rate": 8.057462398662403e-07, "loss": 0.2745, "step": 17472 }, { "epoch": 2.5054488098652135, "grad_norm": 0.26467883586883545, "learning_rate": 8.052921684462234e-07, "loss": 0.2803, "step": 17473 }, { "epoch": 2.505592199598509, "grad_norm": 0.2719871699810028, "learning_rate": 8.048382138004218e-07, "loss": 0.2721, "step": 17474 }, { "epoch": 2.505735589331804, "grad_norm": 0.25610411167144775, "learning_rate": 8.043843759414693e-07, "loss": 0.2623, "step": 17475 }, { "epoch": 2.505878979065099, "grad_norm": 0.28566253185272217, "learning_rate": 8.039306548820025e-07, "loss": 0.28, "step": 17476 }, { "epoch": 2.5060223687983942, "grad_norm": 0.291952908039093, "learning_rate": 8.034770506346512e-07, "loss": 0.2825, "step": 17477 }, { "epoch": 2.506165758531689, "grad_norm": 0.28312230110168457, "learning_rate": 8.030235632120437e-07, "loss": 0.2797, "step": 17478 }, { "epoch": 2.506309148264984, "grad_norm": 0.2579645812511444, "learning_rate": 8.025701926268048e-07, "loss": 0.2893, "step": 17479 }, { "epoch": 2.506452537998279, "grad_norm": 0.26487696170806885, "learning_rate": 8.021169388915534e-07, "loss": 0.2789, "step": 17480 }, { "epoch": 2.5065959277315746, "grad_norm": 0.2827228903770447, "learning_rate": 8.016638020189093e-07, "loss": 0.2925, "step": 17481 }, { "epoch": 2.5067393174648696, "grad_norm": 0.2779321074485779, "learning_rate": 8.012107820214871e-07, "loss": 0.2653, "step": 17482 }, { "epoch": 2.5068827071981645, "grad_norm": 0.2873271703720093, "learning_rate": 8.007578789118969e-07, "loss": 0.2821, "step": 17483 }, { "epoch": 2.50702609693146, "grad_norm": 0.2673683166503906, "learning_rate": 8.003050927027483e-07, "loss": 0.2787, "step": 17484 }, { "epoch": 2.507169486664755, "grad_norm": 0.27165576815605164, "learning_rate": 7.998524234066468e-07, "loss": 0.2575, "step": 17485 }, { "epoch": 2.50731287639805, "grad_norm": 0.2642611861228943, "learning_rate": 7.99399871036191e-07, "loss": 0.2893, "step": 17486 }, { "epoch": 2.507456266131345, "grad_norm": 0.270065039396286, "learning_rate": 7.989474356039817e-07, "loss": 0.2961, "step": 17487 }, { "epoch": 2.50759965586464, "grad_norm": 0.2715264558792114, "learning_rate": 7.984951171226129e-07, "loss": 0.291, "step": 17488 }, { "epoch": 2.5077430455979353, "grad_norm": 0.2615927755832672, "learning_rate": 7.980429156046776e-07, "loss": 0.2673, "step": 17489 }, { "epoch": 2.5078864353312302, "grad_norm": 0.2747010588645935, "learning_rate": 7.975908310627628e-07, "loss": 0.2797, "step": 17490 }, { "epoch": 2.5080298250645257, "grad_norm": 0.28658032417297363, "learning_rate": 7.971388635094557e-07, "loss": 0.2834, "step": 17491 }, { "epoch": 2.5081732147978206, "grad_norm": 0.27294284105300903, "learning_rate": 7.966870129573368e-07, "loss": 0.277, "step": 17492 }, { "epoch": 2.5083166045311156, "grad_norm": 0.29422450065612793, "learning_rate": 7.962352794189853e-07, "loss": 0.2757, "step": 17493 }, { "epoch": 2.5084599942644106, "grad_norm": 0.26129254698753357, "learning_rate": 7.957836629069776e-07, "loss": 0.2822, "step": 17494 }, { "epoch": 2.5086033839977055, "grad_norm": 0.2735610902309418, "learning_rate": 7.953321634338867e-07, "loss": 0.2806, "step": 17495 }, { "epoch": 2.508746773731001, "grad_norm": 0.268062949180603, "learning_rate": 7.948807810122789e-07, "loss": 0.2865, "step": 17496 }, { "epoch": 2.508890163464296, "grad_norm": 0.2814280390739441, "learning_rate": 7.944295156547216e-07, "loss": 0.2972, "step": 17497 }, { "epoch": 2.509033553197591, "grad_norm": 0.26057860255241394, "learning_rate": 7.939783673737766e-07, "loss": 0.3037, "step": 17498 }, { "epoch": 2.5091769429308863, "grad_norm": 0.2910690903663635, "learning_rate": 7.935273361820045e-07, "loss": 0.2738, "step": 17499 }, { "epoch": 2.5093203326641813, "grad_norm": 0.28572824597358704, "learning_rate": 7.930764220919601e-07, "loss": 0.3043, "step": 17500 }, { "epoch": 2.5094637223974763, "grad_norm": 0.2549434304237366, "learning_rate": 7.926256251161985e-07, "loss": 0.2733, "step": 17501 }, { "epoch": 2.5096071121307713, "grad_norm": 0.2746901512145996, "learning_rate": 7.921749452672655e-07, "loss": 0.2893, "step": 17502 }, { "epoch": 2.5097505018640667, "grad_norm": 0.2751208245754242, "learning_rate": 7.917243825577098e-07, "loss": 0.2672, "step": 17503 }, { "epoch": 2.5098938915973616, "grad_norm": 0.28541937470436096, "learning_rate": 7.912739370000732e-07, "loss": 0.269, "step": 17504 }, { "epoch": 2.5100372813306566, "grad_norm": 0.2618429362773895, "learning_rate": 7.90823608606896e-07, "loss": 0.2691, "step": 17505 }, { "epoch": 2.510180671063952, "grad_norm": 0.26823094487190247, "learning_rate": 7.903733973907146e-07, "loss": 0.2883, "step": 17506 }, { "epoch": 2.510324060797247, "grad_norm": 0.28038397431373596, "learning_rate": 7.899233033640624e-07, "loss": 0.2732, "step": 17507 }, { "epoch": 2.510467450530542, "grad_norm": 0.3093322813510895, "learning_rate": 7.894733265394694e-07, "loss": 0.2863, "step": 17508 }, { "epoch": 2.510610840263837, "grad_norm": 0.3047889471054077, "learning_rate": 7.890234669294616e-07, "loss": 0.2872, "step": 17509 }, { "epoch": 2.510754229997132, "grad_norm": 0.25929561257362366, "learning_rate": 7.885737245465624e-07, "loss": 0.2674, "step": 17510 }, { "epoch": 2.5108976197304274, "grad_norm": 0.26191142201423645, "learning_rate": 7.881240994032929e-07, "loss": 0.2952, "step": 17511 }, { "epoch": 2.5110410094637223, "grad_norm": 0.28308239579200745, "learning_rate": 7.876745915121709e-07, "loss": 0.2762, "step": 17512 }, { "epoch": 2.5111843991970177, "grad_norm": 0.27960145473480225, "learning_rate": 7.872252008857068e-07, "loss": 0.2927, "step": 17513 }, { "epoch": 2.5113277889303127, "grad_norm": 0.2709474265575409, "learning_rate": 7.867759275364123e-07, "loss": 0.28, "step": 17514 }, { "epoch": 2.5114711786636077, "grad_norm": 0.27674657106399536, "learning_rate": 7.863267714767953e-07, "loss": 0.2913, "step": 17515 }, { "epoch": 2.5116145683969027, "grad_norm": 0.30221691727638245, "learning_rate": 7.85877732719359e-07, "loss": 0.2848, "step": 17516 }, { "epoch": 2.5117579581301976, "grad_norm": 0.252855509519577, "learning_rate": 7.854288112766056e-07, "loss": 0.2635, "step": 17517 }, { "epoch": 2.511901347863493, "grad_norm": 0.2720628082752228, "learning_rate": 7.849800071610292e-07, "loss": 0.2762, "step": 17518 }, { "epoch": 2.512044737596788, "grad_norm": 0.2693127393722534, "learning_rate": 7.845313203851257e-07, "loss": 0.2754, "step": 17519 }, { "epoch": 2.512188127330083, "grad_norm": 0.27273616194725037, "learning_rate": 7.840827509613857e-07, "loss": 0.2802, "step": 17520 }, { "epoch": 2.5123315170633784, "grad_norm": 0.27566564083099365, "learning_rate": 7.836342989022966e-07, "loss": 0.2843, "step": 17521 }, { "epoch": 2.5124749067966734, "grad_norm": 0.2738046944141388, "learning_rate": 7.831859642203437e-07, "loss": 0.294, "step": 17522 }, { "epoch": 2.5126182965299684, "grad_norm": 0.27376222610473633, "learning_rate": 7.827377469280057e-07, "loss": 0.2968, "step": 17523 }, { "epoch": 2.5127616862632633, "grad_norm": 0.2625519335269928, "learning_rate": 7.822896470377616e-07, "loss": 0.2862, "step": 17524 }, { "epoch": 2.5129050759965588, "grad_norm": 0.2846762537956238, "learning_rate": 7.818416645620858e-07, "loss": 0.2943, "step": 17525 }, { "epoch": 2.5130484657298537, "grad_norm": 0.28363871574401855, "learning_rate": 7.81393799513449e-07, "loss": 0.2867, "step": 17526 }, { "epoch": 2.5131918554631487, "grad_norm": 0.27929988503456116, "learning_rate": 7.809460519043194e-07, "loss": 0.293, "step": 17527 }, { "epoch": 2.513335245196444, "grad_norm": 0.2748061716556549, "learning_rate": 7.804984217471617e-07, "loss": 0.2753, "step": 17528 }, { "epoch": 2.513478634929739, "grad_norm": 0.2682221829891205, "learning_rate": 7.800509090544373e-07, "loss": 0.2763, "step": 17529 }, { "epoch": 2.513622024663034, "grad_norm": 0.2926463484764099, "learning_rate": 7.796035138386038e-07, "loss": 0.2884, "step": 17530 }, { "epoch": 2.513765414396329, "grad_norm": 0.26483508944511414, "learning_rate": 7.791562361121163e-07, "loss": 0.2687, "step": 17531 }, { "epoch": 2.5139088041296245, "grad_norm": 0.2654360830783844, "learning_rate": 7.787090758874266e-07, "loss": 0.2707, "step": 17532 }, { "epoch": 2.5140521938629194, "grad_norm": 0.2785012125968933, "learning_rate": 7.782620331769836e-07, "loss": 0.2675, "step": 17533 }, { "epoch": 2.5141955835962144, "grad_norm": 0.2859848737716675, "learning_rate": 7.7781510799323e-07, "loss": 0.265, "step": 17534 }, { "epoch": 2.51433897332951, "grad_norm": 0.2747311592102051, "learning_rate": 7.773683003486093e-07, "loss": 0.262, "step": 17535 }, { "epoch": 2.514482363062805, "grad_norm": 0.26224133372306824, "learning_rate": 7.769216102555593e-07, "loss": 0.2527, "step": 17536 }, { "epoch": 2.5146257527961, "grad_norm": 0.27512288093566895, "learning_rate": 7.764750377265152e-07, "loss": 0.2909, "step": 17537 }, { "epoch": 2.5147691425293948, "grad_norm": 0.2645678222179413, "learning_rate": 7.760285827739094e-07, "loss": 0.2888, "step": 17538 }, { "epoch": 2.5149125322626897, "grad_norm": 0.28179067373275757, "learning_rate": 7.755822454101708e-07, "loss": 0.2948, "step": 17539 }, { "epoch": 2.515055921995985, "grad_norm": 0.27857306599617004, "learning_rate": 7.751360256477236e-07, "loss": 0.2863, "step": 17540 }, { "epoch": 2.51519931172928, "grad_norm": 0.24886173009872437, "learning_rate": 7.746899234989902e-07, "loss": 0.2683, "step": 17541 }, { "epoch": 2.5153427014625755, "grad_norm": 0.2677164673805237, "learning_rate": 7.742439389763889e-07, "loss": 0.2905, "step": 17542 }, { "epoch": 2.5154860911958705, "grad_norm": 0.2693333625793457, "learning_rate": 7.737980720923365e-07, "loss": 0.2679, "step": 17543 }, { "epoch": 2.5156294809291655, "grad_norm": 0.2740071415901184, "learning_rate": 7.733523228592449e-07, "loss": 0.2904, "step": 17544 }, { "epoch": 2.5157728706624605, "grad_norm": 0.2821419835090637, "learning_rate": 7.729066912895222e-07, "loss": 0.3003, "step": 17545 }, { "epoch": 2.5159162603957554, "grad_norm": 0.26600635051727295, "learning_rate": 7.724611773955754e-07, "loss": 0.2601, "step": 17546 }, { "epoch": 2.516059650129051, "grad_norm": 0.2829003632068634, "learning_rate": 7.72015781189806e-07, "loss": 0.2831, "step": 17547 }, { "epoch": 2.516203039862346, "grad_norm": 0.27706730365753174, "learning_rate": 7.715705026846132e-07, "loss": 0.2926, "step": 17548 }, { "epoch": 2.516346429595641, "grad_norm": 0.27309471368789673, "learning_rate": 7.711253418923941e-07, "loss": 0.282, "step": 17549 }, { "epoch": 2.5164898193289362, "grad_norm": 0.28687024116516113, "learning_rate": 7.706802988255396e-07, "loss": 0.2761, "step": 17550 }, { "epoch": 2.516633209062231, "grad_norm": 0.2962646782398224, "learning_rate": 7.702353734964396e-07, "loss": 0.2947, "step": 17551 }, { "epoch": 2.516776598795526, "grad_norm": 0.2564546465873718, "learning_rate": 7.697905659174798e-07, "loss": 0.2689, "step": 17552 }, { "epoch": 2.516919988528821, "grad_norm": 0.2710265815258026, "learning_rate": 7.693458761010436e-07, "loss": 0.2699, "step": 17553 }, { "epoch": 2.5170633782621166, "grad_norm": 0.2583041787147522, "learning_rate": 7.6890130405951e-07, "loss": 0.2682, "step": 17554 }, { "epoch": 2.5172067679954115, "grad_norm": 0.2984446883201599, "learning_rate": 7.68456849805257e-07, "loss": 0.2717, "step": 17555 }, { "epoch": 2.5173501577287065, "grad_norm": 0.28082963824272156, "learning_rate": 7.680125133506545e-07, "loss": 0.2796, "step": 17556 }, { "epoch": 2.517493547462002, "grad_norm": 0.27601099014282227, "learning_rate": 7.675682947080737e-07, "loss": 0.2892, "step": 17557 }, { "epoch": 2.517636937195297, "grad_norm": 0.2763773500919342, "learning_rate": 7.67124193889881e-07, "loss": 0.2774, "step": 17558 }, { "epoch": 2.517780326928592, "grad_norm": 0.26478642225265503, "learning_rate": 7.666802109084392e-07, "loss": 0.2867, "step": 17559 }, { "epoch": 2.517923716661887, "grad_norm": 0.26571688055992126, "learning_rate": 7.662363457761096e-07, "loss": 0.2619, "step": 17560 }, { "epoch": 2.518067106395182, "grad_norm": 0.26616108417510986, "learning_rate": 7.657925985052461e-07, "loss": 0.2817, "step": 17561 }, { "epoch": 2.5182104961284772, "grad_norm": 0.25006264448165894, "learning_rate": 7.653489691082034e-07, "loss": 0.2608, "step": 17562 }, { "epoch": 2.5183538858617722, "grad_norm": 0.27612823247909546, "learning_rate": 7.649054575973297e-07, "loss": 0.2877, "step": 17563 }, { "epoch": 2.5184972755950676, "grad_norm": 0.2601013481616974, "learning_rate": 7.644620639849748e-07, "loss": 0.2869, "step": 17564 }, { "epoch": 2.5186406653283626, "grad_norm": 0.2797134816646576, "learning_rate": 7.640187882834804e-07, "loss": 0.2879, "step": 17565 }, { "epoch": 2.5187840550616576, "grad_norm": 0.2696768343448639, "learning_rate": 7.635756305051878e-07, "loss": 0.2714, "step": 17566 }, { "epoch": 2.5189274447949526, "grad_norm": 0.26089930534362793, "learning_rate": 7.631325906624321e-07, "loss": 0.2952, "step": 17567 }, { "epoch": 2.5190708345282475, "grad_norm": 0.27214866876602173, "learning_rate": 7.626896687675473e-07, "loss": 0.2907, "step": 17568 }, { "epoch": 2.519214224261543, "grad_norm": 0.2540636360645294, "learning_rate": 7.62246864832864e-07, "loss": 0.2832, "step": 17569 }, { "epoch": 2.519357613994838, "grad_norm": 0.2763226628303528, "learning_rate": 7.618041788707087e-07, "loss": 0.292, "step": 17570 }, { "epoch": 2.519501003728133, "grad_norm": 0.25746962428092957, "learning_rate": 7.613616108934068e-07, "loss": 0.2821, "step": 17571 }, { "epoch": 2.5196443934614283, "grad_norm": 0.2821323573589325, "learning_rate": 7.609191609132766e-07, "loss": 0.29, "step": 17572 }, { "epoch": 2.5197877831947233, "grad_norm": 0.26556989550590515, "learning_rate": 7.604768289426356e-07, "loss": 0.2827, "step": 17573 }, { "epoch": 2.5199311729280183, "grad_norm": 0.2584940195083618, "learning_rate": 7.600346149937987e-07, "loss": 0.2901, "step": 17574 }, { "epoch": 2.5200745626613132, "grad_norm": 0.27522894740104675, "learning_rate": 7.595925190790754e-07, "loss": 0.282, "step": 17575 }, { "epoch": 2.5202179523946087, "grad_norm": 0.2618634104728699, "learning_rate": 7.591505412107746e-07, "loss": 0.2736, "step": 17576 }, { "epoch": 2.5203613421279036, "grad_norm": 0.27011021971702576, "learning_rate": 7.587086814011979e-07, "loss": 0.2564, "step": 17577 }, { "epoch": 2.5205047318611986, "grad_norm": 0.2664108872413635, "learning_rate": 7.582669396626468e-07, "loss": 0.2814, "step": 17578 }, { "epoch": 2.520648121594494, "grad_norm": 0.273393452167511, "learning_rate": 7.578253160074195e-07, "loss": 0.2871, "step": 17579 }, { "epoch": 2.520791511327789, "grad_norm": 0.2686905264854431, "learning_rate": 7.573838104478099e-07, "loss": 0.2818, "step": 17580 }, { "epoch": 2.520934901061084, "grad_norm": 0.25887587666511536, "learning_rate": 7.569424229961087e-07, "loss": 0.2775, "step": 17581 }, { "epoch": 2.521078290794379, "grad_norm": 0.29825422167778015, "learning_rate": 7.565011536646027e-07, "loss": 0.2827, "step": 17582 }, { "epoch": 2.5212216805276744, "grad_norm": 0.29534047842025757, "learning_rate": 7.560600024655773e-07, "loss": 0.2691, "step": 17583 }, { "epoch": 2.5213650702609693, "grad_norm": 0.2578994929790497, "learning_rate": 7.556189694113131e-07, "loss": 0.2774, "step": 17584 }, { "epoch": 2.5215084599942643, "grad_norm": 0.26517152786254883, "learning_rate": 7.55178054514088e-07, "loss": 0.2902, "step": 17585 }, { "epoch": 2.5216518497275597, "grad_norm": 0.26651909947395325, "learning_rate": 7.547372577861755e-07, "loss": 0.274, "step": 17586 }, { "epoch": 2.5217952394608547, "grad_norm": 0.27511975169181824, "learning_rate": 7.542965792398488e-07, "loss": 0.2902, "step": 17587 }, { "epoch": 2.5219386291941497, "grad_norm": 0.27921050786972046, "learning_rate": 7.538560188873728e-07, "loss": 0.2981, "step": 17588 }, { "epoch": 2.5220820189274447, "grad_norm": 0.2602830231189728, "learning_rate": 7.53415576741014e-07, "loss": 0.2688, "step": 17589 }, { "epoch": 2.5222254086607396, "grad_norm": 0.27828022837638855, "learning_rate": 7.529752528130325e-07, "loss": 0.2592, "step": 17590 }, { "epoch": 2.522368798394035, "grad_norm": 0.26063796877861023, "learning_rate": 7.525350471156867e-07, "loss": 0.2839, "step": 17591 }, { "epoch": 2.52251218812733, "grad_norm": 0.2677718698978424, "learning_rate": 7.52094959661232e-07, "loss": 0.2847, "step": 17592 }, { "epoch": 2.5226555778606254, "grad_norm": 0.26933300495147705, "learning_rate": 7.516549904619197e-07, "loss": 0.2723, "step": 17593 }, { "epoch": 2.5227989675939204, "grad_norm": 0.2779543101787567, "learning_rate": 7.512151395299966e-07, "loss": 0.2923, "step": 17594 }, { "epoch": 2.5229423573272154, "grad_norm": 0.2727167308330536, "learning_rate": 7.507754068777079e-07, "loss": 0.2752, "step": 17595 }, { "epoch": 2.5230857470605104, "grad_norm": 0.2937084138393402, "learning_rate": 7.503357925172955e-07, "loss": 0.2765, "step": 17596 }, { "epoch": 2.5232291367938053, "grad_norm": 0.27643170952796936, "learning_rate": 7.498962964609969e-07, "loss": 0.2677, "step": 17597 }, { "epoch": 2.5233725265271008, "grad_norm": 0.25698530673980713, "learning_rate": 7.494569187210482e-07, "loss": 0.2789, "step": 17598 }, { "epoch": 2.5235159162603957, "grad_norm": 0.28652265667915344, "learning_rate": 7.490176593096798e-07, "loss": 0.2694, "step": 17599 }, { "epoch": 2.5236593059936907, "grad_norm": 0.26339849829673767, "learning_rate": 7.485785182391209e-07, "loss": 0.2655, "step": 17600 }, { "epoch": 2.523802695726986, "grad_norm": 0.27624252438545227, "learning_rate": 7.481394955215959e-07, "loss": 0.2942, "step": 17601 }, { "epoch": 2.523946085460281, "grad_norm": 0.2505016624927521, "learning_rate": 7.477005911693269e-07, "loss": 0.2765, "step": 17602 }, { "epoch": 2.524089475193576, "grad_norm": 0.26526501774787903, "learning_rate": 7.472618051945335e-07, "loss": 0.2809, "step": 17603 }, { "epoch": 2.524232864926871, "grad_norm": 0.25366610288619995, "learning_rate": 7.468231376094282e-07, "loss": 0.2769, "step": 17604 }, { "epoch": 2.5243762546601665, "grad_norm": 0.26827797293663025, "learning_rate": 7.463845884262239e-07, "loss": 0.278, "step": 17605 }, { "epoch": 2.5245196443934614, "grad_norm": 0.26555827260017395, "learning_rate": 7.459461576571297e-07, "loss": 0.2754, "step": 17606 }, { "epoch": 2.5246630341267564, "grad_norm": 0.2685638666152954, "learning_rate": 7.4550784531435e-07, "loss": 0.2972, "step": 17607 }, { "epoch": 2.524806423860052, "grad_norm": 0.27846401929855347, "learning_rate": 7.45069651410088e-07, "loss": 0.2659, "step": 17608 }, { "epoch": 2.524949813593347, "grad_norm": 0.2763945758342743, "learning_rate": 7.44631575956542e-07, "loss": 0.2669, "step": 17609 }, { "epoch": 2.5250932033266418, "grad_norm": 0.27519023418426514, "learning_rate": 7.441936189659065e-07, "loss": 0.2831, "step": 17610 }, { "epoch": 2.5252365930599368, "grad_norm": 0.2599234879016876, "learning_rate": 7.437557804503736e-07, "loss": 0.2638, "step": 17611 }, { "epoch": 2.525379982793232, "grad_norm": 0.2822144627571106, "learning_rate": 7.433180604221324e-07, "loss": 0.2904, "step": 17612 }, { "epoch": 2.525523372526527, "grad_norm": 0.2711162865161896, "learning_rate": 7.428804588933686e-07, "loss": 0.3011, "step": 17613 }, { "epoch": 2.525666762259822, "grad_norm": 0.26833170652389526, "learning_rate": 7.424429758762652e-07, "loss": 0.28, "step": 17614 }, { "epoch": 2.5258101519931175, "grad_norm": 0.2838515639305115, "learning_rate": 7.420056113829987e-07, "loss": 0.2757, "step": 17615 }, { "epoch": 2.5259535417264125, "grad_norm": 0.2825128138065338, "learning_rate": 7.41568365425746e-07, "loss": 0.2737, "step": 17616 }, { "epoch": 2.5260969314597075, "grad_norm": 0.2589660882949829, "learning_rate": 7.411312380166797e-07, "loss": 0.2848, "step": 17617 }, { "epoch": 2.5262403211930025, "grad_norm": 0.27390098571777344, "learning_rate": 7.406942291679681e-07, "loss": 0.2591, "step": 17618 }, { "epoch": 2.5263837109262974, "grad_norm": 0.28788504004478455, "learning_rate": 7.402573388917755e-07, "loss": 0.2958, "step": 17619 }, { "epoch": 2.526527100659593, "grad_norm": 0.27572017908096313, "learning_rate": 7.398205672002689e-07, "loss": 0.2812, "step": 17620 }, { "epoch": 2.526670490392888, "grad_norm": 0.264274001121521, "learning_rate": 7.393839141056025e-07, "loss": 0.2535, "step": 17621 }, { "epoch": 2.5268138801261832, "grad_norm": 0.2686313986778259, "learning_rate": 7.389473796199348e-07, "loss": 0.2703, "step": 17622 }, { "epoch": 2.526957269859478, "grad_norm": 0.2720356285572052, "learning_rate": 7.385109637554167e-07, "loss": 0.2794, "step": 17623 }, { "epoch": 2.527100659592773, "grad_norm": 0.2773328423500061, "learning_rate": 7.380746665241978e-07, "loss": 0.283, "step": 17624 }, { "epoch": 2.527244049326068, "grad_norm": 0.2664979100227356, "learning_rate": 7.376384879384258e-07, "loss": 0.2721, "step": 17625 }, { "epoch": 2.527387439059363, "grad_norm": 0.2853381931781769, "learning_rate": 7.372024280102397e-07, "loss": 0.3038, "step": 17626 }, { "epoch": 2.5275308287926586, "grad_norm": 0.28425300121307373, "learning_rate": 7.36766486751781e-07, "loss": 0.2629, "step": 17627 }, { "epoch": 2.5276742185259535, "grad_norm": 0.27862927317619324, "learning_rate": 7.363306641751855e-07, "loss": 0.3019, "step": 17628 }, { "epoch": 2.5278176082592485, "grad_norm": 0.25641193985939026, "learning_rate": 7.358949602925852e-07, "loss": 0.2633, "step": 17629 }, { "epoch": 2.527960997992544, "grad_norm": 0.28807806968688965, "learning_rate": 7.35459375116111e-07, "loss": 0.2699, "step": 17630 }, { "epoch": 2.528104387725839, "grad_norm": 0.2822771668434143, "learning_rate": 7.350239086578864e-07, "loss": 0.2868, "step": 17631 }, { "epoch": 2.528247777459134, "grad_norm": 0.2741137444972992, "learning_rate": 7.345885609300358e-07, "loss": 0.2865, "step": 17632 }, { "epoch": 2.528391167192429, "grad_norm": 0.27740564942359924, "learning_rate": 7.341533319446781e-07, "loss": 0.2962, "step": 17633 }, { "epoch": 2.5285345569257243, "grad_norm": 0.25472986698150635, "learning_rate": 7.337182217139293e-07, "loss": 0.2792, "step": 17634 }, { "epoch": 2.5286779466590192, "grad_norm": 0.24961549043655396, "learning_rate": 7.332832302499026e-07, "loss": 0.2813, "step": 17635 }, { "epoch": 2.528821336392314, "grad_norm": 0.2558457851409912, "learning_rate": 7.328483575647077e-07, "loss": 0.2987, "step": 17636 }, { "epoch": 2.5289647261256096, "grad_norm": 0.2793508768081665, "learning_rate": 7.324136036704498e-07, "loss": 0.2867, "step": 17637 }, { "epoch": 2.5291081158589046, "grad_norm": 0.26757797598838806, "learning_rate": 7.319789685792328e-07, "loss": 0.2889, "step": 17638 }, { "epoch": 2.5292515055921996, "grad_norm": 0.276029109954834, "learning_rate": 7.315444523031556e-07, "loss": 0.2851, "step": 17639 }, { "epoch": 2.5293948953254946, "grad_norm": 0.2764667868614197, "learning_rate": 7.311100548543148e-07, "loss": 0.2745, "step": 17640 }, { "epoch": 2.5295382850587895, "grad_norm": 0.2728434205055237, "learning_rate": 7.306757762448047e-07, "loss": 0.2711, "step": 17641 }, { "epoch": 2.529681674792085, "grad_norm": 0.2543422281742096, "learning_rate": 7.302416164867121e-07, "loss": 0.2789, "step": 17642 }, { "epoch": 2.52982506452538, "grad_norm": 0.25969207286834717, "learning_rate": 7.298075755921247e-07, "loss": 0.2671, "step": 17643 }, { "epoch": 2.5299684542586753, "grad_norm": 0.2730489671230316, "learning_rate": 7.293736535731261e-07, "loss": 0.3048, "step": 17644 }, { "epoch": 2.5301118439919703, "grad_norm": 0.26942452788352966, "learning_rate": 7.28939850441795e-07, "loss": 0.2674, "step": 17645 }, { "epoch": 2.5302552337252653, "grad_norm": 0.2871679961681366, "learning_rate": 7.285061662102083e-07, "loss": 0.2741, "step": 17646 }, { "epoch": 2.5303986234585603, "grad_norm": 0.2698851227760315, "learning_rate": 7.280726008904409e-07, "loss": 0.2947, "step": 17647 }, { "epoch": 2.5305420131918552, "grad_norm": 0.2628771960735321, "learning_rate": 7.276391544945593e-07, "loss": 0.2777, "step": 17648 }, { "epoch": 2.5306854029251507, "grad_norm": 0.2744997441768646, "learning_rate": 7.272058270346316e-07, "loss": 0.2652, "step": 17649 }, { "epoch": 2.5308287926584456, "grad_norm": 0.2782365381717682, "learning_rate": 7.267726185227208e-07, "loss": 0.2749, "step": 17650 }, { "epoch": 2.5309721823917406, "grad_norm": 0.2718490660190582, "learning_rate": 7.263395289708869e-07, "loss": 0.2761, "step": 17651 }, { "epoch": 2.531115572125036, "grad_norm": 0.26214566826820374, "learning_rate": 7.259065583911862e-07, "loss": 0.2839, "step": 17652 }, { "epoch": 2.531258961858331, "grad_norm": 0.2825898826122284, "learning_rate": 7.254737067956725e-07, "loss": 0.2755, "step": 17653 }, { "epoch": 2.531402351591626, "grad_norm": 0.27215203642845154, "learning_rate": 7.250409741963949e-07, "loss": 0.2644, "step": 17654 }, { "epoch": 2.531545741324921, "grad_norm": 0.2585347592830658, "learning_rate": 7.246083606054006e-07, "loss": 0.293, "step": 17655 }, { "epoch": 2.5316891310582164, "grad_norm": 0.2854952812194824, "learning_rate": 7.241758660347331e-07, "loss": 0.3019, "step": 17656 }, { "epoch": 2.5318325207915113, "grad_norm": 0.27024948596954346, "learning_rate": 7.23743490496433e-07, "loss": 0.2699, "step": 17657 }, { "epoch": 2.5319759105248063, "grad_norm": 0.28229856491088867, "learning_rate": 7.233112340025344e-07, "loss": 0.3021, "step": 17658 }, { "epoch": 2.5321193002581017, "grad_norm": 0.2846072018146515, "learning_rate": 7.228790965650728e-07, "loss": 0.2852, "step": 17659 }, { "epoch": 2.5322626899913967, "grad_norm": 0.263678640127182, "learning_rate": 7.224470781960774e-07, "loss": 0.2831, "step": 17660 }, { "epoch": 2.5324060797246917, "grad_norm": 0.26904889941215515, "learning_rate": 7.220151789075758e-07, "loss": 0.2718, "step": 17661 }, { "epoch": 2.5325494694579866, "grad_norm": 0.2600853145122528, "learning_rate": 7.215833987115906e-07, "loss": 0.2734, "step": 17662 }, { "epoch": 2.532692859191282, "grad_norm": 0.27982965111732483, "learning_rate": 7.211517376201427e-07, "loss": 0.2788, "step": 17663 }, { "epoch": 2.532836248924577, "grad_norm": 0.2732338607311249, "learning_rate": 7.207201956452475e-07, "loss": 0.266, "step": 17664 }, { "epoch": 2.532979638657872, "grad_norm": 0.2735196053981781, "learning_rate": 7.202887727989194e-07, "loss": 0.2965, "step": 17665 }, { "epoch": 2.5331230283911674, "grad_norm": 0.27904582023620605, "learning_rate": 7.198574690931687e-07, "loss": 0.295, "step": 17666 }, { "epoch": 2.5332664181244624, "grad_norm": 0.27127358317375183, "learning_rate": 7.194262845400018e-07, "loss": 0.298, "step": 17667 }, { "epoch": 2.5334098078577574, "grad_norm": 0.2914595305919647, "learning_rate": 7.189952191514233e-07, "loss": 0.2969, "step": 17668 }, { "epoch": 2.5335531975910524, "grad_norm": 0.26768943667411804, "learning_rate": 7.185642729394315e-07, "loss": 0.2955, "step": 17669 }, { "epoch": 2.5336965873243473, "grad_norm": 0.26225921511650085, "learning_rate": 7.181334459160245e-07, "loss": 0.2787, "step": 17670 }, { "epoch": 2.5338399770576427, "grad_norm": 0.2724139392375946, "learning_rate": 7.17702738093195e-07, "loss": 0.2771, "step": 17671 }, { "epoch": 2.5339833667909377, "grad_norm": 0.27437543869018555, "learning_rate": 7.172721494829343e-07, "loss": 0.2906, "step": 17672 }, { "epoch": 2.534126756524233, "grad_norm": 0.27057573199272156, "learning_rate": 7.168416800972289e-07, "loss": 0.2774, "step": 17673 }, { "epoch": 2.534270146257528, "grad_norm": 0.2617594301700592, "learning_rate": 7.164113299480624e-07, "loss": 0.2824, "step": 17674 }, { "epoch": 2.534413535990823, "grad_norm": 0.2711386978626251, "learning_rate": 7.159810990474147e-07, "loss": 0.2729, "step": 17675 }, { "epoch": 2.534556925724118, "grad_norm": 0.26653531193733215, "learning_rate": 7.155509874072636e-07, "loss": 0.2844, "step": 17676 }, { "epoch": 2.534700315457413, "grad_norm": 0.26800626516342163, "learning_rate": 7.151209950395821e-07, "loss": 0.287, "step": 17677 }, { "epoch": 2.5348437051907085, "grad_norm": 0.2699291706085205, "learning_rate": 7.146911219563407e-07, "loss": 0.2993, "step": 17678 }, { "epoch": 2.5349870949240034, "grad_norm": 0.25367265939712524, "learning_rate": 7.142613681695076e-07, "loss": 0.2852, "step": 17679 }, { "epoch": 2.5351304846572984, "grad_norm": 0.28679049015045166, "learning_rate": 7.138317336910439e-07, "loss": 0.2784, "step": 17680 }, { "epoch": 2.535273874390594, "grad_norm": 0.2569158971309662, "learning_rate": 7.134022185329121e-07, "loss": 0.2765, "step": 17681 }, { "epoch": 2.535417264123889, "grad_norm": 0.2817922830581665, "learning_rate": 7.129728227070676e-07, "loss": 0.2845, "step": 17682 }, { "epoch": 2.5355606538571838, "grad_norm": 0.27416643500328064, "learning_rate": 7.125435462254654e-07, "loss": 0.2722, "step": 17683 }, { "epoch": 2.5357040435904787, "grad_norm": 0.2735583782196045, "learning_rate": 7.121143891000571e-07, "loss": 0.3032, "step": 17684 }, { "epoch": 2.535847433323774, "grad_norm": 0.29005444049835205, "learning_rate": 7.116853513427868e-07, "loss": 0.26, "step": 17685 }, { "epoch": 2.535990823057069, "grad_norm": 0.2627468705177307, "learning_rate": 7.112564329655991e-07, "loss": 0.2732, "step": 17686 }, { "epoch": 2.536134212790364, "grad_norm": 0.2734581232070923, "learning_rate": 7.10827633980436e-07, "loss": 0.2923, "step": 17687 }, { "epoch": 2.5362776025236595, "grad_norm": 0.2695436477661133, "learning_rate": 7.103989543992329e-07, "loss": 0.2746, "step": 17688 }, { "epoch": 2.5364209922569545, "grad_norm": 0.28682947158813477, "learning_rate": 7.099703942339242e-07, "loss": 0.2841, "step": 17689 }, { "epoch": 2.5365643819902495, "grad_norm": 0.26953765749931335, "learning_rate": 7.095419534964409e-07, "loss": 0.2888, "step": 17690 }, { "epoch": 2.5367077717235444, "grad_norm": 0.28782913088798523, "learning_rate": 7.091136321987091e-07, "loss": 0.2765, "step": 17691 }, { "epoch": 2.5368511614568394, "grad_norm": 0.28666627407073975, "learning_rate": 7.086854303526536e-07, "loss": 0.2986, "step": 17692 }, { "epoch": 2.536994551190135, "grad_norm": 0.2701205611228943, "learning_rate": 7.082573479701943e-07, "loss": 0.2785, "step": 17693 }, { "epoch": 2.53713794092343, "grad_norm": 0.2628941833972931, "learning_rate": 7.078293850632484e-07, "loss": 0.2814, "step": 17694 }, { "epoch": 2.5372813306567252, "grad_norm": 0.2775682210922241, "learning_rate": 7.074015416437307e-07, "loss": 0.2695, "step": 17695 }, { "epoch": 2.53742472039002, "grad_norm": 0.2684594690799713, "learning_rate": 7.069738177235502e-07, "loss": 0.2857, "step": 17696 }, { "epoch": 2.537568110123315, "grad_norm": 0.26028358936309814, "learning_rate": 7.065462133146145e-07, "loss": 0.2779, "step": 17697 }, { "epoch": 2.53771149985661, "grad_norm": 0.25904104113578796, "learning_rate": 7.061187284288279e-07, "loss": 0.288, "step": 17698 }, { "epoch": 2.537854889589905, "grad_norm": 0.2625373303890228, "learning_rate": 7.056913630780904e-07, "loss": 0.2946, "step": 17699 }, { "epoch": 2.5379982793232005, "grad_norm": 0.27516576647758484, "learning_rate": 7.052641172742997e-07, "loss": 0.2982, "step": 17700 }, { "epoch": 2.5381416690564955, "grad_norm": 0.2580111026763916, "learning_rate": 7.048369910293501e-07, "loss": 0.283, "step": 17701 }, { "epoch": 2.5382850587897905, "grad_norm": 0.2677015960216522, "learning_rate": 7.044099843551305e-07, "loss": 0.2845, "step": 17702 }, { "epoch": 2.538428448523086, "grad_norm": 0.2691486179828644, "learning_rate": 7.039830972635293e-07, "loss": 0.2816, "step": 17703 }, { "epoch": 2.538571838256381, "grad_norm": 0.2896246612071991, "learning_rate": 7.035563297664299e-07, "loss": 0.271, "step": 17704 }, { "epoch": 2.538715227989676, "grad_norm": 0.2840336263179779, "learning_rate": 7.031296818757132e-07, "loss": 0.2831, "step": 17705 }, { "epoch": 2.538858617722971, "grad_norm": 0.29094627499580383, "learning_rate": 7.02703153603258e-07, "loss": 0.2851, "step": 17706 }, { "epoch": 2.5390020074562663, "grad_norm": 0.2731197774410248, "learning_rate": 7.022767449609347e-07, "loss": 0.2867, "step": 17707 }, { "epoch": 2.5391453971895612, "grad_norm": 0.2611902356147766, "learning_rate": 7.018504559606148e-07, "loss": 0.2788, "step": 17708 }, { "epoch": 2.539288786922856, "grad_norm": 0.2697961926460266, "learning_rate": 7.014242866141674e-07, "loss": 0.2776, "step": 17709 }, { "epoch": 2.5394321766561516, "grad_norm": 0.29083529114723206, "learning_rate": 7.009982369334556e-07, "loss": 0.2736, "step": 17710 }, { "epoch": 2.5395755663894466, "grad_norm": 0.2784990072250366, "learning_rate": 7.005723069303411e-07, "loss": 0.2855, "step": 17711 }, { "epoch": 2.5397189561227416, "grad_norm": 0.2679513692855835, "learning_rate": 7.001464966166782e-07, "loss": 0.2594, "step": 17712 }, { "epoch": 2.5398623458560365, "grad_norm": 0.2610788345336914, "learning_rate": 6.997208060043225e-07, "loss": 0.2901, "step": 17713 }, { "epoch": 2.540005735589332, "grad_norm": 0.2835608124732971, "learning_rate": 6.992952351051253e-07, "loss": 0.2817, "step": 17714 }, { "epoch": 2.540149125322627, "grad_norm": 0.25198474526405334, "learning_rate": 6.988697839309322e-07, "loss": 0.2836, "step": 17715 }, { "epoch": 2.540292515055922, "grad_norm": 0.2794552743434906, "learning_rate": 6.98444452493588e-07, "loss": 0.281, "step": 17716 }, { "epoch": 2.5404359047892173, "grad_norm": 0.28052735328674316, "learning_rate": 6.980192408049346e-07, "loss": 0.2622, "step": 17717 }, { "epoch": 2.5405792945225123, "grad_norm": 0.25757288932800293, "learning_rate": 6.975941488768062e-07, "loss": 0.2955, "step": 17718 }, { "epoch": 2.5407226842558073, "grad_norm": 0.27709323167800903, "learning_rate": 6.971691767210387e-07, "loss": 0.2662, "step": 17719 }, { "epoch": 2.5408660739891022, "grad_norm": 0.26615390181541443, "learning_rate": 6.967443243494626e-07, "loss": 0.2795, "step": 17720 }, { "epoch": 2.541009463722397, "grad_norm": 0.29621630907058716, "learning_rate": 6.963195917739046e-07, "loss": 0.2794, "step": 17721 }, { "epoch": 2.5411528534556926, "grad_norm": 0.2598537802696228, "learning_rate": 6.958949790061898e-07, "loss": 0.2807, "step": 17722 }, { "epoch": 2.5412962431889876, "grad_norm": 0.2688443660736084, "learning_rate": 6.954704860581368e-07, "loss": 0.2733, "step": 17723 }, { "epoch": 2.541439632922283, "grad_norm": 0.26700401306152344, "learning_rate": 6.950461129415643e-07, "loss": 0.2665, "step": 17724 }, { "epoch": 2.541583022655578, "grad_norm": 0.2753719985485077, "learning_rate": 6.946218596682853e-07, "loss": 0.2852, "step": 17725 }, { "epoch": 2.541726412388873, "grad_norm": 0.2808988094329834, "learning_rate": 6.941977262501109e-07, "loss": 0.2819, "step": 17726 }, { "epoch": 2.541869802122168, "grad_norm": 0.2653898000717163, "learning_rate": 6.937737126988486e-07, "loss": 0.2804, "step": 17727 }, { "epoch": 2.542013191855463, "grad_norm": 0.2666373550891876, "learning_rate": 6.933498190263016e-07, "loss": 0.2768, "step": 17728 }, { "epoch": 2.5421565815887583, "grad_norm": 0.26509687304496765, "learning_rate": 6.929260452442704e-07, "loss": 0.2753, "step": 17729 }, { "epoch": 2.5422999713220533, "grad_norm": 0.2599506378173828, "learning_rate": 6.925023913645528e-07, "loss": 0.2848, "step": 17730 }, { "epoch": 2.5424433610553483, "grad_norm": 0.2545040249824524, "learning_rate": 6.920788573989429e-07, "loss": 0.2852, "step": 17731 }, { "epoch": 2.5425867507886437, "grad_norm": 0.2747679054737091, "learning_rate": 6.916554433592304e-07, "loss": 0.2794, "step": 17732 }, { "epoch": 2.5427301405219387, "grad_norm": 0.28036436438560486, "learning_rate": 6.912321492572044e-07, "loss": 0.2945, "step": 17733 }, { "epoch": 2.5428735302552337, "grad_norm": 0.26548343896865845, "learning_rate": 6.908089751046454e-07, "loss": 0.2869, "step": 17734 }, { "epoch": 2.5430169199885286, "grad_norm": 0.27721524238586426, "learning_rate": 6.903859209133362e-07, "loss": 0.2779, "step": 17735 }, { "epoch": 2.543160309721824, "grad_norm": 0.26392459869384766, "learning_rate": 6.899629866950536e-07, "loss": 0.2758, "step": 17736 }, { "epoch": 2.543303699455119, "grad_norm": 0.2820843756198883, "learning_rate": 6.895401724615714e-07, "loss": 0.2852, "step": 17737 }, { "epoch": 2.543447089188414, "grad_norm": 0.25803667306900024, "learning_rate": 6.891174782246606e-07, "loss": 0.2838, "step": 17738 }, { "epoch": 2.5435904789217094, "grad_norm": 0.2528005540370941, "learning_rate": 6.886949039960878e-07, "loss": 0.2728, "step": 17739 }, { "epoch": 2.5437338686550044, "grad_norm": 0.2710532248020172, "learning_rate": 6.882724497876159e-07, "loss": 0.2937, "step": 17740 }, { "epoch": 2.5438772583882994, "grad_norm": 0.26470717787742615, "learning_rate": 6.878501156110068e-07, "loss": 0.2831, "step": 17741 }, { "epoch": 2.5440206481215943, "grad_norm": 0.2494121789932251, "learning_rate": 6.874279014780167e-07, "loss": 0.2986, "step": 17742 }, { "epoch": 2.5441640378548893, "grad_norm": 0.2764316201210022, "learning_rate": 6.870058074004005e-07, "loss": 0.2828, "step": 17743 }, { "epoch": 2.5443074275881847, "grad_norm": 0.2679421305656433, "learning_rate": 6.865838333899072e-07, "loss": 0.261, "step": 17744 }, { "epoch": 2.5444508173214797, "grad_norm": 0.2768389582633972, "learning_rate": 6.861619794582852e-07, "loss": 0.2848, "step": 17745 }, { "epoch": 2.544594207054775, "grad_norm": 0.26001212000846863, "learning_rate": 6.85740245617278e-07, "loss": 0.2825, "step": 17746 }, { "epoch": 2.54473759678807, "grad_norm": 0.2687617242336273, "learning_rate": 6.853186318786253e-07, "loss": 0.2712, "step": 17747 }, { "epoch": 2.544880986521365, "grad_norm": 0.25834372639656067, "learning_rate": 6.848971382540648e-07, "loss": 0.2948, "step": 17748 }, { "epoch": 2.54502437625466, "grad_norm": 0.2946169972419739, "learning_rate": 6.844757647553313e-07, "loss": 0.2786, "step": 17749 }, { "epoch": 2.545167765987955, "grad_norm": 0.2643238306045532, "learning_rate": 6.840545113941527e-07, "loss": 0.2879, "step": 17750 }, { "epoch": 2.5453111557212504, "grad_norm": 0.27108234167099, "learning_rate": 6.836333781822574e-07, "loss": 0.2939, "step": 17751 }, { "epoch": 2.5454545454545454, "grad_norm": 0.26934725046157837, "learning_rate": 6.832123651313693e-07, "loss": 0.2747, "step": 17752 }, { "epoch": 2.5455979351878404, "grad_norm": 0.2570881247520447, "learning_rate": 6.82791472253208e-07, "loss": 0.2843, "step": 17753 }, { "epoch": 2.545741324921136, "grad_norm": 0.273809015750885, "learning_rate": 6.823706995594914e-07, "loss": 0.2819, "step": 17754 }, { "epoch": 2.545884714654431, "grad_norm": 0.2927882671356201, "learning_rate": 6.819500470619339e-07, "loss": 0.275, "step": 17755 }, { "epoch": 2.5460281043877258, "grad_norm": 0.26462629437446594, "learning_rate": 6.815295147722434e-07, "loss": 0.2828, "step": 17756 }, { "epoch": 2.5461714941210207, "grad_norm": 0.2475006878376007, "learning_rate": 6.811091027021283e-07, "loss": 0.2641, "step": 17757 }, { "epoch": 2.546314883854316, "grad_norm": 0.28514841198921204, "learning_rate": 6.806888108632914e-07, "loss": 0.281, "step": 17758 }, { "epoch": 2.546458273587611, "grad_norm": 0.275630384683609, "learning_rate": 6.802686392674346e-07, "loss": 0.2761, "step": 17759 }, { "epoch": 2.546601663320906, "grad_norm": 0.26119276881217957, "learning_rate": 6.798485879262546e-07, "loss": 0.2826, "step": 17760 }, { "epoch": 2.5467450530542015, "grad_norm": 0.27643364667892456, "learning_rate": 6.79428656851443e-07, "loss": 0.2779, "step": 17761 }, { "epoch": 2.5468884427874965, "grad_norm": 0.274600088596344, "learning_rate": 6.790088460546917e-07, "loss": 0.2942, "step": 17762 }, { "epoch": 2.5470318325207915, "grad_norm": 0.2807158827781677, "learning_rate": 6.78589155547687e-07, "loss": 0.2781, "step": 17763 }, { "epoch": 2.5471752222540864, "grad_norm": 0.27353933453559875, "learning_rate": 6.781695853421117e-07, "loss": 0.2904, "step": 17764 }, { "epoch": 2.547318611987382, "grad_norm": 0.2880569100379944, "learning_rate": 6.777501354496486e-07, "loss": 0.2727, "step": 17765 }, { "epoch": 2.547462001720677, "grad_norm": 0.2655280828475952, "learning_rate": 6.773308058819722e-07, "loss": 0.2783, "step": 17766 }, { "epoch": 2.547605391453972, "grad_norm": 0.28035619854927063, "learning_rate": 6.769115966507566e-07, "loss": 0.2855, "step": 17767 }, { "epoch": 2.547748781187267, "grad_norm": 0.25713515281677246, "learning_rate": 6.76492507767672e-07, "loss": 0.2615, "step": 17768 }, { "epoch": 2.547892170920562, "grad_norm": 0.2834949791431427, "learning_rate": 6.760735392443851e-07, "loss": 0.2784, "step": 17769 }, { "epoch": 2.548035560653857, "grad_norm": 0.2578565776348114, "learning_rate": 6.756546910925593e-07, "loss": 0.2676, "step": 17770 }, { "epoch": 2.548178950387152, "grad_norm": 0.2714434564113617, "learning_rate": 6.752359633238564e-07, "loss": 0.2836, "step": 17771 }, { "epoch": 2.548322340120447, "grad_norm": 0.26795512437820435, "learning_rate": 6.748173559499299e-07, "loss": 0.2898, "step": 17772 }, { "epoch": 2.5484657298537425, "grad_norm": 0.2613663673400879, "learning_rate": 6.743988689824348e-07, "loss": 0.3112, "step": 17773 }, { "epoch": 2.5486091195870375, "grad_norm": 0.28910815715789795, "learning_rate": 6.739805024330215e-07, "loss": 0.2997, "step": 17774 }, { "epoch": 2.548752509320333, "grad_norm": 0.27317848801612854, "learning_rate": 6.735622563133365e-07, "loss": 0.269, "step": 17775 }, { "epoch": 2.548895899053628, "grad_norm": 0.2764770984649658, "learning_rate": 6.731441306350239e-07, "loss": 0.2658, "step": 17776 }, { "epoch": 2.549039288786923, "grad_norm": 0.27169090509414673, "learning_rate": 6.727261254097217e-07, "loss": 0.2828, "step": 17777 }, { "epoch": 2.549182678520218, "grad_norm": 0.2495211958885193, "learning_rate": 6.723082406490672e-07, "loss": 0.261, "step": 17778 }, { "epoch": 2.549326068253513, "grad_norm": 0.26978859305381775, "learning_rate": 6.718904763646944e-07, "loss": 0.2815, "step": 17779 }, { "epoch": 2.5494694579868082, "grad_norm": 0.27660447359085083, "learning_rate": 6.71472832568233e-07, "loss": 0.2954, "step": 17780 }, { "epoch": 2.549612847720103, "grad_norm": 0.2561139762401581, "learning_rate": 6.71055309271309e-07, "loss": 0.2836, "step": 17781 }, { "epoch": 2.549756237453398, "grad_norm": 0.2658979892730713, "learning_rate": 6.706379064855461e-07, "loss": 0.2756, "step": 17782 }, { "epoch": 2.5498996271866936, "grad_norm": 0.274763286113739, "learning_rate": 6.702206242225645e-07, "loss": 0.2766, "step": 17783 }, { "epoch": 2.5500430169199886, "grad_norm": 0.2692829370498657, "learning_rate": 6.698034624939798e-07, "loss": 0.272, "step": 17784 }, { "epoch": 2.5501864066532836, "grad_norm": 0.28751739859580994, "learning_rate": 6.693864213114059e-07, "loss": 0.2676, "step": 17785 }, { "epoch": 2.5503297963865785, "grad_norm": 0.26162898540496826, "learning_rate": 6.689695006864522e-07, "loss": 0.2752, "step": 17786 }, { "epoch": 2.550473186119874, "grad_norm": 0.2854633331298828, "learning_rate": 6.685527006307263e-07, "loss": 0.2906, "step": 17787 }, { "epoch": 2.550616575853169, "grad_norm": 0.2699282765388489, "learning_rate": 6.681360211558291e-07, "loss": 0.282, "step": 17788 }, { "epoch": 2.550759965586464, "grad_norm": 0.2843814194202423, "learning_rate": 6.677194622733613e-07, "loss": 0.2974, "step": 17789 }, { "epoch": 2.5509033553197593, "grad_norm": 0.2825353145599365, "learning_rate": 6.673030239949196e-07, "loss": 0.2844, "step": 17790 }, { "epoch": 2.5510467450530543, "grad_norm": 0.27688124775886536, "learning_rate": 6.668867063320966e-07, "loss": 0.2837, "step": 17791 }, { "epoch": 2.5511901347863493, "grad_norm": 0.2631082236766815, "learning_rate": 6.66470509296483e-07, "loss": 0.274, "step": 17792 }, { "epoch": 2.5513335245196442, "grad_norm": 0.2745191156864166, "learning_rate": 6.660544328996638e-07, "loss": 0.2908, "step": 17793 }, { "epoch": 2.5514769142529397, "grad_norm": 0.28390786051750183, "learning_rate": 6.656384771532215e-07, "loss": 0.2746, "step": 17794 }, { "epoch": 2.5516203039862346, "grad_norm": 0.2537398338317871, "learning_rate": 6.652226420687369e-07, "loss": 0.2726, "step": 17795 }, { "epoch": 2.5517636937195296, "grad_norm": 0.27574169635772705, "learning_rate": 6.648069276577862e-07, "loss": 0.2701, "step": 17796 }, { "epoch": 2.551907083452825, "grad_norm": 0.25007501244544983, "learning_rate": 6.643913339319413e-07, "loss": 0.2678, "step": 17797 }, { "epoch": 2.55205047318612, "grad_norm": 0.28395551443099976, "learning_rate": 6.639758609027725e-07, "loss": 0.2812, "step": 17798 }, { "epoch": 2.552193862919415, "grad_norm": 0.27112120389938354, "learning_rate": 6.635605085818459e-07, "loss": 0.2789, "step": 17799 }, { "epoch": 2.55233725265271, "grad_norm": 0.28162682056427, "learning_rate": 6.631452769807234e-07, "loss": 0.2677, "step": 17800 }, { "epoch": 2.552480642386005, "grad_norm": 0.27277588844299316, "learning_rate": 6.627301661109653e-07, "loss": 0.2651, "step": 17801 }, { "epoch": 2.5526240321193003, "grad_norm": 0.26812881231307983, "learning_rate": 6.623151759841279e-07, "loss": 0.2819, "step": 17802 }, { "epoch": 2.5527674218525953, "grad_norm": 0.2721693217754364, "learning_rate": 6.619003066117641e-07, "loss": 0.2855, "step": 17803 }, { "epoch": 2.5529108115858907, "grad_norm": 0.25335708260536194, "learning_rate": 6.614855580054214e-07, "loss": 0.2967, "step": 17804 }, { "epoch": 2.5530542013191857, "grad_norm": 0.2805125415325165, "learning_rate": 6.610709301766466e-07, "loss": 0.2869, "step": 17805 }, { "epoch": 2.5531975910524807, "grad_norm": 0.26066094636917114, "learning_rate": 6.606564231369828e-07, "loss": 0.2741, "step": 17806 }, { "epoch": 2.5533409807857756, "grad_norm": 0.2593991458415985, "learning_rate": 6.602420368979696e-07, "loss": 0.2655, "step": 17807 }, { "epoch": 2.5534843705190706, "grad_norm": 0.26276224851608276, "learning_rate": 6.598277714711415e-07, "loss": 0.2618, "step": 17808 }, { "epoch": 2.553627760252366, "grad_norm": 0.30245962738990784, "learning_rate": 6.594136268680334e-07, "loss": 0.2973, "step": 17809 }, { "epoch": 2.553771149985661, "grad_norm": 0.2702839970588684, "learning_rate": 6.589996031001716e-07, "loss": 0.3019, "step": 17810 }, { "epoch": 2.553914539718956, "grad_norm": 0.27997180819511414, "learning_rate": 6.585857001790829e-07, "loss": 0.279, "step": 17811 }, { "epoch": 2.5540579294522514, "grad_norm": 0.2653872072696686, "learning_rate": 6.581719181162899e-07, "loss": 0.2567, "step": 17812 }, { "epoch": 2.5542013191855464, "grad_norm": 0.2736421525478363, "learning_rate": 6.57758256923312e-07, "loss": 0.2833, "step": 17813 }, { "epoch": 2.5543447089188414, "grad_norm": 0.26312148571014404, "learning_rate": 6.573447166116658e-07, "loss": 0.272, "step": 17814 }, { "epoch": 2.5544880986521363, "grad_norm": 0.25678950548171997, "learning_rate": 6.569312971928615e-07, "loss": 0.288, "step": 17815 }, { "epoch": 2.5546314883854317, "grad_norm": 0.2803541421890259, "learning_rate": 6.565179986784087e-07, "loss": 0.2807, "step": 17816 }, { "epoch": 2.5547748781187267, "grad_norm": 0.2979462742805481, "learning_rate": 6.561048210798132e-07, "loss": 0.2924, "step": 17817 }, { "epoch": 2.5549182678520217, "grad_norm": 0.2808898687362671, "learning_rate": 6.556917644085775e-07, "loss": 0.2761, "step": 17818 }, { "epoch": 2.555061657585317, "grad_norm": 0.2799051105976105, "learning_rate": 6.552788286762002e-07, "loss": 0.2747, "step": 17819 }, { "epoch": 2.555205047318612, "grad_norm": 0.2842099666595459, "learning_rate": 6.548660138941765e-07, "loss": 0.2815, "step": 17820 }, { "epoch": 2.555348437051907, "grad_norm": 0.29207849502563477, "learning_rate": 6.544533200739995e-07, "loss": 0.2914, "step": 17821 }, { "epoch": 2.555491826785202, "grad_norm": 0.27284732460975647, "learning_rate": 6.540407472271571e-07, "loss": 0.2692, "step": 17822 }, { "epoch": 2.555635216518497, "grad_norm": 0.2674098312854767, "learning_rate": 6.536282953651352e-07, "loss": 0.2902, "step": 17823 }, { "epoch": 2.5557786062517924, "grad_norm": 0.2696925401687622, "learning_rate": 6.532159644994151e-07, "loss": 0.2812, "step": 17824 }, { "epoch": 2.5559219959850874, "grad_norm": 0.27186521887779236, "learning_rate": 6.528037546414773e-07, "loss": 0.2633, "step": 17825 }, { "epoch": 2.556065385718383, "grad_norm": 0.2634642422199249, "learning_rate": 6.523916658027951e-07, "loss": 0.2646, "step": 17826 }, { "epoch": 2.556208775451678, "grad_norm": 0.274929404258728, "learning_rate": 6.519796979948406e-07, "loss": 0.2615, "step": 17827 }, { "epoch": 2.5563521651849728, "grad_norm": 0.2593154013156891, "learning_rate": 6.515678512290824e-07, "loss": 0.2873, "step": 17828 }, { "epoch": 2.5564955549182677, "grad_norm": 0.26651617884635925, "learning_rate": 6.511561255169869e-07, "loss": 0.2851, "step": 17829 }, { "epoch": 2.5566389446515627, "grad_norm": 0.2643832266330719, "learning_rate": 6.507445208700153e-07, "loss": 0.2851, "step": 17830 }, { "epoch": 2.556782334384858, "grad_norm": 0.2731779217720032, "learning_rate": 6.503330372996252e-07, "loss": 0.2926, "step": 17831 }, { "epoch": 2.556925724118153, "grad_norm": 0.26111137866973877, "learning_rate": 6.499216748172726e-07, "loss": 0.2904, "step": 17832 }, { "epoch": 2.557069113851448, "grad_norm": 0.2587219774723053, "learning_rate": 6.495104334344088e-07, "loss": 0.2823, "step": 17833 }, { "epoch": 2.5572125035847435, "grad_norm": 0.25705134868621826, "learning_rate": 6.490993131624818e-07, "loss": 0.2764, "step": 17834 }, { "epoch": 2.5573558933180385, "grad_norm": 0.275346964597702, "learning_rate": 6.486883140129374e-07, "loss": 0.2842, "step": 17835 }, { "epoch": 2.5574992830513334, "grad_norm": 0.25208136439323425, "learning_rate": 6.482774359972166e-07, "loss": 0.2829, "step": 17836 }, { "epoch": 2.5576426727846284, "grad_norm": 0.27147018909454346, "learning_rate": 6.478666791267573e-07, "loss": 0.2901, "step": 17837 }, { "epoch": 2.557786062517924, "grad_norm": 0.2765122056007385, "learning_rate": 6.474560434129956e-07, "loss": 0.2811, "step": 17838 }, { "epoch": 2.557929452251219, "grad_norm": 0.26770275831222534, "learning_rate": 6.470455288673616e-07, "loss": 0.2823, "step": 17839 }, { "epoch": 2.558072841984514, "grad_norm": 0.2664330005645752, "learning_rate": 6.466351355012845e-07, "loss": 0.2634, "step": 17840 }, { "epoch": 2.558216231717809, "grad_norm": 0.27125295996665955, "learning_rate": 6.462248633261891e-07, "loss": 0.2986, "step": 17841 }, { "epoch": 2.558359621451104, "grad_norm": 0.2787274122238159, "learning_rate": 6.458147123534953e-07, "loss": 0.2611, "step": 17842 }, { "epoch": 2.558503011184399, "grad_norm": 0.2789868116378784, "learning_rate": 6.454046825946219e-07, "loss": 0.2705, "step": 17843 }, { "epoch": 2.558646400917694, "grad_norm": 0.270033597946167, "learning_rate": 6.449947740609836e-07, "loss": 0.2908, "step": 17844 }, { "epoch": 2.5587897906509895, "grad_norm": 0.2763731777667999, "learning_rate": 6.445849867639914e-07, "loss": 0.2745, "step": 17845 }, { "epoch": 2.5589331803842845, "grad_norm": 0.26701757311820984, "learning_rate": 6.441753207150547e-07, "loss": 0.295, "step": 17846 }, { "epoch": 2.5590765701175795, "grad_norm": 0.26386815309524536, "learning_rate": 6.43765775925575e-07, "loss": 0.2823, "step": 17847 }, { "epoch": 2.559219959850875, "grad_norm": 0.26789942383766174, "learning_rate": 6.433563524069553e-07, "loss": 0.3042, "step": 17848 }, { "epoch": 2.55936334958417, "grad_norm": 0.26535120606422424, "learning_rate": 6.429470501705926e-07, "loss": 0.287, "step": 17849 }, { "epoch": 2.559506739317465, "grad_norm": 0.26249146461486816, "learning_rate": 6.425378692278822e-07, "loss": 0.2957, "step": 17850 }, { "epoch": 2.55965012905076, "grad_norm": 0.263263076543808, "learning_rate": 6.42128809590214e-07, "loss": 0.2807, "step": 17851 }, { "epoch": 2.559793518784055, "grad_norm": 0.2725130021572113, "learning_rate": 6.41719871268977e-07, "loss": 0.2756, "step": 17852 }, { "epoch": 2.5599369085173502, "grad_norm": 0.287090927362442, "learning_rate": 6.413110542755529e-07, "loss": 0.2761, "step": 17853 }, { "epoch": 2.560080298250645, "grad_norm": 0.2714363634586334, "learning_rate": 6.409023586213248e-07, "loss": 0.2788, "step": 17854 }, { "epoch": 2.5602236879839406, "grad_norm": 0.27819785475730896, "learning_rate": 6.404937843176695e-07, "loss": 0.2915, "step": 17855 }, { "epoch": 2.5603670777172356, "grad_norm": 0.2574084997177124, "learning_rate": 6.400853313759614e-07, "loss": 0.3132, "step": 17856 }, { "epoch": 2.5605104674505306, "grad_norm": 0.2582416236400604, "learning_rate": 6.396769998075714e-07, "loss": 0.2772, "step": 17857 }, { "epoch": 2.5606538571838255, "grad_norm": 0.2700755298137665, "learning_rate": 6.392687896238658e-07, "loss": 0.2983, "step": 17858 }, { "epoch": 2.5607972469171205, "grad_norm": 0.26086798310279846, "learning_rate": 6.388607008362086e-07, "loss": 0.2836, "step": 17859 }, { "epoch": 2.560940636650416, "grad_norm": 0.26721251010894775, "learning_rate": 6.38452733455961e-07, "loss": 0.2783, "step": 17860 }, { "epoch": 2.561084026383711, "grad_norm": 0.27188217639923096, "learning_rate": 6.3804488749448e-07, "loss": 0.2655, "step": 17861 }, { "epoch": 2.561227416117006, "grad_norm": 0.25933775305747986, "learning_rate": 6.376371629631195e-07, "loss": 0.2491, "step": 17862 }, { "epoch": 2.5613708058503013, "grad_norm": 0.27142199873924255, "learning_rate": 6.372295598732309e-07, "loss": 0.2903, "step": 17863 }, { "epoch": 2.5615141955835963, "grad_norm": 0.260256826877594, "learning_rate": 6.36822078236159e-07, "loss": 0.2743, "step": 17864 }, { "epoch": 2.5616575853168913, "grad_norm": 0.27383506298065186, "learning_rate": 6.364147180632485e-07, "loss": 0.2826, "step": 17865 }, { "epoch": 2.5618009750501862, "grad_norm": 0.26707762479782104, "learning_rate": 6.360074793658394e-07, "loss": 0.2733, "step": 17866 }, { "epoch": 2.5619443647834816, "grad_norm": 0.2643657624721527, "learning_rate": 6.356003621552698e-07, "loss": 0.2689, "step": 17867 }, { "epoch": 2.5620877545167766, "grad_norm": 0.2838556468486786, "learning_rate": 6.351933664428728e-07, "loss": 0.3086, "step": 17868 }, { "epoch": 2.5622311442500716, "grad_norm": 0.27252310514450073, "learning_rate": 6.347864922399776e-07, "loss": 0.2866, "step": 17869 }, { "epoch": 2.562374533983367, "grad_norm": 0.28138428926467896, "learning_rate": 6.34379739557911e-07, "loss": 0.2663, "step": 17870 }, { "epoch": 2.562517923716662, "grad_norm": 0.28499311208724976, "learning_rate": 6.339731084079975e-07, "loss": 0.2719, "step": 17871 }, { "epoch": 2.562661313449957, "grad_norm": 0.2754743993282318, "learning_rate": 6.33566598801556e-07, "loss": 0.2869, "step": 17872 }, { "epoch": 2.562804703183252, "grad_norm": 0.27296826243400574, "learning_rate": 6.331602107499035e-07, "loss": 0.2738, "step": 17873 }, { "epoch": 2.562948092916547, "grad_norm": 0.272389680147171, "learning_rate": 6.327539442643538e-07, "loss": 0.2698, "step": 17874 }, { "epoch": 2.5630914826498423, "grad_norm": 0.28683164715766907, "learning_rate": 6.323477993562155e-07, "loss": 0.2909, "step": 17875 }, { "epoch": 2.5632348723831373, "grad_norm": 0.27726128697395325, "learning_rate": 6.319417760367963e-07, "loss": 0.3035, "step": 17876 }, { "epoch": 2.5633782621164327, "grad_norm": 0.26824846863746643, "learning_rate": 6.315358743173983e-07, "loss": 0.2851, "step": 17877 }, { "epoch": 2.5635216518497277, "grad_norm": 0.273775190114975, "learning_rate": 6.311300942093218e-07, "loss": 0.2725, "step": 17878 }, { "epoch": 2.5636650415830227, "grad_norm": 0.27998408675193787, "learning_rate": 6.30724435723864e-07, "loss": 0.29, "step": 17879 }, { "epoch": 2.5638084313163176, "grad_norm": 0.2633374333381653, "learning_rate": 6.303188988723158e-07, "loss": 0.2799, "step": 17880 }, { "epoch": 2.5639518210496126, "grad_norm": 0.27023643255233765, "learning_rate": 6.299134836659676e-07, "loss": 0.3153, "step": 17881 }, { "epoch": 2.564095210782908, "grad_norm": 0.27695226669311523, "learning_rate": 6.295081901161054e-07, "loss": 0.3008, "step": 17882 }, { "epoch": 2.564238600516203, "grad_norm": 0.2779178321361542, "learning_rate": 6.291030182340119e-07, "loss": 0.2765, "step": 17883 }, { "epoch": 2.564381990249498, "grad_norm": 0.26486486196517944, "learning_rate": 6.286979680309679e-07, "loss": 0.2794, "step": 17884 }, { "epoch": 2.5645253799827934, "grad_norm": 0.26574400067329407, "learning_rate": 6.282930395182474e-07, "loss": 0.2702, "step": 17885 }, { "epoch": 2.5646687697160884, "grad_norm": 0.2734512388706207, "learning_rate": 6.278882327071234e-07, "loss": 0.2881, "step": 17886 }, { "epoch": 2.5648121594493833, "grad_norm": 0.2572616636753082, "learning_rate": 6.274835476088653e-07, "loss": 0.2872, "step": 17887 }, { "epoch": 2.5649555491826783, "grad_norm": 0.26283740997314453, "learning_rate": 6.270789842347397e-07, "loss": 0.2882, "step": 17888 }, { "epoch": 2.5650989389159737, "grad_norm": 0.27688583731651306, "learning_rate": 6.266745425960075e-07, "loss": 0.2813, "step": 17889 }, { "epoch": 2.5652423286492687, "grad_norm": 0.2844636142253876, "learning_rate": 6.262702227039291e-07, "loss": 0.2723, "step": 17890 }, { "epoch": 2.5653857183825637, "grad_norm": 0.2572542130947113, "learning_rate": 6.258660245697596e-07, "loss": 0.2877, "step": 17891 }, { "epoch": 2.565529108115859, "grad_norm": 0.27424943447113037, "learning_rate": 6.254619482047514e-07, "loss": 0.2954, "step": 17892 }, { "epoch": 2.565672497849154, "grad_norm": 0.2806653380393982, "learning_rate": 6.250579936201529e-07, "loss": 0.2675, "step": 17893 }, { "epoch": 2.565815887582449, "grad_norm": 0.2683069109916687, "learning_rate": 6.246541608272106e-07, "loss": 0.2676, "step": 17894 }, { "epoch": 2.565959277315744, "grad_norm": 0.26261016726493835, "learning_rate": 6.242504498371665e-07, "loss": 0.2662, "step": 17895 }, { "epoch": 2.5661026670490394, "grad_norm": 0.2810928225517273, "learning_rate": 6.238468606612575e-07, "loss": 0.289, "step": 17896 }, { "epoch": 2.5662460567823344, "grad_norm": 0.2706128656864166, "learning_rate": 6.234433933107204e-07, "loss": 0.274, "step": 17897 }, { "epoch": 2.5663894465156294, "grad_norm": 0.28372129797935486, "learning_rate": 6.230400477967869e-07, "loss": 0.2741, "step": 17898 }, { "epoch": 2.566532836248925, "grad_norm": 0.265971839427948, "learning_rate": 6.226368241306852e-07, "loss": 0.2661, "step": 17899 }, { "epoch": 2.56667622598222, "grad_norm": 0.278933048248291, "learning_rate": 6.222337223236418e-07, "loss": 0.2747, "step": 17900 }, { "epoch": 2.5668196157155148, "grad_norm": 0.2625856399536133, "learning_rate": 6.218307423868763e-07, "loss": 0.2904, "step": 17901 }, { "epoch": 2.5669630054488097, "grad_norm": 0.27334994077682495, "learning_rate": 6.214278843316079e-07, "loss": 0.2796, "step": 17902 }, { "epoch": 2.5671063951821047, "grad_norm": 0.28546378016471863, "learning_rate": 6.210251481690516e-07, "loss": 0.2759, "step": 17903 }, { "epoch": 2.5672497849154, "grad_norm": 0.26689448952674866, "learning_rate": 6.206225339104194e-07, "loss": 0.2957, "step": 17904 }, { "epoch": 2.567393174648695, "grad_norm": 0.2714923918247223, "learning_rate": 6.20220041566919e-07, "loss": 0.2739, "step": 17905 }, { "epoch": 2.5675365643819905, "grad_norm": 0.26300540566444397, "learning_rate": 6.198176711497561e-07, "loss": 0.2738, "step": 17906 }, { "epoch": 2.5676799541152855, "grad_norm": 0.26579198241233826, "learning_rate": 6.194154226701304e-07, "loss": 0.2967, "step": 17907 }, { "epoch": 2.5678233438485805, "grad_norm": 0.2752390205860138, "learning_rate": 6.190132961392409e-07, "loss": 0.2844, "step": 17908 }, { "epoch": 2.5679667335818754, "grad_norm": 0.27509769797325134, "learning_rate": 6.186112915682807e-07, "loss": 0.2838, "step": 17909 }, { "epoch": 2.5681101233151704, "grad_norm": 0.2583586275577545, "learning_rate": 6.182094089684432e-07, "loss": 0.2988, "step": 17910 }, { "epoch": 2.568253513048466, "grad_norm": 0.28462734818458557, "learning_rate": 6.178076483509166e-07, "loss": 0.2802, "step": 17911 }, { "epoch": 2.568396902781761, "grad_norm": 0.25758665800094604, "learning_rate": 6.17406009726883e-07, "loss": 0.2879, "step": 17912 }, { "epoch": 2.568540292515056, "grad_norm": 0.2702629268169403, "learning_rate": 6.170044931075242e-07, "loss": 0.2774, "step": 17913 }, { "epoch": 2.568683682248351, "grad_norm": 0.265625536441803, "learning_rate": 6.166030985040183e-07, "loss": 0.2881, "step": 17914 }, { "epoch": 2.568827071981646, "grad_norm": 0.254523366689682, "learning_rate": 6.162018259275393e-07, "loss": 0.2704, "step": 17915 }, { "epoch": 2.568970461714941, "grad_norm": 0.2640296518802643, "learning_rate": 6.158006753892576e-07, "loss": 0.2662, "step": 17916 }, { "epoch": 2.569113851448236, "grad_norm": 0.2836167812347412, "learning_rate": 6.153996469003421e-07, "loss": 0.2862, "step": 17917 }, { "epoch": 2.5692572411815315, "grad_norm": 0.2637462913990021, "learning_rate": 6.149987404719543e-07, "loss": 0.2636, "step": 17918 }, { "epoch": 2.5694006309148265, "grad_norm": 0.2871636152267456, "learning_rate": 6.145979561152565e-07, "loss": 0.2726, "step": 17919 }, { "epoch": 2.5695440206481215, "grad_norm": 0.26230987906455994, "learning_rate": 6.141972938414054e-07, "loss": 0.2781, "step": 17920 }, { "epoch": 2.569687410381417, "grad_norm": 0.2701966464519501, "learning_rate": 6.137967536615552e-07, "loss": 0.261, "step": 17921 }, { "epoch": 2.569830800114712, "grad_norm": 0.2668413519859314, "learning_rate": 6.133963355868572e-07, "loss": 0.2747, "step": 17922 }, { "epoch": 2.569974189848007, "grad_norm": 0.26916444301605225, "learning_rate": 6.129960396284562e-07, "loss": 0.2707, "step": 17923 }, { "epoch": 2.570117579581302, "grad_norm": 0.2899882197380066, "learning_rate": 6.125958657974968e-07, "loss": 0.2733, "step": 17924 }, { "epoch": 2.570260969314597, "grad_norm": 0.27430593967437744, "learning_rate": 6.121958141051193e-07, "loss": 0.2755, "step": 17925 }, { "epoch": 2.570404359047892, "grad_norm": 0.2854495048522949, "learning_rate": 6.117958845624611e-07, "loss": 0.2924, "step": 17926 }, { "epoch": 2.570547748781187, "grad_norm": 0.25582411885261536, "learning_rate": 6.113960771806548e-07, "loss": 0.2793, "step": 17927 }, { "epoch": 2.5706911385144826, "grad_norm": 0.2627231478691101, "learning_rate": 6.109963919708311e-07, "loss": 0.2777, "step": 17928 }, { "epoch": 2.5708345282477776, "grad_norm": 0.27116668224334717, "learning_rate": 6.105968289441161e-07, "loss": 0.2805, "step": 17929 }, { "epoch": 2.5709779179810726, "grad_norm": 0.2711769640445709, "learning_rate": 6.101973881116335e-07, "loss": 0.2805, "step": 17930 }, { "epoch": 2.5711213077143675, "grad_norm": 0.2942255139350891, "learning_rate": 6.097980694845024e-07, "loss": 0.309, "step": 17931 }, { "epoch": 2.5712646974476625, "grad_norm": 0.27911657094955444, "learning_rate": 6.093988730738399e-07, "loss": 0.295, "step": 17932 }, { "epoch": 2.571408087180958, "grad_norm": 0.28928521275520325, "learning_rate": 6.089997988907598e-07, "loss": 0.2884, "step": 17933 }, { "epoch": 2.571551476914253, "grad_norm": 0.2737824320793152, "learning_rate": 6.086008469463695e-07, "loss": 0.2646, "step": 17934 }, { "epoch": 2.571694866647548, "grad_norm": 0.27086517214775085, "learning_rate": 6.082020172517766e-07, "loss": 0.2868, "step": 17935 }, { "epoch": 2.5718382563808433, "grad_norm": 0.27671605348587036, "learning_rate": 6.078033098180835e-07, "loss": 0.2713, "step": 17936 }, { "epoch": 2.5719816461141383, "grad_norm": 0.2744416296482086, "learning_rate": 6.074047246563896e-07, "loss": 0.2791, "step": 17937 }, { "epoch": 2.5721250358474332, "grad_norm": 0.2781071662902832, "learning_rate": 6.070062617777922e-07, "loss": 0.2765, "step": 17938 }, { "epoch": 2.572268425580728, "grad_norm": 0.27803897857666016, "learning_rate": 6.066079211933823e-07, "loss": 0.2754, "step": 17939 }, { "epoch": 2.5724118153140236, "grad_norm": 0.27752870321273804, "learning_rate": 6.062097029142489e-07, "loss": 0.2757, "step": 17940 }, { "epoch": 2.5725552050473186, "grad_norm": 0.26148995757102966, "learning_rate": 6.058116069514785e-07, "loss": 0.2854, "step": 17941 }, { "epoch": 2.5726985947806136, "grad_norm": 0.25916579365730286, "learning_rate": 6.054136333161542e-07, "loss": 0.2778, "step": 17942 }, { "epoch": 2.572841984513909, "grad_norm": 0.2692815065383911, "learning_rate": 6.050157820193536e-07, "loss": 0.2649, "step": 17943 }, { "epoch": 2.572985374247204, "grad_norm": 0.26493924856185913, "learning_rate": 6.04618053072153e-07, "loss": 0.2767, "step": 17944 }, { "epoch": 2.573128763980499, "grad_norm": 0.2634076476097107, "learning_rate": 6.042204464856243e-07, "loss": 0.2766, "step": 17945 }, { "epoch": 2.573272153713794, "grad_norm": 0.30044808983802795, "learning_rate": 6.038229622708364e-07, "loss": 0.304, "step": 17946 }, { "epoch": 2.5734155434470893, "grad_norm": 0.2532671093940735, "learning_rate": 6.034256004388545e-07, "loss": 0.2846, "step": 17947 }, { "epoch": 2.5735589331803843, "grad_norm": 0.276376336812973, "learning_rate": 6.030283610007409e-07, "loss": 0.2819, "step": 17948 }, { "epoch": 2.5737023229136793, "grad_norm": 0.26294001936912537, "learning_rate": 6.026312439675553e-07, "loss": 0.2909, "step": 17949 }, { "epoch": 2.5738457126469747, "grad_norm": 0.26623010635375977, "learning_rate": 6.022342493503497e-07, "loss": 0.2769, "step": 17950 }, { "epoch": 2.5739891023802697, "grad_norm": 0.2448861002922058, "learning_rate": 6.018373771601782e-07, "loss": 0.2843, "step": 17951 }, { "epoch": 2.5741324921135647, "grad_norm": 0.29088544845581055, "learning_rate": 6.01440627408088e-07, "loss": 0.2916, "step": 17952 }, { "epoch": 2.5742758818468596, "grad_norm": 0.26894038915634155, "learning_rate": 6.010440001051248e-07, "loss": 0.2926, "step": 17953 }, { "epoch": 2.5744192715801546, "grad_norm": 0.25642284750938416, "learning_rate": 6.006474952623309e-07, "loss": 0.2747, "step": 17954 }, { "epoch": 2.57456266131345, "grad_norm": 0.2762921452522278, "learning_rate": 6.002511128907418e-07, "loss": 0.2815, "step": 17955 }, { "epoch": 2.574706051046745, "grad_norm": 0.2851441502571106, "learning_rate": 5.998548530013937e-07, "loss": 0.2914, "step": 17956 }, { "epoch": 2.5748494407800404, "grad_norm": 0.2768298089504242, "learning_rate": 5.994587156053183e-07, "loss": 0.2819, "step": 17957 }, { "epoch": 2.5749928305133354, "grad_norm": 0.2823502719402313, "learning_rate": 5.990627007135425e-07, "loss": 0.2946, "step": 17958 }, { "epoch": 2.5751362202466304, "grad_norm": 0.2784554958343506, "learning_rate": 5.986668083370911e-07, "loss": 0.3199, "step": 17959 }, { "epoch": 2.5752796099799253, "grad_norm": 0.2748110890388489, "learning_rate": 5.982710384869867e-07, "loss": 0.2742, "step": 17960 }, { "epoch": 2.5754229997132203, "grad_norm": 0.2725352942943573, "learning_rate": 5.978753911742441e-07, "loss": 0.2884, "step": 17961 }, { "epoch": 2.5755663894465157, "grad_norm": 0.27107611298561096, "learning_rate": 5.97479866409879e-07, "loss": 0.2927, "step": 17962 }, { "epoch": 2.5757097791798107, "grad_norm": 0.27542147040367126, "learning_rate": 5.970844642049023e-07, "loss": 0.2871, "step": 17963 }, { "epoch": 2.5758531689131057, "grad_norm": 0.27689120173454285, "learning_rate": 5.96689184570321e-07, "loss": 0.2807, "step": 17964 }, { "epoch": 2.575996558646401, "grad_norm": 0.26436272263526917, "learning_rate": 5.962940275171392e-07, "loss": 0.2913, "step": 17965 }, { "epoch": 2.576139948379696, "grad_norm": 0.275483101606369, "learning_rate": 5.958989930563575e-07, "loss": 0.2716, "step": 17966 }, { "epoch": 2.576283338112991, "grad_norm": 0.283413827419281, "learning_rate": 5.955040811989726e-07, "loss": 0.3112, "step": 17967 }, { "epoch": 2.576426727846286, "grad_norm": 0.2747838497161865, "learning_rate": 5.951092919559792e-07, "loss": 0.2675, "step": 17968 }, { "epoch": 2.5765701175795814, "grad_norm": 0.2914254665374756, "learning_rate": 5.947146253383673e-07, "loss": 0.2658, "step": 17969 }, { "epoch": 2.5767135073128764, "grad_norm": 0.27112963795661926, "learning_rate": 5.943200813571231e-07, "loss": 0.2735, "step": 17970 }, { "epoch": 2.5768568970461714, "grad_norm": 0.280496746301651, "learning_rate": 5.939256600232318e-07, "loss": 0.2654, "step": 17971 }, { "epoch": 2.577000286779467, "grad_norm": 0.2830597758293152, "learning_rate": 5.935313613476717e-07, "loss": 0.2869, "step": 17972 }, { "epoch": 2.5771436765127618, "grad_norm": 0.2614940404891968, "learning_rate": 5.931371853414197e-07, "loss": 0.2895, "step": 17973 }, { "epoch": 2.5772870662460567, "grad_norm": 0.27347615361213684, "learning_rate": 5.927431320154497e-07, "loss": 0.2747, "step": 17974 }, { "epoch": 2.5774304559793517, "grad_norm": 0.27852314710617065, "learning_rate": 5.923492013807308e-07, "loss": 0.2961, "step": 17975 }, { "epoch": 2.577573845712647, "grad_norm": 0.2754024267196655, "learning_rate": 5.919553934482314e-07, "loss": 0.2788, "step": 17976 }, { "epoch": 2.577717235445942, "grad_norm": 0.2828894257545471, "learning_rate": 5.915617082289121e-07, "loss": 0.2762, "step": 17977 }, { "epoch": 2.577860625179237, "grad_norm": 0.28332534432411194, "learning_rate": 5.911681457337332e-07, "loss": 0.2775, "step": 17978 }, { "epoch": 2.5780040149125325, "grad_norm": 0.29242631793022156, "learning_rate": 5.907747059736507e-07, "loss": 0.2806, "step": 17979 }, { "epoch": 2.5781474046458275, "grad_norm": 0.2655653655529022, "learning_rate": 5.903813889596177e-07, "loss": 0.289, "step": 17980 }, { "epoch": 2.5782907943791225, "grad_norm": 0.27251437306404114, "learning_rate": 5.899881947025843e-07, "loss": 0.2645, "step": 17981 }, { "epoch": 2.5784341841124174, "grad_norm": 0.27850526571273804, "learning_rate": 5.89595123213495e-07, "loss": 0.2758, "step": 17982 }, { "epoch": 2.5785775738457124, "grad_norm": 0.26996567845344543, "learning_rate": 5.892021745032933e-07, "loss": 0.2722, "step": 17983 }, { "epoch": 2.578720963579008, "grad_norm": 0.25376203656196594, "learning_rate": 5.888093485829177e-07, "loss": 0.2805, "step": 17984 }, { "epoch": 2.578864353312303, "grad_norm": 0.2763822674751282, "learning_rate": 5.884166454633044e-07, "loss": 0.2834, "step": 17985 }, { "epoch": 2.579007743045598, "grad_norm": 0.26811033487319946, "learning_rate": 5.880240651553853e-07, "loss": 0.2903, "step": 17986 }, { "epoch": 2.579151132778893, "grad_norm": 0.2742629647254944, "learning_rate": 5.876316076700905e-07, "loss": 0.2605, "step": 17987 }, { "epoch": 2.579294522512188, "grad_norm": 0.27144482731819153, "learning_rate": 5.872392730183435e-07, "loss": 0.275, "step": 17988 }, { "epoch": 2.579437912245483, "grad_norm": 0.2770262360572815, "learning_rate": 5.868470612110666e-07, "loss": 0.3047, "step": 17989 }, { "epoch": 2.579581301978778, "grad_norm": 0.25871703028678894, "learning_rate": 5.864549722591789e-07, "loss": 0.2959, "step": 17990 }, { "epoch": 2.5797246917120735, "grad_norm": 0.2651616930961609, "learning_rate": 5.860630061735955e-07, "loss": 0.2893, "step": 17991 }, { "epoch": 2.5798680814453685, "grad_norm": 0.2944222688674927, "learning_rate": 5.856711629652289e-07, "loss": 0.2925, "step": 17992 }, { "epoch": 2.5800114711786635, "grad_norm": 0.27255409955978394, "learning_rate": 5.852794426449859e-07, "loss": 0.2891, "step": 17993 }, { "epoch": 2.580154860911959, "grad_norm": 0.2571471333503723, "learning_rate": 5.848878452237722e-07, "loss": 0.2843, "step": 17994 }, { "epoch": 2.580298250645254, "grad_norm": 0.27781423926353455, "learning_rate": 5.84496370712489e-07, "loss": 0.2914, "step": 17995 }, { "epoch": 2.580441640378549, "grad_norm": 0.2517641484737396, "learning_rate": 5.841050191220343e-07, "loss": 0.2687, "step": 17996 }, { "epoch": 2.580585030111844, "grad_norm": 0.2643176317214966, "learning_rate": 5.837137904633034e-07, "loss": 0.2807, "step": 17997 }, { "epoch": 2.5807284198451392, "grad_norm": 0.277920663356781, "learning_rate": 5.833226847471868e-07, "loss": 0.2725, "step": 17998 }, { "epoch": 2.580871809578434, "grad_norm": 0.2727457284927368, "learning_rate": 5.829317019845726e-07, "loss": 0.284, "step": 17999 }, { "epoch": 2.581015199311729, "grad_norm": 0.2594926059246063, "learning_rate": 5.825408421863449e-07, "loss": 0.2815, "step": 18000 }, { "epoch": 2.5811585890450246, "grad_norm": 0.2720824182033539, "learning_rate": 5.821501053633849e-07, "loss": 0.2882, "step": 18001 }, { "epoch": 2.5813019787783196, "grad_norm": 0.2644180655479431, "learning_rate": 5.817594915265701e-07, "loss": 0.2755, "step": 18002 }, { "epoch": 2.5814453685116145, "grad_norm": 0.26937615871429443, "learning_rate": 5.813690006867756e-07, "loss": 0.2734, "step": 18003 }, { "epoch": 2.5815887582449095, "grad_norm": 0.27289146184921265, "learning_rate": 5.8097863285487e-07, "loss": 0.2807, "step": 18004 }, { "epoch": 2.5817321479782045, "grad_norm": 0.2837333083152771, "learning_rate": 5.805883880417213e-07, "loss": 0.2714, "step": 18005 }, { "epoch": 2.5818755377115, "grad_norm": 0.27929797768592834, "learning_rate": 5.80198266258194e-07, "loss": 0.2868, "step": 18006 }, { "epoch": 2.582018927444795, "grad_norm": 0.27379441261291504, "learning_rate": 5.798082675151478e-07, "loss": 0.2877, "step": 18007 }, { "epoch": 2.5821623171780903, "grad_norm": 0.26589930057525635, "learning_rate": 5.794183918234409e-07, "loss": 0.2681, "step": 18008 }, { "epoch": 2.5823057069113853, "grad_norm": 0.2767714560031891, "learning_rate": 5.790286391939248e-07, "loss": 0.2965, "step": 18009 }, { "epoch": 2.5824490966446803, "grad_norm": 0.2985346019268036, "learning_rate": 5.786390096374505e-07, "loss": 0.2762, "step": 18010 }, { "epoch": 2.5825924863779752, "grad_norm": 0.2734147310256958, "learning_rate": 5.782495031648649e-07, "loss": 0.2861, "step": 18011 }, { "epoch": 2.58273587611127, "grad_norm": 0.2679561376571655, "learning_rate": 5.778601197870115e-07, "loss": 0.2784, "step": 18012 }, { "epoch": 2.5828792658445656, "grad_norm": 0.2760981619358063, "learning_rate": 5.774708595147294e-07, "loss": 0.2878, "step": 18013 }, { "epoch": 2.5830226555778606, "grad_norm": 0.27449509501457214, "learning_rate": 5.770817223588571e-07, "loss": 0.2778, "step": 18014 }, { "epoch": 2.5831660453111556, "grad_norm": 0.2642173767089844, "learning_rate": 5.766927083302243e-07, "loss": 0.2844, "step": 18015 }, { "epoch": 2.583309435044451, "grad_norm": 0.3013385534286499, "learning_rate": 5.763038174396624e-07, "loss": 0.2716, "step": 18016 }, { "epoch": 2.583452824777746, "grad_norm": 0.3063717186450958, "learning_rate": 5.759150496979971e-07, "loss": 0.2813, "step": 18017 }, { "epoch": 2.583596214511041, "grad_norm": 0.2721595764160156, "learning_rate": 5.755264051160514e-07, "loss": 0.2749, "step": 18018 }, { "epoch": 2.583739604244336, "grad_norm": 0.27933651208877563, "learning_rate": 5.751378837046445e-07, "loss": 0.2999, "step": 18019 }, { "epoch": 2.5838829939776313, "grad_norm": 0.26739752292633057, "learning_rate": 5.747494854745922e-07, "loss": 0.2862, "step": 18020 }, { "epoch": 2.5840263837109263, "grad_norm": 0.26752379536628723, "learning_rate": 5.743612104367075e-07, "loss": 0.2641, "step": 18021 }, { "epoch": 2.5841697734442213, "grad_norm": 0.250833123922348, "learning_rate": 5.739730586017978e-07, "loss": 0.2612, "step": 18022 }, { "epoch": 2.5843131631775167, "grad_norm": 0.2961835563182831, "learning_rate": 5.735850299806705e-07, "loss": 0.2716, "step": 18023 }, { "epoch": 2.5844565529108117, "grad_norm": 0.2831290066242218, "learning_rate": 5.731971245841261e-07, "loss": 0.2866, "step": 18024 }, { "epoch": 2.5845999426441066, "grad_norm": 0.25371912121772766, "learning_rate": 5.728093424229658e-07, "loss": 0.2789, "step": 18025 }, { "epoch": 2.5847433323774016, "grad_norm": 0.24268250167369843, "learning_rate": 5.724216835079815e-07, "loss": 0.304, "step": 18026 }, { "epoch": 2.584886722110697, "grad_norm": 0.26573050022125244, "learning_rate": 5.720341478499669e-07, "loss": 0.2718, "step": 18027 }, { "epoch": 2.585030111843992, "grad_norm": 0.2890997529029846, "learning_rate": 5.7164673545971e-07, "loss": 0.2661, "step": 18028 }, { "epoch": 2.585173501577287, "grad_norm": 0.27636364102363586, "learning_rate": 5.712594463479959e-07, "loss": 0.285, "step": 18029 }, { "epoch": 2.5853168913105824, "grad_norm": 0.2719647288322449, "learning_rate": 5.708722805256073e-07, "loss": 0.2706, "step": 18030 }, { "epoch": 2.5854602810438774, "grad_norm": 0.2788918912410736, "learning_rate": 5.704852380033205e-07, "loss": 0.2803, "step": 18031 }, { "epoch": 2.5856036707771723, "grad_norm": 0.28371497988700867, "learning_rate": 5.700983187919102e-07, "loss": 0.2879, "step": 18032 }, { "epoch": 2.5857470605104673, "grad_norm": 0.27285048365592957, "learning_rate": 5.697115229021482e-07, "loss": 0.2769, "step": 18033 }, { "epoch": 2.5858904502437623, "grad_norm": 0.2724670171737671, "learning_rate": 5.693248503448029e-07, "loss": 0.2912, "step": 18034 }, { "epoch": 2.5860338399770577, "grad_norm": 0.2583789825439453, "learning_rate": 5.689383011306377e-07, "loss": 0.2886, "step": 18035 }, { "epoch": 2.5861772297103527, "grad_norm": 0.26662522554397583, "learning_rate": 5.685518752704139e-07, "loss": 0.2686, "step": 18036 }, { "epoch": 2.586320619443648, "grad_norm": 0.25912922620773315, "learning_rate": 5.681655727748891e-07, "loss": 0.2987, "step": 18037 }, { "epoch": 2.586464009176943, "grad_norm": 0.27670523524284363, "learning_rate": 5.677793936548175e-07, "loss": 0.2898, "step": 18038 }, { "epoch": 2.586607398910238, "grad_norm": 0.27418023347854614, "learning_rate": 5.673933379209495e-07, "loss": 0.2817, "step": 18039 }, { "epoch": 2.586750788643533, "grad_norm": 0.25785163044929504, "learning_rate": 5.670074055840324e-07, "loss": 0.281, "step": 18040 }, { "epoch": 2.586894178376828, "grad_norm": 0.28761252760887146, "learning_rate": 5.666215966548106e-07, "loss": 0.2697, "step": 18041 }, { "epoch": 2.5870375681101234, "grad_norm": 0.28175732493400574, "learning_rate": 5.66235911144023e-07, "loss": 0.283, "step": 18042 }, { "epoch": 2.5871809578434184, "grad_norm": 0.29312366247177124, "learning_rate": 5.658503490624073e-07, "loss": 0.277, "step": 18043 }, { "epoch": 2.5873243475767134, "grad_norm": 0.269601047039032, "learning_rate": 5.654649104206966e-07, "loss": 0.284, "step": 18044 }, { "epoch": 2.587467737310009, "grad_norm": 0.24647246301174164, "learning_rate": 5.650795952296212e-07, "loss": 0.2901, "step": 18045 }, { "epoch": 2.5876111270433038, "grad_norm": 0.2601780295372009, "learning_rate": 5.646944034999091e-07, "loss": 0.2677, "step": 18046 }, { "epoch": 2.5877545167765987, "grad_norm": 0.25307929515838623, "learning_rate": 5.643093352422807e-07, "loss": 0.29, "step": 18047 }, { "epoch": 2.5878979065098937, "grad_norm": 0.2664043605327606, "learning_rate": 5.639243904674568e-07, "loss": 0.2735, "step": 18048 }, { "epoch": 2.588041296243189, "grad_norm": 0.2757214903831482, "learning_rate": 5.635395691861545e-07, "loss": 0.2868, "step": 18049 }, { "epoch": 2.588184685976484, "grad_norm": 0.2616255581378937, "learning_rate": 5.631548714090856e-07, "loss": 0.2863, "step": 18050 }, { "epoch": 2.588328075709779, "grad_norm": 0.2712137699127197, "learning_rate": 5.627702971469601e-07, "loss": 0.3015, "step": 18051 }, { "epoch": 2.5884714654430745, "grad_norm": 0.268257200717926, "learning_rate": 5.623858464104848e-07, "loss": 0.2644, "step": 18052 }, { "epoch": 2.5886148551763695, "grad_norm": 0.26319462060928345, "learning_rate": 5.620015192103601e-07, "loss": 0.2684, "step": 18053 }, { "epoch": 2.5887582449096644, "grad_norm": 0.26288965344429016, "learning_rate": 5.616173155572862e-07, "loss": 0.2804, "step": 18054 }, { "epoch": 2.5889016346429594, "grad_norm": 0.27387526631355286, "learning_rate": 5.612332354619577e-07, "loss": 0.267, "step": 18055 }, { "epoch": 2.5890450243762544, "grad_norm": 0.27896496653556824, "learning_rate": 5.608492789350689e-07, "loss": 0.2825, "step": 18056 }, { "epoch": 2.58918841410955, "grad_norm": 0.27299198508262634, "learning_rate": 5.604654459873083e-07, "loss": 0.3053, "step": 18057 }, { "epoch": 2.589331803842845, "grad_norm": 0.28177982568740845, "learning_rate": 5.600817366293598e-07, "loss": 0.2846, "step": 18058 }, { "epoch": 2.58947519357614, "grad_norm": 0.27533820271492004, "learning_rate": 5.596981508719057e-07, "loss": 0.2863, "step": 18059 }, { "epoch": 2.589618583309435, "grad_norm": 0.2696508467197418, "learning_rate": 5.593146887256245e-07, "loss": 0.2889, "step": 18060 }, { "epoch": 2.58976197304273, "grad_norm": 0.27534955739974976, "learning_rate": 5.589313502011911e-07, "loss": 0.294, "step": 18061 }, { "epoch": 2.589905362776025, "grad_norm": 0.2712952196598053, "learning_rate": 5.585481353092775e-07, "loss": 0.2831, "step": 18062 }, { "epoch": 2.59004875250932, "grad_norm": 0.2509784698486328, "learning_rate": 5.58165044060553e-07, "loss": 0.2953, "step": 18063 }, { "epoch": 2.5901921422426155, "grad_norm": 0.2774987816810608, "learning_rate": 5.577820764656794e-07, "loss": 0.2589, "step": 18064 }, { "epoch": 2.5903355319759105, "grad_norm": 0.2685502767562866, "learning_rate": 5.573992325353194e-07, "loss": 0.2916, "step": 18065 }, { "epoch": 2.5904789217092055, "grad_norm": 0.25223544239997864, "learning_rate": 5.570165122801307e-07, "loss": 0.282, "step": 18066 }, { "epoch": 2.590622311442501, "grad_norm": 0.250813752412796, "learning_rate": 5.566339157107681e-07, "loss": 0.2745, "step": 18067 }, { "epoch": 2.590765701175796, "grad_norm": 0.2669017016887665, "learning_rate": 5.562514428378824e-07, "loss": 0.289, "step": 18068 }, { "epoch": 2.590909090909091, "grad_norm": 0.28064998984336853, "learning_rate": 5.558690936721206e-07, "loss": 0.2894, "step": 18069 }, { "epoch": 2.591052480642386, "grad_norm": 0.27077075839042664, "learning_rate": 5.55486868224126e-07, "loss": 0.2749, "step": 18070 }, { "epoch": 2.591195870375681, "grad_norm": 0.25859832763671875, "learning_rate": 5.551047665045406e-07, "loss": 0.2765, "step": 18071 }, { "epoch": 2.591339260108976, "grad_norm": 0.2640326917171478, "learning_rate": 5.547227885240009e-07, "loss": 0.2744, "step": 18072 }, { "epoch": 2.591482649842271, "grad_norm": 0.25721973180770874, "learning_rate": 5.543409342931405e-07, "loss": 0.2831, "step": 18073 }, { "epoch": 2.5916260395755666, "grad_norm": 0.2750016152858734, "learning_rate": 5.539592038225899e-07, "loss": 0.2894, "step": 18074 }, { "epoch": 2.5917694293088616, "grad_norm": 0.26926419138908386, "learning_rate": 5.535775971229762e-07, "loss": 0.2767, "step": 18075 }, { "epoch": 2.5919128190421565, "grad_norm": 0.27853235602378845, "learning_rate": 5.531961142049219e-07, "loss": 0.2998, "step": 18076 }, { "epoch": 2.5920562087754515, "grad_norm": 0.25044968724250793, "learning_rate": 5.528147550790474e-07, "loss": 0.2971, "step": 18077 }, { "epoch": 2.592199598508747, "grad_norm": 0.27838918566703796, "learning_rate": 5.524335197559688e-07, "loss": 0.281, "step": 18078 }, { "epoch": 2.592342988242042, "grad_norm": 0.27358323335647583, "learning_rate": 5.520524082463008e-07, "loss": 0.2786, "step": 18079 }, { "epoch": 2.592486377975337, "grad_norm": 0.29980891942977905, "learning_rate": 5.5167142056065e-07, "loss": 0.3089, "step": 18080 }, { "epoch": 2.5926297677086323, "grad_norm": 0.2862519323825836, "learning_rate": 5.512905567096244e-07, "loss": 0.2967, "step": 18081 }, { "epoch": 2.5927731574419273, "grad_norm": 0.27005088329315186, "learning_rate": 5.509098167038263e-07, "loss": 0.2729, "step": 18082 }, { "epoch": 2.5929165471752222, "grad_norm": 0.2776012122631073, "learning_rate": 5.505292005538548e-07, "loss": 0.2738, "step": 18083 }, { "epoch": 2.593059936908517, "grad_norm": 0.268659383058548, "learning_rate": 5.501487082703066e-07, "loss": 0.2873, "step": 18084 }, { "epoch": 2.593203326641812, "grad_norm": 0.27835899591445923, "learning_rate": 5.497683398637727e-07, "loss": 0.2751, "step": 18085 }, { "epoch": 2.5933467163751076, "grad_norm": 0.2665044963359833, "learning_rate": 5.493880953448421e-07, "loss": 0.2799, "step": 18086 }, { "epoch": 2.5934901061084026, "grad_norm": 0.2896680235862732, "learning_rate": 5.490079747241006e-07, "loss": 0.2937, "step": 18087 }, { "epoch": 2.593633495841698, "grad_norm": 0.27814921736717224, "learning_rate": 5.486279780121307e-07, "loss": 0.2856, "step": 18088 }, { "epoch": 2.593776885574993, "grad_norm": 0.2617473304271698, "learning_rate": 5.482481052195099e-07, "loss": 0.28, "step": 18089 }, { "epoch": 2.593920275308288, "grad_norm": 0.2654195725917816, "learning_rate": 5.478683563568138e-07, "loss": 0.2909, "step": 18090 }, { "epoch": 2.594063665041583, "grad_norm": 0.27013471722602844, "learning_rate": 5.474887314346139e-07, "loss": 0.2972, "step": 18091 }, { "epoch": 2.594207054774878, "grad_norm": 0.2755710482597351, "learning_rate": 5.47109230463479e-07, "loss": 0.2844, "step": 18092 }, { "epoch": 2.5943504445081733, "grad_norm": 0.2736252546310425, "learning_rate": 5.467298534539728e-07, "loss": 0.291, "step": 18093 }, { "epoch": 2.5944938342414683, "grad_norm": 0.25036755204200745, "learning_rate": 5.463506004166574e-07, "loss": 0.2726, "step": 18094 }, { "epoch": 2.5946372239747633, "grad_norm": 0.2820056080818176, "learning_rate": 5.459714713620906e-07, "loss": 0.2711, "step": 18095 }, { "epoch": 2.5947806137080587, "grad_norm": 0.2680610418319702, "learning_rate": 5.455924663008261e-07, "loss": 0.2668, "step": 18096 }, { "epoch": 2.5949240034413537, "grad_norm": 0.2840786278247833, "learning_rate": 5.452135852434154e-07, "loss": 0.2688, "step": 18097 }, { "epoch": 2.5950673931746486, "grad_norm": 0.270418256521225, "learning_rate": 5.448348282004051e-07, "loss": 0.2932, "step": 18098 }, { "epoch": 2.5952107829079436, "grad_norm": 0.2510296106338501, "learning_rate": 5.444561951823402e-07, "loss": 0.276, "step": 18099 }, { "epoch": 2.595354172641239, "grad_norm": 0.27643224596977234, "learning_rate": 5.440776861997616e-07, "loss": 0.2666, "step": 18100 }, { "epoch": 2.595497562374534, "grad_norm": 0.27867385745048523, "learning_rate": 5.436993012632052e-07, "loss": 0.2635, "step": 18101 }, { "epoch": 2.595640952107829, "grad_norm": 0.27363619208335876, "learning_rate": 5.43321040383205e-07, "loss": 0.288, "step": 18102 }, { "epoch": 2.5957843418411244, "grad_norm": 0.26880669593811035, "learning_rate": 5.42942903570291e-07, "loss": 0.2845, "step": 18103 }, { "epoch": 2.5959277315744194, "grad_norm": 0.2776077389717102, "learning_rate": 5.425648908349907e-07, "loss": 0.2864, "step": 18104 }, { "epoch": 2.5960711213077143, "grad_norm": 0.2757014334201813, "learning_rate": 5.421870021878273e-07, "loss": 0.2817, "step": 18105 }, { "epoch": 2.5962145110410093, "grad_norm": 0.267038494348526, "learning_rate": 5.418092376393208e-07, "loss": 0.2733, "step": 18106 }, { "epoch": 2.5963579007743043, "grad_norm": 0.2608933746814728, "learning_rate": 5.414315971999861e-07, "loss": 0.2847, "step": 18107 }, { "epoch": 2.5965012905075997, "grad_norm": 0.27233996987342834, "learning_rate": 5.410540808803377e-07, "loss": 0.2912, "step": 18108 }, { "epoch": 2.5966446802408947, "grad_norm": 0.2734033763408661, "learning_rate": 5.406766886908843e-07, "loss": 0.2705, "step": 18109 }, { "epoch": 2.59678806997419, "grad_norm": 0.255521297454834, "learning_rate": 5.402994206421319e-07, "loss": 0.2962, "step": 18110 }, { "epoch": 2.596931459707485, "grad_norm": 0.26547887921333313, "learning_rate": 5.399222767445838e-07, "loss": 0.2698, "step": 18111 }, { "epoch": 2.59707484944078, "grad_norm": 0.27265995740890503, "learning_rate": 5.395452570087383e-07, "loss": 0.282, "step": 18112 }, { "epoch": 2.597218239174075, "grad_norm": 0.2768533229827881, "learning_rate": 5.391683614450915e-07, "loss": 0.2732, "step": 18113 }, { "epoch": 2.59736162890737, "grad_norm": 0.2727862000465393, "learning_rate": 5.387915900641356e-07, "loss": 0.2935, "step": 18114 }, { "epoch": 2.5975050186406654, "grad_norm": 0.2682781517505646, "learning_rate": 5.384149428763591e-07, "loss": 0.2583, "step": 18115 }, { "epoch": 2.5976484083739604, "grad_norm": 0.2687991261482239, "learning_rate": 5.380384198922472e-07, "loss": 0.2729, "step": 18116 }, { "epoch": 2.5977917981072554, "grad_norm": 0.2550208270549774, "learning_rate": 5.376620211222833e-07, "loss": 0.2835, "step": 18117 }, { "epoch": 2.5979351878405508, "grad_norm": 0.26108232140541077, "learning_rate": 5.372857465769426e-07, "loss": 0.2753, "step": 18118 }, { "epoch": 2.5980785775738457, "grad_norm": 0.2519855797290802, "learning_rate": 5.369095962667026e-07, "loss": 0.2766, "step": 18119 }, { "epoch": 2.5982219673071407, "grad_norm": 0.25361165404319763, "learning_rate": 5.365335702020335e-07, "loss": 0.2841, "step": 18120 }, { "epoch": 2.5983653570404357, "grad_norm": 0.2659986615180969, "learning_rate": 5.361576683934038e-07, "loss": 0.2601, "step": 18121 }, { "epoch": 2.598508746773731, "grad_norm": 0.2719220221042633, "learning_rate": 5.357818908512791e-07, "loss": 0.3007, "step": 18122 }, { "epoch": 2.598652136507026, "grad_norm": 0.29249587655067444, "learning_rate": 5.35406237586118e-07, "loss": 0.2719, "step": 18123 }, { "epoch": 2.598795526240321, "grad_norm": 0.26452767848968506, "learning_rate": 5.350307086083795e-07, "loss": 0.2546, "step": 18124 }, { "epoch": 2.5989389159736165, "grad_norm": 0.26361507177352905, "learning_rate": 5.346553039285179e-07, "loss": 0.2673, "step": 18125 }, { "epoch": 2.5990823057069115, "grad_norm": 0.26454445719718933, "learning_rate": 5.342800235569834e-07, "loss": 0.266, "step": 18126 }, { "epoch": 2.5992256954402064, "grad_norm": 0.2714826464653015, "learning_rate": 5.339048675042235e-07, "loss": 0.2898, "step": 18127 }, { "epoch": 2.5993690851735014, "grad_norm": 0.26862403750419617, "learning_rate": 5.335298357806823e-07, "loss": 0.2766, "step": 18128 }, { "epoch": 2.599512474906797, "grad_norm": 0.2818847894668579, "learning_rate": 5.331549283967991e-07, "loss": 0.2877, "step": 18129 }, { "epoch": 2.599655864640092, "grad_norm": 0.2718586325645447, "learning_rate": 5.327801453630121e-07, "loss": 0.2799, "step": 18130 }, { "epoch": 2.5997992543733868, "grad_norm": 0.26191964745521545, "learning_rate": 5.324054866897538e-07, "loss": 0.2744, "step": 18131 }, { "epoch": 2.599942644106682, "grad_norm": 0.2783224284648895, "learning_rate": 5.320309523874539e-07, "loss": 0.287, "step": 18132 }, { "epoch": 2.600086033839977, "grad_norm": 0.28869009017944336, "learning_rate": 5.316565424665404e-07, "loss": 0.3004, "step": 18133 }, { "epoch": 2.600229423573272, "grad_norm": 0.27290013432502747, "learning_rate": 5.312822569374342e-07, "loss": 0.2859, "step": 18134 }, { "epoch": 2.600372813306567, "grad_norm": 0.28514131903648376, "learning_rate": 5.309080958105561e-07, "loss": 0.2824, "step": 18135 }, { "epoch": 2.600516203039862, "grad_norm": 0.28249499201774597, "learning_rate": 5.305340590963214e-07, "loss": 0.3049, "step": 18136 }, { "epoch": 2.6006595927731575, "grad_norm": 0.2614343464374542, "learning_rate": 5.301601468051438e-07, "loss": 0.2787, "step": 18137 }, { "epoch": 2.6008029825064525, "grad_norm": 0.26840606331825256, "learning_rate": 5.297863589474322e-07, "loss": 0.273, "step": 18138 }, { "epoch": 2.600946372239748, "grad_norm": 0.2784227132797241, "learning_rate": 5.29412695533591e-07, "loss": 0.294, "step": 18139 }, { "epoch": 2.601089761973043, "grad_norm": 0.27711164951324463, "learning_rate": 5.290391565740233e-07, "loss": 0.2801, "step": 18140 }, { "epoch": 2.601233151706338, "grad_norm": 0.2762817442417145, "learning_rate": 5.286657420791281e-07, "loss": 0.2562, "step": 18141 }, { "epoch": 2.601376541439633, "grad_norm": 0.24461819231510162, "learning_rate": 5.282924520593003e-07, "loss": 0.2826, "step": 18142 }, { "epoch": 2.601519931172928, "grad_norm": 0.28012070059776306, "learning_rate": 5.279192865249317e-07, "loss": 0.2869, "step": 18143 }, { "epoch": 2.601663320906223, "grad_norm": 0.2791907787322998, "learning_rate": 5.275462454864111e-07, "loss": 0.2862, "step": 18144 }, { "epoch": 2.601806710639518, "grad_norm": 0.2790689170360565, "learning_rate": 5.271733289541231e-07, "loss": 0.281, "step": 18145 }, { "epoch": 2.601950100372813, "grad_norm": 0.2633396089076996, "learning_rate": 5.268005369384488e-07, "loss": 0.2782, "step": 18146 }, { "epoch": 2.6020934901061086, "grad_norm": 0.26010140776634216, "learning_rate": 5.264278694497666e-07, "loss": 0.2802, "step": 18147 }, { "epoch": 2.6022368798394035, "grad_norm": 0.2638972997665405, "learning_rate": 5.260553264984508e-07, "loss": 0.2894, "step": 18148 }, { "epoch": 2.6023802695726985, "grad_norm": 0.27068817615509033, "learning_rate": 5.256829080948739e-07, "loss": 0.2716, "step": 18149 }, { "epoch": 2.6025236593059935, "grad_norm": 0.26913517713546753, "learning_rate": 5.253106142494007e-07, "loss": 0.2821, "step": 18150 }, { "epoch": 2.602667049039289, "grad_norm": 0.2686922550201416, "learning_rate": 5.249384449723965e-07, "loss": 0.3083, "step": 18151 }, { "epoch": 2.602810438772584, "grad_norm": 0.2815602123737335, "learning_rate": 5.245664002742224e-07, "loss": 0.2852, "step": 18152 }, { "epoch": 2.602953828505879, "grad_norm": 0.24803654849529266, "learning_rate": 5.241944801652349e-07, "loss": 0.2853, "step": 18153 }, { "epoch": 2.6030972182391743, "grad_norm": 0.2880285680294037, "learning_rate": 5.238226846557892e-07, "loss": 0.2789, "step": 18154 }, { "epoch": 2.6032406079724693, "grad_norm": 0.27238228917121887, "learning_rate": 5.234510137562338e-07, "loss": 0.2805, "step": 18155 }, { "epoch": 2.6033839977057642, "grad_norm": 0.24798288941383362, "learning_rate": 5.230794674769152e-07, "loss": 0.2768, "step": 18156 }, { "epoch": 2.603527387439059, "grad_norm": 0.2660260498523712, "learning_rate": 5.227080458281775e-07, "loss": 0.2798, "step": 18157 }, { "epoch": 2.6036707771723546, "grad_norm": 0.26194435358047485, "learning_rate": 5.223367488203607e-07, "loss": 0.2751, "step": 18158 }, { "epoch": 2.6038141669056496, "grad_norm": 0.26926496624946594, "learning_rate": 5.219655764638009e-07, "loss": 0.2743, "step": 18159 }, { "epoch": 2.6039575566389446, "grad_norm": 0.27163562178611755, "learning_rate": 5.215945287688318e-07, "loss": 0.2868, "step": 18160 }, { "epoch": 2.60410094637224, "grad_norm": 0.2639540433883667, "learning_rate": 5.212236057457809e-07, "loss": 0.2833, "step": 18161 }, { "epoch": 2.604244336105535, "grad_norm": 0.2826041579246521, "learning_rate": 5.208528074049751e-07, "loss": 0.2903, "step": 18162 }, { "epoch": 2.60438772583883, "grad_norm": 0.2652445137500763, "learning_rate": 5.204821337567372e-07, "loss": 0.2683, "step": 18163 }, { "epoch": 2.604531115572125, "grad_norm": 0.2697046995162964, "learning_rate": 5.201115848113858e-07, "loss": 0.2785, "step": 18164 }, { "epoch": 2.60467450530542, "grad_norm": 0.27664440870285034, "learning_rate": 5.197411605792363e-07, "loss": 0.2786, "step": 18165 }, { "epoch": 2.6048178950387153, "grad_norm": 0.26572439074516296, "learning_rate": 5.193708610706011e-07, "loss": 0.279, "step": 18166 }, { "epoch": 2.6049612847720103, "grad_norm": 0.27512452006340027, "learning_rate": 5.190006862957892e-07, "loss": 0.2912, "step": 18167 }, { "epoch": 2.6051046745053057, "grad_norm": 0.2828461229801178, "learning_rate": 5.186306362651044e-07, "loss": 0.2809, "step": 18168 }, { "epoch": 2.6052480642386007, "grad_norm": 0.2615675926208496, "learning_rate": 5.182607109888493e-07, "loss": 0.2775, "step": 18169 }, { "epoch": 2.6053914539718956, "grad_norm": 0.26350468397140503, "learning_rate": 5.178909104773216e-07, "loss": 0.2681, "step": 18170 }, { "epoch": 2.6055348437051906, "grad_norm": 0.2652977406978607, "learning_rate": 5.175212347408171e-07, "loss": 0.2923, "step": 18171 }, { "epoch": 2.6056782334384856, "grad_norm": 0.2573118209838867, "learning_rate": 5.171516837896257e-07, "loss": 0.2943, "step": 18172 }, { "epoch": 2.605821623171781, "grad_norm": 0.28183820843696594, "learning_rate": 5.167822576340347e-07, "loss": 0.2704, "step": 18173 }, { "epoch": 2.605965012905076, "grad_norm": 0.2749970555305481, "learning_rate": 5.164129562843295e-07, "loss": 0.2706, "step": 18174 }, { "epoch": 2.606108402638371, "grad_norm": 0.28332746028900146, "learning_rate": 5.160437797507912e-07, "loss": 0.2935, "step": 18175 }, { "epoch": 2.6062517923716664, "grad_norm": 0.25596362352371216, "learning_rate": 5.156747280436963e-07, "loss": 0.2825, "step": 18176 }, { "epoch": 2.6063951821049613, "grad_norm": 0.26158973574638367, "learning_rate": 5.153058011733187e-07, "loss": 0.2688, "step": 18177 }, { "epoch": 2.6065385718382563, "grad_norm": 0.2693232297897339, "learning_rate": 5.149369991499286e-07, "loss": 0.2764, "step": 18178 }, { "epoch": 2.6066819615715513, "grad_norm": 0.26735919713974, "learning_rate": 5.145683219837927e-07, "loss": 0.2996, "step": 18179 }, { "epoch": 2.6068253513048467, "grad_norm": 0.26941004395484924, "learning_rate": 5.141997696851752e-07, "loss": 0.2826, "step": 18180 }, { "epoch": 2.6069687410381417, "grad_norm": 0.27331218123435974, "learning_rate": 5.138313422643359e-07, "loss": 0.2736, "step": 18181 }, { "epoch": 2.6071121307714367, "grad_norm": 0.2774023115634918, "learning_rate": 5.134630397315305e-07, "loss": 0.2672, "step": 18182 }, { "epoch": 2.607255520504732, "grad_norm": 0.27301982045173645, "learning_rate": 5.130948620970128e-07, "loss": 0.2947, "step": 18183 }, { "epoch": 2.607398910238027, "grad_norm": 0.2740527391433716, "learning_rate": 5.127268093710319e-07, "loss": 0.2725, "step": 18184 }, { "epoch": 2.607542299971322, "grad_norm": 0.2704005241394043, "learning_rate": 5.123588815638341e-07, "loss": 0.2736, "step": 18185 }, { "epoch": 2.607685689704617, "grad_norm": 0.30318254232406616, "learning_rate": 5.119910786856614e-07, "loss": 0.2805, "step": 18186 }, { "epoch": 2.607829079437912, "grad_norm": 0.2642338275909424, "learning_rate": 5.116234007467547e-07, "loss": 0.2944, "step": 18187 }, { "epoch": 2.6079724691712074, "grad_norm": 0.2342650443315506, "learning_rate": 5.112558477573465e-07, "loss": 0.2852, "step": 18188 }, { "epoch": 2.6081158589045024, "grad_norm": 0.25783994793891907, "learning_rate": 5.108884197276709e-07, "loss": 0.2682, "step": 18189 }, { "epoch": 2.608259248637798, "grad_norm": 0.26960989832878113, "learning_rate": 5.105211166679557e-07, "loss": 0.2623, "step": 18190 }, { "epoch": 2.6084026383710928, "grad_norm": 0.2743004560470581, "learning_rate": 5.101539385884269e-07, "loss": 0.3107, "step": 18191 }, { "epoch": 2.6085460281043877, "grad_norm": 0.2683221697807312, "learning_rate": 5.097868854993071e-07, "loss": 0.2639, "step": 18192 }, { "epoch": 2.6086894178376827, "grad_norm": 0.26698586344718933, "learning_rate": 5.094199574108116e-07, "loss": 0.277, "step": 18193 }, { "epoch": 2.6088328075709777, "grad_norm": 0.2593124210834503, "learning_rate": 5.090531543331567e-07, "loss": 0.2755, "step": 18194 }, { "epoch": 2.608976197304273, "grad_norm": 0.273966521024704, "learning_rate": 5.086864762765542e-07, "loss": 0.2731, "step": 18195 }, { "epoch": 2.609119587037568, "grad_norm": 0.29496175050735474, "learning_rate": 5.083199232512104e-07, "loss": 0.2766, "step": 18196 }, { "epoch": 2.609262976770863, "grad_norm": 0.2579447031021118, "learning_rate": 5.079534952673309e-07, "loss": 0.2895, "step": 18197 }, { "epoch": 2.6094063665041585, "grad_norm": 0.26340892910957336, "learning_rate": 5.075871923351172e-07, "loss": 0.2876, "step": 18198 }, { "epoch": 2.6095497562374534, "grad_norm": 0.27269473671913147, "learning_rate": 5.072210144647643e-07, "loss": 0.2662, "step": 18199 }, { "epoch": 2.6096931459707484, "grad_norm": 0.2574236989021301, "learning_rate": 5.068549616664658e-07, "loss": 0.2811, "step": 18200 }, { "epoch": 2.6098365357040434, "grad_norm": 0.26336392760276794, "learning_rate": 5.064890339504153e-07, "loss": 0.2741, "step": 18201 }, { "epoch": 2.609979925437339, "grad_norm": 0.2588596045970917, "learning_rate": 5.061232313267972e-07, "loss": 0.2738, "step": 18202 }, { "epoch": 2.610123315170634, "grad_norm": 0.26328322291374207, "learning_rate": 5.057575538057963e-07, "loss": 0.2996, "step": 18203 }, { "epoch": 2.6102667049039288, "grad_norm": 0.2612912952899933, "learning_rate": 5.053920013975904e-07, "loss": 0.2541, "step": 18204 }, { "epoch": 2.610410094637224, "grad_norm": 0.2701142430305481, "learning_rate": 5.050265741123577e-07, "loss": 0.2736, "step": 18205 }, { "epoch": 2.610553484370519, "grad_norm": 0.2608804702758789, "learning_rate": 5.046612719602706e-07, "loss": 0.2677, "step": 18206 }, { "epoch": 2.610696874103814, "grad_norm": 0.2752951681613922, "learning_rate": 5.042960949514986e-07, "loss": 0.276, "step": 18207 }, { "epoch": 2.610840263837109, "grad_norm": 0.2821773588657379, "learning_rate": 5.039310430962086e-07, "loss": 0.2633, "step": 18208 }, { "epoch": 2.6109836535704045, "grad_norm": 0.2792504131793976, "learning_rate": 5.035661164045608e-07, "loss": 0.2818, "step": 18209 }, { "epoch": 2.6111270433036995, "grad_norm": 0.2865445017814636, "learning_rate": 5.03201314886716e-07, "loss": 0.2883, "step": 18210 }, { "epoch": 2.6112704330369945, "grad_norm": 0.28695401549339294, "learning_rate": 5.028366385528289e-07, "loss": 0.2733, "step": 18211 }, { "epoch": 2.61141382277029, "grad_norm": 0.273495614528656, "learning_rate": 5.024720874130517e-07, "loss": 0.2697, "step": 18212 }, { "epoch": 2.611557212503585, "grad_norm": 0.2783278226852417, "learning_rate": 5.021076614775333e-07, "loss": 0.2838, "step": 18213 }, { "epoch": 2.61170060223688, "grad_norm": 0.2697804868221283, "learning_rate": 5.017433607564199e-07, "loss": 0.2757, "step": 18214 }, { "epoch": 2.611843991970175, "grad_norm": 0.2727268934249878, "learning_rate": 5.013791852598504e-07, "loss": 0.2792, "step": 18215 }, { "epoch": 2.61198738170347, "grad_norm": 0.2650168240070343, "learning_rate": 5.010151349979642e-07, "loss": 0.2693, "step": 18216 }, { "epoch": 2.612130771436765, "grad_norm": 0.26236623525619507, "learning_rate": 5.006512099808958e-07, "loss": 0.2694, "step": 18217 }, { "epoch": 2.61227416117006, "grad_norm": 0.2750675678253174, "learning_rate": 5.002874102187766e-07, "loss": 0.2824, "step": 18218 }, { "epoch": 2.6124175509033556, "grad_norm": 0.24779652059078217, "learning_rate": 4.999237357217335e-07, "loss": 0.2753, "step": 18219 }, { "epoch": 2.6125609406366506, "grad_norm": 0.2802276015281677, "learning_rate": 4.995601864998917e-07, "loss": 0.2844, "step": 18220 }, { "epoch": 2.6127043303699455, "grad_norm": 0.2709026634693146, "learning_rate": 4.991967625633715e-07, "loss": 0.2736, "step": 18221 }, { "epoch": 2.6128477201032405, "grad_norm": 0.27947747707366943, "learning_rate": 4.988334639222897e-07, "loss": 0.2714, "step": 18222 }, { "epoch": 2.6129911098365355, "grad_norm": 0.2671414613723755, "learning_rate": 4.9847029058676e-07, "loss": 0.2695, "step": 18223 }, { "epoch": 2.613134499569831, "grad_norm": 0.2735843360424042, "learning_rate": 4.981072425668932e-07, "loss": 0.3003, "step": 18224 }, { "epoch": 2.613277889303126, "grad_norm": 0.24907588958740234, "learning_rate": 4.97744319872796e-07, "loss": 0.2829, "step": 18225 }, { "epoch": 2.613421279036421, "grad_norm": 0.2753910720348358, "learning_rate": 4.973815225145706e-07, "loss": 0.2789, "step": 18226 }, { "epoch": 2.6135646687697163, "grad_norm": 0.2594339847564697, "learning_rate": 4.970188505023172e-07, "loss": 0.281, "step": 18227 }, { "epoch": 2.6137080585030112, "grad_norm": 0.27381330728530884, "learning_rate": 4.96656303846132e-07, "loss": 0.2816, "step": 18228 }, { "epoch": 2.613851448236306, "grad_norm": 0.2711069583892822, "learning_rate": 4.962938825561076e-07, "loss": 0.2903, "step": 18229 }, { "epoch": 2.613994837969601, "grad_norm": 0.2616758346557617, "learning_rate": 4.959315866423347e-07, "loss": 0.2942, "step": 18230 }, { "epoch": 2.6141382277028966, "grad_norm": 0.268319696187973, "learning_rate": 4.955694161148971e-07, "loss": 0.2901, "step": 18231 }, { "epoch": 2.6142816174361916, "grad_norm": 0.2638682425022125, "learning_rate": 4.952073709838779e-07, "loss": 0.2797, "step": 18232 }, { "epoch": 2.6144250071694866, "grad_norm": 0.29511430859565735, "learning_rate": 4.948454512593554e-07, "loss": 0.262, "step": 18233 }, { "epoch": 2.614568396902782, "grad_norm": 0.26266488432884216, "learning_rate": 4.944836569514055e-07, "loss": 0.2791, "step": 18234 }, { "epoch": 2.614711786636077, "grad_norm": 0.24775756895542145, "learning_rate": 4.941219880700998e-07, "loss": 0.281, "step": 18235 }, { "epoch": 2.614855176369372, "grad_norm": 0.26434069871902466, "learning_rate": 4.937604446255067e-07, "loss": 0.2816, "step": 18236 }, { "epoch": 2.614998566102667, "grad_norm": 0.26799625158309937, "learning_rate": 4.933990266276911e-07, "loss": 0.2547, "step": 18237 }, { "epoch": 2.615141955835962, "grad_norm": 0.2633011043071747, "learning_rate": 4.930377340867137e-07, "loss": 0.2897, "step": 18238 }, { "epoch": 2.6152853455692573, "grad_norm": 0.2761378884315491, "learning_rate": 4.926765670126333e-07, "loss": 0.2976, "step": 18239 }, { "epoch": 2.6154287353025523, "grad_norm": 0.2662227153778076, "learning_rate": 4.923155254155032e-07, "loss": 0.2772, "step": 18240 }, { "epoch": 2.6155721250358477, "grad_norm": 0.27406060695648193, "learning_rate": 4.919546093053762e-07, "loss": 0.2913, "step": 18241 }, { "epoch": 2.6157155147691427, "grad_norm": 0.2658739984035492, "learning_rate": 4.91593818692297e-07, "loss": 0.2738, "step": 18242 }, { "epoch": 2.6158589045024376, "grad_norm": 0.2852099537849426, "learning_rate": 4.912331535863102e-07, "loss": 0.2591, "step": 18243 }, { "epoch": 2.6160022942357326, "grad_norm": 0.26282230019569397, "learning_rate": 4.908726139974573e-07, "loss": 0.2966, "step": 18244 }, { "epoch": 2.6161456839690276, "grad_norm": 0.2688959240913391, "learning_rate": 4.905121999357743e-07, "loss": 0.28, "step": 18245 }, { "epoch": 2.616289073702323, "grad_norm": 0.26806095242500305, "learning_rate": 4.901519114112957e-07, "loss": 0.2623, "step": 18246 }, { "epoch": 2.616432463435618, "grad_norm": 0.26745447516441345, "learning_rate": 4.897917484340492e-07, "loss": 0.2958, "step": 18247 }, { "epoch": 2.616575853168913, "grad_norm": 0.25440582633018494, "learning_rate": 4.894317110140628e-07, "loss": 0.2832, "step": 18248 }, { "epoch": 2.6167192429022084, "grad_norm": 0.2833434045314789, "learning_rate": 4.890717991613591e-07, "loss": 0.2775, "step": 18249 }, { "epoch": 2.6168626326355033, "grad_norm": 0.26835060119628906, "learning_rate": 4.887120128859568e-07, "loss": 0.2736, "step": 18250 }, { "epoch": 2.6170060223687983, "grad_norm": 0.25460371375083923, "learning_rate": 4.883523521978728e-07, "loss": 0.2769, "step": 18251 }, { "epoch": 2.6171494121020933, "grad_norm": 0.27985918521881104, "learning_rate": 4.879928171071203e-07, "loss": 0.2685, "step": 18252 }, { "epoch": 2.6172928018353887, "grad_norm": 0.2814078629016876, "learning_rate": 4.87633407623706e-07, "loss": 0.2906, "step": 18253 }, { "epoch": 2.6174361915686837, "grad_norm": 0.2647518813610077, "learning_rate": 4.872741237576362e-07, "loss": 0.2741, "step": 18254 }, { "epoch": 2.6175795813019787, "grad_norm": 0.27296146750450134, "learning_rate": 4.869149655189126e-07, "loss": 0.2895, "step": 18255 }, { "epoch": 2.617722971035274, "grad_norm": 0.2884150743484497, "learning_rate": 4.865559329175329e-07, "loss": 0.2749, "step": 18256 }, { "epoch": 2.617866360768569, "grad_norm": 0.26722168922424316, "learning_rate": 4.861970259634957e-07, "loss": 0.283, "step": 18257 }, { "epoch": 2.618009750501864, "grad_norm": 0.25791484117507935, "learning_rate": 4.85838244666788e-07, "loss": 0.2966, "step": 18258 }, { "epoch": 2.618153140235159, "grad_norm": 0.2736978530883789, "learning_rate": 4.854795890374003e-07, "loss": 0.282, "step": 18259 }, { "epoch": 2.6182965299684544, "grad_norm": 0.2724796235561371, "learning_rate": 4.851210590853156e-07, "loss": 0.2711, "step": 18260 }, { "epoch": 2.6184399197017494, "grad_norm": 0.2951454520225525, "learning_rate": 4.847626548205154e-07, "loss": 0.2686, "step": 18261 }, { "epoch": 2.6185833094350444, "grad_norm": 0.2612159550189972, "learning_rate": 4.844043762529787e-07, "loss": 0.2885, "step": 18262 }, { "epoch": 2.6187266991683398, "grad_norm": 0.29148927330970764, "learning_rate": 4.840462233926762e-07, "loss": 0.275, "step": 18263 }, { "epoch": 2.6188700889016348, "grad_norm": 0.287575364112854, "learning_rate": 4.836881962495805e-07, "loss": 0.287, "step": 18264 }, { "epoch": 2.6190134786349297, "grad_norm": 0.2707708775997162, "learning_rate": 4.833302948336576e-07, "loss": 0.2654, "step": 18265 }, { "epoch": 2.6191568683682247, "grad_norm": 0.278751939535141, "learning_rate": 4.829725191548717e-07, "loss": 0.2707, "step": 18266 }, { "epoch": 2.6193002581015197, "grad_norm": 0.25749534368515015, "learning_rate": 4.826148692231825e-07, "loss": 0.2772, "step": 18267 }, { "epoch": 2.619443647834815, "grad_norm": 0.2883718013763428, "learning_rate": 4.822573450485468e-07, "loss": 0.2885, "step": 18268 }, { "epoch": 2.61958703756811, "grad_norm": 0.25736233592033386, "learning_rate": 4.818999466409163e-07, "loss": 0.2907, "step": 18269 }, { "epoch": 2.6197304273014055, "grad_norm": 0.271950364112854, "learning_rate": 4.815426740102413e-07, "loss": 0.2961, "step": 18270 }, { "epoch": 2.6198738170347005, "grad_norm": 0.29341354966163635, "learning_rate": 4.811855271664672e-07, "loss": 0.2907, "step": 18271 }, { "epoch": 2.6200172067679954, "grad_norm": 0.27595046162605286, "learning_rate": 4.808285061195373e-07, "loss": 0.2921, "step": 18272 }, { "epoch": 2.6201605965012904, "grad_norm": 0.28285351395606995, "learning_rate": 4.8047161087939e-07, "loss": 0.2831, "step": 18273 }, { "epoch": 2.6203039862345854, "grad_norm": 0.2643803656101227, "learning_rate": 4.801148414559603e-07, "loss": 0.2781, "step": 18274 }, { "epoch": 2.620447375967881, "grad_norm": 0.2628922760486603, "learning_rate": 4.797581978591803e-07, "loss": 0.2959, "step": 18275 }, { "epoch": 2.6205907657011758, "grad_norm": 0.26455825567245483, "learning_rate": 4.794016800989793e-07, "loss": 0.2746, "step": 18276 }, { "epoch": 2.6207341554344707, "grad_norm": 0.251784086227417, "learning_rate": 4.790452881852814e-07, "loss": 0.269, "step": 18277 }, { "epoch": 2.620877545167766, "grad_norm": 0.28621625900268555, "learning_rate": 4.786890221280077e-07, "loss": 0.294, "step": 18278 }, { "epoch": 2.621020934901061, "grad_norm": 0.25134894251823425, "learning_rate": 4.78332881937078e-07, "loss": 0.2931, "step": 18279 }, { "epoch": 2.621164324634356, "grad_norm": 0.291103720664978, "learning_rate": 4.779768676224039e-07, "loss": 0.2984, "step": 18280 }, { "epoch": 2.621307714367651, "grad_norm": 0.2652286887168884, "learning_rate": 4.77620979193898e-07, "loss": 0.2609, "step": 18281 }, { "epoch": 2.6214511041009465, "grad_norm": 0.28581032156944275, "learning_rate": 4.772652166614672e-07, "loss": 0.2715, "step": 18282 }, { "epoch": 2.6215944938342415, "grad_norm": 0.28034406900405884, "learning_rate": 4.769095800350155e-07, "loss": 0.2884, "step": 18283 }, { "epoch": 2.6217378835675365, "grad_norm": 0.27438417077064514, "learning_rate": 4.765540693244436e-07, "loss": 0.2998, "step": 18284 }, { "epoch": 2.621881273300832, "grad_norm": 0.27269965410232544, "learning_rate": 4.761986845396477e-07, "loss": 0.2691, "step": 18285 }, { "epoch": 2.622024663034127, "grad_norm": 0.2785588800907135, "learning_rate": 4.7584342569052147e-07, "loss": 0.2741, "step": 18286 }, { "epoch": 2.622168052767422, "grad_norm": 0.27404430508613586, "learning_rate": 4.7548829278695473e-07, "loss": 0.2878, "step": 18287 }, { "epoch": 2.622311442500717, "grad_norm": 0.2781665325164795, "learning_rate": 4.751332858388341e-07, "loss": 0.2739, "step": 18288 }, { "epoch": 2.6224548322340118, "grad_norm": 0.26175788044929504, "learning_rate": 4.747784048560422e-07, "loss": 0.2925, "step": 18289 }, { "epoch": 2.622598221967307, "grad_norm": 0.24833688139915466, "learning_rate": 4.7442364984845833e-07, "loss": 0.265, "step": 18290 }, { "epoch": 2.622741611700602, "grad_norm": 0.27003535628318787, "learning_rate": 4.740690208259585e-07, "loss": 0.2808, "step": 18291 }, { "epoch": 2.6228850014338976, "grad_norm": 0.23733432590961456, "learning_rate": 4.7371451779841484e-07, "loss": 0.2926, "step": 18292 }, { "epoch": 2.6230283911671926, "grad_norm": 0.2724244296550751, "learning_rate": 4.73360140775696e-07, "loss": 0.2841, "step": 18293 }, { "epoch": 2.6231717809004875, "grad_norm": 0.27722904086112976, "learning_rate": 4.7300588976766817e-07, "loss": 0.2748, "step": 18294 }, { "epoch": 2.6233151706337825, "grad_norm": 0.25937265157699585, "learning_rate": 4.726517647841933e-07, "loss": 0.2717, "step": 18295 }, { "epoch": 2.6234585603670775, "grad_norm": 0.2858022153377533, "learning_rate": 4.7229776583512743e-07, "loss": 0.2549, "step": 18296 }, { "epoch": 2.623601950100373, "grad_norm": 0.2668007016181946, "learning_rate": 4.7194389293032716e-07, "loss": 0.265, "step": 18297 }, { "epoch": 2.623745339833668, "grad_norm": 0.25064972043037415, "learning_rate": 4.71590146079644e-07, "loss": 0.2801, "step": 18298 }, { "epoch": 2.623888729566963, "grad_norm": 0.29287242889404297, "learning_rate": 4.712365252929246e-07, "loss": 0.2618, "step": 18299 }, { "epoch": 2.6240321193002583, "grad_norm": 0.28276628255844116, "learning_rate": 4.7088303058001494e-07, "loss": 0.2591, "step": 18300 }, { "epoch": 2.6241755090335532, "grad_norm": 0.26873621344566345, "learning_rate": 4.705296619507538e-07, "loss": 0.2882, "step": 18301 }, { "epoch": 2.624318898766848, "grad_norm": 0.26716554164886475, "learning_rate": 4.7017641941497883e-07, "loss": 0.2766, "step": 18302 }, { "epoch": 2.624462288500143, "grad_norm": 0.24995949864387512, "learning_rate": 4.698233029825244e-07, "loss": 0.2852, "step": 18303 }, { "epoch": 2.6246056782334386, "grad_norm": 0.2596213221549988, "learning_rate": 4.694703126632205e-07, "loss": 0.2815, "step": 18304 }, { "epoch": 2.6247490679667336, "grad_norm": 0.2813998758792877, "learning_rate": 4.691174484668937e-07, "loss": 0.2809, "step": 18305 }, { "epoch": 2.6248924577000285, "grad_norm": 0.2898346781730652, "learning_rate": 4.687647104033682e-07, "loss": 0.2822, "step": 18306 }, { "epoch": 2.625035847433324, "grad_norm": 0.25581762194633484, "learning_rate": 4.684120984824625e-07, "loss": 0.274, "step": 18307 }, { "epoch": 2.625179237166619, "grad_norm": 0.28252217173576355, "learning_rate": 4.680596127139925e-07, "loss": 0.27, "step": 18308 }, { "epoch": 2.625322626899914, "grad_norm": 0.2709548771381378, "learning_rate": 4.6770725310777143e-07, "loss": 0.3036, "step": 18309 }, { "epoch": 2.625466016633209, "grad_norm": 0.27145451307296753, "learning_rate": 4.6735501967360873e-07, "loss": 0.2842, "step": 18310 }, { "epoch": 2.6256094063665043, "grad_norm": 0.2709968090057373, "learning_rate": 4.670029124213099e-07, "loss": 0.2673, "step": 18311 }, { "epoch": 2.6257527960997993, "grad_norm": 0.26635560393333435, "learning_rate": 4.6665093136067653e-07, "loss": 0.2812, "step": 18312 }, { "epoch": 2.6258961858330943, "grad_norm": 0.2641478180885315, "learning_rate": 4.6629907650150796e-07, "loss": 0.2664, "step": 18313 }, { "epoch": 2.6260395755663897, "grad_norm": 0.2735673189163208, "learning_rate": 4.6594734785359864e-07, "loss": 0.2816, "step": 18314 }, { "epoch": 2.6261829652996846, "grad_norm": 0.2490343153476715, "learning_rate": 4.6559574542674066e-07, "loss": 0.2973, "step": 18315 }, { "epoch": 2.6263263550329796, "grad_norm": 0.25960609316825867, "learning_rate": 4.6524426923072296e-07, "loss": 0.2868, "step": 18316 }, { "epoch": 2.6264697447662746, "grad_norm": 0.2730712592601776, "learning_rate": 4.648929192753282e-07, "loss": 0.2767, "step": 18317 }, { "epoch": 2.6266131344995696, "grad_norm": 0.2878624498844147, "learning_rate": 4.64541695570338e-07, "loss": 0.2747, "step": 18318 }, { "epoch": 2.626756524232865, "grad_norm": 0.27495595812797546, "learning_rate": 4.6419059812553015e-07, "loss": 0.2897, "step": 18319 }, { "epoch": 2.62689991396616, "grad_norm": 0.2729871869087219, "learning_rate": 4.638396269506784e-07, "loss": 0.2781, "step": 18320 }, { "epoch": 2.6270433036994554, "grad_norm": 0.27399492263793945, "learning_rate": 4.6348878205555393e-07, "loss": 0.2856, "step": 18321 }, { "epoch": 2.6271866934327504, "grad_norm": 0.2637602686882019, "learning_rate": 4.631380634499244e-07, "loss": 0.28, "step": 18322 }, { "epoch": 2.6273300831660453, "grad_norm": 0.2507646083831787, "learning_rate": 4.627874711435509e-07, "loss": 0.2766, "step": 18323 }, { "epoch": 2.6274734728993403, "grad_norm": 0.2740817666053772, "learning_rate": 4.624370051461946e-07, "loss": 0.2937, "step": 18324 }, { "epoch": 2.6276168626326353, "grad_norm": 0.2524893879890442, "learning_rate": 4.6208666546761206e-07, "loss": 0.2799, "step": 18325 }, { "epoch": 2.6277602523659307, "grad_norm": 0.26847270131111145, "learning_rate": 4.617364521175566e-07, "loss": 0.2758, "step": 18326 }, { "epoch": 2.6279036420992257, "grad_norm": 0.27537813782691956, "learning_rate": 4.613863651057765e-07, "loss": 0.2845, "step": 18327 }, { "epoch": 2.6280470318325206, "grad_norm": 0.27048128843307495, "learning_rate": 4.6103640444201904e-07, "loss": 0.2705, "step": 18328 }, { "epoch": 2.628190421565816, "grad_norm": 0.27647310495376587, "learning_rate": 4.6068657013602526e-07, "loss": 0.2941, "step": 18329 }, { "epoch": 2.628333811299111, "grad_norm": 0.2924833595752716, "learning_rate": 4.603368621975346e-07, "loss": 0.2812, "step": 18330 }, { "epoch": 2.628477201032406, "grad_norm": 0.27560582756996155, "learning_rate": 4.5998728063628264e-07, "loss": 0.294, "step": 18331 }, { "epoch": 2.628620590765701, "grad_norm": 0.2738381624221802, "learning_rate": 4.596378254620004e-07, "loss": 0.304, "step": 18332 }, { "epoch": 2.6287639804989964, "grad_norm": 0.27427637577056885, "learning_rate": 4.5928849668441853e-07, "loss": 0.3041, "step": 18333 }, { "epoch": 2.6289073702322914, "grad_norm": 0.2652705907821655, "learning_rate": 4.5893929431325813e-07, "loss": 0.3007, "step": 18334 }, { "epoch": 2.6290507599655863, "grad_norm": 0.27206331491470337, "learning_rate": 4.5859021835824303e-07, "loss": 0.2684, "step": 18335 }, { "epoch": 2.6291941496988818, "grad_norm": 0.24610695242881775, "learning_rate": 4.5824126882908993e-07, "loss": 0.2981, "step": 18336 }, { "epoch": 2.6293375394321767, "grad_norm": 0.2751685380935669, "learning_rate": 4.578924457355133e-07, "loss": 0.2908, "step": 18337 }, { "epoch": 2.6294809291654717, "grad_norm": 0.25205135345458984, "learning_rate": 4.5754374908722534e-07, "loss": 0.2877, "step": 18338 }, { "epoch": 2.6296243188987667, "grad_norm": 0.2695328891277313, "learning_rate": 4.571951788939305e-07, "loss": 0.2835, "step": 18339 }, { "epoch": 2.629767708632062, "grad_norm": 0.25608518719673157, "learning_rate": 4.568467351653333e-07, "loss": 0.2543, "step": 18340 }, { "epoch": 2.629911098365357, "grad_norm": 0.2779909372329712, "learning_rate": 4.5649841791113483e-07, "loss": 0.2759, "step": 18341 }, { "epoch": 2.630054488098652, "grad_norm": 0.29011017084121704, "learning_rate": 4.561502271410312e-07, "loss": 0.28, "step": 18342 }, { "epoch": 2.6301978778319475, "grad_norm": 0.27974748611450195, "learning_rate": 4.5580216286471635e-07, "loss": 0.2729, "step": 18343 }, { "epoch": 2.6303412675652424, "grad_norm": 0.2799401879310608, "learning_rate": 4.554542250918781e-07, "loss": 0.2689, "step": 18344 }, { "epoch": 2.6304846572985374, "grad_norm": 0.2500668168067932, "learning_rate": 4.551064138322026e-07, "loss": 0.2772, "step": 18345 }, { "epoch": 2.6306280470318324, "grad_norm": 0.2729676067829132, "learning_rate": 4.5475872909537366e-07, "loss": 0.2712, "step": 18346 }, { "epoch": 2.6307714367651274, "grad_norm": 0.26969704031944275, "learning_rate": 4.5441117089107036e-07, "loss": 0.2741, "step": 18347 }, { "epoch": 2.630914826498423, "grad_norm": 0.24047602713108063, "learning_rate": 4.540637392289671e-07, "loss": 0.2919, "step": 18348 }, { "epoch": 2.6310582162317178, "grad_norm": 0.26755866408348083, "learning_rate": 4.5371643411873776e-07, "loss": 0.2851, "step": 18349 }, { "epoch": 2.631201605965013, "grad_norm": 0.26526308059692383, "learning_rate": 4.533692555700481e-07, "loss": 0.2739, "step": 18350 }, { "epoch": 2.631344995698308, "grad_norm": 0.2730149030685425, "learning_rate": 4.530222035925647e-07, "loss": 0.2715, "step": 18351 }, { "epoch": 2.631488385431603, "grad_norm": 0.28531694412231445, "learning_rate": 4.5267527819594813e-07, "loss": 0.3117, "step": 18352 }, { "epoch": 2.631631775164898, "grad_norm": 0.26594895124435425, "learning_rate": 4.5232847938985746e-07, "loss": 0.2788, "step": 18353 }, { "epoch": 2.631775164898193, "grad_norm": 0.2863040566444397, "learning_rate": 4.519818071839466e-07, "loss": 0.2926, "step": 18354 }, { "epoch": 2.6319185546314885, "grad_norm": 0.2768495976924896, "learning_rate": 4.5163526158786495e-07, "loss": 0.2938, "step": 18355 }, { "epoch": 2.6320619443647835, "grad_norm": 0.24176055192947388, "learning_rate": 4.512888426112616e-07, "loss": 0.2669, "step": 18356 }, { "epoch": 2.6322053340980784, "grad_norm": 0.259031742811203, "learning_rate": 4.509425502637793e-07, "loss": 0.2814, "step": 18357 }, { "epoch": 2.632348723831374, "grad_norm": 0.2875799536705017, "learning_rate": 4.505963845550587e-07, "loss": 0.285, "step": 18358 }, { "epoch": 2.632492113564669, "grad_norm": 0.2680011987686157, "learning_rate": 4.5025034549473657e-07, "loss": 0.2829, "step": 18359 }, { "epoch": 2.632635503297964, "grad_norm": 0.27754101157188416, "learning_rate": 4.499044330924468e-07, "loss": 0.2737, "step": 18360 }, { "epoch": 2.632778893031259, "grad_norm": 0.25821778178215027, "learning_rate": 4.495586473578173e-07, "loss": 0.2722, "step": 18361 }, { "epoch": 2.632922282764554, "grad_norm": 0.25720685720443726, "learning_rate": 4.492129883004748e-07, "loss": 0.2774, "step": 18362 }, { "epoch": 2.633065672497849, "grad_norm": 0.2788395881652832, "learning_rate": 4.4886745593004265e-07, "loss": 0.2767, "step": 18363 }, { "epoch": 2.633209062231144, "grad_norm": 0.2647020220756531, "learning_rate": 4.485220502561394e-07, "loss": 0.2832, "step": 18364 }, { "epoch": 2.6333524519644396, "grad_norm": 0.2689802348613739, "learning_rate": 4.481767712883811e-07, "loss": 0.2938, "step": 18365 }, { "epoch": 2.6334958416977345, "grad_norm": 0.2628062069416046, "learning_rate": 4.478316190363796e-07, "loss": 0.2928, "step": 18366 }, { "epoch": 2.6336392314310295, "grad_norm": 0.26898229122161865, "learning_rate": 4.4748659350974275e-07, "loss": 0.2846, "step": 18367 }, { "epoch": 2.6337826211643245, "grad_norm": 0.2705710530281067, "learning_rate": 4.4714169471807623e-07, "loss": 0.2782, "step": 18368 }, { "epoch": 2.6339260108976195, "grad_norm": 0.27384644746780396, "learning_rate": 4.467969226709817e-07, "loss": 0.2843, "step": 18369 }, { "epoch": 2.634069400630915, "grad_norm": 0.26765429973602295, "learning_rate": 4.464522773780572e-07, "loss": 0.2756, "step": 18370 }, { "epoch": 2.63421279036421, "grad_norm": 0.2838367521762848, "learning_rate": 4.4610775884889603e-07, "loss": 0.2913, "step": 18371 }, { "epoch": 2.6343561800975053, "grad_norm": 0.26993316411972046, "learning_rate": 4.457633670930894e-07, "loss": 0.2806, "step": 18372 }, { "epoch": 2.6344995698308002, "grad_norm": 0.25110137462615967, "learning_rate": 4.454191021202248e-07, "loss": 0.2655, "step": 18373 }, { "epoch": 2.634642959564095, "grad_norm": 0.2907731831073761, "learning_rate": 4.450749639398866e-07, "loss": 0.2748, "step": 18374 }, { "epoch": 2.63478634929739, "grad_norm": 0.27157434821128845, "learning_rate": 4.447309525616539e-07, "loss": 0.3006, "step": 18375 }, { "epoch": 2.634929739030685, "grad_norm": 0.2711907625198364, "learning_rate": 4.4438706799510566e-07, "loss": 0.2864, "step": 18376 }, { "epoch": 2.6350731287639806, "grad_norm": 0.2639229893684387, "learning_rate": 4.440433102498126e-07, "loss": 0.2832, "step": 18377 }, { "epoch": 2.6352165184972756, "grad_norm": 0.263276606798172, "learning_rate": 4.436996793353454e-07, "loss": 0.3013, "step": 18378 }, { "epoch": 2.6353599082305705, "grad_norm": 0.27524229884147644, "learning_rate": 4.433561752612697e-07, "loss": 0.285, "step": 18379 }, { "epoch": 2.635503297963866, "grad_norm": 0.2685283422470093, "learning_rate": 4.430127980371496e-07, "loss": 0.2774, "step": 18380 }, { "epoch": 2.635646687697161, "grad_norm": 0.258926659822464, "learning_rate": 4.4266954767254234e-07, "loss": 0.2783, "step": 18381 }, { "epoch": 2.635790077430456, "grad_norm": 0.2492731809616089, "learning_rate": 4.4232642417700476e-07, "loss": 0.2797, "step": 18382 }, { "epoch": 2.635933467163751, "grad_norm": 0.2871544361114502, "learning_rate": 4.419834275600887e-07, "loss": 0.2901, "step": 18383 }, { "epoch": 2.6360768568970463, "grad_norm": 0.27967211604118347, "learning_rate": 4.4164055783134265e-07, "loss": 0.2926, "step": 18384 }, { "epoch": 2.6362202466303413, "grad_norm": 0.2605155408382416, "learning_rate": 4.4129781500031057e-07, "loss": 0.2749, "step": 18385 }, { "epoch": 2.6363636363636362, "grad_norm": 0.260553240776062, "learning_rate": 4.409551990765354e-07, "loss": 0.2677, "step": 18386 }, { "epoch": 2.6365070260969317, "grad_norm": 0.2682062089443207, "learning_rate": 4.4061271006955507e-07, "loss": 0.285, "step": 18387 }, { "epoch": 2.6366504158302266, "grad_norm": 0.26321059465408325, "learning_rate": 4.4027034798890255e-07, "loss": 0.2881, "step": 18388 }, { "epoch": 2.6367938055635216, "grad_norm": 0.2606220543384552, "learning_rate": 4.3992811284410907e-07, "loss": 0.2785, "step": 18389 }, { "epoch": 2.6369371952968166, "grad_norm": 0.254536896944046, "learning_rate": 4.39586004644702e-07, "loss": 0.268, "step": 18390 }, { "epoch": 2.637080585030112, "grad_norm": 0.2597663104534149, "learning_rate": 4.39244023400206e-07, "loss": 0.2845, "step": 18391 }, { "epoch": 2.637223974763407, "grad_norm": 0.26750802993774414, "learning_rate": 4.3890216912014117e-07, "loss": 0.2974, "step": 18392 }, { "epoch": 2.637367364496702, "grad_norm": 0.2559394836425781, "learning_rate": 4.385604418140227e-07, "loss": 0.2905, "step": 18393 }, { "epoch": 2.6375107542299974, "grad_norm": 0.2688159942626953, "learning_rate": 4.382188414913646e-07, "loss": 0.2731, "step": 18394 }, { "epoch": 2.6376541439632923, "grad_norm": 0.2617476284503937, "learning_rate": 4.3787736816167713e-07, "loss": 0.2881, "step": 18395 }, { "epoch": 2.6377975336965873, "grad_norm": 0.28884512186050415, "learning_rate": 4.375360218344654e-07, "loss": 0.2768, "step": 18396 }, { "epoch": 2.6379409234298823, "grad_norm": 0.28513479232788086, "learning_rate": 4.3719480251923354e-07, "loss": 0.2851, "step": 18397 }, { "epoch": 2.6380843131631773, "grad_norm": 0.2659580409526825, "learning_rate": 4.3685371022547886e-07, "loss": 0.2745, "step": 18398 }, { "epoch": 2.6382277028964727, "grad_norm": 0.27166345715522766, "learning_rate": 4.365127449626966e-07, "loss": 0.2797, "step": 18399 }, { "epoch": 2.6383710926297677, "grad_norm": 0.26358848810195923, "learning_rate": 4.361719067403802e-07, "loss": 0.2952, "step": 18400 }, { "epoch": 2.638514482363063, "grad_norm": 0.26019975543022156, "learning_rate": 4.358311955680167e-07, "loss": 0.2975, "step": 18401 }, { "epoch": 2.638657872096358, "grad_norm": 0.2585071623325348, "learning_rate": 4.354906114550922e-07, "loss": 0.2817, "step": 18402 }, { "epoch": 2.638801261829653, "grad_norm": 0.2705267071723938, "learning_rate": 4.3515015441108865e-07, "loss": 0.2673, "step": 18403 }, { "epoch": 2.638944651562948, "grad_norm": 0.2788936495780945, "learning_rate": 4.348098244454818e-07, "loss": 0.2721, "step": 18404 }, { "epoch": 2.639088041296243, "grad_norm": 0.28008541464805603, "learning_rate": 4.3446962156774686e-07, "loss": 0.2983, "step": 18405 }, { "epoch": 2.6392314310295384, "grad_norm": 0.28447675704956055, "learning_rate": 4.341295457873546e-07, "loss": 0.2812, "step": 18406 }, { "epoch": 2.6393748207628334, "grad_norm": 0.29853153228759766, "learning_rate": 4.3378959711377176e-07, "loss": 0.2749, "step": 18407 }, { "epoch": 2.6395182104961283, "grad_norm": 0.2823694944381714, "learning_rate": 4.3344977555646373e-07, "loss": 0.2778, "step": 18408 }, { "epoch": 2.6396616002294238, "grad_norm": 0.269444078207016, "learning_rate": 4.331100811248884e-07, "loss": 0.2839, "step": 18409 }, { "epoch": 2.6398049899627187, "grad_norm": 0.2732265293598175, "learning_rate": 4.3277051382850266e-07, "loss": 0.2749, "step": 18410 }, { "epoch": 2.6399483796960137, "grad_norm": 0.2758520543575287, "learning_rate": 4.3243107367676063e-07, "loss": 0.2729, "step": 18411 }, { "epoch": 2.6400917694293087, "grad_norm": 0.255036860704422, "learning_rate": 4.320917606791114e-07, "loss": 0.2662, "step": 18412 }, { "epoch": 2.640235159162604, "grad_norm": 0.263922780752182, "learning_rate": 4.3175257484500023e-07, "loss": 0.2636, "step": 18413 }, { "epoch": 2.640378548895899, "grad_norm": 0.27119719982147217, "learning_rate": 4.314135161838712e-07, "loss": 0.2743, "step": 18414 }, { "epoch": 2.640521938629194, "grad_norm": 0.26593101024627686, "learning_rate": 4.310745847051617e-07, "loss": 0.2698, "step": 18415 }, { "epoch": 2.6406653283624895, "grad_norm": 0.26258742809295654, "learning_rate": 4.3073578041830653e-07, "loss": 0.2808, "step": 18416 }, { "epoch": 2.6408087180957844, "grad_norm": 0.256645530462265, "learning_rate": 4.3039710333273923e-07, "loss": 0.2888, "step": 18417 }, { "epoch": 2.6409521078290794, "grad_norm": 0.26646971702575684, "learning_rate": 4.300585534578866e-07, "loss": 0.2728, "step": 18418 }, { "epoch": 2.6410954975623744, "grad_norm": 0.27690285444259644, "learning_rate": 4.2972013080317454e-07, "loss": 0.2604, "step": 18419 }, { "epoch": 2.6412388872956694, "grad_norm": 0.2854980528354645, "learning_rate": 4.2938183537802327e-07, "loss": 0.2844, "step": 18420 }, { "epoch": 2.6413822770289648, "grad_norm": 0.28446686267852783, "learning_rate": 4.2904366719185074e-07, "loss": 0.2931, "step": 18421 }, { "epoch": 2.6415256667622597, "grad_norm": 0.2610352635383606, "learning_rate": 4.287056262540712e-07, "loss": 0.2612, "step": 18422 }, { "epoch": 2.641669056495555, "grad_norm": 0.25232523679733276, "learning_rate": 4.283677125740948e-07, "loss": 0.2852, "step": 18423 }, { "epoch": 2.64181244622885, "grad_norm": 0.2752395570278168, "learning_rate": 4.2802992616133023e-07, "loss": 0.2786, "step": 18424 }, { "epoch": 2.641955835962145, "grad_norm": 0.27421924471855164, "learning_rate": 4.2769226702517817e-07, "loss": 0.2863, "step": 18425 }, { "epoch": 2.64209922569544, "grad_norm": 0.2867724597454071, "learning_rate": 4.2735473517503957e-07, "loss": 0.2837, "step": 18426 }, { "epoch": 2.642242615428735, "grad_norm": 0.2805095613002777, "learning_rate": 4.270173306203118e-07, "loss": 0.265, "step": 18427 }, { "epoch": 2.6423860051620305, "grad_norm": 0.268126517534256, "learning_rate": 4.266800533703863e-07, "loss": 0.2852, "step": 18428 }, { "epoch": 2.6425293948953255, "grad_norm": 0.26558998227119446, "learning_rate": 4.263429034346539e-07, "loss": 0.2841, "step": 18429 }, { "epoch": 2.6426727846286204, "grad_norm": 0.26715919375419617, "learning_rate": 4.2600588082249983e-07, "loss": 0.2564, "step": 18430 }, { "epoch": 2.642816174361916, "grad_norm": 0.2811047434806824, "learning_rate": 4.2566898554330493e-07, "loss": 0.2634, "step": 18431 }, { "epoch": 2.642959564095211, "grad_norm": 0.2809959650039673, "learning_rate": 4.253322176064489e-07, "loss": 0.2868, "step": 18432 }, { "epoch": 2.643102953828506, "grad_norm": 0.2762182652950287, "learning_rate": 4.2499557702130653e-07, "loss": 0.279, "step": 18433 }, { "epoch": 2.6432463435618008, "grad_norm": 0.2447112500667572, "learning_rate": 4.246590637972503e-07, "loss": 0.2772, "step": 18434 }, { "epoch": 2.643389733295096, "grad_norm": 0.2656029760837555, "learning_rate": 4.243226779436471e-07, "loss": 0.2898, "step": 18435 }, { "epoch": 2.643533123028391, "grad_norm": 0.288692444562912, "learning_rate": 4.2398641946986173e-07, "loss": 0.2991, "step": 18436 }, { "epoch": 2.643676512761686, "grad_norm": 0.2791176438331604, "learning_rate": 4.2365028838525557e-07, "loss": 0.2893, "step": 18437 }, { "epoch": 2.6438199024949816, "grad_norm": 0.279212087392807, "learning_rate": 4.2331428469918554e-07, "loss": 0.2896, "step": 18438 }, { "epoch": 2.6439632922282765, "grad_norm": 0.2703326344490051, "learning_rate": 4.2297840842100533e-07, "loss": 0.2858, "step": 18439 }, { "epoch": 2.6441066819615715, "grad_norm": 0.26857733726501465, "learning_rate": 4.226426595600658e-07, "loss": 0.2862, "step": 18440 }, { "epoch": 2.6442500716948665, "grad_norm": 0.26625362038612366, "learning_rate": 4.2230703812571385e-07, "loss": 0.2899, "step": 18441 }, { "epoch": 2.644393461428162, "grad_norm": 0.2693723738193512, "learning_rate": 4.219715441272915e-07, "loss": 0.2666, "step": 18442 }, { "epoch": 2.644536851161457, "grad_norm": 0.2646978795528412, "learning_rate": 4.21636177574139e-07, "loss": 0.2629, "step": 18443 }, { "epoch": 2.644680240894752, "grad_norm": 0.28741756081581116, "learning_rate": 4.213009384755923e-07, "loss": 0.2662, "step": 18444 }, { "epoch": 2.6448236306280473, "grad_norm": 0.25087976455688477, "learning_rate": 4.2096582684098443e-07, "loss": 0.2751, "step": 18445 }, { "epoch": 2.6449670203613422, "grad_norm": 0.27882152795791626, "learning_rate": 4.206308426796446e-07, "loss": 0.2919, "step": 18446 }, { "epoch": 2.645110410094637, "grad_norm": 0.2644335627555847, "learning_rate": 4.2029598600089703e-07, "loss": 0.2981, "step": 18447 }, { "epoch": 2.645253799827932, "grad_norm": 0.28195565938949585, "learning_rate": 4.199612568140643e-07, "loss": 0.2837, "step": 18448 }, { "epoch": 2.645397189561227, "grad_norm": 0.27118733525276184, "learning_rate": 4.1962665512846445e-07, "loss": 0.2821, "step": 18449 }, { "epoch": 2.6455405792945226, "grad_norm": 0.26744499802589417, "learning_rate": 4.1929218095341283e-07, "loss": 0.2768, "step": 18450 }, { "epoch": 2.6456839690278176, "grad_norm": 0.26607975363731384, "learning_rate": 4.1895783429822147e-07, "loss": 0.2797, "step": 18451 }, { "epoch": 2.645827358761113, "grad_norm": 0.24022626876831055, "learning_rate": 4.186236151721962e-07, "loss": 0.2717, "step": 18452 }, { "epoch": 2.645970748494408, "grad_norm": 0.27110224962234497, "learning_rate": 4.182895235846418e-07, "loss": 0.2875, "step": 18453 }, { "epoch": 2.646114138227703, "grad_norm": 0.2692674994468689, "learning_rate": 4.1795555954485924e-07, "loss": 0.2999, "step": 18454 }, { "epoch": 2.646257527960998, "grad_norm": 0.27764755487442017, "learning_rate": 4.1762172306214486e-07, "loss": 0.2783, "step": 18455 }, { "epoch": 2.646400917694293, "grad_norm": 0.25715216994285583, "learning_rate": 4.172880141457936e-07, "loss": 0.2656, "step": 18456 }, { "epoch": 2.6465443074275883, "grad_norm": 0.2700841724872589, "learning_rate": 4.1695443280509393e-07, "loss": 0.2828, "step": 18457 }, { "epoch": 2.6466876971608833, "grad_norm": 0.2788294553756714, "learning_rate": 4.166209790493325e-07, "loss": 0.2901, "step": 18458 }, { "epoch": 2.6468310868941782, "grad_norm": 0.27805978059768677, "learning_rate": 4.1628765288779295e-07, "loss": 0.3086, "step": 18459 }, { "epoch": 2.6469744766274736, "grad_norm": 0.26880529522895813, "learning_rate": 4.159544543297539e-07, "loss": 0.2625, "step": 18460 }, { "epoch": 2.6471178663607686, "grad_norm": 0.27041861414909363, "learning_rate": 4.1562138338449134e-07, "loss": 0.2755, "step": 18461 }, { "epoch": 2.6472612560940636, "grad_norm": 0.2678588032722473, "learning_rate": 4.1528844006127835e-07, "loss": 0.2837, "step": 18462 }, { "epoch": 2.6474046458273586, "grad_norm": 0.2539837062358856, "learning_rate": 4.149556243693814e-07, "loss": 0.2742, "step": 18463 }, { "epoch": 2.647548035560654, "grad_norm": 0.2636735141277313, "learning_rate": 4.1462293631806706e-07, "loss": 0.2782, "step": 18464 }, { "epoch": 2.647691425293949, "grad_norm": 0.26855430006980896, "learning_rate": 4.142903759165967e-07, "loss": 0.2782, "step": 18465 }, { "epoch": 2.647834815027244, "grad_norm": 0.26765063405036926, "learning_rate": 4.1395794317422746e-07, "loss": 0.2701, "step": 18466 }, { "epoch": 2.6479782047605394, "grad_norm": 0.2844906449317932, "learning_rate": 4.1362563810021515e-07, "loss": 0.2842, "step": 18467 }, { "epoch": 2.6481215944938343, "grad_norm": 0.2732553482055664, "learning_rate": 4.132934607038103e-07, "loss": 0.2644, "step": 18468 }, { "epoch": 2.6482649842271293, "grad_norm": 0.2509608864784241, "learning_rate": 4.1296141099425923e-07, "loss": 0.2872, "step": 18469 }, { "epoch": 2.6484083739604243, "grad_norm": 0.26581594347953796, "learning_rate": 4.126294889808058e-07, "loss": 0.2789, "step": 18470 }, { "epoch": 2.6485517636937197, "grad_norm": 0.2574683427810669, "learning_rate": 4.1229769467269144e-07, "loss": 0.2815, "step": 18471 }, { "epoch": 2.6486951534270147, "grad_norm": 0.2675013542175293, "learning_rate": 4.119660280791515e-07, "loss": 0.2914, "step": 18472 }, { "epoch": 2.6488385431603096, "grad_norm": 0.2731499671936035, "learning_rate": 4.1163448920941986e-07, "loss": 0.2777, "step": 18473 }, { "epoch": 2.648981932893605, "grad_norm": 0.27304884791374207, "learning_rate": 4.1130307807272565e-07, "loss": 0.2615, "step": 18474 }, { "epoch": 2.6491253226269, "grad_norm": 0.29305562376976013, "learning_rate": 4.109717946782954e-07, "loss": 0.2702, "step": 18475 }, { "epoch": 2.649268712360195, "grad_norm": 0.2687063217163086, "learning_rate": 4.1064063903535067e-07, "loss": 0.2687, "step": 18476 }, { "epoch": 2.64941210209349, "grad_norm": 0.2681821286678314, "learning_rate": 4.103096111531107e-07, "loss": 0.2731, "step": 18477 }, { "epoch": 2.649555491826785, "grad_norm": 0.2576240301132202, "learning_rate": 4.099787110407921e-07, "loss": 0.2626, "step": 18478 }, { "epoch": 2.6496988815600804, "grad_norm": 0.26747897267341614, "learning_rate": 4.0964793870760466e-07, "loss": 0.2843, "step": 18479 }, { "epoch": 2.6498422712933754, "grad_norm": 0.26433950662612915, "learning_rate": 4.093172941627571e-07, "loss": 0.2718, "step": 18480 }, { "epoch": 2.6499856610266703, "grad_norm": 0.28500130772590637, "learning_rate": 4.0898677741545433e-07, "loss": 0.2834, "step": 18481 }, { "epoch": 2.6501290507599657, "grad_norm": 0.27176743745803833, "learning_rate": 4.086563884748973e-07, "loss": 0.2682, "step": 18482 }, { "epoch": 2.6502724404932607, "grad_norm": 0.27279046177864075, "learning_rate": 4.0832612735028363e-07, "loss": 0.2792, "step": 18483 }, { "epoch": 2.6504158302265557, "grad_norm": 0.2797253727912903, "learning_rate": 4.0799599405080824e-07, "loss": 0.2775, "step": 18484 }, { "epoch": 2.6505592199598507, "grad_norm": 0.25937336683273315, "learning_rate": 4.0766598858565933e-07, "loss": 0.2834, "step": 18485 }, { "epoch": 2.650702609693146, "grad_norm": 0.25761401653289795, "learning_rate": 4.0733611096402505e-07, "loss": 0.2783, "step": 18486 }, { "epoch": 2.650845999426441, "grad_norm": 0.28030550479888916, "learning_rate": 4.070063611950892e-07, "loss": 0.2675, "step": 18487 }, { "epoch": 2.650989389159736, "grad_norm": 0.29506242275238037, "learning_rate": 4.0667673928802997e-07, "loss": 0.2758, "step": 18488 }, { "epoch": 2.6511327788930314, "grad_norm": 0.28081053495407104, "learning_rate": 4.063472452520262e-07, "loss": 0.2748, "step": 18489 }, { "epoch": 2.6512761686263264, "grad_norm": 0.26010385155677795, "learning_rate": 4.0601787909624657e-07, "loss": 0.2771, "step": 18490 }, { "epoch": 2.6514195583596214, "grad_norm": 0.27400603890419006, "learning_rate": 4.0568864082986324e-07, "loss": 0.2755, "step": 18491 }, { "epoch": 2.6515629480929164, "grad_norm": 0.2559742033481598, "learning_rate": 4.0535953046204055e-07, "loss": 0.264, "step": 18492 }, { "epoch": 2.651706337826212, "grad_norm": 0.26025158166885376, "learning_rate": 4.050305480019412e-07, "loss": 0.2985, "step": 18493 }, { "epoch": 2.6518497275595068, "grad_norm": 0.2743544280529022, "learning_rate": 4.047016934587228e-07, "loss": 0.2704, "step": 18494 }, { "epoch": 2.6519931172928017, "grad_norm": 0.2745239734649658, "learning_rate": 4.043729668415408e-07, "loss": 0.2552, "step": 18495 }, { "epoch": 2.652136507026097, "grad_norm": 0.25516682863235474, "learning_rate": 4.040443681595457e-07, "loss": 0.2992, "step": 18496 }, { "epoch": 2.652279896759392, "grad_norm": 0.26760151982307434, "learning_rate": 4.037158974218852e-07, "loss": 0.2899, "step": 18497 }, { "epoch": 2.652423286492687, "grad_norm": 0.2760702967643738, "learning_rate": 4.033875546377036e-07, "loss": 0.2877, "step": 18498 }, { "epoch": 2.652566676225982, "grad_norm": 0.25533658266067505, "learning_rate": 4.0305933981614197e-07, "loss": 0.2955, "step": 18499 }, { "epoch": 2.652710065959277, "grad_norm": 0.2553878724575043, "learning_rate": 4.0273125296633743e-07, "loss": 0.2658, "step": 18500 }, { "epoch": 2.6528534556925725, "grad_norm": 0.2736473083496094, "learning_rate": 4.0240329409742206e-07, "loss": 0.2808, "step": 18501 }, { "epoch": 2.6529968454258674, "grad_norm": 0.2738438546657562, "learning_rate": 4.0207546321852695e-07, "loss": 0.2691, "step": 18502 }, { "epoch": 2.653140235159163, "grad_norm": 0.27152854204177856, "learning_rate": 4.017477603387776e-07, "loss": 0.2746, "step": 18503 }, { "epoch": 2.653283624892458, "grad_norm": 0.2676781713962555, "learning_rate": 4.014201854672972e-07, "loss": 0.2698, "step": 18504 }, { "epoch": 2.653427014625753, "grad_norm": 0.24954964220523834, "learning_rate": 4.010927386132057e-07, "loss": 0.2861, "step": 18505 }, { "epoch": 2.653570404359048, "grad_norm": 0.27592143416404724, "learning_rate": 4.00765419785617e-07, "loss": 0.2908, "step": 18506 }, { "epoch": 2.6537137940923428, "grad_norm": 0.2684306204319, "learning_rate": 4.004382289936443e-07, "loss": 0.279, "step": 18507 }, { "epoch": 2.653857183825638, "grad_norm": 0.2637382745742798, "learning_rate": 4.001111662463958e-07, "loss": 0.2886, "step": 18508 }, { "epoch": 2.654000573558933, "grad_norm": 0.2518823444843292, "learning_rate": 3.997842315529765e-07, "loss": 0.2817, "step": 18509 }, { "epoch": 2.654143963292228, "grad_norm": 0.2575942575931549, "learning_rate": 3.994574249224875e-07, "loss": 0.2922, "step": 18510 }, { "epoch": 2.6542873530255235, "grad_norm": 0.28190848231315613, "learning_rate": 3.9913074636402703e-07, "loss": 0.2676, "step": 18511 }, { "epoch": 2.6544307427588185, "grad_norm": 0.27123159170150757, "learning_rate": 3.9880419588668893e-07, "loss": 0.2613, "step": 18512 }, { "epoch": 2.6545741324921135, "grad_norm": 0.2675991654396057, "learning_rate": 3.984777734995643e-07, "loss": 0.2785, "step": 18513 }, { "epoch": 2.6547175222254085, "grad_norm": 0.27491992712020874, "learning_rate": 3.9815147921174026e-07, "loss": 0.3051, "step": 18514 }, { "epoch": 2.654860911958704, "grad_norm": 0.2694512903690338, "learning_rate": 3.9782531303229954e-07, "loss": 0.279, "step": 18515 }, { "epoch": 2.655004301691999, "grad_norm": 0.2740125060081482, "learning_rate": 3.974992749703238e-07, "loss": 0.2918, "step": 18516 }, { "epoch": 2.655147691425294, "grad_norm": 0.25730857253074646, "learning_rate": 3.971733650348869e-07, "loss": 0.271, "step": 18517 }, { "epoch": 2.6552910811585893, "grad_norm": 0.2586209177970886, "learning_rate": 3.968475832350638e-07, "loss": 0.2755, "step": 18518 }, { "epoch": 2.6554344708918842, "grad_norm": 0.2779841721057892, "learning_rate": 3.965219295799222e-07, "loss": 0.2859, "step": 18519 }, { "epoch": 2.655577860625179, "grad_norm": 0.28661057353019714, "learning_rate": 3.961964040785293e-07, "loss": 0.2717, "step": 18520 }, { "epoch": 2.655721250358474, "grad_norm": 0.27803367376327515, "learning_rate": 3.958710067399463e-07, "loss": 0.2771, "step": 18521 }, { "epoch": 2.6558646400917696, "grad_norm": 0.2669910788536072, "learning_rate": 3.95545737573233e-07, "loss": 0.2658, "step": 18522 }, { "epoch": 2.6560080298250646, "grad_norm": 0.25922057032585144, "learning_rate": 3.952205965874423e-07, "loss": 0.2854, "step": 18523 }, { "epoch": 2.6561514195583595, "grad_norm": 0.26924532651901245, "learning_rate": 3.948955837916263e-07, "loss": 0.2816, "step": 18524 }, { "epoch": 2.656294809291655, "grad_norm": 0.26125022768974304, "learning_rate": 3.9457069919483395e-07, "loss": 0.2814, "step": 18525 }, { "epoch": 2.65643819902495, "grad_norm": 0.26237472891807556, "learning_rate": 3.942459428061085e-07, "loss": 0.292, "step": 18526 }, { "epoch": 2.656581588758245, "grad_norm": 0.28876808285713196, "learning_rate": 3.939213146344911e-07, "loss": 0.2969, "step": 18527 }, { "epoch": 2.65672497849154, "grad_norm": 0.30644750595092773, "learning_rate": 3.93596814689019e-07, "loss": 0.2681, "step": 18528 }, { "epoch": 2.656868368224835, "grad_norm": 0.27620363235473633, "learning_rate": 3.932724429787255e-07, "loss": 0.2805, "step": 18529 }, { "epoch": 2.6570117579581303, "grad_norm": 0.2642882168292999, "learning_rate": 3.929481995126405e-07, "loss": 0.2932, "step": 18530 }, { "epoch": 2.6571551476914252, "grad_norm": 0.282798171043396, "learning_rate": 3.926240842997908e-07, "loss": 0.2633, "step": 18531 }, { "epoch": 2.6572985374247207, "grad_norm": 0.2878855764865875, "learning_rate": 3.9230009734920026e-07, "loss": 0.2738, "step": 18532 }, { "epoch": 2.6574419271580156, "grad_norm": 0.260179340839386, "learning_rate": 3.919762386698861e-07, "loss": 0.2843, "step": 18533 }, { "epoch": 2.6575853168913106, "grad_norm": 0.2628210783004761, "learning_rate": 3.9165250827086497e-07, "loss": 0.2894, "step": 18534 }, { "epoch": 2.6577287066246056, "grad_norm": 0.27224427461624146, "learning_rate": 3.913289061611486e-07, "loss": 0.2851, "step": 18535 }, { "epoch": 2.6578720963579006, "grad_norm": 0.2635331451892853, "learning_rate": 3.910054323497464e-07, "loss": 0.2826, "step": 18536 }, { "epoch": 2.658015486091196, "grad_norm": 0.2510768175125122, "learning_rate": 3.9068208684566287e-07, "loss": 0.2682, "step": 18537 }, { "epoch": 2.658158875824491, "grad_norm": 0.27332842350006104, "learning_rate": 3.903588696579008e-07, "loss": 0.2811, "step": 18538 }, { "epoch": 2.658302265557786, "grad_norm": 0.269569456577301, "learning_rate": 3.900357807954552e-07, "loss": 0.2955, "step": 18539 }, { "epoch": 2.6584456552910813, "grad_norm": 0.2924606204032898, "learning_rate": 3.8971282026732283e-07, "loss": 0.2715, "step": 18540 }, { "epoch": 2.6585890450243763, "grad_norm": 0.2862823009490967, "learning_rate": 3.8938998808249305e-07, "loss": 0.2809, "step": 18541 }, { "epoch": 2.6587324347576713, "grad_norm": 0.28948476910591125, "learning_rate": 3.890672842499538e-07, "loss": 0.2917, "step": 18542 }, { "epoch": 2.6588758244909663, "grad_norm": 0.2576621472835541, "learning_rate": 3.887447087786894e-07, "loss": 0.2895, "step": 18543 }, { "epoch": 2.6590192142242617, "grad_norm": 0.27436861395835876, "learning_rate": 3.8842226167767784e-07, "loss": 0.2986, "step": 18544 }, { "epoch": 2.6591626039575567, "grad_norm": 0.28245437145233154, "learning_rate": 3.8809994295589626e-07, "loss": 0.2905, "step": 18545 }, { "epoch": 2.6593059936908516, "grad_norm": 0.26141357421875, "learning_rate": 3.877777526223181e-07, "loss": 0.274, "step": 18546 }, { "epoch": 2.659449383424147, "grad_norm": 0.2700308561325073, "learning_rate": 3.874556906859117e-07, "loss": 0.2892, "step": 18547 }, { "epoch": 2.659592773157442, "grad_norm": 0.27989184856414795, "learning_rate": 3.871337571556444e-07, "loss": 0.2757, "step": 18548 }, { "epoch": 2.659736162890737, "grad_norm": 0.28702887892723083, "learning_rate": 3.8681195204047785e-07, "loss": 0.2765, "step": 18549 }, { "epoch": 2.659879552624032, "grad_norm": 0.2707885503768921, "learning_rate": 3.8649027534936933e-07, "loss": 0.2888, "step": 18550 }, { "epoch": 2.660022942357327, "grad_norm": 0.2771904468536377, "learning_rate": 3.8616872709127506e-07, "loss": 0.2591, "step": 18551 }, { "epoch": 2.6601663320906224, "grad_norm": 0.26327523589134216, "learning_rate": 3.8584730727514563e-07, "loss": 0.2794, "step": 18552 }, { "epoch": 2.6603097218239173, "grad_norm": 0.26270949840545654, "learning_rate": 3.8552601590992947e-07, "loss": 0.2701, "step": 18553 }, { "epoch": 2.6604531115572128, "grad_norm": 0.25612953305244446, "learning_rate": 3.8520485300457155e-07, "loss": 0.2742, "step": 18554 }, { "epoch": 2.6605965012905077, "grad_norm": 0.25143349170684814, "learning_rate": 3.8488381856801036e-07, "loss": 0.277, "step": 18555 }, { "epoch": 2.6607398910238027, "grad_norm": 0.2657763659954071, "learning_rate": 3.845629126091843e-07, "loss": 0.2837, "step": 18556 }, { "epoch": 2.6608832807570977, "grad_norm": 0.2706960141658783, "learning_rate": 3.8424213513702735e-07, "loss": 0.2539, "step": 18557 }, { "epoch": 2.6610266704903927, "grad_norm": 0.26231807470321655, "learning_rate": 3.8392148616046897e-07, "loss": 0.2711, "step": 18558 }, { "epoch": 2.661170060223688, "grad_norm": 0.2771472632884979, "learning_rate": 3.8360096568843595e-07, "loss": 0.2869, "step": 18559 }, { "epoch": 2.661313449956983, "grad_norm": 0.27653926610946655, "learning_rate": 3.8328057372985006e-07, "loss": 0.2826, "step": 18560 }, { "epoch": 2.661456839690278, "grad_norm": 0.28408533334732056, "learning_rate": 3.829603102936313e-07, "loss": 0.2652, "step": 18561 }, { "epoch": 2.6616002294235734, "grad_norm": 0.2878568768501282, "learning_rate": 3.826401753886949e-07, "loss": 0.29, "step": 18562 }, { "epoch": 2.6617436191568684, "grad_norm": 0.28149449825286865, "learning_rate": 3.8232016902395364e-07, "loss": 0.2687, "step": 18563 }, { "epoch": 2.6618870088901634, "grad_norm": 0.2703918516635895, "learning_rate": 3.8200029120831484e-07, "loss": 0.2715, "step": 18564 }, { "epoch": 2.6620303986234584, "grad_norm": 0.27947720885276794, "learning_rate": 3.816805419506847e-07, "loss": 0.2854, "step": 18565 }, { "epoch": 2.662173788356754, "grad_norm": 0.2832013666629791, "learning_rate": 3.813609212599639e-07, "loss": 0.278, "step": 18566 }, { "epoch": 2.6623171780900488, "grad_norm": 0.2688981890678406, "learning_rate": 3.810414291450504e-07, "loss": 0.2668, "step": 18567 }, { "epoch": 2.6624605678233437, "grad_norm": 0.2683030068874359, "learning_rate": 3.807220656148375e-07, "loss": 0.2993, "step": 18568 }, { "epoch": 2.662603957556639, "grad_norm": 0.2767840325832367, "learning_rate": 3.8040283067821704e-07, "loss": 0.2764, "step": 18569 }, { "epoch": 2.662747347289934, "grad_norm": 0.28609007596969604, "learning_rate": 3.800837243440764e-07, "loss": 0.2721, "step": 18570 }, { "epoch": 2.662890737023229, "grad_norm": 0.27651461958885193, "learning_rate": 3.7976474662129726e-07, "loss": 0.3014, "step": 18571 }, { "epoch": 2.663034126756524, "grad_norm": 0.26859399676322937, "learning_rate": 3.7944589751876037e-07, "loss": 0.2742, "step": 18572 }, { "epoch": 2.6631775164898195, "grad_norm": 0.2709936201572418, "learning_rate": 3.791271770453414e-07, "loss": 0.2754, "step": 18573 }, { "epoch": 2.6633209062231145, "grad_norm": 0.262062132358551, "learning_rate": 3.788085852099144e-07, "loss": 0.2782, "step": 18574 }, { "epoch": 2.6634642959564094, "grad_norm": 0.2777186632156372, "learning_rate": 3.7849012202134715e-07, "loss": 0.2845, "step": 18575 }, { "epoch": 2.663607685689705, "grad_norm": 0.27579769492149353, "learning_rate": 3.781717874885066e-07, "loss": 0.2784, "step": 18576 }, { "epoch": 2.663751075423, "grad_norm": 0.26173725724220276, "learning_rate": 3.7785358162025277e-07, "loss": 0.2927, "step": 18577 }, { "epoch": 2.663894465156295, "grad_norm": 0.2658431828022003, "learning_rate": 3.775355044254453e-07, "loss": 0.2695, "step": 18578 }, { "epoch": 2.6640378548895898, "grad_norm": 0.2675530016422272, "learning_rate": 3.7721755591293874e-07, "loss": 0.2843, "step": 18579 }, { "epoch": 2.6641812446228847, "grad_norm": 0.2491147369146347, "learning_rate": 3.7689973609158384e-07, "loss": 0.2778, "step": 18580 }, { "epoch": 2.66432463435618, "grad_norm": 0.26267746090888977, "learning_rate": 3.7658204497022897e-07, "loss": 0.2796, "step": 18581 }, { "epoch": 2.664468024089475, "grad_norm": 0.2579921782016754, "learning_rate": 3.7626448255771774e-07, "loss": 0.2855, "step": 18582 }, { "epoch": 2.6646114138227706, "grad_norm": 0.27406561374664307, "learning_rate": 3.759470488628908e-07, "loss": 0.2811, "step": 18583 }, { "epoch": 2.6647548035560655, "grad_norm": 0.2634023427963257, "learning_rate": 3.7562974389458496e-07, "loss": 0.271, "step": 18584 }, { "epoch": 2.6648981932893605, "grad_norm": 0.2828993499279022, "learning_rate": 3.7531256766163314e-07, "loss": 0.2741, "step": 18585 }, { "epoch": 2.6650415830226555, "grad_norm": 0.2557659447193146, "learning_rate": 3.749955201728667e-07, "loss": 0.2692, "step": 18586 }, { "epoch": 2.6651849727559505, "grad_norm": 0.2538847029209137, "learning_rate": 3.74678601437109e-07, "loss": 0.2917, "step": 18587 }, { "epoch": 2.665328362489246, "grad_norm": 0.2654203772544861, "learning_rate": 3.7436181146318417e-07, "loss": 0.2759, "step": 18588 }, { "epoch": 2.665471752222541, "grad_norm": 0.2707730531692505, "learning_rate": 3.740451502599107e-07, "loss": 0.272, "step": 18589 }, { "epoch": 2.665615141955836, "grad_norm": 0.2641429007053375, "learning_rate": 3.737286178361044e-07, "loss": 0.2803, "step": 18590 }, { "epoch": 2.6657585316891312, "grad_norm": 0.2716241478919983, "learning_rate": 3.734122142005769e-07, "loss": 0.2718, "step": 18591 }, { "epoch": 2.665901921422426, "grad_norm": 0.2727046012878418, "learning_rate": 3.7309593936213693e-07, "loss": 0.2718, "step": 18592 }, { "epoch": 2.666045311155721, "grad_norm": 0.2622983455657959, "learning_rate": 3.727797933295879e-07, "loss": 0.2816, "step": 18593 }, { "epoch": 2.666188700889016, "grad_norm": 0.2695329487323761, "learning_rate": 3.7246377611173167e-07, "loss": 0.2808, "step": 18594 }, { "epoch": 2.6663320906223116, "grad_norm": 0.2732781767845154, "learning_rate": 3.721478877173651e-07, "loss": 0.294, "step": 18595 }, { "epoch": 2.6664754803556066, "grad_norm": 0.256816029548645, "learning_rate": 3.7183212815528227e-07, "loss": 0.2819, "step": 18596 }, { "epoch": 2.6666188700889015, "grad_norm": 0.26788273453712463, "learning_rate": 3.715164974342744e-07, "loss": 0.2771, "step": 18597 }, { "epoch": 2.666762259822197, "grad_norm": 0.26024773716926575, "learning_rate": 3.712009955631263e-07, "loss": 0.256, "step": 18598 }, { "epoch": 2.666905649555492, "grad_norm": 0.2704850733280182, "learning_rate": 3.7088562255062246e-07, "loss": 0.2811, "step": 18599 }, { "epoch": 2.667049039288787, "grad_norm": 0.2637545168399811, "learning_rate": 3.70570378405542e-07, "loss": 0.2791, "step": 18600 }, { "epoch": 2.667192429022082, "grad_norm": 0.26296743750572205, "learning_rate": 3.702552631366607e-07, "loss": 0.2801, "step": 18601 }, { "epoch": 2.667335818755377, "grad_norm": 0.27149081230163574, "learning_rate": 3.6994027675275044e-07, "loss": 0.283, "step": 18602 }, { "epoch": 2.6674792084886723, "grad_norm": 0.26774778962135315, "learning_rate": 3.6962541926258145e-07, "loss": 0.2919, "step": 18603 }, { "epoch": 2.6676225982219672, "grad_norm": 0.284298300743103, "learning_rate": 3.693106906749172e-07, "loss": 0.279, "step": 18604 }, { "epoch": 2.6677659879552627, "grad_norm": 0.2623765766620636, "learning_rate": 3.6899609099852017e-07, "loss": 0.28, "step": 18605 }, { "epoch": 2.6679093776885576, "grad_norm": 0.2618964612483978, "learning_rate": 3.686816202421484e-07, "loss": 0.285, "step": 18606 }, { "epoch": 2.6680527674218526, "grad_norm": 0.2583613097667694, "learning_rate": 3.683672784145559e-07, "loss": 0.2694, "step": 18607 }, { "epoch": 2.6681961571551476, "grad_norm": 0.26602187752723694, "learning_rate": 3.680530655244946e-07, "loss": 0.2941, "step": 18608 }, { "epoch": 2.6683395468884425, "grad_norm": 0.27909111976623535, "learning_rate": 3.677389815807092e-07, "loss": 0.2869, "step": 18609 }, { "epoch": 2.668482936621738, "grad_norm": 0.25542116165161133, "learning_rate": 3.6742502659194544e-07, "loss": 0.2671, "step": 18610 }, { "epoch": 2.668626326355033, "grad_norm": 0.27099594473838806, "learning_rate": 3.671112005669425e-07, "loss": 0.3035, "step": 18611 }, { "epoch": 2.668769716088328, "grad_norm": 0.2615414261817932, "learning_rate": 3.667975035144372e-07, "loss": 0.2731, "step": 18612 }, { "epoch": 2.6689131058216233, "grad_norm": 0.27287495136260986, "learning_rate": 3.664839354431626e-07, "loss": 0.2644, "step": 18613 }, { "epoch": 2.6690564955549183, "grad_norm": 0.256360799074173, "learning_rate": 3.6617049636184676e-07, "loss": 0.2641, "step": 18614 }, { "epoch": 2.6691998852882133, "grad_norm": 0.2690151035785675, "learning_rate": 3.658571862792165e-07, "loss": 0.2807, "step": 18615 }, { "epoch": 2.6693432750215083, "grad_norm": 0.26876479387283325, "learning_rate": 3.655440052039932e-07, "loss": 0.2855, "step": 18616 }, { "epoch": 2.6694866647548037, "grad_norm": 0.2727351188659668, "learning_rate": 3.652309531448961e-07, "loss": 0.2729, "step": 18617 }, { "epoch": 2.6696300544880986, "grad_norm": 0.2447717934846878, "learning_rate": 3.649180301106392e-07, "loss": 0.2847, "step": 18618 }, { "epoch": 2.6697734442213936, "grad_norm": 0.2876528799533844, "learning_rate": 3.6460523610993446e-07, "loss": 0.2824, "step": 18619 }, { "epoch": 2.669916833954689, "grad_norm": 0.27823713421821594, "learning_rate": 3.642925711514894e-07, "loss": 0.2898, "step": 18620 }, { "epoch": 2.670060223687984, "grad_norm": 0.27156639099121094, "learning_rate": 3.6398003524400814e-07, "loss": 0.2851, "step": 18621 }, { "epoch": 2.670203613421279, "grad_norm": 0.27266138792037964, "learning_rate": 3.63667628396191e-07, "loss": 0.27, "step": 18622 }, { "epoch": 2.670347003154574, "grad_norm": 0.2973605692386627, "learning_rate": 3.6335535061673534e-07, "loss": 0.2949, "step": 18623 }, { "epoch": 2.6704903928878694, "grad_norm": 0.2754124402999878, "learning_rate": 3.6304320191433487e-07, "loss": 0.286, "step": 18624 }, { "epoch": 2.6706337826211644, "grad_norm": 0.2722415626049042, "learning_rate": 3.6273118229767757e-07, "loss": 0.2854, "step": 18625 }, { "epoch": 2.6707771723544593, "grad_norm": 0.292437881231308, "learning_rate": 3.62419291775451e-07, "loss": 0.2754, "step": 18626 }, { "epoch": 2.6709205620877547, "grad_norm": 0.27891555428504944, "learning_rate": 3.621075303563376e-07, "loss": 0.2888, "step": 18627 }, { "epoch": 2.6710639518210497, "grad_norm": 0.26349809765815735, "learning_rate": 3.6179589804901607e-07, "loss": 0.2673, "step": 18628 }, { "epoch": 2.6712073415543447, "grad_norm": 0.26793020963668823, "learning_rate": 3.614843948621616e-07, "loss": 0.2716, "step": 18629 }, { "epoch": 2.6713507312876397, "grad_norm": 0.26026663184165955, "learning_rate": 3.611730208044467e-07, "loss": 0.2805, "step": 18630 }, { "epoch": 2.6714941210209346, "grad_norm": 0.2782345414161682, "learning_rate": 3.60861775884539e-07, "loss": 0.2605, "step": 18631 }, { "epoch": 2.67163751075423, "grad_norm": 0.28639379143714905, "learning_rate": 3.605506601111025e-07, "loss": 0.2894, "step": 18632 }, { "epoch": 2.671780900487525, "grad_norm": 0.267711341381073, "learning_rate": 3.602396734927993e-07, "loss": 0.269, "step": 18633 }, { "epoch": 2.6719242902208205, "grad_norm": 0.2853085994720459, "learning_rate": 3.599288160382858e-07, "loss": 0.2746, "step": 18634 }, { "epoch": 2.6720676799541154, "grad_norm": 0.27366703748703003, "learning_rate": 3.5961808775621776e-07, "loss": 0.2919, "step": 18635 }, { "epoch": 2.6722110696874104, "grad_norm": 0.269022673368454, "learning_rate": 3.5930748865524166e-07, "loss": 0.2757, "step": 18636 }, { "epoch": 2.6723544594207054, "grad_norm": 0.259884238243103, "learning_rate": 3.589970187440073e-07, "loss": 0.2762, "step": 18637 }, { "epoch": 2.6724978491540003, "grad_norm": 0.27199748158454895, "learning_rate": 3.586866780311571e-07, "loss": 0.2568, "step": 18638 }, { "epoch": 2.6726412388872958, "grad_norm": 0.2687314748764038, "learning_rate": 3.583764665253298e-07, "loss": 0.2866, "step": 18639 }, { "epoch": 2.6727846286205907, "grad_norm": 0.26349326968193054, "learning_rate": 3.580663842351623e-07, "loss": 0.28, "step": 18640 }, { "epoch": 2.6729280183538857, "grad_norm": 0.2991408407688141, "learning_rate": 3.577564311692855e-07, "loss": 0.2855, "step": 18641 }, { "epoch": 2.673071408087181, "grad_norm": 0.28612130880355835, "learning_rate": 3.574466073363286e-07, "loss": 0.2837, "step": 18642 }, { "epoch": 2.673214797820476, "grad_norm": 0.28273019194602966, "learning_rate": 3.5713691274491593e-07, "loss": 0.3007, "step": 18643 }, { "epoch": 2.673358187553771, "grad_norm": 0.2575751841068268, "learning_rate": 3.5682734740366987e-07, "loss": 0.2786, "step": 18644 }, { "epoch": 2.673501577287066, "grad_norm": 0.250471293926239, "learning_rate": 3.5651791132120803e-07, "loss": 0.2788, "step": 18645 }, { "epoch": 2.6736449670203615, "grad_norm": 0.27641353011131287, "learning_rate": 3.562086045061458e-07, "loss": 0.2753, "step": 18646 }, { "epoch": 2.6737883567536564, "grad_norm": 0.2566068172454834, "learning_rate": 3.558994269670912e-07, "loss": 0.2662, "step": 18647 }, { "epoch": 2.6739317464869514, "grad_norm": 0.2731013000011444, "learning_rate": 3.5559037871265246e-07, "loss": 0.2748, "step": 18648 }, { "epoch": 2.674075136220247, "grad_norm": 0.2562820315361023, "learning_rate": 3.5528145975143314e-07, "loss": 0.267, "step": 18649 }, { "epoch": 2.674218525953542, "grad_norm": 0.29433774948120117, "learning_rate": 3.5497267009203305e-07, "loss": 0.2856, "step": 18650 }, { "epoch": 2.674361915686837, "grad_norm": 0.2847748398780823, "learning_rate": 3.546640097430498e-07, "loss": 0.288, "step": 18651 }, { "epoch": 2.6745053054201318, "grad_norm": 0.2785896360874176, "learning_rate": 3.5435547871307316e-07, "loss": 0.2981, "step": 18652 }, { "epoch": 2.674648695153427, "grad_norm": 0.2721053659915924, "learning_rate": 3.5404707701069406e-07, "loss": 0.3019, "step": 18653 }, { "epoch": 2.674792084886722, "grad_norm": 0.28651267290115356, "learning_rate": 3.537388046444973e-07, "loss": 0.278, "step": 18654 }, { "epoch": 2.674935474620017, "grad_norm": 0.27482539415359497, "learning_rate": 3.534306616230654e-07, "loss": 0.301, "step": 18655 }, { "epoch": 2.6750788643533125, "grad_norm": 0.27883201837539673, "learning_rate": 3.5312264795497543e-07, "loss": 0.2811, "step": 18656 }, { "epoch": 2.6752222540866075, "grad_norm": 0.25510507822036743, "learning_rate": 3.5281476364880327e-07, "loss": 0.2761, "step": 18657 }, { "epoch": 2.6753656438199025, "grad_norm": 0.2826399803161621, "learning_rate": 3.525070087131188e-07, "loss": 0.2789, "step": 18658 }, { "epoch": 2.6755090335531975, "grad_norm": 0.24882666766643524, "learning_rate": 3.521993831564907e-07, "loss": 0.2856, "step": 18659 }, { "epoch": 2.6756524232864924, "grad_norm": 0.2828296720981598, "learning_rate": 3.5189188698748157e-07, "loss": 0.3119, "step": 18660 }, { "epoch": 2.675795813019788, "grad_norm": 0.2552279233932495, "learning_rate": 3.515845202146523e-07, "loss": 0.2814, "step": 18661 }, { "epoch": 2.675939202753083, "grad_norm": 0.25949254631996155, "learning_rate": 3.512772828465605e-07, "loss": 0.2865, "step": 18662 }, { "epoch": 2.676082592486378, "grad_norm": 0.2777039408683777, "learning_rate": 3.509701748917571e-07, "loss": 0.2622, "step": 18663 }, { "epoch": 2.6762259822196732, "grad_norm": 0.2947899103164673, "learning_rate": 3.50663196358792e-07, "loss": 0.265, "step": 18664 }, { "epoch": 2.676369371952968, "grad_norm": 0.26644986867904663, "learning_rate": 3.5035634725621217e-07, "loss": 0.2829, "step": 18665 }, { "epoch": 2.676512761686263, "grad_norm": 0.2722526490688324, "learning_rate": 3.500496275925591e-07, "loss": 0.2912, "step": 18666 }, { "epoch": 2.676656151419558, "grad_norm": 0.257769912481308, "learning_rate": 3.497430373763716e-07, "loss": 0.298, "step": 18667 }, { "epoch": 2.6767995411528536, "grad_norm": 0.276184618473053, "learning_rate": 3.4943657661618445e-07, "loss": 0.2934, "step": 18668 }, { "epoch": 2.6769429308861485, "grad_norm": 0.2958261966705322, "learning_rate": 3.4913024532052864e-07, "loss": 0.2872, "step": 18669 }, { "epoch": 2.6770863206194435, "grad_norm": 0.2700308859348297, "learning_rate": 3.488240434979323e-07, "loss": 0.2659, "step": 18670 }, { "epoch": 2.677229710352739, "grad_norm": 0.27206045389175415, "learning_rate": 3.485179711569203e-07, "loss": 0.2935, "step": 18671 }, { "epoch": 2.677373100086034, "grad_norm": 0.26738977432250977, "learning_rate": 3.4821202830601253e-07, "loss": 0.2757, "step": 18672 }, { "epoch": 2.677516489819329, "grad_norm": 0.2677251398563385, "learning_rate": 3.4790621495372546e-07, "loss": 0.2765, "step": 18673 }, { "epoch": 2.677659879552624, "grad_norm": 0.2831365168094635, "learning_rate": 3.4760053110857394e-07, "loss": 0.2709, "step": 18674 }, { "epoch": 2.6778032692859193, "grad_norm": 0.2726142108440399, "learning_rate": 3.472949767790662e-07, "loss": 0.2789, "step": 18675 }, { "epoch": 2.6779466590192142, "grad_norm": 0.26223576068878174, "learning_rate": 3.4698955197370987e-07, "loss": 0.2776, "step": 18676 }, { "epoch": 2.6780900487525092, "grad_norm": 0.27322664856910706, "learning_rate": 3.466842567010059e-07, "loss": 0.2604, "step": 18677 }, { "epoch": 2.6782334384858046, "grad_norm": 0.2761940360069275, "learning_rate": 3.463790909694553e-07, "loss": 0.2834, "step": 18678 }, { "epoch": 2.6783768282190996, "grad_norm": 0.29000455141067505, "learning_rate": 3.460740547875513e-07, "loss": 0.2957, "step": 18679 }, { "epoch": 2.6785202179523946, "grad_norm": 0.2651733458042145, "learning_rate": 3.4576914816378704e-07, "loss": 0.2824, "step": 18680 }, { "epoch": 2.6786636076856896, "grad_norm": 0.266392320394516, "learning_rate": 3.4546437110664965e-07, "loss": 0.2774, "step": 18681 }, { "epoch": 2.6788069974189845, "grad_norm": 0.27731046080589294, "learning_rate": 3.45159723624624e-07, "loss": 0.2701, "step": 18682 }, { "epoch": 2.67895038715228, "grad_norm": 0.26691168546676636, "learning_rate": 3.448552057261917e-07, "loss": 0.2872, "step": 18683 }, { "epoch": 2.679093776885575, "grad_norm": 0.2763025462627411, "learning_rate": 3.4455081741982976e-07, "loss": 0.2907, "step": 18684 }, { "epoch": 2.6792371666188703, "grad_norm": 0.26430442929267883, "learning_rate": 3.442465587140109e-07, "loss": 0.277, "step": 18685 }, { "epoch": 2.6793805563521653, "grad_norm": 0.28908154368400574, "learning_rate": 3.4394242961720614e-07, "loss": 0.2858, "step": 18686 }, { "epoch": 2.6795239460854603, "grad_norm": 0.27290764451026917, "learning_rate": 3.4363843013788203e-07, "loss": 0.2752, "step": 18687 }, { "epoch": 2.6796673358187553, "grad_norm": 0.27715858817100525, "learning_rate": 3.433345602845012e-07, "loss": 0.2832, "step": 18688 }, { "epoch": 2.6798107255520502, "grad_norm": 0.2718944549560547, "learning_rate": 3.43030820065523e-07, "loss": 0.2728, "step": 18689 }, { "epoch": 2.6799541152853457, "grad_norm": 0.28938284516334534, "learning_rate": 3.4272720948940297e-07, "loss": 0.2858, "step": 18690 }, { "epoch": 2.6800975050186406, "grad_norm": 0.2665835916996002, "learning_rate": 3.424237285645926e-07, "loss": 0.2693, "step": 18691 }, { "epoch": 2.6802408947519356, "grad_norm": 0.26989272236824036, "learning_rate": 3.4212037729954075e-07, "loss": 0.2841, "step": 18692 }, { "epoch": 2.680384284485231, "grad_norm": 0.2754828631877899, "learning_rate": 3.418171557026928e-07, "loss": 0.283, "step": 18693 }, { "epoch": 2.680527674218526, "grad_norm": 0.27695828676223755, "learning_rate": 3.4151406378249043e-07, "loss": 0.2915, "step": 18694 }, { "epoch": 2.680671063951821, "grad_norm": 0.2658023238182068, "learning_rate": 3.4121110154737015e-07, "loss": 0.2783, "step": 18695 }, { "epoch": 2.680814453685116, "grad_norm": 0.2713191509246826, "learning_rate": 3.409082690057658e-07, "loss": 0.2861, "step": 18696 }, { "epoch": 2.6809578434184114, "grad_norm": 0.2640271782875061, "learning_rate": 3.4060556616610786e-07, "loss": 0.2633, "step": 18697 }, { "epoch": 2.6811012331517063, "grad_norm": 0.2662912905216217, "learning_rate": 3.40302993036824e-07, "loss": 0.2828, "step": 18698 }, { "epoch": 2.6812446228850013, "grad_norm": 0.28955039381980896, "learning_rate": 3.4000054962633645e-07, "loss": 0.2753, "step": 18699 }, { "epoch": 2.6813880126182967, "grad_norm": 0.27621781826019287, "learning_rate": 3.3969823594306674e-07, "loss": 0.2718, "step": 18700 }, { "epoch": 2.6815314023515917, "grad_norm": 0.2768319249153137, "learning_rate": 3.3939605199542756e-07, "loss": 0.2724, "step": 18701 }, { "epoch": 2.6816747920848867, "grad_norm": 0.254944771528244, "learning_rate": 3.390939977918334e-07, "loss": 0.2935, "step": 18702 }, { "epoch": 2.6818181818181817, "grad_norm": 0.2558412253856659, "learning_rate": 3.387920733406924e-07, "loss": 0.2801, "step": 18703 }, { "epoch": 2.681961571551477, "grad_norm": 0.2676963210105896, "learning_rate": 3.384902786504102e-07, "loss": 0.2789, "step": 18704 }, { "epoch": 2.682104961284772, "grad_norm": 0.25892215967178345, "learning_rate": 3.3818861372938826e-07, "loss": 0.2767, "step": 18705 }, { "epoch": 2.682248351018067, "grad_norm": 0.2857593297958374, "learning_rate": 3.378870785860233e-07, "loss": 0.2549, "step": 18706 }, { "epoch": 2.6823917407513624, "grad_norm": 0.26658540964126587, "learning_rate": 3.375856732287103e-07, "loss": 0.2816, "step": 18707 }, { "epoch": 2.6825351304846574, "grad_norm": 0.2653970718383789, "learning_rate": 3.372843976658402e-07, "loss": 0.2634, "step": 18708 }, { "epoch": 2.6826785202179524, "grad_norm": 0.26839640736579895, "learning_rate": 3.369832519057997e-07, "loss": 0.2704, "step": 18709 }, { "epoch": 2.6828219099512474, "grad_norm": 0.2535676956176758, "learning_rate": 3.366822359569727e-07, "loss": 0.2947, "step": 18710 }, { "epoch": 2.6829652996845423, "grad_norm": 0.26264068484306335, "learning_rate": 3.363813498277385e-07, "loss": 0.2725, "step": 18711 }, { "epoch": 2.6831086894178378, "grad_norm": 0.28243544697761536, "learning_rate": 3.360805935264738e-07, "loss": 0.25, "step": 18712 }, { "epoch": 2.6832520791511327, "grad_norm": 0.280831515789032, "learning_rate": 3.357799670615508e-07, "loss": 0.2798, "step": 18713 }, { "epoch": 2.683395468884428, "grad_norm": 0.2985627055168152, "learning_rate": 3.3547947044133833e-07, "loss": 0.2911, "step": 18714 }, { "epoch": 2.683538858617723, "grad_norm": 0.2713707983493805, "learning_rate": 3.351791036742025e-07, "loss": 0.2789, "step": 18715 }, { "epoch": 2.683682248351018, "grad_norm": 0.26864126324653625, "learning_rate": 3.348788667685049e-07, "loss": 0.2944, "step": 18716 }, { "epoch": 2.683825638084313, "grad_norm": 0.25091663002967834, "learning_rate": 3.345787597326028e-07, "loss": 0.2879, "step": 18717 }, { "epoch": 2.683969027817608, "grad_norm": 0.2492465078830719, "learning_rate": 3.342787825748511e-07, "loss": 0.2707, "step": 18718 }, { "epoch": 2.6841124175509035, "grad_norm": 0.27196943759918213, "learning_rate": 3.339789353036016e-07, "loss": 0.2717, "step": 18719 }, { "epoch": 2.6842558072841984, "grad_norm": 0.2712220251560211, "learning_rate": 3.3367921792720025e-07, "loss": 0.2838, "step": 18720 }, { "epoch": 2.6843991970174934, "grad_norm": 0.26072996854782104, "learning_rate": 3.3337963045399216e-07, "loss": 0.2988, "step": 18721 }, { "epoch": 2.684542586750789, "grad_norm": 0.2488214671611786, "learning_rate": 3.3308017289231554e-07, "loss": 0.2813, "step": 18722 }, { "epoch": 2.684685976484084, "grad_norm": 0.2844717502593994, "learning_rate": 3.3278084525050823e-07, "loss": 0.2906, "step": 18723 }, { "epoch": 2.6848293662173788, "grad_norm": 0.2701677978038788, "learning_rate": 3.324816475369025e-07, "loss": 0.2766, "step": 18724 }, { "epoch": 2.6849727559506738, "grad_norm": 0.24805842339992523, "learning_rate": 3.3218257975982713e-07, "loss": 0.2739, "step": 18725 }, { "epoch": 2.685116145683969, "grad_norm": 0.25966182351112366, "learning_rate": 3.318836419276089e-07, "loss": 0.2873, "step": 18726 }, { "epoch": 2.685259535417264, "grad_norm": 0.26949816942214966, "learning_rate": 3.3158483404856947e-07, "loss": 0.2803, "step": 18727 }, { "epoch": 2.685402925150559, "grad_norm": 0.25771164894104004, "learning_rate": 3.3128615613102655e-07, "loss": 0.2844, "step": 18728 }, { "epoch": 2.6855463148838545, "grad_norm": 0.2577681839466095, "learning_rate": 3.309876081832952e-07, "loss": 0.2562, "step": 18729 }, { "epoch": 2.6856897046171495, "grad_norm": 0.2756328284740448, "learning_rate": 3.306891902136866e-07, "loss": 0.2778, "step": 18730 }, { "epoch": 2.6858330943504445, "grad_norm": 0.26627296209335327, "learning_rate": 3.3039090223050797e-07, "loss": 0.2845, "step": 18731 }, { "epoch": 2.6859764840837395, "grad_norm": 0.2822319567203522, "learning_rate": 3.300927442420643e-07, "loss": 0.2899, "step": 18732 }, { "epoch": 2.6861198738170344, "grad_norm": 0.26732608675956726, "learning_rate": 3.297947162566545e-07, "loss": 0.2614, "step": 18733 }, { "epoch": 2.68626326355033, "grad_norm": 0.26629728078842163, "learning_rate": 3.294968182825753e-07, "loss": 0.2749, "step": 18734 }, { "epoch": 2.686406653283625, "grad_norm": 0.28825026750564575, "learning_rate": 3.2919905032812003e-07, "loss": 0.2773, "step": 18735 }, { "epoch": 2.6865500430169202, "grad_norm": 0.2428542524576187, "learning_rate": 3.289014124015788e-07, "loss": 0.2656, "step": 18736 }, { "epoch": 2.686693432750215, "grad_norm": 0.27543896436691284, "learning_rate": 3.2860390451123604e-07, "loss": 0.2597, "step": 18737 }, { "epoch": 2.68683682248351, "grad_norm": 0.26122748851776123, "learning_rate": 3.2830652666537567e-07, "loss": 0.2756, "step": 18738 }, { "epoch": 2.686980212216805, "grad_norm": 0.27439117431640625, "learning_rate": 3.280092788722744e-07, "loss": 0.2737, "step": 18739 }, { "epoch": 2.6871236019501, "grad_norm": 0.2675781846046448, "learning_rate": 3.277121611402079e-07, "loss": 0.2697, "step": 18740 }, { "epoch": 2.6872669916833956, "grad_norm": 0.24751290678977966, "learning_rate": 3.2741517347744777e-07, "loss": 0.2772, "step": 18741 }, { "epoch": 2.6874103814166905, "grad_norm": 0.25881993770599365, "learning_rate": 3.271183158922614e-07, "loss": 0.2725, "step": 18742 }, { "epoch": 2.6875537711499855, "grad_norm": 0.26181739568710327, "learning_rate": 3.2682158839291323e-07, "loss": 0.2612, "step": 18743 }, { "epoch": 2.687697160883281, "grad_norm": 0.26703980565071106, "learning_rate": 3.265249909876628e-07, "loss": 0.2933, "step": 18744 }, { "epoch": 2.687840550616576, "grad_norm": 0.2580951452255249, "learning_rate": 3.2622852368476734e-07, "loss": 0.2803, "step": 18745 }, { "epoch": 2.687983940349871, "grad_norm": 0.2827056646347046, "learning_rate": 3.2593218649248083e-07, "loss": 0.2984, "step": 18746 }, { "epoch": 2.688127330083166, "grad_norm": 0.2604816257953644, "learning_rate": 3.256359794190511e-07, "loss": 0.2921, "step": 18747 }, { "epoch": 2.6882707198164613, "grad_norm": 0.2649175226688385, "learning_rate": 3.2533990247272606e-07, "loss": 0.2793, "step": 18748 }, { "epoch": 2.6884141095497562, "grad_norm": 0.2609694004058838, "learning_rate": 3.250439556617468e-07, "loss": 0.2825, "step": 18749 }, { "epoch": 2.688557499283051, "grad_norm": 0.2591434717178345, "learning_rate": 3.247481389943524e-07, "loss": 0.273, "step": 18750 }, { "epoch": 2.6887008890163466, "grad_norm": 0.2693521976470947, "learning_rate": 3.244524524787779e-07, "loss": 0.2831, "step": 18751 }, { "epoch": 2.6888442787496416, "grad_norm": 0.2836190164089203, "learning_rate": 3.2415689612325497e-07, "loss": 0.264, "step": 18752 }, { "epoch": 2.6889876684829366, "grad_norm": 0.2693842053413391, "learning_rate": 3.23861469936011e-07, "loss": 0.2897, "step": 18753 }, { "epoch": 2.6891310582162316, "grad_norm": 0.2765960097312927, "learning_rate": 3.2356617392527155e-07, "loss": 0.2833, "step": 18754 }, { "epoch": 2.689274447949527, "grad_norm": 0.26209545135498047, "learning_rate": 3.232710080992552e-07, "loss": 0.2892, "step": 18755 }, { "epoch": 2.689417837682822, "grad_norm": 0.2878626585006714, "learning_rate": 3.229759724661796e-07, "loss": 0.2851, "step": 18756 }, { "epoch": 2.689561227416117, "grad_norm": 0.2580866813659668, "learning_rate": 3.2268106703425897e-07, "loss": 0.2768, "step": 18757 }, { "epoch": 2.6897046171494123, "grad_norm": 0.2686643898487091, "learning_rate": 3.2238629181170157e-07, "loss": 0.2931, "step": 18758 }, { "epoch": 2.6898480068827073, "grad_norm": 0.27651435136795044, "learning_rate": 3.2209164680671533e-07, "loss": 0.2729, "step": 18759 }, { "epoch": 2.6899913966160023, "grad_norm": 0.2737554907798767, "learning_rate": 3.2179713202750094e-07, "loss": 0.2778, "step": 18760 }, { "epoch": 2.6901347863492973, "grad_norm": 0.2603320777416229, "learning_rate": 3.2150274748225797e-07, "loss": 0.2762, "step": 18761 }, { "epoch": 2.6902781760825922, "grad_norm": 0.26847514510154724, "learning_rate": 3.21208493179182e-07, "loss": 0.2861, "step": 18762 }, { "epoch": 2.6904215658158877, "grad_norm": 0.28288429975509644, "learning_rate": 3.2091436912646377e-07, "loss": 0.2695, "step": 18763 }, { "epoch": 2.6905649555491826, "grad_norm": 0.28657886385917664, "learning_rate": 3.206203753322917e-07, "loss": 0.2848, "step": 18764 }, { "epoch": 2.690708345282478, "grad_norm": 0.2818087637424469, "learning_rate": 3.203265118048504e-07, "loss": 0.2807, "step": 18765 }, { "epoch": 2.690851735015773, "grad_norm": 0.2689223885536194, "learning_rate": 3.200327785523205e-07, "loss": 0.2917, "step": 18766 }, { "epoch": 2.690995124749068, "grad_norm": 0.2723068594932556, "learning_rate": 3.1973917558287826e-07, "loss": 0.2823, "step": 18767 }, { "epoch": 2.691138514482363, "grad_norm": 0.269185334444046, "learning_rate": 3.1944570290469825e-07, "loss": 0.2816, "step": 18768 }, { "epoch": 2.691281904215658, "grad_norm": 0.28242284059524536, "learning_rate": 3.1915236052595e-07, "loss": 0.2743, "step": 18769 }, { "epoch": 2.6914252939489534, "grad_norm": 0.26474228501319885, "learning_rate": 3.188591484547998e-07, "loss": 0.2873, "step": 18770 }, { "epoch": 2.6915686836822483, "grad_norm": 0.27159470319747925, "learning_rate": 3.185660666994095e-07, "loss": 0.278, "step": 18771 }, { "epoch": 2.6917120734155433, "grad_norm": 0.2736022472381592, "learning_rate": 3.1827311526793857e-07, "loss": 0.2692, "step": 18772 }, { "epoch": 2.6918554631488387, "grad_norm": 0.25431299209594727, "learning_rate": 3.1798029416854225e-07, "loss": 0.275, "step": 18773 }, { "epoch": 2.6919988528821337, "grad_norm": 0.27166736125946045, "learning_rate": 3.176876034093718e-07, "loss": 0.2788, "step": 18774 }, { "epoch": 2.6921422426154287, "grad_norm": 0.2799991965293884, "learning_rate": 3.173950429985767e-07, "loss": 0.2755, "step": 18775 }, { "epoch": 2.6922856323487236, "grad_norm": 0.2714713215827942, "learning_rate": 3.171026129443e-07, "loss": 0.2683, "step": 18776 }, { "epoch": 2.692429022082019, "grad_norm": 0.24936732649803162, "learning_rate": 3.168103132546829e-07, "loss": 0.275, "step": 18777 }, { "epoch": 2.692572411815314, "grad_norm": 0.26859191060066223, "learning_rate": 3.1651814393786227e-07, "loss": 0.2878, "step": 18778 }, { "epoch": 2.692715801548609, "grad_norm": 0.2578189969062805, "learning_rate": 3.1622610500197213e-07, "loss": 0.2875, "step": 18779 }, { "epoch": 2.6928591912819044, "grad_norm": 0.28312936425209045, "learning_rate": 3.159341964551421e-07, "loss": 0.2909, "step": 18780 }, { "epoch": 2.6930025810151994, "grad_norm": 0.28441447019577026, "learning_rate": 3.1564241830549844e-07, "loss": 0.2815, "step": 18781 }, { "epoch": 2.6931459707484944, "grad_norm": 0.26644831895828247, "learning_rate": 3.1535077056116416e-07, "loss": 0.2777, "step": 18782 }, { "epoch": 2.6932893604817894, "grad_norm": 0.26260116696357727, "learning_rate": 3.150592532302582e-07, "loss": 0.2872, "step": 18783 }, { "epoch": 2.6934327502150843, "grad_norm": 0.27748221158981323, "learning_rate": 3.1476786632089583e-07, "loss": 0.2815, "step": 18784 }, { "epoch": 2.6935761399483797, "grad_norm": 0.28386449813842773, "learning_rate": 3.1447660984118887e-07, "loss": 0.2899, "step": 18785 }, { "epoch": 2.6937195296816747, "grad_norm": 0.24998870491981506, "learning_rate": 3.1418548379924585e-07, "loss": 0.2817, "step": 18786 }, { "epoch": 2.69386291941497, "grad_norm": 0.2852890193462372, "learning_rate": 3.1389448820317026e-07, "loss": 0.2828, "step": 18787 }, { "epoch": 2.694006309148265, "grad_norm": 0.2618379294872284, "learning_rate": 3.136036230610634e-07, "loss": 0.276, "step": 18788 }, { "epoch": 2.69414969888156, "grad_norm": 0.24753247201442719, "learning_rate": 3.1331288838102267e-07, "loss": 0.2686, "step": 18789 }, { "epoch": 2.694293088614855, "grad_norm": 0.2585183382034302, "learning_rate": 3.130222841711422e-07, "loss": 0.291, "step": 18790 }, { "epoch": 2.69443647834815, "grad_norm": 0.27595555782318115, "learning_rate": 3.127318104395105e-07, "loss": 0.2936, "step": 18791 }, { "epoch": 2.6945798680814455, "grad_norm": 0.2686630189418793, "learning_rate": 3.1244146719421606e-07, "loss": 0.288, "step": 18792 }, { "epoch": 2.6947232578147404, "grad_norm": 0.26963546872138977, "learning_rate": 3.121512544433397e-07, "loss": 0.2779, "step": 18793 }, { "epoch": 2.6948666475480354, "grad_norm": 0.2643958032131195, "learning_rate": 3.1186117219496105e-07, "loss": 0.2736, "step": 18794 }, { "epoch": 2.695010037281331, "grad_norm": 0.2688984274864197, "learning_rate": 3.115712204571558e-07, "loss": 0.2718, "step": 18795 }, { "epoch": 2.695153427014626, "grad_norm": 0.27421244978904724, "learning_rate": 3.112813992379954e-07, "loss": 0.2813, "step": 18796 }, { "epoch": 2.6952968167479208, "grad_norm": 0.2652210295200348, "learning_rate": 3.1099170854554937e-07, "loss": 0.2653, "step": 18797 }, { "epoch": 2.6954402064812157, "grad_norm": 0.2738104462623596, "learning_rate": 3.107021483878797e-07, "loss": 0.2871, "step": 18798 }, { "epoch": 2.695583596214511, "grad_norm": 0.26172763109207153, "learning_rate": 3.1041271877304933e-07, "loss": 0.3104, "step": 18799 }, { "epoch": 2.695726985947806, "grad_norm": 0.26946717500686646, "learning_rate": 3.1012341970911464e-07, "loss": 0.2757, "step": 18800 }, { "epoch": 2.695870375681101, "grad_norm": 0.2678937613964081, "learning_rate": 3.098342512041297e-07, "loss": 0.2945, "step": 18801 }, { "epoch": 2.6960137654143965, "grad_norm": 0.2839365005493164, "learning_rate": 3.0954521326614417e-07, "loss": 0.2845, "step": 18802 }, { "epoch": 2.6961571551476915, "grad_norm": 0.27752983570098877, "learning_rate": 3.09256305903205e-07, "loss": 0.2593, "step": 18803 }, { "epoch": 2.6963005448809865, "grad_norm": 0.26230230927467346, "learning_rate": 3.08967529123354e-07, "loss": 0.2789, "step": 18804 }, { "epoch": 2.6964439346142814, "grad_norm": 0.2653108239173889, "learning_rate": 3.0867888293463156e-07, "loss": 0.2832, "step": 18805 }, { "epoch": 2.696587324347577, "grad_norm": 0.24858269095420837, "learning_rate": 3.0839036734507167e-07, "loss": 0.2701, "step": 18806 }, { "epoch": 2.696730714080872, "grad_norm": 0.2699275314807892, "learning_rate": 3.081019823627074e-07, "loss": 0.2897, "step": 18807 }, { "epoch": 2.696874103814167, "grad_norm": 0.27295854687690735, "learning_rate": 3.078137279955673e-07, "loss": 0.2953, "step": 18808 }, { "epoch": 2.6970174935474622, "grad_norm": 0.26137128472328186, "learning_rate": 3.0752560425167386e-07, "loss": 0.2921, "step": 18809 }, { "epoch": 2.697160883280757, "grad_norm": 0.25915300846099854, "learning_rate": 3.0723761113904957e-07, "loss": 0.2915, "step": 18810 }, { "epoch": 2.697304273014052, "grad_norm": 0.2812020480632782, "learning_rate": 3.069497486657114e-07, "loss": 0.3123, "step": 18811 }, { "epoch": 2.697447662747347, "grad_norm": 0.26125088334083557, "learning_rate": 3.066620168396728e-07, "loss": 0.3004, "step": 18812 }, { "epoch": 2.697591052480642, "grad_norm": 0.26031526923179626, "learning_rate": 3.063744156689447e-07, "loss": 0.2945, "step": 18813 }, { "epoch": 2.6977344422139375, "grad_norm": 0.2860131859779358, "learning_rate": 3.060869451615317e-07, "loss": 0.2695, "step": 18814 }, { "epoch": 2.6978778319472325, "grad_norm": 0.2700563967227936, "learning_rate": 3.0579960532543805e-07, "loss": 0.2709, "step": 18815 }, { "epoch": 2.698021221680528, "grad_norm": 0.2817544639110565, "learning_rate": 3.055123961686618e-07, "loss": 0.2979, "step": 18816 }, { "epoch": 2.698164611413823, "grad_norm": 0.27816715836524963, "learning_rate": 3.052253176991993e-07, "loss": 0.2973, "step": 18817 }, { "epoch": 2.698308001147118, "grad_norm": 0.27208825945854187, "learning_rate": 3.0493836992504187e-07, "loss": 0.2707, "step": 18818 }, { "epoch": 2.698451390880413, "grad_norm": 0.24420209228992462, "learning_rate": 3.0465155285417824e-07, "loss": 0.2694, "step": 18819 }, { "epoch": 2.698594780613708, "grad_norm": 0.28584492206573486, "learning_rate": 3.043648664945925e-07, "loss": 0.2855, "step": 18820 }, { "epoch": 2.6987381703470033, "grad_norm": 0.25353795289993286, "learning_rate": 3.040783108542655e-07, "loss": 0.2625, "step": 18821 }, { "epoch": 2.6988815600802982, "grad_norm": 0.28462547063827515, "learning_rate": 3.037918859411748e-07, "loss": 0.2684, "step": 18822 }, { "epoch": 2.699024949813593, "grad_norm": 0.27117928862571716, "learning_rate": 3.035055917632934e-07, "loss": 0.2704, "step": 18823 }, { "epoch": 2.6991683395468886, "grad_norm": 0.27732598781585693, "learning_rate": 3.0321942832859275e-07, "loss": 0.2769, "step": 18824 }, { "epoch": 2.6993117292801836, "grad_norm": 0.2656736969947815, "learning_rate": 3.0293339564503757e-07, "loss": 0.2859, "step": 18825 }, { "epoch": 2.6994551190134786, "grad_norm": 0.2637788653373718, "learning_rate": 3.0264749372059145e-07, "loss": 0.2825, "step": 18826 }, { "epoch": 2.6995985087467735, "grad_norm": 0.2691616714000702, "learning_rate": 3.023617225632131e-07, "loss": 0.2714, "step": 18827 }, { "epoch": 2.699741898480069, "grad_norm": 0.27086642384529114, "learning_rate": 3.020760821808577e-07, "loss": 0.2856, "step": 18828 }, { "epoch": 2.699885288213364, "grad_norm": 0.2604876756668091, "learning_rate": 3.017905725814785e-07, "loss": 0.2906, "step": 18829 }, { "epoch": 2.700028677946659, "grad_norm": 0.279217004776001, "learning_rate": 3.0150519377302126e-07, "loss": 0.2763, "step": 18830 }, { "epoch": 2.7001720676799543, "grad_norm": 0.270429790019989, "learning_rate": 3.012199457634324e-07, "loss": 0.2807, "step": 18831 }, { "epoch": 2.7003154574132493, "grad_norm": 0.24662932753562927, "learning_rate": 3.0093482856065225e-07, "loss": 0.2643, "step": 18832 }, { "epoch": 2.7004588471465443, "grad_norm": 0.2802385985851288, "learning_rate": 3.0064984217261726e-07, "loss": 0.2804, "step": 18833 }, { "epoch": 2.7006022368798392, "grad_norm": 0.2703372538089752, "learning_rate": 3.003649866072622e-07, "loss": 0.2839, "step": 18834 }, { "epoch": 2.7007456266131347, "grad_norm": 0.28563210368156433, "learning_rate": 3.000802618725174e-07, "loss": 0.2462, "step": 18835 }, { "epoch": 2.7008890163464296, "grad_norm": 0.27661070227622986, "learning_rate": 2.9979566797630757e-07, "loss": 0.2826, "step": 18836 }, { "epoch": 2.7010324060797246, "grad_norm": 0.29271984100341797, "learning_rate": 2.995112049265547e-07, "loss": 0.2787, "step": 18837 }, { "epoch": 2.70117579581302, "grad_norm": 0.27244970202445984, "learning_rate": 2.9922687273118033e-07, "loss": 0.285, "step": 18838 }, { "epoch": 2.701319185546315, "grad_norm": 0.2717861831188202, "learning_rate": 2.989426713980986e-07, "loss": 0.2961, "step": 18839 }, { "epoch": 2.70146257527961, "grad_norm": 0.28061825037002563, "learning_rate": 2.9865860093522216e-07, "loss": 0.2779, "step": 18840 }, { "epoch": 2.701605965012905, "grad_norm": 0.2495419979095459, "learning_rate": 2.983746613504579e-07, "loss": 0.2901, "step": 18841 }, { "epoch": 2.7017493547462, "grad_norm": 0.2955063581466675, "learning_rate": 2.980908526517101e-07, "loss": 0.2849, "step": 18842 }, { "epoch": 2.7018927444794953, "grad_norm": 0.26862430572509766, "learning_rate": 2.978071748468803e-07, "loss": 0.2818, "step": 18843 }, { "epoch": 2.7020361342127903, "grad_norm": 0.2629246115684509, "learning_rate": 2.9752362794386534e-07, "loss": 0.2858, "step": 18844 }, { "epoch": 2.7021795239460857, "grad_norm": 0.2668945789337158, "learning_rate": 2.9724021195055854e-07, "loss": 0.2703, "step": 18845 }, { "epoch": 2.7023229136793807, "grad_norm": 0.2822495102882385, "learning_rate": 2.969569268748512e-07, "loss": 0.2874, "step": 18846 }, { "epoch": 2.7024663034126757, "grad_norm": 0.27055320143699646, "learning_rate": 2.9667377272462715e-07, "loss": 0.2777, "step": 18847 }, { "epoch": 2.7026096931459707, "grad_norm": 0.25697699189186096, "learning_rate": 2.9639074950777056e-07, "loss": 0.2858, "step": 18848 }, { "epoch": 2.7027530828792656, "grad_norm": 0.24899236857891083, "learning_rate": 2.9610785723216017e-07, "loss": 0.2685, "step": 18849 }, { "epoch": 2.702896472612561, "grad_norm": 0.2765251398086548, "learning_rate": 2.958250959056708e-07, "loss": 0.2799, "step": 18850 }, { "epoch": 2.703039862345856, "grad_norm": 0.2554253339767456, "learning_rate": 2.9554246553617493e-07, "loss": 0.263, "step": 18851 }, { "epoch": 2.703183252079151, "grad_norm": 0.2939891219139099, "learning_rate": 2.952599661315397e-07, "loss": 0.2797, "step": 18852 }, { "epoch": 2.7033266418124464, "grad_norm": 0.2598605751991272, "learning_rate": 2.949775976996294e-07, "loss": 0.2762, "step": 18853 }, { "epoch": 2.7034700315457414, "grad_norm": 0.25724491477012634, "learning_rate": 2.9469536024830545e-07, "loss": 0.2429, "step": 18854 }, { "epoch": 2.7036134212790364, "grad_norm": 0.27437564730644226, "learning_rate": 2.944132537854244e-07, "loss": 0.2848, "step": 18855 }, { "epoch": 2.7037568110123313, "grad_norm": 0.27390769124031067, "learning_rate": 2.941312783188399e-07, "loss": 0.275, "step": 18856 }, { "epoch": 2.7039002007456268, "grad_norm": 0.293153315782547, "learning_rate": 2.938494338564013e-07, "loss": 0.3011, "step": 18857 }, { "epoch": 2.7040435904789217, "grad_norm": 0.27730828523635864, "learning_rate": 2.9356772040595503e-07, "loss": 0.2684, "step": 18858 }, { "epoch": 2.7041869802122167, "grad_norm": 0.250643253326416, "learning_rate": 2.932861379753443e-07, "loss": 0.3027, "step": 18859 }, { "epoch": 2.704330369945512, "grad_norm": 0.27271729707717896, "learning_rate": 2.9300468657240676e-07, "loss": 0.2814, "step": 18860 }, { "epoch": 2.704473759678807, "grad_norm": 0.24222491681575775, "learning_rate": 2.9272336620497776e-07, "loss": 0.2888, "step": 18861 }, { "epoch": 2.704617149412102, "grad_norm": 0.26169276237487793, "learning_rate": 2.9244217688088996e-07, "loss": 0.281, "step": 18862 }, { "epoch": 2.704760539145397, "grad_norm": 0.2589544951915741, "learning_rate": 2.9216111860796983e-07, "loss": 0.2651, "step": 18863 }, { "epoch": 2.704903928878692, "grad_norm": 0.27606692910194397, "learning_rate": 2.918801913940422e-07, "loss": 0.2974, "step": 18864 }, { "epoch": 2.7050473186119874, "grad_norm": 0.27234774827957153, "learning_rate": 2.915993952469276e-07, "loss": 0.2781, "step": 18865 }, { "epoch": 2.7051907083452824, "grad_norm": 0.2767215669155121, "learning_rate": 2.9131873017444245e-07, "loss": 0.2824, "step": 18866 }, { "epoch": 2.705334098078578, "grad_norm": 0.2642807066440582, "learning_rate": 2.910381961844017e-07, "loss": 0.2902, "step": 18867 }, { "epoch": 2.705477487811873, "grad_norm": 0.2811524569988251, "learning_rate": 2.9075779328461284e-07, "loss": 0.2731, "step": 18868 }, { "epoch": 2.705620877545168, "grad_norm": 0.2812933921813965, "learning_rate": 2.904775214828831e-07, "loss": 0.2755, "step": 18869 }, { "epoch": 2.7057642672784628, "grad_norm": 0.26186639070510864, "learning_rate": 2.9019738078701454e-07, "loss": 0.2911, "step": 18870 }, { "epoch": 2.7059076570117577, "grad_norm": 0.26359525322914124, "learning_rate": 2.899173712048059e-07, "loss": 0.2742, "step": 18871 }, { "epoch": 2.706051046745053, "grad_norm": 0.2697475254535675, "learning_rate": 2.8963749274405206e-07, "loss": 0.2787, "step": 18872 }, { "epoch": 2.706194436478348, "grad_norm": 0.2824420928955078, "learning_rate": 2.8935774541254456e-07, "loss": 0.3045, "step": 18873 }, { "epoch": 2.706337826211643, "grad_norm": 0.2635331451892853, "learning_rate": 2.890781292180711e-07, "loss": 0.2623, "step": 18874 }, { "epoch": 2.7064812159449385, "grad_norm": 0.29074040055274963, "learning_rate": 2.88798644168416e-07, "loss": 0.2815, "step": 18875 }, { "epoch": 2.7066246056782335, "grad_norm": 0.2688930630683899, "learning_rate": 2.8851929027135903e-07, "loss": 0.2778, "step": 18876 }, { "epoch": 2.7067679954115285, "grad_norm": 0.26292452216148376, "learning_rate": 2.882400675346775e-07, "loss": 0.2655, "step": 18877 }, { "epoch": 2.7069113851448234, "grad_norm": 0.25394195318222046, "learning_rate": 2.8796097596614556e-07, "loss": 0.2833, "step": 18878 }, { "epoch": 2.707054774878119, "grad_norm": 0.26220259070396423, "learning_rate": 2.876820155735305e-07, "loss": 0.2986, "step": 18879 }, { "epoch": 2.707198164611414, "grad_norm": 0.27357274293899536, "learning_rate": 2.874031863645993e-07, "loss": 0.2701, "step": 18880 }, { "epoch": 2.707341554344709, "grad_norm": 0.2524903416633606, "learning_rate": 2.871244883471142e-07, "loss": 0.2866, "step": 18881 }, { "epoch": 2.707484944078004, "grad_norm": 0.26457372307777405, "learning_rate": 2.868459215288333e-07, "loss": 0.2779, "step": 18882 }, { "epoch": 2.707628333811299, "grad_norm": 0.26378488540649414, "learning_rate": 2.8656748591751216e-07, "loss": 0.2763, "step": 18883 }, { "epoch": 2.707771723544594, "grad_norm": 0.26430827379226685, "learning_rate": 2.862891815209018e-07, "loss": 0.2737, "step": 18884 }, { "epoch": 2.707915113277889, "grad_norm": 0.2767122685909271, "learning_rate": 2.8601100834674936e-07, "loss": 0.2715, "step": 18885 }, { "epoch": 2.7080585030111846, "grad_norm": 0.27277639508247375, "learning_rate": 2.857329664027986e-07, "loss": 0.2633, "step": 18886 }, { "epoch": 2.7082018927444795, "grad_norm": 0.25861307978630066, "learning_rate": 2.8545505569679e-07, "loss": 0.271, "step": 18887 }, { "epoch": 2.7083452824777745, "grad_norm": 0.25424709916114807, "learning_rate": 2.851772762364607e-07, "loss": 0.2807, "step": 18888 }, { "epoch": 2.70848867221107, "grad_norm": 0.28191274404525757, "learning_rate": 2.848996280295441e-07, "loss": 0.2795, "step": 18889 }, { "epoch": 2.708632061944365, "grad_norm": 0.2727961540222168, "learning_rate": 2.846221110837677e-07, "loss": 0.281, "step": 18890 }, { "epoch": 2.70877545167766, "grad_norm": 0.2920137941837311, "learning_rate": 2.843447254068582e-07, "loss": 0.2778, "step": 18891 }, { "epoch": 2.708918841410955, "grad_norm": 0.2539607882499695, "learning_rate": 2.840674710065372e-07, "loss": 0.2826, "step": 18892 }, { "epoch": 2.70906223114425, "grad_norm": 0.2779552638530731, "learning_rate": 2.8379034789052297e-07, "loss": 0.295, "step": 18893 }, { "epoch": 2.7092056208775452, "grad_norm": 0.26483896374702454, "learning_rate": 2.835133560665321e-07, "loss": 0.2947, "step": 18894 }, { "epoch": 2.70934901061084, "grad_norm": 0.280987948179245, "learning_rate": 2.8323649554227285e-07, "loss": 0.2859, "step": 18895 }, { "epoch": 2.7094924003441356, "grad_norm": 0.25116151571273804, "learning_rate": 2.829597663254546e-07, "loss": 0.2887, "step": 18896 }, { "epoch": 2.7096357900774306, "grad_norm": 0.27012863755226135, "learning_rate": 2.8268316842377963e-07, "loss": 0.259, "step": 18897 }, { "epoch": 2.7097791798107256, "grad_norm": 0.2584811747074127, "learning_rate": 2.8240670184494887e-07, "loss": 0.2785, "step": 18898 }, { "epoch": 2.7099225695440206, "grad_norm": 0.28060758113861084, "learning_rate": 2.821303665966585e-07, "loss": 0.2644, "step": 18899 }, { "epoch": 2.7100659592773155, "grad_norm": 0.26412931084632874, "learning_rate": 2.8185416268660227e-07, "loss": 0.2926, "step": 18900 }, { "epoch": 2.710209349010611, "grad_norm": 0.2762925624847412, "learning_rate": 2.8157809012246685e-07, "loss": 0.2986, "step": 18901 }, { "epoch": 2.710352738743906, "grad_norm": 0.25533315539360046, "learning_rate": 2.8130214891194e-07, "loss": 0.2744, "step": 18902 }, { "epoch": 2.710496128477201, "grad_norm": 0.27664312720298767, "learning_rate": 2.8102633906270217e-07, "loss": 0.2785, "step": 18903 }, { "epoch": 2.7106395182104963, "grad_norm": 0.2720828652381897, "learning_rate": 2.8075066058243175e-07, "loss": 0.2966, "step": 18904 }, { "epoch": 2.7107829079437913, "grad_norm": 0.2552720606327057, "learning_rate": 2.804751134788042e-07, "loss": 0.2694, "step": 18905 }, { "epoch": 2.7109262976770863, "grad_norm": 0.2550972104072571, "learning_rate": 2.8019969775948896e-07, "loss": 0.2756, "step": 18906 }, { "epoch": 2.7110696874103812, "grad_norm": 0.27819737792015076, "learning_rate": 2.7992441343215383e-07, "loss": 0.2745, "step": 18907 }, { "epoch": 2.7112130771436767, "grad_norm": 0.29570966958999634, "learning_rate": 2.796492605044615e-07, "loss": 0.2955, "step": 18908 }, { "epoch": 2.7113564668769716, "grad_norm": 0.2688334882259369, "learning_rate": 2.793742389840731e-07, "loss": 0.274, "step": 18909 }, { "epoch": 2.7114998566102666, "grad_norm": 0.2507857382297516, "learning_rate": 2.790993488786442e-07, "loss": 0.2772, "step": 18910 }, { "epoch": 2.711643246343562, "grad_norm": 0.25975993275642395, "learning_rate": 2.7882459019582696e-07, "loss": 0.2678, "step": 18911 }, { "epoch": 2.711786636076857, "grad_norm": 0.25192898511886597, "learning_rate": 2.7854996294327086e-07, "loss": 0.2969, "step": 18912 }, { "epoch": 2.711930025810152, "grad_norm": 0.2711797058582306, "learning_rate": 2.7827546712862084e-07, "loss": 0.2824, "step": 18913 }, { "epoch": 2.712073415543447, "grad_norm": 0.28104832768440247, "learning_rate": 2.780011027595186e-07, "loss": 0.272, "step": 18914 }, { "epoch": 2.712216805276742, "grad_norm": 0.25748568773269653, "learning_rate": 2.777268698436014e-07, "loss": 0.2744, "step": 18915 }, { "epoch": 2.7123601950100373, "grad_norm": 0.2812234163284302, "learning_rate": 2.7745276838850534e-07, "loss": 0.2758, "step": 18916 }, { "epoch": 2.7125035847433323, "grad_norm": 0.26198041439056396, "learning_rate": 2.7717879840185814e-07, "loss": 0.2663, "step": 18917 }, { "epoch": 2.7126469744766277, "grad_norm": 0.2642805874347687, "learning_rate": 2.769049598912887e-07, "loss": 0.2696, "step": 18918 }, { "epoch": 2.7127903642099227, "grad_norm": 0.26922526955604553, "learning_rate": 2.766312528644194e-07, "loss": 0.2994, "step": 18919 }, { "epoch": 2.7129337539432177, "grad_norm": 0.2574470043182373, "learning_rate": 2.763576773288701e-07, "loss": 0.286, "step": 18920 }, { "epoch": 2.7130771436765126, "grad_norm": 0.27414676547050476, "learning_rate": 2.760842332922575e-07, "loss": 0.2678, "step": 18921 }, { "epoch": 2.7132205334098076, "grad_norm": 0.2769516408443451, "learning_rate": 2.758109207621923e-07, "loss": 0.2715, "step": 18922 }, { "epoch": 2.713363923143103, "grad_norm": 0.24992892146110535, "learning_rate": 2.755377397462838e-07, "loss": 0.2592, "step": 18923 }, { "epoch": 2.713507312876398, "grad_norm": 0.258693665266037, "learning_rate": 2.752646902521372e-07, "loss": 0.2904, "step": 18924 }, { "epoch": 2.713650702609693, "grad_norm": 0.2585277855396271, "learning_rate": 2.749917722873541e-07, "loss": 0.2822, "step": 18925 }, { "epoch": 2.7137940923429884, "grad_norm": 0.25316959619522095, "learning_rate": 2.7471898585953117e-07, "loss": 0.2704, "step": 18926 }, { "epoch": 2.7139374820762834, "grad_norm": 0.25791317224502563, "learning_rate": 2.744463309762624e-07, "loss": 0.2717, "step": 18927 }, { "epoch": 2.7140808718095784, "grad_norm": 0.2661535441875458, "learning_rate": 2.741738076451389e-07, "loss": 0.2644, "step": 18928 }, { "epoch": 2.7142242615428733, "grad_norm": 0.2764412760734558, "learning_rate": 2.739014158737474e-07, "loss": 0.2754, "step": 18929 }, { "epoch": 2.7143676512761687, "grad_norm": 0.2728055715560913, "learning_rate": 2.7362915566966965e-07, "loss": 0.2731, "step": 18930 }, { "epoch": 2.7145110410094637, "grad_norm": 0.2741873264312744, "learning_rate": 2.733570270404862e-07, "loss": 0.2847, "step": 18931 }, { "epoch": 2.7146544307427587, "grad_norm": 0.28279542922973633, "learning_rate": 2.730850299937726e-07, "loss": 0.2829, "step": 18932 }, { "epoch": 2.714797820476054, "grad_norm": 0.2642427682876587, "learning_rate": 2.7281316453709963e-07, "loss": 0.2841, "step": 18933 }, { "epoch": 2.714941210209349, "grad_norm": 0.263832151889801, "learning_rate": 2.725414306780366e-07, "loss": 0.283, "step": 18934 }, { "epoch": 2.715084599942644, "grad_norm": 0.2618862986564636, "learning_rate": 2.722698284241476e-07, "loss": 0.3062, "step": 18935 }, { "epoch": 2.715227989675939, "grad_norm": 0.2880186140537262, "learning_rate": 2.719983577829943e-07, "loss": 0.2921, "step": 18936 }, { "epoch": 2.7153713794092345, "grad_norm": 0.26875606179237366, "learning_rate": 2.7172701876213404e-07, "loss": 0.2838, "step": 18937 }, { "epoch": 2.7155147691425294, "grad_norm": 0.2833535373210907, "learning_rate": 2.7145581136912015e-07, "loss": 0.2774, "step": 18938 }, { "epoch": 2.7156581588758244, "grad_norm": 0.27756011486053467, "learning_rate": 2.7118473561150214e-07, "loss": 0.2933, "step": 18939 }, { "epoch": 2.71580154860912, "grad_norm": 0.2723044157028198, "learning_rate": 2.7091379149682683e-07, "loss": 0.2893, "step": 18940 }, { "epoch": 2.715944938342415, "grad_norm": 0.26646047830581665, "learning_rate": 2.7064297903263703e-07, "loss": 0.291, "step": 18941 }, { "epoch": 2.7160883280757098, "grad_norm": 0.26088422536849976, "learning_rate": 2.703722982264712e-07, "loss": 0.2823, "step": 18942 }, { "epoch": 2.7162317178090047, "grad_norm": 0.25531554222106934, "learning_rate": 2.7010174908586596e-07, "loss": 0.3081, "step": 18943 }, { "epoch": 2.7163751075422997, "grad_norm": 0.26904550194740295, "learning_rate": 2.698313316183515e-07, "loss": 0.2836, "step": 18944 }, { "epoch": 2.716518497275595, "grad_norm": 0.2558952867984772, "learning_rate": 2.6956104583145624e-07, "loss": 0.2759, "step": 18945 }, { "epoch": 2.71666188700889, "grad_norm": 0.2641671597957611, "learning_rate": 2.6929089173270473e-07, "loss": 0.274, "step": 18946 }, { "epoch": 2.7168052767421855, "grad_norm": 0.27029213309288025, "learning_rate": 2.6902086932961756e-07, "loss": 0.2699, "step": 18947 }, { "epoch": 2.7169486664754805, "grad_norm": 0.2537602186203003, "learning_rate": 2.687509786297121e-07, "loss": 0.2634, "step": 18948 }, { "epoch": 2.7170920562087755, "grad_norm": 0.28991684317588806, "learning_rate": 2.6848121964050123e-07, "loss": 0.2891, "step": 18949 }, { "epoch": 2.7172354459420704, "grad_norm": 0.2590716481208801, "learning_rate": 2.6821159236949445e-07, "loss": 0.2565, "step": 18950 }, { "epoch": 2.7173788356753654, "grad_norm": 0.2696641981601715, "learning_rate": 2.6794209682419803e-07, "loss": 0.3007, "step": 18951 }, { "epoch": 2.717522225408661, "grad_norm": 0.26292258501052856, "learning_rate": 2.676727330121148e-07, "loss": 0.2697, "step": 18952 }, { "epoch": 2.717665615141956, "grad_norm": 0.2660684883594513, "learning_rate": 2.6740350094074276e-07, "loss": 0.2792, "step": 18953 }, { "epoch": 2.717809004875251, "grad_norm": 0.26727303862571716, "learning_rate": 2.67134400617578e-07, "loss": 0.2778, "step": 18954 }, { "epoch": 2.717952394608546, "grad_norm": 0.23782944679260254, "learning_rate": 2.6686543205011016e-07, "loss": 0.2759, "step": 18955 }, { "epoch": 2.718095784341841, "grad_norm": 0.27543121576309204, "learning_rate": 2.665965952458277e-07, "loss": 0.2653, "step": 18956 }, { "epoch": 2.718239174075136, "grad_norm": 0.27383875846862793, "learning_rate": 2.6632789021221516e-07, "loss": 0.2823, "step": 18957 }, { "epoch": 2.718382563808431, "grad_norm": 0.2725674510002136, "learning_rate": 2.6605931695675214e-07, "loss": 0.2779, "step": 18958 }, { "epoch": 2.7185259535417265, "grad_norm": 0.2626052796840668, "learning_rate": 2.6579087548691593e-07, "loss": 0.2742, "step": 18959 }, { "epoch": 2.7186693432750215, "grad_norm": 0.2740599811077118, "learning_rate": 2.6552256581017897e-07, "loss": 0.2921, "step": 18960 }, { "epoch": 2.7188127330083165, "grad_norm": 0.26707860827445984, "learning_rate": 2.6525438793401026e-07, "loss": 0.2734, "step": 18961 }, { "epoch": 2.718956122741612, "grad_norm": 0.26557353138923645, "learning_rate": 2.649863418658766e-07, "loss": 0.2806, "step": 18962 }, { "epoch": 2.719099512474907, "grad_norm": 0.2765669822692871, "learning_rate": 2.6471842761323863e-07, "loss": 0.257, "step": 18963 }, { "epoch": 2.719242902208202, "grad_norm": 0.2575458884239197, "learning_rate": 2.64450645183556e-07, "loss": 0.2898, "step": 18964 }, { "epoch": 2.719386291941497, "grad_norm": 0.2752246558666229, "learning_rate": 2.641829945842828e-07, "loss": 0.2995, "step": 18965 }, { "epoch": 2.719529681674792, "grad_norm": 0.26451796293258667, "learning_rate": 2.639154758228696e-07, "loss": 0.2837, "step": 18966 }, { "epoch": 2.7196730714080872, "grad_norm": 0.2761358320713043, "learning_rate": 2.6364808890676387e-07, "loss": 0.2801, "step": 18967 }, { "epoch": 2.719816461141382, "grad_norm": 0.2756084203720093, "learning_rate": 2.6338083384341016e-07, "loss": 0.2752, "step": 18968 }, { "epoch": 2.7199598508746776, "grad_norm": 0.2667330205440521, "learning_rate": 2.631137106402476e-07, "loss": 0.2792, "step": 18969 }, { "epoch": 2.7201032406079726, "grad_norm": 0.264321506023407, "learning_rate": 2.6284671930471297e-07, "loss": 0.2704, "step": 18970 }, { "epoch": 2.7202466303412676, "grad_norm": 0.27648112177848816, "learning_rate": 2.625798598442381e-07, "loss": 0.2759, "step": 18971 }, { "epoch": 2.7203900200745625, "grad_norm": 0.2779375910758972, "learning_rate": 2.6231313226625253e-07, "loss": 0.2834, "step": 18972 }, { "epoch": 2.7205334098078575, "grad_norm": 0.25911685824394226, "learning_rate": 2.62046536578181e-07, "loss": 0.2791, "step": 18973 }, { "epoch": 2.720676799541153, "grad_norm": 0.2683616876602173, "learning_rate": 2.617800727874459e-07, "loss": 0.2996, "step": 18974 }, { "epoch": 2.720820189274448, "grad_norm": 0.2685355842113495, "learning_rate": 2.6151374090146565e-07, "loss": 0.2852, "step": 18975 }, { "epoch": 2.720963579007743, "grad_norm": 0.2632482051849365, "learning_rate": 2.612475409276527e-07, "loss": 0.2705, "step": 18976 }, { "epoch": 2.7211069687410383, "grad_norm": 0.2581298351287842, "learning_rate": 2.609814728734189e-07, "loss": 0.272, "step": 18977 }, { "epoch": 2.7212503584743333, "grad_norm": 0.2744193375110626, "learning_rate": 2.6071553674617057e-07, "loss": 0.2705, "step": 18978 }, { "epoch": 2.7213937482076282, "grad_norm": 0.2795672118663788, "learning_rate": 2.6044973255331174e-07, "loss": 0.2828, "step": 18979 }, { "epoch": 2.7215371379409232, "grad_norm": 0.27732372283935547, "learning_rate": 2.6018406030224094e-07, "loss": 0.2669, "step": 18980 }, { "epoch": 2.7216805276742186, "grad_norm": 0.261652410030365, "learning_rate": 2.599185200003562e-07, "loss": 0.2769, "step": 18981 }, { "epoch": 2.7218239174075136, "grad_norm": 0.2713529169559479, "learning_rate": 2.596531116550466e-07, "loss": 0.282, "step": 18982 }, { "epoch": 2.7219673071408086, "grad_norm": 0.24939705431461334, "learning_rate": 2.593878352737028e-07, "loss": 0.2876, "step": 18983 }, { "epoch": 2.722110696874104, "grad_norm": 0.2641223669052124, "learning_rate": 2.59122690863709e-07, "loss": 0.2972, "step": 18984 }, { "epoch": 2.722254086607399, "grad_norm": 0.25542008876800537, "learning_rate": 2.5885767843244703e-07, "loss": 0.2783, "step": 18985 }, { "epoch": 2.722397476340694, "grad_norm": 0.2620999217033386, "learning_rate": 2.5859279798729485e-07, "loss": 0.2766, "step": 18986 }, { "epoch": 2.722540866073989, "grad_norm": 0.2704363167285919, "learning_rate": 2.5832804953562496e-07, "loss": 0.2808, "step": 18987 }, { "epoch": 2.7226842558072843, "grad_norm": 0.281674861907959, "learning_rate": 2.580634330848075e-07, "loss": 0.2662, "step": 18988 }, { "epoch": 2.7228276455405793, "grad_norm": 0.2682898938655853, "learning_rate": 2.5779894864221e-07, "loss": 0.2613, "step": 18989 }, { "epoch": 2.7229710352738743, "grad_norm": 0.2766127288341522, "learning_rate": 2.575345962151948e-07, "loss": 0.2849, "step": 18990 }, { "epoch": 2.7231144250071697, "grad_norm": 0.2677706182003021, "learning_rate": 2.5727037581112114e-07, "loss": 0.286, "step": 18991 }, { "epoch": 2.7232578147404647, "grad_norm": 0.27411484718322754, "learning_rate": 2.570062874373452e-07, "loss": 0.2659, "step": 18992 }, { "epoch": 2.7234012044737597, "grad_norm": 0.2786683142185211, "learning_rate": 2.5674233110121794e-07, "loss": 0.2863, "step": 18993 }, { "epoch": 2.7235445942070546, "grad_norm": 0.24505147337913513, "learning_rate": 2.5647850681008723e-07, "loss": 0.2732, "step": 18994 }, { "epoch": 2.7236879839403496, "grad_norm": 0.27242815494537354, "learning_rate": 2.562148145712978e-07, "loss": 0.2668, "step": 18995 }, { "epoch": 2.723831373673645, "grad_norm": 0.25951698422431946, "learning_rate": 2.5595125439219103e-07, "loss": 0.279, "step": 18996 }, { "epoch": 2.72397476340694, "grad_norm": 0.2579039931297302, "learning_rate": 2.5568782628010383e-07, "loss": 0.3049, "step": 18997 }, { "epoch": 2.7241181531402354, "grad_norm": 0.25499141216278076, "learning_rate": 2.554245302423691e-07, "loss": 0.2837, "step": 18998 }, { "epoch": 2.7242615428735304, "grad_norm": 0.26782408356666565, "learning_rate": 2.551613662863167e-07, "loss": 0.2841, "step": 18999 }, { "epoch": 2.7244049326068254, "grad_norm": 0.26294276118278503, "learning_rate": 2.5489833441927345e-07, "loss": 0.2704, "step": 19000 }, { "epoch": 2.7245483223401203, "grad_norm": 0.2723701000213623, "learning_rate": 2.5463543464856076e-07, "loss": 0.2712, "step": 19001 }, { "epoch": 2.7246917120734153, "grad_norm": 0.2702106833457947, "learning_rate": 2.5437266698149774e-07, "loss": 0.2711, "step": 19002 }, { "epoch": 2.7248351018067107, "grad_norm": 0.25771617889404297, "learning_rate": 2.541100314253997e-07, "loss": 0.2816, "step": 19003 }, { "epoch": 2.7249784915400057, "grad_norm": 0.2589498460292816, "learning_rate": 2.5384752798757794e-07, "loss": 0.2814, "step": 19004 }, { "epoch": 2.7251218812733007, "grad_norm": 0.2681945860385895, "learning_rate": 2.5358515667534e-07, "loss": 0.2681, "step": 19005 }, { "epoch": 2.725265271006596, "grad_norm": 0.2580878436565399, "learning_rate": 2.5332291749599003e-07, "loss": 0.2719, "step": 19006 }, { "epoch": 2.725408660739891, "grad_norm": 0.25466135144233704, "learning_rate": 2.530608104568283e-07, "loss": 0.2756, "step": 19007 }, { "epoch": 2.725552050473186, "grad_norm": 0.2761548161506653, "learning_rate": 2.527988355651517e-07, "loss": 0.2959, "step": 19008 }, { "epoch": 2.725695440206481, "grad_norm": 0.2587999105453491, "learning_rate": 2.5253699282825285e-07, "loss": 0.2892, "step": 19009 }, { "epoch": 2.7258388299397764, "grad_norm": 0.28582659363746643, "learning_rate": 2.522752822534208e-07, "loss": 0.274, "step": 19010 }, { "epoch": 2.7259822196730714, "grad_norm": 0.25666457414627075, "learning_rate": 2.520137038479414e-07, "loss": 0.2936, "step": 19011 }, { "epoch": 2.7261256094063664, "grad_norm": 0.2587989270687103, "learning_rate": 2.517522576190973e-07, "loss": 0.2837, "step": 19012 }, { "epoch": 2.726268999139662, "grad_norm": 0.26895010471343994, "learning_rate": 2.514909435741664e-07, "loss": 0.2797, "step": 19013 }, { "epoch": 2.726412388872957, "grad_norm": 0.2992308437824249, "learning_rate": 2.5122976172042237e-07, "loss": 0.2716, "step": 19014 }, { "epoch": 2.7265557786062518, "grad_norm": 0.2708776593208313, "learning_rate": 2.5096871206513664e-07, "loss": 0.2918, "step": 19015 }, { "epoch": 2.7266991683395467, "grad_norm": 0.2626526355743408, "learning_rate": 2.507077946155767e-07, "loss": 0.2817, "step": 19016 }, { "epoch": 2.726842558072842, "grad_norm": 0.2623720169067383, "learning_rate": 2.5044700937900624e-07, "loss": 0.2636, "step": 19017 }, { "epoch": 2.726985947806137, "grad_norm": 0.28004926443099976, "learning_rate": 2.5018635636268496e-07, "loss": 0.2703, "step": 19018 }, { "epoch": 2.727129337539432, "grad_norm": 0.26604774594306946, "learning_rate": 2.4992583557386873e-07, "loss": 0.2887, "step": 19019 }, { "epoch": 2.7272727272727275, "grad_norm": 0.26755183935165405, "learning_rate": 2.4966544701981013e-07, "loss": 0.2895, "step": 19020 }, { "epoch": 2.7274161170060225, "grad_norm": 0.2618410289287567, "learning_rate": 2.4940519070775826e-07, "loss": 0.2753, "step": 19021 }, { "epoch": 2.7275595067393175, "grad_norm": 0.24906527996063232, "learning_rate": 2.4914506664495796e-07, "loss": 0.2799, "step": 19022 }, { "epoch": 2.7277028964726124, "grad_norm": 0.2701036334037781, "learning_rate": 2.4888507483865063e-07, "loss": 0.2823, "step": 19023 }, { "epoch": 2.7278462862059074, "grad_norm": 0.28257572650909424, "learning_rate": 2.48625215296075e-07, "loss": 0.2841, "step": 19024 }, { "epoch": 2.727989675939203, "grad_norm": 0.2544114589691162, "learning_rate": 2.4836548802446404e-07, "loss": 0.2661, "step": 19025 }, { "epoch": 2.728133065672498, "grad_norm": 0.2692151367664337, "learning_rate": 2.4810589303104815e-07, "loss": 0.2736, "step": 19026 }, { "epoch": 2.728276455405793, "grad_norm": 0.26370155811309814, "learning_rate": 2.478464303230549e-07, "loss": 0.278, "step": 19027 }, { "epoch": 2.728419845139088, "grad_norm": 0.27691423892974854, "learning_rate": 2.4758709990770626e-07, "loss": 0.3042, "step": 19028 }, { "epoch": 2.728563234872383, "grad_norm": 0.2696586549282074, "learning_rate": 2.4732790179222313e-07, "loss": 0.2754, "step": 19029 }, { "epoch": 2.728706624605678, "grad_norm": 0.26095452904701233, "learning_rate": 2.4706883598381967e-07, "loss": 0.2986, "step": 19030 }, { "epoch": 2.728850014338973, "grad_norm": 0.2771945595741272, "learning_rate": 2.468099024897086e-07, "loss": 0.2727, "step": 19031 }, { "epoch": 2.7289934040722685, "grad_norm": 0.25898605585098267, "learning_rate": 2.465511013170979e-07, "loss": 0.2891, "step": 19032 }, { "epoch": 2.7291367938055635, "grad_norm": 0.2612774968147278, "learning_rate": 2.462924324731919e-07, "loss": 0.2704, "step": 19033 }, { "epoch": 2.7292801835388585, "grad_norm": 0.27682143449783325, "learning_rate": 2.4603389596519254e-07, "loss": 0.2771, "step": 19034 }, { "epoch": 2.729423573272154, "grad_norm": 0.25456753373146057, "learning_rate": 2.457754918002969e-07, "loss": 0.2964, "step": 19035 }, { "epoch": 2.729566963005449, "grad_norm": 0.2585645318031311, "learning_rate": 2.4551721998569813e-07, "loss": 0.2802, "step": 19036 }, { "epoch": 2.729710352738744, "grad_norm": 0.2545549273490906, "learning_rate": 2.4525908052858536e-07, "loss": 0.2742, "step": 19037 }, { "epoch": 2.729853742472039, "grad_norm": 0.28493180871009827, "learning_rate": 2.4500107343614633e-07, "loss": 0.2813, "step": 19038 }, { "epoch": 2.7299971322053342, "grad_norm": 0.2623644173145294, "learning_rate": 2.4474319871556185e-07, "loss": 0.2788, "step": 19039 }, { "epoch": 2.730140521938629, "grad_norm": 0.26377755403518677, "learning_rate": 2.4448545637401287e-07, "loss": 0.2753, "step": 19040 }, { "epoch": 2.730283911671924, "grad_norm": 0.26873207092285156, "learning_rate": 2.442278464186731e-07, "loss": 0.2792, "step": 19041 }, { "epoch": 2.7304273014052196, "grad_norm": 0.27609822154045105, "learning_rate": 2.439703688567141e-07, "loss": 0.2732, "step": 19042 }, { "epoch": 2.7305706911385146, "grad_norm": 0.2552575469017029, "learning_rate": 2.437130236953045e-07, "loss": 0.2782, "step": 19043 }, { "epoch": 2.7307140808718096, "grad_norm": 0.2670683264732361, "learning_rate": 2.4345581094160685e-07, "loss": 0.2763, "step": 19044 }, { "epoch": 2.7308574706051045, "grad_norm": 0.28374066948890686, "learning_rate": 2.4319873060278333e-07, "loss": 0.2897, "step": 19045 }, { "epoch": 2.7310008603383995, "grad_norm": 0.2666372060775757, "learning_rate": 2.429417826859898e-07, "loss": 0.2686, "step": 19046 }, { "epoch": 2.731144250071695, "grad_norm": 0.2776396572589874, "learning_rate": 2.4268496719837884e-07, "loss": 0.2739, "step": 19047 }, { "epoch": 2.73128763980499, "grad_norm": 0.2758757472038269, "learning_rate": 2.424282841471004e-07, "loss": 0.2773, "step": 19048 }, { "epoch": 2.7314310295382853, "grad_norm": 0.27384623885154724, "learning_rate": 2.4217173353930036e-07, "loss": 0.2862, "step": 19049 }, { "epoch": 2.7315744192715803, "grad_norm": 0.2610374391078949, "learning_rate": 2.4191531538211963e-07, "loss": 0.2773, "step": 19050 }, { "epoch": 2.7317178090048753, "grad_norm": 0.26076066493988037, "learning_rate": 2.4165902968269874e-07, "loss": 0.268, "step": 19051 }, { "epoch": 2.7318611987381702, "grad_norm": 0.2692798674106598, "learning_rate": 2.414028764481696e-07, "loss": 0.2764, "step": 19052 }, { "epoch": 2.732004588471465, "grad_norm": 0.2644728422164917, "learning_rate": 2.411468556856639e-07, "loss": 0.2899, "step": 19053 }, { "epoch": 2.7321479782047606, "grad_norm": 0.25797197222709656, "learning_rate": 2.408909674023097e-07, "loss": 0.2783, "step": 19054 }, { "epoch": 2.7322913679380556, "grad_norm": 0.2728690803050995, "learning_rate": 2.406352116052302e-07, "loss": 0.303, "step": 19055 }, { "epoch": 2.7324347576713506, "grad_norm": 0.25789251923561096, "learning_rate": 2.403795883015447e-07, "loss": 0.2778, "step": 19056 }, { "epoch": 2.732578147404646, "grad_norm": 0.26753130555152893, "learning_rate": 2.401240974983704e-07, "loss": 0.2892, "step": 19057 }, { "epoch": 2.732721537137941, "grad_norm": 0.26471179723739624, "learning_rate": 2.398687392028187e-07, "loss": 0.2713, "step": 19058 }, { "epoch": 2.732864926871236, "grad_norm": 0.26872509717941284, "learning_rate": 2.39613513421999e-07, "loss": 0.284, "step": 19059 }, { "epoch": 2.733008316604531, "grad_norm": 0.2745152711868286, "learning_rate": 2.3935842016301613e-07, "loss": 0.2822, "step": 19060 }, { "epoch": 2.7331517063378263, "grad_norm": 0.2702047824859619, "learning_rate": 2.3910345943297166e-07, "loss": 0.2844, "step": 19061 }, { "epoch": 2.7332950960711213, "grad_norm": 0.26824721693992615, "learning_rate": 2.388486312389637e-07, "loss": 0.2772, "step": 19062 }, { "epoch": 2.7334384858044163, "grad_norm": 0.28237900137901306, "learning_rate": 2.3859393558808566e-07, "loss": 0.2856, "step": 19063 }, { "epoch": 2.7335818755377117, "grad_norm": 0.28652775287628174, "learning_rate": 2.3833937248742778e-07, "loss": 0.2831, "step": 19064 }, { "epoch": 2.7337252652710067, "grad_norm": 0.2677241861820221, "learning_rate": 2.3808494194407672e-07, "loss": 0.2743, "step": 19065 }, { "epoch": 2.7338686550043017, "grad_norm": 0.24439263343811035, "learning_rate": 2.3783064396511623e-07, "loss": 0.2616, "step": 19066 }, { "epoch": 2.7340120447375966, "grad_norm": 0.2737894058227539, "learning_rate": 2.375764785576251e-07, "loss": 0.2732, "step": 19067 }, { "epoch": 2.734155434470892, "grad_norm": 0.2537834346294403, "learning_rate": 2.3732244572867824e-07, "loss": 0.2818, "step": 19068 }, { "epoch": 2.734298824204187, "grad_norm": 0.2478361874818802, "learning_rate": 2.370685454853483e-07, "loss": 0.2872, "step": 19069 }, { "epoch": 2.734442213937482, "grad_norm": 0.26076826453208923, "learning_rate": 2.3681477783470351e-07, "loss": 0.2707, "step": 19070 }, { "epoch": 2.7345856036707774, "grad_norm": 0.26798340678215027, "learning_rate": 2.3656114278380772e-07, "loss": 0.2769, "step": 19071 }, { "epoch": 2.7347289934040724, "grad_norm": 0.2671459913253784, "learning_rate": 2.3630764033972243e-07, "loss": 0.296, "step": 19072 }, { "epoch": 2.7348723831373674, "grad_norm": 0.2605930268764496, "learning_rate": 2.360542705095048e-07, "loss": 0.2746, "step": 19073 }, { "epoch": 2.7350157728706623, "grad_norm": 0.2662355601787567, "learning_rate": 2.3580103330020753e-07, "loss": 0.2857, "step": 19074 }, { "epoch": 2.7351591626039573, "grad_norm": 0.26872849464416504, "learning_rate": 2.3554792871888111e-07, "loss": 0.2872, "step": 19075 }, { "epoch": 2.7353025523372527, "grad_norm": 0.26935485005378723, "learning_rate": 2.3529495677257096e-07, "loss": 0.2886, "step": 19076 }, { "epoch": 2.7354459420705477, "grad_norm": 0.27075016498565674, "learning_rate": 2.3504211746831985e-07, "loss": 0.2675, "step": 19077 }, { "epoch": 2.735589331803843, "grad_norm": 0.25931859016418457, "learning_rate": 2.3478941081316654e-07, "loss": 0.2866, "step": 19078 }, { "epoch": 2.735732721537138, "grad_norm": 0.27487877011299133, "learning_rate": 2.345368368141454e-07, "loss": 0.2963, "step": 19079 }, { "epoch": 2.735876111270433, "grad_norm": 0.27644285559654236, "learning_rate": 2.3428439547828807e-07, "loss": 0.2872, "step": 19080 }, { "epoch": 2.736019501003728, "grad_norm": 0.28372085094451904, "learning_rate": 2.3403208681262224e-07, "loss": 0.2799, "step": 19081 }, { "epoch": 2.736162890737023, "grad_norm": 0.3031657636165619, "learning_rate": 2.3377991082417172e-07, "loss": 0.2821, "step": 19082 }, { "epoch": 2.7363062804703184, "grad_norm": 0.26385506987571716, "learning_rate": 2.3352786751995704e-07, "loss": 0.2767, "step": 19083 }, { "epoch": 2.7364496702036134, "grad_norm": 0.3054744005203247, "learning_rate": 2.3327595690699423e-07, "loss": 0.2711, "step": 19084 }, { "epoch": 2.7365930599369084, "grad_norm": 0.2940986454486847, "learning_rate": 2.3302417899229546e-07, "loss": 0.2977, "step": 19085 }, { "epoch": 2.736736449670204, "grad_norm": 0.27500277757644653, "learning_rate": 2.3277253378287122e-07, "loss": 0.27, "step": 19086 }, { "epoch": 2.7368798394034988, "grad_norm": 0.26907774806022644, "learning_rate": 2.3252102128572596e-07, "loss": 0.277, "step": 19087 }, { "epoch": 2.7370232291367937, "grad_norm": 0.25513285398483276, "learning_rate": 2.322696415078618e-07, "loss": 0.2746, "step": 19088 }, { "epoch": 2.7371666188700887, "grad_norm": 0.2614436149597168, "learning_rate": 2.3201839445627704e-07, "loss": 0.2767, "step": 19089 }, { "epoch": 2.737310008603384, "grad_norm": 0.2598949372768402, "learning_rate": 2.3176728013796501e-07, "loss": 0.2654, "step": 19090 }, { "epoch": 2.737453398336679, "grad_norm": 0.251021146774292, "learning_rate": 2.315162985599173e-07, "loss": 0.2914, "step": 19091 }, { "epoch": 2.737596788069974, "grad_norm": 0.28312239050865173, "learning_rate": 2.3126544972912058e-07, "loss": 0.2761, "step": 19092 }, { "epoch": 2.7377401778032695, "grad_norm": 0.27160871028900146, "learning_rate": 2.3101473365255756e-07, "loss": 0.2789, "step": 19093 }, { "epoch": 2.7378835675365645, "grad_norm": 0.2565254271030426, "learning_rate": 2.3076415033720877e-07, "loss": 0.2795, "step": 19094 }, { "epoch": 2.7380269572698595, "grad_norm": 0.2702280879020691, "learning_rate": 2.3051369979004922e-07, "loss": 0.2861, "step": 19095 }, { "epoch": 2.7381703470031544, "grad_norm": 0.26666760444641113, "learning_rate": 2.3026338201805166e-07, "loss": 0.2744, "step": 19096 }, { "epoch": 2.7383137367364494, "grad_norm": 0.2724059820175171, "learning_rate": 2.3001319702818437e-07, "loss": 0.2475, "step": 19097 }, { "epoch": 2.738457126469745, "grad_norm": 0.2621314823627472, "learning_rate": 2.297631448274118e-07, "loss": 0.28, "step": 19098 }, { "epoch": 2.73860051620304, "grad_norm": 0.26351597905158997, "learning_rate": 2.2951322542269561e-07, "loss": 0.2859, "step": 19099 }, { "epoch": 2.738743905936335, "grad_norm": 0.27397435903549194, "learning_rate": 2.2926343882099355e-07, "loss": 0.2642, "step": 19100 }, { "epoch": 2.73888729566963, "grad_norm": 0.25684046745300293, "learning_rate": 2.2901378502925731e-07, "loss": 0.2628, "step": 19101 }, { "epoch": 2.739030685402925, "grad_norm": 0.2739541232585907, "learning_rate": 2.2876426405443852e-07, "loss": 0.2801, "step": 19102 }, { "epoch": 2.73917407513622, "grad_norm": 0.27745890617370605, "learning_rate": 2.285148759034833e-07, "loss": 0.2936, "step": 19103 }, { "epoch": 2.739317464869515, "grad_norm": 0.2792493402957916, "learning_rate": 2.282656205833339e-07, "loss": 0.2898, "step": 19104 }, { "epoch": 2.7394608546028105, "grad_norm": 0.27098262310028076, "learning_rate": 2.2801649810092975e-07, "loss": 0.2746, "step": 19105 }, { "epoch": 2.7396042443361055, "grad_norm": 0.2928994596004486, "learning_rate": 2.2776750846320528e-07, "loss": 0.3106, "step": 19106 }, { "epoch": 2.7397476340694005, "grad_norm": 0.26999083161354065, "learning_rate": 2.275186516770922e-07, "loss": 0.2742, "step": 19107 }, { "epoch": 2.739891023802696, "grad_norm": 0.2605553865432739, "learning_rate": 2.2726992774951828e-07, "loss": 0.2766, "step": 19108 }, { "epoch": 2.740034413535991, "grad_norm": 0.2800056040287018, "learning_rate": 2.2702133668740745e-07, "loss": 0.2741, "step": 19109 }, { "epoch": 2.740177803269286, "grad_norm": 0.2720387578010559, "learning_rate": 2.2677287849768082e-07, "loss": 0.2533, "step": 19110 }, { "epoch": 2.740321193002581, "grad_norm": 0.26475444436073303, "learning_rate": 2.2652455318725452e-07, "loss": 0.2699, "step": 19111 }, { "epoch": 2.7404645827358762, "grad_norm": 0.2637031376361847, "learning_rate": 2.2627636076304194e-07, "loss": 0.2684, "step": 19112 }, { "epoch": 2.740607972469171, "grad_norm": 0.2663588523864746, "learning_rate": 2.2602830123195142e-07, "loss": 0.2756, "step": 19113 }, { "epoch": 2.740751362202466, "grad_norm": 0.2525522708892822, "learning_rate": 2.2578037460088963e-07, "loss": 0.2708, "step": 19114 }, { "epoch": 2.7408947519357616, "grad_norm": 0.27180349826812744, "learning_rate": 2.2553258087675832e-07, "loss": 0.2798, "step": 19115 }, { "epoch": 2.7410381416690566, "grad_norm": 0.2644978165626526, "learning_rate": 2.2528492006645587e-07, "loss": 0.2806, "step": 19116 }, { "epoch": 2.7411815314023515, "grad_norm": 0.2914731204509735, "learning_rate": 2.2503739217687615e-07, "loss": 0.2771, "step": 19117 }, { "epoch": 2.7413249211356465, "grad_norm": 0.25987666845321655, "learning_rate": 2.247899972149098e-07, "loss": 0.271, "step": 19118 }, { "epoch": 2.741468310868942, "grad_norm": 0.26444828510284424, "learning_rate": 2.2454273518744407e-07, "loss": 0.2838, "step": 19119 }, { "epoch": 2.741611700602237, "grad_norm": 0.2636229991912842, "learning_rate": 2.242956061013629e-07, "loss": 0.2609, "step": 19120 }, { "epoch": 2.741755090335532, "grad_norm": 0.25358453392982483, "learning_rate": 2.2404860996354583e-07, "loss": 0.2751, "step": 19121 }, { "epoch": 2.7418984800688273, "grad_norm": 0.2693180441856384, "learning_rate": 2.238017467808684e-07, "loss": 0.2983, "step": 19122 }, { "epoch": 2.7420418698021223, "grad_norm": 0.2797333896160126, "learning_rate": 2.235550165602035e-07, "loss": 0.2784, "step": 19123 }, { "epoch": 2.7421852595354173, "grad_norm": 0.28639429807662964, "learning_rate": 2.2330841930841895e-07, "loss": 0.2866, "step": 19124 }, { "epoch": 2.7423286492687122, "grad_norm": 0.2723079323768616, "learning_rate": 2.2306195503237983e-07, "loss": 0.2621, "step": 19125 }, { "epoch": 2.742472039002007, "grad_norm": 0.2502460181713104, "learning_rate": 2.2281562373894782e-07, "loss": 0.2827, "step": 19126 }, { "epoch": 2.7426154287353026, "grad_norm": 0.2688086926937103, "learning_rate": 2.2256942543498085e-07, "loss": 0.2989, "step": 19127 }, { "epoch": 2.7427588184685976, "grad_norm": 0.2690618634223938, "learning_rate": 2.2232336012733113e-07, "loss": 0.2915, "step": 19128 }, { "epoch": 2.742902208201893, "grad_norm": 0.3036329448223114, "learning_rate": 2.2207742782284936e-07, "loss": 0.2763, "step": 19129 }, { "epoch": 2.743045597935188, "grad_norm": 0.27235665917396545, "learning_rate": 2.218316285283828e-07, "loss": 0.2799, "step": 19130 }, { "epoch": 2.743188987668483, "grad_norm": 0.285500705242157, "learning_rate": 2.2158596225077377e-07, "loss": 0.2831, "step": 19131 }, { "epoch": 2.743332377401778, "grad_norm": 0.27996617555618286, "learning_rate": 2.2134042899686126e-07, "loss": 0.2885, "step": 19132 }, { "epoch": 2.743475767135073, "grad_norm": 0.28262853622436523, "learning_rate": 2.210950287734792e-07, "loss": 0.2928, "step": 19133 }, { "epoch": 2.7436191568683683, "grad_norm": 0.271562784910202, "learning_rate": 2.2084976158746052e-07, "loss": 0.3038, "step": 19134 }, { "epoch": 2.7437625466016633, "grad_norm": 0.26973479986190796, "learning_rate": 2.2060462744563304e-07, "loss": 0.2799, "step": 19135 }, { "epoch": 2.7439059363349583, "grad_norm": 0.2577759325504303, "learning_rate": 2.203596263548202e-07, "loss": 0.2903, "step": 19136 }, { "epoch": 2.7440493260682537, "grad_norm": 0.2694447338581085, "learning_rate": 2.201147583218438e-07, "loss": 0.2701, "step": 19137 }, { "epoch": 2.7441927158015487, "grad_norm": 0.26890647411346436, "learning_rate": 2.1987002335351947e-07, "loss": 0.2883, "step": 19138 }, { "epoch": 2.7443361055348436, "grad_norm": 0.2715364396572113, "learning_rate": 2.1962542145665955e-07, "loss": 0.2641, "step": 19139 }, { "epoch": 2.7444794952681386, "grad_norm": 0.27254799008369446, "learning_rate": 2.1938095263807525e-07, "loss": 0.2923, "step": 19140 }, { "epoch": 2.744622885001434, "grad_norm": 0.2707917094230652, "learning_rate": 2.1913661690457054e-07, "loss": 0.2758, "step": 19141 }, { "epoch": 2.744766274734729, "grad_norm": 0.2729191780090332, "learning_rate": 2.1889241426294837e-07, "loss": 0.2946, "step": 19142 }, { "epoch": 2.744909664468024, "grad_norm": 0.2719533443450928, "learning_rate": 2.1864834472000717e-07, "loss": 0.2774, "step": 19143 }, { "epoch": 2.7450530542013194, "grad_norm": 0.2556860148906708, "learning_rate": 2.1840440828254039e-07, "loss": 0.285, "step": 19144 }, { "epoch": 2.7451964439346144, "grad_norm": 0.2698756158351898, "learning_rate": 2.1816060495733925e-07, "loss": 0.3029, "step": 19145 }, { "epoch": 2.7453398336679093, "grad_norm": 0.25780677795410156, "learning_rate": 2.1791693475119115e-07, "loss": 0.2885, "step": 19146 }, { "epoch": 2.7454832234012043, "grad_norm": 0.2549867630004883, "learning_rate": 2.1767339767087948e-07, "loss": 0.2836, "step": 19147 }, { "epoch": 2.7456266131344993, "grad_norm": 0.2774147689342499, "learning_rate": 2.1742999372318385e-07, "loss": 0.2749, "step": 19148 }, { "epoch": 2.7457700028677947, "grad_norm": 0.25323620438575745, "learning_rate": 2.1718672291487996e-07, "loss": 0.2841, "step": 19149 }, { "epoch": 2.7459133926010897, "grad_norm": 0.2669677138328552, "learning_rate": 2.1694358525274073e-07, "loss": 0.2873, "step": 19150 }, { "epoch": 2.746056782334385, "grad_norm": 0.2681700587272644, "learning_rate": 2.1670058074353406e-07, "loss": 0.2866, "step": 19151 }, { "epoch": 2.74620017206768, "grad_norm": 0.27497613430023193, "learning_rate": 2.164577093940251e-07, "loss": 0.2924, "step": 19152 }, { "epoch": 2.746343561800975, "grad_norm": 0.27324795722961426, "learning_rate": 2.162149712109751e-07, "loss": 0.2883, "step": 19153 }, { "epoch": 2.74648695153427, "grad_norm": 0.279127299785614, "learning_rate": 2.1597236620114259e-07, "loss": 0.2636, "step": 19154 }, { "epoch": 2.746630341267565, "grad_norm": 0.2769262194633484, "learning_rate": 2.1572989437127879e-07, "loss": 0.2823, "step": 19155 }, { "epoch": 2.7467737310008604, "grad_norm": 0.2589435577392578, "learning_rate": 2.1548755572813552e-07, "loss": 0.2728, "step": 19156 }, { "epoch": 2.7469171207341554, "grad_norm": 0.25583314895629883, "learning_rate": 2.152453502784585e-07, "loss": 0.3081, "step": 19157 }, { "epoch": 2.7470605104674504, "grad_norm": 0.26300424337387085, "learning_rate": 2.1500327802899122e-07, "loss": 0.2608, "step": 19158 }, { "epoch": 2.747203900200746, "grad_norm": 0.26857367157936096, "learning_rate": 2.1476133898647223e-07, "loss": 0.2792, "step": 19159 }, { "epoch": 2.7473472899340408, "grad_norm": 0.25863558053970337, "learning_rate": 2.1451953315763552e-07, "loss": 0.2761, "step": 19160 }, { "epoch": 2.7474906796673357, "grad_norm": 0.28891634941101074, "learning_rate": 2.1427786054921407e-07, "loss": 0.2857, "step": 19161 }, { "epoch": 2.7476340694006307, "grad_norm": 0.27953556180000305, "learning_rate": 2.140363211679347e-07, "loss": 0.2595, "step": 19162 }, { "epoch": 2.747777459133926, "grad_norm": 0.2795710265636444, "learning_rate": 2.1379491502052207e-07, "loss": 0.2946, "step": 19163 }, { "epoch": 2.747920848867221, "grad_norm": 0.2679044306278229, "learning_rate": 2.135536421136969e-07, "loss": 0.2904, "step": 19164 }, { "epoch": 2.748064238600516, "grad_norm": 0.26793527603149414, "learning_rate": 2.133125024541749e-07, "loss": 0.2741, "step": 19165 }, { "epoch": 2.7482076283338115, "grad_norm": 0.2768501937389374, "learning_rate": 2.130714960486696e-07, "loss": 0.2896, "step": 19166 }, { "epoch": 2.7483510180671065, "grad_norm": 0.25926756858825684, "learning_rate": 2.1283062290389068e-07, "loss": 0.2751, "step": 19167 }, { "epoch": 2.7484944078004014, "grad_norm": 0.2693321704864502, "learning_rate": 2.125898830265427e-07, "loss": 0.2765, "step": 19168 }, { "epoch": 2.7486377975336964, "grad_norm": 0.27223020792007446, "learning_rate": 2.123492764233276e-07, "loss": 0.2673, "step": 19169 }, { "epoch": 2.748781187266992, "grad_norm": 0.2574272155761719, "learning_rate": 2.1210880310094495e-07, "loss": 0.2839, "step": 19170 }, { "epoch": 2.748924577000287, "grad_norm": 0.2978326082229614, "learning_rate": 2.1186846306608777e-07, "loss": 0.2786, "step": 19171 }, { "epoch": 2.749067966733582, "grad_norm": 0.2873396575450897, "learning_rate": 2.1162825632544625e-07, "loss": 0.2782, "step": 19172 }, { "epoch": 2.749211356466877, "grad_norm": 0.28281816840171814, "learning_rate": 2.1138818288570896e-07, "loss": 0.2842, "step": 19173 }, { "epoch": 2.749354746200172, "grad_norm": 0.2533956468105316, "learning_rate": 2.111482427535577e-07, "loss": 0.2846, "step": 19174 }, { "epoch": 2.749498135933467, "grad_norm": 0.26983150839805603, "learning_rate": 2.109084359356739e-07, "loss": 0.3012, "step": 19175 }, { "epoch": 2.749641525666762, "grad_norm": 0.2910747826099396, "learning_rate": 2.1066876243873157e-07, "loss": 0.2579, "step": 19176 }, { "epoch": 2.749784915400057, "grad_norm": 0.26555612683296204, "learning_rate": 2.1042922226940322e-07, "loss": 0.2794, "step": 19177 }, { "epoch": 2.7499283051333525, "grad_norm": 0.2587452232837677, "learning_rate": 2.101898154343579e-07, "loss": 0.2752, "step": 19178 }, { "epoch": 2.7500716948666475, "grad_norm": 0.2715988755226135, "learning_rate": 2.0995054194025978e-07, "loss": 0.2671, "step": 19179 }, { "epoch": 2.750215084599943, "grad_norm": 0.26868775486946106, "learning_rate": 2.0971140179377013e-07, "loss": 0.2755, "step": 19180 }, { "epoch": 2.750358474333238, "grad_norm": 0.2829989790916443, "learning_rate": 2.0947239500154649e-07, "loss": 0.2802, "step": 19181 }, { "epoch": 2.750501864066533, "grad_norm": 0.2648436427116394, "learning_rate": 2.0923352157024179e-07, "loss": 0.2759, "step": 19182 }, { "epoch": 2.750645253799828, "grad_norm": 0.2736935317516327, "learning_rate": 2.0899478150650688e-07, "loss": 0.2721, "step": 19183 }, { "epoch": 2.750788643533123, "grad_norm": 0.26784205436706543, "learning_rate": 2.0875617481698585e-07, "loss": 0.2801, "step": 19184 }, { "epoch": 2.750932033266418, "grad_norm": 0.2696950435638428, "learning_rate": 2.085177015083234e-07, "loss": 0.2864, "step": 19185 }, { "epoch": 2.751075422999713, "grad_norm": 0.2792324125766754, "learning_rate": 2.0827936158715868e-07, "loss": 0.282, "step": 19186 }, { "epoch": 2.751218812733008, "grad_norm": 0.26531311869621277, "learning_rate": 2.0804115506012413e-07, "loss": 0.3181, "step": 19187 }, { "epoch": 2.7513622024663036, "grad_norm": 0.2633002698421478, "learning_rate": 2.078030819338528e-07, "loss": 0.2694, "step": 19188 }, { "epoch": 2.7515055921995986, "grad_norm": 0.26621827483177185, "learning_rate": 2.075651422149716e-07, "loss": 0.2688, "step": 19189 }, { "epoch": 2.7516489819328935, "grad_norm": 0.2537405788898468, "learning_rate": 2.0732733591010523e-07, "loss": 0.2561, "step": 19190 }, { "epoch": 2.7517923716661885, "grad_norm": 0.27725356817245483, "learning_rate": 2.0708966302587342e-07, "loss": 0.2914, "step": 19191 }, { "epoch": 2.751935761399484, "grad_norm": 0.2726204991340637, "learning_rate": 2.0685212356889195e-07, "loss": 0.2844, "step": 19192 }, { "epoch": 2.752079151132779, "grad_norm": 0.27214348316192627, "learning_rate": 2.0661471754577444e-07, "loss": 0.2739, "step": 19193 }, { "epoch": 2.752222540866074, "grad_norm": 0.27511438727378845, "learning_rate": 2.0637744496312894e-07, "loss": 0.2499, "step": 19194 }, { "epoch": 2.7523659305993693, "grad_norm": 0.28232645988464355, "learning_rate": 2.0614030582756184e-07, "loss": 0.2992, "step": 19195 }, { "epoch": 2.7525093203326643, "grad_norm": 0.28919717669487, "learning_rate": 2.059033001456745e-07, "loss": 0.2877, "step": 19196 }, { "epoch": 2.7526527100659592, "grad_norm": 0.2772729992866516, "learning_rate": 2.05666427924065e-07, "loss": 0.2841, "step": 19197 }, { "epoch": 2.752796099799254, "grad_norm": 0.2789570093154907, "learning_rate": 2.0542968916932638e-07, "loss": 0.2828, "step": 19198 }, { "epoch": 2.7529394895325496, "grad_norm": 0.26825934648513794, "learning_rate": 2.0519308388805003e-07, "loss": 0.2852, "step": 19199 }, { "epoch": 2.7530828792658446, "grad_norm": 0.2749183475971222, "learning_rate": 2.0495661208682183e-07, "loss": 0.2826, "step": 19200 }, { "epoch": 2.7532262689991396, "grad_norm": 0.2478608787059784, "learning_rate": 2.0472027377222592e-07, "loss": 0.2779, "step": 19201 }, { "epoch": 2.753369658732435, "grad_norm": 0.2704136371612549, "learning_rate": 2.0448406895084094e-07, "loss": 0.2743, "step": 19202 }, { "epoch": 2.75351304846573, "grad_norm": 0.26480987668037415, "learning_rate": 2.0424799762924276e-07, "loss": 0.2846, "step": 19203 }, { "epoch": 2.753656438199025, "grad_norm": 0.2736962139606476, "learning_rate": 2.0401205981400273e-07, "loss": 0.2883, "step": 19204 }, { "epoch": 2.75379982793232, "grad_norm": 0.271665096282959, "learning_rate": 2.0377625551168957e-07, "loss": 0.274, "step": 19205 }, { "epoch": 2.753943217665615, "grad_norm": 0.2611429691314697, "learning_rate": 2.0354058472886739e-07, "loss": 0.2445, "step": 19206 }, { "epoch": 2.7540866073989103, "grad_norm": 0.2710186839103699, "learning_rate": 2.0330504747209711e-07, "loss": 0.2812, "step": 19207 }, { "epoch": 2.7542299971322053, "grad_norm": 0.2737439274787903, "learning_rate": 2.0306964374793624e-07, "loss": 0.2862, "step": 19208 }, { "epoch": 2.7543733868655007, "grad_norm": 0.27479472756385803, "learning_rate": 2.0283437356293678e-07, "loss": 0.2957, "step": 19209 }, { "epoch": 2.7545167765987957, "grad_norm": 0.2680504322052002, "learning_rate": 2.0259923692364902e-07, "loss": 0.2717, "step": 19210 }, { "epoch": 2.7546601663320907, "grad_norm": 0.26180288195610046, "learning_rate": 2.023642338366183e-07, "loss": 0.3039, "step": 19211 }, { "epoch": 2.7548035560653856, "grad_norm": 0.28343161940574646, "learning_rate": 2.021293643083877e-07, "loss": 0.2747, "step": 19212 }, { "epoch": 2.7549469457986806, "grad_norm": 0.27220338582992554, "learning_rate": 2.0189462834549534e-07, "loss": 0.2942, "step": 19213 }, { "epoch": 2.755090335531976, "grad_norm": 0.2535424530506134, "learning_rate": 2.0166002595447487e-07, "loss": 0.2708, "step": 19214 }, { "epoch": 2.755233725265271, "grad_norm": 0.267135351896286, "learning_rate": 2.014255571418583e-07, "loss": 0.2657, "step": 19215 }, { "epoch": 2.755377114998566, "grad_norm": 0.26295721530914307, "learning_rate": 2.0119122191417262e-07, "loss": 0.2581, "step": 19216 }, { "epoch": 2.7555205047318614, "grad_norm": 0.2837994694709778, "learning_rate": 2.009570202779415e-07, "loss": 0.2721, "step": 19217 }, { "epoch": 2.7556638944651564, "grad_norm": 0.2614782750606537, "learning_rate": 2.0072295223968476e-07, "loss": 0.2662, "step": 19218 }, { "epoch": 2.7558072841984513, "grad_norm": 0.2555781304836273, "learning_rate": 2.0048901780591823e-07, "loss": 0.2723, "step": 19219 }, { "epoch": 2.7559506739317463, "grad_norm": 0.25067514181137085, "learning_rate": 2.0025521698315398e-07, "loss": 0.2778, "step": 19220 }, { "epoch": 2.7560940636650417, "grad_norm": 0.27644044160842896, "learning_rate": 2.000215497779018e-07, "loss": 0.2916, "step": 19221 }, { "epoch": 2.7562374533983367, "grad_norm": 0.277019739151001, "learning_rate": 1.9978801619666532e-07, "loss": 0.2843, "step": 19222 }, { "epoch": 2.7563808431316317, "grad_norm": 0.2768750488758087, "learning_rate": 1.9955461624594663e-07, "loss": 0.2889, "step": 19223 }, { "epoch": 2.756524232864927, "grad_norm": 0.2581595480442047, "learning_rate": 1.9932134993224328e-07, "loss": 0.2851, "step": 19224 }, { "epoch": 2.756667622598222, "grad_norm": 0.2548566460609436, "learning_rate": 1.9908821726204842e-07, "loss": 0.2912, "step": 19225 }, { "epoch": 2.756811012331517, "grad_norm": 0.25827309489250183, "learning_rate": 1.9885521824185238e-07, "loss": 0.3055, "step": 19226 }, { "epoch": 2.756954402064812, "grad_norm": 0.2583523690700531, "learning_rate": 1.9862235287814168e-07, "loss": 0.2732, "step": 19227 }, { "epoch": 2.757097791798107, "grad_norm": 0.26393869519233704, "learning_rate": 1.9838962117739834e-07, "loss": 0.2697, "step": 19228 }, { "epoch": 2.7572411815314024, "grad_norm": 0.2659062147140503, "learning_rate": 1.9815702314610275e-07, "loss": 0.2879, "step": 19229 }, { "epoch": 2.7573845712646974, "grad_norm": 0.27364811301231384, "learning_rate": 1.9792455879072803e-07, "loss": 0.2879, "step": 19230 }, { "epoch": 2.757527960997993, "grad_norm": 0.2735760509967804, "learning_rate": 1.976922281177468e-07, "loss": 0.2945, "step": 19231 }, { "epoch": 2.7576713507312878, "grad_norm": 0.2678731679916382, "learning_rate": 1.9746003113362665e-07, "loss": 0.29, "step": 19232 }, { "epoch": 2.7578147404645827, "grad_norm": 0.2633747458457947, "learning_rate": 1.9722796784483133e-07, "loss": 0.2815, "step": 19233 }, { "epoch": 2.7579581301978777, "grad_norm": 0.2825380861759186, "learning_rate": 1.9699603825782177e-07, "loss": 0.2748, "step": 19234 }, { "epoch": 2.7581015199311727, "grad_norm": 0.26787394285202026, "learning_rate": 1.9676424237905445e-07, "loss": 0.2908, "step": 19235 }, { "epoch": 2.758244909664468, "grad_norm": 0.2736760973930359, "learning_rate": 1.965325802149809e-07, "loss": 0.2677, "step": 19236 }, { "epoch": 2.758388299397763, "grad_norm": 0.44002801179885864, "learning_rate": 1.963010517720515e-07, "loss": 0.2826, "step": 19237 }, { "epoch": 2.758531689131058, "grad_norm": 0.2759598195552826, "learning_rate": 1.9606965705671167e-07, "loss": 0.2646, "step": 19238 }, { "epoch": 2.7586750788643535, "grad_norm": 0.2777186632156372, "learning_rate": 1.9583839607540233e-07, "loss": 0.2831, "step": 19239 }, { "epoch": 2.7588184685976485, "grad_norm": 0.2736745774745941, "learning_rate": 1.9560726883456172e-07, "loss": 0.2699, "step": 19240 }, { "epoch": 2.7589618583309434, "grad_norm": 0.264628142118454, "learning_rate": 1.9537627534062465e-07, "loss": 0.2862, "step": 19241 }, { "epoch": 2.7591052480642384, "grad_norm": 0.26972582936286926, "learning_rate": 1.9514541560002098e-07, "loss": 0.269, "step": 19242 }, { "epoch": 2.759248637797534, "grad_norm": 0.28149405121803284, "learning_rate": 1.949146896191778e-07, "loss": 0.2818, "step": 19243 }, { "epoch": 2.759392027530829, "grad_norm": 0.24556636810302734, "learning_rate": 1.9468409740451777e-07, "loss": 0.2867, "step": 19244 }, { "epoch": 2.7595354172641238, "grad_norm": 0.28504592180252075, "learning_rate": 1.944536389624613e-07, "loss": 0.2739, "step": 19245 }, { "epoch": 2.759678806997419, "grad_norm": 0.2551411986351013, "learning_rate": 1.9422331429942266e-07, "loss": 0.2616, "step": 19246 }, { "epoch": 2.759822196730714, "grad_norm": 0.2618476450443268, "learning_rate": 1.9399312342181452e-07, "loss": 0.3045, "step": 19247 }, { "epoch": 2.759965586464009, "grad_norm": 0.26655128598213196, "learning_rate": 1.9376306633604459e-07, "loss": 0.2705, "step": 19248 }, { "epoch": 2.760108976197304, "grad_norm": 0.2521322965621948, "learning_rate": 1.9353314304851713e-07, "loss": 0.2755, "step": 19249 }, { "epoch": 2.7602523659305995, "grad_norm": 0.27109917998313904, "learning_rate": 1.9330335356563368e-07, "loss": 0.2895, "step": 19250 }, { "epoch": 2.7603957556638945, "grad_norm": 0.26422595977783203, "learning_rate": 1.9307369789379137e-07, "loss": 0.2671, "step": 19251 }, { "epoch": 2.7605391453971895, "grad_norm": 0.2727474272251129, "learning_rate": 1.9284417603938177e-07, "loss": 0.2933, "step": 19252 }, { "epoch": 2.760682535130485, "grad_norm": 0.27709299325942993, "learning_rate": 1.9261478800879586e-07, "loss": 0.287, "step": 19253 }, { "epoch": 2.76082592486378, "grad_norm": 0.27564603090286255, "learning_rate": 1.9238553380841906e-07, "loss": 0.2795, "step": 19254 }, { "epoch": 2.760969314597075, "grad_norm": 0.27168038487434387, "learning_rate": 1.921564134446341e-07, "loss": 0.2826, "step": 19255 }, { "epoch": 2.76111270433037, "grad_norm": 0.2700217664241791, "learning_rate": 1.9192742692381805e-07, "loss": 0.2664, "step": 19256 }, { "epoch": 2.761256094063665, "grad_norm": 0.26336124539375305, "learning_rate": 1.9169857425234638e-07, "loss": 0.2887, "step": 19257 }, { "epoch": 2.76139948379696, "grad_norm": 0.26473504304885864, "learning_rate": 1.914698554365896e-07, "loss": 0.2856, "step": 19258 }, { "epoch": 2.761542873530255, "grad_norm": 0.2538434565067291, "learning_rate": 1.912412704829153e-07, "loss": 0.2572, "step": 19259 }, { "epoch": 2.7616862632635506, "grad_norm": 0.27077236771583557, "learning_rate": 1.9101281939768734e-07, "loss": 0.2891, "step": 19260 }, { "epoch": 2.7618296529968456, "grad_norm": 0.270150363445282, "learning_rate": 1.9078450218726397e-07, "loss": 0.2859, "step": 19261 }, { "epoch": 2.7619730427301405, "grad_norm": 0.2536330819129944, "learning_rate": 1.9055631885800342e-07, "loss": 0.2736, "step": 19262 }, { "epoch": 2.7621164324634355, "grad_norm": 0.25385424494743347, "learning_rate": 1.903282694162556e-07, "loss": 0.3125, "step": 19263 }, { "epoch": 2.7622598221967305, "grad_norm": 0.2630299925804138, "learning_rate": 1.9010035386837045e-07, "loss": 0.2784, "step": 19264 }, { "epoch": 2.762403211930026, "grad_norm": 0.2851463854312897, "learning_rate": 1.8987257222069232e-07, "loss": 0.2729, "step": 19265 }, { "epoch": 2.762546601663321, "grad_norm": 0.27503639459609985, "learning_rate": 1.8964492447956228e-07, "loss": 0.2747, "step": 19266 }, { "epoch": 2.762689991396616, "grad_norm": 0.281733900308609, "learning_rate": 1.8941741065131859e-07, "loss": 0.2824, "step": 19267 }, { "epoch": 2.7628333811299113, "grad_norm": 0.25193655490875244, "learning_rate": 1.8919003074229337e-07, "loss": 0.2741, "step": 19268 }, { "epoch": 2.7629767708632063, "grad_norm": 0.26793891191482544, "learning_rate": 1.8896278475881712e-07, "loss": 0.3101, "step": 19269 }, { "epoch": 2.7631201605965012, "grad_norm": 0.26833638548851013, "learning_rate": 1.8873567270721648e-07, "loss": 0.2705, "step": 19270 }, { "epoch": 2.763263550329796, "grad_norm": 0.2717937231063843, "learning_rate": 1.885086945938136e-07, "loss": 0.2626, "step": 19271 }, { "epoch": 2.7634069400630916, "grad_norm": 0.2767457067966461, "learning_rate": 1.8828185042492787e-07, "loss": 0.2779, "step": 19272 }, { "epoch": 2.7635503297963866, "grad_norm": 0.2784159481525421, "learning_rate": 1.88055140206872e-07, "loss": 0.2636, "step": 19273 }, { "epoch": 2.7636937195296816, "grad_norm": 0.2624930143356323, "learning_rate": 1.8782856394595984e-07, "loss": 0.2774, "step": 19274 }, { "epoch": 2.763837109262977, "grad_norm": 0.28912198543548584, "learning_rate": 1.87602121648498e-07, "loss": 0.2723, "step": 19275 }, { "epoch": 2.763980498996272, "grad_norm": 0.283792644739151, "learning_rate": 1.873758133207898e-07, "loss": 0.2632, "step": 19276 }, { "epoch": 2.764123888729567, "grad_norm": 0.2554209530353546, "learning_rate": 1.8714963896913628e-07, "loss": 0.2753, "step": 19277 }, { "epoch": 2.764267278462862, "grad_norm": 0.24705500900745392, "learning_rate": 1.8692359859983357e-07, "loss": 0.2701, "step": 19278 }, { "epoch": 2.764410668196157, "grad_norm": 0.29080870747566223, "learning_rate": 1.8669769221917323e-07, "loss": 0.2726, "step": 19279 }, { "epoch": 2.7645540579294523, "grad_norm": 0.27510491013526917, "learning_rate": 1.8647191983344527e-07, "loss": 0.2797, "step": 19280 }, { "epoch": 2.7646974476627473, "grad_norm": 0.26800599694252014, "learning_rate": 1.8624628144893463e-07, "loss": 0.2829, "step": 19281 }, { "epoch": 2.7648408373960427, "grad_norm": 0.2706959843635559, "learning_rate": 1.860207770719219e-07, "loss": 0.277, "step": 19282 }, { "epoch": 2.7649842271293377, "grad_norm": 0.2668168544769287, "learning_rate": 1.8579540670868644e-07, "loss": 0.2774, "step": 19283 }, { "epoch": 2.7651276168626326, "grad_norm": 0.2596869170665741, "learning_rate": 1.855701703655005e-07, "loss": 0.2857, "step": 19284 }, { "epoch": 2.7652710065959276, "grad_norm": 0.2640456259250641, "learning_rate": 1.8534506804863518e-07, "loss": 0.275, "step": 19285 }, { "epoch": 2.7654143963292226, "grad_norm": 0.26992711424827576, "learning_rate": 1.8512009976435652e-07, "loss": 0.2762, "step": 19286 }, { "epoch": 2.765557786062518, "grad_norm": 0.27354660630226135, "learning_rate": 1.848952655189279e-07, "loss": 0.2691, "step": 19287 }, { "epoch": 2.765701175795813, "grad_norm": 0.28255558013916016, "learning_rate": 1.8467056531860817e-07, "loss": 0.2864, "step": 19288 }, { "epoch": 2.765844565529108, "grad_norm": 0.2607920467853546, "learning_rate": 1.844459991696529e-07, "loss": 0.2573, "step": 19289 }, { "epoch": 2.7659879552624034, "grad_norm": 0.2703031897544861, "learning_rate": 1.8422156707831318e-07, "loss": 0.2662, "step": 19290 }, { "epoch": 2.7661313449956983, "grad_norm": 0.26506611704826355, "learning_rate": 1.8399726905083627e-07, "loss": 0.2668, "step": 19291 }, { "epoch": 2.7662747347289933, "grad_norm": 0.27382394671440125, "learning_rate": 1.8377310509346712e-07, "loss": 0.2679, "step": 19292 }, { "epoch": 2.7664181244622883, "grad_norm": 0.25162196159362793, "learning_rate": 1.8354907521244635e-07, "loss": 0.2783, "step": 19293 }, { "epoch": 2.7665615141955837, "grad_norm": 0.2720259428024292, "learning_rate": 1.8332517941401006e-07, "loss": 0.2849, "step": 19294 }, { "epoch": 2.7667049039288787, "grad_norm": 0.2743809223175049, "learning_rate": 1.8310141770439162e-07, "loss": 0.2754, "step": 19295 }, { "epoch": 2.7668482936621737, "grad_norm": 0.2793428897857666, "learning_rate": 1.828777900898193e-07, "loss": 0.2835, "step": 19296 }, { "epoch": 2.766991683395469, "grad_norm": 0.274136483669281, "learning_rate": 1.826542965765199e-07, "loss": 0.2759, "step": 19297 }, { "epoch": 2.767135073128764, "grad_norm": 0.27385154366493225, "learning_rate": 1.8243093717071392e-07, "loss": 0.2708, "step": 19298 }, { "epoch": 2.767278462862059, "grad_norm": 0.27988430857658386, "learning_rate": 1.8220771187862028e-07, "loss": 0.2872, "step": 19299 }, { "epoch": 2.767421852595354, "grad_norm": 0.26790115237236023, "learning_rate": 1.819846207064524e-07, "loss": 0.2703, "step": 19300 }, { "epoch": 2.7675652423286494, "grad_norm": 0.2588547170162201, "learning_rate": 1.8176166366042135e-07, "loss": 0.2703, "step": 19301 }, { "epoch": 2.7677086320619444, "grad_norm": 0.27149516344070435, "learning_rate": 1.815388407467339e-07, "loss": 0.2939, "step": 19302 }, { "epoch": 2.7678520217952394, "grad_norm": 0.270376056432724, "learning_rate": 1.813161519715928e-07, "loss": 0.2532, "step": 19303 }, { "epoch": 2.767995411528535, "grad_norm": 0.2572254538536072, "learning_rate": 1.8109359734119703e-07, "loss": 0.2874, "step": 19304 }, { "epoch": 2.7681388012618298, "grad_norm": 0.26302024722099304, "learning_rate": 1.8087117686174383e-07, "loss": 0.2521, "step": 19305 }, { "epoch": 2.7682821909951247, "grad_norm": 0.24269892275333405, "learning_rate": 1.8064889053942326e-07, "loss": 0.2734, "step": 19306 }, { "epoch": 2.7684255807284197, "grad_norm": 0.2708253860473633, "learning_rate": 1.8042673838042368e-07, "loss": 0.2842, "step": 19307 }, { "epoch": 2.7685689704617147, "grad_norm": 0.2805648446083069, "learning_rate": 1.8020472039092962e-07, "loss": 0.2533, "step": 19308 }, { "epoch": 2.76871236019501, "grad_norm": 0.25218406319618225, "learning_rate": 1.7998283657712224e-07, "loss": 0.302, "step": 19309 }, { "epoch": 2.768855749928305, "grad_norm": 0.2656272351741791, "learning_rate": 1.7976108694517824e-07, "loss": 0.2654, "step": 19310 }, { "epoch": 2.7689991396616005, "grad_norm": 0.27237796783447266, "learning_rate": 1.795394715012705e-07, "loss": 0.281, "step": 19311 }, { "epoch": 2.7691425293948955, "grad_norm": 0.26711076498031616, "learning_rate": 1.7931799025156848e-07, "loss": 0.2936, "step": 19312 }, { "epoch": 2.7692859191281904, "grad_norm": 0.24923574924468994, "learning_rate": 1.7909664320223841e-07, "loss": 0.2783, "step": 19313 }, { "epoch": 2.7694293088614854, "grad_norm": 0.26702436804771423, "learning_rate": 1.7887543035944144e-07, "loss": 0.268, "step": 19314 }, { "epoch": 2.7695726985947804, "grad_norm": 0.25451111793518066, "learning_rate": 1.7865435172933654e-07, "loss": 0.2742, "step": 19315 }, { "epoch": 2.769716088328076, "grad_norm": 0.2652062177658081, "learning_rate": 1.7843340731807822e-07, "loss": 0.2772, "step": 19316 }, { "epoch": 2.769859478061371, "grad_norm": 0.27206453680992126, "learning_rate": 1.782125971318166e-07, "loss": 0.2903, "step": 19317 }, { "epoch": 2.7700028677946658, "grad_norm": 0.278184711933136, "learning_rate": 1.7799192117669838e-07, "loss": 0.273, "step": 19318 }, { "epoch": 2.770146257527961, "grad_norm": 0.2666791081428528, "learning_rate": 1.7777137945886814e-07, "loss": 0.2903, "step": 19319 }, { "epoch": 2.770289647261256, "grad_norm": 0.26335591077804565, "learning_rate": 1.7755097198446425e-07, "loss": 0.2884, "step": 19320 }, { "epoch": 2.770433036994551, "grad_norm": 0.268418550491333, "learning_rate": 1.773306987596235e-07, "loss": 0.2718, "step": 19321 }, { "epoch": 2.770576426727846, "grad_norm": 0.2878900170326233, "learning_rate": 1.7711055979047708e-07, "loss": 0.2894, "step": 19322 }, { "epoch": 2.7707198164611415, "grad_norm": 0.2751958966255188, "learning_rate": 1.76890555083154e-07, "loss": 0.2763, "step": 19323 }, { "epoch": 2.7708632061944365, "grad_norm": 0.273781955242157, "learning_rate": 1.7667068464377823e-07, "loss": 0.2808, "step": 19324 }, { "epoch": 2.7710065959277315, "grad_norm": 0.26639798283576965, "learning_rate": 1.7645094847847043e-07, "loss": 0.2907, "step": 19325 }, { "epoch": 2.771149985661027, "grad_norm": 0.2716447114944458, "learning_rate": 1.7623134659334962e-07, "loss": 0.2889, "step": 19326 }, { "epoch": 2.771293375394322, "grad_norm": 0.27267172932624817, "learning_rate": 1.7601187899452643e-07, "loss": 0.2894, "step": 19327 }, { "epoch": 2.771436765127617, "grad_norm": 0.2519206702709198, "learning_rate": 1.7579254568811266e-07, "loss": 0.2727, "step": 19328 }, { "epoch": 2.771580154860912, "grad_norm": 0.27534377574920654, "learning_rate": 1.7557334668021231e-07, "loss": 0.2855, "step": 19329 }, { "epoch": 2.771723544594207, "grad_norm": 0.26151180267333984, "learning_rate": 1.7535428197692938e-07, "loss": 0.2654, "step": 19330 }, { "epoch": 2.771866934327502, "grad_norm": 0.25536853075027466, "learning_rate": 1.7513535158436124e-07, "loss": 0.2874, "step": 19331 }, { "epoch": 2.772010324060797, "grad_norm": 0.269937664270401, "learning_rate": 1.7491655550860353e-07, "loss": 0.2693, "step": 19332 }, { "epoch": 2.7721537137940926, "grad_norm": 0.26756390929222107, "learning_rate": 1.746978937557464e-07, "loss": 0.2922, "step": 19333 }, { "epoch": 2.7722971035273876, "grad_norm": 0.27151671051979065, "learning_rate": 1.7447936633187668e-07, "loss": 0.2646, "step": 19334 }, { "epoch": 2.7724404932606825, "grad_norm": 0.26025575399398804, "learning_rate": 1.7426097324307888e-07, "loss": 0.2625, "step": 19335 }, { "epoch": 2.7725838829939775, "grad_norm": 0.2630026340484619, "learning_rate": 1.7404271449543154e-07, "loss": 0.2882, "step": 19336 }, { "epoch": 2.7727272727272725, "grad_norm": 0.2600478231906891, "learning_rate": 1.738245900950125e-07, "loss": 0.2997, "step": 19337 }, { "epoch": 2.772870662460568, "grad_norm": 0.25756195187568665, "learning_rate": 1.73606600047892e-07, "loss": 0.2773, "step": 19338 }, { "epoch": 2.773014052193863, "grad_norm": 0.2545440196990967, "learning_rate": 1.7338874436013953e-07, "loss": 0.2785, "step": 19339 }, { "epoch": 2.773157441927158, "grad_norm": 0.2756306827068329, "learning_rate": 1.7317102303781974e-07, "loss": 0.2801, "step": 19340 }, { "epoch": 2.7733008316604533, "grad_norm": 0.26422542333602905, "learning_rate": 1.7295343608699332e-07, "loss": 0.2889, "step": 19341 }, { "epoch": 2.7734442213937482, "grad_norm": 0.2708044946193695, "learning_rate": 1.7273598351371768e-07, "loss": 0.2822, "step": 19342 }, { "epoch": 2.773587611127043, "grad_norm": 0.2782455086708069, "learning_rate": 1.7251866532404737e-07, "loss": 0.2974, "step": 19343 }, { "epoch": 2.773731000860338, "grad_norm": 0.2842322885990143, "learning_rate": 1.7230148152403094e-07, "loss": 0.274, "step": 19344 }, { "epoch": 2.7738743905936336, "grad_norm": 0.27090558409690857, "learning_rate": 1.720844321197146e-07, "loss": 0.2828, "step": 19345 }, { "epoch": 2.7740177803269286, "grad_norm": 0.2610776424407959, "learning_rate": 1.7186751711714133e-07, "loss": 0.2792, "step": 19346 }, { "epoch": 2.7741611700602236, "grad_norm": 0.24955514073371887, "learning_rate": 1.716507365223491e-07, "loss": 0.266, "step": 19347 }, { "epoch": 2.774304559793519, "grad_norm": 0.2745818793773651, "learning_rate": 1.7143409034137303e-07, "loss": 0.2854, "step": 19348 }, { "epoch": 2.774447949526814, "grad_norm": 0.2627568244934082, "learning_rate": 1.712175785802439e-07, "loss": 0.293, "step": 19349 }, { "epoch": 2.774591339260109, "grad_norm": 0.2885534167289734, "learning_rate": 1.7100120124498964e-07, "loss": 0.2736, "step": 19350 }, { "epoch": 2.774734728993404, "grad_norm": 0.27161893248558044, "learning_rate": 1.7078495834163323e-07, "loss": 0.2811, "step": 19351 }, { "epoch": 2.7748781187266993, "grad_norm": 0.2605935037136078, "learning_rate": 1.7056884987619537e-07, "loss": 0.2739, "step": 19352 }, { "epoch": 2.7750215084599943, "grad_norm": 0.26429957151412964, "learning_rate": 1.7035287585469186e-07, "loss": 0.2711, "step": 19353 }, { "epoch": 2.7751648981932893, "grad_norm": 0.2655799090862274, "learning_rate": 1.7013703628313393e-07, "loss": 0.2869, "step": 19354 }, { "epoch": 2.7753082879265847, "grad_norm": 0.27596986293792725, "learning_rate": 1.6992133116753185e-07, "loss": 0.2843, "step": 19355 }, { "epoch": 2.7754516776598797, "grad_norm": 0.283935010433197, "learning_rate": 1.6970576051388965e-07, "loss": 0.2829, "step": 19356 }, { "epoch": 2.7755950673931746, "grad_norm": 0.27128297090530396, "learning_rate": 1.6949032432820867e-07, "loss": 0.2676, "step": 19357 }, { "epoch": 2.7757384571264696, "grad_norm": 0.2760198712348938, "learning_rate": 1.6927502261648631e-07, "loss": 0.2525, "step": 19358 }, { "epoch": 2.7758818468597646, "grad_norm": 0.27015355229377747, "learning_rate": 1.6905985538471727e-07, "loss": 0.2633, "step": 19359 }, { "epoch": 2.77602523659306, "grad_norm": 0.24726131558418274, "learning_rate": 1.6884482263888947e-07, "loss": 0.2625, "step": 19360 }, { "epoch": 2.776168626326355, "grad_norm": 0.2769942283630371, "learning_rate": 1.6862992438498983e-07, "loss": 0.3063, "step": 19361 }, { "epoch": 2.7763120160596504, "grad_norm": 0.2882062792778015, "learning_rate": 1.684151606290013e-07, "loss": 0.2642, "step": 19362 }, { "epoch": 2.7764554057929454, "grad_norm": 0.2742874026298523, "learning_rate": 1.682005313769025e-07, "loss": 0.2848, "step": 19363 }, { "epoch": 2.7765987955262403, "grad_norm": 0.27611804008483887, "learning_rate": 1.6798603663466805e-07, "loss": 0.2965, "step": 19364 }, { "epoch": 2.7767421852595353, "grad_norm": 0.26640191674232483, "learning_rate": 1.677716764082693e-07, "loss": 0.2898, "step": 19365 }, { "epoch": 2.7768855749928303, "grad_norm": 0.27685272693634033, "learning_rate": 1.6755745070367425e-07, "loss": 0.2892, "step": 19366 }, { "epoch": 2.7770289647261257, "grad_norm": 0.2699509263038635, "learning_rate": 1.6734335952684533e-07, "loss": 0.2944, "step": 19367 }, { "epoch": 2.7771723544594207, "grad_norm": 0.2788344919681549, "learning_rate": 1.671294028837439e-07, "loss": 0.2907, "step": 19368 }, { "epoch": 2.7773157441927157, "grad_norm": 0.27879849076271057, "learning_rate": 1.669155807803252e-07, "loss": 0.277, "step": 19369 }, { "epoch": 2.777459133926011, "grad_norm": 0.262471467256546, "learning_rate": 1.6670189322254228e-07, "loss": 0.2815, "step": 19370 }, { "epoch": 2.777602523659306, "grad_norm": 0.26709070801734924, "learning_rate": 1.6648834021634364e-07, "loss": 0.2906, "step": 19371 }, { "epoch": 2.777745913392601, "grad_norm": 0.2757374942302704, "learning_rate": 1.66274921767674e-07, "loss": 0.2801, "step": 19372 }, { "epoch": 2.777889303125896, "grad_norm": 0.2735673487186432, "learning_rate": 1.6606163788247477e-07, "loss": 0.283, "step": 19373 }, { "epoch": 2.7780326928591914, "grad_norm": 0.27293118834495544, "learning_rate": 1.658484885666839e-07, "loss": 0.2698, "step": 19374 }, { "epoch": 2.7781760825924864, "grad_norm": 0.26605910062789917, "learning_rate": 1.6563547382623502e-07, "loss": 0.2668, "step": 19375 }, { "epoch": 2.7783194723257814, "grad_norm": 0.28409048914909363, "learning_rate": 1.654225936670578e-07, "loss": 0.2855, "step": 19376 }, { "epoch": 2.7784628620590768, "grad_norm": 0.2640797197818756, "learning_rate": 1.652098480950781e-07, "loss": 0.2921, "step": 19377 }, { "epoch": 2.7786062517923718, "grad_norm": 0.2688756287097931, "learning_rate": 1.6499723711621895e-07, "loss": 0.2682, "step": 19378 }, { "epoch": 2.7787496415256667, "grad_norm": 0.280983030796051, "learning_rate": 1.647847607363995e-07, "loss": 0.2768, "step": 19379 }, { "epoch": 2.7788930312589617, "grad_norm": 0.2852209508419037, "learning_rate": 1.6457241896153442e-07, "loss": 0.2674, "step": 19380 }, { "epoch": 2.779036420992257, "grad_norm": 0.2774766683578491, "learning_rate": 1.643602117975346e-07, "loss": 0.2796, "step": 19381 }, { "epoch": 2.779179810725552, "grad_norm": 0.28872713446617126, "learning_rate": 1.6414813925030803e-07, "loss": 0.2855, "step": 19382 }, { "epoch": 2.779323200458847, "grad_norm": 0.27565619349479675, "learning_rate": 1.6393620132575837e-07, "loss": 0.2729, "step": 19383 }, { "epoch": 2.7794665901921425, "grad_norm": 0.2678241729736328, "learning_rate": 1.6372439802978534e-07, "loss": 0.3099, "step": 19384 }, { "epoch": 2.7796099799254375, "grad_norm": 0.2655450999736786, "learning_rate": 1.6351272936828478e-07, "loss": 0.2762, "step": 19385 }, { "epoch": 2.7797533696587324, "grad_norm": 0.2581920921802521, "learning_rate": 1.6330119534715138e-07, "loss": 0.2705, "step": 19386 }, { "epoch": 2.7798967593920274, "grad_norm": 0.28553780913352966, "learning_rate": 1.630897959722716e-07, "loss": 0.2668, "step": 19387 }, { "epoch": 2.7800401491253224, "grad_norm": 0.27537357807159424, "learning_rate": 1.6287853124953178e-07, "loss": 0.2814, "step": 19388 }, { "epoch": 2.780183538858618, "grad_norm": 0.25055477023124695, "learning_rate": 1.6266740118481227e-07, "loss": 0.2749, "step": 19389 }, { "epoch": 2.7803269285919128, "grad_norm": 0.25162506103515625, "learning_rate": 1.624564057839917e-07, "loss": 0.273, "step": 19390 }, { "epoch": 2.780470318325208, "grad_norm": 0.2655514180660248, "learning_rate": 1.622455450529431e-07, "loss": 0.3069, "step": 19391 }, { "epoch": 2.780613708058503, "grad_norm": 0.2605249583721161, "learning_rate": 1.6203481899753681e-07, "loss": 0.2703, "step": 19392 }, { "epoch": 2.780757097791798, "grad_norm": 0.2880609929561615, "learning_rate": 1.6182422762363868e-07, "loss": 0.2476, "step": 19393 }, { "epoch": 2.780900487525093, "grad_norm": 0.2644011974334717, "learning_rate": 1.6161377093711127e-07, "loss": 0.2681, "step": 19394 }, { "epoch": 2.781043877258388, "grad_norm": 0.27184009552001953, "learning_rate": 1.6140344894381376e-07, "loss": 0.2819, "step": 19395 }, { "epoch": 2.7811872669916835, "grad_norm": 0.29934048652648926, "learning_rate": 1.6119326164960148e-07, "loss": 0.2959, "step": 19396 }, { "epoch": 2.7813306567249785, "grad_norm": 0.2590693235397339, "learning_rate": 1.6098320906032529e-07, "loss": 0.2685, "step": 19397 }, { "epoch": 2.7814740464582735, "grad_norm": 0.3051212430000305, "learning_rate": 1.6077329118183272e-07, "loss": 0.2822, "step": 19398 }, { "epoch": 2.781617436191569, "grad_norm": 0.2759654223918915, "learning_rate": 1.6056350801996745e-07, "loss": 0.2858, "step": 19399 }, { "epoch": 2.781760825924864, "grad_norm": 0.256133109331131, "learning_rate": 1.6035385958056927e-07, "loss": 0.2838, "step": 19400 }, { "epoch": 2.781904215658159, "grad_norm": 0.2597547769546509, "learning_rate": 1.6014434586947513e-07, "loss": 0.269, "step": 19401 }, { "epoch": 2.782047605391454, "grad_norm": 0.2868644893169403, "learning_rate": 1.5993496689251763e-07, "loss": 0.2609, "step": 19402 }, { "epoch": 2.782190995124749, "grad_norm": 0.2662853002548218, "learning_rate": 1.597257226555249e-07, "loss": 0.2908, "step": 19403 }, { "epoch": 2.782334384858044, "grad_norm": 0.2958999574184418, "learning_rate": 1.595166131643222e-07, "loss": 0.2875, "step": 19404 }, { "epoch": 2.782477774591339, "grad_norm": 0.27250832319259644, "learning_rate": 1.5930763842473106e-07, "loss": 0.2917, "step": 19405 }, { "epoch": 2.7826211643246346, "grad_norm": 0.2742730379104614, "learning_rate": 1.5909879844256905e-07, "loss": 0.2806, "step": 19406 }, { "epoch": 2.7827645540579296, "grad_norm": 0.2658082842826843, "learning_rate": 1.5889009322365035e-07, "loss": 0.2809, "step": 19407 }, { "epoch": 2.7829079437912245, "grad_norm": 0.25918954610824585, "learning_rate": 1.5868152277378369e-07, "loss": 0.2583, "step": 19408 }, { "epoch": 2.7830513335245195, "grad_norm": 0.2706415355205536, "learning_rate": 1.5847308709877608e-07, "loss": 0.29, "step": 19409 }, { "epoch": 2.7831947232578145, "grad_norm": 0.2595064043998718, "learning_rate": 1.582647862044301e-07, "loss": 0.2673, "step": 19410 }, { "epoch": 2.78333811299111, "grad_norm": 0.2555931508541107, "learning_rate": 1.5805662009654444e-07, "loss": 0.2739, "step": 19411 }, { "epoch": 2.783481502724405, "grad_norm": 0.26996394991874695, "learning_rate": 1.5784858878091447e-07, "loss": 0.2656, "step": 19412 }, { "epoch": 2.7836248924577003, "grad_norm": 0.24823608994483948, "learning_rate": 1.576406922633317e-07, "loss": 0.2902, "step": 19413 }, { "epoch": 2.7837682821909953, "grad_norm": 0.2676447033882141, "learning_rate": 1.5743293054958196e-07, "loss": 0.2957, "step": 19414 }, { "epoch": 2.7839116719242902, "grad_norm": 0.27393993735313416, "learning_rate": 1.5722530364545075e-07, "loss": 0.2887, "step": 19415 }, { "epoch": 2.784055061657585, "grad_norm": 0.28935474157333374, "learning_rate": 1.5701781155671723e-07, "loss": 0.2828, "step": 19416 }, { "epoch": 2.78419845139088, "grad_norm": 0.2606409192085266, "learning_rate": 1.5681045428915798e-07, "loss": 0.3106, "step": 19417 }, { "epoch": 2.7843418411241756, "grad_norm": 0.2801227271556854, "learning_rate": 1.5660323184854554e-07, "loss": 0.2829, "step": 19418 }, { "epoch": 2.7844852308574706, "grad_norm": 0.2697731554508209, "learning_rate": 1.5639614424064864e-07, "loss": 0.2902, "step": 19419 }, { "epoch": 2.7846286205907655, "grad_norm": 0.26867958903312683, "learning_rate": 1.5618919147123212e-07, "loss": 0.301, "step": 19420 }, { "epoch": 2.784772010324061, "grad_norm": 0.27068930864334106, "learning_rate": 1.559823735460575e-07, "loss": 0.2832, "step": 19421 }, { "epoch": 2.784915400057356, "grad_norm": 0.26220786571502686, "learning_rate": 1.557756904708818e-07, "loss": 0.2708, "step": 19422 }, { "epoch": 2.785058789790651, "grad_norm": 0.2601422071456909, "learning_rate": 1.5556914225145935e-07, "loss": 0.2763, "step": 19423 }, { "epoch": 2.785202179523946, "grad_norm": 0.2729765474796295, "learning_rate": 1.5536272889353998e-07, "loss": 0.265, "step": 19424 }, { "epoch": 2.7853455692572413, "grad_norm": 0.2475004941225052, "learning_rate": 1.5515645040286963e-07, "loss": 0.2729, "step": 19425 }, { "epoch": 2.7854889589905363, "grad_norm": 0.27175644040107727, "learning_rate": 1.5495030678519096e-07, "loss": 0.2987, "step": 19426 }, { "epoch": 2.7856323487238313, "grad_norm": 0.2633124887943268, "learning_rate": 1.547442980462427e-07, "loss": 0.2703, "step": 19427 }, { "epoch": 2.7857757384571267, "grad_norm": 0.2719873785972595, "learning_rate": 1.5453842419175913e-07, "loss": 0.2644, "step": 19428 }, { "epoch": 2.7859191281904216, "grad_norm": 0.2600902020931244, "learning_rate": 1.5433268522747346e-07, "loss": 0.2768, "step": 19429 }, { "epoch": 2.7860625179237166, "grad_norm": 0.2509514391422272, "learning_rate": 1.5412708115911113e-07, "loss": 0.2852, "step": 19430 }, { "epoch": 2.7862059076570116, "grad_norm": 0.27597686648368835, "learning_rate": 1.5392161199239586e-07, "loss": 0.2879, "step": 19431 }, { "epoch": 2.786349297390307, "grad_norm": 0.260416179895401, "learning_rate": 1.537162777330492e-07, "loss": 0.274, "step": 19432 }, { "epoch": 2.786492687123602, "grad_norm": 0.2632486820220947, "learning_rate": 1.5351107838678547e-07, "loss": 0.2839, "step": 19433 }, { "epoch": 2.786636076856897, "grad_norm": 0.28125035762786865, "learning_rate": 1.53306013959319e-07, "loss": 0.2779, "step": 19434 }, { "epoch": 2.7867794665901924, "grad_norm": 0.26541969180107117, "learning_rate": 1.5310108445635684e-07, "loss": 0.2911, "step": 19435 }, { "epoch": 2.7869228563234874, "grad_norm": 0.27074548602104187, "learning_rate": 1.5289628988360506e-07, "loss": 0.2807, "step": 19436 }, { "epoch": 2.7870662460567823, "grad_norm": 0.27643248438835144, "learning_rate": 1.5269163024676403e-07, "loss": 0.2828, "step": 19437 }, { "epoch": 2.7872096357900773, "grad_norm": 0.26453980803489685, "learning_rate": 1.524871055515309e-07, "loss": 0.2629, "step": 19438 }, { "epoch": 2.7873530255233723, "grad_norm": 0.26216921210289, "learning_rate": 1.5228271580360055e-07, "loss": 0.2983, "step": 19439 }, { "epoch": 2.7874964152566677, "grad_norm": 0.2688891589641571, "learning_rate": 1.5207846100866176e-07, "loss": 0.266, "step": 19440 }, { "epoch": 2.7876398049899627, "grad_norm": 0.2728777825832367, "learning_rate": 1.5187434117240164e-07, "loss": 0.2871, "step": 19441 }, { "epoch": 2.787783194723258, "grad_norm": 0.27629706263542175, "learning_rate": 1.5167035630050175e-07, "loss": 0.2707, "step": 19442 }, { "epoch": 2.787926584456553, "grad_norm": 0.2597009539604187, "learning_rate": 1.514665063986409e-07, "loss": 0.2802, "step": 19443 }, { "epoch": 2.788069974189848, "grad_norm": 0.2735099792480469, "learning_rate": 1.5126279147249402e-07, "loss": 0.2707, "step": 19444 }, { "epoch": 2.788213363923143, "grad_norm": 0.25968828797340393, "learning_rate": 1.5105921152773318e-07, "loss": 0.2938, "step": 19445 }, { "epoch": 2.788356753656438, "grad_norm": 0.28041183948516846, "learning_rate": 1.5085576657002388e-07, "loss": 0.2813, "step": 19446 }, { "epoch": 2.7885001433897334, "grad_norm": 0.2917596399784088, "learning_rate": 1.5065245660503103e-07, "loss": 0.2757, "step": 19447 }, { "epoch": 2.7886435331230284, "grad_norm": 0.26769325137138367, "learning_rate": 1.5044928163841399e-07, "loss": 0.2779, "step": 19448 }, { "epoch": 2.7887869228563233, "grad_norm": 0.24850298464298248, "learning_rate": 1.502462416758288e-07, "loss": 0.2731, "step": 19449 }, { "epoch": 2.7889303125896188, "grad_norm": 0.2746366560459137, "learning_rate": 1.5004333672292816e-07, "loss": 0.2849, "step": 19450 }, { "epoch": 2.7890737023229137, "grad_norm": 0.2652992010116577, "learning_rate": 1.4984056678536086e-07, "loss": 0.2796, "step": 19451 }, { "epoch": 2.7892170920562087, "grad_norm": 0.27580526471138, "learning_rate": 1.496379318687702e-07, "loss": 0.2845, "step": 19452 }, { "epoch": 2.7893604817895037, "grad_norm": 0.2711322605609894, "learning_rate": 1.4943543197879884e-07, "loss": 0.2923, "step": 19453 }, { "epoch": 2.789503871522799, "grad_norm": 0.2628648579120636, "learning_rate": 1.4923306712108344e-07, "loss": 0.2748, "step": 19454 }, { "epoch": 2.789647261256094, "grad_norm": 0.271629273891449, "learning_rate": 1.4903083730125722e-07, "loss": 0.28, "step": 19455 }, { "epoch": 2.789790650989389, "grad_norm": 0.2803618609905243, "learning_rate": 1.488287425249507e-07, "loss": 0.2582, "step": 19456 }, { "epoch": 2.7899340407226845, "grad_norm": 0.2671644687652588, "learning_rate": 1.4862678279778942e-07, "loss": 0.2665, "step": 19457 }, { "epoch": 2.7900774304559794, "grad_norm": 0.2615163326263428, "learning_rate": 1.4842495812539549e-07, "loss": 0.2711, "step": 19458 }, { "epoch": 2.7902208201892744, "grad_norm": 0.2645741105079651, "learning_rate": 1.4822326851338776e-07, "loss": 0.2851, "step": 19459 }, { "epoch": 2.7903642099225694, "grad_norm": 0.263343870639801, "learning_rate": 1.4802171396738062e-07, "loss": 0.2825, "step": 19460 }, { "epoch": 2.7905075996558644, "grad_norm": 0.27483922243118286, "learning_rate": 1.4782029449298574e-07, "loss": 0.2812, "step": 19461 }, { "epoch": 2.79065098938916, "grad_norm": 0.2580578625202179, "learning_rate": 1.476190100958097e-07, "loss": 0.2841, "step": 19462 }, { "epoch": 2.7907943791224548, "grad_norm": 0.2674260139465332, "learning_rate": 1.4741786078145581e-07, "loss": 0.2526, "step": 19463 }, { "epoch": 2.79093776885575, "grad_norm": 0.2525271773338318, "learning_rate": 1.4721684655552403e-07, "loss": 0.2872, "step": 19464 }, { "epoch": 2.791081158589045, "grad_norm": 0.28460755944252014, "learning_rate": 1.470159674236099e-07, "loss": 0.2916, "step": 19465 }, { "epoch": 2.79122454832234, "grad_norm": 0.2589690387248993, "learning_rate": 1.4681522339130616e-07, "loss": 0.2743, "step": 19466 }, { "epoch": 2.791367938055635, "grad_norm": 0.27392634749412537, "learning_rate": 1.4661461446420166e-07, "loss": 0.2875, "step": 19467 }, { "epoch": 2.79151132778893, "grad_norm": 0.27228331565856934, "learning_rate": 1.4641414064787973e-07, "loss": 0.2624, "step": 19468 }, { "epoch": 2.7916547175222255, "grad_norm": 0.26506364345550537, "learning_rate": 1.4621380194792202e-07, "loss": 0.2748, "step": 19469 }, { "epoch": 2.7917981072555205, "grad_norm": 0.26456159353256226, "learning_rate": 1.460135983699057e-07, "loss": 0.2795, "step": 19470 }, { "epoch": 2.7919414969888154, "grad_norm": 0.2754161059856415, "learning_rate": 1.4581352991940413e-07, "loss": 0.2736, "step": 19471 }, { "epoch": 2.792084886722111, "grad_norm": 0.27200862765312195, "learning_rate": 1.456135966019867e-07, "loss": 0.2885, "step": 19472 }, { "epoch": 2.792228276455406, "grad_norm": 0.282357394695282, "learning_rate": 1.4541379842321902e-07, "loss": 0.2697, "step": 19473 }, { "epoch": 2.792371666188701, "grad_norm": 0.27741163969039917, "learning_rate": 1.452141353886627e-07, "loss": 0.2669, "step": 19474 }, { "epoch": 2.792515055921996, "grad_norm": 0.2693699598312378, "learning_rate": 1.4501460750387775e-07, "loss": 0.2835, "step": 19475 }, { "epoch": 2.792658445655291, "grad_norm": 0.28414177894592285, "learning_rate": 1.448152147744175e-07, "loss": 0.2899, "step": 19476 }, { "epoch": 2.792801835388586, "grad_norm": 0.27258431911468506, "learning_rate": 1.4461595720583311e-07, "loss": 0.275, "step": 19477 }, { "epoch": 2.792945225121881, "grad_norm": 0.26712825894355774, "learning_rate": 1.4441683480367174e-07, "loss": 0.2947, "step": 19478 }, { "epoch": 2.7930886148551766, "grad_norm": 0.2528415024280548, "learning_rate": 1.4421784757347622e-07, "loss": 0.2817, "step": 19479 }, { "epoch": 2.7932320045884715, "grad_norm": 0.2755557894706726, "learning_rate": 1.4401899552078603e-07, "loss": 0.2797, "step": 19480 }, { "epoch": 2.7933753943217665, "grad_norm": 0.26733076572418213, "learning_rate": 1.4382027865113723e-07, "loss": 0.282, "step": 19481 }, { "epoch": 2.7935187840550615, "grad_norm": 0.26219847798347473, "learning_rate": 1.4362169697006157e-07, "loss": 0.2913, "step": 19482 }, { "epoch": 2.793662173788357, "grad_norm": 0.25389134883880615, "learning_rate": 1.434232504830879e-07, "loss": 0.2883, "step": 19483 }, { "epoch": 2.793805563521652, "grad_norm": 0.26157090067863464, "learning_rate": 1.4322493919573966e-07, "loss": 0.274, "step": 19484 }, { "epoch": 2.793948953254947, "grad_norm": 0.2670969069004059, "learning_rate": 1.4302676311353793e-07, "loss": 0.2836, "step": 19485 }, { "epoch": 2.7940923429882423, "grad_norm": 0.27606531977653503, "learning_rate": 1.4282872224199995e-07, "loss": 0.2925, "step": 19486 }, { "epoch": 2.7942357327215372, "grad_norm": 0.2670372426509857, "learning_rate": 1.4263081658663802e-07, "loss": 0.2746, "step": 19487 }, { "epoch": 2.794379122454832, "grad_norm": 0.253383606672287, "learning_rate": 1.4243304615296327e-07, "loss": 0.2772, "step": 19488 }, { "epoch": 2.794522512188127, "grad_norm": 0.2727757692337036, "learning_rate": 1.4223541094647907e-07, "loss": 0.2852, "step": 19489 }, { "epoch": 2.794665901921422, "grad_norm": 0.2777811288833618, "learning_rate": 1.420379109726888e-07, "loss": 0.268, "step": 19490 }, { "epoch": 2.7948092916547176, "grad_norm": 0.2746618390083313, "learning_rate": 1.4184054623708976e-07, "loss": 0.2698, "step": 19491 }, { "epoch": 2.7949526813880126, "grad_norm": 0.28499794006347656, "learning_rate": 1.4164331674517695e-07, "loss": 0.2711, "step": 19492 }, { "epoch": 2.795096071121308, "grad_norm": 0.2718755900859833, "learning_rate": 1.41446222502441e-07, "loss": 0.2839, "step": 19493 }, { "epoch": 2.795239460854603, "grad_norm": 0.25596633553504944, "learning_rate": 1.4124926351436806e-07, "loss": 0.2787, "step": 19494 }, { "epoch": 2.795382850587898, "grad_norm": 0.2880452871322632, "learning_rate": 1.4105243978644102e-07, "loss": 0.2936, "step": 19495 }, { "epoch": 2.795526240321193, "grad_norm": 0.2573229968547821, "learning_rate": 1.4085575132414042e-07, "loss": 0.2712, "step": 19496 }, { "epoch": 2.795669630054488, "grad_norm": 0.26811739802360535, "learning_rate": 1.4065919813294028e-07, "loss": 0.2725, "step": 19497 }, { "epoch": 2.7958130197877833, "grad_norm": 0.2553532123565674, "learning_rate": 1.404627802183134e-07, "loss": 0.299, "step": 19498 }, { "epoch": 2.7959564095210783, "grad_norm": 0.2741639018058777, "learning_rate": 1.4026649758572765e-07, "loss": 0.2725, "step": 19499 }, { "epoch": 2.7960997992543732, "grad_norm": 0.26635369658470154, "learning_rate": 1.400703502406464e-07, "loss": 0.274, "step": 19500 }, { "epoch": 2.7962431889876687, "grad_norm": 0.27330493927001953, "learning_rate": 1.3987433818853035e-07, "loss": 0.2869, "step": 19501 }, { "epoch": 2.7963865787209636, "grad_norm": 0.25772377848625183, "learning_rate": 1.3967846143483677e-07, "loss": 0.2763, "step": 19502 }, { "epoch": 2.7965299684542586, "grad_norm": 0.2654520273208618, "learning_rate": 1.394827199850185e-07, "loss": 0.2679, "step": 19503 }, { "epoch": 2.7966733581875536, "grad_norm": 0.270624041557312, "learning_rate": 1.39287113844524e-07, "loss": 0.2817, "step": 19504 }, { "epoch": 2.796816747920849, "grad_norm": 0.27830007672309875, "learning_rate": 1.3909164301879996e-07, "loss": 0.2822, "step": 19505 }, { "epoch": 2.796960137654144, "grad_norm": 0.26578542590141296, "learning_rate": 1.3889630751328654e-07, "loss": 0.288, "step": 19506 }, { "epoch": 2.797103527387439, "grad_norm": 0.27655789256095886, "learning_rate": 1.3870110733342156e-07, "loss": 0.2828, "step": 19507 }, { "epoch": 2.7972469171207344, "grad_norm": 0.2672007381916046, "learning_rate": 1.3850604248464017e-07, "loss": 0.2522, "step": 19508 }, { "epoch": 2.7973903068540293, "grad_norm": 0.2942914068698883, "learning_rate": 1.3831111297237242e-07, "loss": 0.2605, "step": 19509 }, { "epoch": 2.7975336965873243, "grad_norm": 0.27470558881759644, "learning_rate": 1.3811631880204402e-07, "loss": 0.2749, "step": 19510 }, { "epoch": 2.7976770863206193, "grad_norm": 0.2721547484397888, "learning_rate": 1.3792165997907891e-07, "loss": 0.2973, "step": 19511 }, { "epoch": 2.7978204760539143, "grad_norm": 0.2734154164791107, "learning_rate": 1.3772713650889503e-07, "loss": 0.2943, "step": 19512 }, { "epoch": 2.7979638657872097, "grad_norm": 0.28316858410835266, "learning_rate": 1.37532748396908e-07, "loss": 0.2808, "step": 19513 }, { "epoch": 2.7981072555205047, "grad_norm": 0.2827335000038147, "learning_rate": 1.3733849564852964e-07, "loss": 0.2706, "step": 19514 }, { "epoch": 2.7982506452538, "grad_norm": 0.27501755952835083, "learning_rate": 1.3714437826916838e-07, "loss": 0.295, "step": 19515 }, { "epoch": 2.798394034987095, "grad_norm": 0.2882656455039978, "learning_rate": 1.3695039626422602e-07, "loss": 0.2726, "step": 19516 }, { "epoch": 2.79853742472039, "grad_norm": 0.24490363895893097, "learning_rate": 1.367565496391038e-07, "loss": 0.2811, "step": 19517 }, { "epoch": 2.798680814453685, "grad_norm": 0.27802351117134094, "learning_rate": 1.3656283839919847e-07, "loss": 0.2832, "step": 19518 }, { "epoch": 2.79882420418698, "grad_norm": 0.2577023506164551, "learning_rate": 1.3636926254990246e-07, "loss": 0.2996, "step": 19519 }, { "epoch": 2.7989675939202754, "grad_norm": 0.24540016055107117, "learning_rate": 1.361758220966042e-07, "loss": 0.271, "step": 19520 }, { "epoch": 2.7991109836535704, "grad_norm": 0.26445409655570984, "learning_rate": 1.359825170446899e-07, "loss": 0.274, "step": 19521 }, { "epoch": 2.7992543733868653, "grad_norm": 0.25295037031173706, "learning_rate": 1.357893473995392e-07, "loss": 0.2809, "step": 19522 }, { "epoch": 2.7993977631201608, "grad_norm": 0.2704191207885742, "learning_rate": 1.3559631316653056e-07, "loss": 0.2916, "step": 19523 }, { "epoch": 2.7995411528534557, "grad_norm": 0.28335660696029663, "learning_rate": 1.3540341435103799e-07, "loss": 0.282, "step": 19524 }, { "epoch": 2.7996845425867507, "grad_norm": 0.27508774399757385, "learning_rate": 1.352106509584311e-07, "loss": 0.2892, "step": 19525 }, { "epoch": 2.7998279323200457, "grad_norm": 0.269082248210907, "learning_rate": 1.3501802299407673e-07, "loss": 0.2747, "step": 19526 }, { "epoch": 2.799971322053341, "grad_norm": 0.26354002952575684, "learning_rate": 1.3482553046333614e-07, "loss": 0.273, "step": 19527 }, { "epoch": 2.800114711786636, "grad_norm": 0.2561935484409332, "learning_rate": 1.346331733715689e-07, "loss": 0.2837, "step": 19528 }, { "epoch": 2.800258101519931, "grad_norm": 0.25485193729400635, "learning_rate": 1.3444095172412962e-07, "loss": 0.2628, "step": 19529 }, { "epoch": 2.8004014912532265, "grad_norm": 0.27150821685791016, "learning_rate": 1.3424886552636963e-07, "loss": 0.2852, "step": 19530 }, { "epoch": 2.8005448809865214, "grad_norm": 0.2667487859725952, "learning_rate": 1.3405691478363624e-07, "loss": 0.2724, "step": 19531 }, { "epoch": 2.8006882707198164, "grad_norm": 0.27204272150993347, "learning_rate": 1.3386509950127357e-07, "loss": 0.2808, "step": 19532 }, { "epoch": 2.8008316604531114, "grad_norm": 0.27787482738494873, "learning_rate": 1.3367341968462067e-07, "loss": 0.295, "step": 19533 }, { "epoch": 2.800975050186407, "grad_norm": 0.25321030616760254, "learning_rate": 1.3348187533901435e-07, "loss": 0.2854, "step": 19534 }, { "epoch": 2.8011184399197018, "grad_norm": 0.2654321491718292, "learning_rate": 1.3329046646978593e-07, "loss": 0.2709, "step": 19535 }, { "epoch": 2.8012618296529967, "grad_norm": 0.2755410075187683, "learning_rate": 1.3309919308226504e-07, "loss": 0.2836, "step": 19536 }, { "epoch": 2.801405219386292, "grad_norm": 0.2658790051937103, "learning_rate": 1.329080551817763e-07, "loss": 0.268, "step": 19537 }, { "epoch": 2.801548609119587, "grad_norm": 0.29601871967315674, "learning_rate": 1.3271705277363933e-07, "loss": 0.2765, "step": 19538 }, { "epoch": 2.801691998852882, "grad_norm": 0.2862946391105652, "learning_rate": 1.3252618586317267e-07, "loss": 0.2729, "step": 19539 }, { "epoch": 2.801835388586177, "grad_norm": 0.24473237991333008, "learning_rate": 1.3233545445568986e-07, "loss": 0.283, "step": 19540 }, { "epoch": 2.801978778319472, "grad_norm": 0.2795528769493103, "learning_rate": 1.3214485855649939e-07, "loss": 0.2891, "step": 19541 }, { "epoch": 2.8021221680527675, "grad_norm": 0.26968327164649963, "learning_rate": 1.319543981709087e-07, "loss": 0.2959, "step": 19542 }, { "epoch": 2.8022655577860625, "grad_norm": 0.2674923539161682, "learning_rate": 1.317640733042186e-07, "loss": 0.2595, "step": 19543 }, { "epoch": 2.802408947519358, "grad_norm": 0.26117774844169617, "learning_rate": 1.315738839617281e-07, "loss": 0.3069, "step": 19544 }, { "epoch": 2.802552337252653, "grad_norm": 0.2685159146785736, "learning_rate": 1.3138383014873136e-07, "loss": 0.2693, "step": 19545 }, { "epoch": 2.802695726985948, "grad_norm": 0.25388309359550476, "learning_rate": 1.3119391187051966e-07, "loss": 0.2988, "step": 19546 }, { "epoch": 2.802839116719243, "grad_norm": 0.2633442282676697, "learning_rate": 1.310041291323799e-07, "loss": 0.2625, "step": 19547 }, { "epoch": 2.8029825064525378, "grad_norm": 0.26248958706855774, "learning_rate": 1.3081448193959511e-07, "loss": 0.2662, "step": 19548 }, { "epoch": 2.803125896185833, "grad_norm": 0.24577444791793823, "learning_rate": 1.3062497029744493e-07, "loss": 0.2711, "step": 19549 }, { "epoch": 2.803269285919128, "grad_norm": 0.26199376583099365, "learning_rate": 1.304355942112051e-07, "loss": 0.269, "step": 19550 }, { "epoch": 2.803412675652423, "grad_norm": 0.24431893229484558, "learning_rate": 1.3024635368614759e-07, "loss": 0.2747, "step": 19551 }, { "epoch": 2.8035560653857186, "grad_norm": 0.2624276280403137, "learning_rate": 1.3005724872754033e-07, "loss": 0.277, "step": 19552 }, { "epoch": 2.8036994551190135, "grad_norm": 0.27859535813331604, "learning_rate": 1.298682793406486e-07, "loss": 0.2831, "step": 19553 }, { "epoch": 2.8038428448523085, "grad_norm": 0.2660849094390869, "learning_rate": 1.2967944553073153e-07, "loss": 0.2818, "step": 19554 }, { "epoch": 2.8039862345856035, "grad_norm": 0.27671125531196594, "learning_rate": 1.2949074730304712e-07, "loss": 0.2787, "step": 19555 }, { "epoch": 2.804129624318899, "grad_norm": 0.2706792950630188, "learning_rate": 1.2930218466284784e-07, "loss": 0.2987, "step": 19556 }, { "epoch": 2.804273014052194, "grad_norm": 0.2745080292224884, "learning_rate": 1.291137576153828e-07, "loss": 0.285, "step": 19557 }, { "epoch": 2.804416403785489, "grad_norm": 0.27142640948295593, "learning_rate": 1.289254661658984e-07, "loss": 0.2893, "step": 19558 }, { "epoch": 2.8045597935187843, "grad_norm": 0.2707281708717346, "learning_rate": 1.2873731031963655e-07, "loss": 0.2846, "step": 19559 }, { "epoch": 2.8047031832520792, "grad_norm": 0.2772795259952545, "learning_rate": 1.285492900818336e-07, "loss": 0.2847, "step": 19560 }, { "epoch": 2.804846572985374, "grad_norm": 0.26416119933128357, "learning_rate": 1.2836140545772536e-07, "loss": 0.2823, "step": 19561 }, { "epoch": 2.804989962718669, "grad_norm": 0.2623140811920166, "learning_rate": 1.2817365645254097e-07, "loss": 0.2744, "step": 19562 }, { "epoch": 2.8051333524519646, "grad_norm": 0.26562854647636414, "learning_rate": 1.2798604307150797e-07, "loss": 0.2907, "step": 19563 }, { "epoch": 2.8052767421852596, "grad_norm": 0.28069794178009033, "learning_rate": 1.277985653198488e-07, "loss": 0.3014, "step": 19564 }, { "epoch": 2.8054201319185545, "grad_norm": 0.2628971040248871, "learning_rate": 1.276112232027832e-07, "loss": 0.2673, "step": 19565 }, { "epoch": 2.80556352165185, "grad_norm": 0.30087172985076904, "learning_rate": 1.274240167255253e-07, "loss": 0.2732, "step": 19566 }, { "epoch": 2.805706911385145, "grad_norm": 0.26348862051963806, "learning_rate": 1.2723694589328817e-07, "loss": 0.2866, "step": 19567 }, { "epoch": 2.80585030111844, "grad_norm": 0.2827030420303345, "learning_rate": 1.270500107112782e-07, "loss": 0.2785, "step": 19568 }, { "epoch": 2.805993690851735, "grad_norm": 0.2504763901233673, "learning_rate": 1.2686321118470068e-07, "loss": 0.2901, "step": 19569 }, { "epoch": 2.80613708058503, "grad_norm": 0.26944175362586975, "learning_rate": 1.2667654731875424e-07, "loss": 0.268, "step": 19570 }, { "epoch": 2.8062804703183253, "grad_norm": 0.2596670985221863, "learning_rate": 1.2649001911863634e-07, "loss": 0.2896, "step": 19571 }, { "epoch": 2.8064238600516203, "grad_norm": 0.31628182530403137, "learning_rate": 1.2630362658953955e-07, "loss": 0.2741, "step": 19572 }, { "epoch": 2.8065672497849157, "grad_norm": 0.26130393147468567, "learning_rate": 1.2611736973665246e-07, "loss": 0.273, "step": 19573 }, { "epoch": 2.8067106395182106, "grad_norm": 0.2650790512561798, "learning_rate": 1.2593124856516036e-07, "loss": 0.3148, "step": 19574 }, { "epoch": 2.8068540292515056, "grad_norm": 0.26251739263534546, "learning_rate": 1.2574526308024527e-07, "loss": 0.295, "step": 19575 }, { "epoch": 2.8069974189848006, "grad_norm": 0.2688920199871063, "learning_rate": 1.2555941328708354e-07, "loss": 0.2529, "step": 19576 }, { "epoch": 2.8071408087180956, "grad_norm": 0.2589918076992035, "learning_rate": 1.253736991908494e-07, "loss": 0.2871, "step": 19577 }, { "epoch": 2.807284198451391, "grad_norm": 0.26210519671440125, "learning_rate": 1.251881207967126e-07, "loss": 0.301, "step": 19578 }, { "epoch": 2.807427588184686, "grad_norm": 0.2711697220802307, "learning_rate": 1.2500267810984013e-07, "loss": 0.284, "step": 19579 }, { "epoch": 2.807570977917981, "grad_norm": 0.2716565728187561, "learning_rate": 1.2481737113539395e-07, "loss": 0.2835, "step": 19580 }, { "epoch": 2.8077143676512764, "grad_norm": 0.2706267237663269, "learning_rate": 1.246321998785327e-07, "loss": 0.2568, "step": 19581 }, { "epoch": 2.8078577573845713, "grad_norm": 0.26609525084495544, "learning_rate": 1.2444716434441118e-07, "loss": 0.2631, "step": 19582 }, { "epoch": 2.8080011471178663, "grad_norm": 0.2634941339492798, "learning_rate": 1.2426226453818023e-07, "loss": 0.2699, "step": 19583 }, { "epoch": 2.8081445368511613, "grad_norm": 0.2654532194137573, "learning_rate": 1.2407750046498802e-07, "loss": 0.2807, "step": 19584 }, { "epoch": 2.8082879265844567, "grad_norm": 0.27706068754196167, "learning_rate": 1.2389287212997702e-07, "loss": 0.2604, "step": 19585 }, { "epoch": 2.8084313163177517, "grad_norm": 0.2844449281692505, "learning_rate": 1.2370837953828818e-07, "loss": 0.2745, "step": 19586 }, { "epoch": 2.8085747060510466, "grad_norm": 0.26116135716438293, "learning_rate": 1.235240226950568e-07, "loss": 0.2636, "step": 19587 }, { "epoch": 2.808718095784342, "grad_norm": 0.2555239498615265, "learning_rate": 1.2333980160541493e-07, "loss": 0.2728, "step": 19588 }, { "epoch": 2.808861485517637, "grad_norm": 0.27296140789985657, "learning_rate": 1.2315571627449174e-07, "loss": 0.3028, "step": 19589 }, { "epoch": 2.809004875250932, "grad_norm": 0.2655394971370697, "learning_rate": 1.2297176670741094e-07, "loss": 0.2907, "step": 19590 }, { "epoch": 2.809148264984227, "grad_norm": 0.28287097811698914, "learning_rate": 1.2278795290929457e-07, "loss": 0.3088, "step": 19591 }, { "epoch": 2.809291654717522, "grad_norm": 0.28261232376098633, "learning_rate": 1.226042748852585e-07, "loss": 0.2987, "step": 19592 }, { "epoch": 2.8094350444508174, "grad_norm": 0.25998929142951965, "learning_rate": 1.2242073264041698e-07, "loss": 0.2611, "step": 19593 }, { "epoch": 2.8095784341841123, "grad_norm": 0.27625972032546997, "learning_rate": 1.2223732617987873e-07, "loss": 0.3105, "step": 19594 }, { "epoch": 2.8097218239174078, "grad_norm": 0.25442036986351013, "learning_rate": 1.220540555087496e-07, "loss": 0.2663, "step": 19595 }, { "epoch": 2.8098652136507027, "grad_norm": 0.26473256945610046, "learning_rate": 1.218709206321328e-07, "loss": 0.2917, "step": 19596 }, { "epoch": 2.8100086033839977, "grad_norm": 0.2728408873081207, "learning_rate": 1.2168792155512533e-07, "loss": 0.2896, "step": 19597 }, { "epoch": 2.8101519931172927, "grad_norm": 0.2659492790699005, "learning_rate": 1.2150505828282144e-07, "loss": 0.2883, "step": 19598 }, { "epoch": 2.8102953828505877, "grad_norm": 0.2537470757961273, "learning_rate": 1.2132233082031264e-07, "loss": 0.281, "step": 19599 }, { "epoch": 2.810438772583883, "grad_norm": 0.27807173132896423, "learning_rate": 1.2113973917268484e-07, "loss": 0.2872, "step": 19600 }, { "epoch": 2.810582162317178, "grad_norm": 0.27469074726104736, "learning_rate": 1.2095728334502178e-07, "loss": 0.2658, "step": 19601 }, { "epoch": 2.810725552050473, "grad_norm": 0.2487054467201233, "learning_rate": 1.2077496334240269e-07, "loss": 0.2877, "step": 19602 }, { "epoch": 2.8108689417837684, "grad_norm": 0.27435103058815, "learning_rate": 1.2059277916990297e-07, "loss": 0.2754, "step": 19603 }, { "epoch": 2.8110123315170634, "grad_norm": 0.27419278025627136, "learning_rate": 1.2041073083259414e-07, "loss": 0.2683, "step": 19604 }, { "epoch": 2.8111557212503584, "grad_norm": 0.2538985013961792, "learning_rate": 1.2022881833554433e-07, "loss": 0.2805, "step": 19605 }, { "epoch": 2.8112991109836534, "grad_norm": 0.259035587310791, "learning_rate": 1.2004704168381786e-07, "loss": 0.2644, "step": 19606 }, { "epoch": 2.811442500716949, "grad_norm": 0.27941209077835083, "learning_rate": 1.1986540088247568e-07, "loss": 0.2795, "step": 19607 }, { "epoch": 2.8115858904502438, "grad_norm": 0.27215850353240967, "learning_rate": 1.1968389593657314e-07, "loss": 0.2802, "step": 19608 }, { "epoch": 2.8117292801835387, "grad_norm": 0.25726914405822754, "learning_rate": 1.1950252685116349e-07, "loss": 0.2699, "step": 19609 }, { "epoch": 2.811872669916834, "grad_norm": 0.2540777325630188, "learning_rate": 1.1932129363129542e-07, "loss": 0.2842, "step": 19610 }, { "epoch": 2.812016059650129, "grad_norm": 0.2701942026615143, "learning_rate": 1.1914019628201545e-07, "loss": 0.2692, "step": 19611 }, { "epoch": 2.812159449383424, "grad_norm": 0.27260348200798035, "learning_rate": 1.1895923480836346e-07, "loss": 0.2872, "step": 19612 }, { "epoch": 2.812302839116719, "grad_norm": 0.2754881978034973, "learning_rate": 1.1877840921537875e-07, "loss": 0.2864, "step": 19613 }, { "epoch": 2.8124462288500145, "grad_norm": 0.27134475111961365, "learning_rate": 1.1859771950809451e-07, "loss": 0.2851, "step": 19614 }, { "epoch": 2.8125896185833095, "grad_norm": 0.27510082721710205, "learning_rate": 1.1841716569154005e-07, "loss": 0.2774, "step": 19615 }, { "epoch": 2.8127330083166044, "grad_norm": 0.26799535751342773, "learning_rate": 1.1823674777074246e-07, "loss": 0.2713, "step": 19616 }, { "epoch": 2.8128763980499, "grad_norm": 0.2606816291809082, "learning_rate": 1.1805646575072438e-07, "loss": 0.2692, "step": 19617 }, { "epoch": 2.813019787783195, "grad_norm": 0.27948471903800964, "learning_rate": 1.1787631963650514e-07, "loss": 0.283, "step": 19618 }, { "epoch": 2.81316317751649, "grad_norm": 0.2613731324672699, "learning_rate": 1.1769630943309796e-07, "loss": 0.269, "step": 19619 }, { "epoch": 2.813306567249785, "grad_norm": 0.2688257098197937, "learning_rate": 1.1751643514551548e-07, "loss": 0.2924, "step": 19620 }, { "epoch": 2.8134499569830798, "grad_norm": 0.24588806927204132, "learning_rate": 1.173366967787648e-07, "loss": 0.2761, "step": 19621 }, { "epoch": 2.813593346716375, "grad_norm": 0.2564227283000946, "learning_rate": 1.1715709433784972e-07, "loss": 0.2897, "step": 19622 }, { "epoch": 2.81373673644967, "grad_norm": 0.26531994342803955, "learning_rate": 1.1697762782777011e-07, "loss": 0.2689, "step": 19623 }, { "epoch": 2.8138801261829656, "grad_norm": 0.25140830874443054, "learning_rate": 1.1679829725352142e-07, "loss": 0.3039, "step": 19624 }, { "epoch": 2.8140235159162605, "grad_norm": 0.2614489197731018, "learning_rate": 1.1661910262009635e-07, "loss": 0.2799, "step": 19625 }, { "epoch": 2.8141669056495555, "grad_norm": 0.2777422070503235, "learning_rate": 1.1644004393248364e-07, "loss": 0.2996, "step": 19626 }, { "epoch": 2.8143102953828505, "grad_norm": 0.2556975483894348, "learning_rate": 1.1626112119566768e-07, "loss": 0.2997, "step": 19627 }, { "epoch": 2.8144536851161455, "grad_norm": 0.2774835228919983, "learning_rate": 1.1608233441462946e-07, "loss": 0.2714, "step": 19628 }, { "epoch": 2.814597074849441, "grad_norm": 0.2974098324775696, "learning_rate": 1.1590368359434667e-07, "loss": 0.2963, "step": 19629 }, { "epoch": 2.814740464582736, "grad_norm": 0.2678631544113159, "learning_rate": 1.1572516873979146e-07, "loss": 0.2751, "step": 19630 }, { "epoch": 2.814883854316031, "grad_norm": 0.26562246680259705, "learning_rate": 1.1554678985593481e-07, "loss": 0.2707, "step": 19631 }, { "epoch": 2.8150272440493262, "grad_norm": 0.2817193567752838, "learning_rate": 1.1536854694774113e-07, "loss": 0.2982, "step": 19632 }, { "epoch": 2.8151706337826212, "grad_norm": 0.2831043601036072, "learning_rate": 1.1519044002017366e-07, "loss": 0.2761, "step": 19633 }, { "epoch": 2.815314023515916, "grad_norm": 0.2630061209201813, "learning_rate": 1.1501246907819008e-07, "loss": 0.2827, "step": 19634 }, { "epoch": 2.815457413249211, "grad_norm": 0.2595553398132324, "learning_rate": 1.1483463412674479e-07, "loss": 0.2797, "step": 19635 }, { "epoch": 2.8156008029825066, "grad_norm": 0.25977587699890137, "learning_rate": 1.1465693517078824e-07, "loss": 0.2653, "step": 19636 }, { "epoch": 2.8157441927158016, "grad_norm": 0.2694670855998993, "learning_rate": 1.1447937221526762e-07, "loss": 0.2598, "step": 19637 }, { "epoch": 2.8158875824490965, "grad_norm": 0.2609173357486725, "learning_rate": 1.1430194526512561e-07, "loss": 0.2839, "step": 19638 }, { "epoch": 2.816030972182392, "grad_norm": 0.27083820104599, "learning_rate": 1.1412465432530217e-07, "loss": 0.3136, "step": 19639 }, { "epoch": 2.816174361915687, "grad_norm": 0.26686891913414, "learning_rate": 1.139474994007328e-07, "loss": 0.2765, "step": 19640 }, { "epoch": 2.816317751648982, "grad_norm": 0.24458913505077362, "learning_rate": 1.1377048049634854e-07, "loss": 0.2718, "step": 19641 }, { "epoch": 2.816461141382277, "grad_norm": 0.2833731770515442, "learning_rate": 1.1359359761707766e-07, "loss": 0.3014, "step": 19642 }, { "epoch": 2.816604531115572, "grad_norm": 0.2690969705581665, "learning_rate": 1.1341685076784404e-07, "loss": 0.2784, "step": 19643 }, { "epoch": 2.8167479208488673, "grad_norm": 0.2645922303199768, "learning_rate": 1.132402399535687e-07, "loss": 0.2614, "step": 19644 }, { "epoch": 2.8168913105821622, "grad_norm": 0.25109320878982544, "learning_rate": 1.1306376517916828e-07, "loss": 0.2828, "step": 19645 }, { "epoch": 2.8170347003154577, "grad_norm": 0.25974348187446594, "learning_rate": 1.1288742644955497e-07, "loss": 0.2898, "step": 19646 }, { "epoch": 2.8171780900487526, "grad_norm": 0.25950101017951965, "learning_rate": 1.1271122376963706e-07, "loss": 0.276, "step": 19647 }, { "epoch": 2.8173214797820476, "grad_norm": 0.2628796398639679, "learning_rate": 1.1253515714432117e-07, "loss": 0.2882, "step": 19648 }, { "epoch": 2.8174648695153426, "grad_norm": 0.273872435092926, "learning_rate": 1.123592265785084e-07, "loss": 0.275, "step": 19649 }, { "epoch": 2.8176082592486376, "grad_norm": 0.26661574840545654, "learning_rate": 1.1218343207709647e-07, "loss": 0.2908, "step": 19650 }, { "epoch": 2.817751648981933, "grad_norm": 0.2582162320613861, "learning_rate": 1.1200777364497816e-07, "loss": 0.2722, "step": 19651 }, { "epoch": 2.817895038715228, "grad_norm": 0.28806617856025696, "learning_rate": 1.1183225128704455e-07, "loss": 0.2759, "step": 19652 }, { "epoch": 2.818038428448523, "grad_norm": 0.28296583890914917, "learning_rate": 1.1165686500818118e-07, "loss": 0.2745, "step": 19653 }, { "epoch": 2.8181818181818183, "grad_norm": 0.26733067631721497, "learning_rate": 1.1148161481327136e-07, "loss": 0.2814, "step": 19654 }, { "epoch": 2.8183252079151133, "grad_norm": 0.2595284879207611, "learning_rate": 1.1130650070719395e-07, "loss": 0.2897, "step": 19655 }, { "epoch": 2.8184685976484083, "grad_norm": 0.27756479382514954, "learning_rate": 1.1113152269482285e-07, "loss": 0.2685, "step": 19656 }, { "epoch": 2.8186119873817033, "grad_norm": 0.2606181800365448, "learning_rate": 1.1095668078102917e-07, "loss": 0.2899, "step": 19657 }, { "epoch": 2.8187553771149987, "grad_norm": 0.2617108225822449, "learning_rate": 1.1078197497068178e-07, "loss": 0.2811, "step": 19658 }, { "epoch": 2.8188987668482937, "grad_norm": 0.2779790759086609, "learning_rate": 1.1060740526864233e-07, "loss": 0.254, "step": 19659 }, { "epoch": 2.8190421565815886, "grad_norm": 0.2501944899559021, "learning_rate": 1.1043297167977197e-07, "loss": 0.2888, "step": 19660 }, { "epoch": 2.819185546314884, "grad_norm": 0.25478851795196533, "learning_rate": 1.1025867420892623e-07, "loss": 0.2913, "step": 19661 }, { "epoch": 2.819328936048179, "grad_norm": 0.2720545530319214, "learning_rate": 1.1008451286095678e-07, "loss": 0.2876, "step": 19662 }, { "epoch": 2.819472325781474, "grad_norm": 0.27012377977371216, "learning_rate": 1.0991048764071255e-07, "loss": 0.3059, "step": 19663 }, { "epoch": 2.819615715514769, "grad_norm": 0.26027363538742065, "learning_rate": 1.0973659855303742e-07, "loss": 0.2731, "step": 19664 }, { "epoch": 2.8197591052480644, "grad_norm": 0.26995372772216797, "learning_rate": 1.0956284560277364e-07, "loss": 0.2722, "step": 19665 }, { "epoch": 2.8199024949813594, "grad_norm": 0.2597852051258087, "learning_rate": 1.0938922879475677e-07, "loss": 0.2913, "step": 19666 }, { "epoch": 2.8200458847146543, "grad_norm": 0.2535989582538605, "learning_rate": 1.092157481338213e-07, "loss": 0.2645, "step": 19667 }, { "epoch": 2.8201892744479498, "grad_norm": 0.2827962636947632, "learning_rate": 1.0904240362479502e-07, "loss": 0.2734, "step": 19668 }, { "epoch": 2.8203326641812447, "grad_norm": 0.27372127771377563, "learning_rate": 1.0886919527250516e-07, "loss": 0.2979, "step": 19669 }, { "epoch": 2.8204760539145397, "grad_norm": 0.25743168592453003, "learning_rate": 1.0869612308177235e-07, "loss": 0.2793, "step": 19670 }, { "epoch": 2.8206194436478347, "grad_norm": 0.2709880769252777, "learning_rate": 1.085231870574155e-07, "loss": 0.2724, "step": 19671 }, { "epoch": 2.8207628333811297, "grad_norm": 0.2658946216106415, "learning_rate": 1.0835038720424962e-07, "loss": 0.2747, "step": 19672 }, { "epoch": 2.820906223114425, "grad_norm": 0.27294501662254333, "learning_rate": 1.0817772352708312e-07, "loss": 0.2722, "step": 19673 }, { "epoch": 2.82104961284772, "grad_norm": 0.2824825942516327, "learning_rate": 1.080051960307238e-07, "loss": 0.2846, "step": 19674 }, { "epoch": 2.8211930025810155, "grad_norm": 0.26662740111351013, "learning_rate": 1.0783280471997449e-07, "loss": 0.2722, "step": 19675 }, { "epoch": 2.8213363923143104, "grad_norm": 0.24882148206233978, "learning_rate": 1.0766054959963412e-07, "loss": 0.2899, "step": 19676 }, { "epoch": 2.8214797820476054, "grad_norm": 0.26501137018203735, "learning_rate": 1.0748843067449887e-07, "loss": 0.2922, "step": 19677 }, { "epoch": 2.8216231717809004, "grad_norm": 0.2633260488510132, "learning_rate": 1.0731644794935936e-07, "loss": 0.2871, "step": 19678 }, { "epoch": 2.8217665615141954, "grad_norm": 0.2748996615409851, "learning_rate": 1.071446014290034e-07, "loss": 0.2776, "step": 19679 }, { "epoch": 2.8219099512474908, "grad_norm": 0.2797144055366516, "learning_rate": 1.0697289111821552e-07, "loss": 0.2743, "step": 19680 }, { "epoch": 2.8220533409807858, "grad_norm": 0.2636955678462982, "learning_rate": 1.0680131702177521e-07, "loss": 0.2703, "step": 19681 }, { "epoch": 2.8221967307140807, "grad_norm": 0.28659573197364807, "learning_rate": 1.0662987914445866e-07, "loss": 0.2602, "step": 19682 }, { "epoch": 2.822340120447376, "grad_norm": 0.2792588770389557, "learning_rate": 1.0645857749103982e-07, "loss": 0.2732, "step": 19683 }, { "epoch": 2.822483510180671, "grad_norm": 0.2785850763320923, "learning_rate": 1.0628741206628601e-07, "loss": 0.2987, "step": 19684 }, { "epoch": 2.822626899913966, "grad_norm": 0.2620376944541931, "learning_rate": 1.0611638287496284e-07, "loss": 0.2754, "step": 19685 }, { "epoch": 2.822770289647261, "grad_norm": 0.26794636249542236, "learning_rate": 1.0594548992183096e-07, "loss": 0.2801, "step": 19686 }, { "epoch": 2.8229136793805565, "grad_norm": 0.25987350940704346, "learning_rate": 1.0577473321164822e-07, "loss": 0.2768, "step": 19687 }, { "epoch": 2.8230570691138515, "grad_norm": 0.27062520384788513, "learning_rate": 1.0560411274916915e-07, "loss": 0.2789, "step": 19688 }, { "epoch": 2.8232004588471464, "grad_norm": 0.25933197140693665, "learning_rate": 1.0543362853914164e-07, "loss": 0.2624, "step": 19689 }, { "epoch": 2.823343848580442, "grad_norm": 0.2598872482776642, "learning_rate": 1.0526328058631297e-07, "loss": 0.2651, "step": 19690 }, { "epoch": 2.823487238313737, "grad_norm": 0.27900901436805725, "learning_rate": 1.0509306889542493e-07, "loss": 0.2828, "step": 19691 }, { "epoch": 2.823630628047032, "grad_norm": 0.2524911165237427, "learning_rate": 1.0492299347121592e-07, "loss": 0.2772, "step": 19692 }, { "epoch": 2.8237740177803268, "grad_norm": 0.2651576101779938, "learning_rate": 1.0475305431842054e-07, "loss": 0.2715, "step": 19693 }, { "epoch": 2.823917407513622, "grad_norm": 0.26878082752227783, "learning_rate": 1.0458325144176996e-07, "loss": 0.2619, "step": 19694 }, { "epoch": 2.824060797246917, "grad_norm": 0.2752624452114105, "learning_rate": 1.0441358484599152e-07, "loss": 0.2696, "step": 19695 }, { "epoch": 2.824204186980212, "grad_norm": 0.2932940125465393, "learning_rate": 1.0424405453580755e-07, "loss": 0.2782, "step": 19696 }, { "epoch": 2.8243475767135076, "grad_norm": 0.29219523072242737, "learning_rate": 1.0407466051593873e-07, "loss": 0.2752, "step": 19697 }, { "epoch": 2.8244909664468025, "grad_norm": 0.2805224657058716, "learning_rate": 1.0390540279109907e-07, "loss": 0.28, "step": 19698 }, { "epoch": 2.8246343561800975, "grad_norm": 0.2863211929798126, "learning_rate": 1.0373628136600255e-07, "loss": 0.2735, "step": 19699 }, { "epoch": 2.8247777459133925, "grad_norm": 0.2874003052711487, "learning_rate": 1.0356729624535545e-07, "loss": 0.2862, "step": 19700 }, { "epoch": 2.8249211356466875, "grad_norm": 0.2676335871219635, "learning_rate": 1.0339844743386285e-07, "loss": 0.2708, "step": 19701 }, { "epoch": 2.825064525379983, "grad_norm": 0.25757545232772827, "learning_rate": 1.032297349362249e-07, "loss": 0.2857, "step": 19702 }, { "epoch": 2.825207915113278, "grad_norm": 0.25808557868003845, "learning_rate": 1.030611587571384e-07, "loss": 0.2868, "step": 19703 }, { "epoch": 2.825351304846573, "grad_norm": 0.2537895441055298, "learning_rate": 1.0289271890129626e-07, "loss": 0.287, "step": 19704 }, { "epoch": 2.8254946945798682, "grad_norm": 0.2551862895488739, "learning_rate": 1.0272441537338807e-07, "loss": 0.2828, "step": 19705 }, { "epoch": 2.825638084313163, "grad_norm": 0.2655159533023834, "learning_rate": 1.0255624817809896e-07, "loss": 0.2773, "step": 19706 }, { "epoch": 2.825781474046458, "grad_norm": 0.2634686827659607, "learning_rate": 1.0238821732010962e-07, "loss": 0.2727, "step": 19707 }, { "epoch": 2.825924863779753, "grad_norm": 0.26246050000190735, "learning_rate": 1.0222032280409854e-07, "loss": 0.269, "step": 19708 }, { "epoch": 2.8260682535130486, "grad_norm": 0.27982476353645325, "learning_rate": 1.0205256463473923e-07, "loss": 0.2635, "step": 19709 }, { "epoch": 2.8262116432463436, "grad_norm": 0.27957987785339355, "learning_rate": 1.0188494281670236e-07, "loss": 0.2689, "step": 19710 }, { "epoch": 2.8263550329796385, "grad_norm": 0.273551881313324, "learning_rate": 1.0171745735465366e-07, "loss": 0.2695, "step": 19711 }, { "epoch": 2.826498422712934, "grad_norm": 0.27046287059783936, "learning_rate": 1.0155010825325606e-07, "loss": 0.2719, "step": 19712 }, { "epoch": 2.826641812446229, "grad_norm": 0.27202075719833374, "learning_rate": 1.0138289551716862e-07, "loss": 0.2656, "step": 19713 }, { "epoch": 2.826785202179524, "grad_norm": 0.27100491523742676, "learning_rate": 1.0121581915104539e-07, "loss": 0.2812, "step": 19714 }, { "epoch": 2.826928591912819, "grad_norm": 0.2806270122528076, "learning_rate": 1.0104887915953875e-07, "loss": 0.2894, "step": 19715 }, { "epoch": 2.8270719816461143, "grad_norm": 0.27191221714019775, "learning_rate": 1.0088207554729501e-07, "loss": 0.298, "step": 19716 }, { "epoch": 2.8272153713794093, "grad_norm": 0.26146435737609863, "learning_rate": 1.0071540831895765e-07, "loss": 0.3042, "step": 19717 }, { "epoch": 2.8273587611127042, "grad_norm": 0.27383655309677124, "learning_rate": 1.0054887747916686e-07, "loss": 0.2865, "step": 19718 }, { "epoch": 2.8275021508459997, "grad_norm": 0.27222272753715515, "learning_rate": 1.0038248303255892e-07, "loss": 0.2855, "step": 19719 }, { "epoch": 2.8276455405792946, "grad_norm": 0.2753339409828186, "learning_rate": 1.0021622498376516e-07, "loss": 0.2707, "step": 19720 }, { "epoch": 2.8277889303125896, "grad_norm": 0.2714104950428009, "learning_rate": 1.0005010333741517e-07, "loss": 0.2764, "step": 19721 }, { "epoch": 2.8279323200458846, "grad_norm": 0.2629638612270355, "learning_rate": 9.988411809813247e-08, "loss": 0.2834, "step": 19722 }, { "epoch": 2.8280757097791795, "grad_norm": 0.25232383608818054, "learning_rate": 9.971826927053785e-08, "loss": 0.2943, "step": 19723 }, { "epoch": 2.828219099512475, "grad_norm": 0.24519221484661102, "learning_rate": 9.955255685924869e-08, "loss": 0.2783, "step": 19724 }, { "epoch": 2.82836248924577, "grad_norm": 0.26467856764793396, "learning_rate": 9.938698086887799e-08, "loss": 0.273, "step": 19725 }, { "epoch": 2.8285058789790654, "grad_norm": 0.25977805256843567, "learning_rate": 9.922154130403539e-08, "loss": 0.2794, "step": 19726 }, { "epoch": 2.8286492687123603, "grad_norm": 0.2733069360256195, "learning_rate": 9.905623816932608e-08, "loss": 0.288, "step": 19727 }, { "epoch": 2.8287926584456553, "grad_norm": 0.2772485315799713, "learning_rate": 9.889107146935196e-08, "loss": 0.2734, "step": 19728 }, { "epoch": 2.8289360481789503, "grad_norm": 0.2823554277420044, "learning_rate": 9.872604120871155e-08, "loss": 0.2926, "step": 19729 }, { "epoch": 2.8290794379122453, "grad_norm": 0.2633049488067627, "learning_rate": 9.856114739199784e-08, "loss": 0.2786, "step": 19730 }, { "epoch": 2.8292228276455407, "grad_norm": 0.27591672539711, "learning_rate": 9.83963900238022e-08, "loss": 0.3026, "step": 19731 }, { "epoch": 2.8293662173788356, "grad_norm": 0.251102089881897, "learning_rate": 9.823176910871146e-08, "loss": 0.2941, "step": 19732 }, { "epoch": 2.8295096071121306, "grad_norm": 0.25060757994651794, "learning_rate": 9.806728465130755e-08, "loss": 0.2683, "step": 19733 }, { "epoch": 2.829652996845426, "grad_norm": 0.2664586305618286, "learning_rate": 9.790293665616956e-08, "loss": 0.2771, "step": 19734 }, { "epoch": 2.829796386578721, "grad_norm": 0.27001363039016724, "learning_rate": 9.77387251278733e-08, "loss": 0.2831, "step": 19735 }, { "epoch": 2.829939776312016, "grad_norm": 0.2666347622871399, "learning_rate": 9.75746500709901e-08, "loss": 0.2755, "step": 19736 }, { "epoch": 2.830083166045311, "grad_norm": 0.26200002431869507, "learning_rate": 9.741071149008741e-08, "loss": 0.2808, "step": 19737 }, { "epoch": 2.8302265557786064, "grad_norm": 0.2623433768749237, "learning_rate": 9.724690938972825e-08, "loss": 0.287, "step": 19738 }, { "epoch": 2.8303699455119014, "grad_norm": 0.2787465453147888, "learning_rate": 9.708324377447342e-08, "loss": 0.2704, "step": 19739 }, { "epoch": 2.8305133352451963, "grad_norm": 0.26386314630508423, "learning_rate": 9.691971464887928e-08, "loss": 0.2858, "step": 19740 }, { "epoch": 2.8306567249784917, "grad_norm": 0.2617809772491455, "learning_rate": 9.675632201749774e-08, "loss": 0.2828, "step": 19741 }, { "epoch": 2.8308001147117867, "grad_norm": 0.25928443670272827, "learning_rate": 9.659306588487794e-08, "loss": 0.279, "step": 19742 }, { "epoch": 2.8309435044450817, "grad_norm": 0.2804149389266968, "learning_rate": 9.642994625556401e-08, "loss": 0.2874, "step": 19743 }, { "epoch": 2.8310868941783767, "grad_norm": 0.25629737973213196, "learning_rate": 9.626696313409734e-08, "loss": 0.2919, "step": 19744 }, { "epoch": 2.831230283911672, "grad_norm": 0.27220773696899414, "learning_rate": 9.610411652501484e-08, "loss": 0.289, "step": 19745 }, { "epoch": 2.831373673644967, "grad_norm": 0.26460832357406616, "learning_rate": 9.594140643285066e-08, "loss": 0.2912, "step": 19746 }, { "epoch": 2.831517063378262, "grad_norm": 0.2553250193595886, "learning_rate": 9.577883286213341e-08, "loss": 0.2791, "step": 19747 }, { "epoch": 2.8316604531115575, "grad_norm": 0.25358766317367554, "learning_rate": 9.561639581738946e-08, "loss": 0.2852, "step": 19748 }, { "epoch": 2.8318038428448524, "grad_norm": 0.2515426278114319, "learning_rate": 9.545409530314076e-08, "loss": 0.2863, "step": 19749 }, { "epoch": 2.8319472325781474, "grad_norm": 0.2717718482017517, "learning_rate": 9.529193132390535e-08, "loss": 0.3023, "step": 19750 }, { "epoch": 2.8320906223114424, "grad_norm": 0.2879253029823303, "learning_rate": 9.51299038841974e-08, "loss": 0.269, "step": 19751 }, { "epoch": 2.8322340120447373, "grad_norm": 0.26895564794540405, "learning_rate": 9.496801298852832e-08, "loss": 0.284, "step": 19752 }, { "epoch": 2.8323774017780328, "grad_norm": 0.2723371088504791, "learning_rate": 9.480625864140447e-08, "loss": 0.2623, "step": 19753 }, { "epoch": 2.8325207915113277, "grad_norm": 0.26980823278427124, "learning_rate": 9.464464084732838e-08, "loss": 0.2845, "step": 19754 }, { "epoch": 2.832664181244623, "grad_norm": 0.25843748450279236, "learning_rate": 9.44831596107998e-08, "loss": 0.2873, "step": 19755 }, { "epoch": 2.832807570977918, "grad_norm": 0.2660541236400604, "learning_rate": 9.432181493631398e-08, "loss": 0.282, "step": 19756 }, { "epoch": 2.832950960711213, "grad_norm": 0.26501232385635376, "learning_rate": 9.416060682836292e-08, "loss": 0.263, "step": 19757 }, { "epoch": 2.833094350444508, "grad_norm": 0.26293736696243286, "learning_rate": 9.399953529143302e-08, "loss": 0.2756, "step": 19758 }, { "epoch": 2.833237740177803, "grad_norm": 0.2694896459579468, "learning_rate": 9.383860033001013e-08, "loss": 0.2941, "step": 19759 }, { "epoch": 2.8333811299110985, "grad_norm": 0.28857964277267456, "learning_rate": 9.367780194857345e-08, "loss": 0.2791, "step": 19760 }, { "epoch": 2.8335245196443934, "grad_norm": 0.27326953411102295, "learning_rate": 9.351714015159885e-08, "loss": 0.2824, "step": 19761 }, { "epoch": 2.8336679093776884, "grad_norm": 0.2624710500240326, "learning_rate": 9.335661494355997e-08, "loss": 0.2719, "step": 19762 }, { "epoch": 2.833811299110984, "grad_norm": 0.25581398606300354, "learning_rate": 9.319622632892433e-08, "loss": 0.2706, "step": 19763 }, { "epoch": 2.833954688844279, "grad_norm": 0.25308406352996826, "learning_rate": 9.303597431215894e-08, "loss": 0.2871, "step": 19764 }, { "epoch": 2.834098078577574, "grad_norm": 0.25976067781448364, "learning_rate": 9.287585889772243e-08, "loss": 0.2745, "step": 19765 }, { "epoch": 2.8342414683108688, "grad_norm": 0.26409175992012024, "learning_rate": 9.271588009007404e-08, "loss": 0.2923, "step": 19766 }, { "epoch": 2.834384858044164, "grad_norm": 0.2867307960987091, "learning_rate": 9.25560378936663e-08, "loss": 0.2953, "step": 19767 }, { "epoch": 2.834528247777459, "grad_norm": 0.26591405272483826, "learning_rate": 9.239633231295009e-08, "loss": 0.2703, "step": 19768 }, { "epoch": 2.834671637510754, "grad_norm": 0.2585049569606781, "learning_rate": 9.223676335237076e-08, "loss": 0.2887, "step": 19769 }, { "epoch": 2.8348150272440495, "grad_norm": 0.27188122272491455, "learning_rate": 9.207733101636973e-08, "loss": 0.2863, "step": 19770 }, { "epoch": 2.8349584169773445, "grad_norm": 0.2648644745349884, "learning_rate": 9.191803530938626e-08, "loss": 0.2825, "step": 19771 }, { "epoch": 2.8351018067106395, "grad_norm": 0.29615867137908936, "learning_rate": 9.175887623585456e-08, "loss": 0.2804, "step": 19772 }, { "epoch": 2.8352451964439345, "grad_norm": 0.2761286497116089, "learning_rate": 9.159985380020553e-08, "loss": 0.2797, "step": 19773 }, { "epoch": 2.8353885861772294, "grad_norm": 0.25959232449531555, "learning_rate": 9.144096800686563e-08, "loss": 0.2809, "step": 19774 }, { "epoch": 2.835531975910525, "grad_norm": 0.26208579540252686, "learning_rate": 9.128221886025968e-08, "loss": 0.2887, "step": 19775 }, { "epoch": 2.83567536564382, "grad_norm": 0.27941158413887024, "learning_rate": 9.112360636480467e-08, "loss": 0.2869, "step": 19776 }, { "epoch": 2.8358187553771153, "grad_norm": 0.2762361168861389, "learning_rate": 9.096513052491707e-08, "loss": 0.2683, "step": 19777 }, { "epoch": 2.8359621451104102, "grad_norm": 0.2565026581287384, "learning_rate": 9.080679134500892e-08, "loss": 0.2922, "step": 19778 }, { "epoch": 2.836105534843705, "grad_norm": 0.2866232097148895, "learning_rate": 9.064858882948835e-08, "loss": 0.2637, "step": 19779 }, { "epoch": 2.836248924577, "grad_norm": 0.27111056447029114, "learning_rate": 9.049052298275907e-08, "loss": 0.2791, "step": 19780 }, { "epoch": 2.836392314310295, "grad_norm": 0.2795124351978302, "learning_rate": 9.033259380922088e-08, "loss": 0.2755, "step": 19781 }, { "epoch": 2.8365357040435906, "grad_norm": 0.26947423815727234, "learning_rate": 9.017480131327138e-08, "loss": 0.2798, "step": 19782 }, { "epoch": 2.8366790937768855, "grad_norm": 0.25582849979400635, "learning_rate": 9.001714549930207e-08, "loss": 0.3022, "step": 19783 }, { "epoch": 2.8368224835101805, "grad_norm": 0.26061150431632996, "learning_rate": 8.985962637170276e-08, "loss": 0.3027, "step": 19784 }, { "epoch": 2.836965873243476, "grad_norm": 0.28061074018478394, "learning_rate": 8.970224393485827e-08, "loss": 0.2654, "step": 19785 }, { "epoch": 2.837109262976771, "grad_norm": 0.2653280794620514, "learning_rate": 8.954499819314955e-08, "loss": 0.2743, "step": 19786 }, { "epoch": 2.837252652710066, "grad_norm": 0.25807061791419983, "learning_rate": 8.938788915095476e-08, "loss": 0.2821, "step": 19787 }, { "epoch": 2.837396042443361, "grad_norm": 0.2845047414302826, "learning_rate": 8.92309168126465e-08, "loss": 0.2633, "step": 19788 }, { "epoch": 2.8375394321766563, "grad_norm": 0.25271356105804443, "learning_rate": 8.90740811825963e-08, "loss": 0.272, "step": 19789 }, { "epoch": 2.8376828219099512, "grad_norm": 0.2707446217536926, "learning_rate": 8.891738226516844e-08, "loss": 0.2762, "step": 19790 }, { "epoch": 2.837826211643246, "grad_norm": 0.2626059949398041, "learning_rate": 8.876082006472719e-08, "loss": 0.2698, "step": 19791 }, { "epoch": 2.8379696013765416, "grad_norm": 0.2747134864330292, "learning_rate": 8.860439458562852e-08, "loss": 0.2902, "step": 19792 }, { "epoch": 2.8381129911098366, "grad_norm": 0.2755906879901886, "learning_rate": 8.844810583222896e-08, "loss": 0.2837, "step": 19793 }, { "epoch": 2.8382563808431316, "grad_norm": 0.2598196864128113, "learning_rate": 8.82919538088789e-08, "loss": 0.2745, "step": 19794 }, { "epoch": 2.8383997705764266, "grad_norm": 0.24680103361606598, "learning_rate": 8.813593851992485e-08, "loss": 0.2827, "step": 19795 }, { "epoch": 2.838543160309722, "grad_norm": 0.2682311534881592, "learning_rate": 8.79800599697106e-08, "loss": 0.28, "step": 19796 }, { "epoch": 2.838686550043017, "grad_norm": 0.26852408051490784, "learning_rate": 8.782431816257542e-08, "loss": 0.2663, "step": 19797 }, { "epoch": 2.838829939776312, "grad_norm": 0.2774624526500702, "learning_rate": 8.766871310285474e-08, "loss": 0.3112, "step": 19798 }, { "epoch": 2.8389733295096073, "grad_norm": 0.2542845904827118, "learning_rate": 8.751324479488066e-08, "loss": 0.2859, "step": 19799 }, { "epoch": 2.8391167192429023, "grad_norm": 0.2702324390411377, "learning_rate": 8.735791324298082e-08, "loss": 0.2889, "step": 19800 }, { "epoch": 2.8392601089761973, "grad_norm": 0.28432101011276245, "learning_rate": 8.720271845147955e-08, "loss": 0.2928, "step": 19801 }, { "epoch": 2.8394034987094923, "grad_norm": 0.27598366141319275, "learning_rate": 8.704766042469726e-08, "loss": 0.2675, "step": 19802 }, { "epoch": 2.8395468884427872, "grad_norm": 0.29522860050201416, "learning_rate": 8.689273916695107e-08, "loss": 0.2952, "step": 19803 }, { "epoch": 2.8396902781760827, "grad_norm": 0.2873147130012512, "learning_rate": 8.673795468255253e-08, "loss": 0.2796, "step": 19804 }, { "epoch": 2.8398336679093776, "grad_norm": 0.2735750079154968, "learning_rate": 8.658330697581208e-08, "loss": 0.2786, "step": 19805 }, { "epoch": 2.839977057642673, "grad_norm": 0.2841363847255707, "learning_rate": 8.64287960510335e-08, "loss": 0.2884, "step": 19806 }, { "epoch": 2.840120447375968, "grad_norm": 0.25999805331230164, "learning_rate": 8.627442191251945e-08, "loss": 0.2814, "step": 19807 }, { "epoch": 2.840263837109263, "grad_norm": 0.27748382091522217, "learning_rate": 8.612018456456595e-08, "loss": 0.3099, "step": 19808 }, { "epoch": 2.840407226842558, "grad_norm": 0.25050482153892517, "learning_rate": 8.596608401146789e-08, "loss": 0.2636, "step": 19809 }, { "epoch": 2.840550616575853, "grad_norm": 0.26642173528671265, "learning_rate": 8.581212025751518e-08, "loss": 0.2762, "step": 19810 }, { "epoch": 2.8406940063091484, "grad_norm": 0.2670290470123291, "learning_rate": 8.565829330699326e-08, "loss": 0.2663, "step": 19811 }, { "epoch": 2.8408373960424433, "grad_norm": 0.26188281178474426, "learning_rate": 8.550460316418486e-08, "loss": 0.2812, "step": 19812 }, { "epoch": 2.8409807857757383, "grad_norm": 0.276753306388855, "learning_rate": 8.535104983336873e-08, "loss": 0.2919, "step": 19813 }, { "epoch": 2.8411241755090337, "grad_norm": 0.27323707938194275, "learning_rate": 8.51976333188187e-08, "loss": 0.2894, "step": 19814 }, { "epoch": 2.8412675652423287, "grad_norm": 0.27024251222610474, "learning_rate": 8.504435362480634e-08, "loss": 0.2757, "step": 19815 }, { "epoch": 2.8414109549756237, "grad_norm": 0.28153687715530396, "learning_rate": 8.489121075559825e-08, "loss": 0.2832, "step": 19816 }, { "epoch": 2.8415543447089187, "grad_norm": 0.2603842616081238, "learning_rate": 8.473820471545824e-08, "loss": 0.2897, "step": 19817 }, { "epoch": 2.841697734442214, "grad_norm": 0.27804285287857056, "learning_rate": 8.458533550864623e-08, "loss": 0.2925, "step": 19818 }, { "epoch": 2.841841124175509, "grad_norm": 0.27043044567108154, "learning_rate": 8.44326031394166e-08, "loss": 0.2859, "step": 19819 }, { "epoch": 2.841984513908804, "grad_norm": 0.26432672142982483, "learning_rate": 8.428000761202148e-08, "loss": 0.2886, "step": 19820 }, { "epoch": 2.8421279036420994, "grad_norm": 0.27003970742225647, "learning_rate": 8.412754893070863e-08, "loss": 0.2805, "step": 19821 }, { "epoch": 2.8422712933753944, "grad_norm": 0.26082324981689453, "learning_rate": 8.39752270997235e-08, "loss": 0.2712, "step": 19822 }, { "epoch": 2.8424146831086894, "grad_norm": 0.2671222686767578, "learning_rate": 8.382304212330606e-08, "loss": 0.2846, "step": 19823 }, { "epoch": 2.8425580728419844, "grad_norm": 0.2679899036884308, "learning_rate": 8.367099400569234e-08, "loss": 0.2847, "step": 19824 }, { "epoch": 2.8427014625752793, "grad_norm": 0.26375019550323486, "learning_rate": 8.351908275111564e-08, "loss": 0.3058, "step": 19825 }, { "epoch": 2.8428448523085748, "grad_norm": 0.28060126304626465, "learning_rate": 8.336730836380425e-08, "loss": 0.2725, "step": 19826 }, { "epoch": 2.8429882420418697, "grad_norm": 0.27161645889282227, "learning_rate": 8.321567084798421e-08, "loss": 0.3037, "step": 19827 }, { "epoch": 2.843131631775165, "grad_norm": 0.2567465305328369, "learning_rate": 8.306417020787605e-08, "loss": 0.3, "step": 19828 }, { "epoch": 2.84327502150846, "grad_norm": 0.26514673233032227, "learning_rate": 8.291280644769805e-08, "loss": 0.2731, "step": 19829 }, { "epoch": 2.843418411241755, "grad_norm": 0.2747997045516968, "learning_rate": 8.276157957166353e-08, "loss": 0.2661, "step": 19830 }, { "epoch": 2.84356180097505, "grad_norm": 0.2704046070575714, "learning_rate": 8.261048958398243e-08, "loss": 0.2774, "step": 19831 }, { "epoch": 2.843705190708345, "grad_norm": 0.28705453872680664, "learning_rate": 8.245953648886141e-08, "loss": 0.2542, "step": 19832 }, { "epoch": 2.8438485804416405, "grad_norm": 0.2648771405220032, "learning_rate": 8.230872029050152e-08, "loss": 0.2682, "step": 19833 }, { "epoch": 2.8439919701749354, "grad_norm": 0.26347196102142334, "learning_rate": 8.215804099310276e-08, "loss": 0.2746, "step": 19834 }, { "epoch": 2.8441353599082304, "grad_norm": 0.2738102376461029, "learning_rate": 8.200749860085899e-08, "loss": 0.2746, "step": 19835 }, { "epoch": 2.844278749641526, "grad_norm": 0.2769353687763214, "learning_rate": 8.185709311796076e-08, "loss": 0.2873, "step": 19836 }, { "epoch": 2.844422139374821, "grad_norm": 0.2683756351470947, "learning_rate": 8.170682454859524e-08, "loss": 0.3061, "step": 19837 }, { "epoch": 2.8445655291081158, "grad_norm": 0.2606719434261322, "learning_rate": 8.155669289694689e-08, "loss": 0.2747, "step": 19838 }, { "epoch": 2.8447089188414107, "grad_norm": 0.2683185636997223, "learning_rate": 8.140669816719349e-08, "loss": 0.2847, "step": 19839 }, { "epoch": 2.844852308574706, "grad_norm": 0.2627221941947937, "learning_rate": 8.125684036351167e-08, "loss": 0.2985, "step": 19840 }, { "epoch": 2.844995698308001, "grad_norm": 0.26496538519859314, "learning_rate": 8.110711949007255e-08, "loss": 0.2588, "step": 19841 }, { "epoch": 2.845139088041296, "grad_norm": 0.25571808218955994, "learning_rate": 8.095753555104502e-08, "loss": 0.2944, "step": 19842 }, { "epoch": 2.8452824777745915, "grad_norm": 0.2691170573234558, "learning_rate": 8.080808855059297e-08, "loss": 0.2785, "step": 19843 }, { "epoch": 2.8454258675078865, "grad_norm": 0.25682610273361206, "learning_rate": 8.06587784928764e-08, "loss": 0.2504, "step": 19844 }, { "epoch": 2.8455692572411815, "grad_norm": 0.26974594593048096, "learning_rate": 8.0509605382052e-08, "loss": 0.285, "step": 19845 }, { "epoch": 2.8457126469744765, "grad_norm": 0.25805792212486267, "learning_rate": 8.036056922227254e-08, "loss": 0.2742, "step": 19846 }, { "epoch": 2.845856036707772, "grad_norm": 0.2655390501022339, "learning_rate": 8.021167001768749e-08, "loss": 0.2908, "step": 19847 }, { "epoch": 2.845999426441067, "grad_norm": 0.26297473907470703, "learning_rate": 8.006290777244075e-08, "loss": 0.2767, "step": 19848 }, { "epoch": 2.846142816174362, "grad_norm": 0.28292253613471985, "learning_rate": 7.991428249067457e-08, "loss": 0.2927, "step": 19849 }, { "epoch": 2.8462862059076572, "grad_norm": 0.27609583735466003, "learning_rate": 7.976579417652674e-08, "loss": 0.2787, "step": 19850 }, { "epoch": 2.846429595640952, "grad_norm": 0.26792165637016296, "learning_rate": 7.961744283413009e-08, "loss": 0.2808, "step": 19851 }, { "epoch": 2.846572985374247, "grad_norm": 0.2899878919124603, "learning_rate": 7.946922846761518e-08, "loss": 0.2838, "step": 19852 }, { "epoch": 2.846716375107542, "grad_norm": 0.255310595035553, "learning_rate": 7.93211510811076e-08, "loss": 0.2764, "step": 19853 }, { "epoch": 2.846859764840837, "grad_norm": 0.2690769135951996, "learning_rate": 7.917321067872963e-08, "loss": 0.2624, "step": 19854 }, { "epoch": 2.8470031545741326, "grad_norm": 0.26543039083480835, "learning_rate": 7.902540726459962e-08, "loss": 0.2979, "step": 19855 }, { "epoch": 2.8471465443074275, "grad_norm": 0.24863380193710327, "learning_rate": 7.887774084283261e-08, "loss": 0.2972, "step": 19856 }, { "epoch": 2.847289934040723, "grad_norm": 0.2497711479663849, "learning_rate": 7.873021141753923e-08, "loss": 0.2822, "step": 19857 }, { "epoch": 2.847433323774018, "grad_norm": 0.2509517967700958, "learning_rate": 7.858281899282616e-08, "loss": 0.277, "step": 19858 }, { "epoch": 2.847576713507313, "grad_norm": 0.2798895239830017, "learning_rate": 7.84355635727968e-08, "loss": 0.2686, "step": 19859 }, { "epoch": 2.847720103240608, "grad_norm": 0.26961570978164673, "learning_rate": 7.828844516155121e-08, "loss": 0.2675, "step": 19860 }, { "epoch": 2.847863492973903, "grad_norm": 0.27827784419059753, "learning_rate": 7.814146376318388e-08, "loss": 0.2871, "step": 19861 }, { "epoch": 2.8480068827071983, "grad_norm": 0.2950156629085541, "learning_rate": 7.799461938178709e-08, "loss": 0.2861, "step": 19862 }, { "epoch": 2.8481502724404932, "grad_norm": 0.25406453013420105, "learning_rate": 7.78479120214487e-08, "loss": 0.2785, "step": 19863 }, { "epoch": 2.848293662173788, "grad_norm": 0.26781052350997925, "learning_rate": 7.770134168625266e-08, "loss": 0.2884, "step": 19864 }, { "epoch": 2.8484370519070836, "grad_norm": 0.29494020342826843, "learning_rate": 7.755490838027902e-08, "loss": 0.2877, "step": 19865 }, { "epoch": 2.8485804416403786, "grad_norm": 0.2868022322654724, "learning_rate": 7.740861210760454e-08, "loss": 0.2872, "step": 19866 }, { "epoch": 2.8487238313736736, "grad_norm": 0.26119524240493774, "learning_rate": 7.72624528723026e-08, "loss": 0.2767, "step": 19867 }, { "epoch": 2.8488672211069686, "grad_norm": 0.2807552218437195, "learning_rate": 7.71164306784411e-08, "loss": 0.2721, "step": 19868 }, { "epoch": 2.849010610840264, "grad_norm": 0.28038185834884644, "learning_rate": 7.697054553008564e-08, "loss": 0.2674, "step": 19869 }, { "epoch": 2.849154000573559, "grad_norm": 0.26908186078071594, "learning_rate": 7.682479743129635e-08, "loss": 0.2845, "step": 19870 }, { "epoch": 2.849297390306854, "grad_norm": 0.2797851264476776, "learning_rate": 7.667918638613214e-08, "loss": 0.2851, "step": 19871 }, { "epoch": 2.8494407800401493, "grad_norm": 0.28913167119026184, "learning_rate": 7.653371239864594e-08, "loss": 0.2978, "step": 19872 }, { "epoch": 2.8495841697734443, "grad_norm": 0.2659079134464264, "learning_rate": 7.638837547288724e-08, "loss": 0.2863, "step": 19873 }, { "epoch": 2.8497275595067393, "grad_norm": 0.29308032989501953, "learning_rate": 7.624317561290228e-08, "loss": 0.2752, "step": 19874 }, { "epoch": 2.8498709492400343, "grad_norm": 0.282349556684494, "learning_rate": 7.609811282273282e-08, "loss": 0.2851, "step": 19875 }, { "epoch": 2.8500143389733297, "grad_norm": 0.2683701515197754, "learning_rate": 7.595318710641785e-08, "loss": 0.2952, "step": 19876 }, { "epoch": 2.8501577287066246, "grad_norm": 0.26014408469200134, "learning_rate": 7.580839846799137e-08, "loss": 0.282, "step": 19877 }, { "epoch": 2.8503011184399196, "grad_norm": 0.2939957082271576, "learning_rate": 7.566374691148404e-08, "loss": 0.2689, "step": 19878 }, { "epoch": 2.850444508173215, "grad_norm": 0.2594049274921417, "learning_rate": 7.551923244092318e-08, "loss": 0.2887, "step": 19879 }, { "epoch": 2.85058789790651, "grad_norm": 0.2715880870819092, "learning_rate": 7.537485506033171e-08, "loss": 0.2818, "step": 19880 }, { "epoch": 2.850731287639805, "grad_norm": 0.26730072498321533, "learning_rate": 7.523061477372862e-08, "loss": 0.2743, "step": 19881 }, { "epoch": 2.8508746773731, "grad_norm": 0.2927394211292267, "learning_rate": 7.508651158512958e-08, "loss": 0.2723, "step": 19882 }, { "epoch": 2.851018067106395, "grad_norm": 0.27963465452194214, "learning_rate": 7.494254549854696e-08, "loss": 0.2694, "step": 19883 }, { "epoch": 2.8511614568396904, "grad_norm": 0.2597850561141968, "learning_rate": 7.479871651798642e-08, "loss": 0.2713, "step": 19884 }, { "epoch": 2.8513048465729853, "grad_norm": 0.2508169710636139, "learning_rate": 7.465502464745366e-08, "loss": 0.2746, "step": 19885 }, { "epoch": 2.8514482363062803, "grad_norm": 0.24251261353492737, "learning_rate": 7.451146989094882e-08, "loss": 0.2715, "step": 19886 }, { "epoch": 2.8515916260395757, "grad_norm": 0.27623817324638367, "learning_rate": 7.436805225246756e-08, "loss": 0.2655, "step": 19887 }, { "epoch": 2.8517350157728707, "grad_norm": 0.2919134497642517, "learning_rate": 7.422477173600284e-08, "loss": 0.2762, "step": 19888 }, { "epoch": 2.8518784055061657, "grad_norm": 0.26957499980926514, "learning_rate": 7.408162834554311e-08, "loss": 0.299, "step": 19889 }, { "epoch": 2.8520217952394606, "grad_norm": 0.2764844298362732, "learning_rate": 7.393862208507296e-08, "loss": 0.2852, "step": 19890 }, { "epoch": 2.852165184972756, "grad_norm": 0.26301610469818115, "learning_rate": 7.379575295857422e-08, "loss": 0.27, "step": 19891 }, { "epoch": 2.852308574706051, "grad_norm": 0.23704823851585388, "learning_rate": 7.365302097002369e-08, "loss": 0.2747, "step": 19892 }, { "epoch": 2.852451964439346, "grad_norm": 0.27515149116516113, "learning_rate": 7.351042612339487e-08, "loss": 0.2829, "step": 19893 }, { "epoch": 2.8525953541726414, "grad_norm": 0.2690143287181854, "learning_rate": 7.336796842265736e-08, "loss": 0.2844, "step": 19894 }, { "epoch": 2.8527387439059364, "grad_norm": 0.2724616825580597, "learning_rate": 7.322564787177744e-08, "loss": 0.2919, "step": 19895 }, { "epoch": 2.8528821336392314, "grad_norm": 0.27131137251853943, "learning_rate": 7.308346447471637e-08, "loss": 0.2734, "step": 19896 }, { "epoch": 2.8530255233725264, "grad_norm": 0.2653850018978119, "learning_rate": 7.294141823543266e-08, "loss": 0.2771, "step": 19897 }, { "epoch": 2.8531689131058218, "grad_norm": 0.26180651783943176, "learning_rate": 7.279950915788036e-08, "loss": 0.2754, "step": 19898 }, { "epoch": 2.8533123028391167, "grad_norm": 0.2660224735736847, "learning_rate": 7.265773724601132e-08, "loss": 0.2783, "step": 19899 }, { "epoch": 2.8534556925724117, "grad_norm": 0.28724783658981323, "learning_rate": 7.251610250377017e-08, "loss": 0.265, "step": 19900 }, { "epoch": 2.853599082305707, "grad_norm": 0.27133381366729736, "learning_rate": 7.237460493510095e-08, "loss": 0.2911, "step": 19901 }, { "epoch": 2.853742472039002, "grad_norm": 0.26495078206062317, "learning_rate": 7.223324454394275e-08, "loss": 0.2777, "step": 19902 }, { "epoch": 2.853885861772297, "grad_norm": 0.2502635717391968, "learning_rate": 7.209202133423076e-08, "loss": 0.2823, "step": 19903 }, { "epoch": 2.854029251505592, "grad_norm": 0.27330997586250305, "learning_rate": 7.195093530989683e-08, "loss": 0.2897, "step": 19904 }, { "epoch": 2.854172641238887, "grad_norm": 0.2728016674518585, "learning_rate": 7.180998647486726e-08, "loss": 0.2923, "step": 19905 }, { "epoch": 2.8543160309721824, "grad_norm": 0.2763834595680237, "learning_rate": 7.166917483306724e-08, "loss": 0.2626, "step": 19906 }, { "epoch": 2.8544594207054774, "grad_norm": 0.2707696259021759, "learning_rate": 7.152850038841586e-08, "loss": 0.2734, "step": 19907 }, { "epoch": 2.854602810438773, "grad_norm": 0.3085831105709076, "learning_rate": 7.138796314482998e-08, "loss": 0.289, "step": 19908 }, { "epoch": 2.854746200172068, "grad_norm": 0.2708190083503723, "learning_rate": 7.124756310622094e-08, "loss": 0.2959, "step": 19909 }, { "epoch": 2.854889589905363, "grad_norm": 0.26482585072517395, "learning_rate": 7.110730027649837e-08, "loss": 0.2574, "step": 19910 }, { "epoch": 2.8550329796386578, "grad_norm": 0.25523144006729126, "learning_rate": 7.096717465956637e-08, "loss": 0.2785, "step": 19911 }, { "epoch": 2.8551763693719527, "grad_norm": 0.24256868660449982, "learning_rate": 7.082718625932627e-08, "loss": 0.2749, "step": 19912 }, { "epoch": 2.855319759105248, "grad_norm": 0.2587127387523651, "learning_rate": 7.068733507967441e-08, "loss": 0.285, "step": 19913 }, { "epoch": 2.855463148838543, "grad_norm": 0.2624532878398895, "learning_rate": 7.054762112450431e-08, "loss": 0.279, "step": 19914 }, { "epoch": 2.855606538571838, "grad_norm": 0.2718483805656433, "learning_rate": 7.040804439770621e-08, "loss": 0.2782, "step": 19915 }, { "epoch": 2.8557499283051335, "grad_norm": 0.26577454805374146, "learning_rate": 7.026860490316479e-08, "loss": 0.2616, "step": 19916 }, { "epoch": 2.8558933180384285, "grad_norm": 0.2622477114200592, "learning_rate": 7.012930264476192e-08, "loss": 0.2524, "step": 19917 }, { "epoch": 2.8560367077717235, "grad_norm": 0.2517848312854767, "learning_rate": 6.999013762637564e-08, "loss": 0.2698, "step": 19918 }, { "epoch": 2.8561800975050184, "grad_norm": 0.27417147159576416, "learning_rate": 6.985110985188003e-08, "loss": 0.2816, "step": 19919 }, { "epoch": 2.856323487238314, "grad_norm": 0.27795839309692383, "learning_rate": 6.971221932514594e-08, "loss": 0.2877, "step": 19920 }, { "epoch": 2.856466876971609, "grad_norm": 0.26693862676620483, "learning_rate": 6.957346605003911e-08, "loss": 0.2736, "step": 19921 }, { "epoch": 2.856610266704904, "grad_norm": 0.25971314311027527, "learning_rate": 6.94348500304226e-08, "loss": 0.2938, "step": 19922 }, { "epoch": 2.8567536564381992, "grad_norm": 0.26866552233695984, "learning_rate": 6.929637127015554e-08, "loss": 0.2775, "step": 19923 }, { "epoch": 2.856897046171494, "grad_norm": 0.2507762312889099, "learning_rate": 6.915802977309261e-08, "loss": 0.279, "step": 19924 }, { "epoch": 2.857040435904789, "grad_norm": 0.271068274974823, "learning_rate": 6.901982554308462e-08, "loss": 0.2852, "step": 19925 }, { "epoch": 2.857183825638084, "grad_norm": 0.28331097960472107, "learning_rate": 6.888175858398016e-08, "loss": 0.2689, "step": 19926 }, { "epoch": 2.8573272153713796, "grad_norm": 0.2545226812362671, "learning_rate": 6.874382889962117e-08, "loss": 0.286, "step": 19927 }, { "epoch": 2.8574706051046745, "grad_norm": 0.2647911608219147, "learning_rate": 6.8606036493849e-08, "loss": 0.2739, "step": 19928 }, { "epoch": 2.8576139948379695, "grad_norm": 0.2794800400733948, "learning_rate": 6.846838137049839e-08, "loss": 0.3031, "step": 19929 }, { "epoch": 2.857757384571265, "grad_norm": 0.2852209508419037, "learning_rate": 6.833086353340235e-08, "loss": 0.2793, "step": 19930 }, { "epoch": 2.85790077430456, "grad_norm": 0.25823351740837097, "learning_rate": 6.819348298638839e-08, "loss": 0.28, "step": 19931 }, { "epoch": 2.858044164037855, "grad_norm": 0.27322787046432495, "learning_rate": 6.805623973328124e-08, "loss": 0.2703, "step": 19932 }, { "epoch": 2.85818755377115, "grad_norm": 0.2596404254436493, "learning_rate": 6.791913377790171e-08, "loss": 0.2815, "step": 19933 }, { "epoch": 2.858330943504445, "grad_norm": 0.2807292640209198, "learning_rate": 6.778216512406676e-08, "loss": 0.277, "step": 19934 }, { "epoch": 2.8584743332377403, "grad_norm": 0.28731393814086914, "learning_rate": 6.76453337755889e-08, "loss": 0.267, "step": 19935 }, { "epoch": 2.8586177229710352, "grad_norm": 0.2593168020248413, "learning_rate": 6.750863973627786e-08, "loss": 0.2774, "step": 19936 }, { "epoch": 2.8587611127043306, "grad_norm": 0.27395153045654297, "learning_rate": 6.737208300993891e-08, "loss": 0.2736, "step": 19937 }, { "epoch": 2.8589045024376256, "grad_norm": 0.26489904522895813, "learning_rate": 6.723566360037292e-08, "loss": 0.2925, "step": 19938 }, { "epoch": 2.8590478921709206, "grad_norm": 0.26145505905151367, "learning_rate": 6.709938151137796e-08, "loss": 0.2827, "step": 19939 }, { "epoch": 2.8591912819042156, "grad_norm": 0.26937440037727356, "learning_rate": 6.69632367467482e-08, "loss": 0.2789, "step": 19940 }, { "epoch": 2.8593346716375105, "grad_norm": 0.26416531205177307, "learning_rate": 6.682722931027341e-08, "loss": 0.2786, "step": 19941 }, { "epoch": 2.859478061370806, "grad_norm": 0.2598802149295807, "learning_rate": 6.669135920573999e-08, "loss": 0.2828, "step": 19942 }, { "epoch": 2.859621451104101, "grad_norm": 0.27844035625457764, "learning_rate": 6.655562643693048e-08, "loss": 0.2869, "step": 19943 }, { "epoch": 2.859764840837396, "grad_norm": 0.2847212255001068, "learning_rate": 6.642003100762295e-08, "loss": 0.2759, "step": 19944 }, { "epoch": 2.8599082305706913, "grad_norm": 0.2716786861419678, "learning_rate": 6.628457292159274e-08, "loss": 0.2633, "step": 19945 }, { "epoch": 2.8600516203039863, "grad_norm": 0.26079845428466797, "learning_rate": 6.614925218261015e-08, "loss": 0.2778, "step": 19946 }, { "epoch": 2.8601950100372813, "grad_norm": 0.261084645986557, "learning_rate": 6.601406879444272e-08, "loss": 0.2925, "step": 19947 }, { "epoch": 2.8603383997705762, "grad_norm": 0.2372300773859024, "learning_rate": 6.587902276085411e-08, "loss": 0.2634, "step": 19948 }, { "epoch": 2.8604817895038717, "grad_norm": 0.27335432171821594, "learning_rate": 6.574411408560299e-08, "loss": 0.2689, "step": 19949 }, { "epoch": 2.8606251792371666, "grad_norm": 0.24118943512439728, "learning_rate": 6.560934277244579e-08, "loss": 0.2899, "step": 19950 }, { "epoch": 2.8607685689704616, "grad_norm": 0.26592639088630676, "learning_rate": 6.547470882513396e-08, "loss": 0.2657, "step": 19951 }, { "epoch": 2.860911958703757, "grad_norm": 0.2753373086452484, "learning_rate": 6.534021224741504e-08, "loss": 0.2766, "step": 19952 }, { "epoch": 2.861055348437052, "grad_norm": 0.262587308883667, "learning_rate": 6.52058530430344e-08, "loss": 0.2886, "step": 19953 }, { "epoch": 2.861198738170347, "grad_norm": 0.257282018661499, "learning_rate": 6.507163121573123e-08, "loss": 0.2923, "step": 19954 }, { "epoch": 2.861342127903642, "grad_norm": 0.2516748905181885, "learning_rate": 6.493754676924258e-08, "loss": 0.277, "step": 19955 }, { "epoch": 2.861485517636937, "grad_norm": 0.25226911902427673, "learning_rate": 6.4803599707301e-08, "loss": 0.2773, "step": 19956 }, { "epoch": 2.8616289073702323, "grad_norm": 0.272981196641922, "learning_rate": 6.466979003363572e-08, "loss": 0.2624, "step": 19957 }, { "epoch": 2.8617722971035273, "grad_norm": 0.2620927095413208, "learning_rate": 6.453611775197156e-08, "loss": 0.2982, "step": 19958 }, { "epoch": 2.8619156868368227, "grad_norm": 0.25911572575569153, "learning_rate": 6.440258286602941e-08, "loss": 0.2813, "step": 19959 }, { "epoch": 2.8620590765701177, "grad_norm": 0.2658190429210663, "learning_rate": 6.426918537952686e-08, "loss": 0.2857, "step": 19960 }, { "epoch": 2.8622024663034127, "grad_norm": 0.27748411893844604, "learning_rate": 6.413592529617818e-08, "loss": 0.2661, "step": 19961 }, { "epoch": 2.8623458560367077, "grad_norm": 0.2630775272846222, "learning_rate": 6.400280261969205e-08, "loss": 0.2692, "step": 19962 }, { "epoch": 2.8624892457700026, "grad_norm": 0.27999866008758545, "learning_rate": 6.386981735377496e-08, "loss": 0.3035, "step": 19963 }, { "epoch": 2.862632635503298, "grad_norm": 0.2572595477104187, "learning_rate": 6.373696950212949e-08, "loss": 0.2828, "step": 19964 }, { "epoch": 2.862776025236593, "grad_norm": 0.26002606749534607, "learning_rate": 6.36042590684527e-08, "loss": 0.2819, "step": 19965 }, { "epoch": 2.862919414969888, "grad_norm": 0.28629282116889954, "learning_rate": 6.347168605643994e-08, "loss": 0.2718, "step": 19966 }, { "epoch": 2.8630628047031834, "grad_norm": 0.2791893482208252, "learning_rate": 6.333925046978163e-08, "loss": 0.2751, "step": 19967 }, { "epoch": 2.8632061944364784, "grad_norm": 0.2743771970272064, "learning_rate": 6.320695231216423e-08, "loss": 0.2854, "step": 19968 }, { "epoch": 2.8633495841697734, "grad_norm": 0.2665567696094513, "learning_rate": 6.307479158727203e-08, "loss": 0.282, "step": 19969 }, { "epoch": 2.8634929739030683, "grad_norm": 0.2638692557811737, "learning_rate": 6.294276829878266e-08, "loss": 0.2853, "step": 19970 }, { "epoch": 2.8636363636363638, "grad_norm": 0.26933807134628296, "learning_rate": 6.281088245037204e-08, "loss": 0.2879, "step": 19971 }, { "epoch": 2.8637797533696587, "grad_norm": 0.27035918831825256, "learning_rate": 6.267913404571169e-08, "loss": 0.266, "step": 19972 }, { "epoch": 2.8639231431029537, "grad_norm": 0.2517680525779724, "learning_rate": 6.254752308846868e-08, "loss": 0.2692, "step": 19973 }, { "epoch": 2.864066532836249, "grad_norm": 0.25982341170310974, "learning_rate": 6.241604958230785e-08, "loss": 0.2868, "step": 19974 }, { "epoch": 2.864209922569544, "grad_norm": 0.2696376442909241, "learning_rate": 6.228471353088961e-08, "loss": 0.2782, "step": 19975 }, { "epoch": 2.864353312302839, "grad_norm": 0.27659282088279724, "learning_rate": 6.215351493786826e-08, "loss": 0.2841, "step": 19976 }, { "epoch": 2.864496702036134, "grad_norm": 0.268708735704422, "learning_rate": 6.202245380689754e-08, "loss": 0.2829, "step": 19977 }, { "epoch": 2.8646400917694295, "grad_norm": 0.2549397051334381, "learning_rate": 6.189153014162563e-08, "loss": 0.2875, "step": 19978 }, { "epoch": 2.8647834815027244, "grad_norm": 0.2710186243057251, "learning_rate": 6.176074394569687e-08, "loss": 0.2865, "step": 19979 }, { "epoch": 2.8649268712360194, "grad_norm": 0.26616233587265015, "learning_rate": 6.163009522275331e-08, "loss": 0.2946, "step": 19980 }, { "epoch": 2.865070260969315, "grad_norm": 0.26343944668769836, "learning_rate": 6.149958397643097e-08, "loss": 0.2629, "step": 19981 }, { "epoch": 2.86521365070261, "grad_norm": 0.2721385061740875, "learning_rate": 6.136921021036302e-08, "loss": 0.2848, "step": 19982 }, { "epoch": 2.865357040435905, "grad_norm": 0.27268415689468384, "learning_rate": 6.123897392817934e-08, "loss": 0.2899, "step": 19983 }, { "epoch": 2.8655004301691998, "grad_norm": 0.26769334077835083, "learning_rate": 6.110887513350594e-08, "loss": 0.2898, "step": 19984 }, { "epoch": 2.8656438199024947, "grad_norm": 0.2742595374584198, "learning_rate": 6.097891382996324e-08, "loss": 0.2926, "step": 19985 }, { "epoch": 2.86578720963579, "grad_norm": 0.26337751746177673, "learning_rate": 6.084909002117057e-08, "loss": 0.2824, "step": 19986 }, { "epoch": 2.865930599369085, "grad_norm": 0.2786029875278473, "learning_rate": 6.071940371074115e-08, "loss": 0.2791, "step": 19987 }, { "epoch": 2.8660739891023805, "grad_norm": 0.26265063881874084, "learning_rate": 6.058985490228541e-08, "loss": 0.2746, "step": 19988 }, { "epoch": 2.8662173788356755, "grad_norm": 0.29209646582603455, "learning_rate": 6.046044359940995e-08, "loss": 0.2849, "step": 19989 }, { "epoch": 2.8663607685689705, "grad_norm": 0.27132219076156616, "learning_rate": 6.033116980571741e-08, "loss": 0.2835, "step": 19990 }, { "epoch": 2.8665041583022655, "grad_norm": 0.33119410276412964, "learning_rate": 6.020203352480658e-08, "loss": 0.2847, "step": 19991 }, { "epoch": 2.8666475480355604, "grad_norm": 0.25990331172943115, "learning_rate": 6.007303476027181e-08, "loss": 0.2691, "step": 19992 }, { "epoch": 2.866790937768856, "grad_norm": 0.25490278005599976, "learning_rate": 5.994417351570525e-08, "loss": 0.2798, "step": 19993 }, { "epoch": 2.866934327502151, "grad_norm": 0.2512967884540558, "learning_rate": 5.981544979469344e-08, "loss": 0.2703, "step": 19994 }, { "epoch": 2.867077717235446, "grad_norm": 0.2618434429168701, "learning_rate": 5.968686360081965e-08, "loss": 0.2817, "step": 19995 }, { "epoch": 2.867221106968741, "grad_norm": 0.2701874077320099, "learning_rate": 5.95584149376649e-08, "loss": 0.2991, "step": 19996 }, { "epoch": 2.867364496702036, "grad_norm": 0.263981431722641, "learning_rate": 5.943010380880354e-08, "loss": 0.2734, "step": 19997 }, { "epoch": 2.867507886435331, "grad_norm": 0.268034428358078, "learning_rate": 5.930193021780772e-08, "loss": 0.2743, "step": 19998 }, { "epoch": 2.867651276168626, "grad_norm": 0.26189982891082764, "learning_rate": 5.917389416824626e-08, "loss": 0.2833, "step": 19999 }, { "epoch": 2.8677946659019216, "grad_norm": 0.28062963485717773, "learning_rate": 5.904599566368352e-08, "loss": 0.2604, "step": 20000 }, { "epoch": 2.8679380556352165, "grad_norm": 0.24893519282341003, "learning_rate": 5.8918234707678875e-08, "loss": 0.2695, "step": 20001 }, { "epoch": 2.8680814453685115, "grad_norm": 0.28045839071273804, "learning_rate": 5.87906113037906e-08, "loss": 0.3025, "step": 20002 }, { "epoch": 2.868224835101807, "grad_norm": 0.2524450421333313, "learning_rate": 5.866312545556973e-08, "loss": 0.2735, "step": 20003 }, { "epoch": 2.868368224835102, "grad_norm": 0.2541910409927368, "learning_rate": 5.853577716656733e-08, "loss": 0.2824, "step": 20004 }, { "epoch": 2.868511614568397, "grad_norm": 0.2687762975692749, "learning_rate": 5.840856644032666e-08, "loss": 0.2682, "step": 20005 }, { "epoch": 2.868655004301692, "grad_norm": 0.25949084758758545, "learning_rate": 5.828149328039046e-08, "loss": 0.2886, "step": 20006 }, { "epoch": 2.868798394034987, "grad_norm": 0.2743053734302521, "learning_rate": 5.815455769029532e-08, "loss": 0.276, "step": 20007 }, { "epoch": 2.8689417837682822, "grad_norm": 0.2531724274158478, "learning_rate": 5.8027759673575655e-08, "loss": 0.2808, "step": 20008 }, { "epoch": 2.869085173501577, "grad_norm": 0.25478535890579224, "learning_rate": 5.790109923376086e-08, "loss": 0.2882, "step": 20009 }, { "epoch": 2.8692285632348726, "grad_norm": 0.2618178725242615, "learning_rate": 5.7774576374377e-08, "loss": 0.2874, "step": 20010 }, { "epoch": 2.8693719529681676, "grad_norm": 0.264104962348938, "learning_rate": 5.764819109894626e-08, "loss": 0.2739, "step": 20011 }, { "epoch": 2.8695153427014626, "grad_norm": 0.2792763411998749, "learning_rate": 5.752194341098749e-08, "loss": 0.2842, "step": 20012 }, { "epoch": 2.8696587324347576, "grad_norm": 0.2496768832206726, "learning_rate": 5.7395833314014545e-08, "loss": 0.2753, "step": 20013 }, { "epoch": 2.8698021221680525, "grad_norm": 0.2845452129840851, "learning_rate": 5.726986081153851e-08, "loss": 0.2881, "step": 20014 }, { "epoch": 2.869945511901348, "grad_norm": 0.28396862745285034, "learning_rate": 5.714402590706603e-08, "loss": 0.2836, "step": 20015 }, { "epoch": 2.870088901634643, "grad_norm": 0.2629060745239258, "learning_rate": 5.701832860410095e-08, "loss": 0.2869, "step": 20016 }, { "epoch": 2.870232291367938, "grad_norm": 0.2672762870788574, "learning_rate": 5.689276890614104e-08, "loss": 0.2883, "step": 20017 }, { "epoch": 2.8703756811012333, "grad_norm": 0.2633512318134308, "learning_rate": 5.67673468166835e-08, "loss": 0.2671, "step": 20018 }, { "epoch": 2.8705190708345283, "grad_norm": 0.2595203220844269, "learning_rate": 5.664206233921832e-08, "loss": 0.2778, "step": 20019 }, { "epoch": 2.8706624605678233, "grad_norm": 0.26022613048553467, "learning_rate": 5.651691547723381e-08, "loss": 0.2982, "step": 20020 }, { "epoch": 2.8708058503011182, "grad_norm": 0.26475462317466736, "learning_rate": 5.6391906234213864e-08, "loss": 0.2719, "step": 20021 }, { "epoch": 2.8709492400344137, "grad_norm": 0.274065226316452, "learning_rate": 5.626703461363847e-08, "loss": 0.2923, "step": 20022 }, { "epoch": 2.8710926297677086, "grad_norm": 0.28018760681152344, "learning_rate": 5.6142300618983736e-08, "loss": 0.264, "step": 20023 }, { "epoch": 2.8712360195010036, "grad_norm": 0.2584024965763092, "learning_rate": 5.601770425372299e-08, "loss": 0.253, "step": 20024 }, { "epoch": 2.871379409234299, "grad_norm": 0.2556830048561096, "learning_rate": 5.589324552132347e-08, "loss": 0.2822, "step": 20025 }, { "epoch": 2.871522798967594, "grad_norm": 0.2773536145687103, "learning_rate": 5.5768924425250724e-08, "loss": 0.2812, "step": 20026 }, { "epoch": 2.871666188700889, "grad_norm": 0.2726592421531677, "learning_rate": 5.564474096896533e-08, "loss": 0.2792, "step": 20027 }, { "epoch": 2.871809578434184, "grad_norm": 0.25170642137527466, "learning_rate": 5.552069515592451e-08, "loss": 0.2764, "step": 20028 }, { "epoch": 2.8719529681674794, "grad_norm": 0.2609086334705353, "learning_rate": 5.5396786989582174e-08, "loss": 0.2902, "step": 20029 }, { "epoch": 2.8720963579007743, "grad_norm": 0.2698427140712738, "learning_rate": 5.5273016473386676e-08, "loss": 0.2655, "step": 20030 }, { "epoch": 2.8722397476340693, "grad_norm": 0.26050305366516113, "learning_rate": 5.514938361078359e-08, "loss": 0.267, "step": 20031 }, { "epoch": 2.8723831373673647, "grad_norm": 0.248459130525589, "learning_rate": 5.5025888405215165e-08, "loss": 0.2721, "step": 20032 }, { "epoch": 2.8725265271006597, "grad_norm": 0.2640722393989563, "learning_rate": 5.490253086011976e-08, "loss": 0.2794, "step": 20033 }, { "epoch": 2.8726699168339547, "grad_norm": 0.2768566608428955, "learning_rate": 5.4779310978930744e-08, "loss": 0.3034, "step": 20034 }, { "epoch": 2.8728133065672496, "grad_norm": 0.2792736887931824, "learning_rate": 5.4656228765078145e-08, "loss": 0.2724, "step": 20035 }, { "epoch": 2.8729566963005446, "grad_norm": 0.2635291814804077, "learning_rate": 5.4533284221989224e-08, "loss": 0.2849, "step": 20036 }, { "epoch": 2.87310008603384, "grad_norm": 0.26370465755462646, "learning_rate": 5.441047735308569e-08, "loss": 0.2668, "step": 20037 }, { "epoch": 2.873243475767135, "grad_norm": 0.27436792850494385, "learning_rate": 5.428780816178647e-08, "loss": 0.2856, "step": 20038 }, { "epoch": 2.8733868655004304, "grad_norm": 0.2606266438961029, "learning_rate": 5.416527665150717e-08, "loss": 0.3009, "step": 20039 }, { "epoch": 2.8735302552337254, "grad_norm": 0.2604557275772095, "learning_rate": 5.4042882825658393e-08, "loss": 0.2792, "step": 20040 }, { "epoch": 2.8736736449670204, "grad_norm": 0.2772839367389679, "learning_rate": 5.3920626687647416e-08, "loss": 0.273, "step": 20041 }, { "epoch": 2.8738170347003154, "grad_norm": 0.28053387999534607, "learning_rate": 5.379850824087818e-08, "loss": 0.2859, "step": 20042 }, { "epoch": 2.8739604244336103, "grad_norm": 0.2563495337963104, "learning_rate": 5.367652748874908e-08, "loss": 0.2682, "step": 20043 }, { "epoch": 2.8741038141669057, "grad_norm": 0.2777227759361267, "learning_rate": 5.3554684434656834e-08, "loss": 0.2722, "step": 20044 }, { "epoch": 2.8742472039002007, "grad_norm": 0.2717822194099426, "learning_rate": 5.343297908199374e-08, "loss": 0.2831, "step": 20045 }, { "epoch": 2.8743905936334957, "grad_norm": 0.27706068754196167, "learning_rate": 5.3311411434146534e-08, "loss": 0.2527, "step": 20046 }, { "epoch": 2.874533983366791, "grad_norm": 0.26339367032051086, "learning_rate": 5.3189981494500276e-08, "loss": 0.2811, "step": 20047 }, { "epoch": 2.874677373100086, "grad_norm": 0.25990691781044006, "learning_rate": 5.306868926643505e-08, "loss": 0.2821, "step": 20048 }, { "epoch": 2.874820762833381, "grad_norm": 0.2566010653972626, "learning_rate": 5.294753475332814e-08, "loss": 0.2704, "step": 20049 }, { "epoch": 2.874964152566676, "grad_norm": 0.2700408101081848, "learning_rate": 5.282651795855187e-08, "loss": 0.2763, "step": 20050 }, { "epoch": 2.8751075422999715, "grad_norm": 0.26366299390792847, "learning_rate": 5.27056388854752e-08, "loss": 0.2895, "step": 20051 }, { "epoch": 2.8752509320332664, "grad_norm": 0.26918891072273254, "learning_rate": 5.258489753746321e-08, "loss": 0.2729, "step": 20052 }, { "epoch": 2.8753943217665614, "grad_norm": 0.29206109046936035, "learning_rate": 5.246429391787655e-08, "loss": 0.28, "step": 20053 }, { "epoch": 2.875537711499857, "grad_norm": 0.2711111605167389, "learning_rate": 5.2343828030073653e-08, "loss": 0.2746, "step": 20054 }, { "epoch": 2.875681101233152, "grad_norm": 0.2684326767921448, "learning_rate": 5.222349987740793e-08, "loss": 0.2803, "step": 20055 }, { "epoch": 2.8758244909664468, "grad_norm": 0.2784750461578369, "learning_rate": 5.210330946322839e-08, "loss": 0.282, "step": 20056 }, { "epoch": 2.8759678806997417, "grad_norm": 0.26572200655937195, "learning_rate": 5.198325679088179e-08, "loss": 0.2898, "step": 20057 }, { "epoch": 2.876111270433037, "grad_norm": 0.26204511523246765, "learning_rate": 5.186334186370934e-08, "loss": 0.2874, "step": 20058 }, { "epoch": 2.876254660166332, "grad_norm": 0.2574787139892578, "learning_rate": 5.174356468505004e-08, "loss": 0.2808, "step": 20059 }, { "epoch": 2.876398049899627, "grad_norm": 0.29413041472435, "learning_rate": 5.162392525823789e-08, "loss": 0.2613, "step": 20060 }, { "epoch": 2.8765414396329225, "grad_norm": 0.26912418007850647, "learning_rate": 5.1504423586604126e-08, "loss": 0.2701, "step": 20061 }, { "epoch": 2.8766848293662175, "grad_norm": 0.2556154727935791, "learning_rate": 5.138505967347496e-08, "loss": 0.2946, "step": 20062 }, { "epoch": 2.8768282190995125, "grad_norm": 0.26113253831863403, "learning_rate": 5.126583352217329e-08, "loss": 0.2754, "step": 20063 }, { "epoch": 2.8769716088328074, "grad_norm": 0.26889804005622864, "learning_rate": 5.1146745136017586e-08, "loss": 0.2798, "step": 20064 }, { "epoch": 2.8771149985661024, "grad_norm": 0.2791602313518524, "learning_rate": 5.1027794518324626e-08, "loss": 0.2603, "step": 20065 }, { "epoch": 2.877258388299398, "grad_norm": 0.27005502581596375, "learning_rate": 5.0908981672404545e-08, "loss": 0.2682, "step": 20066 }, { "epoch": 2.877401778032693, "grad_norm": 0.27893880009651184, "learning_rate": 5.079030660156525e-08, "loss": 0.2905, "step": 20067 }, { "epoch": 2.8775451677659882, "grad_norm": 0.2772795855998993, "learning_rate": 5.0671769309110216e-08, "loss": 0.2891, "step": 20068 }, { "epoch": 2.877688557499283, "grad_norm": 0.26343587040901184, "learning_rate": 5.055336979834014e-08, "loss": 0.2692, "step": 20069 }, { "epoch": 2.877831947232578, "grad_norm": 0.2680703401565552, "learning_rate": 5.043510807255014e-08, "loss": 0.2681, "step": 20070 }, { "epoch": 2.877975336965873, "grad_norm": 0.26472601294517517, "learning_rate": 5.0316984135033167e-08, "loss": 0.251, "step": 20071 }, { "epoch": 2.878118726699168, "grad_norm": 0.2591835856437683, "learning_rate": 5.019899798907768e-08, "loss": 0.2733, "step": 20072 }, { "epoch": 2.8782621164324635, "grad_norm": 0.2821146547794342, "learning_rate": 5.008114963796717e-08, "loss": 0.2778, "step": 20073 }, { "epoch": 2.8784055061657585, "grad_norm": 0.2745947539806366, "learning_rate": 4.9963439084983464e-08, "loss": 0.2802, "step": 20074 }, { "epoch": 2.8785488958990535, "grad_norm": 0.27596548199653625, "learning_rate": 4.984586633340283e-08, "loss": 0.2724, "step": 20075 }, { "epoch": 2.878692285632349, "grad_norm": 0.26998013257980347, "learning_rate": 4.972843138649819e-08, "loss": 0.2928, "step": 20076 }, { "epoch": 2.878835675365644, "grad_norm": 0.2767654359340668, "learning_rate": 4.961113424753972e-08, "loss": 0.2746, "step": 20077 }, { "epoch": 2.878979065098939, "grad_norm": 0.2774394750595093, "learning_rate": 4.949397491979147e-08, "loss": 0.2806, "step": 20078 }, { "epoch": 2.879122454832234, "grad_norm": 0.2720472812652588, "learning_rate": 4.937695340651583e-08, "loss": 0.2893, "step": 20079 }, { "epoch": 2.8792658445655293, "grad_norm": 0.28172093629837036, "learning_rate": 4.926006971097075e-08, "loss": 0.2683, "step": 20080 }, { "epoch": 2.8794092342988242, "grad_norm": 0.27770566940307617, "learning_rate": 4.914332383640919e-08, "loss": 0.2859, "step": 20081 }, { "epoch": 2.879552624032119, "grad_norm": 0.2658330500125885, "learning_rate": 4.902671578608187e-08, "loss": 0.2913, "step": 20082 }, { "epoch": 2.8796960137654146, "grad_norm": 0.2633829116821289, "learning_rate": 4.8910245563235096e-08, "loss": 0.2841, "step": 20083 }, { "epoch": 2.8798394034987096, "grad_norm": 0.269473671913147, "learning_rate": 4.879391317111071e-08, "loss": 0.2948, "step": 20084 }, { "epoch": 2.8799827932320046, "grad_norm": 0.25410196185112, "learning_rate": 4.8677718612947237e-08, "loss": 0.2796, "step": 20085 }, { "epoch": 2.8801261829652995, "grad_norm": 0.26510196924209595, "learning_rate": 4.8561661891979304e-08, "loss": 0.2841, "step": 20086 }, { "epoch": 2.8802695726985945, "grad_norm": 0.26704350113868713, "learning_rate": 4.844574301143823e-08, "loss": 0.2825, "step": 20087 }, { "epoch": 2.88041296243189, "grad_norm": 0.2657632827758789, "learning_rate": 4.832996197455142e-08, "loss": 0.2892, "step": 20088 }, { "epoch": 2.880556352165185, "grad_norm": 0.26473838090896606, "learning_rate": 4.821431878454019e-08, "loss": 0.2673, "step": 20089 }, { "epoch": 2.8806997418984803, "grad_norm": 0.2678011655807495, "learning_rate": 4.809881344462586e-08, "loss": 0.2585, "step": 20090 }, { "epoch": 2.8808431316317753, "grad_norm": 0.26375526189804077, "learning_rate": 4.798344595802251e-08, "loss": 0.2944, "step": 20091 }, { "epoch": 2.8809865213650703, "grad_norm": 0.26058319211006165, "learning_rate": 4.786821632794259e-08, "loss": 0.2849, "step": 20092 }, { "epoch": 2.8811299110983652, "grad_norm": 0.27827751636505127, "learning_rate": 4.7753124557594064e-08, "loss": 0.2786, "step": 20093 }, { "epoch": 2.8812733008316602, "grad_norm": 0.26831644773483276, "learning_rate": 4.763817065017995e-08, "loss": 0.2825, "step": 20094 }, { "epoch": 2.8814166905649556, "grad_norm": 0.2584879696369171, "learning_rate": 4.752335460890156e-08, "loss": 0.2811, "step": 20095 }, { "epoch": 2.8815600802982506, "grad_norm": 0.25367772579193115, "learning_rate": 4.740867643695468e-08, "loss": 0.2811, "step": 20096 }, { "epoch": 2.8817034700315456, "grad_norm": 0.2696325182914734, "learning_rate": 4.72941361375312e-08, "loss": 0.2719, "step": 20097 }, { "epoch": 2.881846859764841, "grad_norm": 0.26716238260269165, "learning_rate": 4.7179733713820784e-08, "loss": 0.2665, "step": 20098 }, { "epoch": 2.881990249498136, "grad_norm": 0.2822612226009369, "learning_rate": 4.706546916900756e-08, "loss": 0.262, "step": 20099 }, { "epoch": 2.882133639231431, "grad_norm": 0.29679274559020996, "learning_rate": 4.695134250627231e-08, "loss": 0.2665, "step": 20100 }, { "epoch": 2.882277028964726, "grad_norm": 0.2676529288291931, "learning_rate": 4.683735372879306e-08, "loss": 0.2909, "step": 20101 }, { "epoch": 2.8824204186980213, "grad_norm": 0.2585527002811432, "learning_rate": 4.672350283974225e-08, "loss": 0.2826, "step": 20102 }, { "epoch": 2.8825638084313163, "grad_norm": 0.2698378562927246, "learning_rate": 4.6609789842289034e-08, "loss": 0.2907, "step": 20103 }, { "epoch": 2.8827071981646113, "grad_norm": 0.2663671374320984, "learning_rate": 4.649621473960031e-08, "loss": 0.2716, "step": 20104 }, { "epoch": 2.8828505878979067, "grad_norm": 0.2523389458656311, "learning_rate": 4.638277753483689e-08, "loss": 0.2822, "step": 20105 }, { "epoch": 2.8829939776312017, "grad_norm": 0.24889865517616272, "learning_rate": 4.6269478231156796e-08, "loss": 0.2744, "step": 20106 }, { "epoch": 2.8831373673644967, "grad_norm": 0.2750464379787445, "learning_rate": 4.615631683171362e-08, "loss": 0.2522, "step": 20107 }, { "epoch": 2.8832807570977916, "grad_norm": 0.2734825909137726, "learning_rate": 4.604329333965874e-08, "loss": 0.2948, "step": 20108 }, { "epoch": 2.883424146831087, "grad_norm": 0.2781856656074524, "learning_rate": 4.5930407758137954e-08, "loss": 0.2731, "step": 20109 }, { "epoch": 2.883567536564382, "grad_norm": 0.25971728563308716, "learning_rate": 4.5817660090293756e-08, "loss": 0.2999, "step": 20110 }, { "epoch": 2.883710926297677, "grad_norm": 0.2758964002132416, "learning_rate": 4.5705050339264754e-08, "loss": 0.2743, "step": 20111 }, { "epoch": 2.8838543160309724, "grad_norm": 0.2798081636428833, "learning_rate": 4.5592578508185657e-08, "loss": 0.2883, "step": 20112 }, { "epoch": 2.8839977057642674, "grad_norm": 0.2713889181613922, "learning_rate": 4.548024460018841e-08, "loss": 0.2782, "step": 20113 }, { "epoch": 2.8841410954975624, "grad_norm": 0.26652792096138, "learning_rate": 4.536804861839939e-08, "loss": 0.2772, "step": 20114 }, { "epoch": 2.8842844852308573, "grad_norm": 0.2738104462623596, "learning_rate": 4.525599056594276e-08, "loss": 0.2958, "step": 20115 }, { "epoch": 2.8844278749641523, "grad_norm": 0.270768404006958, "learning_rate": 4.514407044593716e-08, "loss": 0.2819, "step": 20116 }, { "epoch": 2.8845712646974477, "grad_norm": 0.2706153094768524, "learning_rate": 4.503228826149897e-08, "loss": 0.2669, "step": 20117 }, { "epoch": 2.8847146544307427, "grad_norm": 0.2608526051044464, "learning_rate": 4.492064401573903e-08, "loss": 0.2679, "step": 20118 }, { "epoch": 2.884858044164038, "grad_norm": 0.27140942215919495, "learning_rate": 4.480913771176654e-08, "loss": 0.2817, "step": 20119 }, { "epoch": 2.885001433897333, "grad_norm": 0.2565053403377533, "learning_rate": 4.469776935268566e-08, "loss": 0.2786, "step": 20120 }, { "epoch": 2.885144823630628, "grad_norm": 0.27570241689682007, "learning_rate": 4.4586538941595036e-08, "loss": 0.2826, "step": 20121 }, { "epoch": 2.885288213363923, "grad_norm": 0.26476529240608215, "learning_rate": 4.44754464815933e-08, "loss": 0.2749, "step": 20122 }, { "epoch": 2.885431603097218, "grad_norm": 0.2818916440010071, "learning_rate": 4.436449197577131e-08, "loss": 0.294, "step": 20123 }, { "epoch": 2.8855749928305134, "grad_norm": 0.2659134864807129, "learning_rate": 4.4253675427218814e-08, "loss": 0.2803, "step": 20124 }, { "epoch": 2.8857183825638084, "grad_norm": 0.2541263699531555, "learning_rate": 4.4142996839021126e-08, "loss": 0.2757, "step": 20125 }, { "epoch": 2.8858617722971034, "grad_norm": 0.27677232027053833, "learning_rate": 4.403245621425856e-08, "loss": 0.2665, "step": 20126 }, { "epoch": 2.886005162030399, "grad_norm": 0.2662046551704407, "learning_rate": 4.392205355600865e-08, "loss": 0.2832, "step": 20127 }, { "epoch": 2.886148551763694, "grad_norm": 0.2747907340526581, "learning_rate": 4.381178886734505e-08, "loss": 0.2949, "step": 20128 }, { "epoch": 2.8862919414969888, "grad_norm": 0.2656216025352478, "learning_rate": 4.3701662151336976e-08, "loss": 0.2952, "step": 20129 }, { "epoch": 2.8864353312302837, "grad_norm": 0.26344743371009827, "learning_rate": 4.3591673411050864e-08, "loss": 0.2619, "step": 20130 }, { "epoch": 2.886578720963579, "grad_norm": 0.26608458161354065, "learning_rate": 4.348182264954759e-08, "loss": 0.2718, "step": 20131 }, { "epoch": 2.886722110696874, "grad_norm": 0.27249404788017273, "learning_rate": 4.337210986988583e-08, "loss": 0.271, "step": 20132 }, { "epoch": 2.886865500430169, "grad_norm": 0.2739335298538208, "learning_rate": 4.326253507512035e-08, "loss": 0.2823, "step": 20133 }, { "epoch": 2.8870088901634645, "grad_norm": 0.27089816331863403, "learning_rate": 4.315309826830039e-08, "loss": 0.2794, "step": 20134 }, { "epoch": 2.8871522798967595, "grad_norm": 0.25432437658309937, "learning_rate": 4.30437994524735e-08, "loss": 0.2658, "step": 20135 }, { "epoch": 2.8872956696300545, "grad_norm": 0.25797805190086365, "learning_rate": 4.2934638630682254e-08, "loss": 0.2717, "step": 20136 }, { "epoch": 2.8874390593633494, "grad_norm": 0.2718616724014282, "learning_rate": 4.282561580596478e-08, "loss": 0.283, "step": 20137 }, { "epoch": 2.8875824490966444, "grad_norm": 0.2679327726364136, "learning_rate": 4.271673098135698e-08, "loss": 0.3096, "step": 20138 }, { "epoch": 2.88772583882994, "grad_norm": 0.2805738151073456, "learning_rate": 4.26079841598892e-08, "loss": 0.2754, "step": 20139 }, { "epoch": 2.887869228563235, "grad_norm": 0.26417863368988037, "learning_rate": 4.2499375344589586e-08, "loss": 0.2786, "step": 20140 }, { "epoch": 2.88801261829653, "grad_norm": 0.2666057050228119, "learning_rate": 4.239090453848127e-08, "loss": 0.2757, "step": 20141 }, { "epoch": 2.888156008029825, "grad_norm": 0.2692135274410248, "learning_rate": 4.2282571744584055e-08, "loss": 0.277, "step": 20142 }, { "epoch": 2.88829939776312, "grad_norm": 0.2680499851703644, "learning_rate": 4.217437696591331e-08, "loss": 0.3114, "step": 20143 }, { "epoch": 2.888442787496415, "grad_norm": 0.26075929403305054, "learning_rate": 4.206632020548163e-08, "loss": 0.2716, "step": 20144 }, { "epoch": 2.88858617722971, "grad_norm": 0.24829496443271637, "learning_rate": 4.19584014662966e-08, "loss": 0.2631, "step": 20145 }, { "epoch": 2.8887295669630055, "grad_norm": 0.27052828669548035, "learning_rate": 4.185062075136304e-08, "loss": 0.2738, "step": 20146 }, { "epoch": 2.8888729566963005, "grad_norm": 0.2639271020889282, "learning_rate": 4.1742978063680776e-08, "loss": 0.2777, "step": 20147 }, { "epoch": 2.8890163464295955, "grad_norm": 0.25649335980415344, "learning_rate": 4.163547340624741e-08, "loss": 0.2679, "step": 20148 }, { "epoch": 2.889159736162891, "grad_norm": 0.26159417629241943, "learning_rate": 4.1528106782054436e-08, "loss": 0.2715, "step": 20149 }, { "epoch": 2.889303125896186, "grad_norm": 0.2602561116218567, "learning_rate": 4.142087819409168e-08, "loss": 0.2887, "step": 20150 }, { "epoch": 2.889446515629481, "grad_norm": 0.25937068462371826, "learning_rate": 4.1313787645344e-08, "loss": 0.2859, "step": 20151 }, { "epoch": 2.889589905362776, "grad_norm": 0.2764487862586975, "learning_rate": 4.1206835138792314e-08, "loss": 0.2818, "step": 20152 }, { "epoch": 2.8897332950960712, "grad_norm": 0.25579649209976196, "learning_rate": 4.110002067741481e-08, "loss": 0.2747, "step": 20153 }, { "epoch": 2.889876684829366, "grad_norm": 0.2719323933124542, "learning_rate": 4.099334426418411e-08, "loss": 0.2863, "step": 20154 }, { "epoch": 2.890020074562661, "grad_norm": 0.2678041160106659, "learning_rate": 4.088680590207006e-08, "loss": 0.2962, "step": 20155 }, { "epoch": 2.8901634642959566, "grad_norm": 0.27582406997680664, "learning_rate": 4.078040559403917e-08, "loss": 0.2722, "step": 20156 }, { "epoch": 2.8903068540292516, "grad_norm": 0.26981720328330994, "learning_rate": 4.0674143343052976e-08, "loss": 0.2738, "step": 20157 }, { "epoch": 2.8904502437625466, "grad_norm": 0.2698763608932495, "learning_rate": 4.0568019152070206e-08, "loss": 0.2725, "step": 20158 }, { "epoch": 2.8905936334958415, "grad_norm": 0.2681955099105835, "learning_rate": 4.0462033024044614e-08, "loss": 0.286, "step": 20159 }, { "epoch": 2.890737023229137, "grad_norm": 0.29205405712127686, "learning_rate": 4.035618496192606e-08, "loss": 0.2799, "step": 20160 }, { "epoch": 2.890880412962432, "grad_norm": 0.26916030049324036, "learning_rate": 4.025047496866275e-08, "loss": 0.2678, "step": 20161 }, { "epoch": 2.891023802695727, "grad_norm": 0.2814934253692627, "learning_rate": 4.0144903047196205e-08, "loss": 0.2513, "step": 20162 }, { "epoch": 2.8911671924290223, "grad_norm": 0.2771618664264679, "learning_rate": 4.003946920046631e-08, "loss": 0.2794, "step": 20163 }, { "epoch": 2.8913105821623173, "grad_norm": 0.2558443248271942, "learning_rate": 3.993417343140738e-08, "loss": 0.2795, "step": 20164 }, { "epoch": 2.8914539718956123, "grad_norm": 0.253566712141037, "learning_rate": 3.9829015742951504e-08, "loss": 0.2651, "step": 20165 }, { "epoch": 2.8915973616289072, "grad_norm": 0.2565609812736511, "learning_rate": 3.972399613802525e-08, "loss": 0.2584, "step": 20166 }, { "epoch": 2.891740751362202, "grad_norm": 0.24658933281898499, "learning_rate": 3.9619114619552365e-08, "loss": 0.2804, "step": 20167 }, { "epoch": 2.8918841410954976, "grad_norm": 0.2665776312351227, "learning_rate": 3.951437119045276e-08, "loss": 0.2827, "step": 20168 }, { "epoch": 2.8920275308287926, "grad_norm": 0.2681901454925537, "learning_rate": 3.9409765853642975e-08, "loss": 0.2707, "step": 20169 }, { "epoch": 2.892170920562088, "grad_norm": 0.2770026922225952, "learning_rate": 3.9305298612034584e-08, "loss": 0.288, "step": 20170 }, { "epoch": 2.892314310295383, "grad_norm": 0.2883436977863312, "learning_rate": 3.9200969468535244e-08, "loss": 0.2669, "step": 20171 }, { "epoch": 2.892457700028678, "grad_norm": 0.2622842788696289, "learning_rate": 3.909677842604986e-08, "loss": 0.277, "step": 20172 }, { "epoch": 2.892601089761973, "grad_norm": 0.26517292857170105, "learning_rate": 3.89927254874789e-08, "loss": 0.2629, "step": 20173 }, { "epoch": 2.892744479495268, "grad_norm": 0.2635740339756012, "learning_rate": 3.8888810655719476e-08, "loss": 0.2949, "step": 20174 }, { "epoch": 2.8928878692285633, "grad_norm": 0.2581964433193207, "learning_rate": 3.878503393366373e-08, "loss": 0.2753, "step": 20175 }, { "epoch": 2.8930312589618583, "grad_norm": 0.2867591679096222, "learning_rate": 3.8681395324200455e-08, "loss": 0.2775, "step": 20176 }, { "epoch": 2.8931746486951533, "grad_norm": 0.2867612838745117, "learning_rate": 3.8577894830215124e-08, "loss": 0.2865, "step": 20177 }, { "epoch": 2.8933180384284487, "grad_norm": 0.2906615138053894, "learning_rate": 3.847453245458987e-08, "loss": 0.2627, "step": 20178 }, { "epoch": 2.8934614281617437, "grad_norm": 0.2800901532173157, "learning_rate": 3.837130820020074e-08, "loss": 0.2583, "step": 20179 }, { "epoch": 2.8936048178950387, "grad_norm": 0.26171064376831055, "learning_rate": 3.826822206992209e-08, "loss": 0.2781, "step": 20180 }, { "epoch": 2.8937482076283336, "grad_norm": 0.2662305235862732, "learning_rate": 3.8165274066623846e-08, "loss": 0.2625, "step": 20181 }, { "epoch": 2.893891597361629, "grad_norm": 0.256673127412796, "learning_rate": 3.80624641931715e-08, "loss": 0.2705, "step": 20182 }, { "epoch": 2.894034987094924, "grad_norm": 0.24859197437763214, "learning_rate": 3.7959792452426645e-08, "loss": 0.2662, "step": 20183 }, { "epoch": 2.894178376828219, "grad_norm": 0.2679343819618225, "learning_rate": 3.7857258847248665e-08, "loss": 0.2859, "step": 20184 }, { "epoch": 2.8943217665615144, "grad_norm": 0.25673070549964905, "learning_rate": 3.775486338049139e-08, "loss": 0.2751, "step": 20185 }, { "epoch": 2.8944651562948094, "grad_norm": 0.25960370898246765, "learning_rate": 3.7652606055005314e-08, "loss": 0.2798, "step": 20186 }, { "epoch": 2.8946085460281044, "grad_norm": 0.2639063894748688, "learning_rate": 3.7550486873637046e-08, "loss": 0.3005, "step": 20187 }, { "epoch": 2.8947519357613993, "grad_norm": 0.25116026401519775, "learning_rate": 3.744850583922988e-08, "loss": 0.268, "step": 20188 }, { "epoch": 2.8948953254946943, "grad_norm": 0.26485374569892883, "learning_rate": 3.734666295462208e-08, "loss": 0.2894, "step": 20189 }, { "epoch": 2.8950387152279897, "grad_norm": 0.28168177604675293, "learning_rate": 3.7244958222649175e-08, "loss": 0.278, "step": 20190 }, { "epoch": 2.8951821049612847, "grad_norm": 0.26776123046875, "learning_rate": 3.7143391646143334e-08, "loss": 0.2861, "step": 20191 }, { "epoch": 2.89532549469458, "grad_norm": 0.27691203355789185, "learning_rate": 3.704196322793063e-08, "loss": 0.2752, "step": 20192 }, { "epoch": 2.895468884427875, "grad_norm": 0.25539296865463257, "learning_rate": 3.694067297083492e-08, "loss": 0.2854, "step": 20193 }, { "epoch": 2.89561227416117, "grad_norm": 0.2517138421535492, "learning_rate": 3.6839520877676174e-08, "loss": 0.2811, "step": 20194 }, { "epoch": 2.895755663894465, "grad_norm": 0.27440640330314636, "learning_rate": 3.6738506951271016e-08, "loss": 0.2952, "step": 20195 }, { "epoch": 2.89589905362776, "grad_norm": 0.27649030089378357, "learning_rate": 3.6637631194430535e-08, "loss": 0.2813, "step": 20196 }, { "epoch": 2.8960424433610554, "grad_norm": 0.2688305377960205, "learning_rate": 3.6536893609963044e-08, "loss": 0.2771, "step": 20197 }, { "epoch": 2.8961858330943504, "grad_norm": 0.27277684211730957, "learning_rate": 3.643629420067352e-08, "loss": 0.2902, "step": 20198 }, { "epoch": 2.8963292228276454, "grad_norm": 0.2990667521953583, "learning_rate": 3.633583296936249e-08, "loss": 0.2816, "step": 20199 }, { "epoch": 2.896472612560941, "grad_norm": 0.27059704065322876, "learning_rate": 3.623550991882552e-08, "loss": 0.306, "step": 20200 }, { "epoch": 2.8966160022942358, "grad_norm": 0.26353350281715393, "learning_rate": 3.613532505185702e-08, "loss": 0.2829, "step": 20201 }, { "epoch": 2.8967593920275307, "grad_norm": 0.25047603249549866, "learning_rate": 3.603527837124476e-08, "loss": 0.2785, "step": 20202 }, { "epoch": 2.8969027817608257, "grad_norm": 0.24012961983680725, "learning_rate": 3.5935369879774303e-08, "loss": 0.2869, "step": 20203 }, { "epoch": 2.897046171494121, "grad_norm": 0.2821182310581207, "learning_rate": 3.583559958022731e-08, "loss": 0.2671, "step": 20204 }, { "epoch": 2.897189561227416, "grad_norm": 0.2916445732116699, "learning_rate": 3.573596747538044e-08, "loss": 0.2822, "step": 20205 }, { "epoch": 2.897332950960711, "grad_norm": 0.2780061960220337, "learning_rate": 3.563647356800814e-08, "loss": 0.2704, "step": 20206 }, { "epoch": 2.8974763406940065, "grad_norm": 0.2875034213066101, "learning_rate": 3.5537117860879875e-08, "loss": 0.2719, "step": 20207 }, { "epoch": 2.8976197304273015, "grad_norm": 0.2623744606971741, "learning_rate": 3.543790035676176e-08, "loss": 0.2689, "step": 20208 }, { "epoch": 2.8977631201605965, "grad_norm": 0.27671679854393005, "learning_rate": 3.533882105841491e-08, "loss": 0.2818, "step": 20209 }, { "epoch": 2.8979065098938914, "grad_norm": 0.2731183171272278, "learning_rate": 3.523987996859824e-08, "loss": 0.2878, "step": 20210 }, { "epoch": 2.898049899627187, "grad_norm": 0.2735831141471863, "learning_rate": 3.51410770900662e-08, "loss": 0.2851, "step": 20211 }, { "epoch": 2.898193289360482, "grad_norm": 0.2757660150527954, "learning_rate": 3.5042412425569936e-08, "loss": 0.2864, "step": 20212 }, { "epoch": 2.898336679093777, "grad_norm": 0.2537131607532501, "learning_rate": 3.494388597785448e-08, "loss": 0.3025, "step": 20213 }, { "epoch": 2.898480068827072, "grad_norm": 0.2722790539264679, "learning_rate": 3.4845497749663726e-08, "loss": 0.2882, "step": 20214 }, { "epoch": 2.898623458560367, "grad_norm": 0.26343849301338196, "learning_rate": 3.4747247743736614e-08, "loss": 0.2749, "step": 20215 }, { "epoch": 2.898766848293662, "grad_norm": 0.26169002056121826, "learning_rate": 3.4649135962808165e-08, "loss": 0.2992, "step": 20216 }, { "epoch": 2.898910238026957, "grad_norm": 0.2835831344127655, "learning_rate": 3.455116240960954e-08, "loss": 0.2877, "step": 20217 }, { "epoch": 2.899053627760252, "grad_norm": 0.2739831507205963, "learning_rate": 3.445332708686799e-08, "loss": 0.2913, "step": 20218 }, { "epoch": 2.8991970174935475, "grad_norm": 0.27410122752189636, "learning_rate": 3.435562999730802e-08, "loss": 0.2666, "step": 20219 }, { "epoch": 2.8993404072268425, "grad_norm": 0.2849012315273285, "learning_rate": 3.425807114364799e-08, "loss": 0.2778, "step": 20220 }, { "epoch": 2.899483796960138, "grad_norm": 0.27358806133270264, "learning_rate": 3.4160650528605176e-08, "loss": 0.2789, "step": 20221 }, { "epoch": 2.899627186693433, "grad_norm": 0.26303166151046753, "learning_rate": 3.4063368154890195e-08, "loss": 0.2869, "step": 20222 }, { "epoch": 2.899770576426728, "grad_norm": 0.25052306056022644, "learning_rate": 3.3966224025212544e-08, "loss": 0.2739, "step": 20223 }, { "epoch": 2.899913966160023, "grad_norm": 0.26379215717315674, "learning_rate": 3.386921814227617e-08, "loss": 0.2819, "step": 20224 }, { "epoch": 2.900057355893318, "grad_norm": 0.2807336747646332, "learning_rate": 3.377235050878114e-08, "loss": 0.2832, "step": 20225 }, { "epoch": 2.9002007456266132, "grad_norm": 0.2674909830093384, "learning_rate": 3.3675621127424174e-08, "loss": 0.2912, "step": 20226 }, { "epoch": 2.900344135359908, "grad_norm": 0.26123449206352234, "learning_rate": 3.3579030000898684e-08, "loss": 0.2817, "step": 20227 }, { "epoch": 2.900487525093203, "grad_norm": 0.2680545449256897, "learning_rate": 3.3482577131893066e-08, "loss": 0.2901, "step": 20228 }, { "epoch": 2.9006309148264986, "grad_norm": 0.25764381885528564, "learning_rate": 3.3386262523092406e-08, "loss": 0.2893, "step": 20229 }, { "epoch": 2.9007743045597936, "grad_norm": 0.26823166012763977, "learning_rate": 3.329008617717844e-08, "loss": 0.2717, "step": 20230 }, { "epoch": 2.9009176942930885, "grad_norm": 0.25856074690818787, "learning_rate": 3.319404809682791e-08, "loss": 0.3061, "step": 20231 }, { "epoch": 2.9010610840263835, "grad_norm": 0.2642052471637726, "learning_rate": 3.30981482847148e-08, "loss": 0.27, "step": 20232 }, { "epoch": 2.901204473759679, "grad_norm": 0.2754359841346741, "learning_rate": 3.3002386743508644e-08, "loss": 0.2565, "step": 20233 }, { "epoch": 2.901347863492974, "grad_norm": 0.2553596496582031, "learning_rate": 3.290676347587562e-08, "loss": 0.2868, "step": 20234 }, { "epoch": 2.901491253226269, "grad_norm": 0.26906803250312805, "learning_rate": 3.281127848447752e-08, "loss": 0.2826, "step": 20235 }, { "epoch": 2.9016346429595643, "grad_norm": 0.2653031647205353, "learning_rate": 3.2715931771972184e-08, "loss": 0.281, "step": 20236 }, { "epoch": 2.9017780326928593, "grad_norm": 0.26724135875701904, "learning_rate": 3.262072334101418e-08, "loss": 0.2687, "step": 20237 }, { "epoch": 2.9019214224261543, "grad_norm": 0.26756834983825684, "learning_rate": 3.252565319425416e-08, "loss": 0.2712, "step": 20238 }, { "epoch": 2.9020648121594492, "grad_norm": 0.2669629454612732, "learning_rate": 3.243072133433833e-08, "loss": 0.2752, "step": 20239 }, { "epoch": 2.9022082018927446, "grad_norm": 0.2781514823436737, "learning_rate": 3.233592776391015e-08, "loss": 0.2704, "step": 20240 }, { "epoch": 2.9023515916260396, "grad_norm": 0.2751765251159668, "learning_rate": 3.224127248560749e-08, "loss": 0.2606, "step": 20241 }, { "epoch": 2.9024949813593346, "grad_norm": 0.27812501788139343, "learning_rate": 3.214675550206603e-08, "loss": 0.2647, "step": 20242 }, { "epoch": 2.90263837109263, "grad_norm": 0.24887511134147644, "learning_rate": 3.2052376815916996e-08, "loss": 0.2768, "step": 20243 }, { "epoch": 2.902781760825925, "grad_norm": 0.2776776850223541, "learning_rate": 3.195813642978773e-08, "loss": 0.2678, "step": 20244 }, { "epoch": 2.90292515055922, "grad_norm": 0.24879997968673706, "learning_rate": 3.1864034346301696e-08, "loss": 0.2572, "step": 20245 }, { "epoch": 2.903068540292515, "grad_norm": 0.2602695822715759, "learning_rate": 3.177007056807846e-08, "loss": 0.2668, "step": 20246 }, { "epoch": 2.90321193002581, "grad_norm": 0.26622816920280457, "learning_rate": 3.167624509773426e-08, "loss": 0.2626, "step": 20247 }, { "epoch": 2.9033553197591053, "grad_norm": 0.281203955411911, "learning_rate": 3.158255793788034e-08, "loss": 0.2775, "step": 20248 }, { "epoch": 2.9034987094924003, "grad_norm": 0.2645762860774994, "learning_rate": 3.148900909112462e-08, "loss": 0.2722, "step": 20249 }, { "epoch": 2.9036420992256957, "grad_norm": 0.2850387394428253, "learning_rate": 3.139559856007279e-08, "loss": 0.2789, "step": 20250 }, { "epoch": 2.9037854889589907, "grad_norm": 0.28706127405166626, "learning_rate": 3.1302326347323884e-08, "loss": 0.2764, "step": 20251 }, { "epoch": 2.9039288786922857, "grad_norm": 0.2698863446712494, "learning_rate": 3.1209192455475265e-08, "loss": 0.2691, "step": 20252 }, { "epoch": 2.9040722684255806, "grad_norm": 0.25424543023109436, "learning_rate": 3.111619688711876e-08, "loss": 0.2783, "step": 20253 }, { "epoch": 2.9042156581588756, "grad_norm": 0.2532273828983307, "learning_rate": 3.1023339644843944e-08, "loss": 0.2933, "step": 20254 }, { "epoch": 2.904359047892171, "grad_norm": 0.2702436149120331, "learning_rate": 3.0930620731235985e-08, "loss": 0.3053, "step": 20255 }, { "epoch": 2.904502437625466, "grad_norm": 0.2574457824230194, "learning_rate": 3.083804014887559e-08, "loss": 0.2906, "step": 20256 }, { "epoch": 2.904645827358761, "grad_norm": 0.2566247284412384, "learning_rate": 3.07455979003396e-08, "loss": 0.2751, "step": 20257 }, { "epoch": 2.9047892170920564, "grad_norm": 0.2577296495437622, "learning_rate": 3.0653293988202046e-08, "loss": 0.2975, "step": 20258 }, { "epoch": 2.9049326068253514, "grad_norm": 0.26974430680274963, "learning_rate": 3.0561128415033114e-08, "loss": 0.2697, "step": 20259 }, { "epoch": 2.9050759965586463, "grad_norm": 0.2539215385913849, "learning_rate": 3.046910118339741e-08, "loss": 0.2793, "step": 20260 }, { "epoch": 2.9052193862919413, "grad_norm": 0.2746407985687256, "learning_rate": 3.0377212295857884e-08, "loss": 0.264, "step": 20261 }, { "epoch": 2.9053627760252367, "grad_norm": 0.2482326477766037, "learning_rate": 3.0285461754971935e-08, "loss": 0.253, "step": 20262 }, { "epoch": 2.9055061657585317, "grad_norm": 0.2695566415786743, "learning_rate": 3.019384956329363e-08, "loss": 0.2824, "step": 20263 }, { "epoch": 2.9056495554918267, "grad_norm": 0.2712329626083374, "learning_rate": 3.010237572337371e-08, "loss": 0.2728, "step": 20264 }, { "epoch": 2.905792945225122, "grad_norm": 0.2814684808254242, "learning_rate": 3.001104023775847e-08, "loss": 0.2858, "step": 20265 }, { "epoch": 2.905936334958417, "grad_norm": 0.2695872485637665, "learning_rate": 2.991984310899088e-08, "loss": 0.266, "step": 20266 }, { "epoch": 2.906079724691712, "grad_norm": 0.24555915594100952, "learning_rate": 2.982878433960889e-08, "loss": 0.2702, "step": 20267 }, { "epoch": 2.906223114425007, "grad_norm": 0.28698745369911194, "learning_rate": 2.973786393214828e-08, "loss": 0.2716, "step": 20268 }, { "epoch": 2.906366504158302, "grad_norm": 0.25572749972343445, "learning_rate": 2.9647081889139782e-08, "loss": 0.2862, "step": 20269 }, { "epoch": 2.9065098938915974, "grad_norm": 0.29516443610191345, "learning_rate": 2.9556438213110827e-08, "loss": 0.2978, "step": 20270 }, { "epoch": 2.9066532836248924, "grad_norm": 0.2827273905277252, "learning_rate": 2.9465932906584395e-08, "loss": 0.2851, "step": 20271 }, { "epoch": 2.906796673358188, "grad_norm": 0.28816384077072144, "learning_rate": 2.9375565972080687e-08, "loss": 0.2831, "step": 20272 }, { "epoch": 2.906940063091483, "grad_norm": 0.25587233901023865, "learning_rate": 2.9285337412114367e-08, "loss": 0.2784, "step": 20273 }, { "epoch": 2.9070834528247778, "grad_norm": 0.2839242219924927, "learning_rate": 2.919524722919842e-08, "loss": 0.265, "step": 20274 }, { "epoch": 2.9072268425580727, "grad_norm": 0.2974476218223572, "learning_rate": 2.9105295425840285e-08, "loss": 0.2613, "step": 20275 }, { "epoch": 2.9073702322913677, "grad_norm": 0.2635379135608673, "learning_rate": 2.9015482004543515e-08, "loss": 0.2776, "step": 20276 }, { "epoch": 2.907513622024663, "grad_norm": 0.2768472135066986, "learning_rate": 2.8925806967809444e-08, "loss": 0.2807, "step": 20277 }, { "epoch": 2.907657011757958, "grad_norm": 0.2787726819515228, "learning_rate": 2.8836270318133297e-08, "loss": 0.2555, "step": 20278 }, { "epoch": 2.907800401491253, "grad_norm": 0.26859232783317566, "learning_rate": 2.874687205800919e-08, "loss": 0.2893, "step": 20279 }, { "epoch": 2.9079437912245485, "grad_norm": 0.2763144373893738, "learning_rate": 2.8657612189924023e-08, "loss": 0.29, "step": 20280 }, { "epoch": 2.9080871809578435, "grad_norm": 0.2791830003261566, "learning_rate": 2.8568490716364138e-08, "loss": 0.3038, "step": 20281 }, { "epoch": 2.9082305706911384, "grad_norm": 0.2676641345024109, "learning_rate": 2.8479507639810333e-08, "loss": 0.2834, "step": 20282 }, { "epoch": 2.9083739604244334, "grad_norm": 0.2819864749908447, "learning_rate": 2.839066296273896e-08, "loss": 0.2707, "step": 20283 }, { "epoch": 2.908517350157729, "grad_norm": 0.28256160020828247, "learning_rate": 2.830195668762359e-08, "loss": 0.2794, "step": 20284 }, { "epoch": 2.908660739891024, "grad_norm": 0.2654724717140198, "learning_rate": 2.8213388816933917e-08, "loss": 0.2678, "step": 20285 }, { "epoch": 2.908804129624319, "grad_norm": 0.26746413111686707, "learning_rate": 2.812495935313575e-08, "loss": 0.2934, "step": 20286 }, { "epoch": 2.908947519357614, "grad_norm": 0.2722669243812561, "learning_rate": 2.8036668298689895e-08, "loss": 0.2752, "step": 20287 }, { "epoch": 2.909090909090909, "grad_norm": 0.2572758197784424, "learning_rate": 2.7948515656055498e-08, "loss": 0.2699, "step": 20288 }, { "epoch": 2.909234298824204, "grad_norm": 0.2584381103515625, "learning_rate": 2.786050142768615e-08, "loss": 0.2997, "step": 20289 }, { "epoch": 2.909377688557499, "grad_norm": 0.27956509590148926, "learning_rate": 2.7772625616031557e-08, "loss": 0.2759, "step": 20290 }, { "epoch": 2.9095210782907945, "grad_norm": 0.266169011592865, "learning_rate": 2.7684888223538096e-08, "loss": 0.2842, "step": 20291 }, { "epoch": 2.9096644680240895, "grad_norm": 0.271078884601593, "learning_rate": 2.7597289252648817e-08, "loss": 0.2801, "step": 20292 }, { "epoch": 2.9098078577573845, "grad_norm": 0.264137864112854, "learning_rate": 2.7509828705801768e-08, "loss": 0.2777, "step": 20293 }, { "epoch": 2.90995124749068, "grad_norm": 0.28057220578193665, "learning_rate": 2.742250658543222e-08, "loss": 0.295, "step": 20294 }, { "epoch": 2.910094637223975, "grad_norm": 0.27819541096687317, "learning_rate": 2.733532289397045e-08, "loss": 0.2746, "step": 20295 }, { "epoch": 2.91023802695727, "grad_norm": 0.278594046831131, "learning_rate": 2.724827763384397e-08, "loss": 0.2643, "step": 20296 }, { "epoch": 2.910381416690565, "grad_norm": 0.2689342200756073, "learning_rate": 2.7161370807476383e-08, "loss": 0.2803, "step": 20297 }, { "epoch": 2.91052480642386, "grad_norm": 0.2752261459827423, "learning_rate": 2.7074602417286323e-08, "loss": 0.2891, "step": 20298 }, { "epoch": 2.910668196157155, "grad_norm": 0.26725757122039795, "learning_rate": 2.6987972465689626e-08, "loss": 0.3032, "step": 20299 }, { "epoch": 2.91081158589045, "grad_norm": 0.26556894183158875, "learning_rate": 2.6901480955097703e-08, "loss": 0.2846, "step": 20300 }, { "epoch": 2.9109549756237456, "grad_norm": 0.292755126953125, "learning_rate": 2.6815127887918625e-08, "loss": 0.28, "step": 20301 }, { "epoch": 2.9110983653570406, "grad_norm": 0.2631252408027649, "learning_rate": 2.6728913266556024e-08, "loss": 0.2904, "step": 20302 }, { "epoch": 2.9112417550903356, "grad_norm": 0.2670404016971588, "learning_rate": 2.66428370934102e-08, "loss": 0.2827, "step": 20303 }, { "epoch": 2.9113851448236305, "grad_norm": 0.2751834988594055, "learning_rate": 2.6556899370877576e-08, "loss": 0.2689, "step": 20304 }, { "epoch": 2.9115285345569255, "grad_norm": 0.2682552635669708, "learning_rate": 2.6471100101349568e-08, "loss": 0.2674, "step": 20305 }, { "epoch": 2.911671924290221, "grad_norm": 0.25801223516464233, "learning_rate": 2.638543928721593e-08, "loss": 0.275, "step": 20306 }, { "epoch": 2.911815314023516, "grad_norm": 0.26445457339286804, "learning_rate": 2.629991693086087e-08, "loss": 0.2953, "step": 20307 }, { "epoch": 2.911958703756811, "grad_norm": 0.2412932813167572, "learning_rate": 2.6214533034665257e-08, "loss": 0.2821, "step": 20308 }, { "epoch": 2.9121020934901063, "grad_norm": 0.2745482623577118, "learning_rate": 2.612928760100608e-08, "loss": 0.2847, "step": 20309 }, { "epoch": 2.9122454832234013, "grad_norm": 0.2601633667945862, "learning_rate": 2.6044180632255888e-08, "loss": 0.2739, "step": 20310 }, { "epoch": 2.9123888729566962, "grad_norm": 0.2647874057292938, "learning_rate": 2.5959212130783895e-08, "loss": 0.2763, "step": 20311 }, { "epoch": 2.912532262689991, "grad_norm": 0.2825515568256378, "learning_rate": 2.587438209895654e-08, "loss": 0.294, "step": 20312 }, { "epoch": 2.9126756524232866, "grad_norm": 0.26980090141296387, "learning_rate": 2.578969053913416e-08, "loss": 0.2751, "step": 20313 }, { "epoch": 2.9128190421565816, "grad_norm": 0.2621223032474518, "learning_rate": 2.5705137453675977e-08, "loss": 0.2803, "step": 20314 }, { "epoch": 2.9129624318898766, "grad_norm": 0.2523614168167114, "learning_rate": 2.5620722844934554e-08, "loss": 0.2996, "step": 20315 }, { "epoch": 2.913105821623172, "grad_norm": 0.26419684290885925, "learning_rate": 2.553644671525968e-08, "loss": 0.2763, "step": 20316 }, { "epoch": 2.913249211356467, "grad_norm": 0.2534802258014679, "learning_rate": 2.545230906699836e-08, "loss": 0.2927, "step": 20317 }, { "epoch": 2.913392601089762, "grad_norm": 0.25561991333961487, "learning_rate": 2.536830990249206e-08, "loss": 0.2813, "step": 20318 }, { "epoch": 2.913535990823057, "grad_norm": 0.25334906578063965, "learning_rate": 2.5284449224080023e-08, "loss": 0.2672, "step": 20319 }, { "epoch": 2.913679380556352, "grad_norm": 0.2905609607696533, "learning_rate": 2.5200727034096485e-08, "loss": 0.2706, "step": 20320 }, { "epoch": 2.9138227702896473, "grad_norm": 0.26358872652053833, "learning_rate": 2.5117143334871807e-08, "loss": 0.3039, "step": 20321 }, { "epoch": 2.9139661600229423, "grad_norm": 0.2720035910606384, "learning_rate": 2.503369812873302e-08, "loss": 0.2848, "step": 20322 }, { "epoch": 2.9141095497562377, "grad_norm": 0.27740907669067383, "learning_rate": 2.4950391418003263e-08, "loss": 0.2669, "step": 20323 }, { "epoch": 2.9142529394895327, "grad_norm": 0.25617021322250366, "learning_rate": 2.4867223205001788e-08, "loss": 0.2727, "step": 20324 }, { "epoch": 2.9143963292228277, "grad_norm": 0.2795923352241516, "learning_rate": 2.4784193492042863e-08, "loss": 0.2885, "step": 20325 }, { "epoch": 2.9145397189561226, "grad_norm": 0.2582430839538574, "learning_rate": 2.470130228143963e-08, "loss": 0.2771, "step": 20326 }, { "epoch": 2.9146831086894176, "grad_norm": 0.27423498034477234, "learning_rate": 2.461854957549803e-08, "loss": 0.2602, "step": 20327 }, { "epoch": 2.914826498422713, "grad_norm": 0.27043431997299194, "learning_rate": 2.453593537652288e-08, "loss": 0.2815, "step": 20328 }, { "epoch": 2.914969888156008, "grad_norm": 0.29613298177719116, "learning_rate": 2.4453459686812898e-08, "loss": 0.2774, "step": 20329 }, { "epoch": 2.915113277889303, "grad_norm": 0.26976278424263, "learning_rate": 2.4371122508665135e-08, "loss": 0.2771, "step": 20330 }, { "epoch": 2.9152566676225984, "grad_norm": 0.26645126938819885, "learning_rate": 2.4288923844371648e-08, "loss": 0.2865, "step": 20331 }, { "epoch": 2.9154000573558934, "grad_norm": 0.27454015612602234, "learning_rate": 2.420686369622005e-08, "loss": 0.2643, "step": 20332 }, { "epoch": 2.9155434470891883, "grad_norm": 0.26792049407958984, "learning_rate": 2.4124942066495184e-08, "loss": 0.2859, "step": 20333 }, { "epoch": 2.9156868368224833, "grad_norm": 0.2744520306587219, "learning_rate": 2.4043158957477996e-08, "loss": 0.3, "step": 20334 }, { "epoch": 2.9158302265557787, "grad_norm": 0.2716742157936096, "learning_rate": 2.396151437144445e-08, "loss": 0.2775, "step": 20335 }, { "epoch": 2.9159736162890737, "grad_norm": 0.2765602767467499, "learning_rate": 2.3880008310668278e-08, "loss": 0.2826, "step": 20336 }, { "epoch": 2.9161170060223687, "grad_norm": 0.2700536847114563, "learning_rate": 2.379864077741767e-08, "loss": 0.2946, "step": 20337 }, { "epoch": 2.916260395755664, "grad_norm": 0.26252833008766174, "learning_rate": 2.3717411773957478e-08, "loss": 0.2764, "step": 20338 }, { "epoch": 2.916403785488959, "grad_norm": 0.28068211674690247, "learning_rate": 2.3636321302550336e-08, "loss": 0.2718, "step": 20339 }, { "epoch": 2.916547175222254, "grad_norm": 0.2721122205257416, "learning_rate": 2.3555369365452775e-08, "loss": 0.2983, "step": 20340 }, { "epoch": 2.916690564955549, "grad_norm": 0.2743639051914215, "learning_rate": 2.3474555964917987e-08, "loss": 0.2835, "step": 20341 }, { "epoch": 2.9168339546888444, "grad_norm": 0.24600766599178314, "learning_rate": 2.3393881103196958e-08, "loss": 0.301, "step": 20342 }, { "epoch": 2.9169773444221394, "grad_norm": 0.27064797282218933, "learning_rate": 2.3313344782534554e-08, "loss": 0.2939, "step": 20343 }, { "epoch": 2.9171207341554344, "grad_norm": 0.2687526047229767, "learning_rate": 2.3232947005172867e-08, "loss": 0.2786, "step": 20344 }, { "epoch": 2.91726412388873, "grad_norm": 0.27015814185142517, "learning_rate": 2.3152687773350668e-08, "loss": 0.2837, "step": 20345 }, { "epoch": 2.9174075136220248, "grad_norm": 0.24720776081085205, "learning_rate": 2.3072567089301723e-08, "loss": 0.2725, "step": 20346 }, { "epoch": 2.9175509033553197, "grad_norm": 0.28525251150131226, "learning_rate": 2.2992584955255914e-08, "loss": 0.2785, "step": 20347 }, { "epoch": 2.9176942930886147, "grad_norm": 0.25629860162734985, "learning_rate": 2.2912741373440906e-08, "loss": 0.2993, "step": 20348 }, { "epoch": 2.9178376828219097, "grad_norm": 0.2529173493385315, "learning_rate": 2.2833036346079363e-08, "loss": 0.2716, "step": 20349 }, { "epoch": 2.917981072555205, "grad_norm": 0.2602407932281494, "learning_rate": 2.2753469875389512e-08, "loss": 0.2714, "step": 20350 }, { "epoch": 2.9181244622885, "grad_norm": 0.2709190249443054, "learning_rate": 2.2674041963586247e-08, "loss": 0.2729, "step": 20351 }, { "epoch": 2.9182678520217955, "grad_norm": 0.26070186495780945, "learning_rate": 2.2594752612881133e-08, "loss": 0.2981, "step": 20352 }, { "epoch": 2.9184112417550905, "grad_norm": 0.25134915113449097, "learning_rate": 2.2515601825481848e-08, "loss": 0.2909, "step": 20353 }, { "epoch": 2.9185546314883855, "grad_norm": 0.28280699253082275, "learning_rate": 2.243658960359163e-08, "loss": 0.268, "step": 20354 }, { "epoch": 2.9186980212216804, "grad_norm": 0.28496411442756653, "learning_rate": 2.2357715949409276e-08, "loss": 0.29, "step": 20355 }, { "epoch": 2.9188414109549754, "grad_norm": 0.2752182185649872, "learning_rate": 2.2278980865130806e-08, "loss": 0.2636, "step": 20356 }, { "epoch": 2.918984800688271, "grad_norm": 0.26742023229599, "learning_rate": 2.2200384352948912e-08, "loss": 0.2746, "step": 20357 }, { "epoch": 2.919128190421566, "grad_norm": 0.2595442831516266, "learning_rate": 2.2121926415051287e-08, "loss": 0.2776, "step": 20358 }, { "epoch": 2.9192715801548608, "grad_norm": 0.26641935110092163, "learning_rate": 2.2043607053621185e-08, "loss": 0.2781, "step": 20359 }, { "epoch": 2.919414969888156, "grad_norm": 0.2702522873878479, "learning_rate": 2.196542627083964e-08, "loss": 0.2839, "step": 20360 }, { "epoch": 2.919558359621451, "grad_norm": 0.26878494024276733, "learning_rate": 2.1887384068883243e-08, "loss": 0.2926, "step": 20361 }, { "epoch": 2.919701749354746, "grad_norm": 0.26590511202812195, "learning_rate": 2.1809480449924146e-08, "loss": 0.2798, "step": 20362 }, { "epoch": 2.919845139088041, "grad_norm": 0.2797143757343292, "learning_rate": 2.173171541613117e-08, "loss": 0.2848, "step": 20363 }, { "epoch": 2.9199885288213365, "grad_norm": 0.26193225383758545, "learning_rate": 2.165408896966925e-08, "loss": 0.2655, "step": 20364 }, { "epoch": 2.9201319185546315, "grad_norm": 0.28126606345176697, "learning_rate": 2.1576601112699435e-08, "loss": 0.2846, "step": 20365 }, { "epoch": 2.9202753082879265, "grad_norm": 0.29162803292274475, "learning_rate": 2.149925184737833e-08, "loss": 0.2735, "step": 20366 }, { "epoch": 2.920418698021222, "grad_norm": 0.25532156229019165, "learning_rate": 2.142204117586033e-08, "loss": 0.2819, "step": 20367 }, { "epoch": 2.920562087754517, "grad_norm": 0.2609402537345886, "learning_rate": 2.134496910029371e-08, "loss": 0.2696, "step": 20368 }, { "epoch": 2.920705477487812, "grad_norm": 0.2718856632709503, "learning_rate": 2.126803562282509e-08, "loss": 0.3002, "step": 20369 }, { "epoch": 2.920848867221107, "grad_norm": 0.29132401943206787, "learning_rate": 2.1191240745594976e-08, "loss": 0.2734, "step": 20370 }, { "epoch": 2.920992256954402, "grad_norm": 0.2696775794029236, "learning_rate": 2.1114584470742217e-08, "loss": 0.2853, "step": 20371 }, { "epoch": 2.921135646687697, "grad_norm": 0.2634787857532501, "learning_rate": 2.1038066800400657e-08, "loss": 0.2589, "step": 20372 }, { "epoch": 2.921279036420992, "grad_norm": 0.2708119750022888, "learning_rate": 2.0961687736700264e-08, "loss": 0.2779, "step": 20373 }, { "epoch": 2.9214224261542876, "grad_norm": 0.27310696244239807, "learning_rate": 2.0885447281767113e-08, "loss": 0.282, "step": 20374 }, { "epoch": 2.9215658158875826, "grad_norm": 0.26635125279426575, "learning_rate": 2.0809345437723948e-08, "loss": 0.2727, "step": 20375 }, { "epoch": 2.9217092056208775, "grad_norm": 0.2635437548160553, "learning_rate": 2.073338220668908e-08, "loss": 0.2832, "step": 20376 }, { "epoch": 2.9218525953541725, "grad_norm": 0.2658987045288086, "learning_rate": 2.0657557590776923e-08, "loss": 0.2787, "step": 20377 }, { "epoch": 2.9219959850874675, "grad_norm": 0.27580296993255615, "learning_rate": 2.0581871592099122e-08, "loss": 0.2857, "step": 20378 }, { "epoch": 2.922139374820763, "grad_norm": 0.2917567789554596, "learning_rate": 2.0506324212761775e-08, "loss": 0.2589, "step": 20379 }, { "epoch": 2.922282764554058, "grad_norm": 0.2714194357395172, "learning_rate": 2.0430915454869304e-08, "loss": 0.273, "step": 20380 }, { "epoch": 2.922426154287353, "grad_norm": 0.2580948770046234, "learning_rate": 2.035564532051948e-08, "loss": 0.2764, "step": 20381 }, { "epoch": 2.9225695440206483, "grad_norm": 0.2663893699645996, "learning_rate": 2.02805138118084e-08, "loss": 0.2818, "step": 20382 }, { "epoch": 2.9227129337539433, "grad_norm": 0.2861788272857666, "learning_rate": 2.0205520930827728e-08, "loss": 0.2796, "step": 20383 }, { "epoch": 2.9228563234872382, "grad_norm": 0.2781677544116974, "learning_rate": 2.0130666679665235e-08, "loss": 0.2794, "step": 20384 }, { "epoch": 2.922999713220533, "grad_norm": 0.25455206632614136, "learning_rate": 2.00559510604037e-08, "loss": 0.2774, "step": 20385 }, { "epoch": 2.9231431029538286, "grad_norm": 0.26867154240608215, "learning_rate": 1.9981374075124794e-08, "loss": 0.2964, "step": 20386 }, { "epoch": 2.9232864926871236, "grad_norm": 0.25032880902290344, "learning_rate": 1.9906935725902964e-08, "loss": 0.277, "step": 20387 }, { "epoch": 2.9234298824204186, "grad_norm": 0.25026199221611023, "learning_rate": 1.983263601481211e-08, "loss": 0.2853, "step": 20388 }, { "epoch": 2.923573272153714, "grad_norm": 0.27587583661079407, "learning_rate": 1.9758474943918915e-08, "loss": 0.2796, "step": 20389 }, { "epoch": 2.923716661887009, "grad_norm": 0.2902940809726715, "learning_rate": 1.9684452515289497e-08, "loss": 0.2772, "step": 20390 }, { "epoch": 2.923860051620304, "grad_norm": 0.2849583029747009, "learning_rate": 1.9610568730983326e-08, "loss": 0.2951, "step": 20391 }, { "epoch": 2.924003441353599, "grad_norm": 0.2801741659641266, "learning_rate": 1.953682359305764e-08, "loss": 0.2941, "step": 20392 }, { "epoch": 2.9241468310868943, "grad_norm": 0.29212304949760437, "learning_rate": 1.9463217103565246e-08, "loss": 0.2762, "step": 20393 }, { "epoch": 2.9242902208201893, "grad_norm": 0.263031542301178, "learning_rate": 1.9389749264555613e-08, "loss": 0.2828, "step": 20394 }, { "epoch": 2.9244336105534843, "grad_norm": 0.2748202681541443, "learning_rate": 1.9316420078073774e-08, "loss": 0.2799, "step": 20395 }, { "epoch": 2.9245770002867797, "grad_norm": 0.2656298875808716, "learning_rate": 1.9243229546160868e-08, "loss": 0.2657, "step": 20396 }, { "epoch": 2.9247203900200747, "grad_norm": 0.27433592081069946, "learning_rate": 1.9170177670854716e-08, "loss": 0.2823, "step": 20397 }, { "epoch": 2.9248637797533696, "grad_norm": 0.2771940231323242, "learning_rate": 1.9097264454188692e-08, "loss": 0.2834, "step": 20398 }, { "epoch": 2.9250071694866646, "grad_norm": 0.27336379885673523, "learning_rate": 1.9024489898192833e-08, "loss": 0.2784, "step": 20399 }, { "epoch": 2.9251505592199596, "grad_norm": 0.2589308023452759, "learning_rate": 1.89518540048933e-08, "loss": 0.2752, "step": 20400 }, { "epoch": 2.925293948953255, "grad_norm": 0.2736331522464752, "learning_rate": 1.8879356776311808e-08, "loss": 0.2835, "step": 20401 }, { "epoch": 2.92543733868655, "grad_norm": 0.26639294624328613, "learning_rate": 1.8806998214466188e-08, "loss": 0.2715, "step": 20402 }, { "epoch": 2.9255807284198454, "grad_norm": 0.2814961373806, "learning_rate": 1.8734778321371494e-08, "loss": 0.2787, "step": 20403 }, { "epoch": 2.9257241181531404, "grad_norm": 0.25924044847488403, "learning_rate": 1.866269709903834e-08, "loss": 0.2731, "step": 20404 }, { "epoch": 2.9258675078864353, "grad_norm": 0.2514561414718628, "learning_rate": 1.8590754549472346e-08, "loss": 0.2749, "step": 20405 }, { "epoch": 2.9260108976197303, "grad_norm": 0.2622498869895935, "learning_rate": 1.851895067467746e-08, "loss": 0.2754, "step": 20406 }, { "epoch": 2.9261542873530253, "grad_norm": 0.2750217616558075, "learning_rate": 1.8447285476651533e-08, "loss": 0.271, "step": 20407 }, { "epoch": 2.9262976770863207, "grad_norm": 0.284170925617218, "learning_rate": 1.8375758957390187e-08, "loss": 0.2786, "step": 20408 }, { "epoch": 2.9264410668196157, "grad_norm": 0.25343558192253113, "learning_rate": 1.8304371118884613e-08, "loss": 0.2845, "step": 20409 }, { "epoch": 2.9265844565529107, "grad_norm": 0.28542110323905945, "learning_rate": 1.8233121963122102e-08, "loss": 0.2446, "step": 20410 }, { "epoch": 2.926727846286206, "grad_norm": 0.2727337181568146, "learning_rate": 1.8162011492086075e-08, "loss": 0.2804, "step": 20411 }, { "epoch": 2.926871236019501, "grad_norm": 0.2889551818370819, "learning_rate": 1.809103970775661e-08, "loss": 0.2758, "step": 20412 }, { "epoch": 2.927014625752796, "grad_norm": 0.2651544213294983, "learning_rate": 1.8020206612108794e-08, "loss": 0.2999, "step": 20413 }, { "epoch": 2.927158015486091, "grad_norm": 0.268963098526001, "learning_rate": 1.7949512207114384e-08, "loss": 0.2825, "step": 20414 }, { "epoch": 2.9273014052193864, "grad_norm": 0.2803776264190674, "learning_rate": 1.7878956494741807e-08, "loss": 0.2732, "step": 20415 }, { "epoch": 2.9274447949526814, "grad_norm": 0.2608049213886261, "learning_rate": 1.7808539476955044e-08, "loss": 0.2685, "step": 20416 }, { "epoch": 2.9275881846859764, "grad_norm": 0.27458247542381287, "learning_rate": 1.7738261155714752e-08, "loss": 0.279, "step": 20417 }, { "epoch": 2.927731574419272, "grad_norm": 0.2789829671382904, "learning_rate": 1.766812153297659e-08, "loss": 0.2756, "step": 20418 }, { "epoch": 2.9278749641525668, "grad_norm": 0.2817222774028778, "learning_rate": 1.759812061069399e-08, "loss": 0.276, "step": 20419 }, { "epoch": 2.9280183538858617, "grad_norm": 0.2742539942264557, "learning_rate": 1.75282583908154e-08, "loss": 0.2769, "step": 20420 }, { "epoch": 2.9281617436191567, "grad_norm": 0.2707785367965698, "learning_rate": 1.7458534875285928e-08, "loss": 0.3046, "step": 20421 }, { "epoch": 2.928305133352452, "grad_norm": 0.292001336812973, "learning_rate": 1.7388950066045685e-08, "loss": 0.2759, "step": 20422 }, { "epoch": 2.928448523085747, "grad_norm": 0.2684430181980133, "learning_rate": 1.731950396503257e-08, "loss": 0.276, "step": 20423 }, { "epoch": 2.928591912819042, "grad_norm": 0.2812802493572235, "learning_rate": 1.725019657417948e-08, "loss": 0.2611, "step": 20424 }, { "epoch": 2.9287353025523375, "grad_norm": 0.25863879919052124, "learning_rate": 1.7181027895416537e-08, "loss": 0.2911, "step": 20425 }, { "epoch": 2.9288786922856325, "grad_norm": 0.26518070697784424, "learning_rate": 1.7111997930668312e-08, "loss": 0.2696, "step": 20426 }, { "epoch": 2.9290220820189274, "grad_norm": 0.27114447951316833, "learning_rate": 1.7043106681857158e-08, "loss": 0.2831, "step": 20427 }, { "epoch": 2.9291654717522224, "grad_norm": 0.26709845662117004, "learning_rate": 1.6974354150900985e-08, "loss": 0.271, "step": 20428 }, { "epoch": 2.9293088614855174, "grad_norm": 0.2729540765285492, "learning_rate": 1.6905740339712707e-08, "loss": 0.2604, "step": 20429 }, { "epoch": 2.929452251218813, "grad_norm": 0.26576000452041626, "learning_rate": 1.6837265250203572e-08, "loss": 0.2858, "step": 20430 }, { "epoch": 2.929595640952108, "grad_norm": 0.24363577365875244, "learning_rate": 1.6768928884279834e-08, "loss": 0.2901, "step": 20431 }, { "epoch": 2.929739030685403, "grad_norm": 0.2677817642688751, "learning_rate": 1.670073124384275e-08, "loss": 0.2642, "step": 20432 }, { "epoch": 2.929882420418698, "grad_norm": 0.26546430587768555, "learning_rate": 1.663267233079191e-08, "loss": 0.3071, "step": 20433 }, { "epoch": 2.930025810151993, "grad_norm": 0.2777256369590759, "learning_rate": 1.656475214702191e-08, "loss": 0.2709, "step": 20434 }, { "epoch": 2.930169199885288, "grad_norm": 0.28430694341659546, "learning_rate": 1.6496970694422908e-08, "loss": 0.2759, "step": 20435 }, { "epoch": 2.930312589618583, "grad_norm": 0.2656680643558502, "learning_rate": 1.642932797488228e-08, "loss": 0.2885, "step": 20436 }, { "epoch": 2.9304559793518785, "grad_norm": 0.2591228187084198, "learning_rate": 1.6361823990282965e-08, "loss": 0.2633, "step": 20437 }, { "epoch": 2.9305993690851735, "grad_norm": 0.2559502124786377, "learning_rate": 1.6294458742504017e-08, "loss": 0.3045, "step": 20438 }, { "epoch": 2.9307427588184685, "grad_norm": 0.28343358635902405, "learning_rate": 1.6227232233421152e-08, "loss": 0.2878, "step": 20439 }, { "epoch": 2.930886148551764, "grad_norm": 0.25198104977607727, "learning_rate": 1.6160144464905657e-08, "loss": 0.2844, "step": 20440 }, { "epoch": 2.931029538285059, "grad_norm": 0.2763679027557373, "learning_rate": 1.6093195438824926e-08, "loss": 0.2842, "step": 20441 }, { "epoch": 2.931172928018354, "grad_norm": 0.2606407105922699, "learning_rate": 1.6026385157043023e-08, "loss": 0.2963, "step": 20442 }, { "epoch": 2.931316317751649, "grad_norm": 0.28081995248794556, "learning_rate": 1.5959713621420127e-08, "loss": 0.2678, "step": 20443 }, { "epoch": 2.931459707484944, "grad_norm": 0.26911741495132446, "learning_rate": 1.5893180833811418e-08, "loss": 0.2796, "step": 20444 }, { "epoch": 2.931603097218239, "grad_norm": 0.25589707493782043, "learning_rate": 1.5826786796069858e-08, "loss": 0.291, "step": 20445 }, { "epoch": 2.931746486951534, "grad_norm": 0.248627707362175, "learning_rate": 1.576053151004342e-08, "loss": 0.2863, "step": 20446 }, { "epoch": 2.9318898766848296, "grad_norm": 0.2601732015609741, "learning_rate": 1.5694414977576733e-08, "loss": 0.2621, "step": 20447 }, { "epoch": 2.9320332664181246, "grad_norm": 0.271437406539917, "learning_rate": 1.5628437200509993e-08, "loss": 0.2843, "step": 20448 }, { "epoch": 2.9321766561514195, "grad_norm": 0.26483413577079773, "learning_rate": 1.5562598180680068e-08, "loss": 0.2676, "step": 20449 }, { "epoch": 2.9323200458847145, "grad_norm": 0.2642156183719635, "learning_rate": 1.5496897919920483e-08, "loss": 0.2741, "step": 20450 }, { "epoch": 2.9324634356180095, "grad_norm": 0.2876451313495636, "learning_rate": 1.5431336420059228e-08, "loss": 0.27, "step": 20451 }, { "epoch": 2.932606825351305, "grad_norm": 0.26449114084243774, "learning_rate": 1.5365913682922063e-08, "loss": 0.2654, "step": 20452 }, { "epoch": 2.9327502150846, "grad_norm": 0.26782020926475525, "learning_rate": 1.530062971032975e-08, "loss": 0.2916, "step": 20453 }, { "epoch": 2.9328936048178953, "grad_norm": 0.2673231363296509, "learning_rate": 1.5235484504100286e-08, "loss": 0.2927, "step": 20454 }, { "epoch": 2.9330369945511903, "grad_norm": 0.27223122119903564, "learning_rate": 1.5170478066046656e-08, "loss": 0.3013, "step": 20455 }, { "epoch": 2.9331803842844852, "grad_norm": 0.2893672287464142, "learning_rate": 1.5105610397979087e-08, "loss": 0.2832, "step": 20456 }, { "epoch": 2.93332377401778, "grad_norm": 0.2780216634273529, "learning_rate": 1.5040881501702797e-08, "loss": 0.2973, "step": 20457 }, { "epoch": 2.933467163751075, "grad_norm": 0.26346755027770996, "learning_rate": 1.4976291379020236e-08, "loss": 0.2852, "step": 20458 }, { "epoch": 2.9336105534843706, "grad_norm": 0.26313307881355286, "learning_rate": 1.4911840031729408e-08, "loss": 0.2808, "step": 20459 }, { "epoch": 2.9337539432176656, "grad_norm": 0.2694864273071289, "learning_rate": 1.4847527461624433e-08, "loss": 0.2726, "step": 20460 }, { "epoch": 2.9338973329509606, "grad_norm": 0.268661767244339, "learning_rate": 1.4783353670495549e-08, "loss": 0.2804, "step": 20461 }, { "epoch": 2.934040722684256, "grad_norm": 0.2816968858242035, "learning_rate": 1.4719318660129656e-08, "loss": 0.2674, "step": 20462 }, { "epoch": 2.934184112417551, "grad_norm": 0.2610620856285095, "learning_rate": 1.4655422432308664e-08, "loss": 0.2593, "step": 20463 }, { "epoch": 2.934327502150846, "grad_norm": 0.2731923758983612, "learning_rate": 1.459166498881226e-08, "loss": 0.2891, "step": 20464 }, { "epoch": 2.934470891884141, "grad_norm": 0.2843714952468872, "learning_rate": 1.452804633141458e-08, "loss": 0.2862, "step": 20465 }, { "epoch": 2.9346142816174363, "grad_norm": 0.26611799001693726, "learning_rate": 1.446456646188754e-08, "loss": 0.2639, "step": 20466 }, { "epoch": 2.9347576713507313, "grad_norm": 0.2623940706253052, "learning_rate": 1.4401225381996953e-08, "loss": 0.2745, "step": 20467 }, { "epoch": 2.9349010610840263, "grad_norm": 0.25990426540374756, "learning_rate": 1.4338023093507514e-08, "loss": 0.2858, "step": 20468 }, { "epoch": 2.9350444508173217, "grad_norm": 0.26788243651390076, "learning_rate": 1.4274959598178372e-08, "loss": 0.2717, "step": 20469 }, { "epoch": 2.9351878405506167, "grad_norm": 0.26045119762420654, "learning_rate": 1.4212034897764238e-08, "loss": 0.2831, "step": 20470 }, { "epoch": 2.9353312302839116, "grad_norm": 0.2784305512905121, "learning_rate": 1.414924899401815e-08, "loss": 0.2641, "step": 20471 }, { "epoch": 2.9354746200172066, "grad_norm": 0.28459012508392334, "learning_rate": 1.4086601888686491e-08, "loss": 0.2992, "step": 20472 }, { "epoch": 2.935618009750502, "grad_norm": 0.2635171711444855, "learning_rate": 1.402409358351453e-08, "loss": 0.2959, "step": 20473 }, { "epoch": 2.935761399483797, "grad_norm": 0.2685396075248718, "learning_rate": 1.396172408024199e-08, "loss": 0.2855, "step": 20474 }, { "epoch": 2.935904789217092, "grad_norm": 0.2656923532485962, "learning_rate": 1.3899493380605256e-08, "loss": 0.2994, "step": 20475 }, { "epoch": 2.9360481789503874, "grad_norm": 0.2707024812698364, "learning_rate": 1.3837401486336277e-08, "loss": 0.279, "step": 20476 }, { "epoch": 2.9361915686836824, "grad_norm": 0.2838869094848633, "learning_rate": 1.377544839916367e-08, "loss": 0.2867, "step": 20477 }, { "epoch": 2.9363349584169773, "grad_norm": 0.25158631801605225, "learning_rate": 1.3713634120812725e-08, "loss": 0.2652, "step": 20478 }, { "epoch": 2.9364783481502723, "grad_norm": 0.27054503560066223, "learning_rate": 1.3651958653003727e-08, "loss": 0.2541, "step": 20479 }, { "epoch": 2.9366217378835673, "grad_norm": 0.2710389494895935, "learning_rate": 1.359042199745364e-08, "loss": 0.2733, "step": 20480 }, { "epoch": 2.9367651276168627, "grad_norm": 0.25234344601631165, "learning_rate": 1.3529024155875537e-08, "loss": 0.2658, "step": 20481 }, { "epoch": 2.9369085173501577, "grad_norm": 0.26875796914100647, "learning_rate": 1.346776512997916e-08, "loss": 0.267, "step": 20482 }, { "epoch": 2.937051907083453, "grad_norm": 0.26336896419525146, "learning_rate": 1.340664492146926e-08, "loss": 0.2876, "step": 20483 }, { "epoch": 2.937195296816748, "grad_norm": 0.2623218595981598, "learning_rate": 1.3345663532047804e-08, "loss": 0.2702, "step": 20484 }, { "epoch": 2.937338686550043, "grad_norm": 0.2765173316001892, "learning_rate": 1.328482096341177e-08, "loss": 0.3047, "step": 20485 }, { "epoch": 2.937482076283338, "grad_norm": 0.26341044902801514, "learning_rate": 1.322411721725536e-08, "loss": 0.2708, "step": 20486 }, { "epoch": 2.937625466016633, "grad_norm": 0.25503841042518616, "learning_rate": 1.3163552295268888e-08, "loss": 0.2834, "step": 20487 }, { "epoch": 2.9377688557499284, "grad_norm": 0.2711586356163025, "learning_rate": 1.3103126199137673e-08, "loss": 0.2743, "step": 20488 }, { "epoch": 2.9379122454832234, "grad_norm": 0.2545945644378662, "learning_rate": 1.3042838930544254e-08, "loss": 0.2836, "step": 20489 }, { "epoch": 2.9380556352165184, "grad_norm": 0.25885504484176636, "learning_rate": 1.2982690491166183e-08, "loss": 0.2734, "step": 20490 }, { "epoch": 2.9381990249498138, "grad_norm": 0.2784385085105896, "learning_rate": 1.292268088267934e-08, "loss": 0.2785, "step": 20491 }, { "epoch": 2.9383424146831087, "grad_norm": 0.2699801027774811, "learning_rate": 1.2862810106752943e-08, "loss": 0.2821, "step": 20492 }, { "epoch": 2.9384858044164037, "grad_norm": 0.2761240303516388, "learning_rate": 1.280307816505455e-08, "loss": 0.2879, "step": 20493 }, { "epoch": 2.9386291941496987, "grad_norm": 0.2962341606616974, "learning_rate": 1.2743485059246718e-08, "loss": 0.2903, "step": 20494 }, { "epoch": 2.938772583882994, "grad_norm": 0.27494844794273376, "learning_rate": 1.2684030790988122e-08, "loss": 0.2662, "step": 20495 }, { "epoch": 2.938915973616289, "grad_norm": 0.2888568639755249, "learning_rate": 1.2624715361934658e-08, "loss": 0.2775, "step": 20496 }, { "epoch": 2.939059363349584, "grad_norm": 0.26860320568084717, "learning_rate": 1.2565538773736675e-08, "loss": 0.288, "step": 20497 }, { "epoch": 2.9392027530828795, "grad_norm": 0.25112104415893555, "learning_rate": 1.2506501028042295e-08, "loss": 0.2827, "step": 20498 }, { "epoch": 2.9393461428161745, "grad_norm": 0.25515130162239075, "learning_rate": 1.2447602126494652e-08, "loss": 0.2832, "step": 20499 }, { "epoch": 2.9394895325494694, "grad_norm": 0.2593379318714142, "learning_rate": 1.2388842070733543e-08, "loss": 0.2623, "step": 20500 }, { "epoch": 2.9396329222827644, "grad_norm": 0.2818843126296997, "learning_rate": 1.2330220862394326e-08, "loss": 0.2841, "step": 20501 }, { "epoch": 2.9397763120160594, "grad_norm": 0.28575944900512695, "learning_rate": 1.2271738503109587e-08, "loss": 0.2923, "step": 20502 }, { "epoch": 2.939919701749355, "grad_norm": 0.27119767665863037, "learning_rate": 1.221339499450691e-08, "loss": 0.2816, "step": 20503 }, { "epoch": 2.9400630914826498, "grad_norm": 0.27600961923599243, "learning_rate": 1.2155190338210554e-08, "loss": 0.2804, "step": 20504 }, { "epoch": 2.940206481215945, "grad_norm": 0.2660336196422577, "learning_rate": 1.2097124535841442e-08, "loss": 0.2893, "step": 20505 }, { "epoch": 2.94034987094924, "grad_norm": 0.2733587622642517, "learning_rate": 1.203919758901495e-08, "loss": 0.2783, "step": 20506 }, { "epoch": 2.940493260682535, "grad_norm": 0.26346153020858765, "learning_rate": 1.1981409499344787e-08, "loss": 0.2783, "step": 20507 }, { "epoch": 2.94063665041583, "grad_norm": 0.2554818093776703, "learning_rate": 1.192376026843911e-08, "loss": 0.272, "step": 20508 }, { "epoch": 2.940780040149125, "grad_norm": 0.2857944667339325, "learning_rate": 1.1866249897902749e-08, "loss": 0.2795, "step": 20509 }, { "epoch": 2.9409234298824205, "grad_norm": 0.2805589735507965, "learning_rate": 1.1808878389336642e-08, "loss": 0.2729, "step": 20510 }, { "epoch": 2.9410668196157155, "grad_norm": 0.2573283016681671, "learning_rate": 1.1751645744338402e-08, "loss": 0.269, "step": 20511 }, { "epoch": 2.9412102093490105, "grad_norm": 0.2728295624256134, "learning_rate": 1.1694551964500645e-08, "loss": 0.2687, "step": 20512 }, { "epoch": 2.941353599082306, "grad_norm": 0.2666507065296173, "learning_rate": 1.1637597051413208e-08, "loss": 0.2967, "step": 20513 }, { "epoch": 2.941496988815601, "grad_norm": 0.27374762296676636, "learning_rate": 1.158078100666149e-08, "loss": 0.2756, "step": 20514 }, { "epoch": 2.941640378548896, "grad_norm": 0.2683572769165039, "learning_rate": 1.152410383182756e-08, "loss": 0.2912, "step": 20515 }, { "epoch": 2.941783768282191, "grad_norm": 0.2890944480895996, "learning_rate": 1.1467565528488488e-08, "loss": 0.2752, "step": 20516 }, { "epoch": 2.941927158015486, "grad_norm": 0.2637844383716583, "learning_rate": 1.1411166098218573e-08, "loss": 0.2807, "step": 20517 }, { "epoch": 2.942070547748781, "grad_norm": 0.25657349824905396, "learning_rate": 1.1354905542588224e-08, "loss": 0.2929, "step": 20518 }, { "epoch": 2.942213937482076, "grad_norm": 0.2507227659225464, "learning_rate": 1.1298783863162854e-08, "loss": 0.2831, "step": 20519 }, { "epoch": 2.9423573272153716, "grad_norm": 0.2781990170478821, "learning_rate": 1.124280106150566e-08, "loss": 0.2715, "step": 20520 }, { "epoch": 2.9425007169486666, "grad_norm": 0.25583788752555847, "learning_rate": 1.1186957139174836e-08, "loss": 0.2621, "step": 20521 }, { "epoch": 2.9426441066819615, "grad_norm": 0.2551409900188446, "learning_rate": 1.1131252097724699e-08, "loss": 0.2912, "step": 20522 }, { "epoch": 2.9427874964152565, "grad_norm": 0.26226744055747986, "learning_rate": 1.1075685938706226e-08, "loss": 0.2783, "step": 20523 }, { "epoch": 2.942930886148552, "grad_norm": 0.2553914487361908, "learning_rate": 1.1020258663665961e-08, "loss": 0.2759, "step": 20524 }, { "epoch": 2.943074275881847, "grad_norm": 0.263047993183136, "learning_rate": 1.0964970274147667e-08, "loss": 0.2749, "step": 20525 }, { "epoch": 2.943217665615142, "grad_norm": 0.25203049182891846, "learning_rate": 1.090982077168956e-08, "loss": 0.2845, "step": 20526 }, { "epoch": 2.9433610553484373, "grad_norm": 0.28853705525398254, "learning_rate": 1.0854810157827633e-08, "loss": 0.2816, "step": 20527 }, { "epoch": 2.9435044450817323, "grad_norm": 0.2621358036994934, "learning_rate": 1.0799938434093438e-08, "loss": 0.2831, "step": 20528 }, { "epoch": 2.9436478348150272, "grad_norm": 0.28232359886169434, "learning_rate": 1.074520560201353e-08, "loss": 0.2901, "step": 20529 }, { "epoch": 2.943791224548322, "grad_norm": 0.2765432298183441, "learning_rate": 1.069061166311225e-08, "loss": 0.2808, "step": 20530 }, { "epoch": 2.943934614281617, "grad_norm": 0.25827696919441223, "learning_rate": 1.0636156618909488e-08, "loss": 0.2753, "step": 20531 }, { "epoch": 2.9440780040149126, "grad_norm": 0.25467002391815186, "learning_rate": 1.0581840470921257e-08, "loss": 0.278, "step": 20532 }, { "epoch": 2.9442213937482076, "grad_norm": 0.2751731276512146, "learning_rate": 1.0527663220659124e-08, "loss": 0.2751, "step": 20533 }, { "epoch": 2.944364783481503, "grad_norm": 0.26938512921333313, "learning_rate": 1.0473624869631882e-08, "loss": 0.2797, "step": 20534 }, { "epoch": 2.944508173214798, "grad_norm": 0.2737152874469757, "learning_rate": 1.041972541934333e-08, "loss": 0.2762, "step": 20535 }, { "epoch": 2.944651562948093, "grad_norm": 0.2692643702030182, "learning_rate": 1.0365964871294487e-08, "loss": 0.2893, "step": 20536 }, { "epoch": 2.944794952681388, "grad_norm": 0.25611206889152527, "learning_rate": 1.0312343226981381e-08, "loss": 0.27, "step": 20537 }, { "epoch": 2.944938342414683, "grad_norm": 0.2548570930957794, "learning_rate": 1.025886048789726e-08, "loss": 0.2861, "step": 20538 }, { "epoch": 2.9450817321479783, "grad_norm": 0.2565901577472687, "learning_rate": 1.0205516655530379e-08, "loss": 0.2697, "step": 20539 }, { "epoch": 2.9452251218812733, "grad_norm": 0.26419374346733093, "learning_rate": 1.015231173136677e-08, "loss": 0.2723, "step": 20540 }, { "epoch": 2.9453685116145683, "grad_norm": 0.27643120288848877, "learning_rate": 1.0099245716886363e-08, "loss": 0.2787, "step": 20541 }, { "epoch": 2.9455119013478637, "grad_norm": 0.2823932468891144, "learning_rate": 1.0046318613567418e-08, "loss": 0.2555, "step": 20542 }, { "epoch": 2.9456552910811586, "grad_norm": 0.2587290406227112, "learning_rate": 9.993530422883201e-09, "loss": 0.2794, "step": 20543 }, { "epoch": 2.9457986808144536, "grad_norm": 0.26214033365249634, "learning_rate": 9.94088114630254e-09, "loss": 0.2845, "step": 20544 }, { "epoch": 2.9459420705477486, "grad_norm": 0.2630726993083954, "learning_rate": 9.888370785292034e-09, "loss": 0.2831, "step": 20545 }, { "epoch": 2.946085460281044, "grad_norm": 0.28152889013290405, "learning_rate": 9.835999341312741e-09, "loss": 0.2831, "step": 20546 }, { "epoch": 2.946228850014339, "grad_norm": 0.2916380763053894, "learning_rate": 9.783766815823492e-09, "loss": 0.2691, "step": 20547 }, { "epoch": 2.946372239747634, "grad_norm": 0.2716914415359497, "learning_rate": 9.731673210277015e-09, "loss": 0.2988, "step": 20548 }, { "epoch": 2.9465156294809294, "grad_norm": 0.2726413309574127, "learning_rate": 9.679718526124371e-09, "loss": 0.2674, "step": 20549 }, { "epoch": 2.9466590192142244, "grad_norm": 0.2681063711643219, "learning_rate": 9.62790276481218e-09, "loss": 0.2705, "step": 20550 }, { "epoch": 2.9468024089475193, "grad_norm": 0.25654301047325134, "learning_rate": 9.576225927782623e-09, "loss": 0.2848, "step": 20551 }, { "epoch": 2.9469457986808143, "grad_norm": 0.2807256877422333, "learning_rate": 9.524688016473992e-09, "loss": 0.2757, "step": 20552 }, { "epoch": 2.9470891884141093, "grad_norm": 0.26134902238845825, "learning_rate": 9.473289032321253e-09, "loss": 0.2665, "step": 20553 }, { "epoch": 2.9472325781474047, "grad_norm": 0.2542290687561035, "learning_rate": 9.422028976755481e-09, "loss": 0.287, "step": 20554 }, { "epoch": 2.9473759678806997, "grad_norm": 0.25716501474380493, "learning_rate": 9.370907851203314e-09, "loss": 0.2717, "step": 20555 }, { "epoch": 2.947519357613995, "grad_norm": 0.2619030177593231, "learning_rate": 9.319925657087502e-09, "loss": 0.2736, "step": 20556 }, { "epoch": 2.94766274734729, "grad_norm": 0.2581411600112915, "learning_rate": 9.269082395828578e-09, "loss": 0.2726, "step": 20557 }, { "epoch": 2.947806137080585, "grad_norm": 0.2650856375694275, "learning_rate": 9.21837806884096e-09, "loss": 0.2919, "step": 20558 }, { "epoch": 2.94794952681388, "grad_norm": 0.25281181931495667, "learning_rate": 9.167812677536858e-09, "loss": 0.2751, "step": 20559 }, { "epoch": 2.948092916547175, "grad_norm": 0.2643923759460449, "learning_rate": 9.117386223322922e-09, "loss": 0.2914, "step": 20560 }, { "epoch": 2.9482363062804704, "grad_norm": 0.271879106760025, "learning_rate": 9.067098707603583e-09, "loss": 0.2938, "step": 20561 }, { "epoch": 2.9483796960137654, "grad_norm": 0.253461629152298, "learning_rate": 9.016950131778834e-09, "loss": 0.2767, "step": 20562 }, { "epoch": 2.9485230857470603, "grad_norm": 0.2623954117298126, "learning_rate": 8.966940497245336e-09, "loss": 0.2751, "step": 20563 }, { "epoch": 2.9486664754803558, "grad_norm": 0.27586913108825684, "learning_rate": 8.917069805393642e-09, "loss": 0.274, "step": 20564 }, { "epoch": 2.9488098652136507, "grad_norm": 0.2722621262073517, "learning_rate": 8.867338057613195e-09, "loss": 0.305, "step": 20565 }, { "epoch": 2.9489532549469457, "grad_norm": 0.268574059009552, "learning_rate": 8.817745255288446e-09, "loss": 0.282, "step": 20566 }, { "epoch": 2.9490966446802407, "grad_norm": 0.28631889820098877, "learning_rate": 8.7682913997994e-09, "loss": 0.2991, "step": 20567 }, { "epoch": 2.949240034413536, "grad_norm": 0.26258328557014465, "learning_rate": 8.718976492523845e-09, "loss": 0.27, "step": 20568 }, { "epoch": 2.949383424146831, "grad_norm": 0.27426546812057495, "learning_rate": 8.66980053483346e-09, "loss": 0.2864, "step": 20569 }, { "epoch": 2.949526813880126, "grad_norm": 0.2648751437664032, "learning_rate": 8.620763528097708e-09, "loss": 0.2916, "step": 20570 }, { "epoch": 2.9496702036134215, "grad_norm": 0.2738174796104431, "learning_rate": 8.571865473681607e-09, "loss": 0.2872, "step": 20571 }, { "epoch": 2.9498135933467164, "grad_norm": 0.281587153673172, "learning_rate": 8.523106372946843e-09, "loss": 0.2788, "step": 20572 }, { "epoch": 2.9499569830800114, "grad_norm": 0.2643236517906189, "learning_rate": 8.474486227250666e-09, "loss": 0.2771, "step": 20573 }, { "epoch": 2.9501003728133064, "grad_norm": 0.2796489894390106, "learning_rate": 8.426005037945884e-09, "loss": 0.3018, "step": 20574 }, { "epoch": 2.950243762546602, "grad_norm": 0.2590634524822235, "learning_rate": 8.37766280638308e-09, "loss": 0.2677, "step": 20575 }, { "epoch": 2.950387152279897, "grad_norm": 0.2750518023967743, "learning_rate": 8.329459533907292e-09, "loss": 0.2829, "step": 20576 }, { "epoch": 2.9505305420131918, "grad_norm": 0.27938151359558105, "learning_rate": 8.281395221860777e-09, "loss": 0.2799, "step": 20577 }, { "epoch": 2.950673931746487, "grad_norm": 0.2713223099708557, "learning_rate": 8.233469871581357e-09, "loss": 0.2891, "step": 20578 }, { "epoch": 2.950817321479782, "grad_norm": 0.27178314328193665, "learning_rate": 8.185683484403517e-09, "loss": 0.2771, "step": 20579 }, { "epoch": 2.950960711213077, "grad_norm": 0.2647325098514557, "learning_rate": 8.13803606165786e-09, "loss": 0.2615, "step": 20580 }, { "epoch": 2.951104100946372, "grad_norm": 0.2666780352592468, "learning_rate": 8.090527604669995e-09, "loss": 0.2815, "step": 20581 }, { "epoch": 2.951247490679667, "grad_norm": 0.2811438739299774, "learning_rate": 8.04315811476275e-09, "loss": 0.2685, "step": 20582 }, { "epoch": 2.9513908804129625, "grad_norm": 0.2785024344921112, "learning_rate": 7.995927593254515e-09, "loss": 0.2888, "step": 20583 }, { "epoch": 2.9515342701462575, "grad_norm": 0.2871142625808716, "learning_rate": 7.948836041460906e-09, "loss": 0.2732, "step": 20584 }, { "epoch": 2.951677659879553, "grad_norm": 0.26979860663414, "learning_rate": 7.901883460692539e-09, "loss": 0.2733, "step": 20585 }, { "epoch": 2.951821049612848, "grad_norm": 0.2709020972251892, "learning_rate": 7.855069852256702e-09, "loss": 0.2923, "step": 20586 }, { "epoch": 2.951964439346143, "grad_norm": 0.24950313568115234, "learning_rate": 7.80839521745569e-09, "loss": 0.2843, "step": 20587 }, { "epoch": 2.952107829079438, "grad_norm": 0.25269368290901184, "learning_rate": 7.761859557590123e-09, "loss": 0.2692, "step": 20588 }, { "epoch": 2.952251218812733, "grad_norm": 0.2581959366798401, "learning_rate": 7.715462873954526e-09, "loss": 0.2846, "step": 20589 }, { "epoch": 2.952394608546028, "grad_norm": 0.28059443831443787, "learning_rate": 7.669205167841198e-09, "loss": 0.2789, "step": 20590 }, { "epoch": 2.952537998279323, "grad_norm": 0.2753749191761017, "learning_rate": 7.623086440537441e-09, "loss": 0.2724, "step": 20591 }, { "epoch": 2.952681388012618, "grad_norm": 0.2820153832435608, "learning_rate": 7.57710669332723e-09, "loss": 0.2735, "step": 20592 }, { "epoch": 2.9528247777459136, "grad_norm": 0.2699013948440552, "learning_rate": 7.53126592749065e-09, "loss": 0.2697, "step": 20593 }, { "epoch": 2.9529681674792085, "grad_norm": 0.2800462245941162, "learning_rate": 7.485564144303347e-09, "loss": 0.3084, "step": 20594 }, { "epoch": 2.9531115572125035, "grad_norm": 0.26869645714759827, "learning_rate": 7.440001345038195e-09, "loss": 0.2612, "step": 20595 }, { "epoch": 2.9532549469457985, "grad_norm": 0.2518472671508789, "learning_rate": 7.394577530963621e-09, "loss": 0.2591, "step": 20596 }, { "epoch": 2.953398336679094, "grad_norm": 0.243124321103096, "learning_rate": 7.349292703344169e-09, "loss": 0.2821, "step": 20597 }, { "epoch": 2.953541726412389, "grad_norm": 0.28290149569511414, "learning_rate": 7.304146863439943e-09, "loss": 0.273, "step": 20598 }, { "epoch": 2.953685116145684, "grad_norm": 0.2575276494026184, "learning_rate": 7.25914001250827e-09, "loss": 0.2905, "step": 20599 }, { "epoch": 2.9538285058789793, "grad_norm": 0.25832682847976685, "learning_rate": 7.214272151801483e-09, "loss": 0.2973, "step": 20600 }, { "epoch": 2.9539718956122742, "grad_norm": 0.2660138010978699, "learning_rate": 7.1695432825691355e-09, "loss": 0.2798, "step": 20601 }, { "epoch": 2.954115285345569, "grad_norm": 0.24837952852249146, "learning_rate": 7.124953406056345e-09, "loss": 0.2799, "step": 20602 }, { "epoch": 2.954258675078864, "grad_norm": 0.24956846237182617, "learning_rate": 7.0805025235043405e-09, "loss": 0.2771, "step": 20603 }, { "epoch": 2.9544020648121596, "grad_norm": 0.26654741168022156, "learning_rate": 7.036190636150464e-09, "loss": 0.284, "step": 20604 }, { "epoch": 2.9545454545454546, "grad_norm": 0.26666000485420227, "learning_rate": 6.992017745228729e-09, "loss": 0.2975, "step": 20605 }, { "epoch": 2.9546888442787496, "grad_norm": 0.2667901813983917, "learning_rate": 6.947983851968709e-09, "loss": 0.277, "step": 20606 }, { "epoch": 2.954832234012045, "grad_norm": 0.2910645604133606, "learning_rate": 6.904088957595534e-09, "loss": 0.293, "step": 20607 }, { "epoch": 2.95497562374534, "grad_norm": 0.26673442125320435, "learning_rate": 6.860333063332114e-09, "loss": 0.2596, "step": 20608 }, { "epoch": 2.955119013478635, "grad_norm": 0.2669001817703247, "learning_rate": 6.816716170395809e-09, "loss": 0.2864, "step": 20609 }, { "epoch": 2.95526240321193, "grad_norm": 0.27756839990615845, "learning_rate": 6.773238280001204e-09, "loss": 0.2652, "step": 20610 }, { "epoch": 2.955405792945225, "grad_norm": 0.2700638175010681, "learning_rate": 6.729899393358996e-09, "loss": 0.2747, "step": 20611 }, { "epoch": 2.9555491826785203, "grad_norm": 0.26712140440940857, "learning_rate": 6.6866995116748875e-09, "loss": 0.2769, "step": 20612 }, { "epoch": 2.9556925724118153, "grad_norm": 0.2682025134563446, "learning_rate": 6.643638636152361e-09, "loss": 0.2711, "step": 20613 }, { "epoch": 2.9558359621451107, "grad_norm": 0.2900198698043823, "learning_rate": 6.600716767989346e-09, "loss": 0.2736, "step": 20614 }, { "epoch": 2.9559793518784057, "grad_norm": 0.26035529375076294, "learning_rate": 6.557933908380998e-09, "loss": 0.2839, "step": 20615 }, { "epoch": 2.9561227416117006, "grad_norm": 0.2726856470108032, "learning_rate": 6.515290058518586e-09, "loss": 0.2848, "step": 20616 }, { "epoch": 2.9562661313449956, "grad_norm": 0.26598304510116577, "learning_rate": 6.472785219588939e-09, "loss": 0.2822, "step": 20617 }, { "epoch": 2.9564095210782906, "grad_norm": 0.28467485308647156, "learning_rate": 6.430419392775555e-09, "loss": 0.2698, "step": 20618 }, { "epoch": 2.956552910811586, "grad_norm": 0.2669733762741089, "learning_rate": 6.388192579258046e-09, "loss": 0.2691, "step": 20619 }, { "epoch": 2.956696300544881, "grad_norm": 0.27027779817581177, "learning_rate": 6.346104780211026e-09, "loss": 0.2739, "step": 20620 }, { "epoch": 2.956839690278176, "grad_norm": 0.2653608024120331, "learning_rate": 6.304155996807448e-09, "loss": 0.2926, "step": 20621 }, { "epoch": 2.9569830800114714, "grad_norm": 0.2881121337413788, "learning_rate": 6.262346230214156e-09, "loss": 0.2728, "step": 20622 }, { "epoch": 2.9571264697447663, "grad_norm": 0.28073650598526, "learning_rate": 6.220675481595217e-09, "loss": 0.2655, "step": 20623 }, { "epoch": 2.9572698594780613, "grad_norm": 0.3044630289077759, "learning_rate": 6.179143752110817e-09, "loss": 0.2824, "step": 20624 }, { "epoch": 2.9574132492113563, "grad_norm": 0.2659199833869934, "learning_rate": 6.1377510429172504e-09, "loss": 0.2753, "step": 20625 }, { "epoch": 2.9575566389446517, "grad_norm": 0.2628859877586365, "learning_rate": 6.096497355166375e-09, "loss": 0.2768, "step": 20626 }, { "epoch": 2.9577000286779467, "grad_norm": 0.26082274317741394, "learning_rate": 6.055382690007272e-09, "loss": 0.2906, "step": 20627 }, { "epoch": 2.9578434184112417, "grad_norm": 0.26187366247177124, "learning_rate": 6.014407048584026e-09, "loss": 0.2686, "step": 20628 }, { "epoch": 2.957986808144537, "grad_norm": 0.24996298551559448, "learning_rate": 5.973570432037945e-09, "loss": 0.285, "step": 20629 }, { "epoch": 2.958130197877832, "grad_norm": 0.26049506664276123, "learning_rate": 5.932872841504789e-09, "loss": 0.2791, "step": 20630 }, { "epoch": 2.958273587611127, "grad_norm": 0.27235859632492065, "learning_rate": 5.892314278118649e-09, "loss": 0.2853, "step": 20631 }, { "epoch": 2.958416977344422, "grad_norm": 0.2713833153247833, "learning_rate": 5.8518947430075135e-09, "loss": 0.2936, "step": 20632 }, { "epoch": 2.958560367077717, "grad_norm": 0.29867830872535706, "learning_rate": 5.8116142372977025e-09, "loss": 0.2923, "step": 20633 }, { "epoch": 2.9587037568110124, "grad_norm": 0.26614221930503845, "learning_rate": 5.771472762109986e-09, "loss": 0.2975, "step": 20634 }, { "epoch": 2.9588471465443074, "grad_norm": 0.27603375911712646, "learning_rate": 5.731470318561805e-09, "loss": 0.2814, "step": 20635 }, { "epoch": 2.958990536277603, "grad_norm": 0.28185948729515076, "learning_rate": 5.691606907766712e-09, "loss": 0.2809, "step": 20636 }, { "epoch": 2.9591339260108978, "grad_norm": 0.2789560854434967, "learning_rate": 5.651882530834374e-09, "loss": 0.2801, "step": 20637 }, { "epoch": 2.9592773157441927, "grad_norm": 0.2726489007472992, "learning_rate": 5.612297188871685e-09, "loss": 0.2741, "step": 20638 }, { "epoch": 2.9594207054774877, "grad_norm": 0.25839367508888245, "learning_rate": 5.5728508829794306e-09, "loss": 0.2683, "step": 20639 }, { "epoch": 2.9595640952107827, "grad_norm": 0.2862507402896881, "learning_rate": 5.533543614255621e-09, "loss": 0.2879, "step": 20640 }, { "epoch": 2.959707484944078, "grad_norm": 0.264911025762558, "learning_rate": 5.494375383795491e-09, "loss": 0.2889, "step": 20641 }, { "epoch": 2.959850874677373, "grad_norm": 0.27582085132598877, "learning_rate": 5.455346192689281e-09, "loss": 0.2851, "step": 20642 }, { "epoch": 2.959994264410668, "grad_norm": 0.28709039092063904, "learning_rate": 5.4164560420227886e-09, "loss": 0.2659, "step": 20643 }, { "epoch": 2.9601376541439635, "grad_norm": 0.25887349247932434, "learning_rate": 5.377704932879035e-09, "loss": 0.2761, "step": 20644 }, { "epoch": 2.9602810438772584, "grad_norm": 0.2733154892921448, "learning_rate": 5.339092866337159e-09, "loss": 0.2603, "step": 20645 }, { "epoch": 2.9604244336105534, "grad_norm": 0.24627380073070526, "learning_rate": 5.300619843471299e-09, "loss": 0.2791, "step": 20646 }, { "epoch": 2.9605678233438484, "grad_norm": 0.2643528878688812, "learning_rate": 5.262285865352823e-09, "loss": 0.2646, "step": 20647 }, { "epoch": 2.960711213077144, "grad_norm": 0.26190951466560364, "learning_rate": 5.224090933049208e-09, "loss": 0.2869, "step": 20648 }, { "epoch": 2.9608546028104388, "grad_norm": 0.25589102506637573, "learning_rate": 5.186035047623494e-09, "loss": 0.2706, "step": 20649 }, { "epoch": 2.9609979925437337, "grad_norm": 0.2508915364742279, "learning_rate": 5.148118210135389e-09, "loss": 0.2781, "step": 20650 }, { "epoch": 2.961141382277029, "grad_norm": 0.2741624712944031, "learning_rate": 5.110340421639603e-09, "loss": 0.2956, "step": 20651 }, { "epoch": 2.961284772010324, "grad_norm": 0.2773682773113251, "learning_rate": 5.0727016831886296e-09, "loss": 0.2846, "step": 20652 }, { "epoch": 2.961428161743619, "grad_norm": 0.25511106848716736, "learning_rate": 5.035201995829963e-09, "loss": 0.2768, "step": 20653 }, { "epoch": 2.961571551476914, "grad_norm": 0.26456698775291443, "learning_rate": 4.997841360607769e-09, "loss": 0.2916, "step": 20654 }, { "epoch": 2.9617149412102095, "grad_norm": 0.27961692214012146, "learning_rate": 4.960619778561771e-09, "loss": 0.2709, "step": 20655 }, { "epoch": 2.9618583309435045, "grad_norm": 0.2708696722984314, "learning_rate": 4.923537250728361e-09, "loss": 0.2984, "step": 20656 }, { "epoch": 2.9620017206767995, "grad_norm": 0.28805771470069885, "learning_rate": 4.886593778139492e-09, "loss": 0.2899, "step": 20657 }, { "epoch": 2.962145110410095, "grad_norm": 0.2665158808231354, "learning_rate": 4.849789361824342e-09, "loss": 0.2646, "step": 20658 }, { "epoch": 2.96228850014339, "grad_norm": 0.25602081418037415, "learning_rate": 4.81312400280709e-09, "loss": 0.2807, "step": 20659 }, { "epoch": 2.962431889876685, "grad_norm": 0.2636699974536896, "learning_rate": 4.776597702108032e-09, "loss": 0.2876, "step": 20660 }, { "epoch": 2.96257527960998, "grad_norm": 0.27844560146331787, "learning_rate": 4.740210460744687e-09, "loss": 0.2861, "step": 20661 }, { "epoch": 2.9627186693432748, "grad_norm": 0.2594141364097595, "learning_rate": 4.703962279729579e-09, "loss": 0.2923, "step": 20662 }, { "epoch": 2.96286205907657, "grad_norm": 0.25253212451934814, "learning_rate": 4.6678531600719e-09, "loss": 0.2738, "step": 20663 }, { "epoch": 2.963005448809865, "grad_norm": 0.25245848298072815, "learning_rate": 4.6318831027769575e-09, "loss": 0.2872, "step": 20664 }, { "epoch": 2.9631488385431606, "grad_norm": 0.2673490345478058, "learning_rate": 4.596052108846172e-09, "loss": 0.292, "step": 20665 }, { "epoch": 2.9632922282764556, "grad_norm": 0.27080368995666504, "learning_rate": 4.560360179277079e-09, "loss": 0.2675, "step": 20666 }, { "epoch": 2.9634356180097505, "grad_norm": 0.2642831802368164, "learning_rate": 4.5248073150627734e-09, "loss": 0.276, "step": 20667 }, { "epoch": 2.9635790077430455, "grad_norm": 0.2503068149089813, "learning_rate": 4.489393517193575e-09, "loss": 0.2755, "step": 20668 }, { "epoch": 2.9637223974763405, "grad_norm": 0.26806211471557617, "learning_rate": 4.454118786655359e-09, "loss": 0.2928, "step": 20669 }, { "epoch": 2.963865787209636, "grad_norm": 0.264765202999115, "learning_rate": 4.418983124429565e-09, "loss": 0.274, "step": 20670 }, { "epoch": 2.964009176942931, "grad_norm": 0.27470406889915466, "learning_rate": 4.383986531494855e-09, "loss": 0.2624, "step": 20671 }, { "epoch": 2.964152566676226, "grad_norm": 0.27348628640174866, "learning_rate": 4.3491290088248925e-09, "loss": 0.2699, "step": 20672 }, { "epoch": 2.9642959564095213, "grad_norm": 0.27527594566345215, "learning_rate": 4.314410557390569e-09, "loss": 0.2889, "step": 20673 }, { "epoch": 2.9644393461428162, "grad_norm": 0.2636307179927826, "learning_rate": 4.279831178158333e-09, "loss": 0.273, "step": 20674 }, { "epoch": 2.964582735876111, "grad_norm": 0.26671692728996277, "learning_rate": 4.245390872090749e-09, "loss": 0.2892, "step": 20675 }, { "epoch": 2.964726125609406, "grad_norm": 0.26831284165382385, "learning_rate": 4.211089640146493e-09, "loss": 0.2753, "step": 20676 }, { "epoch": 2.9648695153427016, "grad_norm": 0.2799791991710663, "learning_rate": 4.176927483280913e-09, "loss": 0.2654, "step": 20677 }, { "epoch": 2.9650129050759966, "grad_norm": 0.26312753558158875, "learning_rate": 4.142904402444359e-09, "loss": 0.3054, "step": 20678 }, { "epoch": 2.9651562948092915, "grad_norm": 0.2816106677055359, "learning_rate": 4.1090203985838516e-09, "loss": 0.2951, "step": 20679 }, { "epoch": 2.965299684542587, "grad_norm": 0.2840617001056671, "learning_rate": 4.075275472643636e-09, "loss": 0.2781, "step": 20680 }, { "epoch": 2.965443074275882, "grad_norm": 0.2480948567390442, "learning_rate": 4.041669625562405e-09, "loss": 0.2881, "step": 20681 }, { "epoch": 2.965586464009177, "grad_norm": 0.2596273124217987, "learning_rate": 4.008202858276078e-09, "loss": 0.2752, "step": 20682 }, { "epoch": 2.965729853742472, "grad_norm": 0.2658742070198059, "learning_rate": 3.9748751717161305e-09, "loss": 0.2751, "step": 20683 }, { "epoch": 2.965873243475767, "grad_norm": 0.2720980644226074, "learning_rate": 3.941686566810155e-09, "loss": 0.2781, "step": 20684 }, { "epoch": 2.9660166332090623, "grad_norm": 0.27874988317489624, "learning_rate": 3.9086370444818555e-09, "loss": 0.2826, "step": 20685 }, { "epoch": 2.9661600229423573, "grad_norm": 0.265934556722641, "learning_rate": 3.875726605652164e-09, "loss": 0.2941, "step": 20686 }, { "epoch": 2.9663034126756527, "grad_norm": 0.2667693793773651, "learning_rate": 3.842955251237013e-09, "loss": 0.2876, "step": 20687 }, { "epoch": 2.9664468024089476, "grad_norm": 0.2822639048099518, "learning_rate": 3.810322982147896e-09, "loss": 0.2664, "step": 20688 }, { "epoch": 2.9665901921422426, "grad_norm": 0.2719328999519348, "learning_rate": 3.777829799294086e-09, "loss": 0.3041, "step": 20689 }, { "epoch": 2.9667335818755376, "grad_norm": 0.26603424549102783, "learning_rate": 3.745475703579859e-09, "loss": 0.3025, "step": 20690 }, { "epoch": 2.9668769716088326, "grad_norm": 0.2671798765659332, "learning_rate": 3.7132606959056072e-09, "loss": 0.2893, "step": 20691 }, { "epoch": 2.967020361342128, "grad_norm": 0.2730865776538849, "learning_rate": 3.6811847771689447e-09, "loss": 0.2738, "step": 20692 }, { "epoch": 2.967163751075423, "grad_norm": 0.2895047068595886, "learning_rate": 3.6492479482619357e-09, "loss": 0.2889, "step": 20693 }, { "epoch": 2.967307140808718, "grad_norm": 0.2799583971500397, "learning_rate": 3.6174502100738694e-09, "loss": 0.2839, "step": 20694 }, { "epoch": 2.9674505305420134, "grad_norm": 0.2612422704696655, "learning_rate": 3.5857915634907036e-09, "loss": 0.2849, "step": 20695 }, { "epoch": 2.9675939202753083, "grad_norm": 0.25677093863487244, "learning_rate": 3.554272009392845e-09, "loss": 0.2804, "step": 20696 }, { "epoch": 2.9677373100086033, "grad_norm": 0.2565033435821533, "learning_rate": 3.522891548657925e-09, "loss": 0.2603, "step": 20697 }, { "epoch": 2.9678806997418983, "grad_norm": 0.2581219971179962, "learning_rate": 3.4916501821602446e-09, "loss": 0.2769, "step": 20698 }, { "epoch": 2.9680240894751937, "grad_norm": 0.27631857991218567, "learning_rate": 3.4605479107685525e-09, "loss": 0.2865, "step": 20699 }, { "epoch": 2.9681674792084887, "grad_norm": 0.26860132813453674, "learning_rate": 3.429584735348823e-09, "loss": 0.2924, "step": 20700 }, { "epoch": 2.9683108689417836, "grad_norm": 0.2668074071407318, "learning_rate": 3.3987606567637e-09, "loss": 0.2958, "step": 20701 }, { "epoch": 2.968454258675079, "grad_norm": 0.2674861252307892, "learning_rate": 3.3680756758708298e-09, "loss": 0.2956, "step": 20702 }, { "epoch": 2.968597648408374, "grad_norm": 0.26965564489364624, "learning_rate": 3.337529793523975e-09, "loss": 0.2645, "step": 20703 }, { "epoch": 2.968741038141669, "grad_norm": 0.2833085358142853, "learning_rate": 3.3071230105746755e-09, "loss": 0.2643, "step": 20704 }, { "epoch": 2.968884427874964, "grad_norm": 0.2681191563606262, "learning_rate": 3.2768553278683666e-09, "loss": 0.2867, "step": 20705 }, { "epoch": 2.9690278176082594, "grad_norm": 0.2485501915216446, "learning_rate": 3.2467267462482634e-09, "loss": 0.2792, "step": 20706 }, { "epoch": 2.9691712073415544, "grad_norm": 0.2826113700866699, "learning_rate": 3.216737266552583e-09, "loss": 0.278, "step": 20707 }, { "epoch": 2.9693145970748493, "grad_norm": 0.28101369738578796, "learning_rate": 3.186886889616214e-09, "loss": 0.2812, "step": 20708 }, { "epoch": 2.9694579868081448, "grad_norm": 0.26714953780174255, "learning_rate": 3.1571756162707133e-09, "loss": 0.2576, "step": 20709 }, { "epoch": 2.9696013765414397, "grad_norm": 0.2760668992996216, "learning_rate": 3.127603447343197e-09, "loss": 0.2797, "step": 20710 }, { "epoch": 2.9697447662747347, "grad_norm": 0.2578261196613312, "learning_rate": 3.0981703836557854e-09, "loss": 0.2729, "step": 20711 }, { "epoch": 2.9698881560080297, "grad_norm": 0.2605781853199005, "learning_rate": 3.068876426029488e-09, "loss": 0.2881, "step": 20712 }, { "epoch": 2.9700315457413247, "grad_norm": 0.27966371178627014, "learning_rate": 3.0397215752780985e-09, "loss": 0.2966, "step": 20713 }, { "epoch": 2.97017493547462, "grad_norm": 0.26259905099868774, "learning_rate": 3.010705832214855e-09, "loss": 0.2771, "step": 20714 }, { "epoch": 2.970318325207915, "grad_norm": 0.26890096068382263, "learning_rate": 2.9818291976463353e-09, "loss": 0.3068, "step": 20715 }, { "epoch": 2.9704617149412105, "grad_norm": 0.2594805359840393, "learning_rate": 2.9530916723768956e-09, "loss": 0.2735, "step": 20716 }, { "epoch": 2.9706051046745054, "grad_norm": 0.2841556966304779, "learning_rate": 2.924493257206451e-09, "loss": 0.2822, "step": 20717 }, { "epoch": 2.9707484944078004, "grad_norm": 0.2549891173839569, "learning_rate": 2.896033952931032e-09, "loss": 0.2769, "step": 20718 }, { "epoch": 2.9708918841410954, "grad_norm": 0.26504528522491455, "learning_rate": 2.867713760343338e-09, "loss": 0.323, "step": 20719 }, { "epoch": 2.9710352738743904, "grad_norm": 0.261578768491745, "learning_rate": 2.839532680231072e-09, "loss": 0.2764, "step": 20720 }, { "epoch": 2.971178663607686, "grad_norm": 0.2559586763381958, "learning_rate": 2.8114907133797163e-09, "loss": 0.2709, "step": 20721 }, { "epoch": 2.9713220533409808, "grad_norm": 0.2758708894252777, "learning_rate": 2.7835878605686485e-09, "loss": 0.2796, "step": 20722 }, { "epoch": 2.9714654430742757, "grad_norm": 0.30140718817710876, "learning_rate": 2.755824122576134e-09, "loss": 0.2842, "step": 20723 }, { "epoch": 2.971608832807571, "grad_norm": 0.2548813223838806, "learning_rate": 2.7281995001737782e-09, "loss": 0.2734, "step": 20724 }, { "epoch": 2.971752222540866, "grad_norm": 0.2703484892845154, "learning_rate": 2.7007139941309657e-09, "loss": 0.2943, "step": 20725 }, { "epoch": 2.971895612274161, "grad_norm": 0.2787661850452423, "learning_rate": 2.6733676052131953e-09, "loss": 0.2959, "step": 20726 }, { "epoch": 2.972039002007456, "grad_norm": 0.26147159934043884, "learning_rate": 2.6461603341815245e-09, "loss": 0.2915, "step": 20727 }, { "epoch": 2.9721823917407515, "grad_norm": 0.28485020995140076, "learning_rate": 2.619092181793681e-09, "loss": 0.2691, "step": 20728 }, { "epoch": 2.9723257814740465, "grad_norm": 0.25940191745758057, "learning_rate": 2.5921631488023955e-09, "loss": 0.2854, "step": 20729 }, { "epoch": 2.9724691712073414, "grad_norm": 0.2758941352367401, "learning_rate": 2.5653732359581794e-09, "loss": 0.2899, "step": 20730 }, { "epoch": 2.972612560940637, "grad_norm": 0.2684207260608673, "learning_rate": 2.538722444006547e-09, "loss": 0.2671, "step": 20731 }, { "epoch": 2.972755950673932, "grad_norm": 0.2641375660896301, "learning_rate": 2.5122107736891278e-09, "loss": 0.3006, "step": 20732 }, { "epoch": 2.972899340407227, "grad_norm": 0.26312777400016785, "learning_rate": 2.4858382257442194e-09, "loss": 0.2684, "step": 20733 }, { "epoch": 2.973042730140522, "grad_norm": 0.2675297260284424, "learning_rate": 2.4596048009056796e-09, "loss": 0.271, "step": 20734 }, { "epoch": 2.9731861198738168, "grad_norm": 0.25308340787887573, "learning_rate": 2.43351049990459e-09, "loss": 0.3067, "step": 20735 }, { "epoch": 2.973329509607112, "grad_norm": 0.25862157344818115, "learning_rate": 2.4075553234664817e-09, "loss": 0.2743, "step": 20736 }, { "epoch": 2.973472899340407, "grad_norm": 0.26933759450912476, "learning_rate": 2.3817392723146647e-09, "loss": 0.2945, "step": 20737 }, { "epoch": 2.9736162890737026, "grad_norm": 0.28153538703918457, "learning_rate": 2.3560623471674537e-09, "loss": 0.2864, "step": 20738 }, { "epoch": 2.9737596788069975, "grad_norm": 0.26160186529159546, "learning_rate": 2.330524548739277e-09, "loss": 0.2821, "step": 20739 }, { "epoch": 2.9739030685402925, "grad_norm": 0.2724831998348236, "learning_rate": 2.305125877741232e-09, "loss": 0.2856, "step": 20740 }, { "epoch": 2.9740464582735875, "grad_norm": 0.2768862545490265, "learning_rate": 2.279866334881087e-09, "loss": 0.2743, "step": 20741 }, { "epoch": 2.9741898480068825, "grad_norm": 0.27931585907936096, "learning_rate": 2.254745920861612e-09, "loss": 0.3013, "step": 20742 }, { "epoch": 2.974333237740178, "grad_norm": 0.2540852129459381, "learning_rate": 2.2297646363816927e-09, "loss": 0.2725, "step": 20743 }, { "epoch": 2.974476627473473, "grad_norm": 0.26975056529045105, "learning_rate": 2.2049224821374394e-09, "loss": 0.2811, "step": 20744 }, { "epoch": 2.974620017206768, "grad_norm": 0.2675265371799469, "learning_rate": 2.1802194588199655e-09, "loss": 0.2891, "step": 20745 }, { "epoch": 2.9747634069400632, "grad_norm": 0.26003795862197876, "learning_rate": 2.155655567117054e-09, "loss": 0.2766, "step": 20746 }, { "epoch": 2.9749067966733582, "grad_norm": 0.26709240674972534, "learning_rate": 2.1312308077126032e-09, "loss": 0.267, "step": 20747 }, { "epoch": 2.975050186406653, "grad_norm": 0.2734343409538269, "learning_rate": 2.1069451812860687e-09, "loss": 0.2696, "step": 20748 }, { "epoch": 2.975193576139948, "grad_norm": 0.25591784715652466, "learning_rate": 2.082798688514687e-09, "loss": 0.2885, "step": 20749 }, { "epoch": 2.9753369658732436, "grad_norm": 0.255607932806015, "learning_rate": 2.0587913300695874e-09, "loss": 0.2745, "step": 20750 }, { "epoch": 2.9754803556065386, "grad_norm": 0.27756690979003906, "learning_rate": 2.0349231066191245e-09, "loss": 0.2908, "step": 20751 }, { "epoch": 2.9756237453398335, "grad_norm": 0.2916289269924164, "learning_rate": 2.011194018828877e-09, "loss": 0.274, "step": 20752 }, { "epoch": 2.975767135073129, "grad_norm": 0.27885007858276367, "learning_rate": 1.9876040673583174e-09, "loss": 0.268, "step": 20753 }, { "epoch": 2.975910524806424, "grad_norm": 0.26275888085365295, "learning_rate": 1.9641532528641425e-09, "loss": 0.2629, "step": 20754 }, { "epoch": 2.976053914539719, "grad_norm": 0.25683411955833435, "learning_rate": 1.940841575999719e-09, "loss": 0.2779, "step": 20755 }, { "epoch": 2.976197304273014, "grad_norm": 0.25809529423713684, "learning_rate": 1.9176690374139715e-09, "loss": 0.2759, "step": 20756 }, { "epoch": 2.9763406940063093, "grad_norm": 0.26595181226730347, "learning_rate": 1.8946356377519402e-09, "loss": 0.2649, "step": 20757 }, { "epoch": 2.9764840837396043, "grad_norm": 0.25221216678619385, "learning_rate": 1.8717413776542237e-09, "loss": 0.2783, "step": 20758 }, { "epoch": 2.9766274734728992, "grad_norm": 0.25565728545188904, "learning_rate": 1.8489862577592e-09, "loss": 0.2705, "step": 20759 }, { "epoch": 2.9767708632061947, "grad_norm": 0.2764142155647278, "learning_rate": 1.8263702786996962e-09, "loss": 0.2814, "step": 20760 }, { "epoch": 2.9769142529394896, "grad_norm": 0.2825632393360138, "learning_rate": 1.803893441105209e-09, "loss": 0.2899, "step": 20761 }, { "epoch": 2.9770576426727846, "grad_norm": 0.27233389019966125, "learning_rate": 1.781555745601349e-09, "loss": 0.2758, "step": 20762 }, { "epoch": 2.9772010324060796, "grad_norm": 0.2781013548374176, "learning_rate": 1.759357192810951e-09, "loss": 0.2858, "step": 20763 }, { "epoch": 2.9773444221393746, "grad_norm": 0.2688797116279602, "learning_rate": 1.7372977833507443e-09, "loss": 0.2818, "step": 20764 }, { "epoch": 2.97748781187267, "grad_norm": 0.27062705159187317, "learning_rate": 1.7153775178357924e-09, "loss": 0.2873, "step": 20765 }, { "epoch": 2.977631201605965, "grad_norm": 0.2600039839744568, "learning_rate": 1.6935963968761627e-09, "loss": 0.2917, "step": 20766 }, { "epoch": 2.9777745913392604, "grad_norm": 0.2676563858985901, "learning_rate": 1.6719544210774818e-09, "loss": 0.2781, "step": 20767 }, { "epoch": 2.9779179810725553, "grad_norm": 0.2718135416507721, "learning_rate": 1.650451591042601e-09, "loss": 0.2868, "step": 20768 }, { "epoch": 2.9780613708058503, "grad_norm": 0.2574584484100342, "learning_rate": 1.6290879073704857e-09, "loss": 0.2794, "step": 20769 }, { "epoch": 2.9782047605391453, "grad_norm": 0.2365434467792511, "learning_rate": 1.6078633706556601e-09, "loss": 0.2902, "step": 20770 }, { "epoch": 2.9783481502724403, "grad_norm": 0.2572970390319824, "learning_rate": 1.586777981489318e-09, "loss": 0.2722, "step": 20771 }, { "epoch": 2.9784915400057357, "grad_norm": 0.26275521516799927, "learning_rate": 1.5658317404576574e-09, "loss": 0.2764, "step": 20772 }, { "epoch": 2.9786349297390307, "grad_norm": 0.2523142695426941, "learning_rate": 1.5450246481441e-09, "loss": 0.2688, "step": 20773 }, { "epoch": 2.9787783194723256, "grad_norm": 0.2869583070278168, "learning_rate": 1.5243567051281826e-09, "loss": 0.2769, "step": 20774 }, { "epoch": 2.978921709205621, "grad_norm": 0.2779727876186371, "learning_rate": 1.5038279119850008e-09, "loss": 0.27, "step": 20775 }, { "epoch": 2.979065098938916, "grad_norm": 0.2687893211841583, "learning_rate": 1.4834382692863193e-09, "loss": 0.2716, "step": 20776 }, { "epoch": 2.979208488672211, "grad_norm": 0.2801009714603424, "learning_rate": 1.4631877775989067e-09, "loss": 0.2736, "step": 20777 }, { "epoch": 2.979351878405506, "grad_norm": 0.27961358428001404, "learning_rate": 1.443076437487867e-09, "loss": 0.2736, "step": 20778 }, { "epoch": 2.9794952681388014, "grad_norm": 0.27502796053886414, "learning_rate": 1.4231042495121972e-09, "loss": 0.286, "step": 20779 }, { "epoch": 2.9796386578720964, "grad_norm": 0.2574387192726135, "learning_rate": 1.403271214228119e-09, "loss": 0.2842, "step": 20780 }, { "epoch": 2.9797820476053913, "grad_norm": 0.2563536465167999, "learning_rate": 1.3835773321874136e-09, "loss": 0.262, "step": 20781 }, { "epoch": 2.9799254373386868, "grad_norm": 0.26729661226272583, "learning_rate": 1.3640226039385308e-09, "loss": 0.2738, "step": 20782 }, { "epoch": 2.9800688270719817, "grad_norm": 0.26319465041160583, "learning_rate": 1.3446070300265901e-09, "loss": 0.2628, "step": 20783 }, { "epoch": 2.9802122168052767, "grad_norm": 0.24964159727096558, "learning_rate": 1.325330610990605e-09, "loss": 0.289, "step": 20784 }, { "epoch": 2.9803556065385717, "grad_norm": 0.25547507405281067, "learning_rate": 1.3061933473684784e-09, "loss": 0.2763, "step": 20785 }, { "epoch": 2.980498996271867, "grad_norm": 0.2550308406352997, "learning_rate": 1.2871952396925624e-09, "loss": 0.2764, "step": 20786 }, { "epoch": 2.980642386005162, "grad_norm": 0.28585511445999146, "learning_rate": 1.268336288491323e-09, "loss": 0.2617, "step": 20787 }, { "epoch": 2.980785775738457, "grad_norm": 0.2790571451187134, "learning_rate": 1.249616494290451e-09, "loss": 0.28, "step": 20788 }, { "epoch": 2.9809291654717525, "grad_norm": 0.2783740758895874, "learning_rate": 1.2310358576100857e-09, "loss": 0.3006, "step": 20789 }, { "epoch": 2.9810725552050474, "grad_norm": 0.2719179391860962, "learning_rate": 1.2125943789687011e-09, "loss": 0.2589, "step": 20790 }, { "epoch": 2.9812159449383424, "grad_norm": 0.28351911902427673, "learning_rate": 1.1942920588792206e-09, "loss": 0.2659, "step": 20791 }, { "epoch": 2.9813593346716374, "grad_norm": 0.27872708439826965, "learning_rate": 1.1761288978506812e-09, "loss": 0.2841, "step": 20792 }, { "epoch": 2.9815027244049324, "grad_norm": 0.2699424624443054, "learning_rate": 1.158104896389345e-09, "loss": 0.2676, "step": 20793 }, { "epoch": 2.9816461141382278, "grad_norm": 0.29339131712913513, "learning_rate": 1.140220054996477e-09, "loss": 0.2795, "step": 20794 }, { "epoch": 2.9817895038715228, "grad_norm": 0.2631688416004181, "learning_rate": 1.122474374170568e-09, "loss": 0.2654, "step": 20795 }, { "epoch": 2.981932893604818, "grad_norm": 0.27047014236450195, "learning_rate": 1.1048678544051118e-09, "loss": 0.2812, "step": 20796 }, { "epoch": 2.982076283338113, "grad_norm": 0.27543905377388, "learning_rate": 1.0874004961902718e-09, "loss": 0.2748, "step": 20797 }, { "epoch": 2.982219673071408, "grad_norm": 0.28038889169692993, "learning_rate": 1.070072300012326e-09, "loss": 0.2758, "step": 20798 }, { "epoch": 2.982363062804703, "grad_norm": 0.24682529270648956, "learning_rate": 1.052883266353666e-09, "loss": 0.2604, "step": 20799 }, { "epoch": 2.982506452537998, "grad_norm": 0.2522931396961212, "learning_rate": 1.035833395693353e-09, "loss": 0.2736, "step": 20800 }, { "epoch": 2.9826498422712935, "grad_norm": 0.263886034488678, "learning_rate": 1.0189226885054526e-09, "loss": 0.2993, "step": 20801 }, { "epoch": 2.9827932320045885, "grad_norm": 0.2757667601108551, "learning_rate": 1.0021511452606991e-09, "loss": 0.2826, "step": 20802 }, { "epoch": 2.9829366217378834, "grad_norm": 0.270598441362381, "learning_rate": 9.855187664264964e-10, "loss": 0.2822, "step": 20803 }, { "epoch": 2.983080011471179, "grad_norm": 0.2682611644268036, "learning_rate": 9.690255524652525e-10, "loss": 0.267, "step": 20804 }, { "epoch": 2.983223401204474, "grad_norm": 0.2641597092151642, "learning_rate": 9.526715038365996e-10, "loss": 0.2691, "step": 20805 }, { "epoch": 2.983366790937769, "grad_norm": 0.2653701603412628, "learning_rate": 9.364566209951743e-10, "loss": 0.2748, "step": 20806 }, { "epoch": 2.9835101806710638, "grad_norm": 0.2657640874385834, "learning_rate": 9.203809043928369e-10, "loss": 0.2769, "step": 20807 }, { "epoch": 2.983653570404359, "grad_norm": 0.26939427852630615, "learning_rate": 9.044443544775627e-10, "loss": 0.296, "step": 20808 }, { "epoch": 2.983796960137654, "grad_norm": 0.2913482189178467, "learning_rate": 8.886469716923308e-10, "loss": 0.2678, "step": 20809 }, { "epoch": 2.983940349870949, "grad_norm": 0.2820429503917694, "learning_rate": 8.729887564767892e-10, "loss": 0.2852, "step": 20810 }, { "epoch": 2.9840837396042446, "grad_norm": 0.28947627544403076, "learning_rate": 8.574697092672557e-10, "loss": 0.2674, "step": 20811 }, { "epoch": 2.9842271293375395, "grad_norm": 0.2601093351840973, "learning_rate": 8.420898304961623e-10, "loss": 0.2917, "step": 20812 }, { "epoch": 2.9843705190708345, "grad_norm": 0.27021101117134094, "learning_rate": 8.268491205903895e-10, "loss": 0.3017, "step": 20813 }, { "epoch": 2.9845139088041295, "grad_norm": 0.267762154340744, "learning_rate": 8.117475799751528e-10, "loss": 0.2853, "step": 20814 }, { "epoch": 2.9846572985374245, "grad_norm": 0.2584119141101837, "learning_rate": 7.967852090712269e-10, "loss": 0.2754, "step": 20815 }, { "epoch": 2.98480068827072, "grad_norm": 0.2838386297225952, "learning_rate": 7.819620082938351e-10, "loss": 0.2953, "step": 20816 }, { "epoch": 2.984944078004015, "grad_norm": 0.25873157382011414, "learning_rate": 7.672779780570905e-10, "loss": 0.2847, "step": 20817 }, { "epoch": 2.9850874677373103, "grad_norm": 0.26501473784446716, "learning_rate": 7.52733118768445e-10, "loss": 0.2741, "step": 20818 }, { "epoch": 2.9852308574706052, "grad_norm": 0.26105996966362, "learning_rate": 7.383274308336851e-10, "loss": 0.2699, "step": 20819 }, { "epoch": 2.9853742472039, "grad_norm": 0.260927677154541, "learning_rate": 7.240609146536015e-10, "loss": 0.278, "step": 20820 }, { "epoch": 2.985517636937195, "grad_norm": 0.2626083493232727, "learning_rate": 7.099335706250987e-10, "loss": 0.2516, "step": 20821 }, { "epoch": 2.98566102667049, "grad_norm": 0.27111297845840454, "learning_rate": 6.95945399142306e-10, "loss": 0.2804, "step": 20822 }, { "epoch": 2.9858044164037856, "grad_norm": 0.27529436349868774, "learning_rate": 6.820964005932462e-10, "loss": 0.262, "step": 20823 }, { "epoch": 2.9859478061370806, "grad_norm": 0.2458246797323227, "learning_rate": 6.683865753648322e-10, "loss": 0.2768, "step": 20824 }, { "epoch": 2.9860911958703755, "grad_norm": 0.267179936170578, "learning_rate": 6.548159238378704e-10, "loss": 0.2792, "step": 20825 }, { "epoch": 2.986234585603671, "grad_norm": 0.25549599528312683, "learning_rate": 6.413844463903917e-10, "loss": 0.2948, "step": 20826 }, { "epoch": 2.986377975336966, "grad_norm": 0.24637605249881744, "learning_rate": 6.280921433965415e-10, "loss": 0.2893, "step": 20827 }, { "epoch": 2.986521365070261, "grad_norm": 0.2849138677120209, "learning_rate": 6.149390152254687e-10, "loss": 0.2783, "step": 20828 }, { "epoch": 2.986664754803556, "grad_norm": 0.26384517550468445, "learning_rate": 6.019250622446571e-10, "loss": 0.269, "step": 20829 }, { "epoch": 2.9868081445368513, "grad_norm": 0.2675362229347229, "learning_rate": 5.890502848154844e-10, "loss": 0.2773, "step": 20830 }, { "epoch": 2.9869515342701463, "grad_norm": 0.24981704354286194, "learning_rate": 5.763146832965527e-10, "loss": 0.2869, "step": 20831 }, { "epoch": 2.9870949240034412, "grad_norm": 0.2793658673763275, "learning_rate": 5.637182580425782e-10, "loss": 0.2654, "step": 20832 }, { "epoch": 2.9872383137367367, "grad_norm": 0.26352742314338684, "learning_rate": 5.512610094043914e-10, "loss": 0.2935, "step": 20833 }, { "epoch": 2.9873817034700316, "grad_norm": 0.28337356448173523, "learning_rate": 5.389429377283817e-10, "loss": 0.2755, "step": 20834 }, { "epoch": 2.9875250932033266, "grad_norm": 0.2772452235221863, "learning_rate": 5.267640433570531e-10, "loss": 0.2827, "step": 20835 }, { "epoch": 2.9876684829366216, "grad_norm": 0.2669663727283478, "learning_rate": 5.14724326630689e-10, "loss": 0.2766, "step": 20836 }, { "epoch": 2.987811872669917, "grad_norm": 0.25761255621910095, "learning_rate": 5.028237878834663e-10, "loss": 0.2963, "step": 20837 }, { "epoch": 2.987955262403212, "grad_norm": 0.28493601083755493, "learning_rate": 4.910624274467868e-10, "loss": 0.2873, "step": 20838 }, { "epoch": 2.988098652136507, "grad_norm": 0.2665356993675232, "learning_rate": 4.794402456481661e-10, "loss": 0.2748, "step": 20839 }, { "epoch": 2.9882420418698024, "grad_norm": 0.2608397305011749, "learning_rate": 4.679572428117896e-10, "loss": 0.2726, "step": 20840 }, { "epoch": 2.9883854316030973, "grad_norm": 0.289458692073822, "learning_rate": 4.56613419256291e-10, "loss": 0.2815, "step": 20841 }, { "epoch": 2.9885288213363923, "grad_norm": 0.26066479086875916, "learning_rate": 4.4540877529808403e-10, "loss": 0.2737, "step": 20842 }, { "epoch": 2.9886722110696873, "grad_norm": 0.2765178978443146, "learning_rate": 4.343433112491413e-10, "loss": 0.2677, "step": 20843 }, { "epoch": 2.9888156008029823, "grad_norm": 0.2622855007648468, "learning_rate": 4.2341702741699466e-10, "loss": 0.2818, "step": 20844 }, { "epoch": 2.9889589905362777, "grad_norm": 0.27306243777275085, "learning_rate": 4.1262992410584514e-10, "loss": 0.2729, "step": 20845 }, { "epoch": 2.9891023802695726, "grad_norm": 0.2713325023651123, "learning_rate": 4.019820016171183e-10, "loss": 0.2913, "step": 20846 }, { "epoch": 2.989245770002868, "grad_norm": 0.2610678970813751, "learning_rate": 3.9147326024557843e-10, "loss": 0.2763, "step": 20847 }, { "epoch": 2.989389159736163, "grad_norm": 0.27345237135887146, "learning_rate": 3.811037002848794e-10, "loss": 0.2731, "step": 20848 }, { "epoch": 2.989532549469458, "grad_norm": 0.25128281116485596, "learning_rate": 3.708733220236793e-10, "loss": 0.2649, "step": 20849 }, { "epoch": 2.989675939202753, "grad_norm": 0.2646806240081787, "learning_rate": 3.607821257461952e-10, "loss": 0.2776, "step": 20850 }, { "epoch": 2.989819328936048, "grad_norm": 0.2825934886932373, "learning_rate": 3.5083011173386863e-10, "loss": 0.2819, "step": 20851 }, { "epoch": 2.9899627186693434, "grad_norm": 0.266093909740448, "learning_rate": 3.410172802631451e-10, "loss": 0.2637, "step": 20852 }, { "epoch": 2.9901061084026384, "grad_norm": 0.25620949268341064, "learning_rate": 3.3134363160769454e-10, "loss": 0.3028, "step": 20853 }, { "epoch": 2.9902494981359333, "grad_norm": 0.25470444560050964, "learning_rate": 3.2180916603674615e-10, "loss": 0.2823, "step": 20854 }, { "epoch": 2.9903928878692287, "grad_norm": 0.2675105035305023, "learning_rate": 3.124138838161983e-10, "loss": 0.2881, "step": 20855 }, { "epoch": 2.9905362776025237, "grad_norm": 0.25998765230178833, "learning_rate": 3.031577852063983e-10, "loss": 0.2834, "step": 20856 }, { "epoch": 2.9906796673358187, "grad_norm": 0.2709503769874573, "learning_rate": 2.94040870466028e-10, "loss": 0.288, "step": 20857 }, { "epoch": 2.9908230570691137, "grad_norm": 0.26222872734069824, "learning_rate": 2.850631398487735e-10, "loss": 0.2755, "step": 20858 }, { "epoch": 2.990966446802409, "grad_norm": 0.25101515650749207, "learning_rate": 2.7622459360387986e-10, "loss": 0.2822, "step": 20859 }, { "epoch": 2.991109836535704, "grad_norm": 0.2802886664867401, "learning_rate": 2.6752523197781656e-10, "loss": 0.2741, "step": 20860 }, { "epoch": 2.991253226268999, "grad_norm": 0.26035943627357483, "learning_rate": 2.5896505521316727e-10, "loss": 0.2785, "step": 20861 }, { "epoch": 2.9913966160022945, "grad_norm": 0.25781872868537903, "learning_rate": 2.505440635480749e-10, "loss": 0.2943, "step": 20862 }, { "epoch": 2.9915400057355894, "grad_norm": 0.2720210552215576, "learning_rate": 2.422622572162414e-10, "loss": 0.2693, "step": 20863 }, { "epoch": 2.9916833954688844, "grad_norm": 0.2642492353916168, "learning_rate": 2.3411963644914825e-10, "loss": 0.2854, "step": 20864 }, { "epoch": 2.9918267852021794, "grad_norm": 0.26203396916389465, "learning_rate": 2.2611620147328094e-10, "loss": 0.2903, "step": 20865 }, { "epoch": 2.9919701749354743, "grad_norm": 0.2730087339878082, "learning_rate": 2.182519525106841e-10, "loss": 0.2681, "step": 20866 }, { "epoch": 2.9921135646687698, "grad_norm": 0.2682977318763733, "learning_rate": 2.1052688978118185e-10, "loss": 0.2776, "step": 20867 }, { "epoch": 2.9922569544020647, "grad_norm": 0.2702331244945526, "learning_rate": 2.029410134996024e-10, "loss": 0.2775, "step": 20868 }, { "epoch": 2.99240034413536, "grad_norm": 0.26137790083885193, "learning_rate": 1.9549432387633293e-10, "loss": 0.3032, "step": 20869 }, { "epoch": 2.992543733868655, "grad_norm": 0.2604166865348816, "learning_rate": 1.8818682112009545e-10, "loss": 0.2822, "step": 20870 }, { "epoch": 2.99268712360195, "grad_norm": 0.28057774901390076, "learning_rate": 1.8101850543350563e-10, "loss": 0.2776, "step": 20871 }, { "epoch": 2.992830513335245, "grad_norm": 0.25923916697502136, "learning_rate": 1.739893770158485e-10, "loss": 0.2785, "step": 20872 }, { "epoch": 2.99297390306854, "grad_norm": 0.2747263014316559, "learning_rate": 1.6709943606307842e-10, "loss": 0.2681, "step": 20873 }, { "epoch": 2.9931172928018355, "grad_norm": 0.2714594006538391, "learning_rate": 1.6034868276781913e-10, "loss": 0.2785, "step": 20874 }, { "epoch": 2.9932606825351304, "grad_norm": 0.26054248213768005, "learning_rate": 1.53737117316588e-10, "loss": 0.2845, "step": 20875 }, { "epoch": 2.9934040722684254, "grad_norm": 0.27164673805236816, "learning_rate": 1.472647398942373e-10, "loss": 0.2822, "step": 20876 }, { "epoch": 2.993547462001721, "grad_norm": 0.2696932256221771, "learning_rate": 1.4093155068117814e-10, "loss": 0.2806, "step": 20877 }, { "epoch": 2.993690851735016, "grad_norm": 0.27924707531929016, "learning_rate": 1.3473754985282584e-10, "loss": 0.2981, "step": 20878 }, { "epoch": 2.993834241468311, "grad_norm": 0.27408647537231445, "learning_rate": 1.2868273758237515e-10, "loss": 0.2702, "step": 20879 }, { "epoch": 2.9939776312016058, "grad_norm": 0.2665305733680725, "learning_rate": 1.2276711403802488e-10, "loss": 0.2665, "step": 20880 }, { "epoch": 2.994121020934901, "grad_norm": 0.2725695073604584, "learning_rate": 1.1699067938464316e-10, "loss": 0.2737, "step": 20881 }, { "epoch": 2.994264410668196, "grad_norm": 0.2659679055213928, "learning_rate": 1.1135343378321229e-10, "loss": 0.2774, "step": 20882 }, { "epoch": 2.994407800401491, "grad_norm": 0.2740589678287506, "learning_rate": 1.0585537739027374e-10, "loss": 0.2788, "step": 20883 }, { "epoch": 2.9945511901347865, "grad_norm": 0.2742420732975006, "learning_rate": 1.0049651035903829e-10, "loss": 0.2733, "step": 20884 }, { "epoch": 2.9946945798680815, "grad_norm": 0.24515892565250397, "learning_rate": 9.527683283883093e-11, "loss": 0.2714, "step": 20885 }, { "epoch": 2.9948379696013765, "grad_norm": 0.2571338713169098, "learning_rate": 9.019634497453577e-11, "loss": 0.2682, "step": 20886 }, { "epoch": 2.9949813593346715, "grad_norm": 0.2622793912887573, "learning_rate": 8.525504690826137e-11, "loss": 0.2773, "step": 20887 }, { "epoch": 2.995124749067967, "grad_norm": 0.2629443109035492, "learning_rate": 8.045293877656513e-11, "loss": 0.2972, "step": 20888 }, { "epoch": 2.995268138801262, "grad_norm": 0.25573697686195374, "learning_rate": 7.579002071433917e-11, "loss": 0.2743, "step": 20889 }, { "epoch": 2.995411528534557, "grad_norm": 0.2937605679035187, "learning_rate": 7.126629285036935e-11, "loss": 0.2647, "step": 20890 }, { "epoch": 2.9955549182678523, "grad_norm": 0.2806030809879303, "learning_rate": 6.68817553112211e-11, "loss": 0.2667, "step": 20891 }, { "epoch": 2.9956983080011472, "grad_norm": 0.25909626483917236, "learning_rate": 6.263640821901895e-11, "loss": 0.2704, "step": 20892 }, { "epoch": 2.995841697734442, "grad_norm": 0.26693546772003174, "learning_rate": 5.853025169144656e-11, "loss": 0.285, "step": 20893 }, { "epoch": 2.995985087467737, "grad_norm": 0.2550961971282959, "learning_rate": 5.456328584285686e-11, "loss": 0.2822, "step": 20894 }, { "epoch": 2.996128477201032, "grad_norm": 0.2676776349544525, "learning_rate": 5.073551078427219e-11, "loss": 0.2681, "step": 20895 }, { "epoch": 2.9962718669343276, "grad_norm": 0.2723686397075653, "learning_rate": 4.7046926621163724e-11, "loss": 0.2695, "step": 20896 }, { "epoch": 2.9964152566676225, "grad_norm": 0.2569507360458374, "learning_rate": 4.349753345733731e-11, "loss": 0.2748, "step": 20897 }, { "epoch": 2.996558646400918, "grad_norm": 0.26632529497146606, "learning_rate": 4.00873313910477e-11, "loss": 0.2842, "step": 20898 }, { "epoch": 2.996702036134213, "grad_norm": 0.26865828037261963, "learning_rate": 3.6816320517774063e-11, "loss": 0.2866, "step": 20899 }, { "epoch": 2.996845425867508, "grad_norm": 0.2555462718009949, "learning_rate": 3.3684500927999576e-11, "loss": 0.2669, "step": 20900 }, { "epoch": 2.996988815600803, "grad_norm": 0.27113133668899536, "learning_rate": 3.0691872708876745e-11, "loss": 0.299, "step": 20901 }, { "epoch": 2.997132205334098, "grad_norm": 0.2659740149974823, "learning_rate": 2.7838435943672305e-11, "loss": 0.2818, "step": 20902 }, { "epoch": 2.9972755950673933, "grad_norm": 0.2732630968093872, "learning_rate": 2.5124190712322305e-11, "loss": 0.2789, "step": 20903 }, { "epoch": 2.9974189848006882, "grad_norm": 0.28010988235473633, "learning_rate": 2.2549137089766805e-11, "loss": 0.2825, "step": 20904 }, { "epoch": 2.997562374533983, "grad_norm": 0.2805086672306061, "learning_rate": 2.011327514872541e-11, "loss": 0.2792, "step": 20905 }, { "epoch": 2.9977057642672786, "grad_norm": 0.24863475561141968, "learning_rate": 1.78166049558115e-11, "loss": 0.26, "step": 20906 }, { "epoch": 2.9978491540005736, "grad_norm": 0.27839556336402893, "learning_rate": 1.5659126575418015e-11, "loss": 0.2695, "step": 20907 }, { "epoch": 2.9979925437338686, "grad_norm": 0.25192731618881226, "learning_rate": 1.3640840068052108e-11, "loss": 0.2867, "step": 20908 }, { "epoch": 2.9981359334671636, "grad_norm": 0.2640179991722107, "learning_rate": 1.176174548922493e-11, "loss": 0.2833, "step": 20909 }, { "epoch": 2.998279323200459, "grad_norm": 0.26963481307029724, "learning_rate": 1.0021842891672074e-11, "loss": 0.2573, "step": 20910 }, { "epoch": 2.998422712933754, "grad_norm": 0.288405179977417, "learning_rate": 8.421132323688242e-12, "loss": 0.2813, "step": 20911 }, { "epoch": 2.998566102667049, "grad_norm": 0.2501285672187805, "learning_rate": 6.959613829682355e-12, "loss": 0.2691, "step": 20912 }, { "epoch": 2.9987094924003443, "grad_norm": 0.28352057933807373, "learning_rate": 5.637287450177553e-12, "loss": 0.2887, "step": 20913 }, { "epoch": 2.9988528821336393, "grad_norm": 0.2651002109050751, "learning_rate": 4.45415322292142e-12, "loss": 0.2715, "step": 20914 }, { "epoch": 2.9989962718669343, "grad_norm": 0.2655324935913086, "learning_rate": 3.4102111801104233e-12, "loss": 0.2928, "step": 20915 }, { "epoch": 2.9991396616002293, "grad_norm": 0.2662738263607025, "learning_rate": 2.505461350610361e-12, "loss": 0.2639, "step": 20916 }, { "epoch": 2.9992830513335247, "grad_norm": 0.2579507827758789, "learning_rate": 1.739903759956363e-12, "loss": 0.2775, "step": 20917 }, { "epoch": 2.9994264410668197, "grad_norm": 0.2599847614765167, "learning_rate": 1.113538429797778e-12, "loss": 0.2845, "step": 20918 }, { "epoch": 2.9995698308001146, "grad_norm": 0.25549575686454773, "learning_rate": 6.263653767879518e-13, "loss": 0.279, "step": 20919 }, { "epoch": 2.99971322053341, "grad_norm": 0.2849169075489044, "learning_rate": 2.7838461535978356e-13, "loss": 0.2645, "step": 20920 }, { "epoch": 2.999856610266705, "grad_norm": 0.27539896965026855, "learning_rate": 6.95961543950574e-14, "loss": 0.2798, "step": 20921 }, { "epoch": 3.0, "grad_norm": 0.2680087089538574, "learning_rate": 0.0, "loss": 0.3092, "step": 20922 }, { "epoch": 3.0, "step": 20922, "total_flos": 1.7326765286457344e+16, "train_loss": 0.31734984854321535, "train_runtime": 337873.2791, "train_samples_per_second": 5.944, "train_steps_per_second": 0.062 } ], "logging_steps": 1.0, "max_steps": 20922, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.7326765286457344e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }