{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 3564, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0016835016835016834, "grad_norm": 10.01240348815918, "learning_rate": 5.5865921787709494e-09, "loss": 1.7057493925094604, "step": 2 }, { "epoch": 0.003367003367003367, "grad_norm": 14.913334846496582, "learning_rate": 1.6759776536312847e-08, "loss": 1.2436225414276123, "step": 4 }, { "epoch": 0.005050505050505051, "grad_norm": 22.982995986938477, "learning_rate": 2.7932960893854745e-08, "loss": 1.686056137084961, "step": 6 }, { "epoch": 0.006734006734006734, "grad_norm": 15.24986457824707, "learning_rate": 3.910614525139665e-08, "loss": 1.6055235862731934, "step": 8 }, { "epoch": 0.008417508417508417, "grad_norm": 30.967639923095703, "learning_rate": 5.027932960893855e-08, "loss": 4.50665283203125, "step": 10 }, { "epoch": 0.010101010101010102, "grad_norm": 4.303424835205078, "learning_rate": 6.145251396648044e-08, "loss": 1.9789408445358276, "step": 12 }, { "epoch": 0.011784511784511785, "grad_norm": 5.598588466644287, "learning_rate": 7.262569832402235e-08, "loss": 1.6753730773925781, "step": 14 }, { "epoch": 0.013468013468013467, "grad_norm": 4.323257923126221, "learning_rate": 8.379888268156423e-08, "loss": 1.6596330404281616, "step": 16 }, { "epoch": 0.015151515151515152, "grad_norm": 26.17571258544922, "learning_rate": 9.497206703910614e-08, "loss": 2.7241992950439453, "step": 18 }, { "epoch": 0.016835016835016835, "grad_norm": 9.184181213378906, "learning_rate": 1.0614525139664805e-07, "loss": 1.9634017944335938, "step": 20 }, { "epoch": 0.018518518518518517, "grad_norm": 4.683750152587891, "learning_rate": 1.1731843575418994e-07, "loss": 1.8491621017456055, "step": 22 }, { "epoch": 0.020202020202020204, "grad_norm": 14.232526779174805, "learning_rate": 1.2849162011173183e-07, "loss": 3.537993907928467, "step": 24 }, { "epoch": 0.021885521885521887, "grad_norm": 11.717961311340332, "learning_rate": 1.3966480446927373e-07, "loss": 2.8410818576812744, "step": 26 }, { "epoch": 0.02356902356902357, "grad_norm": 11.476764678955078, "learning_rate": 1.5083798882681565e-07, "loss": 2.1707875728607178, "step": 28 }, { "epoch": 0.025252525252525252, "grad_norm": 42.536720275878906, "learning_rate": 1.6201117318435754e-07, "loss": 3.401388645172119, "step": 30 }, { "epoch": 0.026936026936026935, "grad_norm": 15.799206733703613, "learning_rate": 1.7318435754189943e-07, "loss": 1.8762117624282837, "step": 32 }, { "epoch": 0.02861952861952862, "grad_norm": 56.47621154785156, "learning_rate": 1.8435754189944133e-07, "loss": 4.025151252746582, "step": 34 }, { "epoch": 0.030303030303030304, "grad_norm": 8.71907901763916, "learning_rate": 1.9553072625698322e-07, "loss": 1.9956148862838745, "step": 36 }, { "epoch": 0.03198653198653199, "grad_norm": 13.315755844116211, "learning_rate": 2.0670391061452514e-07, "loss": 1.5647544860839844, "step": 38 }, { "epoch": 0.03367003367003367, "grad_norm": 18.28321647644043, "learning_rate": 2.17877094972067e-07, "loss": 2.4461331367492676, "step": 40 }, { "epoch": 0.03535353535353535, "grad_norm": 7.177945137023926, "learning_rate": 2.2905027932960893e-07, "loss": 3.1400742530822754, "step": 42 }, { "epoch": 0.037037037037037035, "grad_norm": 11.345965385437012, "learning_rate": 2.402234636871508e-07, "loss": 2.982694149017334, "step": 44 }, { "epoch": 0.03872053872053872, "grad_norm": 18.986379623413086, "learning_rate": 2.5139664804469275e-07, "loss": 1.7094351053237915, "step": 46 }, { "epoch": 0.04040404040404041, "grad_norm": 25.200927734375, "learning_rate": 2.6256983240223464e-07, "loss": 3.4711947441101074, "step": 48 }, { "epoch": 0.04208754208754209, "grad_norm": 25.79502296447754, "learning_rate": 2.7374301675977653e-07, "loss": 2.5125930309295654, "step": 50 }, { "epoch": 0.04377104377104377, "grad_norm": 26.86095428466797, "learning_rate": 2.849162011173184e-07, "loss": 2.5184483528137207, "step": 52 }, { "epoch": 0.045454545454545456, "grad_norm": 23.869613647460938, "learning_rate": 2.960893854748603e-07, "loss": 2.1967999935150146, "step": 54 }, { "epoch": 0.04713804713804714, "grad_norm": 4.752484321594238, "learning_rate": 3.072625698324022e-07, "loss": 1.6605415344238281, "step": 56 }, { "epoch": 0.04882154882154882, "grad_norm": 30.32961654663086, "learning_rate": 3.184357541899441e-07, "loss": 2.6820101737976074, "step": 58 }, { "epoch": 0.050505050505050504, "grad_norm": 4.937363624572754, "learning_rate": 3.29608938547486e-07, "loss": 2.046969175338745, "step": 60 }, { "epoch": 0.05218855218855219, "grad_norm": 26.058670043945312, "learning_rate": 3.407821229050279e-07, "loss": 2.126314163208008, "step": 62 }, { "epoch": 0.05387205387205387, "grad_norm": 3.972296714782715, "learning_rate": 3.5195530726256984e-07, "loss": 1.469801902770996, "step": 64 }, { "epoch": 0.05555555555555555, "grad_norm": 36.323368072509766, "learning_rate": 3.6312849162011174e-07, "loss": 2.0382440090179443, "step": 66 }, { "epoch": 0.05723905723905724, "grad_norm": 5.039744853973389, "learning_rate": 3.7430167597765363e-07, "loss": 1.679071068763733, "step": 68 }, { "epoch": 0.058922558922558925, "grad_norm": 5.542041778564453, "learning_rate": 3.8547486033519547e-07, "loss": 1.7368519306182861, "step": 70 }, { "epoch": 0.06060606060606061, "grad_norm": 11.228593826293945, "learning_rate": 3.966480446927374e-07, "loss": 1.9073054790496826, "step": 72 }, { "epoch": 0.06228956228956229, "grad_norm": 6.521553993225098, "learning_rate": 4.078212290502793e-07, "loss": 1.7021303176879883, "step": 74 }, { "epoch": 0.06397306397306397, "grad_norm": 4.614531993865967, "learning_rate": 4.189944134078212e-07, "loss": 1.3584303855895996, "step": 76 }, { "epoch": 0.06565656565656566, "grad_norm": 4.567502021789551, "learning_rate": 4.301675977653631e-07, "loss": 1.7855596542358398, "step": 78 }, { "epoch": 0.06734006734006734, "grad_norm": 4.453341484069824, "learning_rate": 4.41340782122905e-07, "loss": 1.5260930061340332, "step": 80 }, { "epoch": 0.06902356902356903, "grad_norm": 9.207719802856445, "learning_rate": 4.5251396648044694e-07, "loss": 1.7678306102752686, "step": 82 }, { "epoch": 0.0707070707070707, "grad_norm": 11.142820358276367, "learning_rate": 4.6368715083798884e-07, "loss": 1.4878003597259521, "step": 84 }, { "epoch": 0.0723905723905724, "grad_norm": 6.588044166564941, "learning_rate": 4.7486033519553073e-07, "loss": 1.6655892133712769, "step": 86 }, { "epoch": 0.07407407407407407, "grad_norm": 7.762340068817139, "learning_rate": 4.860335195530726e-07, "loss": 1.4147857427597046, "step": 88 }, { "epoch": 0.07575757575757576, "grad_norm": 19.327587127685547, "learning_rate": 4.972067039106145e-07, "loss": 1.6009736061096191, "step": 90 }, { "epoch": 0.07744107744107744, "grad_norm": 16.781408309936523, "learning_rate": 5.083798882681564e-07, "loss": 1.331944227218628, "step": 92 }, { "epoch": 0.07912457912457913, "grad_norm": 5.269062042236328, "learning_rate": 5.195530726256983e-07, "loss": 1.3683245182037354, "step": 94 }, { "epoch": 0.08080808080808081, "grad_norm": 2.652998685836792, "learning_rate": 5.307262569832402e-07, "loss": 1.4645051956176758, "step": 96 }, { "epoch": 0.08249158249158249, "grad_norm": 26.370506286621094, "learning_rate": 5.418994413407821e-07, "loss": 1.4499703645706177, "step": 98 }, { "epoch": 0.08417508417508418, "grad_norm": 4.437371253967285, "learning_rate": 5.53072625698324e-07, "loss": 1.3694539070129395, "step": 100 }, { "epoch": 0.08585858585858586, "grad_norm": 7.602840900421143, "learning_rate": 5.642458100558659e-07, "loss": 1.2753658294677734, "step": 102 }, { "epoch": 0.08754208754208755, "grad_norm": 5.345534801483154, "learning_rate": 5.754189944134078e-07, "loss": 0.9927137494087219, "step": 104 }, { "epoch": 0.08922558922558922, "grad_norm": 57.12667465209961, "learning_rate": 5.865921787709497e-07, "loss": 1.144801378250122, "step": 106 }, { "epoch": 0.09090909090909091, "grad_norm": 3.486433267593384, "learning_rate": 5.977653631284916e-07, "loss": 1.3661882877349854, "step": 108 }, { "epoch": 0.09259259259259259, "grad_norm": 8.98828411102295, "learning_rate": 6.089385474860335e-07, "loss": 0.9164130687713623, "step": 110 }, { "epoch": 0.09427609427609428, "grad_norm": 4.9939141273498535, "learning_rate": 6.201117318435754e-07, "loss": 1.3426786661148071, "step": 112 }, { "epoch": 0.09595959595959595, "grad_norm": 29.148103713989258, "learning_rate": 6.312849162011172e-07, "loss": 1.138382911682129, "step": 114 }, { "epoch": 0.09764309764309764, "grad_norm": 34.31653594970703, "learning_rate": 6.424581005586592e-07, "loss": 0.7960847616195679, "step": 116 }, { "epoch": 0.09932659932659933, "grad_norm": 4.712627410888672, "learning_rate": 6.536312849162011e-07, "loss": 1.2441091537475586, "step": 118 }, { "epoch": 0.10101010101010101, "grad_norm": 5.5220794677734375, "learning_rate": 6.64804469273743e-07, "loss": 1.0892267227172852, "step": 120 }, { "epoch": 0.1026936026936027, "grad_norm": 10.08218765258789, "learning_rate": 6.759776536312849e-07, "loss": 1.266754150390625, "step": 122 }, { "epoch": 0.10437710437710437, "grad_norm": 7.951529026031494, "learning_rate": 6.871508379888268e-07, "loss": 0.8909415006637573, "step": 124 }, { "epoch": 0.10606060606060606, "grad_norm": 3.5433144569396973, "learning_rate": 6.983240223463687e-07, "loss": 0.7614157795906067, "step": 126 }, { "epoch": 0.10774410774410774, "grad_norm": 84.19695281982422, "learning_rate": 7.094972067039106e-07, "loss": 1.1203527450561523, "step": 128 }, { "epoch": 0.10942760942760943, "grad_norm": 6.779047966003418, "learning_rate": 7.206703910614524e-07, "loss": 1.0394889116287231, "step": 130 }, { "epoch": 0.1111111111111111, "grad_norm": 3.759247303009033, "learning_rate": 7.318435754189943e-07, "loss": 0.9934459328651428, "step": 132 }, { "epoch": 0.1127946127946128, "grad_norm": 4.790719032287598, "learning_rate": 7.430167597765363e-07, "loss": 1.2970447540283203, "step": 134 }, { "epoch": 0.11447811447811448, "grad_norm": 11.66688346862793, "learning_rate": 7.541899441340782e-07, "loss": 1.2734112739562988, "step": 136 }, { "epoch": 0.11616161616161616, "grad_norm": 5.437692642211914, "learning_rate": 7.653631284916201e-07, "loss": 1.7463512420654297, "step": 138 }, { "epoch": 0.11784511784511785, "grad_norm": 2.954306125640869, "learning_rate": 7.76536312849162e-07, "loss": 1.2036831378936768, "step": 140 }, { "epoch": 0.11952861952861953, "grad_norm": 3.9827589988708496, "learning_rate": 7.877094972067039e-07, "loss": 1.1270943880081177, "step": 142 }, { "epoch": 0.12121212121212122, "grad_norm": 19.19826316833496, "learning_rate": 7.988826815642458e-07, "loss": 1.0638954639434814, "step": 144 }, { "epoch": 0.12289562289562289, "grad_norm": 2.969254970550537, "learning_rate": 8.100558659217876e-07, "loss": 1.2084304094314575, "step": 146 }, { "epoch": 0.12457912457912458, "grad_norm": 3.5464372634887695, "learning_rate": 8.212290502793295e-07, "loss": 1.0377205610275269, "step": 148 }, { "epoch": 0.12626262626262627, "grad_norm": 26.851030349731445, "learning_rate": 8.324022346368714e-07, "loss": 1.298867106437683, "step": 150 }, { "epoch": 0.12794612794612795, "grad_norm": 12.729865074157715, "learning_rate": 8.435754189944134e-07, "loss": 1.0469536781311035, "step": 152 }, { "epoch": 0.12962962962962962, "grad_norm": 39.720340728759766, "learning_rate": 8.547486033519553e-07, "loss": 1.3842543363571167, "step": 154 }, { "epoch": 0.13131313131313133, "grad_norm": 30.861583709716797, "learning_rate": 8.659217877094972e-07, "loss": 1.2696869373321533, "step": 156 }, { "epoch": 0.132996632996633, "grad_norm": 2.758213520050049, "learning_rate": 8.770949720670391e-07, "loss": 1.1152485609054565, "step": 158 }, { "epoch": 0.13468013468013468, "grad_norm": 5.129064559936523, "learning_rate": 8.88268156424581e-07, "loss": 1.21260666847229, "step": 160 }, { "epoch": 0.13636363636363635, "grad_norm": 2.200296640396118, "learning_rate": 8.994413407821229e-07, "loss": 1.0739009380340576, "step": 162 }, { "epoch": 0.13804713804713806, "grad_norm": 22.802173614501953, "learning_rate": 9.106145251396647e-07, "loss": 1.0534250736236572, "step": 164 }, { "epoch": 0.13973063973063973, "grad_norm": 8.334705352783203, "learning_rate": 9.217877094972066e-07, "loss": 0.9987061023712158, "step": 166 }, { "epoch": 0.1414141414141414, "grad_norm": 3.1446645259857178, "learning_rate": 9.329608938547485e-07, "loss": 1.2239556312561035, "step": 168 }, { "epoch": 0.14309764309764308, "grad_norm": 11.334406852722168, "learning_rate": 9.441340782122904e-07, "loss": 1.1194162368774414, "step": 170 }, { "epoch": 0.1447811447811448, "grad_norm": 3.408362865447998, "learning_rate": 9.553072625698324e-07, "loss": 1.085777997970581, "step": 172 }, { "epoch": 0.14646464646464646, "grad_norm": 6.2441534996032715, "learning_rate": 9.664804469273742e-07, "loss": 0.7717651128768921, "step": 174 }, { "epoch": 0.14814814814814814, "grad_norm": 3.749255895614624, "learning_rate": 9.776536312849163e-07, "loss": 1.1312694549560547, "step": 176 }, { "epoch": 0.14983164983164984, "grad_norm": 3.902320384979248, "learning_rate": 9.888268156424581e-07, "loss": 1.3509280681610107, "step": 178 }, { "epoch": 0.15151515151515152, "grad_norm": 6.110651969909668, "learning_rate": 1e-06, "loss": 1.075784683227539, "step": 180 }, { "epoch": 0.1531986531986532, "grad_norm": 9.884479522705078, "learning_rate": 9.999992247803292e-07, "loss": 1.4511303901672363, "step": 182 }, { "epoch": 0.15488215488215487, "grad_norm": 22.860551834106445, "learning_rate": 9.999968991239885e-07, "loss": 1.0601496696472168, "step": 184 }, { "epoch": 0.15656565656565657, "grad_norm": 47.76069641113281, "learning_rate": 9.9999302303899e-07, "loss": 1.175671100616455, "step": 186 }, { "epoch": 0.15824915824915825, "grad_norm": 7.632693290710449, "learning_rate": 9.999875965386889e-07, "loss": 0.9617436528205872, "step": 188 }, { "epoch": 0.15993265993265993, "grad_norm": 14.18217945098877, "learning_rate": 9.999806196417815e-07, "loss": 0.8225744962692261, "step": 190 }, { "epoch": 0.16161616161616163, "grad_norm": 3.5702500343322754, "learning_rate": 9.999720923723065e-07, "loss": 1.3951547145843506, "step": 192 }, { "epoch": 0.1632996632996633, "grad_norm": 6.512271881103516, "learning_rate": 9.999620147596435e-07, "loss": 1.3134064674377441, "step": 194 }, { "epoch": 0.16498316498316498, "grad_norm": 4.347053050994873, "learning_rate": 9.999503868385147e-07, "loss": 1.1201355457305908, "step": 196 }, { "epoch": 0.16666666666666666, "grad_norm": 4.274275779724121, "learning_rate": 9.999372086489827e-07, "loss": 1.2217128276824951, "step": 198 }, { "epoch": 0.16835016835016836, "grad_norm": 36.957733154296875, "learning_rate": 9.999224802364522e-07, "loss": 0.9089727997779846, "step": 200 }, { "epoch": 0.17003367003367004, "grad_norm": 10.688148498535156, "learning_rate": 9.999062016516683e-07, "loss": 0.9836642742156982, "step": 202 }, { "epoch": 0.1717171717171717, "grad_norm": 5.000755310058594, "learning_rate": 9.998883729507182e-07, "loss": 1.0589679479599, "step": 204 }, { "epoch": 0.1734006734006734, "grad_norm": 3.18554425239563, "learning_rate": 9.998689941950286e-07, "loss": 1.1106410026550293, "step": 206 }, { "epoch": 0.1750841750841751, "grad_norm": 3.399953842163086, "learning_rate": 9.99848065451368e-07, "loss": 1.259597897529602, "step": 208 }, { "epoch": 0.17676767676767677, "grad_norm": 34.06399917602539, "learning_rate": 9.998255867918447e-07, "loss": 0.7958086729049683, "step": 210 }, { "epoch": 0.17845117845117844, "grad_norm": 11.635184288024902, "learning_rate": 9.99801558293907e-07, "loss": 0.974760115146637, "step": 212 }, { "epoch": 0.18013468013468015, "grad_norm": 3.804048776626587, "learning_rate": 9.997759800403432e-07, "loss": 1.4053202867507935, "step": 214 }, { "epoch": 0.18181818181818182, "grad_norm": 3.969377279281616, "learning_rate": 9.99748852119281e-07, "loss": 0.8879891633987427, "step": 216 }, { "epoch": 0.1835016835016835, "grad_norm": 13.216470718383789, "learning_rate": 9.997201746241877e-07, "loss": 0.7051749229431152, "step": 218 }, { "epoch": 0.18518518518518517, "grad_norm": 21.844314575195312, "learning_rate": 9.996899476538694e-07, "loss": 1.4015233516693115, "step": 220 }, { "epoch": 0.18686868686868688, "grad_norm": 4.534096717834473, "learning_rate": 9.996581713124706e-07, "loss": 0.972633957862854, "step": 222 }, { "epoch": 0.18855218855218855, "grad_norm": 3.273697853088379, "learning_rate": 9.99624845709474e-07, "loss": 1.2434642314910889, "step": 224 }, { "epoch": 0.19023569023569023, "grad_norm": 4.797500133514404, "learning_rate": 9.995899709597006e-07, "loss": 1.0040223598480225, "step": 226 }, { "epoch": 0.1919191919191919, "grad_norm": 12.437410354614258, "learning_rate": 9.995535471833086e-07, "loss": 1.2370095252990723, "step": 228 }, { "epoch": 0.1936026936026936, "grad_norm": 7.460165023803711, "learning_rate": 9.995155745057929e-07, "loss": 1.4212405681610107, "step": 230 }, { "epoch": 0.19528619528619529, "grad_norm": 9.647342681884766, "learning_rate": 9.994760530579857e-07, "loss": 1.1002936363220215, "step": 232 }, { "epoch": 0.19696969696969696, "grad_norm": 11.12820053100586, "learning_rate": 9.994349829760549e-07, "loss": 1.237018346786499, "step": 234 }, { "epoch": 0.19865319865319866, "grad_norm": 5.350140571594238, "learning_rate": 9.993923644015042e-07, "loss": 1.0195953845977783, "step": 236 }, { "epoch": 0.20033670033670034, "grad_norm": 3.050861358642578, "learning_rate": 9.993481974811725e-07, "loss": 1.22686767578125, "step": 238 }, { "epoch": 0.20202020202020202, "grad_norm": 7.857388019561768, "learning_rate": 9.993024823672335e-07, "loss": 1.0028936862945557, "step": 240 }, { "epoch": 0.2037037037037037, "grad_norm": 7.335727214813232, "learning_rate": 9.99255219217195e-07, "loss": 1.2266963720321655, "step": 242 }, { "epoch": 0.2053872053872054, "grad_norm": 6.673895359039307, "learning_rate": 9.992064081938982e-07, "loss": 1.0401980876922607, "step": 244 }, { "epoch": 0.20707070707070707, "grad_norm": 11.121489524841309, "learning_rate": 9.99156049465518e-07, "loss": 0.704534649848938, "step": 246 }, { "epoch": 0.20875420875420875, "grad_norm": 6.052087306976318, "learning_rate": 9.99104143205561e-07, "loss": 1.2733914852142334, "step": 248 }, { "epoch": 0.21043771043771045, "grad_norm": 8.680047988891602, "learning_rate": 9.990506895928664e-07, "loss": 1.0285900831222534, "step": 250 }, { "epoch": 0.21212121212121213, "grad_norm": 3.77591609954834, "learning_rate": 9.989956888116044e-07, "loss": 0.925588071346283, "step": 252 }, { "epoch": 0.2138047138047138, "grad_norm": 17.994216918945312, "learning_rate": 9.989391410512756e-07, "loss": 1.09348726272583, "step": 254 }, { "epoch": 0.21548821548821548, "grad_norm": 3.3857617378234863, "learning_rate": 9.988810465067111e-07, "loss": 1.2375221252441406, "step": 256 }, { "epoch": 0.21717171717171718, "grad_norm": 2.9572367668151855, "learning_rate": 9.988214053780707e-07, "loss": 0.8651703000068665, "step": 258 }, { "epoch": 0.21885521885521886, "grad_norm": 3.476825714111328, "learning_rate": 9.987602178708435e-07, "loss": 1.0651121139526367, "step": 260 }, { "epoch": 0.22053872053872053, "grad_norm": 3.713834047317505, "learning_rate": 9.986974841958463e-07, "loss": 1.0779788494110107, "step": 262 }, { "epoch": 0.2222222222222222, "grad_norm": 4.407167911529541, "learning_rate": 9.986332045692227e-07, "loss": 1.1462655067443848, "step": 264 }, { "epoch": 0.2239057239057239, "grad_norm": 3.255230665206909, "learning_rate": 9.98567379212443e-07, "loss": 1.245474100112915, "step": 266 }, { "epoch": 0.2255892255892256, "grad_norm": 30.358354568481445, "learning_rate": 9.985000083523037e-07, "loss": 0.6667277216911316, "step": 268 }, { "epoch": 0.22727272727272727, "grad_norm": 5.595312595367432, "learning_rate": 9.984310922209254e-07, "loss": 1.0221211910247803, "step": 270 }, { "epoch": 0.22895622895622897, "grad_norm": 16.317052841186523, "learning_rate": 9.983606310557533e-07, "loss": 1.3395957946777344, "step": 272 }, { "epoch": 0.23063973063973064, "grad_norm": 11.24964714050293, "learning_rate": 9.982886250995556e-07, "loss": 1.1954050064086914, "step": 274 }, { "epoch": 0.23232323232323232, "grad_norm": 44.74198913574219, "learning_rate": 9.982150746004232e-07, "loss": 0.9265189170837402, "step": 276 }, { "epoch": 0.234006734006734, "grad_norm": 3.8386383056640625, "learning_rate": 9.981399798117685e-07, "loss": 1.198085069656372, "step": 278 }, { "epoch": 0.2356902356902357, "grad_norm": 44.37248992919922, "learning_rate": 9.980633409923247e-07, "loss": 1.0136717557907104, "step": 280 }, { "epoch": 0.23737373737373738, "grad_norm": 7.57785701751709, "learning_rate": 9.979851584061449e-07, "loss": 0.9574207663536072, "step": 282 }, { "epoch": 0.23905723905723905, "grad_norm": 8.24811840057373, "learning_rate": 9.97905432322601e-07, "loss": 1.3114678859710693, "step": 284 }, { "epoch": 0.24074074074074073, "grad_norm": 5.775442600250244, "learning_rate": 9.978241630163826e-07, "loss": 0.9548346400260925, "step": 286 }, { "epoch": 0.24242424242424243, "grad_norm": 11.149243354797363, "learning_rate": 9.977413507674968e-07, "loss": 0.8632457852363586, "step": 288 }, { "epoch": 0.2441077441077441, "grad_norm": 78.07566833496094, "learning_rate": 9.976569958612667e-07, "loss": 1.2243592739105225, "step": 290 }, { "epoch": 0.24579124579124578, "grad_norm": 4.65302848815918, "learning_rate": 9.975710985883304e-07, "loss": 0.6913841366767883, "step": 292 }, { "epoch": 0.2474747474747475, "grad_norm": 15.239048957824707, "learning_rate": 9.974836592446402e-07, "loss": 1.3095204830169678, "step": 294 }, { "epoch": 0.24915824915824916, "grad_norm": 13.059560775756836, "learning_rate": 9.973946781314614e-07, "loss": 1.106144666671753, "step": 296 }, { "epoch": 0.25084175084175087, "grad_norm": 5.432850360870361, "learning_rate": 9.973041555553712e-07, "loss": 0.6466901898384094, "step": 298 }, { "epoch": 0.25252525252525254, "grad_norm": 9.237662315368652, "learning_rate": 9.972120918282583e-07, "loss": 0.8612852096557617, "step": 300 }, { "epoch": 0.2542087542087542, "grad_norm": 19.600900650024414, "learning_rate": 9.971184872673208e-07, "loss": 1.105349063873291, "step": 302 }, { "epoch": 0.2558922558922559, "grad_norm": 40.91580581665039, "learning_rate": 9.970233421950659e-07, "loss": 0.9198004603385925, "step": 304 }, { "epoch": 0.25757575757575757, "grad_norm": 4.66962194442749, "learning_rate": 9.969266569393081e-07, "loss": 1.3845856189727783, "step": 306 }, { "epoch": 0.25925925925925924, "grad_norm": 60.427490234375, "learning_rate": 9.968284318331692e-07, "loss": 1.1327593326568604, "step": 308 }, { "epoch": 0.2609427609427609, "grad_norm": 22.725788116455078, "learning_rate": 9.967286672150757e-07, "loss": 1.1523091793060303, "step": 310 }, { "epoch": 0.26262626262626265, "grad_norm": 24.43414878845215, "learning_rate": 9.96627363428759e-07, "loss": 1.234093189239502, "step": 312 }, { "epoch": 0.26430976430976433, "grad_norm": 3.773989200592041, "learning_rate": 9.965245208232528e-07, "loss": 1.123462200164795, "step": 314 }, { "epoch": 0.265993265993266, "grad_norm": 4.06792688369751, "learning_rate": 9.964201397528935e-07, "loss": 1.274748682975769, "step": 316 }, { "epoch": 0.2676767676767677, "grad_norm": 6.183606147766113, "learning_rate": 9.963142205773178e-07, "loss": 1.0359277725219727, "step": 318 }, { "epoch": 0.26936026936026936, "grad_norm": 17.0985164642334, "learning_rate": 9.962067636614617e-07, "loss": 0.7821587920188904, "step": 320 }, { "epoch": 0.27104377104377103, "grad_norm": 8.39433765411377, "learning_rate": 9.960977693755597e-07, "loss": 1.007806420326233, "step": 322 }, { "epoch": 0.2727272727272727, "grad_norm": 6.79010534286499, "learning_rate": 9.959872380951425e-07, "loss": 1.306843638420105, "step": 324 }, { "epoch": 0.27441077441077444, "grad_norm": 3.4290051460266113, "learning_rate": 9.958751702010373e-07, "loss": 1.0737717151641846, "step": 326 }, { "epoch": 0.2760942760942761, "grad_norm": 3.778372287750244, "learning_rate": 9.957615660793653e-07, "loss": 0.842218816280365, "step": 328 }, { "epoch": 0.2777777777777778, "grad_norm": 4.193020343780518, "learning_rate": 9.9564642612154e-07, "loss": 0.9259565472602844, "step": 330 }, { "epoch": 0.27946127946127947, "grad_norm": 5.208146572113037, "learning_rate": 9.955297507242673e-07, "loss": 1.1419891119003296, "step": 332 }, { "epoch": 0.28114478114478114, "grad_norm": 5.717302322387695, "learning_rate": 9.95411540289543e-07, "loss": 1.1330386400222778, "step": 334 }, { "epoch": 0.2828282828282828, "grad_norm": 5.831217288970947, "learning_rate": 9.952917952246516e-07, "loss": 1.0413146018981934, "step": 336 }, { "epoch": 0.2845117845117845, "grad_norm": 3.645052433013916, "learning_rate": 9.951705159421654e-07, "loss": 1.235117793083191, "step": 338 }, { "epoch": 0.28619528619528617, "grad_norm": 22.020658493041992, "learning_rate": 9.950477028599428e-07, "loss": 1.043231725692749, "step": 340 }, { "epoch": 0.2878787878787879, "grad_norm": 21.875652313232422, "learning_rate": 9.94923356401126e-07, "loss": 1.175392985343933, "step": 342 }, { "epoch": 0.2895622895622896, "grad_norm": 27.17024803161621, "learning_rate": 9.947974769941413e-07, "loss": 0.9123649001121521, "step": 344 }, { "epoch": 0.29124579124579125, "grad_norm": 6.494509220123291, "learning_rate": 9.946700650726963e-07, "loss": 1.1428461074829102, "step": 346 }, { "epoch": 0.29292929292929293, "grad_norm": 6.450991630554199, "learning_rate": 9.94541121075778e-07, "loss": 1.08597731590271, "step": 348 }, { "epoch": 0.2946127946127946, "grad_norm": 4.014670372009277, "learning_rate": 9.944106454476535e-07, "loss": 0.8208044171333313, "step": 350 }, { "epoch": 0.2962962962962963, "grad_norm": 5.806086540222168, "learning_rate": 9.94278638637866e-07, "loss": 0.6253402829170227, "step": 352 }, { "epoch": 0.29797979797979796, "grad_norm": 3.053389310836792, "learning_rate": 9.941451011012342e-07, "loss": 1.0509334802627563, "step": 354 }, { "epoch": 0.2996632996632997, "grad_norm": 7.8727521896362305, "learning_rate": 9.940100332978513e-07, "loss": 1.0956045389175415, "step": 356 }, { "epoch": 0.30134680134680136, "grad_norm": 32.331748962402344, "learning_rate": 9.938734356930828e-07, "loss": 1.004880666732788, "step": 358 }, { "epoch": 0.30303030303030304, "grad_norm": 4.172276020050049, "learning_rate": 9.93735308757565e-07, "loss": 0.8379921317100525, "step": 360 }, { "epoch": 0.3047138047138047, "grad_norm": 6.15704870223999, "learning_rate": 9.93595652967203e-07, "loss": 1.0078097581863403, "step": 362 }, { "epoch": 0.3063973063973064, "grad_norm": 4.966274261474609, "learning_rate": 9.9345446880317e-07, "loss": 1.2672780752182007, "step": 364 }, { "epoch": 0.30808080808080807, "grad_norm": 8.712943077087402, "learning_rate": 9.933117567519048e-07, "loss": 0.8534368276596069, "step": 366 }, { "epoch": 0.30976430976430974, "grad_norm": 6.715219020843506, "learning_rate": 9.931675173051105e-07, "loss": 0.8929988145828247, "step": 368 }, { "epoch": 0.3114478114478115, "grad_norm": 8.526223182678223, "learning_rate": 9.930217509597527e-07, "loss": 1.1088082790374756, "step": 370 }, { "epoch": 0.31313131313131315, "grad_norm": 13.495247840881348, "learning_rate": 9.928744582180574e-07, "loss": 1.2500221729278564, "step": 372 }, { "epoch": 0.3148148148148148, "grad_norm": 23.23642921447754, "learning_rate": 9.927256395875107e-07, "loss": 0.7106721997261047, "step": 374 }, { "epoch": 0.3164983164983165, "grad_norm": 6.651264190673828, "learning_rate": 9.925752955808548e-07, "loss": 1.0243923664093018, "step": 376 }, { "epoch": 0.3181818181818182, "grad_norm": 5.95202112197876, "learning_rate": 9.924234267160885e-07, "loss": 1.370633840560913, "step": 378 }, { "epoch": 0.31986531986531985, "grad_norm": 11.883193016052246, "learning_rate": 9.922700335164638e-07, "loss": 0.7322716116905212, "step": 380 }, { "epoch": 0.32154882154882153, "grad_norm": 7.471388816833496, "learning_rate": 9.92115116510485e-07, "loss": 0.913710355758667, "step": 382 }, { "epoch": 0.32323232323232326, "grad_norm": 44.468502044677734, "learning_rate": 9.919586762319058e-07, "loss": 1.1375393867492676, "step": 384 }, { "epoch": 0.32491582491582494, "grad_norm": 73.37066650390625, "learning_rate": 9.918007132197294e-07, "loss": 0.750845193862915, "step": 386 }, { "epoch": 0.3265993265993266, "grad_norm": 13.170440673828125, "learning_rate": 9.916412280182047e-07, "loss": 0.9285147190093994, "step": 388 }, { "epoch": 0.3282828282828283, "grad_norm": 27.329137802124023, "learning_rate": 9.91480221176825e-07, "loss": 1.1141570806503296, "step": 390 }, { "epoch": 0.32996632996632996, "grad_norm": 23.576858520507812, "learning_rate": 9.913176932503269e-07, "loss": 0.8426070809364319, "step": 392 }, { "epoch": 0.33164983164983164, "grad_norm": 4.582382678985596, "learning_rate": 9.911536447986874e-07, "loss": 1.3466606140136719, "step": 394 }, { "epoch": 0.3333333333333333, "grad_norm": 18.690176010131836, "learning_rate": 9.909880763871225e-07, "loss": 1.2158761024475098, "step": 396 }, { "epoch": 0.335016835016835, "grad_norm": 12.741125106811523, "learning_rate": 9.90820988586085e-07, "loss": 0.7843135595321655, "step": 398 }, { "epoch": 0.3367003367003367, "grad_norm": 8.261248588562012, "learning_rate": 9.906523819712627e-07, "loss": 0.9648294448852539, "step": 400 }, { "epoch": 0.3383838383838384, "grad_norm": 13.866211891174316, "learning_rate": 9.904822571235764e-07, "loss": 0.9860712289810181, "step": 402 }, { "epoch": 0.3400673400673401, "grad_norm": 7.611033916473389, "learning_rate": 9.903106146291776e-07, "loss": 1.0380196571350098, "step": 404 }, { "epoch": 0.34175084175084175, "grad_norm": 4.44096565246582, "learning_rate": 9.901374550794471e-07, "loss": 1.0885226726531982, "step": 406 }, { "epoch": 0.3434343434343434, "grad_norm": 7.336009502410889, "learning_rate": 9.899627790709922e-07, "loss": 0.978155255317688, "step": 408 }, { "epoch": 0.3451178451178451, "grad_norm": 35.349571228027344, "learning_rate": 9.897865872056454e-07, "loss": 0.5597323179244995, "step": 410 }, { "epoch": 0.3468013468013468, "grad_norm": 5.807060718536377, "learning_rate": 9.896088800904617e-07, "loss": 0.8961684703826904, "step": 412 }, { "epoch": 0.3484848484848485, "grad_norm": 18.415029525756836, "learning_rate": 9.894296583377171e-07, "loss": 0.9247993230819702, "step": 414 }, { "epoch": 0.3501683501683502, "grad_norm": 16.985078811645508, "learning_rate": 9.892489225649058e-07, "loss": 1.2044103145599365, "step": 416 }, { "epoch": 0.35185185185185186, "grad_norm": 6.910268306732178, "learning_rate": 9.890666733947386e-07, "loss": 0.7405315637588501, "step": 418 }, { "epoch": 0.35353535353535354, "grad_norm": 9.06907844543457, "learning_rate": 9.888829114551404e-07, "loss": 0.9250643253326416, "step": 420 }, { "epoch": 0.3552188552188552, "grad_norm": 10.192124366760254, "learning_rate": 9.886976373792488e-07, "loss": 1.1218069791793823, "step": 422 }, { "epoch": 0.3569023569023569, "grad_norm": 9.159024238586426, "learning_rate": 9.885108518054106e-07, "loss": 0.6351463794708252, "step": 424 }, { "epoch": 0.35858585858585856, "grad_norm": 29.38273811340332, "learning_rate": 9.883225553771807e-07, "loss": 1.0669465065002441, "step": 426 }, { "epoch": 0.3602693602693603, "grad_norm": 8.669297218322754, "learning_rate": 9.881327487433198e-07, "loss": 0.8117149472236633, "step": 428 }, { "epoch": 0.36195286195286197, "grad_norm": 6.67222785949707, "learning_rate": 9.879414325577916e-07, "loss": 1.2592154741287231, "step": 430 }, { "epoch": 0.36363636363636365, "grad_norm": 6.638124942779541, "learning_rate": 9.877486074797602e-07, "loss": 0.9993456602096558, "step": 432 }, { "epoch": 0.3653198653198653, "grad_norm": 3.7449495792388916, "learning_rate": 9.8755427417359e-07, "loss": 0.8662674427032471, "step": 434 }, { "epoch": 0.367003367003367, "grad_norm": 4.553740978240967, "learning_rate": 9.873584333088407e-07, "loss": 1.0476055145263672, "step": 436 }, { "epoch": 0.3686868686868687, "grad_norm": 9.034341812133789, "learning_rate": 9.871610855602662e-07, "loss": 1.1130859851837158, "step": 438 }, { "epoch": 0.37037037037037035, "grad_norm": 7.609111785888672, "learning_rate": 9.869622316078128e-07, "loss": 0.9781308770179749, "step": 440 }, { "epoch": 0.3720538720538721, "grad_norm": 15.675320625305176, "learning_rate": 9.86761872136616e-07, "loss": 0.9868993759155273, "step": 442 }, { "epoch": 0.37373737373737376, "grad_norm": 4.52480936050415, "learning_rate": 9.865600078369985e-07, "loss": 0.7887587547302246, "step": 444 }, { "epoch": 0.37542087542087543, "grad_norm": 21.339006423950195, "learning_rate": 9.863566394044677e-07, "loss": 0.6558203101158142, "step": 446 }, { "epoch": 0.3771043771043771, "grad_norm": 5.116230010986328, "learning_rate": 9.861517675397135e-07, "loss": 1.1714262962341309, "step": 448 }, { "epoch": 0.3787878787878788, "grad_norm": 16.112041473388672, "learning_rate": 9.859453929486054e-07, "loss": 1.1047420501708984, "step": 450 }, { "epoch": 0.38047138047138046, "grad_norm": 3.787045478820801, "learning_rate": 9.857375163421912e-07, "loss": 0.7425003051757812, "step": 452 }, { "epoch": 0.38215488215488214, "grad_norm": 11.478412628173828, "learning_rate": 9.855281384366928e-07, "loss": 1.0151433944702148, "step": 454 }, { "epoch": 0.3838383838383838, "grad_norm": 3.6095988750457764, "learning_rate": 9.853172599535054e-07, "loss": 0.8090977668762207, "step": 456 }, { "epoch": 0.38552188552188554, "grad_norm": 7.952422618865967, "learning_rate": 9.85104881619194e-07, "loss": 0.9961310625076294, "step": 458 }, { "epoch": 0.3872053872053872, "grad_norm": 2.0787007808685303, "learning_rate": 9.848910041654915e-07, "loss": 1.1424083709716797, "step": 460 }, { "epoch": 0.3888888888888889, "grad_norm": 3.276982545852661, "learning_rate": 9.846756283292955e-07, "loss": 0.8972825407981873, "step": 462 }, { "epoch": 0.39057239057239057, "grad_norm": 6.26957368850708, "learning_rate": 9.844587548526665e-07, "loss": 0.8542879223823547, "step": 464 }, { "epoch": 0.39225589225589225, "grad_norm": 32.88930892944336, "learning_rate": 9.842403844828249e-07, "loss": 0.9769890308380127, "step": 466 }, { "epoch": 0.3939393939393939, "grad_norm": 10.898834228515625, "learning_rate": 9.840205179721486e-07, "loss": 0.9689866304397583, "step": 468 }, { "epoch": 0.3956228956228956, "grad_norm": 3.45035457611084, "learning_rate": 9.837991560781698e-07, "loss": 0.9729927778244019, "step": 470 }, { "epoch": 0.39730639730639733, "grad_norm": 7.222962379455566, "learning_rate": 9.835762995635739e-07, "loss": 0.8332297801971436, "step": 472 }, { "epoch": 0.398989898989899, "grad_norm": 3.249415636062622, "learning_rate": 9.833519491961951e-07, "loss": 1.0173261165618896, "step": 474 }, { "epoch": 0.4006734006734007, "grad_norm": 6.285678863525391, "learning_rate": 9.831261057490148e-07, "loss": 0.7735811471939087, "step": 476 }, { "epoch": 0.40235690235690236, "grad_norm": 3.5245249271392822, "learning_rate": 9.82898770000159e-07, "loss": 0.9958957433700562, "step": 478 }, { "epoch": 0.40404040404040403, "grad_norm": 13.678420066833496, "learning_rate": 9.826699427328944e-07, "loss": 1.0717885494232178, "step": 480 }, { "epoch": 0.4057239057239057, "grad_norm": 12.059322357177734, "learning_rate": 9.824396247356276e-07, "loss": 1.0886049270629883, "step": 482 }, { "epoch": 0.4074074074074074, "grad_norm": 9.42127513885498, "learning_rate": 9.822078168019012e-07, "loss": 0.8954146504402161, "step": 484 }, { "epoch": 0.4090909090909091, "grad_norm": 13.108272552490234, "learning_rate": 9.819745197303907e-07, "loss": 0.881049633026123, "step": 486 }, { "epoch": 0.4107744107744108, "grad_norm": 3.574754238128662, "learning_rate": 9.817397343249028e-07, "loss": 1.1146478652954102, "step": 488 }, { "epoch": 0.41245791245791247, "grad_norm": 3.4290618896484375, "learning_rate": 9.815034613943722e-07, "loss": 1.118224859237671, "step": 490 }, { "epoch": 0.41414141414141414, "grad_norm": 16.978740692138672, "learning_rate": 9.812657017528584e-07, "loss": 1.0728644132614136, "step": 492 }, { "epoch": 0.4158249158249158, "grad_norm": 5.449537754058838, "learning_rate": 9.810264562195432e-07, "loss": 0.9440809488296509, "step": 494 }, { "epoch": 0.4175084175084175, "grad_norm": 2.756265640258789, "learning_rate": 9.807857256187283e-07, "loss": 1.1065900325775146, "step": 496 }, { "epoch": 0.41919191919191917, "grad_norm": 3.4030373096466064, "learning_rate": 9.805435107798322e-07, "loss": 1.0974758863449097, "step": 498 }, { "epoch": 0.4208754208754209, "grad_norm": 9.233179092407227, "learning_rate": 9.802998125373864e-07, "loss": 0.851800799369812, "step": 500 }, { "epoch": 0.4225589225589226, "grad_norm": 11.157843589782715, "learning_rate": 9.800546317310343e-07, "loss": 0.6602354645729065, "step": 502 }, { "epoch": 0.42424242424242425, "grad_norm": 15.149531364440918, "learning_rate": 9.798079692055267e-07, "loss": 0.9472991228103638, "step": 504 }, { "epoch": 0.42592592592592593, "grad_norm": 30.058595657348633, "learning_rate": 9.7955982581072e-07, "loss": 0.9938538670539856, "step": 506 }, { "epoch": 0.4276094276094276, "grad_norm": 23.41927719116211, "learning_rate": 9.793102024015724e-07, "loss": 1.4200940132141113, "step": 508 }, { "epoch": 0.4292929292929293, "grad_norm": 3.9220423698425293, "learning_rate": 9.790590998381417e-07, "loss": 1.0478514432907104, "step": 510 }, { "epoch": 0.43097643097643096, "grad_norm": 3.723065137863159, "learning_rate": 9.788065189855817e-07, "loss": 1.2064735889434814, "step": 512 }, { "epoch": 0.43265993265993263, "grad_norm": 3.486267566680908, "learning_rate": 9.7855246071414e-07, "loss": 1.140267014503479, "step": 514 }, { "epoch": 0.43434343434343436, "grad_norm": 8.95257568359375, "learning_rate": 9.78296925899154e-07, "loss": 1.0755705833435059, "step": 516 }, { "epoch": 0.43602693602693604, "grad_norm": 4.213111400604248, "learning_rate": 9.780399154210487e-07, "loss": 1.0637681484222412, "step": 518 }, { "epoch": 0.4377104377104377, "grad_norm": 26.23670196533203, "learning_rate": 9.777814301653336e-07, "loss": 0.9591152667999268, "step": 520 }, { "epoch": 0.4393939393939394, "grad_norm": 2.839754343032837, "learning_rate": 9.775214710225987e-07, "loss": 0.8415237665176392, "step": 522 }, { "epoch": 0.44107744107744107, "grad_norm": 5.952809810638428, "learning_rate": 9.77260038888513e-07, "loss": 1.133270502090454, "step": 524 }, { "epoch": 0.44276094276094274, "grad_norm": 8.995283126831055, "learning_rate": 9.769971346638203e-07, "loss": 0.7777677774429321, "step": 526 }, { "epoch": 0.4444444444444444, "grad_norm": 3.4373066425323486, "learning_rate": 9.767327592543359e-07, "loss": 1.2248082160949707, "step": 528 }, { "epoch": 0.44612794612794615, "grad_norm": 7.905541896820068, "learning_rate": 9.764669135709443e-07, "loss": 0.8326348066329956, "step": 530 }, { "epoch": 0.4478114478114478, "grad_norm": 2.997097969055176, "learning_rate": 9.76199598529596e-07, "loss": 0.8697119355201721, "step": 532 }, { "epoch": 0.4494949494949495, "grad_norm": 3.4758172035217285, "learning_rate": 9.759308150513039e-07, "loss": 0.9715222716331482, "step": 534 }, { "epoch": 0.4511784511784512, "grad_norm": 4.66405725479126, "learning_rate": 9.756605640621397e-07, "loss": 1.2556489706039429, "step": 536 }, { "epoch": 0.45286195286195285, "grad_norm": 11.930469512939453, "learning_rate": 9.753888464932322e-07, "loss": 1.1018869876861572, "step": 538 }, { "epoch": 0.45454545454545453, "grad_norm": 13.772843360900879, "learning_rate": 9.751156632807626e-07, "loss": 0.8878042101860046, "step": 540 }, { "epoch": 0.4562289562289562, "grad_norm": 4.206384181976318, "learning_rate": 9.748410153659618e-07, "loss": 1.0389076471328735, "step": 542 }, { "epoch": 0.45791245791245794, "grad_norm": 3.624582052230835, "learning_rate": 9.745649036951079e-07, "loss": 1.1431198120117188, "step": 544 }, { "epoch": 0.4595959595959596, "grad_norm": 12.98609733581543, "learning_rate": 9.742873292195213e-07, "loss": 1.1605827808380127, "step": 546 }, { "epoch": 0.4612794612794613, "grad_norm": 7.910975456237793, "learning_rate": 9.740082928955634e-07, "loss": 1.3202755451202393, "step": 548 }, { "epoch": 0.46296296296296297, "grad_norm": 5.325044631958008, "learning_rate": 9.737277956846313e-07, "loss": 0.9252653121948242, "step": 550 }, { "epoch": 0.46464646464646464, "grad_norm": 14.551304817199707, "learning_rate": 9.73445838553156e-07, "loss": 0.876882791519165, "step": 552 }, { "epoch": 0.4663299663299663, "grad_norm": 3.202234983444214, "learning_rate": 9.731624224725986e-07, "loss": 1.0558652877807617, "step": 554 }, { "epoch": 0.468013468013468, "grad_norm": 4.0583086013793945, "learning_rate": 9.728775484194464e-07, "loss": 0.740475594997406, "step": 556 }, { "epoch": 0.4696969696969697, "grad_norm": 3.9330027103424072, "learning_rate": 9.725912173752106e-07, "loss": 1.2117640972137451, "step": 558 }, { "epoch": 0.4713804713804714, "grad_norm": 7.207095146179199, "learning_rate": 9.723034303264225e-07, "loss": 0.4382402002811432, "step": 560 }, { "epoch": 0.4730639730639731, "grad_norm": 4.947695255279541, "learning_rate": 9.72014188264629e-07, "loss": 0.6228041648864746, "step": 562 }, { "epoch": 0.47474747474747475, "grad_norm": 9.088849067687988, "learning_rate": 9.71723492186391e-07, "loss": 1.3076156377792358, "step": 564 }, { "epoch": 0.4764309764309764, "grad_norm": 4.49135160446167, "learning_rate": 9.714313430932785e-07, "loss": 1.1357098817825317, "step": 566 }, { "epoch": 0.4781144781144781, "grad_norm": 9.188185691833496, "learning_rate": 9.711377419918683e-07, "loss": 0.4768811762332916, "step": 568 }, { "epoch": 0.4797979797979798, "grad_norm": 23.397979736328125, "learning_rate": 9.708426898937399e-07, "loss": 1.1221351623535156, "step": 570 }, { "epoch": 0.48148148148148145, "grad_norm": 27.77615737915039, "learning_rate": 9.705461878154714e-07, "loss": 0.7149933576583862, "step": 572 }, { "epoch": 0.4831649831649832, "grad_norm": 4.684352874755859, "learning_rate": 9.702482367786377e-07, "loss": 0.9776611924171448, "step": 574 }, { "epoch": 0.48484848484848486, "grad_norm": 7.567526817321777, "learning_rate": 9.699488378098055e-07, "loss": 0.8799599409103394, "step": 576 }, { "epoch": 0.48653198653198654, "grad_norm": 9.130019187927246, "learning_rate": 9.696479919405298e-07, "loss": 1.1031641960144043, "step": 578 }, { "epoch": 0.4882154882154882, "grad_norm": 9.574334144592285, "learning_rate": 9.693457002073517e-07, "loss": 0.8267420530319214, "step": 580 }, { "epoch": 0.4898989898989899, "grad_norm": 4.069400787353516, "learning_rate": 9.69041963651793e-07, "loss": 1.3716950416564941, "step": 582 }, { "epoch": 0.49158249158249157, "grad_norm": 4.066318988800049, "learning_rate": 9.68736783320354e-07, "loss": 1.017892837524414, "step": 584 }, { "epoch": 0.49326599326599324, "grad_norm": 2.714144706726074, "learning_rate": 9.684301602645098e-07, "loss": 0.861703097820282, "step": 586 }, { "epoch": 0.494949494949495, "grad_norm": 3.8651719093322754, "learning_rate": 9.681220955407053e-07, "loss": 0.6647518873214722, "step": 588 }, { "epoch": 0.49663299663299665, "grad_norm": 3.4340827465057373, "learning_rate": 9.67812590210353e-07, "loss": 1.1181421279907227, "step": 590 }, { "epoch": 0.4983164983164983, "grad_norm": 3.8552682399749756, "learning_rate": 9.675016453398296e-07, "loss": 1.1666280031204224, "step": 592 }, { "epoch": 0.5, "grad_norm": 13.408713340759277, "learning_rate": 9.671892620004706e-07, "loss": 0.8374857902526855, "step": 594 }, { "epoch": 0.5016835016835017, "grad_norm": 7.0116424560546875, "learning_rate": 9.66875441268568e-07, "loss": 0.960757851600647, "step": 596 }, { "epoch": 0.5033670033670034, "grad_norm": 2.764244556427002, "learning_rate": 9.665601842253666e-07, "loss": 1.3247270584106445, "step": 598 }, { "epoch": 0.5050505050505051, "grad_norm": 13.236382484436035, "learning_rate": 9.662434919570592e-07, "loss": 0.8124715685844421, "step": 600 }, { "epoch": 0.5067340067340067, "grad_norm": 39.5108528137207, "learning_rate": 9.659253655547843e-07, "loss": 1.0799833536148071, "step": 602 }, { "epoch": 0.5084175084175084, "grad_norm": 13.359992027282715, "learning_rate": 9.656058061146207e-07, "loss": 1.0351530313491821, "step": 604 }, { "epoch": 0.51010101010101, "grad_norm": 4.374532699584961, "learning_rate": 9.652848147375853e-07, "loss": 1.1660369634628296, "step": 606 }, { "epoch": 0.5117845117845118, "grad_norm": 7.170238018035889, "learning_rate": 9.649623925296288e-07, "loss": 0.6313941478729248, "step": 608 }, { "epoch": 0.5134680134680135, "grad_norm": 2.792412519454956, "learning_rate": 9.646385406016313e-07, "loss": 0.9415972232818604, "step": 610 }, { "epoch": 0.5151515151515151, "grad_norm": 13.598429679870605, "learning_rate": 9.643132600693983e-07, "loss": 0.9117315411567688, "step": 612 }, { "epoch": 0.5168350168350169, "grad_norm": 4.011415481567383, "learning_rate": 9.639865520536588e-07, "loss": 0.7065603137016296, "step": 614 }, { "epoch": 0.5185185185185185, "grad_norm": 9.801816940307617, "learning_rate": 9.636584176800593e-07, "loss": 1.1204071044921875, "step": 616 }, { "epoch": 0.5202020202020202, "grad_norm": 10.913689613342285, "learning_rate": 9.633288580791603e-07, "loss": 1.031501054763794, "step": 618 }, { "epoch": 0.5218855218855218, "grad_norm": 2.2291688919067383, "learning_rate": 9.62997874386434e-07, "loss": 1.0308109521865845, "step": 620 }, { "epoch": 0.5235690235690236, "grad_norm": 12.420637130737305, "learning_rate": 9.62665467742258e-07, "loss": 1.0678637027740479, "step": 622 }, { "epoch": 0.5252525252525253, "grad_norm": 17.982452392578125, "learning_rate": 9.623316392919132e-07, "loss": 0.7635082006454468, "step": 624 }, { "epoch": 0.5269360269360269, "grad_norm": 31.17810821533203, "learning_rate": 9.619963901855789e-07, "loss": 0.9803504943847656, "step": 626 }, { "epoch": 0.5286195286195287, "grad_norm": 3.1647303104400635, "learning_rate": 9.616597215783295e-07, "loss": 0.8586722612380981, "step": 628 }, { "epoch": 0.5303030303030303, "grad_norm": 10.497335433959961, "learning_rate": 9.6132163463013e-07, "loss": 0.7892797589302063, "step": 630 }, { "epoch": 0.531986531986532, "grad_norm": 11.274188995361328, "learning_rate": 9.609821305058324e-07, "loss": 1.1465822458267212, "step": 632 }, { "epoch": 0.5336700336700336, "grad_norm": 4.127675533294678, "learning_rate": 9.606412103751707e-07, "loss": 0.9373839497566223, "step": 634 }, { "epoch": 0.5353535353535354, "grad_norm": 4.121032238006592, "learning_rate": 9.602988754127585e-07, "loss": 0.8166585564613342, "step": 636 }, { "epoch": 0.5370370370370371, "grad_norm": 29.52313804626465, "learning_rate": 9.59955126798084e-07, "loss": 1.0028636455535889, "step": 638 }, { "epoch": 0.5387205387205387, "grad_norm": 4.636293888092041, "learning_rate": 9.596099657155056e-07, "loss": 0.8631769418716431, "step": 640 }, { "epoch": 0.5404040404040404, "grad_norm": 2.6743357181549072, "learning_rate": 9.592633933542484e-07, "loss": 0.9822747707366943, "step": 642 }, { "epoch": 0.5420875420875421, "grad_norm": 12.097616195678711, "learning_rate": 9.589154109084e-07, "loss": 0.9199867844581604, "step": 644 }, { "epoch": 0.5437710437710438, "grad_norm": 4.201647758483887, "learning_rate": 9.585660195769066e-07, "loss": 0.9225333333015442, "step": 646 }, { "epoch": 0.5454545454545454, "grad_norm": 7.826382160186768, "learning_rate": 9.582152205635682e-07, "loss": 1.0213161706924438, "step": 648 }, { "epoch": 0.5471380471380471, "grad_norm": 8.643582344055176, "learning_rate": 9.578630150770348e-07, "loss": 1.1659046411514282, "step": 650 }, { "epoch": 0.5488215488215489, "grad_norm": 16.885889053344727, "learning_rate": 9.575094043308027e-07, "loss": 1.0685768127441406, "step": 652 }, { "epoch": 0.5505050505050505, "grad_norm": 3.666364908218384, "learning_rate": 9.5715438954321e-07, "loss": 1.0853323936462402, "step": 654 }, { "epoch": 0.5521885521885522, "grad_norm": 21.654556274414062, "learning_rate": 9.567979719374313e-07, "loss": 0.9922153353691101, "step": 656 }, { "epoch": 0.5538720538720538, "grad_norm": 7.106581211090088, "learning_rate": 9.564401527414757e-07, "loss": 0.8094037771224976, "step": 658 }, { "epoch": 0.5555555555555556, "grad_norm": 6.885115146636963, "learning_rate": 9.56080933188181e-07, "loss": 0.7689495086669922, "step": 660 }, { "epoch": 0.5572390572390572, "grad_norm": 3.9134387969970703, "learning_rate": 9.557203145152093e-07, "loss": 1.064096212387085, "step": 662 }, { "epoch": 0.5589225589225589, "grad_norm": 3.955990791320801, "learning_rate": 9.55358297965044e-07, "loss": 1.1137442588806152, "step": 664 }, { "epoch": 0.5606060606060606, "grad_norm": 4.690779209136963, "learning_rate": 9.549948847849842e-07, "loss": 0.5054531693458557, "step": 666 }, { "epoch": 0.5622895622895623, "grad_norm": 32.8538818359375, "learning_rate": 9.546300762271414e-07, "loss": 0.6846545934677124, "step": 668 }, { "epoch": 0.563973063973064, "grad_norm": 18.116151809692383, "learning_rate": 9.542638735484346e-07, "loss": 1.099835991859436, "step": 670 }, { "epoch": 0.5656565656565656, "grad_norm": 26.123899459838867, "learning_rate": 9.538962780105855e-07, "loss": 0.6106569766998291, "step": 672 }, { "epoch": 0.5673400673400674, "grad_norm": 6.80141019821167, "learning_rate": 9.535272908801164e-07, "loss": 0.6078236103057861, "step": 674 }, { "epoch": 0.569023569023569, "grad_norm": 3.6088900566101074, "learning_rate": 9.531569134283426e-07, "loss": 0.6979132890701294, "step": 676 }, { "epoch": 0.5707070707070707, "grad_norm": 35.824989318847656, "learning_rate": 9.527851469313703e-07, "loss": 1.3292642831802368, "step": 678 }, { "epoch": 0.5723905723905723, "grad_norm": 13.528051376342773, "learning_rate": 9.524119926700916e-07, "loss": 0.41806691884994507, "step": 680 }, { "epoch": 0.5740740740740741, "grad_norm": 10.345752716064453, "learning_rate": 9.520374519301801e-07, "loss": 1.0647339820861816, "step": 682 }, { "epoch": 0.5757575757575758, "grad_norm": 5.383781433105469, "learning_rate": 9.516615260020859e-07, "loss": 1.1695669889450073, "step": 684 }, { "epoch": 0.5774410774410774, "grad_norm": 4.6796770095825195, "learning_rate": 9.512842161810322e-07, "loss": 1.1320273876190186, "step": 686 }, { "epoch": 0.5791245791245792, "grad_norm": 3.494124412536621, "learning_rate": 9.509055237670101e-07, "loss": 0.8368796706199646, "step": 688 }, { "epoch": 0.5808080808080808, "grad_norm": 18.290544509887695, "learning_rate": 9.505254500647742e-07, "loss": 0.7732558250427246, "step": 690 }, { "epoch": 0.5824915824915825, "grad_norm": 3.7307989597320557, "learning_rate": 9.501439963838383e-07, "loss": 0.8185931444168091, "step": 692 }, { "epoch": 0.5841750841750841, "grad_norm": 5.913649559020996, "learning_rate": 9.497611640384712e-07, "loss": 1.0147478580474854, "step": 694 }, { "epoch": 0.5858585858585859, "grad_norm": 14.875641822814941, "learning_rate": 9.493769543476909e-07, "loss": 0.9212662577629089, "step": 696 }, { "epoch": 0.5875420875420876, "grad_norm": 10.849754333496094, "learning_rate": 9.489913686352616e-07, "loss": 0.8869191408157349, "step": 698 }, { "epoch": 0.5892255892255892, "grad_norm": 7.326023578643799, "learning_rate": 9.486044082296886e-07, "loss": 0.8455855846405029, "step": 700 }, { "epoch": 0.5909090909090909, "grad_norm": 8.260787010192871, "learning_rate": 9.48216074464213e-07, "loss": 0.944000780582428, "step": 702 }, { "epoch": 0.5925925925925926, "grad_norm": 4.983551979064941, "learning_rate": 9.47826368676808e-07, "loss": 1.0806821584701538, "step": 704 }, { "epoch": 0.5942760942760943, "grad_norm": 7.548647880554199, "learning_rate": 9.474352922101741e-07, "loss": 1.0155982971191406, "step": 706 }, { "epoch": 0.5959595959595959, "grad_norm": 9.95559024810791, "learning_rate": 9.470428464117344e-07, "loss": 0.8041818141937256, "step": 708 }, { "epoch": 0.5976430976430976, "grad_norm": 22.083297729492188, "learning_rate": 9.466490326336298e-07, "loss": 0.8329028487205505, "step": 710 }, { "epoch": 0.5993265993265994, "grad_norm": 3.1762735843658447, "learning_rate": 9.462538522327144e-07, "loss": 1.1545898914337158, "step": 712 }, { "epoch": 0.601010101010101, "grad_norm": 2.4671504497528076, "learning_rate": 9.458573065705507e-07, "loss": 1.081796407699585, "step": 714 }, { "epoch": 0.6026936026936027, "grad_norm": 4.517568111419678, "learning_rate": 9.454593970134058e-07, "loss": 0.7743735313415527, "step": 716 }, { "epoch": 0.6043771043771043, "grad_norm": 11.208656311035156, "learning_rate": 9.45060124932245e-07, "loss": 0.9187523126602173, "step": 718 }, { "epoch": 0.6060606060606061, "grad_norm": 10.369696617126465, "learning_rate": 9.446594917027293e-07, "loss": 0.965773344039917, "step": 720 }, { "epoch": 0.6077441077441077, "grad_norm": 11.875805854797363, "learning_rate": 9.442574987052082e-07, "loss": 0.9600865840911865, "step": 722 }, { "epoch": 0.6094276094276094, "grad_norm": 9.8040189743042, "learning_rate": 9.438541473247169e-07, "loss": 0.9117884635925293, "step": 724 }, { "epoch": 0.6111111111111112, "grad_norm": 61.72325134277344, "learning_rate": 9.434494389509707e-07, "loss": 1.0104196071624756, "step": 726 }, { "epoch": 0.6127946127946128, "grad_norm": 19.176124572753906, "learning_rate": 9.430433749783601e-07, "loss": 0.9295721650123596, "step": 728 }, { "epoch": 0.6144781144781145, "grad_norm": 6.085904121398926, "learning_rate": 9.426359568059465e-07, "loss": 1.1639102697372437, "step": 730 }, { "epoch": 0.6161616161616161, "grad_norm": 2.7430849075317383, "learning_rate": 9.422271858374567e-07, "loss": 1.1210119724273682, "step": 732 }, { "epoch": 0.6178451178451179, "grad_norm": 6.412540435791016, "learning_rate": 9.418170634812789e-07, "loss": 0.8046259880065918, "step": 734 }, { "epoch": 0.6195286195286195, "grad_norm": 15.164510726928711, "learning_rate": 9.41405591150457e-07, "loss": 0.8280715942382812, "step": 736 }, { "epoch": 0.6212121212121212, "grad_norm": 13.97409725189209, "learning_rate": 9.409927702626865e-07, "loss": 0.6932380199432373, "step": 738 }, { "epoch": 0.622895622895623, "grad_norm": 2.947274684906006, "learning_rate": 9.405786022403089e-07, "loss": 1.2565734386444092, "step": 740 }, { "epoch": 0.6245791245791246, "grad_norm": 4.588158130645752, "learning_rate": 9.401630885103074e-07, "loss": 1.0269739627838135, "step": 742 }, { "epoch": 0.6262626262626263, "grad_norm": 4.135093688964844, "learning_rate": 9.397462305043016e-07, "loss": 1.2328283786773682, "step": 744 }, { "epoch": 0.6279461279461279, "grad_norm": 3.079167127609253, "learning_rate": 9.393280296585427e-07, "loss": 0.968951404094696, "step": 746 }, { "epoch": 0.6296296296296297, "grad_norm": 2.28676176071167, "learning_rate": 9.389084874139085e-07, "loss": 1.2347244024276733, "step": 748 }, { "epoch": 0.6313131313131313, "grad_norm": 8.729804992675781, "learning_rate": 9.384876052158987e-07, "loss": 1.3113691806793213, "step": 750 }, { "epoch": 0.632996632996633, "grad_norm": 7.039168357849121, "learning_rate": 9.380653845146294e-07, "loss": 0.7496945858001709, "step": 752 }, { "epoch": 0.6346801346801347, "grad_norm": 14.870685577392578, "learning_rate": 9.37641826764829e-07, "loss": 1.0088348388671875, "step": 754 }, { "epoch": 0.6363636363636364, "grad_norm": 3.9592251777648926, "learning_rate": 9.372169334258315e-07, "loss": 0.7920987606048584, "step": 756 }, { "epoch": 0.6380471380471381, "grad_norm": 10.84424114227295, "learning_rate": 9.367907059615737e-07, "loss": 0.85060054063797, "step": 758 }, { "epoch": 0.6397306397306397, "grad_norm": 235.0703582763672, "learning_rate": 9.363631458405885e-07, "loss": 0.6581774353981018, "step": 760 }, { "epoch": 0.6414141414141414, "grad_norm": 5.294051647186279, "learning_rate": 9.359342545360002e-07, "loss": 0.46980541944503784, "step": 762 }, { "epoch": 0.6430976430976431, "grad_norm": 29.527233123779297, "learning_rate": 9.355040335255201e-07, "loss": 1.0372706651687622, "step": 764 }, { "epoch": 0.6447811447811448, "grad_norm": 4.027895927429199, "learning_rate": 9.350724842914403e-07, "loss": 1.104457139968872, "step": 766 }, { "epoch": 0.6464646464646465, "grad_norm": 45.400699615478516, "learning_rate": 9.346396083206297e-07, "loss": 0.8071002960205078, "step": 768 }, { "epoch": 0.6481481481481481, "grad_norm": 4.046747207641602, "learning_rate": 9.342054071045281e-07, "loss": 0.8214056491851807, "step": 770 }, { "epoch": 0.6498316498316499, "grad_norm": 4.489753723144531, "learning_rate": 9.337698821391413e-07, "loss": 0.8206263780593872, "step": 772 }, { "epoch": 0.6515151515151515, "grad_norm": 3.739696502685547, "learning_rate": 9.333330349250363e-07, "loss": 0.7051388025283813, "step": 774 }, { "epoch": 0.6531986531986532, "grad_norm": 5.395556449890137, "learning_rate": 9.328948669673353e-07, "loss": 0.9473454356193542, "step": 776 }, { "epoch": 0.6548821548821548, "grad_norm": 3.432518720626831, "learning_rate": 9.324553797757113e-07, "loss": 1.0663374662399292, "step": 778 }, { "epoch": 0.6565656565656566, "grad_norm": 15.647449493408203, "learning_rate": 9.320145748643827e-07, "loss": 1.015528678894043, "step": 780 }, { "epoch": 0.6582491582491582, "grad_norm": 18.728303909301758, "learning_rate": 9.315724537521078e-07, "loss": 1.0769071578979492, "step": 782 }, { "epoch": 0.6599326599326599, "grad_norm": 9.825349807739258, "learning_rate": 9.311290179621801e-07, "loss": 1.0078058242797852, "step": 784 }, { "epoch": 0.6616161616161617, "grad_norm": 8.568079948425293, "learning_rate": 9.306842690224221e-07, "loss": 1.1149715185165405, "step": 786 }, { "epoch": 0.6632996632996633, "grad_norm": 14.216378211975098, "learning_rate": 9.302382084651813e-07, "loss": 0.9104188680648804, "step": 788 }, { "epoch": 0.664983164983165, "grad_norm": 3.3533241748809814, "learning_rate": 9.297908378273238e-07, "loss": 0.9613898992538452, "step": 790 }, { "epoch": 0.6666666666666666, "grad_norm": 22.51508140563965, "learning_rate": 9.293421586502299e-07, "loss": 1.0459431409835815, "step": 792 }, { "epoch": 0.6683501683501684, "grad_norm": 4.43943977355957, "learning_rate": 9.288921724797881e-07, "loss": 0.6562730073928833, "step": 794 }, { "epoch": 0.67003367003367, "grad_norm": 3.550076484680176, "learning_rate": 9.2844088086639e-07, "loss": 0.9962120056152344, "step": 796 }, { "epoch": 0.6717171717171717, "grad_norm": 4.60956335067749, "learning_rate": 9.279882853649251e-07, "loss": 1.0277674198150635, "step": 798 }, { "epoch": 0.6734006734006734, "grad_norm": 4.80393648147583, "learning_rate": 9.275343875347754e-07, "loss": 0.6581063866615295, "step": 800 }, { "epoch": 0.6750841750841751, "grad_norm": 5.648859024047852, "learning_rate": 9.270791889398098e-07, "loss": 1.016190528869629, "step": 802 }, { "epoch": 0.6767676767676768, "grad_norm": 27.92025375366211, "learning_rate": 9.266226911483792e-07, "loss": 0.77015221118927, "step": 804 }, { "epoch": 0.6784511784511784, "grad_norm": 15.88348388671875, "learning_rate": 9.261648957333104e-07, "loss": 0.7054531574249268, "step": 806 }, { "epoch": 0.6801346801346801, "grad_norm": 11.626742362976074, "learning_rate": 9.257058042719014e-07, "loss": 1.162412405014038, "step": 808 }, { "epoch": 0.6818181818181818, "grad_norm": 3.297008752822876, "learning_rate": 9.252454183459151e-07, "loss": 1.0062317848205566, "step": 810 }, { "epoch": 0.6835016835016835, "grad_norm": 3.699937343597412, "learning_rate": 9.24783739541575e-07, "loss": 1.1737666130065918, "step": 812 }, { "epoch": 0.6851851851851852, "grad_norm": 12.622026443481445, "learning_rate": 9.243207694495587e-07, "loss": 0.5980294942855835, "step": 814 }, { "epoch": 0.6868686868686869, "grad_norm": 2.4388279914855957, "learning_rate": 9.238565096649931e-07, "loss": 1.1263744831085205, "step": 816 }, { "epoch": 0.6885521885521886, "grad_norm": 5.467193603515625, "learning_rate": 9.233909617874485e-07, "loss": 0.8187447786331177, "step": 818 }, { "epoch": 0.6902356902356902, "grad_norm": 19.933046340942383, "learning_rate": 9.229241274209331e-07, "loss": 0.7387347221374512, "step": 820 }, { "epoch": 0.6919191919191919, "grad_norm": 4.639487266540527, "learning_rate": 9.224560081738876e-07, "loss": 0.8205159902572632, "step": 822 }, { "epoch": 0.6936026936026936, "grad_norm": 5.4859089851379395, "learning_rate": 9.219866056591803e-07, "loss": 0.8951364755630493, "step": 824 }, { "epoch": 0.6952861952861953, "grad_norm": 10.06679916381836, "learning_rate": 9.215159214940999e-07, "loss": 0.924353837966919, "step": 826 }, { "epoch": 0.696969696969697, "grad_norm": 4.803708076477051, "learning_rate": 9.210439573003513e-07, "loss": 0.8230616450309753, "step": 828 }, { "epoch": 0.6986531986531986, "grad_norm": 2.6663763523101807, "learning_rate": 9.205707147040502e-07, "loss": 1.2476671934127808, "step": 830 }, { "epoch": 0.7003367003367004, "grad_norm": 11.887960433959961, "learning_rate": 9.200961953357161e-07, "loss": 0.9090033173561096, "step": 832 }, { "epoch": 0.702020202020202, "grad_norm": 12.790818214416504, "learning_rate": 9.196204008302679e-07, "loss": 0.7313128709793091, "step": 834 }, { "epoch": 0.7037037037037037, "grad_norm": 19.932966232299805, "learning_rate": 9.191433328270181e-07, "loss": 0.9331467151641846, "step": 836 }, { "epoch": 0.7053872053872053, "grad_norm": 14.500178337097168, "learning_rate": 9.186649929696663e-07, "loss": 0.6199721097946167, "step": 838 }, { "epoch": 0.7070707070707071, "grad_norm": 3.49665904045105, "learning_rate": 9.181853829062953e-07, "loss": 1.2793331146240234, "step": 840 }, { "epoch": 0.7087542087542088, "grad_norm": 4.229721546173096, "learning_rate": 9.177045042893626e-07, "loss": 1.1469063758850098, "step": 842 }, { "epoch": 0.7104377104377104, "grad_norm": 110.08422088623047, "learning_rate": 9.172223587756982e-07, "loss": 1.1059083938598633, "step": 844 }, { "epoch": 0.7121212121212122, "grad_norm": 18.97850799560547, "learning_rate": 9.167389480264958e-07, "loss": 0.8827245235443115, "step": 846 }, { "epoch": 0.7138047138047138, "grad_norm": 17.975536346435547, "learning_rate": 9.162542737073089e-07, "loss": 0.8001298904418945, "step": 848 }, { "epoch": 0.7154882154882155, "grad_norm": 7.855954647064209, "learning_rate": 9.157683374880446e-07, "loss": 0.9649063348770142, "step": 850 }, { "epoch": 0.7171717171717171, "grad_norm": 8.463844299316406, "learning_rate": 9.152811410429576e-07, "loss": 0.972816526889801, "step": 852 }, { "epoch": 0.7188552188552189, "grad_norm": 12.091350555419922, "learning_rate": 9.147926860506445e-07, "loss": 0.7975931763648987, "step": 854 }, { "epoch": 0.7205387205387206, "grad_norm": 4.873641014099121, "learning_rate": 9.143029741940385e-07, "loss": 1.1548885107040405, "step": 856 }, { "epoch": 0.7222222222222222, "grad_norm": 11.703914642333984, "learning_rate": 9.138120071604027e-07, "loss": 0.7869529724121094, "step": 858 }, { "epoch": 0.7239057239057239, "grad_norm": 8.07150650024414, "learning_rate": 9.133197866413254e-07, "loss": 1.0205129384994507, "step": 860 }, { "epoch": 0.7255892255892256, "grad_norm": 9.105744361877441, "learning_rate": 9.128263143327132e-07, "loss": 1.2168781757354736, "step": 862 }, { "epoch": 0.7272727272727273, "grad_norm": 13.992351531982422, "learning_rate": 9.12331591934786e-07, "loss": 1.0841448307037354, "step": 864 }, { "epoch": 0.7289562289562289, "grad_norm": 44.512203216552734, "learning_rate": 9.118356211520704e-07, "loss": 1.0125892162322998, "step": 866 }, { "epoch": 0.7306397306397306, "grad_norm": 3.5231881141662598, "learning_rate": 9.113384036933945e-07, "loss": 1.2724123001098633, "step": 868 }, { "epoch": 0.7323232323232324, "grad_norm": 5.931739330291748, "learning_rate": 9.108399412718818e-07, "loss": 1.1999413967132568, "step": 870 }, { "epoch": 0.734006734006734, "grad_norm": 5.34647798538208, "learning_rate": 9.103402356049452e-07, "loss": 1.127119541168213, "step": 872 }, { "epoch": 0.7356902356902357, "grad_norm": 4.207188606262207, "learning_rate": 9.098392884142805e-07, "loss": 1.114919900894165, "step": 874 }, { "epoch": 0.7373737373737373, "grad_norm": 21.882280349731445, "learning_rate": 9.093371014258618e-07, "loss": 0.9378777742385864, "step": 876 }, { "epoch": 0.7390572390572391, "grad_norm": 127.14752197265625, "learning_rate": 9.088336763699347e-07, "loss": 0.6694403886795044, "step": 878 }, { "epoch": 0.7407407407407407, "grad_norm": 81.61506652832031, "learning_rate": 9.083290149810101e-07, "loss": 0.6651909351348877, "step": 880 }, { "epoch": 0.7424242424242424, "grad_norm": 3.663316488265991, "learning_rate": 9.07823118997859e-07, "loss": 1.182866096496582, "step": 882 }, { "epoch": 0.7441077441077442, "grad_norm": 3.8022303581237793, "learning_rate": 9.07315990163506e-07, "loss": 1.1220306158065796, "step": 884 }, { "epoch": 0.7457912457912458, "grad_norm": 3.328054189682007, "learning_rate": 9.06807630225223e-07, "loss": 0.6599295139312744, "step": 886 }, { "epoch": 0.7474747474747475, "grad_norm": 3.3686916828155518, "learning_rate": 9.062980409345242e-07, "loss": 1.0259349346160889, "step": 888 }, { "epoch": 0.7491582491582491, "grad_norm": 20.480480194091797, "learning_rate": 9.05787224047159e-07, "loss": 0.9568924903869629, "step": 890 }, { "epoch": 0.7508417508417509, "grad_norm": 6.441938877105713, "learning_rate": 9.052751813231064e-07, "loss": 0.9797095060348511, "step": 892 }, { "epoch": 0.7525252525252525, "grad_norm": 9.020792007446289, "learning_rate": 9.047619145265693e-07, "loss": 0.786825954914093, "step": 894 }, { "epoch": 0.7542087542087542, "grad_norm": 12.181696891784668, "learning_rate": 9.042474254259673e-07, "loss": 0.9024474620819092, "step": 896 }, { "epoch": 0.7558922558922558, "grad_norm": 28.832189559936523, "learning_rate": 9.037317157939322e-07, "loss": 0.6734418869018555, "step": 898 }, { "epoch": 0.7575757575757576, "grad_norm": 3.2818045616149902, "learning_rate": 9.032147874073007e-07, "loss": 0.9285035133361816, "step": 900 }, { "epoch": 0.7592592592592593, "grad_norm": 18.371009826660156, "learning_rate": 9.026966420471087e-07, "loss": 0.7218674421310425, "step": 902 }, { "epoch": 0.7609427609427609, "grad_norm": 2.9429922103881836, "learning_rate": 9.021772814985844e-07, "loss": 1.222078800201416, "step": 904 }, { "epoch": 0.7626262626262627, "grad_norm": 2.7464704513549805, "learning_rate": 9.016567075511441e-07, "loss": 0.9446361064910889, "step": 906 }, { "epoch": 0.7643097643097643, "grad_norm": 6.568495750427246, "learning_rate": 9.011349219983836e-07, "loss": 0.929685115814209, "step": 908 }, { "epoch": 0.765993265993266, "grad_norm": 4.631781578063965, "learning_rate": 9.006119266380738e-07, "loss": 0.8691076040267944, "step": 910 }, { "epoch": 0.7676767676767676, "grad_norm": 19.05845069885254, "learning_rate": 9.000877232721539e-07, "loss": 1.0112216472625732, "step": 912 }, { "epoch": 0.7693602693602694, "grad_norm": 14.539863586425781, "learning_rate": 8.99562313706725e-07, "loss": 0.890055775642395, "step": 914 }, { "epoch": 0.7710437710437711, "grad_norm": 5.530696868896484, "learning_rate": 8.99035699752044e-07, "loss": 1.0191471576690674, "step": 916 }, { "epoch": 0.7727272727272727, "grad_norm": 14.078718185424805, "learning_rate": 8.985078832225178e-07, "loss": 0.6652472019195557, "step": 918 }, { "epoch": 0.7744107744107744, "grad_norm": 28.123485565185547, "learning_rate": 8.979788659366963e-07, "loss": 0.5262911319732666, "step": 920 }, { "epoch": 0.7760942760942761, "grad_norm": 12.658363342285156, "learning_rate": 8.974486497172664e-07, "loss": 0.6195323467254639, "step": 922 }, { "epoch": 0.7777777777777778, "grad_norm": 9.243937492370605, "learning_rate": 8.969172363910464e-07, "loss": 0.9786189198493958, "step": 924 }, { "epoch": 0.7794612794612794, "grad_norm": 6.694032669067383, "learning_rate": 8.963846277889788e-07, "loss": 1.1813392639160156, "step": 926 }, { "epoch": 0.7811447811447811, "grad_norm": 2.7287495136260986, "learning_rate": 8.95850825746124e-07, "loss": 0.5288863182067871, "step": 928 }, { "epoch": 0.7828282828282829, "grad_norm": 12.440982818603516, "learning_rate": 8.953158321016549e-07, "loss": 1.3665971755981445, "step": 930 }, { "epoch": 0.7845117845117845, "grad_norm": 6.197256565093994, "learning_rate": 8.947796486988499e-07, "loss": 0.934798002243042, "step": 932 }, { "epoch": 0.7861952861952862, "grad_norm": 5.526829719543457, "learning_rate": 8.942422773850861e-07, "loss": 1.0153696537017822, "step": 934 }, { "epoch": 0.7878787878787878, "grad_norm": 3.1978728771209717, "learning_rate": 8.937037200118339e-07, "loss": 0.8981832265853882, "step": 936 }, { "epoch": 0.7895622895622896, "grad_norm": 2.9995744228363037, "learning_rate": 8.931639784346499e-07, "loss": 0.8695104718208313, "step": 938 }, { "epoch": 0.7912457912457912, "grad_norm": 6.706093788146973, "learning_rate": 8.926230545131711e-07, "loss": 1.0370559692382812, "step": 940 }, { "epoch": 0.7929292929292929, "grad_norm": 17.68717384338379, "learning_rate": 8.920809501111082e-07, "loss": 0.43204930424690247, "step": 942 }, { "epoch": 0.7946127946127947, "grad_norm": 4.556012153625488, "learning_rate": 8.915376670962384e-07, "loss": 0.934272289276123, "step": 944 }, { "epoch": 0.7962962962962963, "grad_norm": 4.898090362548828, "learning_rate": 8.90993207340401e-07, "loss": 0.910577654838562, "step": 946 }, { "epoch": 0.797979797979798, "grad_norm": 3.080552577972412, "learning_rate": 8.904475727194881e-07, "loss": 1.0652995109558105, "step": 948 }, { "epoch": 0.7996632996632996, "grad_norm": 6.745321273803711, "learning_rate": 8.899007651134413e-07, "loss": 0.8568437099456787, "step": 950 }, { "epoch": 0.8013468013468014, "grad_norm": 13.813915252685547, "learning_rate": 8.893527864062427e-07, "loss": 0.47221675515174866, "step": 952 }, { "epoch": 0.803030303030303, "grad_norm": 5.70471715927124, "learning_rate": 8.88803638485909e-07, "loss": 0.9682356119155884, "step": 954 }, { "epoch": 0.8047138047138047, "grad_norm": 9.05542278289795, "learning_rate": 8.882533232444864e-07, "loss": 0.9946258068084717, "step": 956 }, { "epoch": 0.8063973063973064, "grad_norm": 30.702098846435547, "learning_rate": 8.877018425780425e-07, "loss": 1.1317826509475708, "step": 958 }, { "epoch": 0.8080808080808081, "grad_norm": 14.64018726348877, "learning_rate": 8.8714919838666e-07, "loss": 0.7012873888015747, "step": 960 }, { "epoch": 0.8097643097643098, "grad_norm": 3.149690866470337, "learning_rate": 8.865953925744305e-07, "loss": 0.795744776725769, "step": 962 }, { "epoch": 0.8114478114478114, "grad_norm": 6.090580463409424, "learning_rate": 8.860404270494483e-07, "loss": 0.7089242935180664, "step": 964 }, { "epoch": 0.8131313131313131, "grad_norm": 3.53495192527771, "learning_rate": 8.85484303723803e-07, "loss": 1.0081251859664917, "step": 966 }, { "epoch": 0.8148148148148148, "grad_norm": 4.274377346038818, "learning_rate": 8.849270245135737e-07, "loss": 1.2170288562774658, "step": 968 }, { "epoch": 0.8164983164983165, "grad_norm": 3.169619560241699, "learning_rate": 8.843685913388216e-07, "loss": 1.0120604038238525, "step": 970 }, { "epoch": 0.8181818181818182, "grad_norm": 9.656790733337402, "learning_rate": 8.838090061235839e-07, "loss": 1.0408661365509033, "step": 972 }, { "epoch": 0.8198653198653199, "grad_norm": 3.6206579208374023, "learning_rate": 8.832482707958671e-07, "loss": 0.7572422027587891, "step": 974 }, { "epoch": 0.8215488215488216, "grad_norm": 4.2206034660339355, "learning_rate": 8.826863872876405e-07, "loss": 0.9668401479721069, "step": 976 }, { "epoch": 0.8232323232323232, "grad_norm": 2.5796895027160645, "learning_rate": 8.82123357534829e-07, "loss": 1.2220442295074463, "step": 978 }, { "epoch": 0.8249158249158249, "grad_norm": 3.009799003601074, "learning_rate": 8.815591834773073e-07, "loss": 1.1853399276733398, "step": 980 }, { "epoch": 0.8265993265993266, "grad_norm": 3.527939796447754, "learning_rate": 8.80993867058892e-07, "loss": 1.1044703722000122, "step": 982 }, { "epoch": 0.8282828282828283, "grad_norm": 5.415159225463867, "learning_rate": 8.804274102273362e-07, "loss": 1.0707950592041016, "step": 984 }, { "epoch": 0.82996632996633, "grad_norm": 41.4835205078125, "learning_rate": 8.798598149343223e-07, "loss": 0.9894696474075317, "step": 986 }, { "epoch": 0.8316498316498316, "grad_norm": 11.90714168548584, "learning_rate": 8.792910831354544e-07, "loss": 0.8949055671691895, "step": 988 }, { "epoch": 0.8333333333333334, "grad_norm": 19.156835556030273, "learning_rate": 8.787212167902533e-07, "loss": 0.847869336605072, "step": 990 }, { "epoch": 0.835016835016835, "grad_norm": 9.593557357788086, "learning_rate": 8.781502178621481e-07, "loss": 0.7175034284591675, "step": 992 }, { "epoch": 0.8367003367003367, "grad_norm": 7.256720066070557, "learning_rate": 8.775780883184705e-07, "loss": 0.9604957103729248, "step": 994 }, { "epoch": 0.8383838383838383, "grad_norm": 6.030484676361084, "learning_rate": 8.770048301304473e-07, "loss": 0.69129478931427, "step": 996 }, { "epoch": 0.8400673400673401, "grad_norm": 19.379892349243164, "learning_rate": 8.764304452731941e-07, "loss": 0.9693500399589539, "step": 998 }, { "epoch": 0.8417508417508418, "grad_norm": 5.4144086837768555, "learning_rate": 8.758549357257088e-07, "loss": 1.0944030284881592, "step": 1000 }, { "epoch": 0.8434343434343434, "grad_norm": 3.4778013229370117, "learning_rate": 8.752783034708636e-07, "loss": 0.7972965240478516, "step": 1002 }, { "epoch": 0.8451178451178452, "grad_norm": 18.509031295776367, "learning_rate": 8.747005504953994e-07, "loss": 0.7230968475341797, "step": 1004 }, { "epoch": 0.8468013468013468, "grad_norm": 4.210479736328125, "learning_rate": 8.741216787899185e-07, "loss": 1.1015040874481201, "step": 1006 }, { "epoch": 0.8484848484848485, "grad_norm": 2.3543701171875, "learning_rate": 8.73541690348877e-07, "loss": 0.6013465523719788, "step": 1008 }, { "epoch": 0.8501683501683501, "grad_norm": 4.900216579437256, "learning_rate": 8.729605871705794e-07, "loss": 0.9569622278213501, "step": 1010 }, { "epoch": 0.8518518518518519, "grad_norm": 13.174873352050781, "learning_rate": 8.723783712571706e-07, "loss": 0.891572117805481, "step": 1012 }, { "epoch": 0.8535353535353535, "grad_norm": 7.153807163238525, "learning_rate": 8.717950446146296e-07, "loss": 0.7898436784744263, "step": 1014 }, { "epoch": 0.8552188552188552, "grad_norm": 17.859582901000977, "learning_rate": 8.712106092527618e-07, "loss": 0.6778484582901001, "step": 1016 }, { "epoch": 0.8569023569023569, "grad_norm": 25.399763107299805, "learning_rate": 8.706250671851929e-07, "loss": 1.0100421905517578, "step": 1018 }, { "epoch": 0.8585858585858586, "grad_norm": 4.458539962768555, "learning_rate": 8.70038420429362e-07, "loss": 1.280473232269287, "step": 1020 }, { "epoch": 0.8602693602693603, "grad_norm": 13.934873580932617, "learning_rate": 8.694506710065139e-07, "loss": 0.9307641386985779, "step": 1022 }, { "epoch": 0.8619528619528619, "grad_norm": 6.230085372924805, "learning_rate": 8.688618209416927e-07, "loss": 0.9810340404510498, "step": 1024 }, { "epoch": 0.8636363636363636, "grad_norm": 7.749796390533447, "learning_rate": 8.682718722637344e-07, "loss": 0.9103548526763916, "step": 1026 }, { "epoch": 0.8653198653198653, "grad_norm": 5.378295421600342, "learning_rate": 8.676808270052607e-07, "loss": 1.0003798007965088, "step": 1028 }, { "epoch": 0.867003367003367, "grad_norm": 5.721936225891113, "learning_rate": 8.670886872026711e-07, "loss": 0.6671168804168701, "step": 1030 }, { "epoch": 0.8686868686868687, "grad_norm": 10.666192054748535, "learning_rate": 8.664954548961363e-07, "loss": 0.8651524782180786, "step": 1032 }, { "epoch": 0.8703703703703703, "grad_norm": 7.22635555267334, "learning_rate": 8.659011321295913e-07, "loss": 0.9622019529342651, "step": 1034 }, { "epoch": 0.8720538720538721, "grad_norm": 4.455495357513428, "learning_rate": 8.65305720950728e-07, "loss": 0.9549316167831421, "step": 1036 }, { "epoch": 0.8737373737373737, "grad_norm": 7.26788854598999, "learning_rate": 8.647092234109884e-07, "loss": 1.1264393329620361, "step": 1038 }, { "epoch": 0.8754208754208754, "grad_norm": 6.3819499015808105, "learning_rate": 8.64111641565558e-07, "loss": 1.0972923040390015, "step": 1040 }, { "epoch": 0.877104377104377, "grad_norm": 4.891845226287842, "learning_rate": 8.63512977473357e-07, "loss": 0.9982548952102661, "step": 1042 }, { "epoch": 0.8787878787878788, "grad_norm": 16.61280059814453, "learning_rate": 8.629132331970353e-07, "loss": 1.1183404922485352, "step": 1044 }, { "epoch": 0.8804713804713805, "grad_norm": 3.0736172199249268, "learning_rate": 8.623124108029645e-07, "loss": 1.0902597904205322, "step": 1046 }, { "epoch": 0.8821548821548821, "grad_norm": 15.772442817687988, "learning_rate": 8.617105123612304e-07, "loss": 0.9946341514587402, "step": 1048 }, { "epoch": 0.8838383838383839, "grad_norm": 22.210824966430664, "learning_rate": 8.611075399456263e-07, "loss": 0.8030619025230408, "step": 1050 }, { "epoch": 0.8855218855218855, "grad_norm": 13.653421401977539, "learning_rate": 8.605034956336462e-07, "loss": 1.084486484527588, "step": 1052 }, { "epoch": 0.8872053872053872, "grad_norm": 13.737056732177734, "learning_rate": 8.598983815064766e-07, "loss": 0.5944472551345825, "step": 1054 }, { "epoch": 0.8888888888888888, "grad_norm": 2.5293309688568115, "learning_rate": 8.592921996489902e-07, "loss": 0.9724396467208862, "step": 1056 }, { "epoch": 0.8905723905723906, "grad_norm": 2.733849287033081, "learning_rate": 8.586849521497389e-07, "loss": 0.9384986162185669, "step": 1058 }, { "epoch": 0.8922558922558923, "grad_norm": 18.489913940429688, "learning_rate": 8.580766411009455e-07, "loss": 0.9987908601760864, "step": 1060 }, { "epoch": 0.8939393939393939, "grad_norm": 5.748605251312256, "learning_rate": 8.574672685984979e-07, "loss": 0.9200767278671265, "step": 1062 }, { "epoch": 0.8956228956228957, "grad_norm": 11.951451301574707, "learning_rate": 8.568568367419404e-07, "loss": 0.844304621219635, "step": 1064 }, { "epoch": 0.8973063973063973, "grad_norm": 2.693372964859009, "learning_rate": 8.562453476344677e-07, "loss": 1.1123064756393433, "step": 1066 }, { "epoch": 0.898989898989899, "grad_norm": 3.8241171836853027, "learning_rate": 8.556328033829172e-07, "loss": 0.8062398433685303, "step": 1068 }, { "epoch": 0.9006734006734006, "grad_norm": 11.482207298278809, "learning_rate": 8.550192060977614e-07, "loss": 0.9785133600234985, "step": 1070 }, { "epoch": 0.9023569023569024, "grad_norm": 3.1708807945251465, "learning_rate": 8.544045578931013e-07, "loss": 1.2256948947906494, "step": 1072 }, { "epoch": 0.9040404040404041, "grad_norm": 3.0588254928588867, "learning_rate": 8.537888608866584e-07, "loss": 0.8702206611633301, "step": 1074 }, { "epoch": 0.9057239057239057, "grad_norm": 6.964415073394775, "learning_rate": 8.531721171997681e-07, "loss": 0.5286012291908264, "step": 1076 }, { "epoch": 0.9074074074074074, "grad_norm": 19.570329666137695, "learning_rate": 8.525543289573718e-07, "loss": 1.1106371879577637, "step": 1078 }, { "epoch": 0.9090909090909091, "grad_norm": 3.5319879055023193, "learning_rate": 8.519354982880099e-07, "loss": 0.9486319422721863, "step": 1080 }, { "epoch": 0.9107744107744108, "grad_norm": 3.6544623374938965, "learning_rate": 8.513156273238146e-07, "loss": 0.9495224356651306, "step": 1082 }, { "epoch": 0.9124579124579124, "grad_norm": 27.266931533813477, "learning_rate": 8.50694718200502e-07, "loss": 0.766098141670227, "step": 1084 }, { "epoch": 0.9141414141414141, "grad_norm": 4.358726978302002, "learning_rate": 8.500727730573655e-07, "loss": 1.1725554466247559, "step": 1086 }, { "epoch": 0.9158249158249159, "grad_norm": 13.713922500610352, "learning_rate": 8.494497940372675e-07, "loss": 0.9348576068878174, "step": 1088 }, { "epoch": 0.9175084175084175, "grad_norm": 2.8525874614715576, "learning_rate": 8.488257832866332e-07, "loss": 0.9388105869293213, "step": 1090 }, { "epoch": 0.9191919191919192, "grad_norm": 19.22268295288086, "learning_rate": 8.482007429554419e-07, "loss": 1.0528115034103394, "step": 1092 }, { "epoch": 0.9208754208754208, "grad_norm": 7.083608627319336, "learning_rate": 8.475746751972207e-07, "loss": 0.9258947968482971, "step": 1094 }, { "epoch": 0.9225589225589226, "grad_norm": 17.767122268676758, "learning_rate": 8.469475821690364e-07, "loss": 0.7900251746177673, "step": 1096 }, { "epoch": 0.9242424242424242, "grad_norm": 11.199775695800781, "learning_rate": 8.463194660314884e-07, "loss": 0.43797174096107483, "step": 1098 }, { "epoch": 0.9259259259259259, "grad_norm": 6.160865306854248, "learning_rate": 8.456903289487008e-07, "loss": 1.0159149169921875, "step": 1100 }, { "epoch": 0.9276094276094277, "grad_norm": 23.086267471313477, "learning_rate": 8.45060173088316e-07, "loss": 0.5812975168228149, "step": 1102 }, { "epoch": 0.9292929292929293, "grad_norm": 5.783674240112305, "learning_rate": 8.444290006214858e-07, "loss": 1.1394703388214111, "step": 1104 }, { "epoch": 0.930976430976431, "grad_norm": 5.126986026763916, "learning_rate": 8.43796813722865e-07, "loss": 1.0383517742156982, "step": 1106 }, { "epoch": 0.9326599326599326, "grad_norm": 16.552364349365234, "learning_rate": 8.431636145706035e-07, "loss": 0.8570190072059631, "step": 1108 }, { "epoch": 0.9343434343434344, "grad_norm": 6.3068037033081055, "learning_rate": 8.425294053463387e-07, "loss": 1.227846384048462, "step": 1110 }, { "epoch": 0.936026936026936, "grad_norm": 21.39204978942871, "learning_rate": 8.418941882351883e-07, "loss": 1.2234206199645996, "step": 1112 }, { "epoch": 0.9377104377104377, "grad_norm": 3.4600205421447754, "learning_rate": 8.412579654257424e-07, "loss": 1.0893580913543701, "step": 1114 }, { "epoch": 0.9393939393939394, "grad_norm": 9.739093780517578, "learning_rate": 8.406207391100564e-07, "loss": 1.1603511571884155, "step": 1116 }, { "epoch": 0.9410774410774411, "grad_norm": 9.583012580871582, "learning_rate": 8.399825114836431e-07, "loss": 1.036285161972046, "step": 1118 }, { "epoch": 0.9427609427609428, "grad_norm": 3.670794725418091, "learning_rate": 8.393432847454651e-07, "loss": 1.2967090606689453, "step": 1120 }, { "epoch": 0.9444444444444444, "grad_norm": 3.190880060195923, "learning_rate": 8.387030610979276e-07, "loss": 0.7892323732376099, "step": 1122 }, { "epoch": 0.9461279461279462, "grad_norm": 2.7288999557495117, "learning_rate": 8.380618427468703e-07, "loss": 0.8631899356842041, "step": 1124 }, { "epoch": 0.9478114478114478, "grad_norm": 80.42435455322266, "learning_rate": 8.374196319015605e-07, "loss": 0.8700990080833435, "step": 1126 }, { "epoch": 0.9494949494949495, "grad_norm": 2.7032294273376465, "learning_rate": 8.367764307746843e-07, "loss": 0.9584017992019653, "step": 1128 }, { "epoch": 0.9511784511784511, "grad_norm": 29.493919372558594, "learning_rate": 8.361322415823407e-07, "loss": 0.9330191016197205, "step": 1130 }, { "epoch": 0.9528619528619529, "grad_norm": 2.8431601524353027, "learning_rate": 8.354870665440322e-07, "loss": 0.9470508098602295, "step": 1132 }, { "epoch": 0.9545454545454546, "grad_norm": 4.1329240798950195, "learning_rate": 8.348409078826586e-07, "loss": 1.003962755203247, "step": 1134 }, { "epoch": 0.9562289562289562, "grad_norm": 21.232402801513672, "learning_rate": 8.341937678245078e-07, "loss": 0.8706526756286621, "step": 1136 }, { "epoch": 0.9579124579124579, "grad_norm": 6.638863563537598, "learning_rate": 8.335456485992501e-07, "loss": 0.7324610948562622, "step": 1138 }, { "epoch": 0.9595959595959596, "grad_norm": 9.82058048248291, "learning_rate": 8.328965524399288e-07, "loss": 0.5701298713684082, "step": 1140 }, { "epoch": 0.9612794612794613, "grad_norm": 4.2321672439575195, "learning_rate": 8.322464815829531e-07, "loss": 0.8950085639953613, "step": 1142 }, { "epoch": 0.9629629629629629, "grad_norm": 4.749987602233887, "learning_rate": 8.315954382680909e-07, "loss": 0.6259889602661133, "step": 1144 }, { "epoch": 0.9646464646464646, "grad_norm": 3.1439943313598633, "learning_rate": 8.309434247384601e-07, "loss": 0.9208143949508667, "step": 1146 }, { "epoch": 0.9663299663299664, "grad_norm": 9.139312744140625, "learning_rate": 8.302904432405219e-07, "loss": 0.7828265428543091, "step": 1148 }, { "epoch": 0.968013468013468, "grad_norm": 8.519466400146484, "learning_rate": 8.296364960240722e-07, "loss": 0.9561738967895508, "step": 1150 }, { "epoch": 0.9696969696969697, "grad_norm": 21.469980239868164, "learning_rate": 8.289815853422342e-07, "loss": 0.608352541923523, "step": 1152 }, { "epoch": 0.9713804713804713, "grad_norm": 6.825742721557617, "learning_rate": 8.283257134514507e-07, "loss": 0.9338740110397339, "step": 1154 }, { "epoch": 0.9730639730639731, "grad_norm": 4.129487991333008, "learning_rate": 8.276688826114768e-07, "loss": 0.5884324312210083, "step": 1156 }, { "epoch": 0.9747474747474747, "grad_norm": 5.313873291015625, "learning_rate": 8.270110950853706e-07, "loss": 0.9547237753868103, "step": 1158 }, { "epoch": 0.9764309764309764, "grad_norm": 6.063114643096924, "learning_rate": 8.263523531394872e-07, "loss": 0.44445914030075073, "step": 1160 }, { "epoch": 0.9781144781144782, "grad_norm": 17.088842391967773, "learning_rate": 8.256926590434696e-07, "loss": 1.1655336618423462, "step": 1162 }, { "epoch": 0.9797979797979798, "grad_norm": 2.781656265258789, "learning_rate": 8.250320150702416e-07, "loss": 0.6978096961975098, "step": 1164 }, { "epoch": 0.9814814814814815, "grad_norm": 4.57460355758667, "learning_rate": 8.243704234959996e-07, "loss": 0.8053257465362549, "step": 1166 }, { "epoch": 0.9831649831649831, "grad_norm": 7.392634391784668, "learning_rate": 8.237078866002051e-07, "loss": 0.8369849920272827, "step": 1168 }, { "epoch": 0.9848484848484849, "grad_norm": 7.21369743347168, "learning_rate": 8.230444066655763e-07, "loss": 0.8643122911453247, "step": 1170 }, { "epoch": 0.9865319865319865, "grad_norm": 8.024483680725098, "learning_rate": 8.223799859780808e-07, "loss": 0.6412187814712524, "step": 1172 }, { "epoch": 0.9882154882154882, "grad_norm": 25.19280433654785, "learning_rate": 8.217146268269274e-07, "loss": 0.917904257774353, "step": 1174 }, { "epoch": 0.98989898989899, "grad_norm": 9.128271102905273, "learning_rate": 8.210483315045584e-07, "loss": 0.4360630214214325, "step": 1176 }, { "epoch": 0.9915824915824916, "grad_norm": 10.149953842163086, "learning_rate": 8.203811023066416e-07, "loss": 1.071942925453186, "step": 1178 }, { "epoch": 0.9932659932659933, "grad_norm": 8.710041999816895, "learning_rate": 8.197129415320622e-07, "loss": 0.4572172164916992, "step": 1180 }, { "epoch": 0.9949494949494949, "grad_norm": 3.669222116470337, "learning_rate": 8.190438514829151e-07, "loss": 0.9243024587631226, "step": 1182 }, { "epoch": 0.9966329966329966, "grad_norm": 12.717865943908691, "learning_rate": 8.183738344644973e-07, "loss": 1.0385701656341553, "step": 1184 }, { "epoch": 0.9983164983164983, "grad_norm": 4.85836935043335, "learning_rate": 8.177028927852992e-07, "loss": 0.6608575582504272, "step": 1186 }, { "epoch": 1.0, "grad_norm": 11.576709747314453, "learning_rate": 8.170310287569973e-07, "loss": 0.7577022910118103, "step": 1188 }, { "epoch": 1.0016835016835017, "grad_norm": 9.695958137512207, "learning_rate": 8.163582446944456e-07, "loss": 0.4615962505340576, "step": 1190 }, { "epoch": 1.0033670033670035, "grad_norm": 6.610690116882324, "learning_rate": 8.156845429156687e-07, "loss": 0.4831297993659973, "step": 1192 }, { "epoch": 1.005050505050505, "grad_norm": 3.4326443672180176, "learning_rate": 8.150099257418522e-07, "loss": 1.146728515625, "step": 1194 }, { "epoch": 1.0067340067340067, "grad_norm": 20.49312973022461, "learning_rate": 8.143343954973366e-07, "loss": 0.8859339356422424, "step": 1196 }, { "epoch": 1.0084175084175084, "grad_norm": 3.5065126419067383, "learning_rate": 8.136579545096076e-07, "loss": 1.0677597522735596, "step": 1198 }, { "epoch": 1.0101010101010102, "grad_norm": 13.90986156463623, "learning_rate": 8.129806051092889e-07, "loss": 1.1894700527191162, "step": 1200 }, { "epoch": 1.0117845117845117, "grad_norm": 3.6254143714904785, "learning_rate": 8.123023496301343e-07, "loss": 0.985792338848114, "step": 1202 }, { "epoch": 1.0134680134680134, "grad_norm": 2.666475296020508, "learning_rate": 8.116231904090192e-07, "loss": 1.0036242008209229, "step": 1204 }, { "epoch": 1.0151515151515151, "grad_norm": 15.559446334838867, "learning_rate": 8.109431297859332e-07, "loss": 1.0831941366195679, "step": 1206 }, { "epoch": 1.0168350168350169, "grad_norm": 16.54594612121582, "learning_rate": 8.10262170103971e-07, "loss": 0.6582114696502686, "step": 1208 }, { "epoch": 1.0185185185185186, "grad_norm": 4.971505641937256, "learning_rate": 8.095803137093252e-07, "loss": 0.7359082698822021, "step": 1210 }, { "epoch": 1.02020202020202, "grad_norm": 3.355790853500366, "learning_rate": 8.088975629512781e-07, "loss": 0.5685245990753174, "step": 1212 }, { "epoch": 1.0218855218855218, "grad_norm": 9.155191421508789, "learning_rate": 8.082139201821933e-07, "loss": 0.8225246667861938, "step": 1214 }, { "epoch": 1.0235690235690236, "grad_norm": 12.392461776733398, "learning_rate": 8.075293877575079e-07, "loss": 0.4670335352420807, "step": 1216 }, { "epoch": 1.0252525252525253, "grad_norm": 15.242469787597656, "learning_rate": 8.068439680357239e-07, "loss": 0.9990904331207275, "step": 1218 }, { "epoch": 1.026936026936027, "grad_norm": 5.101475238800049, "learning_rate": 8.06157663378401e-07, "loss": 0.8169501423835754, "step": 1220 }, { "epoch": 1.0286195286195285, "grad_norm": 45.69724655151367, "learning_rate": 8.054704761501471e-07, "loss": 0.9720203280448914, "step": 1222 }, { "epoch": 1.0303030303030303, "grad_norm": 9.621611595153809, "learning_rate": 8.047824087186116e-07, "loss": 1.1497771739959717, "step": 1224 }, { "epoch": 1.031986531986532, "grad_norm": 17.078630447387695, "learning_rate": 8.040934634544761e-07, "loss": 0.6966054439544678, "step": 1226 }, { "epoch": 1.0336700336700337, "grad_norm": 12.26323413848877, "learning_rate": 8.03403642731447e-07, "loss": 0.9055821299552917, "step": 1228 }, { "epoch": 1.0353535353535352, "grad_norm": 4.618709564208984, "learning_rate": 8.027129489262472e-07, "loss": 0.8367654085159302, "step": 1230 }, { "epoch": 1.037037037037037, "grad_norm": 14.03416919708252, "learning_rate": 8.020213844186071e-07, "loss": 0.5471811294555664, "step": 1232 }, { "epoch": 1.0387205387205387, "grad_norm": 2.462353229522705, "learning_rate": 8.013289515912575e-07, "loss": 0.9337582588195801, "step": 1234 }, { "epoch": 1.0404040404040404, "grad_norm": 3.580676794052124, "learning_rate": 8.006356528299211e-07, "loss": 0.9284713268280029, "step": 1236 }, { "epoch": 1.0420875420875422, "grad_norm": 14.55753231048584, "learning_rate": 7.999414905233035e-07, "loss": 0.5675897598266602, "step": 1238 }, { "epoch": 1.0437710437710437, "grad_norm": 3.7598259449005127, "learning_rate": 7.992464670630862e-07, "loss": 1.0432960987091064, "step": 1240 }, { "epoch": 1.0454545454545454, "grad_norm": 6.506076335906982, "learning_rate": 7.985505848439171e-07, "loss": 1.0147356986999512, "step": 1242 }, { "epoch": 1.0471380471380471, "grad_norm": 4.660027503967285, "learning_rate": 7.978538462634036e-07, "loss": 0.7054228782653809, "step": 1244 }, { "epoch": 1.0488215488215489, "grad_norm": 15.017945289611816, "learning_rate": 7.971562537221032e-07, "loss": 0.7315689325332642, "step": 1246 }, { "epoch": 1.0505050505050506, "grad_norm": 11.619869232177734, "learning_rate": 7.964578096235156e-07, "loss": 0.9787733554840088, "step": 1248 }, { "epoch": 1.0521885521885521, "grad_norm": 25.001440048217773, "learning_rate": 7.957585163740746e-07, "loss": 0.7732163667678833, "step": 1250 }, { "epoch": 1.0538720538720538, "grad_norm": 13.280570030212402, "learning_rate": 7.950583763831398e-07, "loss": 0.7055392861366272, "step": 1252 }, { "epoch": 1.0555555555555556, "grad_norm": 8.0188627243042, "learning_rate": 7.943573920629879e-07, "loss": 1.0268526077270508, "step": 1254 }, { "epoch": 1.0572390572390573, "grad_norm": 8.311823844909668, "learning_rate": 7.936555658288051e-07, "loss": 0.7499762177467346, "step": 1256 }, { "epoch": 1.0589225589225588, "grad_norm": 12.510072708129883, "learning_rate": 7.929529000986778e-07, "loss": 0.5642093420028687, "step": 1258 }, { "epoch": 1.0606060606060606, "grad_norm": 8.302406311035156, "learning_rate": 7.922493972935851e-07, "loss": 0.8775455355644226, "step": 1260 }, { "epoch": 1.0622895622895623, "grad_norm": 4.110003471374512, "learning_rate": 7.915450598373903e-07, "loss": 0.6986871957778931, "step": 1262 }, { "epoch": 1.063973063973064, "grad_norm": 5.865422248840332, "learning_rate": 7.908398901568324e-07, "loss": 0.8195330500602722, "step": 1264 }, { "epoch": 1.0656565656565657, "grad_norm": 9.913485527038574, "learning_rate": 7.901338906815174e-07, "loss": 0.8037704229354858, "step": 1266 }, { "epoch": 1.0673400673400673, "grad_norm": 111.66101837158203, "learning_rate": 7.894270638439106e-07, "loss": 0.6612458825111389, "step": 1268 }, { "epoch": 1.069023569023569, "grad_norm": 6.807026386260986, "learning_rate": 7.88719412079328e-07, "loss": 0.6571763157844543, "step": 1270 }, { "epoch": 1.0707070707070707, "grad_norm": 6.202319622039795, "learning_rate": 7.880109378259274e-07, "loss": 0.7407518625259399, "step": 1272 }, { "epoch": 1.0723905723905724, "grad_norm": 18.488807678222656, "learning_rate": 7.873016435247011e-07, "loss": 0.5137653350830078, "step": 1274 }, { "epoch": 1.074074074074074, "grad_norm": 6.398234844207764, "learning_rate": 7.865915316194661e-07, "loss": 0.7220208644866943, "step": 1276 }, { "epoch": 1.0757575757575757, "grad_norm": 24.44901466369629, "learning_rate": 7.858806045568568e-07, "loss": 1.0816729068756104, "step": 1278 }, { "epoch": 1.0774410774410774, "grad_norm": 42.94617462158203, "learning_rate": 7.85168864786316e-07, "loss": 0.569089412689209, "step": 1280 }, { "epoch": 1.0791245791245792, "grad_norm": 17.059085845947266, "learning_rate": 7.844563147600869e-07, "loss": 0.34395474195480347, "step": 1282 }, { "epoch": 1.0808080808080809, "grad_norm": 5.726075172424316, "learning_rate": 7.837429569332038e-07, "loss": 1.104400873184204, "step": 1284 }, { "epoch": 1.0824915824915824, "grad_norm": 5.970583915710449, "learning_rate": 7.830287937634848e-07, "loss": 0.9108725786209106, "step": 1286 }, { "epoch": 1.0841750841750841, "grad_norm": 3.13798451423645, "learning_rate": 7.823138277115227e-07, "loss": 0.6928012371063232, "step": 1288 }, { "epoch": 1.0858585858585859, "grad_norm": 3.2338767051696777, "learning_rate": 7.81598061240676e-07, "loss": 0.6945496797561646, "step": 1290 }, { "epoch": 1.0875420875420876, "grad_norm": 9.174521446228027, "learning_rate": 7.808814968170612e-07, "loss": 1.177178144454956, "step": 1292 }, { "epoch": 1.0892255892255893, "grad_norm": 2.838789463043213, "learning_rate": 7.801641369095449e-07, "loss": 0.8742045164108276, "step": 1294 }, { "epoch": 1.0909090909090908, "grad_norm": 33.68141555786133, "learning_rate": 7.794459839897334e-07, "loss": 0.5730578899383545, "step": 1296 }, { "epoch": 1.0925925925925926, "grad_norm": 8.239413261413574, "learning_rate": 7.787270405319656e-07, "loss": 0.6627512574195862, "step": 1298 }, { "epoch": 1.0942760942760943, "grad_norm": 10.630107879638672, "learning_rate": 7.780073090133045e-07, "loss": 0.6856255531311035, "step": 1300 }, { "epoch": 1.095959595959596, "grad_norm": 8.586835861206055, "learning_rate": 7.772867919135278e-07, "loss": 0.7367527484893799, "step": 1302 }, { "epoch": 1.0976430976430978, "grad_norm": 26.07152557373047, "learning_rate": 7.765654917151201e-07, "loss": 0.6313869953155518, "step": 1304 }, { "epoch": 1.0993265993265993, "grad_norm": 26.481813430786133, "learning_rate": 7.758434109032642e-07, "loss": 0.6839025020599365, "step": 1306 }, { "epoch": 1.101010101010101, "grad_norm": 11.492305755615234, "learning_rate": 7.751205519658321e-07, "loss": 0.5959317684173584, "step": 1308 }, { "epoch": 1.1026936026936027, "grad_norm": 5.645211219787598, "learning_rate": 7.743969173933771e-07, "loss": 0.5784125924110413, "step": 1310 }, { "epoch": 1.1043771043771045, "grad_norm": 4.408408164978027, "learning_rate": 7.736725096791249e-07, "loss": 1.2098188400268555, "step": 1312 }, { "epoch": 1.106060606060606, "grad_norm": 9.238399505615234, "learning_rate": 7.729473313189647e-07, "loss": 0.9550820589065552, "step": 1314 }, { "epoch": 1.1077441077441077, "grad_norm": 15.260536193847656, "learning_rate": 7.722213848114411e-07, "loss": 0.9281185865402222, "step": 1316 }, { "epoch": 1.1094276094276094, "grad_norm": 11.378418922424316, "learning_rate": 7.714946726577453e-07, "loss": 0.9321832656860352, "step": 1318 }, { "epoch": 1.1111111111111112, "grad_norm": 27.803199768066406, "learning_rate": 7.707671973617066e-07, "loss": 0.7850360870361328, "step": 1320 }, { "epoch": 1.112794612794613, "grad_norm": 11.86633586883545, "learning_rate": 7.700389614297832e-07, "loss": 0.8705657124519348, "step": 1322 }, { "epoch": 1.1144781144781144, "grad_norm": 13.372186660766602, "learning_rate": 7.693099673710545e-07, "loss": 0.5348168015480042, "step": 1324 }, { "epoch": 1.1161616161616161, "grad_norm": 7.737417697906494, "learning_rate": 7.685802176972117e-07, "loss": 0.8875303268432617, "step": 1326 }, { "epoch": 1.1178451178451179, "grad_norm": 4.609512805938721, "learning_rate": 7.678497149225494e-07, "loss": 0.7146286964416504, "step": 1328 }, { "epoch": 1.1195286195286196, "grad_norm": 3.953033447265625, "learning_rate": 7.671184615639573e-07, "loss": 1.0624680519104004, "step": 1330 }, { "epoch": 1.121212121212121, "grad_norm": 15.329386711120605, "learning_rate": 7.663864601409106e-07, "loss": 0.7291280031204224, "step": 1332 }, { "epoch": 1.1228956228956228, "grad_norm": 5.592386722564697, "learning_rate": 7.656537131754621e-07, "loss": 1.146779179573059, "step": 1334 }, { "epoch": 1.1245791245791246, "grad_norm": 19.50740623474121, "learning_rate": 7.649202231922338e-07, "loss": 0.6419116258621216, "step": 1336 }, { "epoch": 1.1262626262626263, "grad_norm": 3.845174789428711, "learning_rate": 7.641859927184071e-07, "loss": 0.7372583150863647, "step": 1338 }, { "epoch": 1.127946127946128, "grad_norm": 8.609213829040527, "learning_rate": 7.634510242837149e-07, "loss": 0.603482723236084, "step": 1340 }, { "epoch": 1.1296296296296295, "grad_norm": 7.67048454284668, "learning_rate": 7.627153204204329e-07, "loss": 0.9267317056655884, "step": 1342 }, { "epoch": 1.1313131313131313, "grad_norm": 3.1689493656158447, "learning_rate": 7.619788836633701e-07, "loss": 1.1948891878128052, "step": 1344 }, { "epoch": 1.132996632996633, "grad_norm": 44.90256118774414, "learning_rate": 7.612417165498611e-07, "loss": 1.0813300609588623, "step": 1346 }, { "epoch": 1.1346801346801347, "grad_norm": 30.334089279174805, "learning_rate": 7.605038216197569e-07, "loss": 0.7344606518745422, "step": 1348 }, { "epoch": 1.1363636363636362, "grad_norm": 7.781182765960693, "learning_rate": 7.597652014154162e-07, "loss": 0.5709810256958008, "step": 1350 }, { "epoch": 1.138047138047138, "grad_norm": 17.377174377441406, "learning_rate": 7.590258584816957e-07, "loss": 0.32737797498703003, "step": 1352 }, { "epoch": 1.1397306397306397, "grad_norm": 3.968998908996582, "learning_rate": 7.582857953659437e-07, "loss": 1.0901448726654053, "step": 1354 }, { "epoch": 1.1414141414141414, "grad_norm": 4.9800801277160645, "learning_rate": 7.575450146179887e-07, "loss": 1.098610281944275, "step": 1356 }, { "epoch": 1.1430976430976432, "grad_norm": 11.949906349182129, "learning_rate": 7.56803518790132e-07, "loss": 0.8105623722076416, "step": 1358 }, { "epoch": 1.144781144781145, "grad_norm": 3.4032137393951416, "learning_rate": 7.560613104371386e-07, "loss": 0.7330828905105591, "step": 1360 }, { "epoch": 1.1464646464646464, "grad_norm": 2.8660380840301514, "learning_rate": 7.553183921162289e-07, "loss": 0.9020315408706665, "step": 1362 }, { "epoch": 1.1481481481481481, "grad_norm": 12.72059154510498, "learning_rate": 7.545747663870687e-07, "loss": 0.9371917843818665, "step": 1364 }, { "epoch": 1.1498316498316499, "grad_norm": 23.1413631439209, "learning_rate": 7.53830435811762e-07, "loss": 0.7397361993789673, "step": 1366 }, { "epoch": 1.1515151515151516, "grad_norm": 13.042642593383789, "learning_rate": 7.530854029548404e-07, "loss": 0.8247054815292358, "step": 1368 }, { "epoch": 1.1531986531986531, "grad_norm": 4.0835795402526855, "learning_rate": 7.523396703832557e-07, "loss": 1.090425968170166, "step": 1370 }, { "epoch": 1.1548821548821548, "grad_norm": 3.6361794471740723, "learning_rate": 7.515932406663705e-07, "loss": 1.0872161388397217, "step": 1372 }, { "epoch": 1.1565656565656566, "grad_norm": 13.066899299621582, "learning_rate": 7.508461163759493e-07, "loss": 0.49930015206336975, "step": 1374 }, { "epoch": 1.1582491582491583, "grad_norm": 5.910285472869873, "learning_rate": 7.500983000861493e-07, "loss": 0.46187859773635864, "step": 1376 }, { "epoch": 1.15993265993266, "grad_norm": 12.718847274780273, "learning_rate": 7.493497943735124e-07, "loss": 0.9587620496749878, "step": 1378 }, { "epoch": 1.1616161616161615, "grad_norm": 2.7174603939056396, "learning_rate": 7.48600601816956e-07, "loss": 0.7705467939376831, "step": 1380 }, { "epoch": 1.1632996632996633, "grad_norm": 10.425454139709473, "learning_rate": 7.478507249977632e-07, "loss": 0.5908098220825195, "step": 1382 }, { "epoch": 1.164983164983165, "grad_norm": 4.83370304107666, "learning_rate": 7.471001664995757e-07, "loss": 0.4560571312904358, "step": 1384 }, { "epoch": 1.1666666666666667, "grad_norm": 16.3512020111084, "learning_rate": 7.46348928908383e-07, "loss": 0.6046204566955566, "step": 1386 }, { "epoch": 1.1683501683501682, "grad_norm": 3.3071091175079346, "learning_rate": 7.455970148125145e-07, "loss": 0.6498188972473145, "step": 1388 }, { "epoch": 1.17003367003367, "grad_norm": 3.1778576374053955, "learning_rate": 7.44844426802631e-07, "loss": 0.9177660942077637, "step": 1390 }, { "epoch": 1.1717171717171717, "grad_norm": 6.8912577629089355, "learning_rate": 7.440911674717148e-07, "loss": 0.9661788940429688, "step": 1392 }, { "epoch": 1.1734006734006734, "grad_norm": 2.982248306274414, "learning_rate": 7.433372394150613e-07, "loss": 0.7623599171638489, "step": 1394 }, { "epoch": 1.1750841750841752, "grad_norm": 6.73823356628418, "learning_rate": 7.425826452302695e-07, "loss": 0.6162515878677368, "step": 1396 }, { "epoch": 1.1767676767676767, "grad_norm": 7.467746734619141, "learning_rate": 7.418273875172344e-07, "loss": 0.7228857278823853, "step": 1398 }, { "epoch": 1.1784511784511784, "grad_norm": 10.521594047546387, "learning_rate": 7.410714688781362e-07, "loss": 0.547920823097229, "step": 1400 }, { "epoch": 1.1801346801346801, "grad_norm": 4.692141056060791, "learning_rate": 7.403148919174327e-07, "loss": 1.011480450630188, "step": 1402 }, { "epoch": 1.1818181818181819, "grad_norm": 6.844545841217041, "learning_rate": 7.3955765924185e-07, "loss": 0.7596945762634277, "step": 1404 }, { "epoch": 1.1835016835016834, "grad_norm": 8.648809432983398, "learning_rate": 7.387997734603734e-07, "loss": 0.771956205368042, "step": 1406 }, { "epoch": 1.1851851851851851, "grad_norm": 15.440680503845215, "learning_rate": 7.38041237184238e-07, "loss": 1.2356925010681152, "step": 1408 }, { "epoch": 1.1868686868686869, "grad_norm": 5.456315040588379, "learning_rate": 7.372820530269203e-07, "loss": 0.727834165096283, "step": 1410 }, { "epoch": 1.1885521885521886, "grad_norm": 33.579254150390625, "learning_rate": 7.365222236041298e-07, "loss": 0.780275821685791, "step": 1412 }, { "epoch": 1.1902356902356903, "grad_norm": 14.142115592956543, "learning_rate": 7.35761751533798e-07, "loss": 0.9008167386054993, "step": 1414 }, { "epoch": 1.1919191919191918, "grad_norm": 9.76620864868164, "learning_rate": 7.350006394360716e-07, "loss": 0.7642953991889954, "step": 1416 }, { "epoch": 1.1936026936026936, "grad_norm": 17.838695526123047, "learning_rate": 7.342388899333014e-07, "loss": 1.0995585918426514, "step": 1418 }, { "epoch": 1.1952861952861953, "grad_norm": 65.10449981689453, "learning_rate": 7.334765056500356e-07, "loss": 0.947974443435669, "step": 1420 }, { "epoch": 1.196969696969697, "grad_norm": 4.257263660430908, "learning_rate": 7.327134892130085e-07, "loss": 0.7925307750701904, "step": 1422 }, { "epoch": 1.1986531986531987, "grad_norm": 3.5786726474761963, "learning_rate": 7.319498432511329e-07, "loss": 0.6507192850112915, "step": 1424 }, { "epoch": 1.2003367003367003, "grad_norm": 9.806020736694336, "learning_rate": 7.311855703954901e-07, "loss": 0.9374374747276306, "step": 1426 }, { "epoch": 1.202020202020202, "grad_norm": 16.49274253845215, "learning_rate": 7.304206732793222e-07, "loss": 0.5745439529418945, "step": 1428 }, { "epoch": 1.2037037037037037, "grad_norm": 10.744287490844727, "learning_rate": 7.296551545380213e-07, "loss": 0.9440407752990723, "step": 1430 }, { "epoch": 1.2053872053872055, "grad_norm": 4.190220832824707, "learning_rate": 7.288890168091214e-07, "loss": 0.7019326686859131, "step": 1432 }, { "epoch": 1.2070707070707072, "grad_norm": 4.626961708068848, "learning_rate": 7.281222627322897e-07, "loss": 1.2138803005218506, "step": 1434 }, { "epoch": 1.2087542087542087, "grad_norm": 29.172809600830078, "learning_rate": 7.273548949493166e-07, "loss": 0.6954021453857422, "step": 1436 }, { "epoch": 1.2104377104377104, "grad_norm": 6.540690898895264, "learning_rate": 7.265869161041065e-07, "loss": 0.5005062818527222, "step": 1438 }, { "epoch": 1.2121212121212122, "grad_norm": 1.7837268114089966, "learning_rate": 7.258183288426703e-07, "loss": 0.4664597511291504, "step": 1440 }, { "epoch": 1.2138047138047139, "grad_norm": 6.852010250091553, "learning_rate": 7.25049135813114e-07, "loss": 0.7454104423522949, "step": 1442 }, { "epoch": 1.2154882154882154, "grad_norm": 3.7926137447357178, "learning_rate": 7.242793396656315e-07, "loss": 0.9171748757362366, "step": 1444 }, { "epoch": 1.2171717171717171, "grad_norm": 4.602051734924316, "learning_rate": 7.235089430524943e-07, "loss": 0.9297394156455994, "step": 1446 }, { "epoch": 1.2188552188552189, "grad_norm": 8.485408782958984, "learning_rate": 7.227379486280432e-07, "loss": 0.6902468800544739, "step": 1448 }, { "epoch": 1.2205387205387206, "grad_norm": 3.0322980880737305, "learning_rate": 7.219663590486778e-07, "loss": 0.9321104288101196, "step": 1450 }, { "epoch": 1.2222222222222223, "grad_norm": 21.71652603149414, "learning_rate": 7.211941769728493e-07, "loss": 0.9111616611480713, "step": 1452 }, { "epoch": 1.2239057239057238, "grad_norm": 5.9835405349731445, "learning_rate": 7.204214050610498e-07, "loss": 0.6736348867416382, "step": 1454 }, { "epoch": 1.2255892255892256, "grad_norm": 5.00324010848999, "learning_rate": 7.196480459758035e-07, "loss": 0.8823907375335693, "step": 1456 }, { "epoch": 1.2272727272727273, "grad_norm": 9.083556175231934, "learning_rate": 7.188741023816581e-07, "loss": 0.8732795715332031, "step": 1458 }, { "epoch": 1.228956228956229, "grad_norm": 6.7020440101623535, "learning_rate": 7.180995769451747e-07, "loss": 0.9818441867828369, "step": 1460 }, { "epoch": 1.2306397306397305, "grad_norm": 13.26759147644043, "learning_rate": 7.173244723349194e-07, "loss": 0.7110154628753662, "step": 1462 }, { "epoch": 1.2323232323232323, "grad_norm": 3.6703689098358154, "learning_rate": 7.165487912214538e-07, "loss": 0.6870818138122559, "step": 1464 }, { "epoch": 1.234006734006734, "grad_norm": 4.100058078765869, "learning_rate": 7.157725362773258e-07, "loss": 0.8629697561264038, "step": 1466 }, { "epoch": 1.2356902356902357, "grad_norm": 7.570556640625, "learning_rate": 7.1499571017706e-07, "loss": 0.9524326324462891, "step": 1468 }, { "epoch": 1.2373737373737375, "grad_norm": 3.626100778579712, "learning_rate": 7.142183155971493e-07, "loss": 1.1208899021148682, "step": 1470 }, { "epoch": 1.239057239057239, "grad_norm": 6.774829387664795, "learning_rate": 7.13440355216045e-07, "loss": 0.6910721063613892, "step": 1472 }, { "epoch": 1.2407407407407407, "grad_norm": 12.941313743591309, "learning_rate": 7.126618317141482e-07, "loss": 0.6839091777801514, "step": 1474 }, { "epoch": 1.2424242424242424, "grad_norm": 13.288043022155762, "learning_rate": 7.118827477737999e-07, "loss": 0.4849187135696411, "step": 1476 }, { "epoch": 1.2441077441077442, "grad_norm": 14.330803871154785, "learning_rate": 7.111031060792719e-07, "loss": 0.7669592499732971, "step": 1478 }, { "epoch": 1.2457912457912457, "grad_norm": 3.7719264030456543, "learning_rate": 7.103229093167579e-07, "loss": 0.7678747773170471, "step": 1480 }, { "epoch": 1.2474747474747474, "grad_norm": 5.733471393585205, "learning_rate": 7.095421601743643e-07, "loss": 0.7603921890258789, "step": 1482 }, { "epoch": 1.2491582491582491, "grad_norm": 3.3023183345794678, "learning_rate": 7.087608613421e-07, "loss": 0.475089430809021, "step": 1484 }, { "epoch": 1.2508417508417509, "grad_norm": 6.135479927062988, "learning_rate": 7.079790155118684e-07, "loss": 0.6280136108398438, "step": 1486 }, { "epoch": 1.2525252525252526, "grad_norm": 14.41522216796875, "learning_rate": 7.071966253774575e-07, "loss": 0.7469892501831055, "step": 1488 }, { "epoch": 1.2542087542087543, "grad_norm": 2.5887715816497803, "learning_rate": 7.064136936345304e-07, "loss": 0.7018432021141052, "step": 1490 }, { "epoch": 1.2558922558922558, "grad_norm": 3.5334408283233643, "learning_rate": 7.056302229806163e-07, "loss": 0.825816810131073, "step": 1492 }, { "epoch": 1.2575757575757576, "grad_norm": 3.581906795501709, "learning_rate": 7.048462161151012e-07, "loss": 0.8269777297973633, "step": 1494 }, { "epoch": 1.2592592592592593, "grad_norm": 8.52226734161377, "learning_rate": 7.040616757392188e-07, "loss": 0.7199699282646179, "step": 1496 }, { "epoch": 1.2609427609427608, "grad_norm": 2.9323740005493164, "learning_rate": 7.032766045560408e-07, "loss": 0.9787487387657166, "step": 1498 }, { "epoch": 1.2626262626262625, "grad_norm": 6.500389099121094, "learning_rate": 7.024910052704677e-07, "loss": 1.0706979036331177, "step": 1500 }, { "epoch": 1.2643097643097643, "grad_norm": 5.391655445098877, "learning_rate": 7.017048805892194e-07, "loss": 0.5319828987121582, "step": 1502 }, { "epoch": 1.265993265993266, "grad_norm": 5.92175817489624, "learning_rate": 7.009182332208266e-07, "loss": 0.7819663286209106, "step": 1504 }, { "epoch": 1.2676767676767677, "grad_norm": 4.497714996337891, "learning_rate": 7.001310658756201e-07, "loss": 1.1338582038879395, "step": 1506 }, { "epoch": 1.2693602693602695, "grad_norm": 4.954183578491211, "learning_rate": 6.993433812657226e-07, "loss": 1.1781617403030396, "step": 1508 }, { "epoch": 1.271043771043771, "grad_norm": 17.044879913330078, "learning_rate": 6.985551821050395e-07, "loss": 0.5676237344741821, "step": 1510 }, { "epoch": 1.2727272727272727, "grad_norm": 17.09630012512207, "learning_rate": 6.97766471109248e-07, "loss": 0.6173258423805237, "step": 1512 }, { "epoch": 1.2744107744107744, "grad_norm": 5.917657375335693, "learning_rate": 6.969772509957895e-07, "loss": 0.8361184597015381, "step": 1514 }, { "epoch": 1.2760942760942762, "grad_norm": 4.721149921417236, "learning_rate": 6.961875244838596e-07, "loss": 0.8495975732803345, "step": 1516 }, { "epoch": 1.2777777777777777, "grad_norm": 11.31229019165039, "learning_rate": 6.953972942943981e-07, "loss": 0.7243598699569702, "step": 1518 }, { "epoch": 1.2794612794612794, "grad_norm": 3.162838935852051, "learning_rate": 6.946065631500806e-07, "loss": 0.9145760536193848, "step": 1520 }, { "epoch": 1.2811447811447811, "grad_norm": 9.259127616882324, "learning_rate": 6.938153337753088e-07, "loss": 0.6645021438598633, "step": 1522 }, { "epoch": 1.2828282828282829, "grad_norm": 26.91777229309082, "learning_rate": 6.930236088962004e-07, "loss": 0.651879072189331, "step": 1524 }, { "epoch": 1.2845117845117846, "grad_norm": 5.544670104980469, "learning_rate": 6.922313912405811e-07, "loss": 0.8310514688491821, "step": 1526 }, { "epoch": 1.2861952861952861, "grad_norm": 16.370214462280273, "learning_rate": 6.914386835379738e-07, "loss": 0.7569658756256104, "step": 1528 }, { "epoch": 1.2878787878787878, "grad_norm": 8.142780303955078, "learning_rate": 6.906454885195904e-07, "loss": 0.4488654136657715, "step": 1530 }, { "epoch": 1.2895622895622896, "grad_norm": 5.413924217224121, "learning_rate": 6.898518089183211e-07, "loss": 0.8656577467918396, "step": 1532 }, { "epoch": 1.2912457912457913, "grad_norm": 4.607274532318115, "learning_rate": 6.890576474687263e-07, "loss": 1.0356013774871826, "step": 1534 }, { "epoch": 1.2929292929292928, "grad_norm": 6.521271705627441, "learning_rate": 6.882630069070262e-07, "loss": 0.9825664758682251, "step": 1536 }, { "epoch": 1.2946127946127945, "grad_norm": 5.2521071434021, "learning_rate": 6.874678899710923e-07, "loss": 0.6595628261566162, "step": 1538 }, { "epoch": 1.2962962962962963, "grad_norm": 16.32155990600586, "learning_rate": 6.866722994004364e-07, "loss": 0.7686331868171692, "step": 1540 }, { "epoch": 1.297979797979798, "grad_norm": 15.72677230834961, "learning_rate": 6.858762379362032e-07, "loss": 0.8358673453330994, "step": 1542 }, { "epoch": 1.2996632996632997, "grad_norm": 5.651062965393066, "learning_rate": 6.850797083211591e-07, "loss": 0.9706641435623169, "step": 1544 }, { "epoch": 1.3013468013468015, "grad_norm": 10.415674209594727, "learning_rate": 6.842827132996841e-07, "loss": 0.8287351131439209, "step": 1546 }, { "epoch": 1.303030303030303, "grad_norm": 16.539886474609375, "learning_rate": 6.83485255617761e-07, "loss": 0.8370147943496704, "step": 1548 }, { "epoch": 1.3047138047138047, "grad_norm": 6.127871036529541, "learning_rate": 6.826873380229673e-07, "loss": 0.6265941858291626, "step": 1550 }, { "epoch": 1.3063973063973064, "grad_norm": 6.429442882537842, "learning_rate": 6.818889632644649e-07, "loss": 0.9182727336883545, "step": 1552 }, { "epoch": 1.308080808080808, "grad_norm": 4.870426654815674, "learning_rate": 6.810901340929906e-07, "loss": 0.962719202041626, "step": 1554 }, { "epoch": 1.3097643097643097, "grad_norm": 4.017622947692871, "learning_rate": 6.802908532608472e-07, "loss": 1.0228416919708252, "step": 1556 }, { "epoch": 1.3114478114478114, "grad_norm": 6.815629482269287, "learning_rate": 6.794911235218932e-07, "loss": 0.9271608591079712, "step": 1558 }, { "epoch": 1.3131313131313131, "grad_norm": 18.521018981933594, "learning_rate": 6.786909476315342e-07, "loss": 0.473792165517807, "step": 1560 }, { "epoch": 1.3148148148148149, "grad_norm": 7.367074966430664, "learning_rate": 6.778903283467128e-07, "loss": 0.5411000847816467, "step": 1562 }, { "epoch": 1.3164983164983166, "grad_norm": 23.994110107421875, "learning_rate": 6.770892684258995e-07, "loss": 0.5685646533966064, "step": 1564 }, { "epoch": 1.3181818181818181, "grad_norm": 2.764239549636841, "learning_rate": 6.762877706290823e-07, "loss": 1.0790038108825684, "step": 1566 }, { "epoch": 1.3198653198653199, "grad_norm": 13.226496696472168, "learning_rate": 6.754858377177587e-07, "loss": 0.6365941762924194, "step": 1568 }, { "epoch": 1.3215488215488216, "grad_norm": 8.614484786987305, "learning_rate": 6.74683472454925e-07, "loss": 0.9468154907226562, "step": 1570 }, { "epoch": 1.3232323232323233, "grad_norm": 6.47797966003418, "learning_rate": 6.738806776050672e-07, "loss": 0.8475841283798218, "step": 1572 }, { "epoch": 1.3249158249158248, "grad_norm": 5.899251461029053, "learning_rate": 6.730774559341512e-07, "loss": 0.7157614231109619, "step": 1574 }, { "epoch": 1.3265993265993266, "grad_norm": 2.9437053203582764, "learning_rate": 6.722738102096135e-07, "loss": 1.0155985355377197, "step": 1576 }, { "epoch": 1.3282828282828283, "grad_norm": 7.690277576446533, "learning_rate": 6.714697432003519e-07, "loss": 0.8999049663543701, "step": 1578 }, { "epoch": 1.32996632996633, "grad_norm": 8.396078109741211, "learning_rate": 6.706652576767156e-07, "loss": 0.6247600317001343, "step": 1580 }, { "epoch": 1.3316498316498318, "grad_norm": 7.930534362792969, "learning_rate": 6.698603564104958e-07, "loss": 0.6954329013824463, "step": 1582 }, { "epoch": 1.3333333333333333, "grad_norm": 3.4395411014556885, "learning_rate": 6.690550421749157e-07, "loss": 1.0694022178649902, "step": 1584 }, { "epoch": 1.335016835016835, "grad_norm": 4.19919490814209, "learning_rate": 6.682493177446221e-07, "loss": 0.946961522102356, "step": 1586 }, { "epoch": 1.3367003367003367, "grad_norm": 6.434517860412598, "learning_rate": 6.674431858956743e-07, "loss": 0.5836731195449829, "step": 1588 }, { "epoch": 1.3383838383838385, "grad_norm": 4.509551048278809, "learning_rate": 6.666366494055358e-07, "loss": 0.72353595495224, "step": 1590 }, { "epoch": 1.34006734006734, "grad_norm": 17.6490478515625, "learning_rate": 6.658297110530646e-07, "loss": 0.598315954208374, "step": 1592 }, { "epoch": 1.3417508417508417, "grad_norm": 8.337894439697266, "learning_rate": 6.650223736185023e-07, "loss": 1.0166845321655273, "step": 1594 }, { "epoch": 1.3434343434343434, "grad_norm": 27.206972122192383, "learning_rate": 6.642146398834663e-07, "loss": 0.4449620842933655, "step": 1596 }, { "epoch": 1.3451178451178452, "grad_norm": 8.197900772094727, "learning_rate": 6.63406512630939e-07, "loss": 0.9014108180999756, "step": 1598 }, { "epoch": 1.3468013468013469, "grad_norm": 7.576647758483887, "learning_rate": 6.625979946452592e-07, "loss": 0.809765100479126, "step": 1600 }, { "epoch": 1.3484848484848486, "grad_norm": 44.33702087402344, "learning_rate": 6.617890887121111e-07, "loss": 0.7150375843048096, "step": 1602 }, { "epoch": 1.3501683501683501, "grad_norm": 7.649331569671631, "learning_rate": 6.60979797618516e-07, "loss": 0.8225715756416321, "step": 1604 }, { "epoch": 1.3518518518518519, "grad_norm": 4.691690444946289, "learning_rate": 6.601701241528228e-07, "loss": 1.2066047191619873, "step": 1606 }, { "epoch": 1.3535353535353536, "grad_norm": 4.63446569442749, "learning_rate": 6.593600711046969e-07, "loss": 0.924203097820282, "step": 1608 }, { "epoch": 1.355218855218855, "grad_norm": 10.622723579406738, "learning_rate": 6.585496412651116e-07, "loss": 0.5192527770996094, "step": 1610 }, { "epoch": 1.3569023569023568, "grad_norm": 3.938314914703369, "learning_rate": 6.57738837426339e-07, "loss": 1.000133752822876, "step": 1612 }, { "epoch": 1.3585858585858586, "grad_norm": 4.946197986602783, "learning_rate": 6.569276623819396e-07, "loss": 0.6809890270233154, "step": 1614 }, { "epoch": 1.3602693602693603, "grad_norm": 10.934066772460938, "learning_rate": 6.561161189267526e-07, "loss": 0.6985521912574768, "step": 1616 }, { "epoch": 1.361952861952862, "grad_norm": 3.1545114517211914, "learning_rate": 6.553042098568865e-07, "loss": 0.916617214679718, "step": 1618 }, { "epoch": 1.3636363636363638, "grad_norm": 3.2825722694396973, "learning_rate": 6.544919379697099e-07, "loss": 0.729028582572937, "step": 1620 }, { "epoch": 1.3653198653198653, "grad_norm": 3.8294615745544434, "learning_rate": 6.536793060638412e-07, "loss": 1.0753536224365234, "step": 1622 }, { "epoch": 1.367003367003367, "grad_norm": 2.503497838973999, "learning_rate": 6.528663169391391e-07, "loss": 0.9852238893508911, "step": 1624 }, { "epoch": 1.3686868686868687, "grad_norm": 8.0145263671875, "learning_rate": 6.520529733966932e-07, "loss": 0.6827946901321411, "step": 1626 }, { "epoch": 1.3703703703703702, "grad_norm": 3.5943119525909424, "learning_rate": 6.512392782388144e-07, "loss": 0.9226878881454468, "step": 1628 }, { "epoch": 1.372053872053872, "grad_norm": 4.993966102600098, "learning_rate": 6.504252342690247e-07, "loss": 0.9282613396644592, "step": 1630 }, { "epoch": 1.3737373737373737, "grad_norm": 8.258445739746094, "learning_rate": 6.496108442920482e-07, "loss": 1.0419143438339233, "step": 1632 }, { "epoch": 1.3754208754208754, "grad_norm": 11.405352592468262, "learning_rate": 6.48796111113801e-07, "loss": 0.7039163112640381, "step": 1634 }, { "epoch": 1.3771043771043772, "grad_norm": 2.947396755218506, "learning_rate": 6.479810375413819e-07, "loss": 0.39542487263679504, "step": 1636 }, { "epoch": 1.378787878787879, "grad_norm": 8.117940902709961, "learning_rate": 6.471656263830618e-07, "loss": 0.6473898887634277, "step": 1638 }, { "epoch": 1.3804713804713804, "grad_norm": 3.3129611015319824, "learning_rate": 6.463498804482757e-07, "loss": 0.7153133153915405, "step": 1640 }, { "epoch": 1.3821548821548821, "grad_norm": 7.839447498321533, "learning_rate": 6.455338025476116e-07, "loss": 0.9829051494598389, "step": 1642 }, { "epoch": 1.3838383838383839, "grad_norm": 6.2321343421936035, "learning_rate": 6.447173954928011e-07, "loss": 1.191624641418457, "step": 1644 }, { "epoch": 1.3855218855218856, "grad_norm": 8.412511825561523, "learning_rate": 6.439006620967097e-07, "loss": 0.8809744715690613, "step": 1646 }, { "epoch": 1.387205387205387, "grad_norm": 3.1402454376220703, "learning_rate": 6.430836051733282e-07, "loss": 1.0235364437103271, "step": 1648 }, { "epoch": 1.3888888888888888, "grad_norm": 4.362932205200195, "learning_rate": 6.42266227537761e-07, "loss": 0.9193822741508484, "step": 1650 }, { "epoch": 1.3905723905723906, "grad_norm": 2.9106979370117188, "learning_rate": 6.414485320062181e-07, "loss": 1.2303351163864136, "step": 1652 }, { "epoch": 1.3922558922558923, "grad_norm": 3.156247854232788, "learning_rate": 6.406305213960045e-07, "loss": 1.0456502437591553, "step": 1654 }, { "epoch": 1.393939393939394, "grad_norm": 13.951912879943848, "learning_rate": 6.398121985255116e-07, "loss": 0.6429623365402222, "step": 1656 }, { "epoch": 1.3956228956228955, "grad_norm": 2.782015323638916, "learning_rate": 6.389935662142053e-07, "loss": 0.6639566421508789, "step": 1658 }, { "epoch": 1.3973063973063973, "grad_norm": 9.586030960083008, "learning_rate": 6.381746272826186e-07, "loss": 0.9411950707435608, "step": 1660 }, { "epoch": 1.398989898989899, "grad_norm": 25.26241111755371, "learning_rate": 6.373553845523407e-07, "loss": 0.8540170192718506, "step": 1662 }, { "epoch": 1.4006734006734007, "grad_norm": 40.64924240112305, "learning_rate": 6.365358408460076e-07, "loss": 0.7800917625427246, "step": 1664 }, { "epoch": 1.4023569023569022, "grad_norm": 5.472312927246094, "learning_rate": 6.35715998987292e-07, "loss": 0.5686221718788147, "step": 1666 }, { "epoch": 1.404040404040404, "grad_norm": 15.37363338470459, "learning_rate": 6.348958618008943e-07, "loss": 0.8799217939376831, "step": 1668 }, { "epoch": 1.4057239057239057, "grad_norm": 2.726579189300537, "learning_rate": 6.340754321125318e-07, "loss": 0.8866001963615417, "step": 1670 }, { "epoch": 1.4074074074074074, "grad_norm": 2.6039483547210693, "learning_rate": 6.332547127489305e-07, "loss": 0.8179314136505127, "step": 1672 }, { "epoch": 1.4090909090909092, "grad_norm": 3.4876205921173096, "learning_rate": 6.324337065378136e-07, "loss": 1.2043547630310059, "step": 1674 }, { "epoch": 1.410774410774411, "grad_norm": 13.763050079345703, "learning_rate": 6.316124163078927e-07, "loss": 0.488219678401947, "step": 1676 }, { "epoch": 1.4124579124579124, "grad_norm": 8.983017921447754, "learning_rate": 6.307908448888588e-07, "loss": 1.0192590951919556, "step": 1678 }, { "epoch": 1.4141414141414141, "grad_norm": 25.50023651123047, "learning_rate": 6.299689951113709e-07, "loss": 1.12066650390625, "step": 1680 }, { "epoch": 1.4158249158249159, "grad_norm": 2.510024309158325, "learning_rate": 6.29146869807047e-07, "loss": 0.6489291191101074, "step": 1682 }, { "epoch": 1.4175084175084174, "grad_norm": 20.36254119873047, "learning_rate": 6.283244718084551e-07, "loss": 0.5022568702697754, "step": 1684 }, { "epoch": 1.4191919191919191, "grad_norm": 25.578750610351562, "learning_rate": 6.27501803949102e-07, "loss": 0.6631441712379456, "step": 1686 }, { "epoch": 1.4208754208754208, "grad_norm": 3.3692312240600586, "learning_rate": 6.266788690634247e-07, "loss": 1.16062593460083, "step": 1688 }, { "epoch": 1.4225589225589226, "grad_norm": 34.72169876098633, "learning_rate": 6.258556699867804e-07, "loss": 0.5728762149810791, "step": 1690 }, { "epoch": 1.4242424242424243, "grad_norm": 6.119333744049072, "learning_rate": 6.25032209555436e-07, "loss": 0.6605233550071716, "step": 1692 }, { "epoch": 1.425925925925926, "grad_norm": 5.281041622161865, "learning_rate": 6.242084906065592e-07, "loss": 0.6033918261528015, "step": 1694 }, { "epoch": 1.4276094276094276, "grad_norm": 7.152311325073242, "learning_rate": 6.233845159782085e-07, "loss": 1.2653751373291016, "step": 1696 }, { "epoch": 1.4292929292929293, "grad_norm": 5.30517053604126, "learning_rate": 6.22560288509323e-07, "loss": 1.0920519828796387, "step": 1698 }, { "epoch": 1.430976430976431, "grad_norm": 2.4701943397521973, "learning_rate": 6.217358110397133e-07, "loss": 0.8582168817520142, "step": 1700 }, { "epoch": 1.4326599326599325, "grad_norm": 3.8614046573638916, "learning_rate": 6.209110864100511e-07, "loss": 0.8965442776679993, "step": 1702 }, { "epoch": 1.4343434343434343, "grad_norm": 6.42789888381958, "learning_rate": 6.200861174618599e-07, "loss": 0.570695698261261, "step": 1704 }, { "epoch": 1.436026936026936, "grad_norm": 35.690406799316406, "learning_rate": 6.192609070375045e-07, "loss": 0.4622350335121155, "step": 1706 }, { "epoch": 1.4377104377104377, "grad_norm": 12.375661849975586, "learning_rate": 6.184354579801825e-07, "loss": 1.0623770952224731, "step": 1708 }, { "epoch": 1.4393939393939394, "grad_norm": 80.91400146484375, "learning_rate": 6.176097731339128e-07, "loss": 1.1389422416687012, "step": 1710 }, { "epoch": 1.4410774410774412, "grad_norm": 10.364182472229004, "learning_rate": 6.167838553435273e-07, "loss": 1.0922863483428955, "step": 1712 }, { "epoch": 1.4427609427609427, "grad_norm": 2.8876967430114746, "learning_rate": 6.159577074546601e-07, "loss": 1.0083891153335571, "step": 1714 }, { "epoch": 1.4444444444444444, "grad_norm": 26.479930877685547, "learning_rate": 6.151313323137387e-07, "loss": 0.958626925945282, "step": 1716 }, { "epoch": 1.4461279461279462, "grad_norm": 2.9834275245666504, "learning_rate": 6.14304732767973e-07, "loss": 0.8797729015350342, "step": 1718 }, { "epoch": 1.4478114478114479, "grad_norm": 7.285353660583496, "learning_rate": 6.134779116653459e-07, "loss": 0.7979905605316162, "step": 1720 }, { "epoch": 1.4494949494949494, "grad_norm": 13.242645263671875, "learning_rate": 6.126508718546044e-07, "loss": 0.6679774522781372, "step": 1722 }, { "epoch": 1.4511784511784511, "grad_norm": 5.430975437164307, "learning_rate": 6.118236161852486e-07, "loss": 0.7967842221260071, "step": 1724 }, { "epoch": 1.4528619528619529, "grad_norm": 15.7615385055542, "learning_rate": 6.10996147507522e-07, "loss": 0.9348810911178589, "step": 1726 }, { "epoch": 1.4545454545454546, "grad_norm": 14.294349670410156, "learning_rate": 6.101684686724027e-07, "loss": 0.7149630188941956, "step": 1728 }, { "epoch": 1.4562289562289563, "grad_norm": 4.464022636413574, "learning_rate": 6.093405825315923e-07, "loss": 1.0214498043060303, "step": 1730 }, { "epoch": 1.457912457912458, "grad_norm": 3.1939473152160645, "learning_rate": 6.08512491937507e-07, "loss": 1.2640581130981445, "step": 1732 }, { "epoch": 1.4595959595959596, "grad_norm": 4.230099678039551, "learning_rate": 6.076841997432677e-07, "loss": 0.9663617014884949, "step": 1734 }, { "epoch": 1.4612794612794613, "grad_norm": 11.766712188720703, "learning_rate": 6.06855708802689e-07, "loss": 0.7833054065704346, "step": 1736 }, { "epoch": 1.462962962962963, "grad_norm": 6.552529811859131, "learning_rate": 6.060270219702709e-07, "loss": 0.6994054317474365, "step": 1738 }, { "epoch": 1.4646464646464645, "grad_norm": 2.931861400604248, "learning_rate": 6.051981421011882e-07, "loss": 1.1358039379119873, "step": 1740 }, { "epoch": 1.4663299663299663, "grad_norm": 9.284839630126953, "learning_rate": 6.043690720512812e-07, "loss": 0.7364188432693481, "step": 1742 }, { "epoch": 1.468013468013468, "grad_norm": 5.37172794342041, "learning_rate": 6.035398146770444e-07, "loss": 0.5165277123451233, "step": 1744 }, { "epoch": 1.4696969696969697, "grad_norm": 5.121616363525391, "learning_rate": 6.027103728356189e-07, "loss": 1.0125455856323242, "step": 1746 }, { "epoch": 1.4713804713804715, "grad_norm": 2.773219347000122, "learning_rate": 6.018807493847804e-07, "loss": 1.035334825515747, "step": 1748 }, { "epoch": 1.4730639730639732, "grad_norm": 7.262451171875, "learning_rate": 6.010509471829312e-07, "loss": 0.7966405153274536, "step": 1750 }, { "epoch": 1.4747474747474747, "grad_norm": 7.338104248046875, "learning_rate": 6.002209690890889e-07, "loss": 0.7077836990356445, "step": 1752 }, { "epoch": 1.4764309764309764, "grad_norm": 7.950678825378418, "learning_rate": 5.993908179628772e-07, "loss": 0.7144612073898315, "step": 1754 }, { "epoch": 1.4781144781144782, "grad_norm": 9.630928039550781, "learning_rate": 5.985604966645159e-07, "loss": 0.8856356143951416, "step": 1756 }, { "epoch": 1.4797979797979797, "grad_norm": 15.059102058410645, "learning_rate": 5.977300080548113e-07, "loss": 0.7022537589073181, "step": 1758 }, { "epoch": 1.4814814814814814, "grad_norm": 8.7070894241333, "learning_rate": 5.968993549951463e-07, "loss": 0.764058530330658, "step": 1760 }, { "epoch": 1.4831649831649831, "grad_norm": 8.469696998596191, "learning_rate": 5.9606854034747e-07, "loss": 0.9842470288276672, "step": 1762 }, { "epoch": 1.4848484848484849, "grad_norm": 3.3772764205932617, "learning_rate": 5.952375669742885e-07, "loss": 0.9660754799842834, "step": 1764 }, { "epoch": 1.4865319865319866, "grad_norm": 15.527210235595703, "learning_rate": 5.944064377386546e-07, "loss": 0.7293991446495056, "step": 1766 }, { "epoch": 1.4882154882154883, "grad_norm": 7.509492874145508, "learning_rate": 5.935751555041584e-07, "loss": 0.8063384294509888, "step": 1768 }, { "epoch": 1.4898989898989898, "grad_norm": 4.355234622955322, "learning_rate": 5.927437231349168e-07, "loss": 1.001720666885376, "step": 1770 }, { "epoch": 1.4915824915824916, "grad_norm": 12.318822860717773, "learning_rate": 5.919121434955643e-07, "loss": 0.4859294295310974, "step": 1772 }, { "epoch": 1.4932659932659933, "grad_norm": 2.495269536972046, "learning_rate": 5.910804194512425e-07, "loss": 0.8450926542282104, "step": 1774 }, { "epoch": 1.494949494949495, "grad_norm": 11.203375816345215, "learning_rate": 5.902485538675909e-07, "loss": 0.8008178472518921, "step": 1776 }, { "epoch": 1.4966329966329965, "grad_norm": 7.061748504638672, "learning_rate": 5.894165496107362e-07, "loss": 0.9183659553527832, "step": 1778 }, { "epoch": 1.4983164983164983, "grad_norm": 10.182241439819336, "learning_rate": 5.885844095472832e-07, "loss": 0.9454483985900879, "step": 1780 }, { "epoch": 1.5, "grad_norm": 10.898093223571777, "learning_rate": 5.877521365443047e-07, "loss": 0.612937331199646, "step": 1782 }, { "epoch": 1.5016835016835017, "grad_norm": 4.307864665985107, "learning_rate": 5.869197334693311e-07, "loss": 1.2052326202392578, "step": 1784 }, { "epoch": 1.5033670033670035, "grad_norm": 5.633955478668213, "learning_rate": 5.860872031903415e-07, "loss": 0.8493650555610657, "step": 1786 }, { "epoch": 1.5050505050505052, "grad_norm": 4.648436069488525, "learning_rate": 5.85254548575753e-07, "loss": 1.030457615852356, "step": 1788 }, { "epoch": 1.5067340067340067, "grad_norm": 19.26193618774414, "learning_rate": 5.84421772494411e-07, "loss": 0.6253769397735596, "step": 1790 }, { "epoch": 1.5084175084175084, "grad_norm": 18.525527954101562, "learning_rate": 5.835888778155793e-07, "loss": 0.6486117839813232, "step": 1792 }, { "epoch": 1.51010101010101, "grad_norm": 11.307801246643066, "learning_rate": 5.827558674089309e-07, "loss": 0.9593780636787415, "step": 1794 }, { "epoch": 1.5117845117845117, "grad_norm": 8.364538192749023, "learning_rate": 5.81922744144537e-07, "loss": 0.9520887136459351, "step": 1796 }, { "epoch": 1.5134680134680134, "grad_norm": 20.700618743896484, "learning_rate": 5.810895108928576e-07, "loss": 1.0315901041030884, "step": 1798 }, { "epoch": 1.5151515151515151, "grad_norm": 5.827000617980957, "learning_rate": 5.802561705247322e-07, "loss": 0.8540360331535339, "step": 1800 }, { "epoch": 1.5168350168350169, "grad_norm": 5.0441365242004395, "learning_rate": 5.794227259113688e-07, "loss": 1.0596797466278076, "step": 1802 }, { "epoch": 1.5185185185185186, "grad_norm": 15.39765453338623, "learning_rate": 5.785891799243345e-07, "loss": 0.9995817542076111, "step": 1804 }, { "epoch": 1.5202020202020203, "grad_norm": 3.5208356380462646, "learning_rate": 5.777555354355465e-07, "loss": 0.8799208402633667, "step": 1806 }, { "epoch": 1.5218855218855218, "grad_norm": 19.627885818481445, "learning_rate": 5.769217953172606e-07, "loss": 0.7398556470870972, "step": 1808 }, { "epoch": 1.5235690235690236, "grad_norm": 6.568966865539551, "learning_rate": 5.760879624420619e-07, "loss": 0.7647089958190918, "step": 1810 }, { "epoch": 1.5252525252525253, "grad_norm": 1.675675868988037, "learning_rate": 5.752540396828562e-07, "loss": 0.31169167160987854, "step": 1812 }, { "epoch": 1.5269360269360268, "grad_norm": 2.579169273376465, "learning_rate": 5.744200299128579e-07, "loss": 1.1429425477981567, "step": 1814 }, { "epoch": 1.5286195286195285, "grad_norm": 23.35523796081543, "learning_rate": 5.735859360055814e-07, "loss": 0.8635933995246887, "step": 1816 }, { "epoch": 1.5303030303030303, "grad_norm": 21.272926330566406, "learning_rate": 5.727517608348317e-07, "loss": 0.947623610496521, "step": 1818 }, { "epoch": 1.531986531986532, "grad_norm": 5.091054916381836, "learning_rate": 5.719175072746926e-07, "loss": 0.8388112187385559, "step": 1820 }, { "epoch": 1.5336700336700337, "grad_norm": 5.891815185546875, "learning_rate": 5.710831781995191e-07, "loss": 0.7908442616462708, "step": 1822 }, { "epoch": 1.5353535353535355, "grad_norm": 5.613356113433838, "learning_rate": 5.702487764839258e-07, "loss": 1.0302139520645142, "step": 1824 }, { "epoch": 1.5370370370370372, "grad_norm": 5.067226886749268, "learning_rate": 5.694143050027778e-07, "loss": 0.9267786145210266, "step": 1826 }, { "epoch": 1.5387205387205387, "grad_norm": 5.48887300491333, "learning_rate": 5.685797666311801e-07, "loss": 0.9696795344352722, "step": 1828 }, { "epoch": 1.5404040404040404, "grad_norm": 6.487977981567383, "learning_rate": 5.677451642444689e-07, "loss": 0.7679098844528198, "step": 1830 }, { "epoch": 1.542087542087542, "grad_norm": 8.134166717529297, "learning_rate": 5.669105007182005e-07, "loss": 0.7442073822021484, "step": 1832 }, { "epoch": 1.5437710437710437, "grad_norm": 4.254693984985352, "learning_rate": 5.660757789281417e-07, "loss": 1.0978777408599854, "step": 1834 }, { "epoch": 1.5454545454545454, "grad_norm": 11.371539115905762, "learning_rate": 5.652410017502606e-07, "loss": 0.9501652717590332, "step": 1836 }, { "epoch": 1.5471380471380471, "grad_norm": 16.133960723876953, "learning_rate": 5.644061720607157e-07, "loss": 0.536079466342926, "step": 1838 }, { "epoch": 1.5488215488215489, "grad_norm": 3.337813377380371, "learning_rate": 5.635712927358466e-07, "loss": 0.7914686799049377, "step": 1840 }, { "epoch": 1.5505050505050506, "grad_norm": 3.199794292449951, "learning_rate": 5.627363666521635e-07, "loss": 0.6903548240661621, "step": 1842 }, { "epoch": 1.5521885521885523, "grad_norm": 6.261875152587891, "learning_rate": 5.619013966863388e-07, "loss": 0.5220504403114319, "step": 1844 }, { "epoch": 1.5538720538720538, "grad_norm": 3.182934284210205, "learning_rate": 5.610663857151945e-07, "loss": 0.9434134364128113, "step": 1846 }, { "epoch": 1.5555555555555556, "grad_norm": 10.680120468139648, "learning_rate": 5.602313366156953e-07, "loss": 1.0320630073547363, "step": 1848 }, { "epoch": 1.557239057239057, "grad_norm": 3.8439457416534424, "learning_rate": 5.593962522649366e-07, "loss": 0.837065577507019, "step": 1850 }, { "epoch": 1.5589225589225588, "grad_norm": 3.5652201175689697, "learning_rate": 5.585611355401352e-07, "loss": 0.9864023923873901, "step": 1852 }, { "epoch": 1.5606060606060606, "grad_norm": 20.397172927856445, "learning_rate": 5.577259893186196e-07, "loss": 0.6269755363464355, "step": 1854 }, { "epoch": 1.5622895622895623, "grad_norm": 5.430056571960449, "learning_rate": 5.568908164778201e-07, "loss": 0.6682024598121643, "step": 1856 }, { "epoch": 1.563973063973064, "grad_norm": 3.305800199508667, "learning_rate": 5.560556198952585e-07, "loss": 1.017985224723816, "step": 1858 }, { "epoch": 1.5656565656565657, "grad_norm": 10.345197677612305, "learning_rate": 5.552204024485382e-07, "loss": 0.46250391006469727, "step": 1860 }, { "epoch": 1.5673400673400675, "grad_norm": 3.9617226123809814, "learning_rate": 5.543851670153353e-07, "loss": 1.0285084247589111, "step": 1862 }, { "epoch": 1.569023569023569, "grad_norm": 4.295073509216309, "learning_rate": 5.535499164733869e-07, "loss": 0.44839808344841003, "step": 1864 }, { "epoch": 1.5707070707070707, "grad_norm": 9.806756973266602, "learning_rate": 5.527146537004823e-07, "loss": 1.037379503250122, "step": 1866 }, { "epoch": 1.5723905723905722, "grad_norm": 7.301255702972412, "learning_rate": 5.518793815744538e-07, "loss": 0.6518345475196838, "step": 1868 }, { "epoch": 1.574074074074074, "grad_norm": 2.5327539443969727, "learning_rate": 5.510441029731648e-07, "loss": 0.8190163969993591, "step": 1870 }, { "epoch": 1.5757575757575757, "grad_norm": 5.190461158752441, "learning_rate": 5.502088207745018e-07, "loss": 0.8958265781402588, "step": 1872 }, { "epoch": 1.5774410774410774, "grad_norm": 4.127246379852295, "learning_rate": 5.493735378563634e-07, "loss": 1.0178121328353882, "step": 1874 }, { "epoch": 1.5791245791245792, "grad_norm": 6.272322654724121, "learning_rate": 5.485382570966506e-07, "loss": 0.6380331516265869, "step": 1876 }, { "epoch": 1.5808080808080809, "grad_norm": 4.318612575531006, "learning_rate": 5.477029813732572e-07, "loss": 1.184647798538208, "step": 1878 }, { "epoch": 1.5824915824915826, "grad_norm": 13.378433227539062, "learning_rate": 5.468677135640595e-07, "loss": 0.8356841802597046, "step": 1880 }, { "epoch": 1.5841750841750841, "grad_norm": 6.793831825256348, "learning_rate": 5.460324565469065e-07, "loss": 0.5384290218353271, "step": 1882 }, { "epoch": 1.5858585858585859, "grad_norm": 10.296432495117188, "learning_rate": 5.4519721319961e-07, "loss": 0.574350893497467, "step": 1884 }, { "epoch": 1.5875420875420876, "grad_norm": 2.8328824043273926, "learning_rate": 5.443619863999349e-07, "loss": 0.7007859945297241, "step": 1886 }, { "epoch": 1.589225589225589, "grad_norm": 4.71426248550415, "learning_rate": 5.435267790255889e-07, "loss": 1.0490843057632446, "step": 1888 }, { "epoch": 1.5909090909090908, "grad_norm": 4.223301887512207, "learning_rate": 5.426915939542127e-07, "loss": 0.2507448196411133, "step": 1890 }, { "epoch": 1.5925925925925926, "grad_norm": 4.74931526184082, "learning_rate": 5.418564340633704e-07, "loss": 1.1350317001342773, "step": 1892 }, { "epoch": 1.5942760942760943, "grad_norm": 4.932158470153809, "learning_rate": 5.410213022305395e-07, "loss": 0.8503820300102234, "step": 1894 }, { "epoch": 1.595959595959596, "grad_norm": 14.765481948852539, "learning_rate": 5.401862013331e-07, "loss": 1.011979103088379, "step": 1896 }, { "epoch": 1.5976430976430978, "grad_norm": 19.991121292114258, "learning_rate": 5.393511342483262e-07, "loss": 0.9245116710662842, "step": 1898 }, { "epoch": 1.5993265993265995, "grad_norm": 8.519593238830566, "learning_rate": 5.385161038533756e-07, "loss": 1.0895578861236572, "step": 1900 }, { "epoch": 1.601010101010101, "grad_norm": 7.841440200805664, "learning_rate": 5.376811130252791e-07, "loss": 0.9659103155136108, "step": 1902 }, { "epoch": 1.6026936026936027, "grad_norm": 20.247276306152344, "learning_rate": 5.368461646409316e-07, "loss": 0.796362042427063, "step": 1904 }, { "epoch": 1.6043771043771042, "grad_norm": 3.420994281768799, "learning_rate": 5.360112615770814e-07, "loss": 1.1793514490127563, "step": 1906 }, { "epoch": 1.606060606060606, "grad_norm": 3.9010415077209473, "learning_rate": 5.351764067103209e-07, "loss": 0.9917897582054138, "step": 1908 }, { "epoch": 1.6077441077441077, "grad_norm": 2.7143442630767822, "learning_rate": 5.343416029170767e-07, "loss": 0.6407607793807983, "step": 1910 }, { "epoch": 1.6094276094276094, "grad_norm": 30.25970458984375, "learning_rate": 5.335068530735986e-07, "loss": 0.6329153776168823, "step": 1912 }, { "epoch": 1.6111111111111112, "grad_norm": 24.24694061279297, "learning_rate": 5.326721600559513e-07, "loss": 0.8712905645370483, "step": 1914 }, { "epoch": 1.612794612794613, "grad_norm": 8.489051818847656, "learning_rate": 5.318375267400035e-07, "loss": 0.7373044490814209, "step": 1916 }, { "epoch": 1.6144781144781146, "grad_norm": 17.98837661743164, "learning_rate": 5.310029560014182e-07, "loss": 0.6858376860618591, "step": 1918 }, { "epoch": 1.6161616161616161, "grad_norm": 8.963407516479492, "learning_rate": 5.301684507156424e-07, "loss": 0.7940559983253479, "step": 1920 }, { "epoch": 1.6178451178451179, "grad_norm": 3.7711410522460938, "learning_rate": 5.293340137578983e-07, "loss": 0.9433008432388306, "step": 1922 }, { "epoch": 1.6195286195286194, "grad_norm": 3.9224212169647217, "learning_rate": 5.284996480031722e-07, "loss": 0.7148711085319519, "step": 1924 }, { "epoch": 1.621212121212121, "grad_norm": 4.903892993927002, "learning_rate": 5.276653563262053e-07, "loss": 0.6378931403160095, "step": 1926 }, { "epoch": 1.6228956228956228, "grad_norm": 2.4689173698425293, "learning_rate": 5.268311416014831e-07, "loss": 0.8439034223556519, "step": 1928 }, { "epoch": 1.6245791245791246, "grad_norm": 10.568015098571777, "learning_rate": 5.259970067032267e-07, "loss": 0.8784427642822266, "step": 1930 }, { "epoch": 1.6262626262626263, "grad_norm": 3.8736679553985596, "learning_rate": 5.251629545053817e-07, "loss": 0.711959958076477, "step": 1932 }, { "epoch": 1.627946127946128, "grad_norm": 8.50756549835205, "learning_rate": 5.243289878816088e-07, "loss": 1.071230173110962, "step": 1934 }, { "epoch": 1.6296296296296298, "grad_norm": 7.397336006164551, "learning_rate": 5.23495109705274e-07, "loss": 1.075880765914917, "step": 1936 }, { "epoch": 1.6313131313131313, "grad_norm": 4.465485572814941, "learning_rate": 5.226613228494383e-07, "loss": 1.1016345024108887, "step": 1938 }, { "epoch": 1.632996632996633, "grad_norm": 3.5152881145477295, "learning_rate": 5.218276301868484e-07, "loss": 0.8878377676010132, "step": 1940 }, { "epoch": 1.6346801346801347, "grad_norm": 2.7900075912475586, "learning_rate": 5.209940345899263e-07, "loss": 1.0775192975997925, "step": 1942 }, { "epoch": 1.6363636363636362, "grad_norm": 16.28611183166504, "learning_rate": 5.201605389307595e-07, "loss": 0.8081328868865967, "step": 1944 }, { "epoch": 1.638047138047138, "grad_norm": 4.536927223205566, "learning_rate": 5.193271460810912e-07, "loss": 0.5076104998588562, "step": 1946 }, { "epoch": 1.6397306397306397, "grad_norm": 7.646073341369629, "learning_rate": 5.184938589123105e-07, "loss": 1.030837059020996, "step": 1948 }, { "epoch": 1.6414141414141414, "grad_norm": 3.751291036605835, "learning_rate": 5.176606802954427e-07, "loss": 1.0447328090667725, "step": 1950 }, { "epoch": 1.6430976430976432, "grad_norm": 10.364418983459473, "learning_rate": 5.168276131011378e-07, "loss": 0.5750001072883606, "step": 1952 }, { "epoch": 1.644781144781145, "grad_norm": 12.142930030822754, "learning_rate": 5.159946601996638e-07, "loss": 0.5072500705718994, "step": 1954 }, { "epoch": 1.6464646464646466, "grad_norm": 26.362775802612305, "learning_rate": 5.151618244608931e-07, "loss": 0.3224486708641052, "step": 1956 }, { "epoch": 1.6481481481481481, "grad_norm": 5.14034366607666, "learning_rate": 5.143291087542957e-07, "loss": 0.7505396604537964, "step": 1958 }, { "epoch": 1.6498316498316499, "grad_norm": 3.361147880554199, "learning_rate": 5.134965159489276e-07, "loss": 0.8362823128700256, "step": 1960 }, { "epoch": 1.6515151515151514, "grad_norm": 19.640413284301758, "learning_rate": 5.126640489134211e-07, "loss": 0.7406565546989441, "step": 1962 }, { "epoch": 1.6531986531986531, "grad_norm": 8.70249080657959, "learning_rate": 5.118317105159754e-07, "loss": 0.5722910761833191, "step": 1964 }, { "epoch": 1.6548821548821548, "grad_norm": 4.43184232711792, "learning_rate": 5.109995036243469e-07, "loss": 0.6934190392494202, "step": 1966 }, { "epoch": 1.6565656565656566, "grad_norm": 6.205933094024658, "learning_rate": 5.10167431105838e-07, "loss": 0.8717750310897827, "step": 1968 }, { "epoch": 1.6582491582491583, "grad_norm": 11.131174087524414, "learning_rate": 5.093354958272888e-07, "loss": 0.8401749730110168, "step": 1970 }, { "epoch": 1.65993265993266, "grad_norm": 7.66545295715332, "learning_rate": 5.085037006550664e-07, "loss": 0.9823508858680725, "step": 1972 }, { "epoch": 1.6616161616161618, "grad_norm": 2.336907148361206, "learning_rate": 5.076720484550552e-07, "loss": 0.8289145231246948, "step": 1974 }, { "epoch": 1.6632996632996633, "grad_norm": 4.420996189117432, "learning_rate": 5.068405420926468e-07, "loss": 0.787537693977356, "step": 1976 }, { "epoch": 1.664983164983165, "grad_norm": 16.187654495239258, "learning_rate": 5.060091844327308e-07, "loss": 0.8101489543914795, "step": 1978 }, { "epoch": 1.6666666666666665, "grad_norm": 6.166725158691406, "learning_rate": 5.051779783396839e-07, "loss": 0.9080666303634644, "step": 1980 }, { "epoch": 1.6683501683501682, "grad_norm": 14.882169723510742, "learning_rate": 5.043469266773607e-07, "loss": 0.5505136251449585, "step": 1982 }, { "epoch": 1.67003367003367, "grad_norm": 20.98061180114746, "learning_rate": 5.035160323090842e-07, "loss": 0.4539128839969635, "step": 1984 }, { "epoch": 1.6717171717171717, "grad_norm": 3.427556276321411, "learning_rate": 5.026852980976348e-07, "loss": 1.0426026582717896, "step": 1986 }, { "epoch": 1.6734006734006734, "grad_norm": 13.226459503173828, "learning_rate": 5.018547269052416e-07, "loss": 0.9861583113670349, "step": 1988 }, { "epoch": 1.6750841750841752, "grad_norm": 3.2640278339385986, "learning_rate": 5.010243215935715e-07, "loss": 0.6827632784843445, "step": 1990 }, { "epoch": 1.676767676767677, "grad_norm": 3.51690673828125, "learning_rate": 5.001940850237208e-07, "loss": 1.151839256286621, "step": 1992 }, { "epoch": 1.6784511784511784, "grad_norm": 9.070838928222656, "learning_rate": 4.993640200562031e-07, "loss": 0.7563179731369019, "step": 1994 }, { "epoch": 1.6801346801346801, "grad_norm": 7.1710896492004395, "learning_rate": 4.985341295509421e-07, "loss": 0.6942537426948547, "step": 1996 }, { "epoch": 1.6818181818181817, "grad_norm": 2.580467939376831, "learning_rate": 4.977044163672595e-07, "loss": 0.9790170192718506, "step": 1998 }, { "epoch": 1.6835016835016834, "grad_norm": 13.908555030822754, "learning_rate": 4.968748833638661e-07, "loss": 0.7780789136886597, "step": 2000 }, { "epoch": 1.6851851851851851, "grad_norm": 4.1657209396362305, "learning_rate": 4.960455333988525e-07, "loss": 0.6467783451080322, "step": 2002 }, { "epoch": 1.6868686868686869, "grad_norm": 8.925399780273438, "learning_rate": 4.952163693296782e-07, "loss": 0.7447915077209473, "step": 2004 }, { "epoch": 1.6885521885521886, "grad_norm": 9.181722640991211, "learning_rate": 4.943873940131618e-07, "loss": 0.6678234338760376, "step": 2006 }, { "epoch": 1.6902356902356903, "grad_norm": 4.147680282592773, "learning_rate": 4.935586103054729e-07, "loss": 0.9828382730484009, "step": 2008 }, { "epoch": 1.691919191919192, "grad_norm": 4.527743339538574, "learning_rate": 4.927300210621198e-07, "loss": 0.6916370987892151, "step": 2010 }, { "epoch": 1.6936026936026936, "grad_norm": 8.658282279968262, "learning_rate": 4.919016291379407e-07, "loss": 0.9242024421691895, "step": 2012 }, { "epoch": 1.6952861952861953, "grad_norm": 4.856821537017822, "learning_rate": 4.910734373870946e-07, "loss": 0.6717578172683716, "step": 2014 }, { "epoch": 1.696969696969697, "grad_norm": 6.037668704986572, "learning_rate": 4.902454486630506e-07, "loss": 0.8340665102005005, "step": 2016 }, { "epoch": 1.6986531986531985, "grad_norm": 46.316307067871094, "learning_rate": 4.894176658185781e-07, "loss": 0.8020853996276855, "step": 2018 }, { "epoch": 1.7003367003367003, "grad_norm": 7.765172958374023, "learning_rate": 4.885900917057374e-07, "loss": 0.8143132328987122, "step": 2020 }, { "epoch": 1.702020202020202, "grad_norm": 2.976177930831909, "learning_rate": 4.877627291758697e-07, "loss": 1.0872082710266113, "step": 2022 }, { "epoch": 1.7037037037037037, "grad_norm": 7.9460225105285645, "learning_rate": 4.869355810795866e-07, "loss": 0.8318688273429871, "step": 2024 }, { "epoch": 1.7053872053872055, "grad_norm": 5.210888385772705, "learning_rate": 4.861086502667617e-07, "loss": 0.9813876152038574, "step": 2026 }, { "epoch": 1.7070707070707072, "grad_norm": 6.269561767578125, "learning_rate": 4.852819395865196e-07, "loss": 1.1104636192321777, "step": 2028 }, { "epoch": 1.708754208754209, "grad_norm": 14.44339656829834, "learning_rate": 4.844554518872261e-07, "loss": 0.6626958847045898, "step": 2030 }, { "epoch": 1.7104377104377104, "grad_norm": 22.18317413330078, "learning_rate": 4.836291900164793e-07, "loss": 0.5179702639579773, "step": 2032 }, { "epoch": 1.7121212121212122, "grad_norm": 4.272150039672852, "learning_rate": 4.82803156821099e-07, "loss": 1.0629268884658813, "step": 2034 }, { "epoch": 1.7138047138047137, "grad_norm": 4.388714790344238, "learning_rate": 4.81977355147117e-07, "loss": 0.8111241459846497, "step": 2036 }, { "epoch": 1.7154882154882154, "grad_norm": 10.06100082397461, "learning_rate": 4.811517878397676e-07, "loss": 0.4932488799095154, "step": 2038 }, { "epoch": 1.7171717171717171, "grad_norm": 6.3692474365234375, "learning_rate": 4.803264577434778e-07, "loss": 0.5541532039642334, "step": 2040 }, { "epoch": 1.7188552188552189, "grad_norm": 3.8727078437805176, "learning_rate": 4.795013677018567e-07, "loss": 0.9600075483322144, "step": 2042 }, { "epoch": 1.7205387205387206, "grad_norm": 3.6546130180358887, "learning_rate": 4.786765205576866e-07, "loss": 0.9439678192138672, "step": 2044 }, { "epoch": 1.7222222222222223, "grad_norm": 2.5347650051116943, "learning_rate": 4.778519191529133e-07, "loss": 1.1322201490402222, "step": 2046 }, { "epoch": 1.723905723905724, "grad_norm": 3.2225019931793213, "learning_rate": 4.770275663286354e-07, "loss": 1.0858080387115479, "step": 2048 }, { "epoch": 1.7255892255892256, "grad_norm": 7.802936553955078, "learning_rate": 4.762034649250951e-07, "loss": 0.4231239855289459, "step": 2050 }, { "epoch": 1.7272727272727273, "grad_norm": 2.6615946292877197, "learning_rate": 4.753796177816688e-07, "loss": 1.0833523273468018, "step": 2052 }, { "epoch": 1.7289562289562288, "grad_norm": 12.313030242919922, "learning_rate": 4.745560277368563e-07, "loss": 0.9946305751800537, "step": 2054 }, { "epoch": 1.7306397306397305, "grad_norm": 4.885106563568115, "learning_rate": 4.7373269762827196e-07, "loss": 0.8092712163925171, "step": 2056 }, { "epoch": 1.7323232323232323, "grad_norm": 6.8623809814453125, "learning_rate": 4.7290963029263453e-07, "loss": 1.1297715902328491, "step": 2058 }, { "epoch": 1.734006734006734, "grad_norm": 3.386683702468872, "learning_rate": 4.720868285657571e-07, "loss": 0.6623663902282715, "step": 2060 }, { "epoch": 1.7356902356902357, "grad_norm": 7.138562202453613, "learning_rate": 4.7126429528253775e-07, "loss": 1.0328242778778076, "step": 2062 }, { "epoch": 1.7373737373737375, "grad_norm": 12.482364654541016, "learning_rate": 4.7044203327694995e-07, "loss": 0.7162414789199829, "step": 2064 }, { "epoch": 1.7390572390572392, "grad_norm": 2.729790449142456, "learning_rate": 4.6962004538203224e-07, "loss": 0.74675053358078, "step": 2066 }, { "epoch": 1.7407407407407407, "grad_norm": 11.824975967407227, "learning_rate": 4.687983344298786e-07, "loss": 0.8567626476287842, "step": 2068 }, { "epoch": 1.7424242424242424, "grad_norm": 18.96659278869629, "learning_rate": 4.679769032516293e-07, "loss": 0.7988073825836182, "step": 2070 }, { "epoch": 1.7441077441077442, "grad_norm": 9.409902572631836, "learning_rate": 4.6715575467746014e-07, "loss": 0.6924943923950195, "step": 2072 }, { "epoch": 1.7457912457912457, "grad_norm": 4.546468257904053, "learning_rate": 4.663348915365735e-07, "loss": 0.5785316228866577, "step": 2074 }, { "epoch": 1.7474747474747474, "grad_norm": 19.581872940063477, "learning_rate": 4.6551431665718833e-07, "loss": 1.1338218450546265, "step": 2076 }, { "epoch": 1.7491582491582491, "grad_norm": 5.740807056427002, "learning_rate": 4.646940328665302e-07, "loss": 0.8011679649353027, "step": 2078 }, { "epoch": 1.7508417508417509, "grad_norm": 11.178342819213867, "learning_rate": 4.638740429908222e-07, "loss": 1.0102814435958862, "step": 2080 }, { "epoch": 1.7525252525252526, "grad_norm": 5.02017879486084, "learning_rate": 4.6305434985527437e-07, "loss": 0.7039767503738403, "step": 2082 }, { "epoch": 1.7542087542087543, "grad_norm": 2.7927052974700928, "learning_rate": 4.6223495628407427e-07, "loss": 1.2280118465423584, "step": 2084 }, { "epoch": 1.7558922558922558, "grad_norm": 3.4108667373657227, "learning_rate": 4.614158651003778e-07, "loss": 0.8403428196907043, "step": 2086 }, { "epoch": 1.7575757575757576, "grad_norm": 7.315975189208984, "learning_rate": 4.605970791262984e-07, "loss": 0.5117719769477844, "step": 2088 }, { "epoch": 1.7592592592592593, "grad_norm": 26.32462501525879, "learning_rate": 4.5977860118289846e-07, "loss": 0.5781146287918091, "step": 2090 }, { "epoch": 1.7609427609427608, "grad_norm": 6.7758965492248535, "learning_rate": 4.5896043409017895e-07, "loss": 0.6854249238967896, "step": 2092 }, { "epoch": 1.7626262626262625, "grad_norm": 8.735897064208984, "learning_rate": 4.5814258066706946e-07, "loss": 0.4588479995727539, "step": 2094 }, { "epoch": 1.7643097643097643, "grad_norm": 3.5783393383026123, "learning_rate": 4.5732504373141957e-07, "loss": 0.6785897612571716, "step": 2096 }, { "epoch": 1.765993265993266, "grad_norm": 3.7991697788238525, "learning_rate": 4.5650782609998785e-07, "loss": 1.091996192932129, "step": 2098 }, { "epoch": 1.7676767676767677, "grad_norm": 4.503328800201416, "learning_rate": 4.556909305884327e-07, "loss": 0.9916384816169739, "step": 2100 }, { "epoch": 1.7693602693602695, "grad_norm": 4.141926288604736, "learning_rate": 4.5487436001130295e-07, "loss": 0.9449851512908936, "step": 2102 }, { "epoch": 1.7710437710437712, "grad_norm": 13.318826675415039, "learning_rate": 4.5405811718202804e-07, "loss": 0.5735121369361877, "step": 2104 }, { "epoch": 1.7727272727272727, "grad_norm": 4.924741268157959, "learning_rate": 4.5324220491290765e-07, "loss": 0.7375026941299438, "step": 2106 }, { "epoch": 1.7744107744107744, "grad_norm": 7.583310127258301, "learning_rate": 4.5242662601510305e-07, "loss": 0.9382034540176392, "step": 2108 }, { "epoch": 1.776094276094276, "grad_norm": 60.219932556152344, "learning_rate": 4.516113832986267e-07, "loss": 0.6118134260177612, "step": 2110 }, { "epoch": 1.7777777777777777, "grad_norm": 2.8085687160491943, "learning_rate": 4.5079647957233256e-07, "loss": 0.869990348815918, "step": 2112 }, { "epoch": 1.7794612794612794, "grad_norm": 2.663541078567505, "learning_rate": 4.499819176439071e-07, "loss": 0.9881576299667358, "step": 2114 }, { "epoch": 1.7811447811447811, "grad_norm": 3.3883938789367676, "learning_rate": 4.4916770031985887e-07, "loss": 0.9770991206169128, "step": 2116 }, { "epoch": 1.7828282828282829, "grad_norm": 3.3858611583709717, "learning_rate": 4.48353830405509e-07, "loss": 1.073500394821167, "step": 2118 }, { "epoch": 1.7845117845117846, "grad_norm": 29.282451629638672, "learning_rate": 4.475403107049819e-07, "loss": 0.6810465455055237, "step": 2120 }, { "epoch": 1.7861952861952863, "grad_norm": 12.527602195739746, "learning_rate": 4.4672714402119514e-07, "loss": 0.682815432548523, "step": 2122 }, { "epoch": 1.7878787878787878, "grad_norm": 8.23759937286377, "learning_rate": 4.4591433315585025e-07, "loss": 0.7326172590255737, "step": 2124 }, { "epoch": 1.7895622895622896, "grad_norm": 2.9576361179351807, "learning_rate": 4.4510188090942246e-07, "loss": 0.736370861530304, "step": 2126 }, { "epoch": 1.791245791245791, "grad_norm": 2.443329095840454, "learning_rate": 4.4428979008115173e-07, "loss": 0.7781453728675842, "step": 2128 }, { "epoch": 1.7929292929292928, "grad_norm": 8.095796585083008, "learning_rate": 4.434780634690326e-07, "loss": 0.7423359155654907, "step": 2130 }, { "epoch": 1.7946127946127945, "grad_norm": 4.694947719573975, "learning_rate": 4.426667038698049e-07, "loss": 0.5872843265533447, "step": 2132 }, { "epoch": 1.7962962962962963, "grad_norm": 4.841182708740234, "learning_rate": 4.418557140789436e-07, "loss": 0.769493579864502, "step": 2134 }, { "epoch": 1.797979797979798, "grad_norm": 35.13887023925781, "learning_rate": 4.4104509689065016e-07, "loss": 0.40486854314804077, "step": 2136 }, { "epoch": 1.7996632996632997, "grad_norm": 4.075418472290039, "learning_rate": 4.402348550978414e-07, "loss": 1.0084233283996582, "step": 2138 }, { "epoch": 1.8013468013468015, "grad_norm": 5.782071590423584, "learning_rate": 4.394249914921415e-07, "loss": 0.852903425693512, "step": 2140 }, { "epoch": 1.803030303030303, "grad_norm": 5.194396018981934, "learning_rate": 4.3861550886387133e-07, "loss": 0.8081188201904297, "step": 2142 }, { "epoch": 1.8047138047138047, "grad_norm": 5.665356636047363, "learning_rate": 4.378064100020391e-07, "loss": 0.7818201780319214, "step": 2144 }, { "epoch": 1.8063973063973064, "grad_norm": 13.495172500610352, "learning_rate": 4.369976976943307e-07, "loss": 0.9256261587142944, "step": 2146 }, { "epoch": 1.808080808080808, "grad_norm": 4.425163269042969, "learning_rate": 4.361893747271005e-07, "loss": 0.9166650772094727, "step": 2148 }, { "epoch": 1.8097643097643097, "grad_norm": 4.725872039794922, "learning_rate": 4.3538144388536105e-07, "loss": 1.0181063413619995, "step": 2150 }, { "epoch": 1.8114478114478114, "grad_norm": 11.999753952026367, "learning_rate": 4.3457390795277415e-07, "loss": 1.053621530532837, "step": 2152 }, { "epoch": 1.8131313131313131, "grad_norm": 7.518166542053223, "learning_rate": 4.3376676971164096e-07, "loss": 0.8652574419975281, "step": 2154 }, { "epoch": 1.8148148148148149, "grad_norm": 4.472687244415283, "learning_rate": 4.3296003194289224e-07, "loss": 0.7134494781494141, "step": 2156 }, { "epoch": 1.8164983164983166, "grad_norm": 5.567774772644043, "learning_rate": 4.321536974260788e-07, "loss": 0.5291237831115723, "step": 2158 }, { "epoch": 1.8181818181818183, "grad_norm": 3.6279776096343994, "learning_rate": 4.313477689393628e-07, "loss": 0.9376990795135498, "step": 2160 }, { "epoch": 1.8198653198653199, "grad_norm": 5.327882766723633, "learning_rate": 4.305422492595063e-07, "loss": 0.8061087131500244, "step": 2162 }, { "epoch": 1.8215488215488216, "grad_norm": 8.221955299377441, "learning_rate": 4.2973714116186433e-07, "loss": 0.9052633047103882, "step": 2164 }, { "epoch": 1.823232323232323, "grad_norm": 5.31283712387085, "learning_rate": 4.289324474203726e-07, "loss": 0.9636974930763245, "step": 2166 }, { "epoch": 1.8249158249158248, "grad_norm": 4.392337799072266, "learning_rate": 4.281281708075397e-07, "loss": 0.9123021364212036, "step": 2168 }, { "epoch": 1.8265993265993266, "grad_norm": 5.4881744384765625, "learning_rate": 4.2732431409443694e-07, "loss": 0.6539809703826904, "step": 2170 }, { "epoch": 1.8282828282828283, "grad_norm": 4.425382614135742, "learning_rate": 4.26520880050689e-07, "loss": 0.7706068158149719, "step": 2172 }, { "epoch": 1.82996632996633, "grad_norm": 5.568687915802002, "learning_rate": 4.25717871444464e-07, "loss": 0.47670307755470276, "step": 2174 }, { "epoch": 1.8316498316498318, "grad_norm": 3.117622137069702, "learning_rate": 4.249152910424648e-07, "loss": 0.49261391162872314, "step": 2176 }, { "epoch": 1.8333333333333335, "grad_norm": 14.853697776794434, "learning_rate": 4.2411314160991827e-07, "loss": 0.7614182233810425, "step": 2178 }, { "epoch": 1.835016835016835, "grad_norm": 3.1323137283325195, "learning_rate": 4.23311425910567e-07, "loss": 0.8719555735588074, "step": 2180 }, { "epoch": 1.8367003367003367, "grad_norm": 16.170852661132812, "learning_rate": 4.225101467066587e-07, "loss": 0.5341575741767883, "step": 2182 }, { "epoch": 1.8383838383838382, "grad_norm": 32.08811950683594, "learning_rate": 4.2170930675893745e-07, "loss": 0.9574685096740723, "step": 2184 }, { "epoch": 1.84006734006734, "grad_norm": 3.1121602058410645, "learning_rate": 4.209089088266337e-07, "loss": 1.0799657106399536, "step": 2186 }, { "epoch": 1.8417508417508417, "grad_norm": 18.168685913085938, "learning_rate": 4.201089556674553e-07, "loss": 0.9567815065383911, "step": 2188 }, { "epoch": 1.8434343434343434, "grad_norm": 11.96113109588623, "learning_rate": 4.193094500375772e-07, "loss": 0.6286576390266418, "step": 2190 }, { "epoch": 1.8451178451178452, "grad_norm": 4.731752395629883, "learning_rate": 4.1851039469163306e-07, "loss": 0.8796607255935669, "step": 2192 }, { "epoch": 1.8468013468013469, "grad_norm": 10.77106761932373, "learning_rate": 4.177117923827046e-07, "loss": 0.6798102855682373, "step": 2194 }, { "epoch": 1.8484848484848486, "grad_norm": 7.331570148468018, "learning_rate": 4.169136458623126e-07, "loss": 0.8384144902229309, "step": 2196 }, { "epoch": 1.8501683501683501, "grad_norm": 22.100555419921875, "learning_rate": 4.161159578804079e-07, "loss": 0.46593400835990906, "step": 2198 }, { "epoch": 1.8518518518518519, "grad_norm": 3.0996837615966797, "learning_rate": 4.153187311853611e-07, "loss": 1.0288646221160889, "step": 2200 }, { "epoch": 1.8535353535353534, "grad_norm": 8.950583457946777, "learning_rate": 4.145219685239535e-07, "loss": 0.7397197484970093, "step": 2202 }, { "epoch": 1.855218855218855, "grad_norm": 4.474309921264648, "learning_rate": 4.1372567264136806e-07, "loss": 0.652114987373352, "step": 2204 }, { "epoch": 1.8569023569023568, "grad_norm": 15.298965454101562, "learning_rate": 4.129298462811789e-07, "loss": 0.9816831350326538, "step": 2206 }, { "epoch": 1.8585858585858586, "grad_norm": 6.661653518676758, "learning_rate": 4.121344921853426e-07, "loss": 0.782197892665863, "step": 2208 }, { "epoch": 1.8602693602693603, "grad_norm": 13.741491317749023, "learning_rate": 4.1133961309418885e-07, "loss": 0.35760360956192017, "step": 2210 }, { "epoch": 1.861952861952862, "grad_norm": 10.694735527038574, "learning_rate": 4.1054521174641065e-07, "loss": 0.9551196098327637, "step": 2212 }, { "epoch": 1.8636363636363638, "grad_norm": 3.1088006496429443, "learning_rate": 4.097512908790546e-07, "loss": 1.12099027633667, "step": 2214 }, { "epoch": 1.8653198653198653, "grad_norm": 25.677371978759766, "learning_rate": 4.089578532275123e-07, "loss": 0.3952019214630127, "step": 2216 }, { "epoch": 1.867003367003367, "grad_norm": 9.417123794555664, "learning_rate": 4.081649015255104e-07, "loss": 0.6426748633384705, "step": 2218 }, { "epoch": 1.8686868686868687, "grad_norm": 11.567337989807129, "learning_rate": 4.0737243850510097e-07, "loss": 0.6122760772705078, "step": 2220 }, { "epoch": 1.8703703703703702, "grad_norm": 5.804279327392578, "learning_rate": 4.065804668966527e-07, "loss": 0.6974793672561646, "step": 2222 }, { "epoch": 1.872053872053872, "grad_norm": 2.7025163173675537, "learning_rate": 4.057889894288409e-07, "loss": 1.073783040046692, "step": 2224 }, { "epoch": 1.8737373737373737, "grad_norm": 4.876822471618652, "learning_rate": 4.049980088286384e-07, "loss": 0.6222144365310669, "step": 2226 }, { "epoch": 1.8754208754208754, "grad_norm": 9.058050155639648, "learning_rate": 4.042075278213065e-07, "loss": 0.44170594215393066, "step": 2228 }, { "epoch": 1.8771043771043772, "grad_norm": 7.270165920257568, "learning_rate": 4.0341754913038463e-07, "loss": 1.0838236808776855, "step": 2230 }, { "epoch": 1.878787878787879, "grad_norm": 5.1195969581604, "learning_rate": 4.0262807547768164e-07, "loss": 0.9825941324234009, "step": 2232 }, { "epoch": 1.8804713804713806, "grad_norm": 6.805301666259766, "learning_rate": 4.018391095832665e-07, "loss": 0.5576257705688477, "step": 2234 }, { "epoch": 1.8821548821548821, "grad_norm": 5.000736236572266, "learning_rate": 4.0105065416545904e-07, "loss": 0.7729544639587402, "step": 2236 }, { "epoch": 1.8838383838383839, "grad_norm": 26.782957077026367, "learning_rate": 4.002627119408196e-07, "loss": 0.9620450735092163, "step": 2238 }, { "epoch": 1.8855218855218854, "grad_norm": 2.8780593872070312, "learning_rate": 3.994752856241407e-07, "loss": 1.1825776100158691, "step": 2240 }, { "epoch": 1.887205387205387, "grad_norm": 22.294492721557617, "learning_rate": 3.9868837792843744e-07, "loss": 1.0324305295944214, "step": 2242 }, { "epoch": 1.8888888888888888, "grad_norm": 4.332054615020752, "learning_rate": 3.97901991564938e-07, "loss": 0.9554680585861206, "step": 2244 }, { "epoch": 1.8905723905723906, "grad_norm": 3.9427170753479004, "learning_rate": 3.971161292430738e-07, "loss": 1.0006061792373657, "step": 2246 }, { "epoch": 1.8922558922558923, "grad_norm": 13.30428695678711, "learning_rate": 3.9633079367047176e-07, "loss": 0.9314384460449219, "step": 2248 }, { "epoch": 1.893939393939394, "grad_norm": 7.6282057762146, "learning_rate": 3.9554598755294313e-07, "loss": 1.031144380569458, "step": 2250 }, { "epoch": 1.8956228956228958, "grad_norm": 4.266255855560303, "learning_rate": 3.947617135944751e-07, "loss": 1.2106260061264038, "step": 2252 }, { "epoch": 1.8973063973063973, "grad_norm": 4.872833251953125, "learning_rate": 3.9397797449722157e-07, "loss": 0.9372920393943787, "step": 2254 }, { "epoch": 1.898989898989899, "grad_norm": 6.311352729797363, "learning_rate": 3.931947729614935e-07, "loss": 0.8530165553092957, "step": 2256 }, { "epoch": 1.9006734006734005, "grad_norm": 4.680610656738281, "learning_rate": 3.924121116857496e-07, "loss": 1.026566505432129, "step": 2258 }, { "epoch": 1.9023569023569022, "grad_norm": 9.47015380859375, "learning_rate": 3.9162999336658754e-07, "loss": 0.778825044631958, "step": 2260 }, { "epoch": 1.904040404040404, "grad_norm": 4.094303607940674, "learning_rate": 3.908484206987338e-07, "loss": 0.837942361831665, "step": 2262 }, { "epoch": 1.9057239057239057, "grad_norm": 4.3366522789001465, "learning_rate": 3.9006739637503504e-07, "loss": 0.5546213388442993, "step": 2264 }, { "epoch": 1.9074074074074074, "grad_norm": 4.000308036804199, "learning_rate": 3.8928692308644873e-07, "loss": 0.8694909811019897, "step": 2266 }, { "epoch": 1.9090909090909092, "grad_norm": 9.866999626159668, "learning_rate": 3.8850700352203393e-07, "loss": 0.7251837253570557, "step": 2268 }, { "epoch": 1.910774410774411, "grad_norm": 25.70250129699707, "learning_rate": 3.8772764036894135e-07, "loss": 0.8718059659004211, "step": 2270 }, { "epoch": 1.9124579124579124, "grad_norm": 3.6470611095428467, "learning_rate": 3.8694883631240525e-07, "loss": 0.9727774858474731, "step": 2272 }, { "epoch": 1.9141414141414141, "grad_norm": 17.55716896057129, "learning_rate": 3.8617059403573315e-07, "loss": 0.7658140659332275, "step": 2274 }, { "epoch": 1.9158249158249159, "grad_norm": 321.7969665527344, "learning_rate": 3.8539291622029726e-07, "loss": 0.9249438047409058, "step": 2276 }, { "epoch": 1.9175084175084174, "grad_norm": 12.505878448486328, "learning_rate": 3.8461580554552473e-07, "loss": 0.6528811454772949, "step": 2278 }, { "epoch": 1.9191919191919191, "grad_norm": 11.336592674255371, "learning_rate": 3.8383926468888894e-07, "loss": 0.4342978596687317, "step": 2280 }, { "epoch": 1.9208754208754208, "grad_norm": 5.1289567947387695, "learning_rate": 3.830632963258998e-07, "loss": 1.0175809860229492, "step": 2282 }, { "epoch": 1.9225589225589226, "grad_norm": 13.561434745788574, "learning_rate": 3.82287903130095e-07, "loss": 0.5333043336868286, "step": 2284 }, { "epoch": 1.9242424242424243, "grad_norm": 1.7644530534744263, "learning_rate": 3.815130877730299e-07, "loss": 0.6704491376876831, "step": 2286 }, { "epoch": 1.925925925925926, "grad_norm": 5.901256084442139, "learning_rate": 3.807388529242699e-07, "loss": 1.0216944217681885, "step": 2288 }, { "epoch": 1.9276094276094278, "grad_norm": 7.095743179321289, "learning_rate": 3.799652012513795e-07, "loss": 0.9275904893875122, "step": 2290 }, { "epoch": 1.9292929292929293, "grad_norm": 11.874740600585938, "learning_rate": 3.791921354199145e-07, "loss": 0.5191354751586914, "step": 2292 }, { "epoch": 1.930976430976431, "grad_norm": 5.656970500946045, "learning_rate": 3.784196580934117e-07, "loss": 0.9575490951538086, "step": 2294 }, { "epoch": 1.9326599326599325, "grad_norm": 37.43537521362305, "learning_rate": 3.776477719333806e-07, "loss": 0.8639167547225952, "step": 2296 }, { "epoch": 1.9343434343434343, "grad_norm": 3.692530632019043, "learning_rate": 3.768764795992939e-07, "loss": 0.8566898107528687, "step": 2298 }, { "epoch": 1.936026936026936, "grad_norm": 7.679093837738037, "learning_rate": 3.761057837485782e-07, "loss": 0.7409002780914307, "step": 2300 }, { "epoch": 1.9377104377104377, "grad_norm": 4.928491592407227, "learning_rate": 3.753356870366049e-07, "loss": 1.2324477434158325, "step": 2302 }, { "epoch": 1.9393939393939394, "grad_norm": 6.215064525604248, "learning_rate": 3.745661921166813e-07, "loss": 1.0157601833343506, "step": 2304 }, { "epoch": 1.9410774410774412, "grad_norm": 9.274593353271484, "learning_rate": 3.73797301640041e-07, "loss": 0.39169979095458984, "step": 2306 }, { "epoch": 1.942760942760943, "grad_norm": 5.436382293701172, "learning_rate": 3.730290182558352e-07, "loss": 0.9424724578857422, "step": 2308 }, { "epoch": 1.9444444444444444, "grad_norm": 6.4669389724731445, "learning_rate": 3.722613446111238e-07, "loss": 1.0893113613128662, "step": 2310 }, { "epoch": 1.9461279461279462, "grad_norm": 3.2764360904693604, "learning_rate": 3.7149428335086505e-07, "loss": 0.9788646697998047, "step": 2312 }, { "epoch": 1.9478114478114477, "grad_norm": 6.531454086303711, "learning_rate": 3.70727837117908e-07, "loss": 0.9268249869346619, "step": 2314 }, { "epoch": 1.9494949494949494, "grad_norm": 10.755637168884277, "learning_rate": 3.6996200855298243e-07, "loss": 0.7596557140350342, "step": 2316 }, { "epoch": 1.9511784511784511, "grad_norm": 4.191674709320068, "learning_rate": 3.691968002946899e-07, "loss": 0.8969882130622864, "step": 2318 }, { "epoch": 1.9528619528619529, "grad_norm": 3.586559772491455, "learning_rate": 3.684322149794947e-07, "loss": 0.926864743232727, "step": 2320 }, { "epoch": 1.9545454545454546, "grad_norm": 3.739887237548828, "learning_rate": 3.676682552417152e-07, "loss": 1.0153056383132935, "step": 2322 }, { "epoch": 1.9562289562289563, "grad_norm": 5.188506603240967, "learning_rate": 3.669049237135139e-07, "loss": 0.7965476512908936, "step": 2324 }, { "epoch": 1.957912457912458, "grad_norm": 19.38260269165039, "learning_rate": 3.6614222302488915e-07, "loss": 0.5549055337905884, "step": 2326 }, { "epoch": 1.9595959595959596, "grad_norm": 3.7028939723968506, "learning_rate": 3.6538015580366585e-07, "loss": 1.1440973281860352, "step": 2328 }, { "epoch": 1.9612794612794613, "grad_norm": 3.7629427909851074, "learning_rate": 3.6461872467548625e-07, "loss": 1.0486090183258057, "step": 2330 }, { "epoch": 1.9629629629629628, "grad_norm": 4.191500186920166, "learning_rate": 3.638579322638007e-07, "loss": 1.029564619064331, "step": 2332 }, { "epoch": 1.9646464646464645, "grad_norm": 3.297617197036743, "learning_rate": 3.6309778118985943e-07, "loss": 1.0488507747650146, "step": 2334 }, { "epoch": 1.9663299663299663, "grad_norm": 3.2955570220947266, "learning_rate": 3.623382740727028e-07, "loss": 0.9328145384788513, "step": 2336 }, { "epoch": 1.968013468013468, "grad_norm": 3.8717432022094727, "learning_rate": 3.61579413529152e-07, "loss": 1.0710524320602417, "step": 2338 }, { "epoch": 1.9696969696969697, "grad_norm": 16.362682342529297, "learning_rate": 3.608212021738011e-07, "loss": 0.565844714641571, "step": 2340 }, { "epoch": 1.9713804713804715, "grad_norm": 4.6352057456970215, "learning_rate": 3.600636426190075e-07, "loss": 0.7352415919303894, "step": 2342 }, { "epoch": 1.9730639730639732, "grad_norm": 30.211545944213867, "learning_rate": 3.593067374748823e-07, "loss": 0.5901581645011902, "step": 2344 }, { "epoch": 1.9747474747474747, "grad_norm": 14.593511581420898, "learning_rate": 3.585504893492821e-07, "loss": 0.8802275657653809, "step": 2346 }, { "epoch": 1.9764309764309764, "grad_norm": 6.9926438331604, "learning_rate": 3.577949008478004e-07, "loss": 0.7798852920532227, "step": 2348 }, { "epoch": 1.9781144781144782, "grad_norm": 27.421436309814453, "learning_rate": 3.57039974573757e-07, "loss": 0.726132333278656, "step": 2350 }, { "epoch": 1.9797979797979797, "grad_norm": 3.9214117527008057, "learning_rate": 3.562857131281907e-07, "loss": 0.7651845216751099, "step": 2352 }, { "epoch": 1.9814814814814814, "grad_norm": 2.7648415565490723, "learning_rate": 3.555321191098498e-07, "loss": 0.4599582552909851, "step": 2354 }, { "epoch": 1.9831649831649831, "grad_norm": 3.028148651123047, "learning_rate": 3.547791951151824e-07, "loss": 1.0578691959381104, "step": 2356 }, { "epoch": 1.9848484848484849, "grad_norm": 3.8524043560028076, "learning_rate": 3.5402694373832863e-07, "loss": 0.9566428065299988, "step": 2358 }, { "epoch": 1.9865319865319866, "grad_norm": 6.2976250648498535, "learning_rate": 3.53275367571111e-07, "loss": 0.9507308602333069, "step": 2360 }, { "epoch": 1.9882154882154883, "grad_norm": 9.228460311889648, "learning_rate": 3.525244692030256e-07, "loss": 0.646575927734375, "step": 2362 }, { "epoch": 1.98989898989899, "grad_norm": 3.0814363956451416, "learning_rate": 3.517742512212333e-07, "loss": 0.9748328924179077, "step": 2364 }, { "epoch": 1.9915824915824916, "grad_norm": 15.091059684753418, "learning_rate": 3.5102471621055083e-07, "loss": 0.8788052797317505, "step": 2366 }, { "epoch": 1.9932659932659933, "grad_norm": 2.8277087211608887, "learning_rate": 3.5027586675344134e-07, "loss": 1.026127576828003, "step": 2368 }, { "epoch": 1.9949494949494948, "grad_norm": 155.9358673095703, "learning_rate": 3.495277054300065e-07, "loss": 0.41760489344596863, "step": 2370 }, { "epoch": 1.9966329966329965, "grad_norm": 47.22990036010742, "learning_rate": 3.487802348179771e-07, "loss": 0.6611791849136353, "step": 2372 }, { "epoch": 1.9983164983164983, "grad_norm": 3.25925612449646, "learning_rate": 3.480334574927034e-07, "loss": 0.9254864454269409, "step": 2374 }, { "epoch": 2.0, "grad_norm": 3.941661834716797, "learning_rate": 3.4728737602714777e-07, "loss": 0.8802586793899536, "step": 2376 }, { "epoch": 2.0016835016835017, "grad_norm": 49.501678466796875, "learning_rate": 3.465419929918748e-07, "loss": 0.709393322467804, "step": 2378 }, { "epoch": 2.0033670033670035, "grad_norm": 5.433994770050049, "learning_rate": 3.457973109550426e-07, "loss": 1.1732385158538818, "step": 2380 }, { "epoch": 2.005050505050505, "grad_norm": 29.29537582397461, "learning_rate": 3.4505333248239437e-07, "loss": 0.6126368641853333, "step": 2382 }, { "epoch": 2.006734006734007, "grad_norm": 15.888188362121582, "learning_rate": 3.443100601372486e-07, "loss": 0.534448504447937, "step": 2384 }, { "epoch": 2.008417508417508, "grad_norm": 4.571238040924072, "learning_rate": 3.435674964804913e-07, "loss": 0.6711810827255249, "step": 2386 }, { "epoch": 2.01010101010101, "grad_norm": 5.0248517990112305, "learning_rate": 3.4282564407056714e-07, "loss": 0.856137752532959, "step": 2388 }, { "epoch": 2.0117845117845117, "grad_norm": 3.410614490509033, "learning_rate": 3.420845054634693e-07, "loss": 1.0443634986877441, "step": 2390 }, { "epoch": 2.0134680134680134, "grad_norm": 8.341497421264648, "learning_rate": 3.413440832127323e-07, "loss": 0.6617559194564819, "step": 2392 }, { "epoch": 2.015151515151515, "grad_norm": 9.096837043762207, "learning_rate": 3.406043798694226e-07, "loss": 0.7012159824371338, "step": 2394 }, { "epoch": 2.016835016835017, "grad_norm": 6.935766220092773, "learning_rate": 3.39865397982129e-07, "loss": 0.8126204013824463, "step": 2396 }, { "epoch": 2.0185185185185186, "grad_norm": 10.908267974853516, "learning_rate": 3.3912714009695525e-07, "loss": 0.7988526225090027, "step": 2398 }, { "epoch": 2.0202020202020203, "grad_norm": 3.9570505619049072, "learning_rate": 3.3838960875751057e-07, "loss": 0.6374803781509399, "step": 2400 }, { "epoch": 2.021885521885522, "grad_norm": 149.55186462402344, "learning_rate": 3.3765280650490043e-07, "loss": 0.4227946400642395, "step": 2402 }, { "epoch": 2.0235690235690234, "grad_norm": 9.027767181396484, "learning_rate": 3.3691673587771866e-07, "loss": 0.8504242897033691, "step": 2404 }, { "epoch": 2.025252525252525, "grad_norm": 8.02371883392334, "learning_rate": 3.361813994120386e-07, "loss": 0.7173169851303101, "step": 2406 }, { "epoch": 2.026936026936027, "grad_norm": 2.9558398723602295, "learning_rate": 3.354467996414034e-07, "loss": 0.8256983757019043, "step": 2408 }, { "epoch": 2.0286195286195285, "grad_norm": 15.695834159851074, "learning_rate": 3.3471293909681844e-07, "loss": 0.8146846294403076, "step": 2410 }, { "epoch": 2.0303030303030303, "grad_norm": 4.859415531158447, "learning_rate": 3.339798203067422e-07, "loss": 0.9352428913116455, "step": 2412 }, { "epoch": 2.031986531986532, "grad_norm": 9.821430206298828, "learning_rate": 3.332474457970773e-07, "loss": 0.7644020318984985, "step": 2414 }, { "epoch": 2.0336700336700337, "grad_norm": 2.8421833515167236, "learning_rate": 3.32515818091162e-07, "loss": 0.936759889125824, "step": 2416 }, { "epoch": 2.0353535353535355, "grad_norm": 4.6429243087768555, "learning_rate": 3.3178493970976183e-07, "loss": 0.7487270832061768, "step": 2418 }, { "epoch": 2.037037037037037, "grad_norm": 8.991228103637695, "learning_rate": 3.310548131710601e-07, "loss": 0.4855067729949951, "step": 2420 }, { "epoch": 2.038720538720539, "grad_norm": 3.958752393722534, "learning_rate": 3.3032544099065003e-07, "loss": 0.7952554821968079, "step": 2422 }, { "epoch": 2.04040404040404, "grad_norm": 4.862611293792725, "learning_rate": 3.295968256815257e-07, "loss": 0.36966073513031006, "step": 2424 }, { "epoch": 2.042087542087542, "grad_norm": 10.325581550598145, "learning_rate": 3.288689697540733e-07, "loss": 0.4272541403770447, "step": 2426 }, { "epoch": 2.0437710437710437, "grad_norm": 46.957489013671875, "learning_rate": 3.281418757160629e-07, "loss": 0.6797230839729309, "step": 2428 }, { "epoch": 2.0454545454545454, "grad_norm": 3.5477898120880127, "learning_rate": 3.274155460726392e-07, "loss": 0.8319392204284668, "step": 2430 }, { "epoch": 2.047138047138047, "grad_norm": 6.360466480255127, "learning_rate": 3.2668998332631374e-07, "loss": 0.6863579154014587, "step": 2432 }, { "epoch": 2.048821548821549, "grad_norm": 6.163110733032227, "learning_rate": 3.259651899769552e-07, "loss": 0.845360279083252, "step": 2434 }, { "epoch": 2.0505050505050506, "grad_norm": 4.161473274230957, "learning_rate": 3.2524116852178163e-07, "loss": 1.2110919952392578, "step": 2436 }, { "epoch": 2.0521885521885523, "grad_norm": 9.452940940856934, "learning_rate": 3.245179214553519e-07, "loss": 0.7553325891494751, "step": 2438 }, { "epoch": 2.053872053872054, "grad_norm": 2.847379207611084, "learning_rate": 3.23795451269556e-07, "loss": 0.8473318219184875, "step": 2440 }, { "epoch": 2.0555555555555554, "grad_norm": 8.454484939575195, "learning_rate": 3.2307376045360804e-07, "loss": 0.7530231475830078, "step": 2442 }, { "epoch": 2.057239057239057, "grad_norm": 3.292670965194702, "learning_rate": 3.223528514940365e-07, "loss": 0.8452006578445435, "step": 2444 }, { "epoch": 2.058922558922559, "grad_norm": 5.7511820793151855, "learning_rate": 3.216327268746759e-07, "loss": 1.0079270601272583, "step": 2446 }, { "epoch": 2.0606060606060606, "grad_norm": 3.7970666885375977, "learning_rate": 3.2091338907665864e-07, "loss": 0.8261886835098267, "step": 2448 }, { "epoch": 2.0622895622895623, "grad_norm": 4.847807884216309, "learning_rate": 3.201948405784062e-07, "loss": 0.7386308908462524, "step": 2450 }, { "epoch": 2.063973063973064, "grad_norm": 4.138136386871338, "learning_rate": 3.1947708385562033e-07, "loss": 0.967164158821106, "step": 2452 }, { "epoch": 2.0656565656565657, "grad_norm": 6.592377185821533, "learning_rate": 3.1876012138127525e-07, "loss": 0.820540189743042, "step": 2454 }, { "epoch": 2.0673400673400675, "grad_norm": 3.9689667224884033, "learning_rate": 3.1804395562560795e-07, "loss": 0.884551465511322, "step": 2456 }, { "epoch": 2.069023569023569, "grad_norm": 3.013533353805542, "learning_rate": 3.173285890561109e-07, "loss": 0.7905436158180237, "step": 2458 }, { "epoch": 2.0707070707070705, "grad_norm": 12.277018547058105, "learning_rate": 3.166140241375233e-07, "loss": 0.6569070219993591, "step": 2460 }, { "epoch": 2.0723905723905722, "grad_norm": 6.178629398345947, "learning_rate": 3.159002633318214e-07, "loss": 0.6464763879776001, "step": 2462 }, { "epoch": 2.074074074074074, "grad_norm": 41.42927932739258, "learning_rate": 3.151873090982117e-07, "loss": 0.7555403709411621, "step": 2464 }, { "epoch": 2.0757575757575757, "grad_norm": 6.328332424163818, "learning_rate": 3.144751638931219e-07, "loss": 0.8773843050003052, "step": 2466 }, { "epoch": 2.0774410774410774, "grad_norm": 3.656052589416504, "learning_rate": 3.137638301701912e-07, "loss": 0.5875815749168396, "step": 2468 }, { "epoch": 2.079124579124579, "grad_norm": 5.517158031463623, "learning_rate": 3.13053310380264e-07, "loss": 1.0708808898925781, "step": 2470 }, { "epoch": 2.080808080808081, "grad_norm": 3.563227891921997, "learning_rate": 3.123436069713801e-07, "loss": 1.0506317615509033, "step": 2472 }, { "epoch": 2.0824915824915826, "grad_norm": 3.579038619995117, "learning_rate": 3.116347223887658e-07, "loss": 0.5262918472290039, "step": 2474 }, { "epoch": 2.0841750841750843, "grad_norm": 4.360744953155518, "learning_rate": 3.1092665907482705e-07, "loss": 0.6860552430152893, "step": 2476 }, { "epoch": 2.0858585858585856, "grad_norm": 5.244029521942139, "learning_rate": 3.102194194691402e-07, "loss": 0.8589056730270386, "step": 2478 }, { "epoch": 2.0875420875420874, "grad_norm": 2.6665806770324707, "learning_rate": 3.0951300600844277e-07, "loss": 0.7219854593276978, "step": 2480 }, { "epoch": 2.089225589225589, "grad_norm": 4.833096027374268, "learning_rate": 3.088074211266265e-07, "loss": 0.6794151067733765, "step": 2482 }, { "epoch": 2.090909090909091, "grad_norm": 4.1560773849487305, "learning_rate": 3.0810266725472843e-07, "loss": 1.1254472732543945, "step": 2484 }, { "epoch": 2.0925925925925926, "grad_norm": 13.069584846496582, "learning_rate": 3.073987468209218e-07, "loss": 0.7453956604003906, "step": 2486 }, { "epoch": 2.0942760942760943, "grad_norm": 2.8535449504852295, "learning_rate": 3.0669566225050904e-07, "loss": 0.7250915765762329, "step": 2488 }, { "epoch": 2.095959595959596, "grad_norm": 7.14851188659668, "learning_rate": 3.059934159659122e-07, "loss": 0.9290302991867065, "step": 2490 }, { "epoch": 2.0976430976430978, "grad_norm": 4.216705799102783, "learning_rate": 3.052920103866651e-07, "loss": 0.9226129055023193, "step": 2492 }, { "epoch": 2.0993265993265995, "grad_norm": 6.80454158782959, "learning_rate": 3.0459144792940506e-07, "loss": 0.6964681148529053, "step": 2494 }, { "epoch": 2.101010101010101, "grad_norm": 4.916928291320801, "learning_rate": 3.038917310078648e-07, "loss": 0.9581238627433777, "step": 2496 }, { "epoch": 2.1026936026936025, "grad_norm": 6.742152214050293, "learning_rate": 3.031928620328632e-07, "loss": 0.5878009796142578, "step": 2498 }, { "epoch": 2.1043771043771042, "grad_norm": 8.78212833404541, "learning_rate": 3.024948434122981e-07, "loss": 0.6806055307388306, "step": 2500 }, { "epoch": 2.106060606060606, "grad_norm": 5.455780029296875, "learning_rate": 3.017976775511374e-07, "loss": 1.1094366312026978, "step": 2502 }, { "epoch": 2.1077441077441077, "grad_norm": 16.17888641357422, "learning_rate": 3.011013668514106e-07, "loss": 0.9498310089111328, "step": 2504 }, { "epoch": 2.1094276094276094, "grad_norm": 33.786502838134766, "learning_rate": 3.0040591371220126e-07, "loss": 0.9682769775390625, "step": 2506 }, { "epoch": 2.111111111111111, "grad_norm": 4.037204265594482, "learning_rate": 2.997113205296381e-07, "loss": 0.6556534171104431, "step": 2508 }, { "epoch": 2.112794612794613, "grad_norm": 4.9933576583862305, "learning_rate": 2.990175896968867e-07, "loss": 0.6443968415260315, "step": 2510 }, { "epoch": 2.1144781144781146, "grad_norm": 9.915764808654785, "learning_rate": 2.983247236041416e-07, "loss": 0.8275219202041626, "step": 2512 }, { "epoch": 2.1161616161616164, "grad_norm": 8.728922843933105, "learning_rate": 2.9763272463861846e-07, "loss": 0.4485883116722107, "step": 2514 }, { "epoch": 2.1178451178451176, "grad_norm": 4.324676513671875, "learning_rate": 2.9694159518454436e-07, "loss": 1.0087292194366455, "step": 2516 }, { "epoch": 2.1195286195286194, "grad_norm": 29.97382164001465, "learning_rate": 2.9625133762315134e-07, "loss": 0.30623072385787964, "step": 2518 }, { "epoch": 2.121212121212121, "grad_norm": 13.100899696350098, "learning_rate": 2.9556195433266724e-07, "loss": 0.5369913578033447, "step": 2520 }, { "epoch": 2.122895622895623, "grad_norm": 5.657482147216797, "learning_rate": 2.94873447688307e-07, "loss": 0.3709213137626648, "step": 2522 }, { "epoch": 2.1245791245791246, "grad_norm": 8.130796432495117, "learning_rate": 2.9418582006226644e-07, "loss": 0.528016209602356, "step": 2524 }, { "epoch": 2.1262626262626263, "grad_norm": 12.914457321166992, "learning_rate": 2.9349907382371175e-07, "loss": 0.5530096888542175, "step": 2526 }, { "epoch": 2.127946127946128, "grad_norm": 13.510022163391113, "learning_rate": 2.9281321133877256e-07, "loss": 0.4185825288295746, "step": 2528 }, { "epoch": 2.1296296296296298, "grad_norm": 4.050384998321533, "learning_rate": 2.921282349705338e-07, "loss": 0.6386127471923828, "step": 2530 }, { "epoch": 2.1313131313131315, "grad_norm": 6.590632915496826, "learning_rate": 2.914441470790274e-07, "loss": 0.9100687503814697, "step": 2532 }, { "epoch": 2.1329966329966332, "grad_norm": 4.762322425842285, "learning_rate": 2.9076095002122373e-07, "loss": 0.5006492137908936, "step": 2534 }, { "epoch": 2.1346801346801345, "grad_norm": 5.085036754608154, "learning_rate": 2.900786461510243e-07, "loss": 0.7980141639709473, "step": 2536 }, { "epoch": 2.1363636363636362, "grad_norm": 7.086611270904541, "learning_rate": 2.8939723781925304e-07, "loss": 0.5176095962524414, "step": 2538 }, { "epoch": 2.138047138047138, "grad_norm": 8.522965431213379, "learning_rate": 2.8871672737364814e-07, "loss": 0.4830123782157898, "step": 2540 }, { "epoch": 2.1397306397306397, "grad_norm": 9.686579704284668, "learning_rate": 2.8803711715885457e-07, "loss": 0.7633793354034424, "step": 2542 }, { "epoch": 2.1414141414141414, "grad_norm": 3.3301565647125244, "learning_rate": 2.8735840951641566e-07, "loss": 0.21130666136741638, "step": 2544 }, { "epoch": 2.143097643097643, "grad_norm": 2.9118270874023438, "learning_rate": 2.866806067847645e-07, "loss": 0.4212937355041504, "step": 2546 }, { "epoch": 2.144781144781145, "grad_norm": 3.857438087463379, "learning_rate": 2.860037112992167e-07, "loss": 0.7907487154006958, "step": 2548 }, { "epoch": 2.1464646464646466, "grad_norm": 3.103694438934326, "learning_rate": 2.8532772539196236e-07, "loss": 0.9942638874053955, "step": 2550 }, { "epoch": 2.148148148148148, "grad_norm": 5.230748653411865, "learning_rate": 2.8465265139205696e-07, "loss": 0.6354756951332092, "step": 2552 }, { "epoch": 2.1498316498316496, "grad_norm": 22.947580337524414, "learning_rate": 2.839784916254147e-07, "loss": 0.3525312840938568, "step": 2554 }, { "epoch": 2.1515151515151514, "grad_norm": 4.191389083862305, "learning_rate": 2.8330524841479964e-07, "loss": 0.6104186773300171, "step": 2556 }, { "epoch": 2.153198653198653, "grad_norm": 3.742684841156006, "learning_rate": 2.8263292407981777e-07, "loss": 0.6527650356292725, "step": 2558 }, { "epoch": 2.154882154882155, "grad_norm": 17.96328353881836, "learning_rate": 2.819615209369093e-07, "loss": 0.5300241112709045, "step": 2560 }, { "epoch": 2.1565656565656566, "grad_norm": 27.818559646606445, "learning_rate": 2.812910412993409e-07, "loss": 0.5620636940002441, "step": 2562 }, { "epoch": 2.1582491582491583, "grad_norm": 3.4521737098693848, "learning_rate": 2.806214874771965e-07, "loss": 0.8342366218566895, "step": 2564 }, { "epoch": 2.15993265993266, "grad_norm": 7.760178089141846, "learning_rate": 2.799528617773711e-07, "loss": 0.6607711315155029, "step": 2566 }, { "epoch": 2.1616161616161618, "grad_norm": 4.2295379638671875, "learning_rate": 2.792851665035616e-07, "loss": 0.5361987352371216, "step": 2568 }, { "epoch": 2.1632996632996635, "grad_norm": 4.252224922180176, "learning_rate": 2.7861840395625887e-07, "loss": 1.0253345966339111, "step": 2570 }, { "epoch": 2.164983164983165, "grad_norm": 5.132496356964111, "learning_rate": 2.779525764327406e-07, "loss": 1.1341686248779297, "step": 2572 }, { "epoch": 2.1666666666666665, "grad_norm": 10.062471389770508, "learning_rate": 2.7728768622706294e-07, "loss": 0.8332287073135376, "step": 2574 }, { "epoch": 2.1683501683501682, "grad_norm": 11.787501335144043, "learning_rate": 2.7662373563005206e-07, "loss": 0.3077271282672882, "step": 2576 }, { "epoch": 2.17003367003367, "grad_norm": 6.277064323425293, "learning_rate": 2.7596072692929724e-07, "loss": 0.7766256332397461, "step": 2578 }, { "epoch": 2.1717171717171717, "grad_norm": 22.47462272644043, "learning_rate": 2.752986624091427e-07, "loss": 0.32620465755462646, "step": 2580 }, { "epoch": 2.1734006734006734, "grad_norm": 7.154234886169434, "learning_rate": 2.746375443506788e-07, "loss": 0.5342273116111755, "step": 2582 }, { "epoch": 2.175084175084175, "grad_norm": 10.44245719909668, "learning_rate": 2.739773750317358e-07, "loss": 0.45068609714508057, "step": 2584 }, { "epoch": 2.176767676767677, "grad_norm": 7.9054155349731445, "learning_rate": 2.7331815672687476e-07, "loss": 0.6677770614624023, "step": 2586 }, { "epoch": 2.1784511784511786, "grad_norm": 10.807048797607422, "learning_rate": 2.726598917073798e-07, "loss": 0.7541825175285339, "step": 2588 }, { "epoch": 2.18013468013468, "grad_norm": 7.9458746910095215, "learning_rate": 2.720025822412512e-07, "loss": 0.7445704340934753, "step": 2590 }, { "epoch": 2.1818181818181817, "grad_norm": 12.376832962036133, "learning_rate": 2.713462305931966e-07, "loss": 0.5584303736686707, "step": 2592 }, { "epoch": 2.1835016835016834, "grad_norm": 3.6782195568084717, "learning_rate": 2.706908390246232e-07, "loss": 0.42317822575569153, "step": 2594 }, { "epoch": 2.185185185185185, "grad_norm": 33.567867279052734, "learning_rate": 2.7003640979363133e-07, "loss": 0.8278957605361938, "step": 2596 }, { "epoch": 2.186868686868687, "grad_norm": 8.654823303222656, "learning_rate": 2.6938294515500463e-07, "loss": 0.8979749083518982, "step": 2598 }, { "epoch": 2.1885521885521886, "grad_norm": 4.184791564941406, "learning_rate": 2.687304473602039e-07, "loss": 0.7217346429824829, "step": 2600 }, { "epoch": 2.1902356902356903, "grad_norm": 22.945192337036133, "learning_rate": 2.6807891865735865e-07, "loss": 0.9164705276489258, "step": 2602 }, { "epoch": 2.191919191919192, "grad_norm": 8.126714706420898, "learning_rate": 2.674283612912591e-07, "loss": 0.853008508682251, "step": 2604 }, { "epoch": 2.1936026936026938, "grad_norm": 9.629704475402832, "learning_rate": 2.6677877750334935e-07, "loss": 0.6331396102905273, "step": 2606 }, { "epoch": 2.1952861952861955, "grad_norm": 3.6879630088806152, "learning_rate": 2.6613016953171894e-07, "loss": 0.9496104121208191, "step": 2608 }, { "epoch": 2.196969696969697, "grad_norm": 3.8279647827148438, "learning_rate": 2.65482539611095e-07, "loss": 0.7846404910087585, "step": 2610 }, { "epoch": 2.1986531986531985, "grad_norm": 20.262147903442383, "learning_rate": 2.648358899728351e-07, "loss": 0.489252507686615, "step": 2612 }, { "epoch": 2.2003367003367003, "grad_norm": 8.78268051147461, "learning_rate": 2.6419022284491965e-07, "loss": 0.8057292699813843, "step": 2614 }, { "epoch": 2.202020202020202, "grad_norm": 4.244235038757324, "learning_rate": 2.635455404519433e-07, "loss": 0.6223278641700745, "step": 2616 }, { "epoch": 2.2037037037037037, "grad_norm": 11.954898834228516, "learning_rate": 2.629018450151081e-07, "loss": 0.5752437114715576, "step": 2618 }, { "epoch": 2.2053872053872055, "grad_norm": 13.8149995803833, "learning_rate": 2.6225913875221594e-07, "loss": 0.3817511796951294, "step": 2620 }, { "epoch": 2.207070707070707, "grad_norm": 4.223384857177734, "learning_rate": 2.6161742387766e-07, "loss": 0.6272555589675903, "step": 2622 }, { "epoch": 2.208754208754209, "grad_norm": 21.69821548461914, "learning_rate": 2.609767026024182e-07, "loss": 0.7172547578811646, "step": 2624 }, { "epoch": 2.2104377104377106, "grad_norm": 8.276639938354492, "learning_rate": 2.6033697713404514e-07, "loss": 0.6655735373497009, "step": 2626 }, { "epoch": 2.212121212121212, "grad_norm": 8.716046333312988, "learning_rate": 2.5969824967666374e-07, "loss": 0.6124321818351746, "step": 2628 }, { "epoch": 2.2138047138047137, "grad_norm": 8.070818901062012, "learning_rate": 2.590605224309592e-07, "loss": 0.4091968536376953, "step": 2630 }, { "epoch": 2.2154882154882154, "grad_norm": 5.510800361633301, "learning_rate": 2.5842379759417023e-07, "loss": 0.521186113357544, "step": 2632 }, { "epoch": 2.217171717171717, "grad_norm": 39.2961540222168, "learning_rate": 2.5778807736008153e-07, "loss": 0.2366686761379242, "step": 2634 }, { "epoch": 2.218855218855219, "grad_norm": 3.912489891052246, "learning_rate": 2.5715336391901695e-07, "loss": 0.8710294961929321, "step": 2636 }, { "epoch": 2.2205387205387206, "grad_norm": 4.83061408996582, "learning_rate": 2.565196594578315e-07, "loss": 1.1489973068237305, "step": 2638 }, { "epoch": 2.2222222222222223, "grad_norm": 4.629734516143799, "learning_rate": 2.5588696615990336e-07, "loss": 1.078352928161621, "step": 2640 }, { "epoch": 2.223905723905724, "grad_norm": 7.968264102935791, "learning_rate": 2.5525528620512737e-07, "loss": 0.7425380349159241, "step": 2642 }, { "epoch": 2.225589225589226, "grad_norm": 13.291003227233887, "learning_rate": 2.5462462176990686e-07, "loss": 0.7818918228149414, "step": 2644 }, { "epoch": 2.227272727272727, "grad_norm": 11.734708786010742, "learning_rate": 2.539949750271458e-07, "loss": 0.7145400047302246, "step": 2646 }, { "epoch": 2.228956228956229, "grad_norm": 5.949611186981201, "learning_rate": 2.533663481462424e-07, "loss": 0.4055989980697632, "step": 2648 }, { "epoch": 2.2306397306397305, "grad_norm": 5.281031608581543, "learning_rate": 2.5273874329308083e-07, "loss": 1.0042195320129395, "step": 2650 }, { "epoch": 2.2323232323232323, "grad_norm": 8.864117622375488, "learning_rate": 2.5211216263002375e-07, "loss": 0.604977011680603, "step": 2652 }, { "epoch": 2.234006734006734, "grad_norm": 28.879344940185547, "learning_rate": 2.514866083159053e-07, "loss": 0.566184937953949, "step": 2654 }, { "epoch": 2.2356902356902357, "grad_norm": 7.084741592407227, "learning_rate": 2.508620825060231e-07, "loss": 0.6506372094154358, "step": 2656 }, { "epoch": 2.2373737373737375, "grad_norm": 22.613136291503906, "learning_rate": 2.5023858735213156e-07, "loss": 0.9167625904083252, "step": 2658 }, { "epoch": 2.239057239057239, "grad_norm": 6.915469169616699, "learning_rate": 2.4961612500243364e-07, "loss": 0.7674777507781982, "step": 2660 }, { "epoch": 2.240740740740741, "grad_norm": 8.177582740783691, "learning_rate": 2.4899469760157413e-07, "loss": 0.8097570538520813, "step": 2662 }, { "epoch": 2.242424242424242, "grad_norm": 14.568964004516602, "learning_rate": 2.48374307290632e-07, "loss": 0.4266725182533264, "step": 2664 }, { "epoch": 2.244107744107744, "grad_norm": 4.135527610778809, "learning_rate": 2.4775495620711254e-07, "loss": 0.7610059976577759, "step": 2666 }, { "epoch": 2.2457912457912457, "grad_norm": 7.860456466674805, "learning_rate": 2.4713664648494133e-07, "loss": 0.6509280204772949, "step": 2668 }, { "epoch": 2.2474747474747474, "grad_norm": 7.511784553527832, "learning_rate": 2.465193802544552e-07, "loss": 0.5061072111129761, "step": 2670 }, { "epoch": 2.249158249158249, "grad_norm": 4.732418060302734, "learning_rate": 2.4590315964239606e-07, "loss": 0.36101067066192627, "step": 2672 }, { "epoch": 2.250841750841751, "grad_norm": 10.225937843322754, "learning_rate": 2.452879867719034e-07, "loss": 0.6636744737625122, "step": 2674 }, { "epoch": 2.2525252525252526, "grad_norm": 6.152078628540039, "learning_rate": 2.4467386376250633e-07, "loss": 0.8210121989250183, "step": 2676 }, { "epoch": 2.2542087542087543, "grad_norm": 6.384221076965332, "learning_rate": 2.440607927301171e-07, "loss": 0.5604538917541504, "step": 2678 }, { "epoch": 2.255892255892256, "grad_norm": 3.0290005207061768, "learning_rate": 2.4344877578702355e-07, "loss": 0.9680004119873047, "step": 2680 }, { "epoch": 2.257575757575758, "grad_norm": 8.649748802185059, "learning_rate": 2.4283781504188126e-07, "loss": 0.2856512665748596, "step": 2682 }, { "epoch": 2.259259259259259, "grad_norm": 12.650278091430664, "learning_rate": 2.422279125997073e-07, "loss": 0.21757878363132477, "step": 2684 }, { "epoch": 2.260942760942761, "grad_norm": 5.625198841094971, "learning_rate": 2.416190705618722e-07, "loss": 0.7161245346069336, "step": 2686 }, { "epoch": 2.2626262626262625, "grad_norm": 3.8364768028259277, "learning_rate": 2.4101129102609273e-07, "loss": 0.44631901383399963, "step": 2688 }, { "epoch": 2.2643097643097643, "grad_norm": 2.489049196243286, "learning_rate": 2.404045760864253e-07, "loss": 1.060034155845642, "step": 2690 }, { "epoch": 2.265993265993266, "grad_norm": 22.317943572998047, "learning_rate": 2.397989278332583e-07, "loss": 0.8590011596679688, "step": 2692 }, { "epoch": 2.2676767676767677, "grad_norm": 3.2131800651550293, "learning_rate": 2.391943483533044e-07, "loss": 0.7794303297996521, "step": 2694 }, { "epoch": 2.2693602693602695, "grad_norm": 3.656132936477661, "learning_rate": 2.385908397295945e-07, "loss": 0.6720019578933716, "step": 2696 }, { "epoch": 2.271043771043771, "grad_norm": 3.8519668579101562, "learning_rate": 2.3798840404146995e-07, "loss": 0.7614578008651733, "step": 2698 }, { "epoch": 2.2727272727272725, "grad_norm": 4.142553329467773, "learning_rate": 2.3738704336457484e-07, "loss": 0.8712958097457886, "step": 2700 }, { "epoch": 2.274410774410774, "grad_norm": 6.8363237380981445, "learning_rate": 2.3678675977084986e-07, "loss": 0.5424622297286987, "step": 2702 }, { "epoch": 2.276094276094276, "grad_norm": 3.5155107975006104, "learning_rate": 2.3618755532852466e-07, "loss": 0.973854660987854, "step": 2704 }, { "epoch": 2.2777777777777777, "grad_norm": 7.004105091094971, "learning_rate": 2.3558943210211047e-07, "loss": 1.0108654499053955, "step": 2706 }, { "epoch": 2.2794612794612794, "grad_norm": 1.2474193572998047, "learning_rate": 2.3499239215239357e-07, "loss": 0.5368537306785583, "step": 2708 }, { "epoch": 2.281144781144781, "grad_norm": 5.437285423278809, "learning_rate": 2.3439643753642798e-07, "loss": 0.690973162651062, "step": 2710 }, { "epoch": 2.282828282828283, "grad_norm": 11.235260009765625, "learning_rate": 2.3380157030752775e-07, "loss": 0.6230310201644897, "step": 2712 }, { "epoch": 2.2845117845117846, "grad_norm": 9.484489440917969, "learning_rate": 2.33207792515261e-07, "loss": 0.5481805205345154, "step": 2714 }, { "epoch": 2.2861952861952863, "grad_norm": 9.018638610839844, "learning_rate": 2.3261510620544208e-07, "loss": 0.8037227392196655, "step": 2716 }, { "epoch": 2.287878787878788, "grad_norm": 12.419392585754395, "learning_rate": 2.3202351342012452e-07, "loss": 0.6880577802658081, "step": 2718 }, { "epoch": 2.28956228956229, "grad_norm": 29.25603485107422, "learning_rate": 2.3143301619759456e-07, "loss": 0.579788327217102, "step": 2720 }, { "epoch": 2.291245791245791, "grad_norm": 12.553728103637695, "learning_rate": 2.308436165723636e-07, "loss": 0.7886263132095337, "step": 2722 }, { "epoch": 2.292929292929293, "grad_norm": 14.242766380310059, "learning_rate": 2.3025531657516115e-07, "loss": 0.7852193117141724, "step": 2724 }, { "epoch": 2.2946127946127945, "grad_norm": 7.794075012207031, "learning_rate": 2.2966811823292842e-07, "loss": 0.7775453925132751, "step": 2726 }, { "epoch": 2.2962962962962963, "grad_norm": 7.859867572784424, "learning_rate": 2.2908202356881075e-07, "loss": 0.6673729419708252, "step": 2728 }, { "epoch": 2.297979797979798, "grad_norm": 6.257922172546387, "learning_rate": 2.2849703460215077e-07, "loss": 1.060187816619873, "step": 2730 }, { "epoch": 2.2996632996632997, "grad_norm": 5.627756595611572, "learning_rate": 2.2791315334848162e-07, "loss": 0.6064283847808838, "step": 2732 }, { "epoch": 2.3013468013468015, "grad_norm": 6.193628787994385, "learning_rate": 2.2733038181952e-07, "loss": 0.648173451423645, "step": 2734 }, { "epoch": 2.303030303030303, "grad_norm": 10.281158447265625, "learning_rate": 2.2674872202315892e-07, "loss": 0.49927544593811035, "step": 2736 }, { "epoch": 2.3047138047138045, "grad_norm": 7.590847969055176, "learning_rate": 2.2616817596346103e-07, "loss": 0.7152895927429199, "step": 2738 }, { "epoch": 2.3063973063973062, "grad_norm": 7.842513084411621, "learning_rate": 2.2558874564065215e-07, "loss": 0.5551795959472656, "step": 2740 }, { "epoch": 2.308080808080808, "grad_norm": 5.1881890296936035, "learning_rate": 2.2501043305111313e-07, "loss": 0.8357152938842773, "step": 2742 }, { "epoch": 2.3097643097643097, "grad_norm": 5.037477493286133, "learning_rate": 2.2443324018737436e-07, "loss": 0.8395123481750488, "step": 2744 }, { "epoch": 2.3114478114478114, "grad_norm": 4.545862674713135, "learning_rate": 2.2385716903810822e-07, "loss": 0.8929284811019897, "step": 2746 }, { "epoch": 2.313131313131313, "grad_norm": 10.017370223999023, "learning_rate": 2.2328222158812198e-07, "loss": 0.707942008972168, "step": 2748 }, { "epoch": 2.314814814814815, "grad_norm": 7.563255310058594, "learning_rate": 2.227083998183516e-07, "loss": 0.12098832428455353, "step": 2750 }, { "epoch": 2.3164983164983166, "grad_norm": 7.330215930938721, "learning_rate": 2.221357057058546e-07, "loss": 0.4100933074951172, "step": 2752 }, { "epoch": 2.3181818181818183, "grad_norm": 4.7282185554504395, "learning_rate": 2.2156414122380307e-07, "loss": 0.5965608358383179, "step": 2754 }, { "epoch": 2.31986531986532, "grad_norm": 3.0822274684906006, "learning_rate": 2.2099370834147712e-07, "loss": 0.945094645023346, "step": 2756 }, { "epoch": 2.3215488215488214, "grad_norm": 7.529977321624756, "learning_rate": 2.2042440902425822e-07, "loss": 0.7363934516906738, "step": 2758 }, { "epoch": 2.323232323232323, "grad_norm": 13.28249740600586, "learning_rate": 2.1985624523362185e-07, "loss": 0.7786830067634583, "step": 2760 }, { "epoch": 2.324915824915825, "grad_norm": 11.899820327758789, "learning_rate": 2.1928921892713132e-07, "loss": 0.6262949705123901, "step": 2762 }, { "epoch": 2.3265993265993266, "grad_norm": 4.841851234436035, "learning_rate": 2.187233320584311e-07, "loss": 0.9699975252151489, "step": 2764 }, { "epoch": 2.3282828282828283, "grad_norm": 9.435696601867676, "learning_rate": 2.181585865772393e-07, "loss": 0.8197389245033264, "step": 2766 }, { "epoch": 2.32996632996633, "grad_norm": 4.551506042480469, "learning_rate": 2.175949844293417e-07, "loss": 0.6494600772857666, "step": 2768 }, { "epoch": 2.3316498316498318, "grad_norm": 19.35220718383789, "learning_rate": 2.1703252755658512e-07, "loss": 0.7402999997138977, "step": 2770 }, { "epoch": 2.3333333333333335, "grad_norm": 5.450087070465088, "learning_rate": 2.1647121789686985e-07, "loss": 0.7242530584335327, "step": 2772 }, { "epoch": 2.3350168350168348, "grad_norm": 6.281241416931152, "learning_rate": 2.1591105738414395e-07, "loss": 0.7737699151039124, "step": 2774 }, { "epoch": 2.3367003367003365, "grad_norm": 4.79439640045166, "learning_rate": 2.153520479483962e-07, "loss": 0.7753046751022339, "step": 2776 }, { "epoch": 2.3383838383838382, "grad_norm": 7.926896095275879, "learning_rate": 2.1479419151564908e-07, "loss": 0.5965973138809204, "step": 2778 }, { "epoch": 2.34006734006734, "grad_norm": 13.744224548339844, "learning_rate": 2.1423749000795286e-07, "loss": 0.7432798743247986, "step": 2780 }, { "epoch": 2.3417508417508417, "grad_norm": 4.1591949462890625, "learning_rate": 2.1368194534337864e-07, "loss": 0.6963976621627808, "step": 2782 }, { "epoch": 2.3434343434343434, "grad_norm": 5.26281213760376, "learning_rate": 2.1312755943601113e-07, "loss": 0.8363964557647705, "step": 2784 }, { "epoch": 2.345117845117845, "grad_norm": 4.026867389678955, "learning_rate": 2.1257433419594329e-07, "loss": 0.6121779680252075, "step": 2786 }, { "epoch": 2.346801346801347, "grad_norm": 3.700312614440918, "learning_rate": 2.1202227152926898e-07, "loss": 1.0569815635681152, "step": 2788 }, { "epoch": 2.3484848484848486, "grad_norm": 5.786956310272217, "learning_rate": 2.114713733380761e-07, "loss": 0.8500775098800659, "step": 2790 }, { "epoch": 2.3501683501683504, "grad_norm": 3.6336448192596436, "learning_rate": 2.1092164152044082e-07, "loss": 0.6126809120178223, "step": 2792 }, { "epoch": 2.351851851851852, "grad_norm": 16.343307495117188, "learning_rate": 2.1037307797042073e-07, "loss": 0.7721902132034302, "step": 2794 }, { "epoch": 2.3535353535353534, "grad_norm": 4.7194600105285645, "learning_rate": 2.0982568457804772e-07, "loss": 1.0643179416656494, "step": 2796 }, { "epoch": 2.355218855218855, "grad_norm": 5.305932998657227, "learning_rate": 2.0927946322932257e-07, "loss": 0.6048824191093445, "step": 2798 }, { "epoch": 2.356902356902357, "grad_norm": 2.0404253005981445, "learning_rate": 2.0873441580620778e-07, "loss": 1.1490514278411865, "step": 2800 }, { "epoch": 2.3585858585858586, "grad_norm": 4.3384480476379395, "learning_rate": 2.0819054418662068e-07, "loss": 1.0097895860671997, "step": 2802 }, { "epoch": 2.3602693602693603, "grad_norm": 7.471581935882568, "learning_rate": 2.0764785024442816e-07, "loss": 0.8789470791816711, "step": 2804 }, { "epoch": 2.361952861952862, "grad_norm": 9.630654335021973, "learning_rate": 2.071063358494392e-07, "loss": 0.8657972812652588, "step": 2806 }, { "epoch": 2.3636363636363638, "grad_norm": 9.908742904663086, "learning_rate": 2.0656600286739846e-07, "loss": 0.9500114917755127, "step": 2808 }, { "epoch": 2.3653198653198655, "grad_norm": 3.0417370796203613, "learning_rate": 2.060268531599806e-07, "loss": 1.0881528854370117, "step": 2810 }, { "epoch": 2.3670033670033668, "grad_norm": 15.979384422302246, "learning_rate": 2.0548888858478314e-07, "loss": 0.8370237350463867, "step": 2812 }, { "epoch": 2.3686868686868685, "grad_norm": 2.701646327972412, "learning_rate": 2.0495211099532051e-07, "loss": 0.7017450332641602, "step": 2814 }, { "epoch": 2.3703703703703702, "grad_norm": 3.518488645553589, "learning_rate": 2.0441652224101739e-07, "loss": 0.7352346777915955, "step": 2816 }, { "epoch": 2.372053872053872, "grad_norm": 5.064514636993408, "learning_rate": 2.038821241672022e-07, "loss": 0.7799332141876221, "step": 2818 }, { "epoch": 2.3737373737373737, "grad_norm": 4.822017192840576, "learning_rate": 2.0334891861510124e-07, "loss": 0.8013976812362671, "step": 2820 }, { "epoch": 2.3754208754208754, "grad_norm": 13.20271110534668, "learning_rate": 2.0281690742183214e-07, "loss": 0.5635098814964294, "step": 2822 }, { "epoch": 2.377104377104377, "grad_norm": 4.322653293609619, "learning_rate": 2.0228609242039707e-07, "loss": 1.05335533618927, "step": 2824 }, { "epoch": 2.378787878787879, "grad_norm": 8.060440063476562, "learning_rate": 2.017564754396771e-07, "loss": 0.9288073778152466, "step": 2826 }, { "epoch": 2.3804713804713806, "grad_norm": 6.93074369430542, "learning_rate": 2.012280583044258e-07, "loss": 0.49736571311950684, "step": 2828 }, { "epoch": 2.3821548821548824, "grad_norm": 11.825316429138184, "learning_rate": 2.0070084283526223e-07, "loss": 1.044695258140564, "step": 2830 }, { "epoch": 2.3838383838383836, "grad_norm": 7.59405517578125, "learning_rate": 2.001748308486656e-07, "loss": 0.8302027583122253, "step": 2832 }, { "epoch": 2.3855218855218854, "grad_norm": 3.9063162803649902, "learning_rate": 1.9965002415696878e-07, "loss": 0.658703088760376, "step": 2834 }, { "epoch": 2.387205387205387, "grad_norm": 9.86563491821289, "learning_rate": 1.9912642456835125e-07, "loss": 0.6858144998550415, "step": 2836 }, { "epoch": 2.388888888888889, "grad_norm": 4.106326580047607, "learning_rate": 1.9860403388683408e-07, "loss": 0.5258500576019287, "step": 2838 }, { "epoch": 2.3905723905723906, "grad_norm": 3.920785427093506, "learning_rate": 1.980828539122731e-07, "loss": 0.9032931327819824, "step": 2840 }, { "epoch": 2.3922558922558923, "grad_norm": 1.4234728813171387, "learning_rate": 1.9756288644035244e-07, "loss": 0.43326181173324585, "step": 2842 }, { "epoch": 2.393939393939394, "grad_norm": 4.104327201843262, "learning_rate": 1.970441332625788e-07, "loss": 1.114197015762329, "step": 2844 }, { "epoch": 2.3956228956228958, "grad_norm": 7.699793815612793, "learning_rate": 1.965265961662753e-07, "loss": 0.8347800970077515, "step": 2846 }, { "epoch": 2.3973063973063975, "grad_norm": 4.057286262512207, "learning_rate": 1.9601027693457485e-07, "loss": 1.1171047687530518, "step": 2848 }, { "epoch": 2.398989898989899, "grad_norm": 4.676527976989746, "learning_rate": 1.9549517734641453e-07, "loss": 0.8913414478302002, "step": 2850 }, { "epoch": 2.4006734006734005, "grad_norm": 5.339909076690674, "learning_rate": 1.9498129917652917e-07, "loss": 0.5060603022575378, "step": 2852 }, { "epoch": 2.4023569023569022, "grad_norm": 7.147670269012451, "learning_rate": 1.9446864419544517e-07, "loss": 0.7295070886611938, "step": 2854 }, { "epoch": 2.404040404040404, "grad_norm": 6.569252014160156, "learning_rate": 1.9395721416947475e-07, "loss": 0.6507788896560669, "step": 2856 }, { "epoch": 2.4057239057239057, "grad_norm": 6.89575719833374, "learning_rate": 1.9344701086070957e-07, "loss": 0.7100333571434021, "step": 2858 }, { "epoch": 2.4074074074074074, "grad_norm": 7.443199634552002, "learning_rate": 1.9293803602701458e-07, "loss": 0.49127644300460815, "step": 2860 }, { "epoch": 2.409090909090909, "grad_norm": 3.398568868637085, "learning_rate": 1.924302914220222e-07, "loss": 0.8142455816268921, "step": 2862 }, { "epoch": 2.410774410774411, "grad_norm": 7.437132835388184, "learning_rate": 1.9192377879512656e-07, "loss": 0.5337988138198853, "step": 2864 }, { "epoch": 2.4124579124579126, "grad_norm": 4.250380516052246, "learning_rate": 1.914184998914764e-07, "loss": 0.7382153868675232, "step": 2866 }, { "epoch": 2.4141414141414144, "grad_norm": 4.774903297424316, "learning_rate": 1.9091445645197024e-07, "loss": 0.9528558254241943, "step": 2868 }, { "epoch": 2.4158249158249157, "grad_norm": 3.7426023483276367, "learning_rate": 1.9041165021324986e-07, "loss": 0.8381022214889526, "step": 2870 }, { "epoch": 2.4175084175084174, "grad_norm": 2.178778648376465, "learning_rate": 1.899100829076945e-07, "loss": 0.5464705228805542, "step": 2872 }, { "epoch": 2.419191919191919, "grad_norm": 4.025269031524658, "learning_rate": 1.894097562634142e-07, "loss": 1.0029910802841187, "step": 2874 }, { "epoch": 2.420875420875421, "grad_norm": 5.291448593139648, "learning_rate": 1.8891067200424498e-07, "loss": 0.8049919605255127, "step": 2876 }, { "epoch": 2.4225589225589226, "grad_norm": 3.155411720275879, "learning_rate": 1.8841283184974216e-07, "loss": 0.5165250301361084, "step": 2878 }, { "epoch": 2.4242424242424243, "grad_norm": 3.550431251525879, "learning_rate": 1.8791623751517432e-07, "loss": 0.9810848832130432, "step": 2880 }, { "epoch": 2.425925925925926, "grad_norm": 8.65785026550293, "learning_rate": 1.8742089071151812e-07, "loss": 0.6320451498031616, "step": 2882 }, { "epoch": 2.4276094276094278, "grad_norm": 24.364227294921875, "learning_rate": 1.8692679314545155e-07, "loss": 0.691448450088501, "step": 2884 }, { "epoch": 2.429292929292929, "grad_norm": 3.4331605434417725, "learning_rate": 1.8643394651934867e-07, "loss": 0.5786364078521729, "step": 2886 }, { "epoch": 2.430976430976431, "grad_norm": 16.977510452270508, "learning_rate": 1.8594235253127372e-07, "loss": 0.6802031993865967, "step": 2888 }, { "epoch": 2.4326599326599325, "grad_norm": 5.363550662994385, "learning_rate": 1.8545201287497442e-07, "loss": 0.5717660188674927, "step": 2890 }, { "epoch": 2.4343434343434343, "grad_norm": 23.09035873413086, "learning_rate": 1.849629292398774e-07, "loss": 0.7750734686851501, "step": 2892 }, { "epoch": 2.436026936026936, "grad_norm": 2.4943952560424805, "learning_rate": 1.8447510331108163e-07, "loss": 0.9770002365112305, "step": 2894 }, { "epoch": 2.4377104377104377, "grad_norm": 6.26854133605957, "learning_rate": 1.839885367693526e-07, "loss": 0.8726930618286133, "step": 2896 }, { "epoch": 2.4393939393939394, "grad_norm": 11.332048416137695, "learning_rate": 1.8350323129111672e-07, "loss": 0.7943978309631348, "step": 2898 }, { "epoch": 2.441077441077441, "grad_norm": 20.655651092529297, "learning_rate": 1.8301918854845577e-07, "loss": 0.5449969172477722, "step": 2900 }, { "epoch": 2.442760942760943, "grad_norm": 3.659409761428833, "learning_rate": 1.8253641020910043e-07, "loss": 0.9310587644577026, "step": 2902 }, { "epoch": 2.4444444444444446, "grad_norm": 11.209527969360352, "learning_rate": 1.820548979364253e-07, "loss": 0.5611803531646729, "step": 2904 }, { "epoch": 2.4461279461279464, "grad_norm": 6.078222751617432, "learning_rate": 1.815746533894429e-07, "loss": 0.4734145998954773, "step": 2906 }, { "epoch": 2.4478114478114477, "grad_norm": 43.15976333618164, "learning_rate": 1.8109567822279753e-07, "loss": 0.6027005910873413, "step": 2908 }, { "epoch": 2.4494949494949494, "grad_norm": 8.55388355255127, "learning_rate": 1.8061797408676023e-07, "loss": 0.7029461860656738, "step": 2910 }, { "epoch": 2.451178451178451, "grad_norm": 3.861863374710083, "learning_rate": 1.801415426272229e-07, "loss": 0.5813450813293457, "step": 2912 }, { "epoch": 2.452861952861953, "grad_norm": 4.0164103507995605, "learning_rate": 1.796663854856922e-07, "loss": 0.8507091999053955, "step": 2914 }, { "epoch": 2.4545454545454546, "grad_norm": 9.30286693572998, "learning_rate": 1.7919250429928446e-07, "loss": 0.7457901239395142, "step": 2916 }, { "epoch": 2.4562289562289563, "grad_norm": 1.883726954460144, "learning_rate": 1.7871990070071987e-07, "loss": 0.45636504888534546, "step": 2918 }, { "epoch": 2.457912457912458, "grad_norm": 4.311119079589844, "learning_rate": 1.7824857631831648e-07, "loss": 0.9269647002220154, "step": 2920 }, { "epoch": 2.45959595959596, "grad_norm": 19.505645751953125, "learning_rate": 1.7777853277598522e-07, "loss": 0.5110766887664795, "step": 2922 }, { "epoch": 2.461279461279461, "grad_norm": 3.043074131011963, "learning_rate": 1.7730977169322397e-07, "loss": 0.41358011960983276, "step": 2924 }, { "epoch": 2.462962962962963, "grad_norm": 3.94612455368042, "learning_rate": 1.768422946851117e-07, "loss": 0.7347300052642822, "step": 2926 }, { "epoch": 2.4646464646464645, "grad_norm": 13.160529136657715, "learning_rate": 1.763761033623034e-07, "loss": 0.652132511138916, "step": 2928 }, { "epoch": 2.4663299663299663, "grad_norm": 7.081724643707275, "learning_rate": 1.7591119933102455e-07, "loss": 0.4731465280056, "step": 2930 }, { "epoch": 2.468013468013468, "grad_norm": 7.2086358070373535, "learning_rate": 1.7544758419306493e-07, "loss": 0.8537788391113281, "step": 2932 }, { "epoch": 2.4696969696969697, "grad_norm": 5.239010810852051, "learning_rate": 1.749852595457738e-07, "loss": 0.7587542533874512, "step": 2934 }, { "epoch": 2.4713804713804715, "grad_norm": 7.071168899536133, "learning_rate": 1.7452422698205427e-07, "loss": 0.5985921621322632, "step": 2936 }, { "epoch": 2.473063973063973, "grad_norm": 3.5129053592681885, "learning_rate": 1.7406448809035723e-07, "loss": 0.674223780632019, "step": 2938 }, { "epoch": 2.474747474747475, "grad_norm": 4.072961807250977, "learning_rate": 1.736060444546768e-07, "loss": 0.6285250186920166, "step": 2940 }, { "epoch": 2.4764309764309766, "grad_norm": 5.048702239990234, "learning_rate": 1.731488976545442e-07, "loss": 0.5890775918960571, "step": 2942 }, { "epoch": 2.478114478114478, "grad_norm": 5.47603178024292, "learning_rate": 1.726930492650223e-07, "loss": 0.6147992610931396, "step": 2944 }, { "epoch": 2.4797979797979797, "grad_norm": 3.560030221939087, "learning_rate": 1.7223850085670082e-07, "loss": 0.9968768358230591, "step": 2946 }, { "epoch": 2.4814814814814814, "grad_norm": 2.8818583488464355, "learning_rate": 1.7178525399569026e-07, "loss": 1.031359314918518, "step": 2948 }, { "epoch": 2.483164983164983, "grad_norm": 2.790241241455078, "learning_rate": 1.7133331024361668e-07, "loss": 1.090069055557251, "step": 2950 }, { "epoch": 2.484848484848485, "grad_norm": 56.298179626464844, "learning_rate": 1.7088267115761645e-07, "loss": 0.9623196125030518, "step": 2952 }, { "epoch": 2.4865319865319866, "grad_norm": 4.772038459777832, "learning_rate": 1.7043333829033093e-07, "loss": 0.6764428019523621, "step": 2954 }, { "epoch": 2.4882154882154883, "grad_norm": 15.111934661865234, "learning_rate": 1.6998531318990084e-07, "loss": 0.9181029796600342, "step": 2956 }, { "epoch": 2.48989898989899, "grad_norm": 17.119279861450195, "learning_rate": 1.695385973999612e-07, "loss": 0.603553056716919, "step": 2958 }, { "epoch": 2.4915824915824913, "grad_norm": 3.2011559009552, "learning_rate": 1.690931924596359e-07, "loss": 0.9430979490280151, "step": 2960 }, { "epoch": 2.493265993265993, "grad_norm": 10.394431114196777, "learning_rate": 1.6864909990353222e-07, "loss": 0.6838173866271973, "step": 2962 }, { "epoch": 2.494949494949495, "grad_norm": 2.8894050121307373, "learning_rate": 1.6820632126173595e-07, "loss": 0.829933762550354, "step": 2964 }, { "epoch": 2.4966329966329965, "grad_norm": 6.6212544441223145, "learning_rate": 1.6776485805980593e-07, "loss": 0.7385812997817993, "step": 2966 }, { "epoch": 2.4983164983164983, "grad_norm": 3.0128917694091797, "learning_rate": 1.673247118187685e-07, "loss": 0.9367114901542664, "step": 2968 }, { "epoch": 2.5, "grad_norm": 7.358500003814697, "learning_rate": 1.6688588405511265e-07, "loss": 0.9481908082962036, "step": 2970 }, { "epoch": 2.5016835016835017, "grad_norm": 7.319785118103027, "learning_rate": 1.6644837628078485e-07, "loss": 0.4760739207267761, "step": 2972 }, { "epoch": 2.5033670033670035, "grad_norm": 24.174762725830078, "learning_rate": 1.6601219000318317e-07, "loss": 0.529428243637085, "step": 2974 }, { "epoch": 2.505050505050505, "grad_norm": 90.50502014160156, "learning_rate": 1.6557732672515305e-07, "loss": 0.8081066012382507, "step": 2976 }, { "epoch": 2.506734006734007, "grad_norm": 9.112408638000488, "learning_rate": 1.6514378794498152e-07, "loss": 0.46742603182792664, "step": 2978 }, { "epoch": 2.5084175084175087, "grad_norm": 3.2685351371765137, "learning_rate": 1.6471157515639195e-07, "loss": 0.8512880802154541, "step": 2980 }, { "epoch": 2.51010101010101, "grad_norm": 11.603774070739746, "learning_rate": 1.6428068984853923e-07, "loss": 0.8741171360015869, "step": 2982 }, { "epoch": 2.5117845117845117, "grad_norm": 6.443422317504883, "learning_rate": 1.6385113350600476e-07, "loss": 0.4871176779270172, "step": 2984 }, { "epoch": 2.5134680134680134, "grad_norm": 22.373445510864258, "learning_rate": 1.6342290760879064e-07, "loss": 0.8540467023849487, "step": 2986 }, { "epoch": 2.515151515151515, "grad_norm": 5.546900272369385, "learning_rate": 1.6299601363231542e-07, "loss": 0.7414556741714478, "step": 2988 }, { "epoch": 2.516835016835017, "grad_norm": 4.198864459991455, "learning_rate": 1.6257045304740842e-07, "loss": 0.11034494638442993, "step": 2990 }, { "epoch": 2.5185185185185186, "grad_norm": 3.5712265968322754, "learning_rate": 1.6214622732030483e-07, "loss": 0.988459050655365, "step": 2992 }, { "epoch": 2.5202020202020203, "grad_norm": 6.247505187988281, "learning_rate": 1.617233379126409e-07, "loss": 0.6715781092643738, "step": 2994 }, { "epoch": 2.5218855218855216, "grad_norm": 4.307699680328369, "learning_rate": 1.6130178628144858e-07, "loss": 0.7559702396392822, "step": 2996 }, { "epoch": 2.5235690235690234, "grad_norm": 32.38378143310547, "learning_rate": 1.6088157387915046e-07, "loss": 0.61976158618927, "step": 2998 }, { "epoch": 2.525252525252525, "grad_norm": 5.182736396789551, "learning_rate": 1.6046270215355522e-07, "loss": 0.5721726417541504, "step": 3000 }, { "epoch": 2.526936026936027, "grad_norm": 11.062474250793457, "learning_rate": 1.600451725478522e-07, "loss": 0.5903807878494263, "step": 3002 }, { "epoch": 2.5286195286195285, "grad_norm": 3.9762520790100098, "learning_rate": 1.5962898650060646e-07, "loss": 1.0528504848480225, "step": 3004 }, { "epoch": 2.5303030303030303, "grad_norm": 12.059609413146973, "learning_rate": 1.5921414544575406e-07, "loss": 0.8805992603302002, "step": 3006 }, { "epoch": 2.531986531986532, "grad_norm": 5.079036235809326, "learning_rate": 1.5880065081259714e-07, "loss": 0.8200486898422241, "step": 3008 }, { "epoch": 2.5336700336700337, "grad_norm": 6.0202741622924805, "learning_rate": 1.583885040257985e-07, "loss": 0.5228027105331421, "step": 3010 }, { "epoch": 2.5353535353535355, "grad_norm": 10.853965759277344, "learning_rate": 1.579777065053773e-07, "loss": 0.8398838639259338, "step": 3012 }, { "epoch": 2.537037037037037, "grad_norm": 7.994739055633545, "learning_rate": 1.5756825966670399e-07, "loss": 0.7166822552680969, "step": 3014 }, { "epoch": 2.538720538720539, "grad_norm": 8.935235977172852, "learning_rate": 1.5716016492049495e-07, "loss": 0.7087036371231079, "step": 3016 }, { "epoch": 2.5404040404040407, "grad_norm": 11.376523971557617, "learning_rate": 1.5675342367280838e-07, "loss": 1.0162254571914673, "step": 3018 }, { "epoch": 2.542087542087542, "grad_norm": 3.1120080947875977, "learning_rate": 1.563480373250392e-07, "loss": 0.7916754484176636, "step": 3020 }, { "epoch": 2.5437710437710437, "grad_norm": 16.1352596282959, "learning_rate": 1.559440072739137e-07, "loss": 0.8983919024467468, "step": 3022 }, { "epoch": 2.5454545454545454, "grad_norm": 5.045600891113281, "learning_rate": 1.5554133491148556e-07, "loss": 1.0679364204406738, "step": 3024 }, { "epoch": 2.547138047138047, "grad_norm": 24.617691040039062, "learning_rate": 1.5514002162513035e-07, "loss": 0.3964739143848419, "step": 3026 }, { "epoch": 2.548821548821549, "grad_norm": 4.4443359375, "learning_rate": 1.5474006879754137e-07, "loss": 0.7372143268585205, "step": 3028 }, { "epoch": 2.5505050505050506, "grad_norm": 6.077057838439941, "learning_rate": 1.5434147780672437e-07, "loss": 0.6355978846549988, "step": 3030 }, { "epoch": 2.5521885521885523, "grad_norm": 2.639094829559326, "learning_rate": 1.539442500259929e-07, "loss": 0.5554131269454956, "step": 3032 }, { "epoch": 2.5538720538720536, "grad_norm": 5.577948093414307, "learning_rate": 1.5354838682396384e-07, "loss": 0.9816339612007141, "step": 3034 }, { "epoch": 2.5555555555555554, "grad_norm": 4.363624572753906, "learning_rate": 1.5315388956455266e-07, "loss": 1.0391297340393066, "step": 3036 }, { "epoch": 2.557239057239057, "grad_norm": 9.215215682983398, "learning_rate": 1.5276075960696817e-07, "loss": 0.7156937122344971, "step": 3038 }, { "epoch": 2.558922558922559, "grad_norm": 2.8174784183502197, "learning_rate": 1.5236899830570854e-07, "loss": 1.0105350017547607, "step": 3040 }, { "epoch": 2.5606060606060606, "grad_norm": 3.025399923324585, "learning_rate": 1.5197860701055643e-07, "loss": 0.767303466796875, "step": 3042 }, { "epoch": 2.5622895622895623, "grad_norm": 8.97220230102539, "learning_rate": 1.515895870665739e-07, "loss": 0.99961256980896, "step": 3044 }, { "epoch": 2.563973063973064, "grad_norm": 10.237662315368652, "learning_rate": 1.5120193981409848e-07, "loss": 0.7313355207443237, "step": 3046 }, { "epoch": 2.5656565656565657, "grad_norm": 4.435790538787842, "learning_rate": 1.508156665887381e-07, "loss": 0.9470257759094238, "step": 3048 }, { "epoch": 2.5673400673400675, "grad_norm": 8.973566055297852, "learning_rate": 1.5043076872136646e-07, "loss": 0.4554850459098816, "step": 3050 }, { "epoch": 2.569023569023569, "grad_norm": 3.580697774887085, "learning_rate": 1.5004724753811864e-07, "loss": 1.0283160209655762, "step": 3052 }, { "epoch": 2.570707070707071, "grad_norm": 3.4427924156188965, "learning_rate": 1.496651043603866e-07, "loss": 0.12811371684074402, "step": 3054 }, { "epoch": 2.5723905723905722, "grad_norm": 30.826913833618164, "learning_rate": 1.4928434050481424e-07, "loss": 0.7465952634811401, "step": 3056 }, { "epoch": 2.574074074074074, "grad_norm": 12.796523094177246, "learning_rate": 1.4890495728329334e-07, "loss": 0.4082253873348236, "step": 3058 }, { "epoch": 2.5757575757575757, "grad_norm": 3.8712823390960693, "learning_rate": 1.485269560029587e-07, "loss": 0.8437204360961914, "step": 3060 }, { "epoch": 2.5774410774410774, "grad_norm": 4.653648376464844, "learning_rate": 1.481503379661838e-07, "loss": 0.7468912601470947, "step": 3062 }, { "epoch": 2.579124579124579, "grad_norm": 9.738509178161621, "learning_rate": 1.4777510447057616e-07, "loss": 0.6074585318565369, "step": 3064 }, { "epoch": 2.580808080808081, "grad_norm": 4.1727495193481445, "learning_rate": 1.4740125680897328e-07, "loss": 0.7406507730484009, "step": 3066 }, { "epoch": 2.5824915824915826, "grad_norm": 9.242506980895996, "learning_rate": 1.470287962694373e-07, "loss": 0.4214972257614136, "step": 3068 }, { "epoch": 2.584175084175084, "grad_norm": 12.610301971435547, "learning_rate": 1.4665772413525175e-07, "loss": 0.17865464091300964, "step": 3070 }, { "epoch": 2.5858585858585856, "grad_norm": 21.455978393554688, "learning_rate": 1.4628804168491636e-07, "loss": 0.6329761743545532, "step": 3072 }, { "epoch": 2.5875420875420874, "grad_norm": 5.749107837677002, "learning_rate": 1.4591975019214238e-07, "loss": 1.0531988143920898, "step": 3074 }, { "epoch": 2.589225589225589, "grad_norm": 6.151569366455078, "learning_rate": 1.4555285092584917e-07, "loss": 0.4620995819568634, "step": 3076 }, { "epoch": 2.590909090909091, "grad_norm": 9.935331344604492, "learning_rate": 1.451873451501592e-07, "loss": 0.9808303117752075, "step": 3078 }, { "epoch": 2.5925925925925926, "grad_norm": 8.355198860168457, "learning_rate": 1.448232341243933e-07, "loss": 0.7373911142349243, "step": 3080 }, { "epoch": 2.5942760942760943, "grad_norm": 3.359959125518799, "learning_rate": 1.4446051910306743e-07, "loss": 1.0398435592651367, "step": 3082 }, { "epoch": 2.595959595959596, "grad_norm": 6.27101469039917, "learning_rate": 1.440992013358875e-07, "loss": 0.6558928489685059, "step": 3084 }, { "epoch": 2.5976430976430978, "grad_norm": 4.128625869750977, "learning_rate": 1.4373928206774504e-07, "loss": 0.6560384035110474, "step": 3086 }, { "epoch": 2.5993265993265995, "grad_norm": 4.040182113647461, "learning_rate": 1.4338076253871345e-07, "loss": 0.9103618264198303, "step": 3088 }, { "epoch": 2.601010101010101, "grad_norm": 3.0742857456207275, "learning_rate": 1.4302364398404344e-07, "loss": 0.9666507244110107, "step": 3090 }, { "epoch": 2.602693602693603, "grad_norm": 6.105360507965088, "learning_rate": 1.4266792763415863e-07, "loss": 0.7367033362388611, "step": 3092 }, { "epoch": 2.6043771043771042, "grad_norm": 4.493860244750977, "learning_rate": 1.4231361471465143e-07, "loss": 0.614148736000061, "step": 3094 }, { "epoch": 2.606060606060606, "grad_norm": 2.295088052749634, "learning_rate": 1.4196070644627903e-07, "loss": 0.7760593891143799, "step": 3096 }, { "epoch": 2.6077441077441077, "grad_norm": 3.1130990982055664, "learning_rate": 1.4160920404495887e-07, "loss": 0.6581928730010986, "step": 3098 }, { "epoch": 2.6094276094276094, "grad_norm": 4.99691104888916, "learning_rate": 1.4125910872176466e-07, "loss": 0.7904366254806519, "step": 3100 }, { "epoch": 2.611111111111111, "grad_norm": 5.191680908203125, "learning_rate": 1.4091042168292211e-07, "loss": 0.6951947212219238, "step": 3102 }, { "epoch": 2.612794612794613, "grad_norm": 3.600395679473877, "learning_rate": 1.4056314412980463e-07, "loss": 0.9162784218788147, "step": 3104 }, { "epoch": 2.6144781144781146, "grad_norm": 7.186698913574219, "learning_rate": 1.402172772589297e-07, "loss": 0.917360782623291, "step": 3106 }, { "epoch": 2.616161616161616, "grad_norm": 19.83490753173828, "learning_rate": 1.3987282226195416e-07, "loss": 0.2932959198951721, "step": 3108 }, { "epoch": 2.6178451178451176, "grad_norm": 3.4233388900756836, "learning_rate": 1.395297803256703e-07, "loss": 0.9224929809570312, "step": 3110 }, { "epoch": 2.6195286195286194, "grad_norm": 5.625677585601807, "learning_rate": 1.39188152632002e-07, "loss": 0.526210367679596, "step": 3112 }, { "epoch": 2.621212121212121, "grad_norm": 3.952099323272705, "learning_rate": 1.3884794035800056e-07, "loss": 0.610154926776886, "step": 3114 }, { "epoch": 2.622895622895623, "grad_norm": 3.5759785175323486, "learning_rate": 1.3850914467584013e-07, "loss": 0.9689432382583618, "step": 3116 }, { "epoch": 2.6245791245791246, "grad_norm": 3.9002864360809326, "learning_rate": 1.3817176675281456e-07, "loss": 1.0947141647338867, "step": 3118 }, { "epoch": 2.6262626262626263, "grad_norm": 8.866259574890137, "learning_rate": 1.378358077513328e-07, "loss": 0.7083148956298828, "step": 3120 }, { "epoch": 2.627946127946128, "grad_norm": 2.6722095012664795, "learning_rate": 1.3750126882891475e-07, "loss": 0.9863229393959045, "step": 3122 }, { "epoch": 2.6296296296296298, "grad_norm": 3.054203510284424, "learning_rate": 1.371681511381879e-07, "loss": 0.9456894397735596, "step": 3124 }, { "epoch": 2.6313131313131315, "grad_norm": 7.191009521484375, "learning_rate": 1.3683645582688296e-07, "loss": 0.7224574685096741, "step": 3126 }, { "epoch": 2.6329966329966332, "grad_norm": 4.021665096282959, "learning_rate": 1.3650618403782963e-07, "loss": 0.8824139833450317, "step": 3128 }, { "epoch": 2.634680134680135, "grad_norm": 8.395366668701172, "learning_rate": 1.3617733690895327e-07, "loss": 0.6597309112548828, "step": 3130 }, { "epoch": 2.6363636363636362, "grad_norm": 5.360447883605957, "learning_rate": 1.3584991557327076e-07, "loss": 0.3653567433357239, "step": 3132 }, { "epoch": 2.638047138047138, "grad_norm": 5.391804218292236, "learning_rate": 1.355239211588861e-07, "loss": 0.9479780793190002, "step": 3134 }, { "epoch": 2.6397306397306397, "grad_norm": 5.806617736816406, "learning_rate": 1.3519935478898732e-07, "loss": 0.880384087562561, "step": 3136 }, { "epoch": 2.6414141414141414, "grad_norm": 5.112968921661377, "learning_rate": 1.348762175818422e-07, "loss": 0.5330120921134949, "step": 3138 }, { "epoch": 2.643097643097643, "grad_norm": 5.756229400634766, "learning_rate": 1.345545106507943e-07, "loss": 1.0363292694091797, "step": 3140 }, { "epoch": 2.644781144781145, "grad_norm": 3.318345785140991, "learning_rate": 1.3423423510425942e-07, "loss": 0.6152174472808838, "step": 3142 }, { "epoch": 2.6464646464646466, "grad_norm": 2.97463321685791, "learning_rate": 1.3391539204572155e-07, "loss": 0.9172265529632568, "step": 3144 }, { "epoch": 2.648148148148148, "grad_norm": 3.3665931224823, "learning_rate": 1.3359798257372913e-07, "loss": 1.0443644523620605, "step": 3146 }, { "epoch": 2.6498316498316496, "grad_norm": 18.04493522644043, "learning_rate": 1.332820077818914e-07, "loss": 0.6649324297904968, "step": 3148 }, { "epoch": 2.6515151515151514, "grad_norm": 3.4756081104278564, "learning_rate": 1.3296746875887445e-07, "loss": 0.9889142513275146, "step": 3150 }, { "epoch": 2.653198653198653, "grad_norm": 6.93226432800293, "learning_rate": 1.3265436658839757e-07, "loss": 0.3890528082847595, "step": 3152 }, { "epoch": 2.654882154882155, "grad_norm": 26.966766357421875, "learning_rate": 1.3234270234922947e-07, "loss": 1.0187561511993408, "step": 3154 }, { "epoch": 2.6565656565656566, "grad_norm": 7.550897121429443, "learning_rate": 1.3203247711518466e-07, "loss": 0.691092848777771, "step": 3156 }, { "epoch": 2.6582491582491583, "grad_norm": 26.227054595947266, "learning_rate": 1.3172369195511945e-07, "loss": 0.5036376118659973, "step": 3158 }, { "epoch": 2.65993265993266, "grad_norm": 28.069713592529297, "learning_rate": 1.3141634793292868e-07, "loss": 0.5947234034538269, "step": 3160 }, { "epoch": 2.6616161616161618, "grad_norm": 4.802524566650391, "learning_rate": 1.3111044610754202e-07, "loss": 0.7470720410346985, "step": 3162 }, { "epoch": 2.6632996632996635, "grad_norm": 7.207154273986816, "learning_rate": 1.3080598753291972e-07, "loss": 0.9500914812088013, "step": 3164 }, { "epoch": 2.6649831649831652, "grad_norm": 9.974961280822754, "learning_rate": 1.3050297325804975e-07, "loss": 0.7958386540412903, "step": 3166 }, { "epoch": 2.6666666666666665, "grad_norm": 4.640317440032959, "learning_rate": 1.3020140432694386e-07, "loss": 0.8439849615097046, "step": 3168 }, { "epoch": 2.6683501683501682, "grad_norm": 8.00158977508545, "learning_rate": 1.2990128177863372e-07, "loss": 0.7472466230392456, "step": 3170 }, { "epoch": 2.67003367003367, "grad_norm": 5.717433929443359, "learning_rate": 1.2960260664716803e-07, "loss": 1.0863356590270996, "step": 3172 }, { "epoch": 2.6717171717171717, "grad_norm": 3.502814769744873, "learning_rate": 1.293053799616082e-07, "loss": 1.0433530807495117, "step": 3174 }, { "epoch": 2.6734006734006734, "grad_norm": 8.597749710083008, "learning_rate": 1.2900960274602512e-07, "loss": 0.6032207608222961, "step": 3176 }, { "epoch": 2.675084175084175, "grad_norm": 2.7261288166046143, "learning_rate": 1.2871527601949583e-07, "loss": 1.049224853515625, "step": 3178 }, { "epoch": 2.676767676767677, "grad_norm": 3.249244213104248, "learning_rate": 1.284224007960998e-07, "loss": 0.7596105337142944, "step": 3180 }, { "epoch": 2.678451178451178, "grad_norm": 3.4446780681610107, "learning_rate": 1.281309780849153e-07, "loss": 0.9340767860412598, "step": 3182 }, { "epoch": 2.68013468013468, "grad_norm": 4.839624404907227, "learning_rate": 1.278410088900162e-07, "loss": 1.0885896682739258, "step": 3184 }, { "epoch": 2.6818181818181817, "grad_norm": 4.542941093444824, "learning_rate": 1.2755249421046854e-07, "loss": 0.9115286469459534, "step": 3186 }, { "epoch": 2.6835016835016834, "grad_norm": 42.488826751708984, "learning_rate": 1.2726543504032654e-07, "loss": 0.7943265438079834, "step": 3188 }, { "epoch": 2.685185185185185, "grad_norm": 1.467315435409546, "learning_rate": 1.2697983236862997e-07, "loss": 0.7184177041053772, "step": 3190 }, { "epoch": 2.686868686868687, "grad_norm": 4.0649213790893555, "learning_rate": 1.2669568717940022e-07, "loss": 0.7381956577301025, "step": 3192 }, { "epoch": 2.6885521885521886, "grad_norm": 3.688559055328369, "learning_rate": 1.2641300045163692e-07, "loss": 0.8747034072875977, "step": 3194 }, { "epoch": 2.6902356902356903, "grad_norm": 6.0027337074279785, "learning_rate": 1.2613177315931483e-07, "loss": 0.6696113348007202, "step": 3196 }, { "epoch": 2.691919191919192, "grad_norm": 4.327197551727295, "learning_rate": 1.258520062713804e-07, "loss": 0.8139593601226807, "step": 3198 }, { "epoch": 2.6936026936026938, "grad_norm": 3.2183837890625, "learning_rate": 1.255737007517482e-07, "loss": 0.9807404279708862, "step": 3200 }, { "epoch": 2.6952861952861955, "grad_norm": 7.404758453369141, "learning_rate": 1.2529685755929779e-07, "loss": 0.8705126047134399, "step": 3202 }, { "epoch": 2.6969696969696972, "grad_norm": 4.221065044403076, "learning_rate": 1.250214776478705e-07, "loss": 0.7467024326324463, "step": 3204 }, { "epoch": 2.6986531986531985, "grad_norm": 8.078044891357422, "learning_rate": 1.2474756196626604e-07, "loss": 0.9621119499206543, "step": 3206 }, { "epoch": 2.7003367003367003, "grad_norm": 4.394944190979004, "learning_rate": 1.2447511145823904e-07, "loss": 0.6447912454605103, "step": 3208 }, { "epoch": 2.702020202020202, "grad_norm": 3.8557052612304688, "learning_rate": 1.2420412706249637e-07, "loss": 0.9262001514434814, "step": 3210 }, { "epoch": 2.7037037037037037, "grad_norm": 4.75685977935791, "learning_rate": 1.2393460971269306e-07, "loss": 0.6955965161323547, "step": 3212 }, { "epoch": 2.7053872053872055, "grad_norm": 4.87318229675293, "learning_rate": 1.2366656033742985e-07, "loss": 0.6773475408554077, "step": 3214 }, { "epoch": 2.707070707070707, "grad_norm": 5.815934658050537, "learning_rate": 1.233999798602498e-07, "loss": 0.48384755849838257, "step": 3216 }, { "epoch": 2.708754208754209, "grad_norm": 3.4917256832122803, "learning_rate": 1.2313486919963455e-07, "loss": 0.8545089960098267, "step": 3218 }, { "epoch": 2.71043771043771, "grad_norm": 19.76070785522461, "learning_rate": 1.2287122926900205e-07, "loss": 0.4410606026649475, "step": 3220 }, { "epoch": 2.712121212121212, "grad_norm": 5.099394798278809, "learning_rate": 1.2260906097670272e-07, "loss": 0.8183356523513794, "step": 3222 }, { "epoch": 2.7138047138047137, "grad_norm": 4.549499034881592, "learning_rate": 1.2234836522601667e-07, "loss": 0.5615583062171936, "step": 3224 }, { "epoch": 2.7154882154882154, "grad_norm": 4.583916187286377, "learning_rate": 1.2208914291515035e-07, "loss": 0.4506787657737732, "step": 3226 }, { "epoch": 2.717171717171717, "grad_norm": 3.6649909019470215, "learning_rate": 1.218313949372339e-07, "loss": 0.8952913284301758, "step": 3228 }, { "epoch": 2.718855218855219, "grad_norm": 69.62271118164062, "learning_rate": 1.2157512218031732e-07, "loss": 0.4370509088039398, "step": 3230 }, { "epoch": 2.7205387205387206, "grad_norm": 8.150357246398926, "learning_rate": 1.2132032552736818e-07, "loss": 0.9717521071434021, "step": 3232 }, { "epoch": 2.7222222222222223, "grad_norm": 3.7782340049743652, "learning_rate": 1.2106700585626828e-07, "loss": 0.7311519384384155, "step": 3234 }, { "epoch": 2.723905723905724, "grad_norm": 3.866910219192505, "learning_rate": 1.208151640398103e-07, "loss": 0.8734760880470276, "step": 3236 }, { "epoch": 2.725589225589226, "grad_norm": 4.73469877243042, "learning_rate": 1.2056480094569536e-07, "loss": 0.855620265007019, "step": 3238 }, { "epoch": 2.7272727272727275, "grad_norm": 4.2583699226379395, "learning_rate": 1.203159174365296e-07, "loss": 0.8622401356697083, "step": 3240 }, { "epoch": 2.728956228956229, "grad_norm": 3.042707920074463, "learning_rate": 1.200685143698214e-07, "loss": 0.8962169885635376, "step": 3242 }, { "epoch": 2.7306397306397305, "grad_norm": 3.5346055030822754, "learning_rate": 1.1982259259797856e-07, "loss": 0.6588426232337952, "step": 3244 }, { "epoch": 2.7323232323232323, "grad_norm": 3.266772747039795, "learning_rate": 1.1957815296830494e-07, "loss": 0.8494440317153931, "step": 3246 }, { "epoch": 2.734006734006734, "grad_norm": 15.043532371520996, "learning_rate": 1.1933519632299793e-07, "loss": 0.9317235946655273, "step": 3248 }, { "epoch": 2.7356902356902357, "grad_norm": 5.527817249298096, "learning_rate": 1.1909372349914553e-07, "loss": 0.9118114709854126, "step": 3250 }, { "epoch": 2.7373737373737375, "grad_norm": 3.4315481185913086, "learning_rate": 1.1885373532872297e-07, "loss": 0.4174748957157135, "step": 3252 }, { "epoch": 2.739057239057239, "grad_norm": 3.0668060779571533, "learning_rate": 1.1861523263859069e-07, "loss": 0.6279425621032715, "step": 3254 }, { "epoch": 2.7407407407407405, "grad_norm": 5.9774651527404785, "learning_rate": 1.1837821625049076e-07, "loss": 0.6725097894668579, "step": 3256 }, { "epoch": 2.742424242424242, "grad_norm": 3.119798183441162, "learning_rate": 1.1814268698104425e-07, "loss": 0.70163893699646, "step": 3258 }, { "epoch": 2.744107744107744, "grad_norm": 5.229933261871338, "learning_rate": 1.1790864564174873e-07, "loss": 0.5799877643585205, "step": 3260 }, { "epoch": 2.7457912457912457, "grad_norm": 8.287593841552734, "learning_rate": 1.1767609303897506e-07, "loss": 0.7188424468040466, "step": 3262 }, { "epoch": 2.7474747474747474, "grad_norm": 19.248619079589844, "learning_rate": 1.1744502997396474e-07, "loss": 0.9669326543807983, "step": 3264 }, { "epoch": 2.749158249158249, "grad_norm": 6.006091594696045, "learning_rate": 1.1721545724282727e-07, "loss": 1.0581872463226318, "step": 3266 }, { "epoch": 2.750841750841751, "grad_norm": 20.20122528076172, "learning_rate": 1.1698737563653745e-07, "loss": 0.5354408621788025, "step": 3268 }, { "epoch": 2.7525252525252526, "grad_norm": 3.2330803871154785, "learning_rate": 1.1676078594093212e-07, "loss": 1.0935049057006836, "step": 3270 }, { "epoch": 2.7542087542087543, "grad_norm": 15.937528610229492, "learning_rate": 1.1653568893670834e-07, "loss": 0.5233392715454102, "step": 3272 }, { "epoch": 2.755892255892256, "grad_norm": 5.933197498321533, "learning_rate": 1.1631208539941993e-07, "loss": 0.8539717197418213, "step": 3274 }, { "epoch": 2.757575757575758, "grad_norm": 34.71628189086914, "learning_rate": 1.1608997609947508e-07, "loss": 0.35395973920822144, "step": 3276 }, { "epoch": 2.7592592592592595, "grad_norm": 3.8929779529571533, "learning_rate": 1.158693618021339e-07, "loss": 0.09008853882551193, "step": 3278 }, { "epoch": 2.760942760942761, "grad_norm": 3.974247694015503, "learning_rate": 1.1565024326750545e-07, "loss": 1.1840243339538574, "step": 3280 }, { "epoch": 2.7626262626262625, "grad_norm": 4.763762474060059, "learning_rate": 1.1543262125054523e-07, "loss": 1.1094727516174316, "step": 3282 }, { "epoch": 2.7643097643097643, "grad_norm": 26.06635093688965, "learning_rate": 1.1521649650105264e-07, "loss": 0.40256187319755554, "step": 3284 }, { "epoch": 2.765993265993266, "grad_norm": 3.0916554927825928, "learning_rate": 1.150018697636685e-07, "loss": 0.9139037132263184, "step": 3286 }, { "epoch": 2.7676767676767677, "grad_norm": 5.2920026779174805, "learning_rate": 1.1478874177787204e-07, "loss": 0.8635107278823853, "step": 3288 }, { "epoch": 2.7693602693602695, "grad_norm": 7.0219573974609375, "learning_rate": 1.1457711327797898e-07, "loss": 0.3769862651824951, "step": 3290 }, { "epoch": 2.771043771043771, "grad_norm": 4.999329090118408, "learning_rate": 1.1436698499313855e-07, "loss": 1.1161870956420898, "step": 3292 }, { "epoch": 2.7727272727272725, "grad_norm": 3.669652223587036, "learning_rate": 1.1415835764733103e-07, "loss": 0.949033796787262, "step": 3294 }, { "epoch": 2.774410774410774, "grad_norm": 3.6179237365722656, "learning_rate": 1.1395123195936543e-07, "loss": 0.9398729801177979, "step": 3296 }, { "epoch": 2.776094276094276, "grad_norm": 4.2878947257995605, "learning_rate": 1.1374560864287696e-07, "loss": 0.3119538426399231, "step": 3298 }, { "epoch": 2.7777777777777777, "grad_norm": 3.065671443939209, "learning_rate": 1.1354148840632437e-07, "loss": 0.5504776239395142, "step": 3300 }, { "epoch": 2.7794612794612794, "grad_norm": 8.210501670837402, "learning_rate": 1.1333887195298781e-07, "loss": 0.6545171737670898, "step": 3302 }, { "epoch": 2.781144781144781, "grad_norm": 24.731203079223633, "learning_rate": 1.1313775998096624e-07, "loss": 0.5451493263244629, "step": 3304 }, { "epoch": 2.782828282828283, "grad_norm": 24.600292205810547, "learning_rate": 1.1293815318317493e-07, "loss": 0.8595808148384094, "step": 3306 }, { "epoch": 2.7845117845117846, "grad_norm": 9.5689058303833, "learning_rate": 1.1274005224734338e-07, "loss": 0.573542058467865, "step": 3308 }, { "epoch": 2.7861952861952863, "grad_norm": 7.985528945922852, "learning_rate": 1.1254345785601264e-07, "loss": 0.7355629205703735, "step": 3310 }, { "epoch": 2.787878787878788, "grad_norm": 4.394935607910156, "learning_rate": 1.1234837068653313e-07, "loss": 0.5512019395828247, "step": 3312 }, { "epoch": 2.78956228956229, "grad_norm": 3.483132839202881, "learning_rate": 1.1215479141106207e-07, "loss": 0.8127498626708984, "step": 3314 }, { "epoch": 2.791245791245791, "grad_norm": 18.29281234741211, "learning_rate": 1.119627206965618e-07, "loss": 1.0899267196655273, "step": 3316 }, { "epoch": 2.792929292929293, "grad_norm": 9.126869201660156, "learning_rate": 1.1177215920479654e-07, "loss": 0.8100671172142029, "step": 3318 }, { "epoch": 2.7946127946127945, "grad_norm": 7.040780544281006, "learning_rate": 1.1158310759233083e-07, "loss": 0.43027007579803467, "step": 3320 }, { "epoch": 2.7962962962962963, "grad_norm": 23.27480697631836, "learning_rate": 1.113955665105271e-07, "loss": 0.7666506767272949, "step": 3322 }, { "epoch": 2.797979797979798, "grad_norm": 3.0902600288391113, "learning_rate": 1.1120953660554319e-07, "loss": 0.769917905330658, "step": 3324 }, { "epoch": 2.7996632996632997, "grad_norm": 12.026223182678223, "learning_rate": 1.110250185183305e-07, "loss": 0.4246026277542114, "step": 3326 }, { "epoch": 2.8013468013468015, "grad_norm": 3.31463360786438, "learning_rate": 1.108420128846314e-07, "loss": 0.8164354562759399, "step": 3328 }, { "epoch": 2.8030303030303028, "grad_norm": 4.4698486328125, "learning_rate": 1.1066052033497734e-07, "loss": 0.8739584684371948, "step": 3330 }, { "epoch": 2.8047138047138045, "grad_norm": 11.48538875579834, "learning_rate": 1.1048054149468646e-07, "loss": 0.6384426951408386, "step": 3332 }, { "epoch": 2.8063973063973062, "grad_norm": 3.5100231170654297, "learning_rate": 1.1030207698386169e-07, "loss": 0.7716495990753174, "step": 3334 }, { "epoch": 2.808080808080808, "grad_norm": 14.741443634033203, "learning_rate": 1.1012512741738827e-07, "loss": 0.7237218618392944, "step": 3336 }, { "epoch": 2.8097643097643097, "grad_norm": 4.032273292541504, "learning_rate": 1.0994969340493191e-07, "loss": 0.4440898895263672, "step": 3338 }, { "epoch": 2.8114478114478114, "grad_norm": 9.446307182312012, "learning_rate": 1.0977577555093672e-07, "loss": 0.791456937789917, "step": 3340 }, { "epoch": 2.813131313131313, "grad_norm": 5.13400936126709, "learning_rate": 1.0960337445462273e-07, "loss": 0.897986650466919, "step": 3342 }, { "epoch": 2.814814814814815, "grad_norm": 3.0911977291107178, "learning_rate": 1.0943249070998429e-07, "loss": 0.4430878162384033, "step": 3344 }, { "epoch": 2.8164983164983166, "grad_norm": 3.849289655685425, "learning_rate": 1.0926312490578795e-07, "loss": 0.9019819498062134, "step": 3346 }, { "epoch": 2.8181818181818183, "grad_norm": 11.346426010131836, "learning_rate": 1.0909527762556997e-07, "loss": 0.6365593671798706, "step": 3348 }, { "epoch": 2.81986531986532, "grad_norm": 33.87907028198242, "learning_rate": 1.089289494476349e-07, "loss": 0.9726608395576477, "step": 3350 }, { "epoch": 2.821548821548822, "grad_norm": 13.32236385345459, "learning_rate": 1.0876414094505339e-07, "loss": 0.9321683049201965, "step": 3352 }, { "epoch": 2.823232323232323, "grad_norm": 3.9304733276367188, "learning_rate": 1.0860085268566002e-07, "loss": 0.8083049058914185, "step": 3354 }, { "epoch": 2.824915824915825, "grad_norm": 14.073712348937988, "learning_rate": 1.084390852320515e-07, "loss": 0.8524267673492432, "step": 3356 }, { "epoch": 2.8265993265993266, "grad_norm": 3.060366630554199, "learning_rate": 1.0827883914158484e-07, "loss": 0.6664683818817139, "step": 3358 }, { "epoch": 2.8282828282828283, "grad_norm": 7.288575172424316, "learning_rate": 1.0812011496637521e-07, "loss": 0.6165136098861694, "step": 3360 }, { "epoch": 2.82996632996633, "grad_norm": 9.925680160522461, "learning_rate": 1.0796291325329419e-07, "loss": 0.782645583152771, "step": 3362 }, { "epoch": 2.8316498316498318, "grad_norm": 7.882792949676514, "learning_rate": 1.0780723454396788e-07, "loss": 0.6414890289306641, "step": 3364 }, { "epoch": 2.8333333333333335, "grad_norm": 3.061633825302124, "learning_rate": 1.0765307937477489e-07, "loss": 0.5577088594436646, "step": 3366 }, { "epoch": 2.8350168350168348, "grad_norm": 10.289335250854492, "learning_rate": 1.0750044827684457e-07, "loss": 0.5626717209815979, "step": 3368 }, { "epoch": 2.8367003367003365, "grad_norm": 4.862819194793701, "learning_rate": 1.073493417760554e-07, "loss": 0.8943830132484436, "step": 3370 }, { "epoch": 2.8383838383838382, "grad_norm": 5.187376499176025, "learning_rate": 1.0719976039303275e-07, "loss": 0.747265100479126, "step": 3372 }, { "epoch": 2.84006734006734, "grad_norm": 11.796051979064941, "learning_rate": 1.0705170464314741e-07, "loss": 0.46709078550338745, "step": 3374 }, { "epoch": 2.8417508417508417, "grad_norm": 2.545010805130005, "learning_rate": 1.069051750365139e-07, "loss": 1.0213559865951538, "step": 3376 }, { "epoch": 2.8434343434343434, "grad_norm": 4.854709148406982, "learning_rate": 1.0676017207798818e-07, "loss": 0.872999906539917, "step": 3378 }, { "epoch": 2.845117845117845, "grad_norm": 4.323747158050537, "learning_rate": 1.0661669626716654e-07, "loss": 0.6622998118400574, "step": 3380 }, { "epoch": 2.846801346801347, "grad_norm": 15.708161354064941, "learning_rate": 1.0647474809838358e-07, "loss": 0.6927282810211182, "step": 3382 }, { "epoch": 2.8484848484848486, "grad_norm": 5.333789348602295, "learning_rate": 1.0633432806071032e-07, "loss": 0.6410980224609375, "step": 3384 }, { "epoch": 2.8501683501683504, "grad_norm": 5.245711803436279, "learning_rate": 1.0619543663795291e-07, "loss": 0.8350679874420166, "step": 3386 }, { "epoch": 2.851851851851852, "grad_norm": 1.0896079540252686, "learning_rate": 1.0605807430865085e-07, "loss": 0.8719289302825928, "step": 3388 }, { "epoch": 2.8535353535353534, "grad_norm": 6.591011047363281, "learning_rate": 1.0592224154607507e-07, "loss": 0.6173574328422546, "step": 3390 }, { "epoch": 2.855218855218855, "grad_norm": 6.573550224304199, "learning_rate": 1.0578793881822661e-07, "loss": 0.5777392387390137, "step": 3392 }, { "epoch": 2.856902356902357, "grad_norm": 2.5468170642852783, "learning_rate": 1.056551665878349e-07, "loss": 1.0248732566833496, "step": 3394 }, { "epoch": 2.8585858585858586, "grad_norm": 3.26706862449646, "learning_rate": 1.055239253123561e-07, "loss": 1.0530986785888672, "step": 3396 }, { "epoch": 2.8602693602693603, "grad_norm": 4.729337215423584, "learning_rate": 1.0539421544397163e-07, "loss": 0.5177785158157349, "step": 3398 }, { "epoch": 2.861952861952862, "grad_norm": 10.192523002624512, "learning_rate": 1.052660374295866e-07, "loss": 0.48648959398269653, "step": 3400 }, { "epoch": 2.8636363636363638, "grad_norm": 9.012269973754883, "learning_rate": 1.0513939171082812e-07, "loss": 0.5270302295684814, "step": 3402 }, { "epoch": 2.865319865319865, "grad_norm": 4.075140476226807, "learning_rate": 1.0501427872404407e-07, "loss": 0.49075964093208313, "step": 3404 }, { "epoch": 2.8670033670033668, "grad_norm": 5.544951438903809, "learning_rate": 1.0489069890030129e-07, "loss": 0.883784294128418, "step": 3406 }, { "epoch": 2.8686868686868685, "grad_norm": 4.438905715942383, "learning_rate": 1.0476865266538431e-07, "loss": 0.43367594480514526, "step": 3408 }, { "epoch": 2.8703703703703702, "grad_norm": 4.913120269775391, "learning_rate": 1.0464814043979367e-07, "loss": 0.9170664548873901, "step": 3410 }, { "epoch": 2.872053872053872, "grad_norm": 4.941054821014404, "learning_rate": 1.0452916263874477e-07, "loss": 0.5428977608680725, "step": 3412 }, { "epoch": 2.8737373737373737, "grad_norm": 5.263132572174072, "learning_rate": 1.0441171967216618e-07, "loss": 0.7901989817619324, "step": 3414 }, { "epoch": 2.8754208754208754, "grad_norm": 9.088157653808594, "learning_rate": 1.042958119446983e-07, "loss": 0.2979390025138855, "step": 3416 }, { "epoch": 2.877104377104377, "grad_norm": 10.997780799865723, "learning_rate": 1.0418143985569209e-07, "loss": 0.6635469198226929, "step": 3418 }, { "epoch": 2.878787878787879, "grad_norm": 3.945129871368408, "learning_rate": 1.0406860379920746e-07, "loss": 0.4760744273662567, "step": 3420 }, { "epoch": 2.8804713804713806, "grad_norm": 8.165509223937988, "learning_rate": 1.0395730416401211e-07, "loss": 0.8622602820396423, "step": 3422 }, { "epoch": 2.8821548821548824, "grad_norm": 2.621253728866577, "learning_rate": 1.0384754133358014e-07, "loss": 0.6706223487854004, "step": 3424 }, { "epoch": 2.883838383838384, "grad_norm": 7.00462532043457, "learning_rate": 1.0373931568609063e-07, "loss": 0.7515609264373779, "step": 3426 }, { "epoch": 2.8855218855218854, "grad_norm": 6.707590103149414, "learning_rate": 1.0363262759442654e-07, "loss": 0.6428268551826477, "step": 3428 }, { "epoch": 2.887205387205387, "grad_norm": 15.433223724365234, "learning_rate": 1.0352747742617327e-07, "loss": 0.4187021851539612, "step": 3430 }, { "epoch": 2.888888888888889, "grad_norm": 5.754827976226807, "learning_rate": 1.0342386554361728e-07, "loss": 0.6734333634376526, "step": 3432 }, { "epoch": 2.8905723905723906, "grad_norm": 16.424394607543945, "learning_rate": 1.0332179230374509e-07, "loss": 0.6447641253471375, "step": 3434 }, { "epoch": 2.8922558922558923, "grad_norm": 7.980709552764893, "learning_rate": 1.032212580582421e-07, "loss": 1.063244104385376, "step": 3436 }, { "epoch": 2.893939393939394, "grad_norm": 11.630234718322754, "learning_rate": 1.0312226315349098e-07, "loss": 0.9426344037055969, "step": 3438 }, { "epoch": 2.8956228956228958, "grad_norm": 4.453112602233887, "learning_rate": 1.0302480793057082e-07, "loss": 0.8930955529212952, "step": 3440 }, { "epoch": 2.897306397306397, "grad_norm": 4.323969841003418, "learning_rate": 1.0292889272525597e-07, "loss": 1.0264780521392822, "step": 3442 }, { "epoch": 2.898989898989899, "grad_norm": 4.514182090759277, "learning_rate": 1.0283451786801456e-07, "loss": 0.4191988706588745, "step": 3444 }, { "epoch": 2.9006734006734005, "grad_norm": 3.260342597961426, "learning_rate": 1.0274168368400774e-07, "loss": 0.5836988687515259, "step": 3446 }, { "epoch": 2.9023569023569022, "grad_norm": 3.745016574859619, "learning_rate": 1.0265039049308834e-07, "loss": 1.1238579750061035, "step": 3448 }, { "epoch": 2.904040404040404, "grad_norm": 12.539746284484863, "learning_rate": 1.0256063860979977e-07, "loss": 0.40760430693626404, "step": 3450 }, { "epoch": 2.9057239057239057, "grad_norm": 2.8215339183807373, "learning_rate": 1.0247242834337502e-07, "loss": 0.7182443737983704, "step": 3452 }, { "epoch": 2.9074074074074074, "grad_norm": 27.12503433227539, "learning_rate": 1.0238575999773569e-07, "loss": 0.6834052205085754, "step": 3454 }, { "epoch": 2.909090909090909, "grad_norm": 2.9466779232025146, "learning_rate": 1.0230063387149058e-07, "loss": 1.065738320350647, "step": 3456 }, { "epoch": 2.910774410774411, "grad_norm": 7.221368789672852, "learning_rate": 1.0221705025793505e-07, "loss": 0.8638687133789062, "step": 3458 }, { "epoch": 2.9124579124579126, "grad_norm": 15.298805236816406, "learning_rate": 1.021350094450498e-07, "loss": 1.0362968444824219, "step": 3460 }, { "epoch": 2.9141414141414144, "grad_norm": 2.772352695465088, "learning_rate": 1.0205451171549999e-07, "loss": 1.0920348167419434, "step": 3462 }, { "epoch": 2.915824915824916, "grad_norm": 6.832037448883057, "learning_rate": 1.0197555734663415e-07, "loss": 0.8181166648864746, "step": 3464 }, { "epoch": 2.9175084175084174, "grad_norm": 10.260382652282715, "learning_rate": 1.0189814661048329e-07, "loss": 1.0308600664138794, "step": 3466 }, { "epoch": 2.919191919191919, "grad_norm": 8.912053108215332, "learning_rate": 1.0182227977375995e-07, "loss": 0.6785660982131958, "step": 3468 }, { "epoch": 2.920875420875421, "grad_norm": 3.7292585372924805, "learning_rate": 1.0174795709785737e-07, "loss": 0.2668553590774536, "step": 3470 }, { "epoch": 2.9225589225589226, "grad_norm": 15.397346496582031, "learning_rate": 1.0167517883884837e-07, "loss": 0.8357558250427246, "step": 3472 }, { "epoch": 2.9242424242424243, "grad_norm": 5.987993240356445, "learning_rate": 1.016039452474847e-07, "loss": 0.7866486310958862, "step": 3474 }, { "epoch": 2.925925925925926, "grad_norm": 5.408625602722168, "learning_rate": 1.0153425656919609e-07, "loss": 0.40831270813941956, "step": 3476 }, { "epoch": 2.9276094276094278, "grad_norm": 5.647230625152588, "learning_rate": 1.0146611304408931e-07, "loss": 0.8993617296218872, "step": 3478 }, { "epoch": 2.929292929292929, "grad_norm": 4.667529106140137, "learning_rate": 1.0139951490694746e-07, "loss": 0.570891261100769, "step": 3480 }, { "epoch": 2.930976430976431, "grad_norm": 3.3206403255462646, "learning_rate": 1.013344623872292e-07, "loss": 0.8598926663398743, "step": 3482 }, { "epoch": 2.9326599326599325, "grad_norm": 16.66160774230957, "learning_rate": 1.0127095570906781e-07, "loss": 0.6207292079925537, "step": 3484 }, { "epoch": 2.9343434343434343, "grad_norm": 5.509361267089844, "learning_rate": 1.0120899509127051e-07, "loss": 0.7470987439155579, "step": 3486 }, { "epoch": 2.936026936026936, "grad_norm": 4.825704097747803, "learning_rate": 1.0114858074731771e-07, "loss": 0.8294214606285095, "step": 3488 }, { "epoch": 2.9377104377104377, "grad_norm": 6.74330997467041, "learning_rate": 1.0108971288536224e-07, "loss": 0.8246122598648071, "step": 3490 }, { "epoch": 2.9393939393939394, "grad_norm": 14.303301811218262, "learning_rate": 1.0103239170822867e-07, "loss": 0.936402440071106, "step": 3492 }, { "epoch": 2.941077441077441, "grad_norm": 11.945917129516602, "learning_rate": 1.0097661741341254e-07, "loss": 0.5219341516494751, "step": 3494 }, { "epoch": 2.942760942760943, "grad_norm": 4.1998820304870605, "learning_rate": 1.0092239019307974e-07, "loss": 0.8593817949295044, "step": 3496 }, { "epoch": 2.9444444444444446, "grad_norm": 11.838610649108887, "learning_rate": 1.0086971023406596e-07, "loss": 0.4355551600456238, "step": 3498 }, { "epoch": 2.9461279461279464, "grad_norm": 7.149326801300049, "learning_rate": 1.0081857771787575e-07, "loss": 0.6722170114517212, "step": 3500 }, { "epoch": 2.9478114478114477, "grad_norm": 6.950955390930176, "learning_rate": 1.0076899282068215e-07, "loss": 0.8052189350128174, "step": 3502 }, { "epoch": 2.9494949494949494, "grad_norm": 8.336926460266113, "learning_rate": 1.00720955713326e-07, "loss": 0.3886244297027588, "step": 3504 }, { "epoch": 2.951178451178451, "grad_norm": 2.504477024078369, "learning_rate": 1.0067446656131536e-07, "loss": 0.7975258231163025, "step": 3506 }, { "epoch": 2.952861952861953, "grad_norm": 3.895413398742676, "learning_rate": 1.0062952552482489e-07, "loss": 0.9311509132385254, "step": 3508 }, { "epoch": 2.9545454545454546, "grad_norm": 3.7988312244415283, "learning_rate": 1.0058613275869534e-07, "loss": 0.7745894193649292, "step": 3510 }, { "epoch": 2.9562289562289563, "grad_norm": 5.594661235809326, "learning_rate": 1.0054428841243314e-07, "loss": 0.8809847235679626, "step": 3512 }, { "epoch": 2.957912457912458, "grad_norm": 10.233572006225586, "learning_rate": 1.0050399263020963e-07, "loss": 0.5408470630645752, "step": 3514 }, { "epoch": 2.9595959595959593, "grad_norm": 4.301779747009277, "learning_rate": 1.0046524555086075e-07, "loss": 0.9347457885742188, "step": 3516 }, { "epoch": 2.961279461279461, "grad_norm": 3.945042610168457, "learning_rate": 1.0042804730788647e-07, "loss": 0.9306644797325134, "step": 3518 }, { "epoch": 2.962962962962963, "grad_norm": 8.176459312438965, "learning_rate": 1.0039239802945032e-07, "loss": 0.5422787666320801, "step": 3520 }, { "epoch": 2.9646464646464645, "grad_norm": 7.75977087020874, "learning_rate": 1.003582978383792e-07, "loss": 0.8068456649780273, "step": 3522 }, { "epoch": 2.9663299663299663, "grad_norm": 4.503453731536865, "learning_rate": 1.003257468521625e-07, "loss": 0.329245924949646, "step": 3524 }, { "epoch": 2.968013468013468, "grad_norm": 5.413825035095215, "learning_rate": 1.0029474518295213e-07, "loss": 0.9549334049224854, "step": 3526 }, { "epoch": 2.9696969696969697, "grad_norm": 5.911332607269287, "learning_rate": 1.0026529293756189e-07, "loss": 0.8807719945907593, "step": 3528 }, { "epoch": 2.9713804713804715, "grad_norm": 4.6141462326049805, "learning_rate": 1.0023739021746709e-07, "loss": 0.9357779026031494, "step": 3530 }, { "epoch": 2.973063973063973, "grad_norm": 22.926517486572266, "learning_rate": 1.002110371188044e-07, "loss": 0.7154991626739502, "step": 3532 }, { "epoch": 2.974747474747475, "grad_norm": 10.70749282836914, "learning_rate": 1.0018623373237139e-07, "loss": 0.3366190493106842, "step": 3534 }, { "epoch": 2.9764309764309766, "grad_norm": 5.614308834075928, "learning_rate": 1.0016298014362602e-07, "loss": 0.9368351697921753, "step": 3536 }, { "epoch": 2.9781144781144784, "grad_norm": 17.715473175048828, "learning_rate": 1.0014127643268678e-07, "loss": 0.5009272694587708, "step": 3538 }, { "epoch": 2.9797979797979797, "grad_norm": 4.969324111938477, "learning_rate": 1.0012112267433204e-07, "loss": 1.1264997720718384, "step": 3540 }, { "epoch": 2.9814814814814814, "grad_norm": 4.244232177734375, "learning_rate": 1.0010251893799999e-07, "loss": 0.9415320158004761, "step": 3542 }, { "epoch": 2.983164983164983, "grad_norm": 5.749111652374268, "learning_rate": 1.0008546528778836e-07, "loss": 0.5878887176513672, "step": 3544 }, { "epoch": 2.984848484848485, "grad_norm": 4.314813613891602, "learning_rate": 1.0006996178245414e-07, "loss": 0.934430718421936, "step": 3546 }, { "epoch": 2.9865319865319866, "grad_norm": 11.60458755493164, "learning_rate": 1.0005600847541344e-07, "loss": 0.4331338703632355, "step": 3548 }, { "epoch": 2.9882154882154883, "grad_norm": 16.16608428955078, "learning_rate": 1.0004360541474121e-07, "loss": 0.4102497398853302, "step": 3550 }, { "epoch": 2.98989898989899, "grad_norm": 4.306326866149902, "learning_rate": 1.0003275264317129e-07, "loss": 0.6111245155334473, "step": 3552 }, { "epoch": 2.9915824915824913, "grad_norm": 3.421985387802124, "learning_rate": 1.00023450198096e-07, "loss": 1.004423975944519, "step": 3554 }, { "epoch": 2.993265993265993, "grad_norm": 5.036525249481201, "learning_rate": 1.0001569811156621e-07, "loss": 0.9042291045188904, "step": 3556 }, { "epoch": 2.994949494949495, "grad_norm": 2.6041200160980225, "learning_rate": 1.0000949641029108e-07, "loss": 0.8039933443069458, "step": 3558 }, { "epoch": 2.9966329966329965, "grad_norm": 4.743382930755615, "learning_rate": 1.000048451156381e-07, "loss": 0.5211207270622253, "step": 3560 }, { "epoch": 2.9983164983164983, "grad_norm": 7.2076416015625, "learning_rate": 1.0000174424363293e-07, "loss": 0.7096606492996216, "step": 3562 }, { "epoch": 3.0, "grad_norm": 8.714981079101562, "learning_rate": 1.0000019380495939e-07, "loss": 0.6827124953269958, "step": 3564 }, { "epoch": 3.0, "step": 3564, "total_flos": 4.2988160857187287e+18, "train_loss": 0.8751117374819068, "train_runtime": 6006.754, "train_samples_per_second": 9.493, "train_steps_per_second": 0.593 } ], "logging_steps": 2, "max_steps": 3564, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 99999, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.2988160857187287e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }